
Merge branch 'kvm-updates/2.6.32' of git://git.kernel.org/pub/scm/virt/kvm/kvm

* 'kvm-updates/2.6.32' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (202 commits)
  MAINTAINERS: update KVM entry
  KVM: correct error-handling code
  KVM: fix compile warnings on s390
  KVM: VMX: Check cpl before emulating debug register access
  KVM: fix misreporting of coalesced interrupts by kvm tracer
  KVM: x86: drop duplicate kvm_flush_remote_tlb calls
  KVM: VMX: call vmx_load_host_state() only if msr is cached
  KVM: VMX: Conditionally reload debug register 6
  KVM: Use thread debug register storage instead of kvm specific data
  KVM guest: do not batch pte updates from interrupt context
  KVM: Fix coalesced interrupt reporting in IOAPIC
  KVM guest: fix bogus wallclock physical address calculation
  KVM: VMX: Fix cr8 exiting control clobbering by EPT
  KVM: Optimize kvm_mmu_unprotect_page_virt() for tdp
  KVM: Document KVM_CAP_IRQCHIP
  KVM: Protect update_cr8_intercept() when running without an apic
  KVM: VMX: Fix EPT with WP bit change during paging
  KVM: Use kvm_{read,write}_guest_virt() to read and write segment descriptors
  KVM: x86 emulator: Add adc and sbb missing decoder flags
  KVM: Add missing #include
  ...
Linus Torvalds
commit 69def9f05d
80 changed files with 5692 additions and 2160 deletions
  1. Documentation/ioctl/ioctl-number.txt (+1 -1)
  2. Documentation/kernel-parameters.txt (+39 -0)
  3. Documentation/kvm/api.txt (+759 -0)
  4. MAINTAINERS (+1 -0)
  5. arch/ia64/include/asm/kvm_host.h (+2 -2)
  6. arch/ia64/include/asm/kvm_para.h (+4 -0)
  7. arch/ia64/kvm/Kconfig (+3 -8)
  8. arch/ia64/kvm/kvm-ia64.c (+28 -57)
  9. arch/ia64/kvm/vcpu.c (+2 -2)
  10. arch/powerpc/include/asm/kvm_host.h (+2 -2)
  11. arch/powerpc/kvm/44x.c (+2 -2)
  12. arch/powerpc/kvm/44x_tlb.c (+6 -5)
  13. arch/powerpc/kvm/Kconfig (+1 -13)
  14. arch/powerpc/kvm/Makefile (+3 -1)
  15. arch/powerpc/kvm/booke.c (+1 -1)
  16. arch/powerpc/kvm/e500.c (+2 -5)
  17. arch/powerpc/kvm/e500_emulate.c (+3 -0)
  18. arch/powerpc/kvm/e500_tlb.c (+12 -14)
  19. arch/powerpc/kvm/e500_tlb.h (+3 -3)
  20. arch/powerpc/kvm/emulate.c (+5 -2)
  21. arch/powerpc/kvm/powerpc.c (+14 -18)
  22. arch/powerpc/kvm/trace.h (+104 -0)
  23. arch/s390/include/asm/kvm.h (+0 -9)
  24. arch/s390/include/asm/kvm_host.h (+9 -6)
  25. arch/s390/include/asm/kvm_para.h (+4 -0)
  26. arch/s390/kvm/Kconfig (+1 -8)
  27. arch/s390/kvm/gaccess.h (+12 -11)
  28. arch/s390/kvm/intercept.c (+12 -6)
  29. arch/s390/kvm/interrupt.c (+1 -7)
  30. arch/s390/kvm/kvm-s390.c (+33 -45)
  31. arch/s390/kvm/kvm-s390.h (+31 -1)
  32. arch/s390/kvm/sigp.c (+36 -24)
  33. arch/x86/include/asm/apicdef.h (+2 -0)
  34. arch/x86/include/asm/kvm.h (+10 -0)
  35. arch/x86/include/asm/kvm_emulate.h (+0 -0)
  36. arch/x86/include/asm/kvm_host.h (+32 -28)
  37. arch/x86/include/asm/kvm_para.h (+2 -0)
  38. arch/x86/include/asm/msr-index.h (+1 -0)
  39. arch/x86/include/asm/vmx.h (+8 -0)
  40. arch/x86/kernel/kvm.c (+1 -6)
  41. arch/x86/kernel/kvmclock.c (+2 -2)
  42. arch/x86/kvm/Kconfig (+4 -17)
  43. arch/x86/kvm/Makefile (+16 -19)
  44. arch/x86/kvm/emulate.c (+258 -7)
  45. arch/x86/kvm/i8254.c (+104 -56)
  46. arch/x86/kvm/i8254.h (+3 -2)
  47. arch/x86/kvm/i8259.c (+55 -61)
  48. arch/x86/kvm/irq.h (+0 -1)
  49. arch/x86/kvm/kvm_cache_regs.h (+9 -0)
  50. arch/x86/kvm/kvm_svm.h (+0 -51)
  51. arch/x86/kvm/kvm_timer.h (+1 -1)
  52. arch/x86/kvm/lapic.c (+246 -88)
  53. arch/x86/kvm/lapic.h (+4 -0)
  54. arch/x86/kvm/mmu.c (+384 -203)
  55. arch/x86/kvm/mmu.h (+3 -1)
  56. arch/x86/kvm/mmutrace.h (+220 -0)
  57. arch/x86/kvm/paging_tmpl.h (+74 -67)
  58. arch/x86/kvm/svm.c (+523 -366)
  59. arch/x86/kvm/timer.c (+10 -6)
  60. arch/x86/kvm/trace.h (+355 -0)
  61. arch/x86/kvm/vmx.c (+361 -136)
  62. arch/x86/kvm/x86.c (+576 -239)
  63. arch/x86/kvm/x86.h (+4 -0)
  64. arch/x86/mm/highmem_32.c (+1 -0)
  65. include/asm-generic/Kbuild.asm (+5 -0)
  66. include/linux/Kbuild (+4 -0)
  67. include/linux/kvm.h (+91 -36)
  68. include/linux/kvm_host.h (+71 -43)
  69. include/linux/kvm_para.h (+1 -0)
  70. include/trace/events/kvm.h (+151 -0)
  71. mm/hugetlb.c (+1 -0)
  72. virt/kvm/Kconfig (+14 -0)
  73. virt/kvm/coalesced_mmio.c (+41 -33)
  74. virt/kvm/coalesced_mmio.h (+1 -0)
  75. virt/kvm/eventfd.c (+578 -0)
  76. virt/kvm/ioapic.c (+53 -25)
  77. virt/kvm/iodev.h (+30 -25)
  78. virt/kvm/irq_comm.c (+40 -11)
  79. virt/kvm/kvm_main.c (+206 -92)
  80. virt/kvm/kvm_trace.c (+0 -285)

+ 1 - 1
Documentation/ioctl/ioctl-number.txt

@@ -193,7 +193,7 @@ Code	Seq#	Include File		Comments
 0xAD	00	Netfilter device	in development:
 					<mailto:rusty@rustcorp.com.au>	
 0xAE	all	linux/kvm.h		Kernel-based Virtual Machine
-					<mailto:kvm-devel@lists.sourceforge.net>
+					<mailto:kvm@vger.kernel.org>
 0xB0	all	RATIO devices		in development:
 					<mailto:vgo@ratio.de>
 0xB1	00-1F	PPPoX			<mailto:mostrows@styx.uwaterloo.ca>

+ 39 - 0
Documentation/kernel-parameters.txt

@@ -57,6 +57,7 @@ parameter is applicable:
 	ISAPNP	ISA PnP code is enabled.
 	ISDN	Appropriate ISDN support is enabled.
 	JOY	Appropriate joystick support is enabled.
+	KVM	Kernel Virtual Machine support is enabled.
 	LIBATA  Libata driver is enabled
 	LP	Printer support is enabled.
 	LOOP	Loopback device support is enabled.
@@ -1098,6 +1099,44 @@ and is between 256 and 4096 characters. It is defined in the file
 	kstack=N	[X86] Print N words from the kernel stack
 			in oops dumps.
 
+	kvm.ignore_msrs=[KVM] Ignore guest accesses to unhandled MSRs.
+			Default is 0 (don't ignore, but inject #GP)
+
+	kvm.oos_shadow=	[KVM] Disable out-of-sync shadow paging.
+			Default is 1 (enabled)
+
+	kvm-amd.nested=	[KVM,AMD] Allow nested virtualization in KVM/SVM.
+			Default is 0 (off)
+
+	kvm-amd.npt=	[KVM,AMD] Disable nested paging (virtualized MMU)
+			for all guests.
+			Default is 1 (enabled) if in 64bit or 32bit-PAE mode
+
+	kvm-intel.bypass_guest_pf=
+			[KVM,Intel] Disables bypassing of guest page faults
+			on Intel chips. Default is 1 (enabled)
+
+	kvm-intel.ept=	[KVM,Intel] Disable extended page tables
+			(virtualized MMU) support on capable Intel chips.
+			Default is 1 (enabled)
+
+	kvm-intel.emulate_invalid_guest_state=
+			[KVM,Intel] Enable emulation of invalid guest states
+			Default is 0 (disabled)
+
+	kvm-intel.flexpriority=
+			[KVM,Intel] Disable FlexPriority feature (TPR shadow).
+			Default is 1 (enabled)
+
+	kvm-intel.unrestricted_guest=
+			[KVM,Intel] Disable unrestricted guest feature
+			(virtualized real and unpaged mode) on capable
+			Intel chips. Default is 1 (enabled)
+
+	kvm-intel.vpid=	[KVM,Intel] Disable Virtual Processor Identification
+			feature (tagged TLBs) on capable Intel chips.
+			Default is 1 (enabled)
+
 	l2cr=		[PPC]
 
 	l3cr=		[PPC]

+ 759 - 0
Documentation/kvm/api.txt

@@ -0,0 +1,759 @@
+The Definitive KVM (Kernel-based Virtual Machine) API Documentation
+===================================================================
+
+1. General description
+
+The kvm API is a set of ioctls that are issued to control various aspects
+of a virtual machine.  The ioctls belong to three classes
+
+ - System ioctls: These query and set global attributes which affect the
+   whole kvm subsystem.  In addition a system ioctl is used to create
+   virtual machines
+
+ - VM ioctls: These query and set attributes that affect an entire virtual
+   machine, for example memory layout.  In addition a VM ioctl is used to
+   create virtual cpus (vcpus).
+
+   Only run VM ioctls from the same process (address space) that was used
+   to create the VM.
+
+ - vcpu ioctls: These query and set attributes that control the operation
+   of a single virtual cpu.
+
+   Only run vcpu ioctls from the same thread that was used to create the
+   vcpu.
+
+2. File descriptors
+
+The kvm API is centered around file descriptors.  An initial
+open("/dev/kvm") obtains a handle to the kvm subsystem; this handle
+can be used to issue system ioctls.  A KVM_CREATE_VM ioctl on this
+handle will create a VM file descriptor which can be used to issue VM
+ioctls.  A KVM_CREATE_VCPU ioctl on a VM fd will create a virtual cpu
+and return a file descriptor pointing to it.  Finally, ioctls on a vcpu
+fd can be used to control the vcpu, including the important task of
+actually running guest code.
+
+In general file descriptors can be migrated among processes by means
+of fork() and the SCM_RIGHTS facility of unix domain socket.  These
+kinds of tricks are explicitly not supported by kvm.  While they will
+not cause harm to the host, their actual behavior is not guaranteed by
+the API.  The only supported use is one virtual machine per process,
+and one vcpu per thread.
+
+3. Extensions
+
+As of Linux 2.6.22, the KVM ABI has been stabilized: no backward
+incompatible changes are allowed.  However, there is an extension
+facility that allows backward-compatible extensions to the API to be
+queried and used.
+
+The extension mechanism is not based on the Linux version number.
+Instead, kvm defines extension identifiers and a facility to query
+whether a particular extension identifier is available.  If it is, a
+set of ioctls is available for application use.
+
+4. API description
+
+This section describes ioctls that can be used to control kvm guests.
+For each ioctl, the following information is provided along with a
+description:
+
+  Capability: which KVM extension provides this ioctl.  Can be 'basic',
+      which means that it will be provided by any kernel that supports
+      API version 12 (see section 4.1), or a KVM_CAP_xyz constant, which
+      means availability needs to be checked with KVM_CHECK_EXTENSION
+      (see section 4.4).
+
+  Architectures: which instruction set architectures provide this ioctl.
+      x86 includes both i386 and x86_64.
+
+  Type: system, vm, or vcpu.
+
+  Parameters: what parameters are accepted by the ioctl.
+
+  Returns: the return value.  General error numbers (EBADF, ENOMEM, EINVAL)
+      are not detailed, but errors with specific meanings are.
+
+4.1 KVM_GET_API_VERSION
+
+Capability: basic
+Architectures: all
+Type: system ioctl
+Parameters: none
+Returns: the constant KVM_API_VERSION (=12)
+
+This identifies the API version as the stable kvm API. It is not
+expected that this number will change.  However, Linux 2.6.20 and
+2.6.21 report earlier versions; these are not documented and not
+supported.  Applications should refuse to run if KVM_GET_API_VERSION
+returns a value other than 12.  If this check passes, all ioctls
+described as 'basic' will be available.
+
+4.2 KVM_CREATE_VM
+
+Capability: basic
+Architectures: all
+Type: system ioctl
+Parameters: none
+Returns: a VM fd that can be used to control the new virtual machine.
+
+The new VM has no virtual cpus and no memory.  An mmap() of a VM fd
+will access the virtual machine's physical address space; offset zero
+corresponds to guest physical address zero.  Use of mmap() on a VM fd
+is discouraged if userspace memory allocation (KVM_CAP_USER_MEMORY) is
+available.
+
+4.3 KVM_GET_MSR_INDEX_LIST
+
+Capability: basic
+Architectures: x86
+Type: system
+Parameters: struct kvm_msr_list (in/out)
+Returns: 0 on success; -1 on error
+Errors:
+  E2BIG:     the msr index list is too big to fit in the array specified by
+             the user.
+
+struct kvm_msr_list {
+	__u32 nmsrs; /* number of msrs in entries */
+	__u32 indices[0];
+};
+
+This ioctl returns the guest msrs that are supported.  The list varies
+by kvm version and host processor, but does not change otherwise.  The
+user fills in the size of the indices array in nmsrs, and in return
+kvm adjusts nmsrs to reflect the actual number of msrs and fills in
+the indices array with their numbers.
+
+4.4 KVM_CHECK_EXTENSION
+
+Capability: basic
+Architectures: all
+Type: system ioctl
+Parameters: extension identifier (KVM_CAP_*)
+Returns: 0 if unsupported; 1 (or some other positive integer) if supported
+
+The API allows the application to query about extensions to the core
+kvm API.  Userspace passes an extension identifier (an integer) and
+receives an integer that describes the extension availability.
+Generally 0 means no and 1 means yes, but some extensions may report
+additional information in the integer return value.
+
+4.5 KVM_GET_VCPU_MMAP_SIZE
+
+Capability: basic
+Architectures: all
+Type: system ioctl
+Parameters: none
+Returns: size of vcpu mmap area, in bytes
+
+The KVM_RUN ioctl (cf.) communicates with userspace via a shared
+memory region.  This ioctl returns the size of that region.  See the
+KVM_RUN documentation for details.
+
+4.6 KVM_SET_MEMORY_REGION
+
+Capability: basic
+Architectures: all
+Type: vm ioctl
+Parameters: struct kvm_memory_region (in)
+Returns: 0 on success, -1 on error
+
+struct kvm_memory_region {
+	__u32 slot;
+	__u32 flags;
+	__u64 guest_phys_addr;
+	__u64 memory_size; /* bytes */
+};
+
+/* for kvm_memory_region::flags */
+#define KVM_MEM_LOG_DIRTY_PAGES  1UL
+
+This ioctl allows the user to create or modify a guest physical memory
+slot.  When changing an existing slot, it may be moved in the guest
+physical memory space, or its flags may be modified.  It may not be
+resized.  Slots may not overlap.
+
+The flags field supports just one flag, KVM_MEM_LOG_DIRTY_PAGES, which
+instructs kvm to keep track of writes to memory within the slot.  See
+the KVM_GET_DIRTY_LOG ioctl.
+
+It is recommended to use the KVM_SET_USER_MEMORY_REGION ioctl instead
+of this API, if available.  This newer API allows placing guest memory
+at specified locations in the host address space, yielding better
+control and easy access.
+
+4.6 KVM_CREATE_VCPU
+
+Capability: basic
+Architectures: all
+Type: vm ioctl
+Parameters: vcpu id (apic id on x86)
+Returns: vcpu fd on success, -1 on error
+
+This API adds a vcpu to a virtual machine.  The vcpu id is a small integer
+in the range [0, max_vcpus).
+
+4.7 KVM_GET_DIRTY_LOG (vm ioctl)
+
+Capability: basic
+Architectures: x86
+Type: vm ioctl
+Parameters: struct kvm_dirty_log (in/out)
+Returns: 0 on success, -1 on error
+
+/* for KVM_GET_DIRTY_LOG */
+struct kvm_dirty_log {
+	__u32 slot;
+	__u32 padding;
+	union {
+		void __user *dirty_bitmap; /* one bit per page */
+		__u64 padding;
+	};
+};
+
+Given a memory slot, return a bitmap containing any pages dirtied
+since the last call to this ioctl.  Bit 0 is the first page in the
+memory slot.  Ensure the entire structure is cleared to avoid padding
+issues.
+
+4.8 KVM_SET_MEMORY_ALIAS
+
+Capability: basic
+Architectures: x86
+Type: vm ioctl
+Parameters: struct kvm_memory_alias (in)
+Returns: 0 (success), -1 (error)
+
+struct kvm_memory_alias {
+	__u32 slot;  /* this has a different namespace than memory slots */
+	__u32 flags;
+	__u64 guest_phys_addr;
+	__u64 memory_size;
+	__u64 target_phys_addr;
+};
+
+Defines a guest physical address space region as an alias to another
+region.  Useful for aliased address, for example the VGA low memory
+window. Should not be used with userspace memory.
+
+4.9 KVM_RUN
+
+Capability: basic
+Architectures: all
+Type: vcpu ioctl
+Parameters: none
+Returns: 0 on success, -1 on error
+Errors:
+  EINTR:     an unmasked signal is pending
+
+This ioctl is used to run a guest virtual cpu.  While there are no
+explicit parameters, there is an implicit parameter block that can be
+obtained by mmap()ing the vcpu fd at offset 0, with the size given by
+KVM_GET_VCPU_MMAP_SIZE.  The parameter block is formatted as a 'struct
+kvm_run' (see below).
+
+4.10 KVM_GET_REGS
+
+Capability: basic
+Architectures: all
+Type: vcpu ioctl
+Parameters: struct kvm_regs (out)
+Returns: 0 on success, -1 on error
+
+Reads the general purpose registers from the vcpu.
+
+/* x86 */
+struct kvm_regs {
+	/* out (KVM_GET_REGS) / in (KVM_SET_REGS) */
+	__u64 rax, rbx, rcx, rdx;
+	__u64 rsi, rdi, rsp, rbp;
+	__u64 r8,  r9,  r10, r11;
+	__u64 r12, r13, r14, r15;
+	__u64 rip, rflags;
+};
+
+4.11 KVM_SET_REGS
+
+Capability: basic
+Architectures: all
+Type: vcpu ioctl
+Parameters: struct kvm_regs (in)
+Returns: 0 on success, -1 on error
+
+Writes the general purpose registers into the vcpu.
+
+See KVM_GET_REGS for the data structure.
+
+4.12 KVM_GET_SREGS
+
+Capability: basic
+Architectures: x86
+Type: vcpu ioctl
+Parameters: struct kvm_sregs (out)
+Returns: 0 on success, -1 on error
+
+Reads special registers from the vcpu.
+
+/* x86 */
+struct kvm_sregs {
+	struct kvm_segment cs, ds, es, fs, gs, ss;
+	struct kvm_segment tr, ldt;
+	struct kvm_dtable gdt, idt;
+	__u64 cr0, cr2, cr3, cr4, cr8;
+	__u64 efer;
+	__u64 apic_base;
+	__u64 interrupt_bitmap[(KVM_NR_INTERRUPTS + 63) / 64];
+};
+
+interrupt_bitmap is a bitmap of pending external interrupts.  At most
+one bit may be set.  This interrupt has been acknowledged by the APIC
+but not yet injected into the cpu core.
+
+4.13 KVM_SET_SREGS
+
+Capability: basic
+Architectures: x86
+Type: vcpu ioctl
+Parameters: struct kvm_sregs (in)
+Returns: 0 on success, -1 on error
+
+Writes special registers into the vcpu.  See KVM_GET_SREGS for the
+data structures.
+
+4.14 KVM_TRANSLATE
+
+Capability: basic
+Architectures: x86
+Type: vcpu ioctl
+Parameters: struct kvm_translation (in/out)
+Returns: 0 on success, -1 on error
+
+Translates a virtual address according to the vcpu's current address
+translation mode.
+
+struct kvm_translation {
+	/* in */
+	__u64 linear_address;
+
+	/* out */
+	__u64 physical_address;
+	__u8  valid;
+	__u8  writeable;
+	__u8  usermode;
+	__u8  pad[5];
+};
+
+4.15 KVM_INTERRUPT
+
+Capability: basic
+Architectures: x86
+Type: vcpu ioctl
+Parameters: struct kvm_interrupt (in)
+Returns: 0 on success, -1 on error
+
+Queues a hardware interrupt vector to be injected.  This is only
+useful if in-kernel local APIC is not used.
+
+/* for KVM_INTERRUPT */
+struct kvm_interrupt {
+	/* in */
+	__u32 irq;
+};
+
+Note 'irq' is an interrupt vector, not an interrupt pin or line.
+
+4.16 KVM_DEBUG_GUEST
+
+Capability: basic
+Architectures: none
+Type: vcpu ioctl
+Parameters: none
+Returns: -1 on error
+
+Support for this has been removed.  Use KVM_SET_GUEST_DEBUG instead.
+
+4.17 KVM_GET_MSRS
+
+Capability: basic
+Architectures: x86
+Type: vcpu ioctl
+Parameters: struct kvm_msrs (in/out)
+Returns: 0 on success, -1 on error
+
+Reads model-specific registers from the vcpu.  Supported msr indices can
+be obtained using KVM_GET_MSR_INDEX_LIST.
+
+struct kvm_msrs {
+	__u32 nmsrs; /* number of msrs in entries */
+	__u32 pad;
+
+	struct kvm_msr_entry entries[0];
+};
+
+struct kvm_msr_entry {
+	__u32 index;
+	__u32 reserved;
+	__u64 data;
+};
+
+Application code should set the 'nmsrs' member (which indicates the
+size of the entries array) and the 'index' member of each array entry.
+kvm will fill in the 'data' member.
+
+4.18 KVM_SET_MSRS
+
+Capability: basic
+Architectures: x86
+Type: vcpu ioctl
+Parameters: struct kvm_msrs (in)
+Returns: 0 on success, -1 on error
+
+Writes model-specific registers to the vcpu.  See KVM_GET_MSRS for the
+data structures.
+
+Application code should set the 'nmsrs' member (which indicates the
+size of the entries array), and the 'index' and 'data' members of each
+array entry.
+
+4.19 KVM_SET_CPUID
+
+Capability: basic
+Architectures: x86
+Type: vcpu ioctl
+Parameters: struct kvm_cpuid (in)
+Returns: 0 on success, -1 on error
+
+Defines the vcpu responses to the cpuid instruction.  Applications
+should use the KVM_SET_CPUID2 ioctl if available.
+
+
+struct kvm_cpuid_entry {
+	__u32 function;
+	__u32 eax;
+	__u32 ebx;
+	__u32 ecx;
+	__u32 edx;
+	__u32 padding;
+};
+
+/* for KVM_SET_CPUID */
+struct kvm_cpuid {
+	__u32 nent;
+	__u32 padding;
+	struct kvm_cpuid_entry entries[0];
+};
+
+4.20 KVM_SET_SIGNAL_MASK
+
+Capability: basic
+Architectures: x86
+Type: vcpu ioctl
+Parameters: struct kvm_signal_mask (in)
+Returns: 0 on success, -1 on error
+
+Defines which signals are blocked during execution of KVM_RUN.  This
+signal mask temporarily overrides the thread's signal mask.  Any
+unblocked signal received (except SIGKILL and SIGSTOP, which retain
+their traditional behaviour) will cause KVM_RUN to return with -EINTR.
+
+Note the signal will only be delivered if not blocked by the original
+signal mask.
+
+/* for KVM_SET_SIGNAL_MASK */
+struct kvm_signal_mask {
+	__u32 len;
+	__u8  sigset[0];
+};
+
+4.21 KVM_GET_FPU
+
+Capability: basic
+Architectures: x86
+Type: vcpu ioctl
+Parameters: struct kvm_fpu (out)
+Returns: 0 on success, -1 on error
+
+Reads the floating point state from the vcpu.
+
+/* for KVM_GET_FPU and KVM_SET_FPU */
+struct kvm_fpu {
+	__u8  fpr[8][16];
+	__u16 fcw;
+	__u16 fsw;
+	__u8  ftwx;  /* in fxsave format */
+	__u8  pad1;
+	__u16 last_opcode;
+	__u64 last_ip;
+	__u64 last_dp;
+	__u8  xmm[16][16];
+	__u32 mxcsr;
+	__u32 pad2;
+};
+
+4.22 KVM_SET_FPU
+
+Capability: basic
+Architectures: x86
+Type: vcpu ioctl
+Parameters: struct kvm_fpu (in)
+Returns: 0 on success, -1 on error
+
+Writes the floating point state to the vcpu.
+
+/* for KVM_GET_FPU and KVM_SET_FPU */
+struct kvm_fpu {
+	__u8  fpr[8][16];
+	__u16 fcw;
+	__u16 fsw;
+	__u8  ftwx;  /* in fxsave format */
+	__u8  pad1;
+	__u16 last_opcode;
+	__u64 last_ip;
+	__u64 last_dp;
+	__u8  xmm[16][16];
+	__u32 mxcsr;
+	__u32 pad2;
+};
+
+4.23 KVM_CREATE_IRQCHIP
+
+Capability: KVM_CAP_IRQCHIP
+Architectures: x86, ia64
+Type: vm ioctl
+Parameters: none
+Returns: 0 on success, -1 on error
+
+Creates an interrupt controller model in the kernel.  On x86, creates a virtual
+ioapic, a virtual PIC (two PICs, nested), and sets up future vcpus to have a
+local APIC.  IRQ routing for GSIs 0-15 is set to both PIC and IOAPIC; GSI 16-23
+only go to the IOAPIC.  On ia64, an IOSAPIC is created.
+
+4.24 KVM_IRQ_LINE
+
+Capability: KVM_CAP_IRQCHIP
+Architectures: x86, ia64
+Type: vm ioctl
+Parameters: struct kvm_irq_level
+Returns: 0 on success, -1 on error
+
+Sets the level of a GSI input to the interrupt controller model in the kernel.
+Requires that an interrupt controller model has been previously created with
+KVM_CREATE_IRQCHIP.  Note that edge-triggered interrupts require the level
+to be set to 1 and then back to 0.
+
+struct kvm_irq_level {
+	union {
+		__u32 irq;     /* GSI */
+		__s32 status;  /* not used for KVM_IRQ_LEVEL */
+	};
+	__u32 level;           /* 0 or 1 */
+};
+
+4.25 KVM_GET_IRQCHIP
+
+Capability: KVM_CAP_IRQCHIP
+Architectures: x86, ia64
+Type: vm ioctl
+Parameters: struct kvm_irqchip (in/out)
+Returns: 0 on success, -1 on error
+
+Reads the state of a kernel interrupt controller created with
+KVM_CREATE_IRQCHIP into a buffer provided by the caller.
+
+struct kvm_irqchip {
+	__u32 chip_id;  /* 0 = PIC1, 1 = PIC2, 2 = IOAPIC */
+	__u32 pad;
+        union {
+		char dummy[512];  /* reserving space */
+		struct kvm_pic_state pic;
+		struct kvm_ioapic_state ioapic;
+	} chip;
+};
+
+4.26 KVM_SET_IRQCHIP
+
+Capability: KVM_CAP_IRQCHIP
+Architectures: x86, ia64
+Type: vm ioctl
+Parameters: struct kvm_irqchip (in)
+Returns: 0 on success, -1 on error
+
+Sets the state of a kernel interrupt controller created with
+KVM_CREATE_IRQCHIP from a buffer provided by the caller.
+
+struct kvm_irqchip {
+	__u32 chip_id;  /* 0 = PIC1, 1 = PIC2, 2 = IOAPIC */
+	__u32 pad;
+        union {
+		char dummy[512];  /* reserving space */
+		struct kvm_pic_state pic;
+		struct kvm_ioapic_state ioapic;
+	} chip;
+};
+
+5. The kvm_run structure
+
+Application code obtains a pointer to the kvm_run structure by
+mmap()ing a vcpu fd.  From that point, application code can control
+execution by changing fields in kvm_run prior to calling the KVM_RUN
+ioctl, and obtain information about the reason KVM_RUN returned by
+looking up structure members.
+
+struct kvm_run {
+	/* in */
+	__u8 request_interrupt_window;
+
+Request that KVM_RUN return when it becomes possible to inject external
+interrupts into the guest.  Useful in conjunction with KVM_INTERRUPT.
+
+	__u8 padding1[7];
+
+	/* out */
+	__u32 exit_reason;
+
+When KVM_RUN has returned successfully (return value 0), this informs
+application code why KVM_RUN has returned.  Allowable values for this
+field are detailed below.
+
+	__u8 ready_for_interrupt_injection;
+
+If request_interrupt_window has been specified, this field indicates
+an interrupt can be injected now with KVM_INTERRUPT.
+
+	__u8 if_flag;
+
+The value of the current interrupt flag.  Only valid if in-kernel
+local APIC is not used.
+
+	__u8 padding2[2];
+
+	/* in (pre_kvm_run), out (post_kvm_run) */
+	__u64 cr8;
+
+The value of the cr8 register.  Only valid if in-kernel local APIC is
+not used.  Both input and output.
+
+	__u64 apic_base;
+
+The value of the APIC BASE msr.  Only valid if in-kernel local
+APIC is not used.  Both input and output.
+
+	union {
+		/* KVM_EXIT_UNKNOWN */
+		struct {
+			__u64 hardware_exit_reason;
+		} hw;
+
+If exit_reason is KVM_EXIT_UNKNOWN, the vcpu has exited due to unknown
+reasons.  Further architecture-specific information is available in
+hardware_exit_reason.
+
+		/* KVM_EXIT_FAIL_ENTRY */
+		struct {
+			__u64 hardware_entry_failure_reason;
+		} fail_entry;
+
+If exit_reason is KVM_EXIT_FAIL_ENTRY, the vcpu could not be run due
+to unknown reasons.  Further architecture-specific information is
+available in hardware_entry_failure_reason.
+
+		/* KVM_EXIT_EXCEPTION */
+		struct {
+			__u32 exception;
+			__u32 error_code;
+		} ex;
+
+Unused.
+
+		/* KVM_EXIT_IO */
+		struct {
+#define KVM_EXIT_IO_IN  0
+#define KVM_EXIT_IO_OUT 1
+			__u8 direction;
+			__u8 size; /* bytes */
+			__u16 port;
+			__u32 count;
+			__u64 data_offset; /* relative to kvm_run start */
+		} io;
+
+If exit_reason is KVM_EXIT_IO, then the vcpu has
+executed a port I/O instruction which could not be satisfied by kvm.
+data_offset describes where the data is located (KVM_EXIT_IO_OUT) or
+where kvm expects application code to place the data for the next
+KVM_RUN invocation (KVM_EXIT_IO_IN).  Data format is a packed array.
+
+		struct {
+			struct kvm_debug_exit_arch arch;
+		} debug;
+
+Unused.
+
+		/* KVM_EXIT_MMIO */
+		struct {
+			__u64 phys_addr;
+			__u8  data[8];
+			__u32 len;
+			__u8  is_write;
+		} mmio;
+
+If exit_reason is KVM_EXIT_MMIO, then the vcpu has
+executed a memory-mapped I/O instruction which could not be satisfied
+by kvm.  The 'data' member contains the written data if 'is_write' is
+true, and should be filled by application code otherwise.
+
+		/* KVM_EXIT_HYPERCALL */
+		struct {
+			__u64 nr;
+			__u64 args[6];
+			__u64 ret;
+			__u32 longmode;
+			__u32 pad;
+		} hypercall;
+
+Unused.
+
+		/* KVM_EXIT_TPR_ACCESS */
+		struct {
+			__u64 rip;
+			__u32 is_write;
+			__u32 pad;
+		} tpr_access;
+
+To be documented (KVM_TPR_ACCESS_REPORTING).
+
+		/* KVM_EXIT_S390_SIEIC */
+		struct {
+			__u8 icptcode;
+			__u64 mask; /* psw upper half */
+			__u64 addr; /* psw lower half */
+			__u16 ipa;
+			__u32 ipb;
+		} s390_sieic;
+
+s390 specific.
+
+		/* KVM_EXIT_S390_RESET */
+#define KVM_S390_RESET_POR       1
+#define KVM_S390_RESET_CLEAR     2
+#define KVM_S390_RESET_SUBSYSTEM 4
+#define KVM_S390_RESET_CPU_INIT  8
+#define KVM_S390_RESET_IPL       16
+		__u64 s390_reset_flags;
+
+s390 specific.
+
+		/* KVM_EXIT_DCR */
+		struct {
+			__u32 dcrn;
+			__u32 data;
+			__u8  is_write;
+		} dcr;
+
+powerpc specific.
+
+		/* Fix the size of the union. */
+		char padding[256];
+	};
+};

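As a rough illustration of the flow the new document describes (system fd, then VM fd, then vcpu fd, then the mmap()ed kvm_run block), a minimal caller might look like the sketch below. This is only a sketch of the ioctl sequence: error handling is abbreviated and guest memory setup via KVM_SET_USER_MEMORY_REGION is omitted, so it does not boot a real guest.

#include <fcntl.h>
#include <linux/kvm.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <sys/mman.h>

int main(void)
{
	int sys_fd, vm_fd, vcpu_fd, mmap_size;
	struct kvm_run *run;

	/* System fd: used for system ioctls such as KVM_CREATE_VM (4.2). */
	sys_fd = open("/dev/kvm", O_RDWR);
	if (sys_fd < 0 || ioctl(sys_fd, KVM_GET_API_VERSION, 0) != 12)
		return 1;	/* refuse to run, as section 4.1 recommends */

	/* VM fd: owns memory slots and vcpus. */
	vm_fd = ioctl(sys_fd, KVM_CREATE_VM, 0);

	/* Guest memory would be registered here (KVM_SET_USER_MEMORY_REGION
	 * where available) before creating and running any vcpu. */

	/* vcpu fd: only used from the thread that created it. */
	vcpu_fd = ioctl(vm_fd, KVM_CREATE_VCPU, 0);

	/* The kvm_run parameter block is obtained by mmap()ing the vcpu fd. */
	mmap_size = ioctl(sys_fd, KVM_GET_VCPU_MMAP_SIZE, 0);
	run = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED,
		   vcpu_fd, 0);

	while (ioctl(vcpu_fd, KVM_RUN, 0) == 0) {
		switch (run->exit_reason) {
		case KVM_EXIT_IO:
			/* Data sits at (char *)run + run->io.data_offset. */
			printf("port I/O exit, port 0x%x\n", run->io.port);
			break;
		case KVM_EXIT_MMIO:
			printf("mmio exit at 0x%llx\n",
			       (unsigned long long)run->mmio.phys_addr);
			break;
		default:
			return 0;
		}
	}
	return 0;
}
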
+ 1 - 0
MAINTAINERS

@@ -2926,6 +2926,7 @@ F:	include/linux/sunrpc/
 
 
 KERNEL VIRTUAL MACHINE (KVM)
 M:	Avi Kivity <avi@redhat.com>
+M:	Marcelo Tosatti <mtosatti@redhat.com>
 L:	kvm@vger.kernel.org
 W:	http://kvm.qumranet.com
 S:	Supported

+ 2 - 2
arch/ia64/include/asm/kvm_host.h

@@ -235,7 +235,8 @@ struct kvm_vm_data {
 #define KVM_REQ_PTC_G		32
 #define KVM_REQ_RESUME		33
 
-#define KVM_PAGES_PER_HPAGE	1
+#define KVM_NR_PAGE_SIZES	1
+#define KVM_PAGES_PER_HPAGE(x)	1
 
 struct kvm;
 struct kvm_vcpu;
@@ -465,7 +466,6 @@ struct kvm_arch {
 	unsigned long	metaphysical_rr4;
 	unsigned long	vmm_init_rr;
 
-	int		online_vcpus;
 	int		is_sn2;
 
 	struct kvm_ioapic *vioapic;

+ 4 - 0
arch/ia64/include/asm/kvm_para.h

@@ -19,9 +19,13 @@
  *
  */
 
+#ifdef __KERNEL__
+
 static inline unsigned int kvm_arch_para_features(void)
 {
 	return 0;
 }
 
 #endif
+
+#endif

+ 3 - 8
arch/ia64/kvm/Kconfig

@@ -1,12 +1,8 @@
 #
 # KVM configuration
 #
-config HAVE_KVM
-	bool
 
-config HAVE_KVM_IRQCHIP
-       bool
-       default y
+source "virt/kvm/Kconfig"
 
 menuconfig VIRTUALIZATION
 	bool "Virtualization"
@@ -28,6 +24,8 @@ config KVM
 	depends on PCI
 	select PREEMPT_NOTIFIERS
 	select ANON_INODES
+	select HAVE_KVM_IRQCHIP
+	select KVM_APIC_ARCHITECTURE
 	---help---
 	  Support hosting fully virtualized guest machines using hardware
 	  virtualization extensions.  You will need a fairly recent
@@ -49,9 +47,6 @@ config KVM_INTEL
 	  Provides support for KVM on Itanium 2 processors equipped with the VT
 	  extensions.
 
-config KVM_TRACE
-       bool
-
 source drivers/virtio/Kconfig
 
 endif # VIRTUALIZATION

+ 28 - 57
arch/ia64/kvm/kvm-ia64.c

@@ -210,16 +210,6 @@ int kvm_dev_ioctl_check_extension(long ext)
 
 
 }
 
-static struct kvm_io_device *vcpu_find_mmio_dev(struct kvm_vcpu *vcpu,
-					gpa_t addr, int len, int is_write)
-{
-	struct kvm_io_device *dev;
-
-	dev = kvm_io_bus_find_dev(&vcpu->kvm->mmio_bus, addr, len, is_write);
-
-	return dev;
-}
-
 static int handle_vm_error(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
 	kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
@@ -231,6 +221,7 @@ static int handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
 	struct kvm_mmio_req *p;
 	struct kvm_io_device *mmio_dev;
+	int r;
 
 	p = kvm_get_vcpu_ioreq(vcpu);
 
@@ -247,16 +238,13 @@ static int handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	kvm_run->exit_reason = KVM_EXIT_MMIO;
 	return 0;
 mmio:
-	mmio_dev = vcpu_find_mmio_dev(vcpu, p->addr, p->size, !p->dir);
-	if (mmio_dev) {
-		if (!p->dir)
-			kvm_iodevice_write(mmio_dev, p->addr, p->size,
-						&p->data);
-		else
-			kvm_iodevice_read(mmio_dev, p->addr, p->size,
-						&p->data);
-
-	} else
+	if (p->dir)
+		r = kvm_io_bus_read(&vcpu->kvm->mmio_bus, p->addr,
+				    p->size, &p->data);
+	else
+		r = kvm_io_bus_write(&vcpu->kvm->mmio_bus, p->addr,
+				     p->size, &p->data);
+	if (r)
 		printk(KERN_ERR"kvm: No iodevice found! addr:%lx\n", p->addr);
 	p->state = STATE_IORESP_READY;
 
@@ -337,13 +325,12 @@ static struct kvm_vcpu *lid_to_vcpu(struct kvm *kvm, unsigned long id,
 {
 	union ia64_lid lid;
 	int i;
+	struct kvm_vcpu *vcpu;
 
-	for (i = 0; i < kvm->arch.online_vcpus; i++) {
-		if (kvm->vcpus[i]) {
-			lid.val = VCPU_LID(kvm->vcpus[i]);
-			if (lid.id == id && lid.eid == eid)
-				return kvm->vcpus[i];
-		}
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		lid.val = VCPU_LID(vcpu);
+		if (lid.id == id && lid.eid == eid)
+			return vcpu;
 	}
 
 	return NULL;
@@ -409,21 +396,21 @@ static int handle_global_purge(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	struct kvm *kvm = vcpu->kvm;
 	struct call_data call_data;
 	int i;
+	struct kvm_vcpu *vcpui;
 
 	call_data.ptc_g_data = p->u.ptc_g_data;
 
-	for (i = 0; i < kvm->arch.online_vcpus; i++) {
-		if (!kvm->vcpus[i] || kvm->vcpus[i]->arch.mp_state ==
-						KVM_MP_STATE_UNINITIALIZED ||
-					vcpu == kvm->vcpus[i])
+	kvm_for_each_vcpu(i, vcpui, kvm) {
+		if (vcpui->arch.mp_state == KVM_MP_STATE_UNINITIALIZED ||
+				vcpu == vcpui)
 			continue;
 
-		if (waitqueue_active(&kvm->vcpus[i]->wq))
-			wake_up_interruptible(&kvm->vcpus[i]->wq);
+		if (waitqueue_active(&vcpui->wq))
+			wake_up_interruptible(&vcpui->wq);
 
-		if (kvm->vcpus[i]->cpu != -1) {
-			call_data.vcpu = kvm->vcpus[i];
-			smp_call_function_single(kvm->vcpus[i]->cpu,
+		if (vcpui->cpu != -1) {
+			call_data.vcpu = vcpui;
+			smp_call_function_single(vcpui->cpu,
 					vcpu_global_purge, &call_data, 1);
 		} else
 			printk(KERN_WARNING"kvm: Uninit vcpu received ipi!\n");
@@ -852,8 +839,6 @@ struct  kvm *kvm_arch_create_vm(void)
 
 
 	kvm_init_vm(kvm);
 
-	kvm->arch.online_vcpus = 0;
-
 	return kvm;
 
 }
@@ -1000,10 +985,10 @@ long kvm_arch_vm_ioctl(struct file *filp,
 			goto out;
 		if (irqchip_in_kernel(kvm)) {
 			__s32 status;
-			mutex_lock(&kvm->lock);
+			mutex_lock(&kvm->irq_lock);
 			status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
 				    irq_event.irq, irq_event.level);
-			mutex_unlock(&kvm->lock);
+			mutex_unlock(&kvm->irq_lock);
 			if (ioctl == KVM_IRQ_LINE_STATUS) {
 				irq_event.status = status;
 				if (copy_to_user(argp, &irq_event,
@@ -1216,7 +1201,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 	if (IS_ERR(vmm_vcpu))
 		return PTR_ERR(vmm_vcpu);
 
-	if (vcpu->vcpu_id == 0) {
+	if (kvm_vcpu_is_bsp(vcpu)) {
 		vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
 
 		/*Set entry address for first run.*/
@@ -1224,7 +1209,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 
 
 		/*Initialize itc offset for vcpus*/
 		itc_offset = 0UL - kvm_get_itc(vcpu);
-		for (i = 0; i < kvm->arch.online_vcpus; i++) {
+		for (i = 0; i < KVM_MAX_VCPUS; i++) {
 			v = (struct kvm_vcpu *)((char *)vcpu +
 					sizeof(struct kvm_vcpu_data) * i);
 			v->arch.itc_offset = itc_offset;
@@ -1356,8 +1341,6 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
 		goto fail;
 	}
 
-	kvm->arch.online_vcpus++;
-
 	return vcpu;
 fail:
 	return ERR_PTR(r);
@@ -1952,19 +1935,6 @@ int kvm_highest_pending_irq(struct kvm_vcpu *vcpu)
     return find_highest_bits((int *)&vpd->irr[0]);
 }
 
-int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu)
-{
-	if (kvm_highest_pending_irq(vcpu) != -1)
-		return 1;
-	return 0;
-}
-
-int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu)
-{
-	/* do real check here */
-	return 1;
-}
-
 int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
 {
 	return vcpu->arch.timer_fired;
@@ -1977,7 +1947,8 @@ gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
 
 
 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
 {
-	return vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE;
+	return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE) ||
+		(kvm_highest_pending_irq(vcpu) != -1);
 }
 
 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,

+ 2 - 2
arch/ia64/kvm/vcpu.c

@@ -830,8 +830,8 @@ static void vcpu_set_itc(struct kvm_vcpu *vcpu, u64 val)
 
 
 	kvm = (struct kvm *)KVM_VM_BASE;
 
-	if (vcpu->vcpu_id == 0) {
-		for (i = 0; i < kvm->arch.online_vcpus; i++) {
+	if (kvm_vcpu_is_bsp(vcpu)) {
+		for (i = 0; i < atomic_read(&kvm->online_vcpus); i++) {
 			v = (struct kvm_vcpu *)((char *)vcpu +
 					sizeof(struct kvm_vcpu_data) * i);
 			VMX(v, itc_offset) = itc_offset;

+ 2 - 2
arch/powerpc/include/asm/kvm_host.h

@@ -34,7 +34,8 @@
 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1
 
 /* We don't currently support large pages. */
-#define KVM_PAGES_PER_HPAGE (1UL << 31)
+#define KVM_NR_PAGE_SIZES	1
+#define KVM_PAGES_PER_HPAGE(x)	(1UL<<31)
 
 struct kvm;
 struct kvm_run;
@@ -153,7 +154,6 @@ struct kvm_vcpu_arch {
 	u32 pid;
 	u32 swap_pid;
 
-	u32 pvr;
 	u32 ccr0;
 	u32 ccr1;
 	u32 dbcr0;

+ 2 - 2
arch/powerpc/kvm/44x.c

@@ -138,7 +138,7 @@ void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
 	kmem_cache_free(kvm_vcpu_cache, vcpu_44x);
 }
 
-static int kvmppc_44x_init(void)
+static int __init kvmppc_44x_init(void)
 {
 	int r;
 
@@ -149,7 +149,7 @@ static int kvmppc_44x_init(void)
 	return kvm_init(NULL, sizeof(struct kvmppc_vcpu_44x), THIS_MODULE);
 }
 
-static void kvmppc_44x_exit(void)
+static void __exit kvmppc_44x_exit(void)
 {
 	kvmppc_booke_exit();
 }

+ 6 - 5
arch/powerpc/kvm/44x_tlb.c

@@ -30,6 +30,7 @@
 #include "timing.h"
 #include "timing.h"
 
 
 #include "44x_tlb.h"
 #include "44x_tlb.h"
+#include "trace.h"
 
 
 #ifndef PPC44x_TLBE_SIZE
 #ifndef PPC44x_TLBE_SIZE
 #define PPC44x_TLBE_SIZE	PPC44x_TLB_4K
 #define PPC44x_TLBE_SIZE	PPC44x_TLB_4K
@@ -263,7 +264,7 @@ static void kvmppc_44x_shadow_release(struct kvmppc_vcpu_44x *vcpu_44x,
 
 
 	/* XXX set tlb_44x_index to stlb_index? */
 
-	KVMTRACE_1D(STLB_INVAL, &vcpu_44x->vcpu, stlb_index, handler);
+	trace_kvm_stlb_inval(stlb_index);
 }
 
 void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu)
@@ -365,8 +366,8 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gpa_t gpaddr,
 	/* Insert shadow mapping into hardware TLB. */
 	kvmppc_44x_tlbe_set_modified(vcpu_44x, victim);
 	kvmppc_44x_tlbwe(victim, &stlbe);
-	KVMTRACE_5D(STLB_WRITE, vcpu, victim, stlbe.tid, stlbe.word0, stlbe.word1,
-	            stlbe.word2, handler);
+	trace_kvm_stlb_write(victim, stlbe.tid, stlbe.word0, stlbe.word1,
+			     stlbe.word2);
 }
 
 /* For a particular guest TLB entry, invalidate the corresponding host TLB
@@ -485,8 +486,8 @@ int kvmppc_44x_emul_tlbwe(struct kvm_vcpu *vcpu, u8 ra, u8 rs, u8 ws)
 		kvmppc_mmu_map(vcpu, eaddr, gpaddr, gtlb_index);
 	}
 
-	KVMTRACE_5D(GTLB_WRITE, vcpu, gtlb_index, tlbe->tid, tlbe->word0,
-	            tlbe->word1, tlbe->word2, handler);
+	trace_kvm_gtlb_write(gtlb_index, tlbe->tid, tlbe->word0, tlbe->word1,
+			     tlbe->word2);
 
 	kvmppc_set_exit_type(vcpu, EMULATED_TLBWE_EXITS);
 	return EMULATE_DONE;

+ 1 - 13
arch/powerpc/kvm/Kconfig

@@ -2,8 +2,7 @@
 # KVM configuration
 #
 
-config HAVE_KVM_IRQCHIP
-       bool
+source "virt/kvm/Kconfig"
 
 menuconfig VIRTUALIZATION
 	bool "Virtualization"
@@ -59,17 +58,6 @@ config KVM_E500
 
 
 	  If unsure, say N.
 
-config KVM_TRACE
-	bool "KVM trace support"
-	depends on KVM && MARKERS && SYSFS
-	select RELAY
-	select DEBUG_FS
-	default n
-	---help---
-	  This option allows reading a trace of kvm-related events through
-	  relayfs.  Note the ABI is not considered stable and will be
-	  modified in future updates.
-
 source drivers/virtio/Kconfig
 
 endif # VIRTUALIZATION

+ 3 - 1
arch/powerpc/kvm/Makefile

@@ -8,7 +8,9 @@ EXTRA_CFLAGS += -Ivirt/kvm -Iarch/powerpc/kvm
 
 
 common-objs-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o)
 
-common-objs-$(CONFIG_KVM_TRACE)  += $(addprefix ../../../virt/kvm/, kvm_trace.o)
+CFLAGS_44x_tlb.o  := -I.
+CFLAGS_e500_tlb.o := -I.
+CFLAGS_emulate.o  := -I.
 
 kvm-objs := $(common-objs-y) powerpc.o emulate.o
 obj-$(CONFIG_KVM_EXIT_TIMING) += timing.o

+ 1 - 1
arch/powerpc/kvm/booke.c

@@ -520,7 +520,7 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
 	return kvmppc_core_vcpu_translate(vcpu, tr);
 }
 
-int kvmppc_booke_init(void)
+int __init kvmppc_booke_init(void)
 {
 	unsigned long ivor[16];
 	unsigned long max_ivor = 0;

+ 2 - 5
arch/powerpc/kvm/e500.c

@@ -60,9 +60,6 @@ int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu)
 
 
 	kvmppc_e500_tlb_setup(vcpu_e500);
 
-	/* Use the same core vertion as host's */
-	vcpu->arch.pvr = mfspr(SPRN_PVR);
-
 	return 0;
 }
 
@@ -132,7 +129,7 @@ void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
 	kmem_cache_free(kvm_vcpu_cache, vcpu_e500);
 }
 
-static int kvmppc_e500_init(void)
+static int __init kvmppc_e500_init(void)
 {
 	int r, i;
 	unsigned long ivor[3];
@@ -160,7 +157,7 @@ static int kvmppc_e500_init(void)
 	return kvm_init(NULL, sizeof(struct kvmppc_vcpu_e500), THIS_MODULE);
 }
 
-static void kvmppc_e500_exit(void)
+static void __init kvmppc_e500_exit(void)
 {
 	kvmppc_booke_exit();
 }

+ 3 - 0
arch/powerpc/kvm/e500_emulate.c

@@ -180,6 +180,9 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
 	case SPRN_MMUCSR0:
 		vcpu->arch.gpr[rt] = 0; break;
 
+	case SPRN_MMUCFG:
+		vcpu->arch.gpr[rt] = mfspr(SPRN_MMUCFG); break;
+
 	/* extra exceptions */
 	case SPRN_IVOR32:
 		vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL];

+ 12 - 14
arch/powerpc/kvm/e500_tlb.c

@@ -22,6 +22,7 @@
 
 
 #include "../mm/mmu_decl.h"
 #include "../mm/mmu_decl.h"
 #include "e500_tlb.h"
 #include "e500_tlb.h"
+#include "trace.h"
 
 
 #define to_htlb1_esel(esel) (tlb1_entry_num - (esel) - 1)
 #define to_htlb1_esel(esel) (tlb1_entry_num - (esel) - 1)
 
 
@@ -224,9 +225,8 @@ static void kvmppc_e500_stlbe_invalidate(struct kvmppc_vcpu_e500 *vcpu_e500,
 
 
 	kvmppc_e500_shadow_release(vcpu_e500, tlbsel, esel);
 	stlbe->mas1 = 0;
-	KVMTRACE_5D(STLB_INVAL, &vcpu_e500->vcpu, index_of(tlbsel, esel),
-			stlbe->mas1, stlbe->mas2, stlbe->mas3, stlbe->mas7,
-			handler);
+	trace_kvm_stlb_inval(index_of(tlbsel, esel), stlbe->mas1, stlbe->mas2,
+			     stlbe->mas3, stlbe->mas7);
 }
 
 static void kvmppc_e500_tlb1_invalidate(struct kvmppc_vcpu_e500 *vcpu_e500,
@@ -269,7 +269,7 @@ static inline void kvmppc_e500_deliver_tlb_miss(struct kvm_vcpu *vcpu,
 	tlbsel = (vcpu_e500->mas4 >> 28) & 0x1;
 	victim = (tlbsel == 0) ? tlb0_get_next_victim(vcpu_e500) : 0;
 	pidsel = (vcpu_e500->mas4 >> 16) & 0xf;
-	tsized = (vcpu_e500->mas4 >> 8) & 0xf;
+	tsized = (vcpu_e500->mas4 >> 7) & 0x1f;
 
 	vcpu_e500->mas0 = MAS0_TLBSEL(tlbsel) | MAS0_ESEL(victim)
 		| MAS0_NV(vcpu_e500->guest_tlb_nv[tlbsel]);
@@ -309,7 +309,7 @@ static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
 	vcpu_e500->shadow_pages[tlbsel][esel] = new_page;
 
 	/* Force TS=1 IPROT=0 TSIZE=4KB for all guest mappings. */
-	stlbe->mas1 = MAS1_TSIZE(BOOKE_PAGESZ_4K)
+	stlbe->mas1 = MAS1_TSIZE(BOOK3E_PAGESZ_4K)
 		| MAS1_TID(get_tlb_tid(gtlbe)) | MAS1_TS | MAS1_VALID;
 	stlbe->mas2 = (gvaddr & MAS2_EPN)
 		| e500_shadow_mas2_attrib(gtlbe->mas2,
@@ -319,9 +319,8 @@ static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
 				vcpu_e500->vcpu.arch.msr & MSR_PR);
 	stlbe->mas7 = (hpaddr >> 32) & MAS7_RPN;
 
-	KVMTRACE_5D(STLB_WRITE, &vcpu_e500->vcpu, index_of(tlbsel, esel),
-			stlbe->mas1, stlbe->mas2, stlbe->mas3, stlbe->mas7,
-			handler);
+	trace_kvm_stlb_write(index_of(tlbsel, esel), stlbe->mas1, stlbe->mas2,
+			     stlbe->mas3, stlbe->mas7);
 }
 
 /* XXX only map the one-one case, for now use TLB0 */
@@ -535,9 +534,8 @@ int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu)
 	gtlbe->mas3 = vcpu_e500->mas3;
 	gtlbe->mas7 = vcpu_e500->mas7;
 
-	KVMTRACE_5D(GTLB_WRITE, vcpu, vcpu_e500->mas0,
-			gtlbe->mas1, gtlbe->mas2, gtlbe->mas3, gtlbe->mas7,
-			handler);
+	trace_kvm_gtlb_write(vcpu_e500->mas0, gtlbe->mas1, gtlbe->mas2,
+			     gtlbe->mas3, gtlbe->mas7);
 
 	/* Invalidate shadow mappings for the about-to-be-clobbered TLBE. */
 	if (tlbe_is_host_safe(vcpu, gtlbe)) {
@@ -545,7 +543,7 @@ int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu)
 		case 0:
 			/* TLB0 */
 			gtlbe->mas1 &= ~MAS1_TSIZE(~0);
-			gtlbe->mas1 |= MAS1_TSIZE(BOOKE_PAGESZ_4K);
+			gtlbe->mas1 |= MAS1_TSIZE(BOOK3E_PAGESZ_4K);
 
 			stlbsel = 0;
 			sesel = kvmppc_e500_stlbe_map(vcpu_e500, 0, esel);
@@ -679,14 +677,14 @@ void kvmppc_e500_tlb_setup(struct kvmppc_vcpu_e500 *vcpu_e500)
 
 
 	/* Insert large initial mapping for guest. */
 	tlbe = &vcpu_e500->guest_tlb[1][0];
-	tlbe->mas1 = MAS1_VALID | MAS1_TSIZE(BOOKE_PAGESZ_256M);
+	tlbe->mas1 = MAS1_VALID | MAS1_TSIZE(BOOK3E_PAGESZ_256M);
 	tlbe->mas2 = 0;
 	tlbe->mas3 = E500_TLB_SUPER_PERM_MASK;
 	tlbe->mas7 = 0;
 
 	/* 4K map for serial output. Used by kernel wrapper. */
 	tlbe = &vcpu_e500->guest_tlb[1][1];
-	tlbe->mas1 = MAS1_VALID | MAS1_TSIZE(BOOKE_PAGESZ_4K);
+	tlbe->mas1 = MAS1_VALID | MAS1_TSIZE(BOOK3E_PAGESZ_4K);
 	tlbe->mas2 = (0xe0004500 & 0xFFFFF000) | MAS2_I | MAS2_G;
 	tlbe->mas3 = (0xe0004500 & 0xFFFFF000) | E500_TLB_SUPER_PERM_MASK;
 	tlbe->mas7 = 0;

+ 3 - 3
arch/powerpc/kvm/e500_tlb.h

@@ -16,7 +16,7 @@
 #define __KVM_E500_TLB_H__
 
 #include <linux/kvm_host.h>
-#include <asm/mmu-fsl-booke.h>
+#include <asm/mmu-book3e.h>
 #include <asm/tlb.h>
 #include <asm/kvm_e500.h>
 
@@ -59,7 +59,7 @@ extern void kvmppc_e500_tlb_setup(struct kvmppc_vcpu_e500 *);
 /* TLB helper functions */
 static inline unsigned int get_tlb_size(const struct tlbe *tlbe)
 {
-	return (tlbe->mas1 >> 8) & 0xf;
+	return (tlbe->mas1 >> 7) & 0x1f;
 }
 
 static inline gva_t get_tlb_eaddr(const struct tlbe *tlbe)
@@ -70,7 +70,7 @@ static inline gva_t get_tlb_eaddr(const struct tlbe *tlbe)
 static inline u64 get_tlb_bytes(const struct tlbe *tlbe)
 {
 	unsigned int pgsize = get_tlb_size(tlbe);
-	return 1ULL << 10 << (pgsize << 1);
+	return 1ULL << 10 << pgsize;
 }
 
 static inline gva_t get_tlb_end(const struct tlbe *tlbe)

+ 5 - 2
arch/powerpc/kvm/emulate.c

@@ -29,6 +29,7 @@
 #include <asm/kvm_ppc.h>
 #include <asm/disassemble.h>
 #include "timing.h"
+#include "trace.h"
 
 #define OP_TRAP 3
 
@@ -187,7 +188,9 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
 			case SPRN_SRR1:
 				vcpu->arch.gpr[rt] = vcpu->arch.srr1; break;
 			case SPRN_PVR:
-				vcpu->arch.gpr[rt] = vcpu->arch.pvr; break;
+				vcpu->arch.gpr[rt] = mfspr(SPRN_PVR); break;
+			case SPRN_PIR:
+				vcpu->arch.gpr[rt] = mfspr(SPRN_PIR); break;
 
 			/* Note: mftb and TBRL/TBWL are user-accessible, so
 			 * the guest can always access the real TB anyways.
@@ -417,7 +420,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
 		}
 	}
 
-	KVMTRACE_3D(PPC_INSTR, vcpu, inst, (int)vcpu->arch.pc, emulated, entryexit);
+	trace_kvm_ppc_instr(inst, vcpu->arch.pc, emulated);
 
 	if (advance)
 		vcpu->arch.pc += 4; /* Advance past emulated instruction. */

+ 14 - 18
arch/powerpc/kvm/powerpc.c

@@ -31,25 +31,17 @@
 #include "timing.h"
 #include "timing.h"
 #include "../mm/mmu_decl.h"
 #include "../mm/mmu_decl.h"
 
 
+#define CREATE_TRACE_POINTS
+#include "trace.h"
+
 gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
 gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
 {
 {
 	return gfn;
 	return gfn;
 }
 }
 
 
-int kvm_cpu_has_interrupt(struct kvm_vcpu *v)
-{
-	return !!(v->arch.pending_exceptions);
-}
-
-int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu)
-{
-	/* do real check here */
-	return 1;
-}
-
 int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
 int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
 {
 {
-	return !(v->arch.msr & MSR_WE);
+	return !(v->arch.msr & MSR_WE) || !!(v->arch.pending_exceptions);
 }
 }
 
 
 
 
@@ -122,13 +114,17 @@ struct kvm *kvm_arch_create_vm(void)
 static void kvmppc_free_vcpus(struct kvm *kvm)
 {
 	unsigned int i;
+	struct kvm_vcpu *vcpu;
 
-	for (i = 0; i < KVM_MAX_VCPUS; ++i) {
-		if (kvm->vcpus[i]) {
-			kvm_arch_vcpu_free(kvm->vcpus[i]);
-			kvm->vcpus[i] = NULL;
-		}
-	}
+	kvm_for_each_vcpu(i, vcpu, kvm)
+		kvm_arch_vcpu_free(vcpu);
+
+	mutex_lock(&kvm->lock);
+	for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
+		kvm->vcpus[i] = NULL;
+
+	atomic_set(&kvm->online_vcpus, 0);
+	mutex_unlock(&kvm->lock);
 }
 
 void kvm_arch_sync_events(struct kvm *kvm)

+ 104 - 0
arch/powerpc/kvm/trace.h

@@ -0,0 +1,104 @@
+#if !defined(_TRACE_KVM_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_KVM_H
+
+#include <linux/tracepoint.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM kvm
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE trace
+
+/*
+ * Tracepoint for guest mode entry.
+ */
+TRACE_EVENT(kvm_ppc_instr,
+	TP_PROTO(unsigned int inst, unsigned long pc, unsigned int emulate),
+	TP_ARGS(inst, pc, emulate),
+
+	TP_STRUCT__entry(
+		__field(	unsigned int,	inst		)
+		__field(	unsigned long,	pc		)
+		__field(	unsigned int,	emulate		)
+	),
+
+	TP_fast_assign(
+		__entry->inst		= inst;
+		__entry->pc		= pc;
+		__entry->emulate	= emulate;
+	),
+
+	TP_printk("inst %u pc 0x%lx emulate %u\n",
+		  __entry->inst, __entry->pc, __entry->emulate)
+);
+
+TRACE_EVENT(kvm_stlb_inval,
+	TP_PROTO(unsigned int stlb_index),
+	TP_ARGS(stlb_index),
+
+	TP_STRUCT__entry(
+		__field(	unsigned int,	stlb_index	)
+	),
+
+	TP_fast_assign(
+		__entry->stlb_index	= stlb_index;
+	),
+
+	TP_printk("stlb_index %u", __entry->stlb_index)
+);
+
+TRACE_EVENT(kvm_stlb_write,
+	TP_PROTO(unsigned int victim, unsigned int tid, unsigned int word0,
+		 unsigned int word1, unsigned int word2),
+	TP_ARGS(victim, tid, word0, word1, word2),
+
+	TP_STRUCT__entry(
+		__field(	unsigned int,	victim		)
+		__field(	unsigned int,	tid		)
+		__field(	unsigned int,	word0		)
+		__field(	unsigned int,	word1		)
+		__field(	unsigned int,	word2		)
+	),
+
+	TP_fast_assign(
+		__entry->victim		= victim;
+		__entry->tid		= tid;
+		__entry->word0		= word0;
+		__entry->word1		= word1;
+		__entry->word2		= word2;
+	),
+
+	TP_printk("victim %u tid %u w0 %u w1 %u w2 %u",
+		__entry->victim, __entry->tid, __entry->word0,
+		__entry->word1, __entry->word2)
+);
+
+TRACE_EVENT(kvm_gtlb_write,
+	TP_PROTO(unsigned int gtlb_index, unsigned int tid, unsigned int word0,
+		 unsigned int word1, unsigned int word2),
+	TP_ARGS(gtlb_index, tid, word0, word1, word2),
+
+	TP_STRUCT__entry(
+		__field(	unsigned int,	gtlb_index	)
+		__field(	unsigned int,	tid		)
+		__field(	unsigned int,	word0		)
+		__field(	unsigned int,	word1		)
+		__field(	unsigned int,	word2		)
+	),
+
+	TP_fast_assign(
+		__entry->gtlb_index	= gtlb_index;
+		__entry->tid		= tid;
+		__entry->word0		= word0;
+		__entry->word1		= word1;
+		__entry->word2		= word2;
+	),
+
+	TP_printk("gtlb_index %u tid %u w0 %u w1 %u w2 %u",
+		__entry->gtlb_index, __entry->tid, __entry->word0,
+		__entry->word1, __entry->word2)
+);
+
+#endif /* _TRACE_KVM_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>

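The new tracepoint header above follows the kernel's standard TRACE_EVENT pattern: exactly one translation unit (powerpc.c in this series) defines CREATE_TRACE_POINTS before including trace.h, which instantiates the event bodies, and every call site simply invokes the generated trace_<event>() helper, as the emulate.c and e500_tlb.c hunks show. A condensed sketch of that convention follows; the wrapper function here is illustrative only, not code from the patch.

/* Exactly one .c file instantiates the tracepoints (powerpc.c here): */
#define CREATE_TRACE_POINTS
#include "trace.h"

/* Any other user just includes "trace.h" and calls the generated helpers,
 * for example: */
static void report_emulation(unsigned int inst, unsigned long pc,
			     unsigned int emulated)
{
	trace_kvm_ppc_instr(inst, pc, emulated);
}
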
+ 0 - 9
arch/s390/include/asm/kvm.h

@@ -15,15 +15,6 @@
  */
 #include <linux/types.h>
 
-/* for KVM_GET_IRQCHIP and KVM_SET_IRQCHIP */
-struct kvm_pic_state {
-	/* no PIC for s390 */
-};
-
-struct kvm_ioapic_state {
-	/* no IOAPIC for s390 */
-};
-
 /* for KVM_GET_REGS and KVM_SET_REGS */
 struct kvm_regs {
 	/* general purpose regs for s390 */

+ 9 - 6
arch/s390/include/asm/kvm_host.h

@@ -1,7 +1,7 @@
 /*
  * asm-s390/kvm_host.h - definition for kernel virtual machines on s390
  *
- * Copyright IBM Corp. 2008
+ * Copyright IBM Corp. 2008,2009
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License (version 2 only)
@@ -40,7 +40,11 @@ struct sca_block {
 	struct sca_entry cpu[64];
 } __attribute__((packed));
 
 
-#define KVM_PAGES_PER_HPAGE 256
+#define KVM_NR_PAGE_SIZES 2
+#define KVM_HPAGE_SHIFT(x) (PAGE_SHIFT + ((x) - 1) * 8)
+#define KVM_HPAGE_SIZE(x) (1UL << KVM_HPAGE_SHIFT(x))
+#define KVM_HPAGE_MASK(x)	(~(KVM_HPAGE_SIZE(x) - 1))
+#define KVM_PAGES_PER_HPAGE(x)	(KVM_HPAGE_SIZE(x) / PAGE_SIZE)
 
 
 #define CPUSTAT_HOST       0x80000000
 #define CPUSTAT_WAIT       0x10000000
@@ -182,8 +186,9 @@ struct kvm_s390_interrupt_info {
 };

 /* for local_interrupt.action_flags */
-#define ACTION_STORE_ON_STOP 1
-#define ACTION_STOP_ON_STOP  2
+#define ACTION_STORE_ON_STOP		(1<<0)
+#define ACTION_STOP_ON_STOP		(1<<1)
+#define ACTION_RELOADVCPU_ON_STOP	(1<<2)
 
 
 struct kvm_s390_local_interrupt {
 	spinlock_t lock;
@@ -227,8 +232,6 @@ struct kvm_vm_stat {
 };

 struct kvm_arch{
-	unsigned long guest_origin;
-	unsigned long guest_memsize;
 	struct sca_block *sca;
 	debug_info_t *dbf;
 	struct kvm_s390_float_interrupt float_int;

+ 4 - 0
arch/s390/include/asm/kvm_para.h

@@ -13,6 +13,8 @@
 #ifndef __S390_KVM_PARA_H
 #define __S390_KVM_PARA_H
 
 
+#ifdef __KERNEL__
+
 /*
  * Hypercalls for KVM on s390. The calling convention is similar to the
  * s390 ABI, so we use R2-R6 for parameters 1-5. In addition we use R1
@@ -147,4 +149,6 @@ static inline unsigned int kvm_arch_para_features(void)
 	return 0;
 }
 
 
+#endif
+
 #endif /* __S390_KVM_PARA_H */

+ 1 - 8
arch/s390/kvm/Kconfig

@@ -1,11 +1,7 @@
 #
 # KVM configuration
 #
-config HAVE_KVM
-       bool
-
-config HAVE_KVM_IRQCHIP
-       bool
+source "virt/kvm/Kconfig"
 
 
 menuconfig VIRTUALIZATION
 	bool "Virtualization"
@@ -38,9 +34,6 @@ config KVM
 
 
 	  If unsure, say N.
 
 
-config KVM_TRACE
-       bool
-
 # OK, it's a little counter-intuitive to do this, but it puts it neatly under
 # the virtualization menu.
 source drivers/virtio/Kconfig

+ 12 - 11
arch/s390/kvm/gaccess.h

@@ -1,7 +1,7 @@
 /*
 /*
  * gaccess.h -  access guest memory
  * gaccess.h -  access guest memory
  *
  *
- * Copyright IBM Corp. 2008
+ * Copyright IBM Corp. 2008,2009
  *
  *
  * This program is free software; you can redistribute it and/or modify
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License (version 2 only)
  * it under the terms of the GNU General Public License (version 2 only)
@@ -16,13 +16,14 @@
 #include <linux/compiler.h>
 #include <linux/compiler.h>
 #include <linux/kvm_host.h>
 #include <linux/kvm_host.h>
 #include <asm/uaccess.h>
 #include <asm/uaccess.h>
+#include "kvm-s390.h"
 
 
 static inline void __user *__guestaddr_to_user(struct kvm_vcpu *vcpu,
 static inline void __user *__guestaddr_to_user(struct kvm_vcpu *vcpu,
 					       unsigned long guestaddr)
 					       unsigned long guestaddr)
 {
 {
 	unsigned long prefix  = vcpu->arch.sie_block->prefix;
 	unsigned long prefix  = vcpu->arch.sie_block->prefix;
-	unsigned long origin  = vcpu->kvm->arch.guest_origin;
-	unsigned long memsize = vcpu->kvm->arch.guest_memsize;
+	unsigned long origin  = vcpu->arch.sie_block->gmsor;
+	unsigned long memsize = kvm_s390_vcpu_get_memsize(vcpu);
 
 
 	if (guestaddr < 2 * PAGE_SIZE)
 	if (guestaddr < 2 * PAGE_SIZE)
 		guestaddr += prefix;
 		guestaddr += prefix;
@@ -158,8 +159,8 @@ static inline int copy_to_guest(struct kvm_vcpu *vcpu, unsigned long guestdest,
 				const void *from, unsigned long n)
 				const void *from, unsigned long n)
 {
 {
 	unsigned long prefix  = vcpu->arch.sie_block->prefix;
 	unsigned long prefix  = vcpu->arch.sie_block->prefix;
-	unsigned long origin  = vcpu->kvm->arch.guest_origin;
-	unsigned long memsize = vcpu->kvm->arch.guest_memsize;
+	unsigned long origin  = vcpu->arch.sie_block->gmsor;
+	unsigned long memsize = kvm_s390_vcpu_get_memsize(vcpu);
 
 
 	if ((guestdest < 2 * PAGE_SIZE) && (guestdest + n > 2 * PAGE_SIZE))
 	if ((guestdest < 2 * PAGE_SIZE) && (guestdest + n > 2 * PAGE_SIZE))
 		goto slowpath;
 		goto slowpath;
@@ -209,8 +210,8 @@ static inline int copy_from_guest(struct kvm_vcpu *vcpu, void *to,
 				  unsigned long guestsrc, unsigned long n)
 				  unsigned long guestsrc, unsigned long n)
 {
 {
 	unsigned long prefix  = vcpu->arch.sie_block->prefix;
 	unsigned long prefix  = vcpu->arch.sie_block->prefix;
-	unsigned long origin  = vcpu->kvm->arch.guest_origin;
-	unsigned long memsize = vcpu->kvm->arch.guest_memsize;
+	unsigned long origin  = vcpu->arch.sie_block->gmsor;
+	unsigned long memsize = kvm_s390_vcpu_get_memsize(vcpu);
 
 
 	if ((guestsrc < 2 * PAGE_SIZE) && (guestsrc + n > 2 * PAGE_SIZE))
 	if ((guestsrc < 2 * PAGE_SIZE) && (guestsrc + n > 2 * PAGE_SIZE))
 		goto slowpath;
 		goto slowpath;
@@ -244,8 +245,8 @@ static inline int copy_to_guest_absolute(struct kvm_vcpu *vcpu,
 					 unsigned long guestdest,
 					 unsigned long guestdest,
 					 const void *from, unsigned long n)
 					 const void *from, unsigned long n)
 {
 {
-	unsigned long origin  = vcpu->kvm->arch.guest_origin;
-	unsigned long memsize = vcpu->kvm->arch.guest_memsize;
+	unsigned long origin  = vcpu->arch.sie_block->gmsor;
+	unsigned long memsize = kvm_s390_vcpu_get_memsize(vcpu);
 
 
 	if (guestdest + n > memsize)
 	if (guestdest + n > memsize)
 		return -EFAULT;
 		return -EFAULT;
@@ -262,8 +263,8 @@ static inline int copy_from_guest_absolute(struct kvm_vcpu *vcpu, void *to,
 					   unsigned long guestsrc,
 					   unsigned long guestsrc,
 					   unsigned long n)
 					   unsigned long n)
 {
 {
-	unsigned long origin  = vcpu->kvm->arch.guest_origin;
-	unsigned long memsize = vcpu->kvm->arch.guest_memsize;
+	unsigned long origin  = vcpu->arch.sie_block->gmsor;
+	unsigned long memsize = kvm_s390_vcpu_get_memsize(vcpu);
 
 
 	if (guestsrc + n > memsize)
 		return -EFAULT;
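
All of these helpers boil guest access down to offset arithmetic against the SIE control block: an access is valid only if it stays below the registered memory size, the user-space address is the guest origin (gmsor) plus the guest address, and the first two pages of the guest address space are redirected through the per-vcpu prefix area. A rough worked example, assuming prefix = 0x10000 and ignoring the slow path:

	/* illustrative only */
	unsigned long guestaddr = 0x1000;		/* below 2 * PAGE_SIZE */
	guestaddr += prefix;				/* -> 0x11000, served from the prefix area */
	uptr = (void __user *)(origin + guestaddr);	/* flat offset from gmsor */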

+ 12 - 6
arch/s390/kvm/intercept.c

@@ -1,7 +1,7 @@
 /*
 /*
  * intercept.c - in-kernel handling for sie intercepts
  * intercept.c - in-kernel handling for sie intercepts
  *
  *
- * Copyright IBM Corp. 2008
+ * Copyright IBM Corp. 2008,2009
  *
  *
  * This program is free software; you can redistribute it and/or modify
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License (version 2 only)
  * it under the terms of the GNU General Public License (version 2 only)
@@ -128,7 +128,7 @@ static int handle_noop(struct kvm_vcpu *vcpu)
 
 
 static int handle_stop(struct kvm_vcpu *vcpu)
 static int handle_stop(struct kvm_vcpu *vcpu)
 {
 {
-	int rc;
+	int rc = 0;
 
 
 	vcpu->stat.exit_stop_request++;
 	vcpu->stat.exit_stop_request++;
 	atomic_clear_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
 	atomic_clear_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
@@ -141,12 +141,18 @@ static int handle_stop(struct kvm_vcpu *vcpu)
 			rc = -ENOTSUPP;
 			rc = -ENOTSUPP;
 	}
 	}
 
 
+	if (vcpu->arch.local_int.action_bits & ACTION_RELOADVCPU_ON_STOP) {
+		vcpu->arch.local_int.action_bits &= ~ACTION_RELOADVCPU_ON_STOP;
+		rc = SIE_INTERCEPT_RERUNVCPU;
+		vcpu->run->exit_reason = KVM_EXIT_INTR;
+	}
+
 	if (vcpu->arch.local_int.action_bits & ACTION_STOP_ON_STOP) {
 	if (vcpu->arch.local_int.action_bits & ACTION_STOP_ON_STOP) {
 		vcpu->arch.local_int.action_bits &= ~ACTION_STOP_ON_STOP;
 		vcpu->arch.local_int.action_bits &= ~ACTION_STOP_ON_STOP;
 		VCPU_EVENT(vcpu, 3, "%s", "cpu stopped");
 		VCPU_EVENT(vcpu, 3, "%s", "cpu stopped");
 		rc = -ENOTSUPP;
 		rc = -ENOTSUPP;
-	} else
-		rc = 0;
+	}
+
 	spin_unlock_bh(&vcpu->arch.local_int.lock);
 	spin_unlock_bh(&vcpu->arch.local_int.lock);
 	return rc;
 	return rc;
 }
 }
@@ -158,9 +164,9 @@ static int handle_validity(struct kvm_vcpu *vcpu)
 
 
 	vcpu->stat.exit_validity++;
 	vcpu->stat.exit_validity++;
 	if ((viwhy == 0x37) && (vcpu->arch.sie_block->prefix
 	if ((viwhy == 0x37) && (vcpu->arch.sie_block->prefix
-		<= vcpu->kvm->arch.guest_memsize - 2*PAGE_SIZE)){
+		<= kvm_s390_vcpu_get_memsize(vcpu) - 2*PAGE_SIZE)) {
 		rc = fault_in_pages_writeable((char __user *)
 		rc = fault_in_pages_writeable((char __user *)
-			 vcpu->kvm->arch.guest_origin +
+			 vcpu->arch.sie_block->gmsor +
 			 vcpu->arch.sie_block->prefix,
 			 vcpu->arch.sie_block->prefix,
 			 2*PAGE_SIZE);
 			 2*PAGE_SIZE);
 		if (rc)
 		if (rc)

+ 1 - 7
arch/s390/kvm/interrupt.c

@@ -283,7 +283,7 @@ static int __try_deliver_ckc_interrupt(struct kvm_vcpu *vcpu)
 	return 1;
 	return 1;
 }
 }
 
 
-int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu)
+static int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu)
 {
 {
 	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
 	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
 	struct kvm_s390_float_interrupt *fi = vcpu->arch.local_int.float_int;
 	struct kvm_s390_float_interrupt *fi = vcpu->arch.local_int.float_int;
@@ -320,12 +320,6 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu)
 	return rc;
 	return rc;
 }
 }
 
 
-int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu)
-{
-	/* do real check here */
-	return 1;
-}
-
 int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
 int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
 {
 {
 	return 0;
 	return 0;

+ 33 - 45
arch/s390/kvm/kvm-s390.c

@@ -1,7 +1,7 @@
 /*
 /*
  * s390host.c --  hosting zSeries kernel virtual machines
  * s390host.c --  hosting zSeries kernel virtual machines
  *
  *
- * Copyright IBM Corp. 2008
+ * Copyright IBM Corp. 2008,2009
  *
  *
  * This program is free software; you can redistribute it and/or modify
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License (version 2 only)
  * it under the terms of the GNU General Public License (version 2 only)
@@ -10,6 +10,7 @@
  *    Author(s): Carsten Otte <cotte@de.ibm.com>
  *    Author(s): Carsten Otte <cotte@de.ibm.com>
  *               Christian Borntraeger <borntraeger@de.ibm.com>
  *               Christian Borntraeger <borntraeger@de.ibm.com>
  *               Heiko Carstens <heiko.carstens@de.ibm.com>
  *               Heiko Carstens <heiko.carstens@de.ibm.com>
+ *               Christian Ehrhardt <ehrhardt@de.ibm.com>
  */
  */
 
 
 #include <linux/compiler.h>
 #include <linux/compiler.h>
@@ -210,13 +211,17 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
 static void kvm_free_vcpus(struct kvm *kvm)
 static void kvm_free_vcpus(struct kvm *kvm)
 {
 {
 	unsigned int i;
 	unsigned int i;
+	struct kvm_vcpu *vcpu;
 
 
-	for (i = 0; i < KVM_MAX_VCPUS; ++i) {
-		if (kvm->vcpus[i]) {
-			kvm_arch_vcpu_destroy(kvm->vcpus[i]);
-			kvm->vcpus[i] = NULL;
-		}
-	}
+	kvm_for_each_vcpu(i, vcpu, kvm)
+		kvm_arch_vcpu_destroy(vcpu);
+
+	mutex_lock(&kvm->lock);
+	for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
+		kvm->vcpus[i] = NULL;
+
+	atomic_set(&kvm->online_vcpus, 0);
+	mutex_unlock(&kvm->lock);
 }
 }
 
 
 void kvm_arch_sync_events(struct kvm *kvm)
 void kvm_arch_sync_events(struct kvm *kvm)
@@ -278,16 +283,10 @@ static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
 	vcpu->arch.sie_block->gbea = 1;
 	vcpu->arch.sie_block->gbea = 1;
 }
 }
 
 
-/* The current code can have up to 256 pages for virtio */
-#define VIRTIODESCSPACE (256ul * 4096ul)
-
 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 {
 {
 	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH);
 	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH);
-	vcpu->arch.sie_block->gmslm = vcpu->kvm->arch.guest_memsize +
-				      vcpu->kvm->arch.guest_origin +
-				      VIRTIODESCSPACE - 1ul;
-	vcpu->arch.sie_block->gmsor = vcpu->kvm->arch.guest_origin;
+	set_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests);
 	vcpu->arch.sie_block->ecb   = 2;
 	vcpu->arch.sie_block->ecb   = 2;
 	vcpu->arch.sie_block->eca   = 0xC1002001U;
 	vcpu->arch.sie_block->eca   = 0xC1002001U;
 	vcpu->arch.sie_block->fac   = (int) (long) facilities;
 	vcpu->arch.sie_block->fac   = (int) (long) facilities;
@@ -319,8 +318,6 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
 	BUG_ON(!kvm->arch.sca);
 	BUG_ON(!kvm->arch.sca);
 	if (!kvm->arch.sca->cpu[id].sda)
 	if (!kvm->arch.sca->cpu[id].sda)
 		kvm->arch.sca->cpu[id].sda = (__u64) vcpu->arch.sie_block;
 		kvm->arch.sca->cpu[id].sda = (__u64) vcpu->arch.sie_block;
-	else
-		BUG_ON(!kvm->vcpus[id]); /* vcpu does already exist */
 	vcpu->arch.sie_block->scaoh = (__u32)(((__u64)kvm->arch.sca) >> 32);
 	vcpu->arch.sie_block->scaoh = (__u32)(((__u64)kvm->arch.sca) >> 32);
 	vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca;
 	vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca;
 
 
@@ -490,9 +487,15 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 
 
 	vcpu_load(vcpu);
 	vcpu_load(vcpu);
 
 
+rerun_vcpu:
+	if (vcpu->requests)
+		if (test_and_clear_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests))
+			kvm_s390_vcpu_set_mem(vcpu);
+
 	/* verify, that memory has been registered */
 	/* verify, that memory has been registered */
-	if (!vcpu->kvm->arch.guest_memsize) {
+	if (!vcpu->arch.sie_block->gmslm) {
 		vcpu_put(vcpu);
 		vcpu_put(vcpu);
+		VCPU_EVENT(vcpu, 3, "%s", "no memory registered to run vcpu");
 		return -EINVAL;
 		return -EINVAL;
 	}
 	}
 
 
@@ -509,6 +512,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 		vcpu->arch.sie_block->gpsw.addr = kvm_run->s390_sieic.addr;
 		vcpu->arch.sie_block->gpsw.addr = kvm_run->s390_sieic.addr;
 		break;
 		break;
 	case KVM_EXIT_UNKNOWN:
 	case KVM_EXIT_UNKNOWN:
+	case KVM_EXIT_INTR:
 	case KVM_EXIT_S390_RESET:
 	case KVM_EXIT_S390_RESET:
 		break;
 		break;
 	default:
 	default:
@@ -522,8 +526,13 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 		rc = kvm_handle_sie_intercept(vcpu);
 		rc = kvm_handle_sie_intercept(vcpu);
 	} while (!signal_pending(current) && !rc);
 	} while (!signal_pending(current) && !rc);
 
 
-	if (signal_pending(current) && !rc)
+	if (rc == SIE_INTERCEPT_RERUNVCPU)
+		goto rerun_vcpu;
+
+	if (signal_pending(current) && !rc) {
+		kvm_run->exit_reason = KVM_EXIT_INTR;
 		rc = -EINTR;
 		rc = -EINTR;
+	}
 
 
 	if (rc == -ENOTSUPP) {
 	if (rc == -ENOTSUPP) {
 		/* intercept cannot be handled in-kernel, prepare kvm-run */
 		/* intercept cannot be handled in-kernel, prepare kvm-run */
@@ -676,6 +685,7 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
 				int user_alloc)
 				int user_alloc)
 {
 {
 	int i;
 	int i;
+	struct kvm_vcpu *vcpu;
 
 
 	/* A few sanity checks. We can have exactly one memory slot which has
 	/* A few sanity checks. We can have exactly one memory slot which has
 	   to start at guest virtual zero and which has to be located at a
 	   to start at guest virtual zero and which has to be located at a
@@ -684,7 +694,7 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
 	   vmas. It is okay to mmap() and munmap() stuff in this slot after
 	   vmas. It is okay to mmap() and munmap() stuff in this slot after
 	   doing this call at any time */
 	   doing this call at any time */
 
 
-	if (mem->slot || kvm->arch.guest_memsize)
+	if (mem->slot)
 		return -EINVAL;
 		return -EINVAL;
 
 
 	if (mem->guest_phys_addr)
 	if (mem->guest_phys_addr)
@@ -699,36 +709,14 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
 	if (!user_alloc)
 	if (!user_alloc)
 		return -EINVAL;
 		return -EINVAL;
 
 
-	/* lock all vcpus */
-	for (i = 0; i < KVM_MAX_VCPUS; ++i) {
-		if (!kvm->vcpus[i])
+	/* request update of sie control block for all available vcpus */
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		if (test_and_set_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests))
 			continue;
 			continue;
-		if (!mutex_trylock(&kvm->vcpus[i]->mutex))
-			goto fail_out;
-	}
-
-	kvm->arch.guest_origin = mem->userspace_addr;
-	kvm->arch.guest_memsize = mem->memory_size;
-
-	/* update sie control blocks, and unlock all vcpus */
-	for (i = 0; i < KVM_MAX_VCPUS; ++i) {
-		if (kvm->vcpus[i]) {
-			kvm->vcpus[i]->arch.sie_block->gmsor =
-				kvm->arch.guest_origin;
-			kvm->vcpus[i]->arch.sie_block->gmslm =
-				kvm->arch.guest_memsize +
-				kvm->arch.guest_origin +
-				VIRTIODESCSPACE - 1ul;
-			mutex_unlock(&kvm->vcpus[i]->mutex);
-		}
+		kvm_s390_inject_sigp_stop(vcpu, ACTION_RELOADVCPU_ON_STOP);
 	}
 	}
 
 
 	return 0;
 	return 0;
-
-fail_out:
-	for (; i >= 0; i--)
-		mutex_unlock(&kvm->vcpus[i]->mutex);
-	return -EINVAL;
 }
 }
 
 
 void kvm_arch_flush_shadow(struct kvm *kvm)
 void kvm_arch_flush_shadow(struct kvm *kvm)

+ 31 - 1
arch/s390/kvm/kvm-s390.h

@@ -1,7 +1,7 @@
 /*
 /*
  * kvm_s390.h -  definition for kvm on s390
  * kvm_s390.h -  definition for kvm on s390
  *
  *
- * Copyright IBM Corp. 2008
+ * Copyright IBM Corp. 2008,2009
  *
  *
  * This program is free software; you can redistribute it and/or modify
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License (version 2 only)
  * it under the terms of the GNU General Public License (version 2 only)
@@ -9,6 +9,7 @@
  *
  *
  *    Author(s): Carsten Otte <cotte@de.ibm.com>
  *    Author(s): Carsten Otte <cotte@de.ibm.com>
  *               Christian Borntraeger <borntraeger@de.ibm.com>
  *               Christian Borntraeger <borntraeger@de.ibm.com>
+ *               Christian Ehrhardt <ehrhardt@de.ibm.com>
  */
  */
 
 
 #ifndef ARCH_S390_KVM_S390_H
 #ifndef ARCH_S390_KVM_S390_H
@@ -18,8 +19,13 @@
 #include <linux/kvm.h>
 #include <linux/kvm.h>
 #include <linux/kvm_host.h>
 #include <linux/kvm_host.h>
 
 
+/* The current code can have up to 256 pages for virtio */
+#define VIRTIODESCSPACE (256ul * 4096ul)
+
 typedef int (*intercept_handler_t)(struct kvm_vcpu *vcpu);
 typedef int (*intercept_handler_t)(struct kvm_vcpu *vcpu);
 
 
+/* negativ values are error codes, positive values for internal conditions */
+#define SIE_INTERCEPT_RERUNVCPU		(1<<0)
 int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu);
 int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu);
 
 
 #define VM_EVENT(d_kvm, d_loglevel, d_string, d_args...)\
 #define VM_EVENT(d_kvm, d_loglevel, d_string, d_args...)\
@@ -50,6 +56,30 @@ int kvm_s390_inject_vm(struct kvm *kvm,
 int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
 int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
 		struct kvm_s390_interrupt *s390int);
 		struct kvm_s390_interrupt *s390int);
 int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code);
 int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code);
+int kvm_s390_inject_sigp_stop(struct kvm_vcpu *vcpu, int action);
+
+static inline int kvm_s390_vcpu_get_memsize(struct kvm_vcpu *vcpu)
+{
+	return vcpu->arch.sie_block->gmslm
+		- vcpu->arch.sie_block->gmsor
+		- VIRTIODESCSPACE + 1ul;
+}
+
+static inline void kvm_s390_vcpu_set_mem(struct kvm_vcpu *vcpu)
+{
+	struct kvm_memory_slot *mem;
+
+	down_read(&vcpu->kvm->slots_lock);
+	mem = &vcpu->kvm->memslots[0];
+
+	vcpu->arch.sie_block->gmsor = mem->userspace_addr;
+	vcpu->arch.sie_block->gmslm =
+		mem->userspace_addr +
+		(mem->npages << PAGE_SHIFT) +
+		VIRTIODESCSPACE - 1ul;
+
+	up_read(&vcpu->kvm->slots_lock);
+}
 
 
 /* implemented in priv.c */
 int kvm_s390_handle_b2(struct kvm_vcpu *vcpu);
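
The two new inlines are intentionally inverse operations: kvm_s390_vcpu_set_mem() programs gmsor/gmslm from memory slot 0, and kvm_s390_vcpu_get_memsize() recovers the slot size from those two fields. A quick sanity check with made-up numbers, assuming one 128 MB slot mapped at user address U:

	/* set_mem():     gmsor = U
	 *                gmslm = U + 128 MB + VIRTIODESCSPACE - 1
	 * get_memsize(): gmslm - gmsor - VIRTIODESCSPACE + 1 = 128 MB
	 * i.e. the virtio descriptor window above the slot is carved back out. */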

+ 36 - 24
arch/s390/kvm/sigp.c

@@ -1,7 +1,7 @@
 /*
 /*
  * sigp.c - handlinge interprocessor communication
  * sigp.c - handlinge interprocessor communication
  *
  *
- * Copyright IBM Corp. 2008
+ * Copyright IBM Corp. 2008,2009
  *
  *
  * This program is free software; you can redistribute it and/or modify
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License (version 2 only)
  * it under the terms of the GNU General Public License (version 2 only)
@@ -9,6 +9,7 @@
  *
  *
  *    Author(s): Carsten Otte <cotte@de.ibm.com>
  *    Author(s): Carsten Otte <cotte@de.ibm.com>
  *               Christian Borntraeger <borntraeger@de.ibm.com>
  *               Christian Borntraeger <borntraeger@de.ibm.com>
+ *               Christian Ehrhardt <ehrhardt@de.ibm.com>
  */
  */
 
 
 #include <linux/kvm.h>
 #include <linux/kvm.h>
@@ -107,46 +108,57 @@ unlock:
 	return rc;
 	return rc;
 }
 }
 
 
-static int __sigp_stop(struct kvm_vcpu *vcpu, u16 cpu_addr, int store)
+static int __inject_sigp_stop(struct kvm_s390_local_interrupt *li, int action)
 {
 {
-	struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int;
-	struct kvm_s390_local_interrupt *li;
 	struct kvm_s390_interrupt_info *inti;
 	struct kvm_s390_interrupt_info *inti;
-	int rc;
-
-	if (cpu_addr >= KVM_MAX_VCPUS)
-		return 3; /* not operational */
 
 
 	inti = kzalloc(sizeof(*inti), GFP_KERNEL);
 	inti = kzalloc(sizeof(*inti), GFP_KERNEL);
 	if (!inti)
 	if (!inti)
 		return -ENOMEM;
 		return -ENOMEM;
-
 	inti->type = KVM_S390_SIGP_STOP;
 	inti->type = KVM_S390_SIGP_STOP;
 
 
-	spin_lock(&fi->lock);
-	li = fi->local_int[cpu_addr];
-	if (li == NULL) {
-		rc = 3; /* not operational */
-		kfree(inti);
-		goto unlock;
-	}
 	spin_lock_bh(&li->lock);
 	spin_lock_bh(&li->lock);
 	list_add_tail(&inti->list, &li->list);
 	list_add_tail(&inti->list, &li->list);
 	atomic_set(&li->active, 1);
 	atomic_set(&li->active, 1);
 	atomic_set_mask(CPUSTAT_STOP_INT, li->cpuflags);
 	atomic_set_mask(CPUSTAT_STOP_INT, li->cpuflags);
-	if (store)
-		li->action_bits |= ACTION_STORE_ON_STOP;
-	li->action_bits |= ACTION_STOP_ON_STOP;
+	li->action_bits |= action;
 	if (waitqueue_active(&li->wq))
 	if (waitqueue_active(&li->wq))
 		wake_up_interruptible(&li->wq);
 		wake_up_interruptible(&li->wq);
 	spin_unlock_bh(&li->lock);
 	spin_unlock_bh(&li->lock);
-	rc = 0; /* order accepted */
+
+	return 0; /* order accepted */
+}
+
+static int __sigp_stop(struct kvm_vcpu *vcpu, u16 cpu_addr, int action)
+{
+	struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int;
+	struct kvm_s390_local_interrupt *li;
+	int rc;
+
+	if (cpu_addr >= KVM_MAX_VCPUS)
+		return 3; /* not operational */
+
+	spin_lock(&fi->lock);
+	li = fi->local_int[cpu_addr];
+	if (li == NULL) {
+		rc = 3; /* not operational */
+		goto unlock;
+	}
+
+	rc = __inject_sigp_stop(li, action);
+
 unlock:
 unlock:
 	spin_unlock(&fi->lock);
 	spin_unlock(&fi->lock);
 	VCPU_EVENT(vcpu, 4, "sent sigp stop to cpu %x", cpu_addr);
 	VCPU_EVENT(vcpu, 4, "sent sigp stop to cpu %x", cpu_addr);
 	return rc;
 	return rc;
 }
 }
 
 
+int kvm_s390_inject_sigp_stop(struct kvm_vcpu *vcpu, int action)
+{
+	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+	return __inject_sigp_stop(li, action);
+}
+
 static int __sigp_set_arch(struct kvm_vcpu *vcpu, u32 parameter)
 static int __sigp_set_arch(struct kvm_vcpu *vcpu, u32 parameter)
 {
 {
 	int rc;
 	int rc;
@@ -177,9 +189,9 @@ static int __sigp_set_prefix(struct kvm_vcpu *vcpu, u16 cpu_addr, u32 address,
 	/* make sure that the new value is valid memory */
 	/* make sure that the new value is valid memory */
 	address = address & 0x7fffe000u;
 	address = address & 0x7fffe000u;
 	if ((copy_from_guest(vcpu, &tmp,
 	if ((copy_from_guest(vcpu, &tmp,
-		(u64) (address + vcpu->kvm->arch.guest_origin) , 1)) ||
+		(u64) (address + vcpu->arch.sie_block->gmsor) , 1)) ||
 	   (copy_from_guest(vcpu, &tmp, (u64) (address +
 	   (copy_from_guest(vcpu, &tmp, (u64) (address +
-			vcpu->kvm->arch.guest_origin + PAGE_SIZE), 1))) {
+			vcpu->arch.sie_block->gmsor + PAGE_SIZE), 1))) {
 		*reg |= SIGP_STAT_INVALID_PARAMETER;
 		*reg |= SIGP_STAT_INVALID_PARAMETER;
 		return 1; /* invalid parameter */
 		return 1; /* invalid parameter */
 	}
 	}
@@ -262,11 +274,11 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu)
 		break;
 		break;
 	case SIGP_STOP:
 	case SIGP_STOP:
 		vcpu->stat.instruction_sigp_stop++;
 		vcpu->stat.instruction_sigp_stop++;
-		rc = __sigp_stop(vcpu, cpu_addr, 0);
+		rc = __sigp_stop(vcpu, cpu_addr, ACTION_STOP_ON_STOP);
 		break;
 		break;
 	case SIGP_STOP_STORE_STATUS:
 	case SIGP_STOP_STORE_STATUS:
 		vcpu->stat.instruction_sigp_stop++;
 		vcpu->stat.instruction_sigp_stop++;
-		rc = __sigp_stop(vcpu, cpu_addr, 1);
+		rc = __sigp_stop(vcpu, cpu_addr, ACTION_STORE_ON_STOP);
 		break;
 		break;
 	case SIGP_SET_ARCH:
 	case SIGP_SET_ARCH:
 		vcpu->stat.instruction_sigp_arch++;
 		vcpu->stat.instruction_sigp_arch++;

+ 2 - 0
arch/x86/include/asm/apicdef.h

@@ -15,6 +15,7 @@
 
 
 #define	APIC_LVR	0x30
 #define		APIC_LVR_MASK		0xFF00FF
+#define		APIC_LVR_DIRECTED_EOI	(1 << 24)
 #define		GET_APIC_VERSION(x)	((x) & 0xFFu)
 #define		GET_APIC_MAXLVT(x)	(((x) >> 16) & 0xFFu)
 #ifdef CONFIG_X86_32
@@ -41,6 +42,7 @@
 #define		APIC_DFR_CLUSTER		0x0FFFFFFFul
 #define		APIC_DFR_FLAT			0xFFFFFFFFul
 #define	APIC_SPIV	0xF0
+#define		APIC_SPIV_DIRECTED_EOI		(1 << 12)
 #define		APIC_SPIV_FOCUS_DISABLED	(1 << 9)
 #define		APIC_SPIV_APIC_ENABLED		(1 << 8)
 #define	APIC_ISR	0x100

+ 10 - 0
arch/x86/include/asm/kvm.h

@@ -17,6 +17,8 @@
 #define __KVM_HAVE_USER_NMI
 #define __KVM_HAVE_USER_NMI
 #define __KVM_HAVE_GUEST_DEBUG
 #define __KVM_HAVE_GUEST_DEBUG
 #define __KVM_HAVE_MSIX
 #define __KVM_HAVE_MSIX
+#define __KVM_HAVE_MCE
+#define __KVM_HAVE_PIT_STATE2
 
 
 /* Architectural interrupt line count. */
 /* Architectural interrupt line count. */
 #define KVM_NR_INTERRUPTS 256
 #define KVM_NR_INTERRUPTS 256
@@ -236,6 +238,14 @@ struct kvm_pit_state {
 	struct kvm_pit_channel_state channels[3];
 	struct kvm_pit_channel_state channels[3];
 };
 };
 
 
+#define KVM_PIT_FLAGS_HPET_LEGACY  0x00000001
+
+struct kvm_pit_state2 {
+	struct kvm_pit_channel_state channels[3];
+	__u32 flags;
+	__u32 reserved[9];
+};
+
 struct kvm_reinject_control {
 	__u8 pit_reinject;
 	__u8 reserved[31];
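
kvm_pit_state2 extends the old PIT state with a flags word so userspace can tell the kernel that the HPET has taken over legacy timer routing. A hedged sketch of how a VMM might use it, assuming the companion KVM_GET_PIT2/KVM_SET_PIT2 ioctls that belong to this series (they are not visible in this hunk):

	struct kvm_pit_state2 ps2;

	if (ioctl(vm_fd, KVM_GET_PIT2, &ps2) == 0) {
		/* park PIT channel 0 while the HPET drives the legacy IRQ0 path */
		ps2.flags |= KVM_PIT_FLAGS_HPET_LEGACY;
		ioctl(vm_fd, KVM_SET_PIT2, &ps2);
	}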

+ 0 - 0
arch/x86/include/asm/kvm_x86_emulate.h → arch/x86/include/asm/kvm_emulate.h


+ 32 - 28
arch/x86/include/asm/kvm_host.h

@@ -14,6 +14,7 @@
 #include <linux/types.h>
 #include <linux/types.h>
 #include <linux/mm.h>
 #include <linux/mm.h>
 #include <linux/mmu_notifier.h>
 #include <linux/mmu_notifier.h>
+#include <linux/tracepoint.h>
 
 
 #include <linux/kvm.h>
 #include <linux/kvm.h>
 #include <linux/kvm_para.h>
 #include <linux/kvm_para.h>
@@ -37,12 +38,14 @@
 #define CR3_L_MODE_RESERVED_BITS (CR3_NONPAE_RESERVED_BITS |	\
 #define CR3_L_MODE_RESERVED_BITS (CR3_NONPAE_RESERVED_BITS |	\
 				  0xFFFFFF0000000000ULL)
 				  0xFFFFFF0000000000ULL)
 
 
-#define KVM_GUEST_CR0_MASK				   \
-	(X86_CR0_PG | X86_CR0_PE | X86_CR0_WP | X86_CR0_NE \
-	 | X86_CR0_NW | X86_CR0_CD)
+#define KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST				\
+	(X86_CR0_WP | X86_CR0_NE | X86_CR0_NW | X86_CR0_CD)
+#define KVM_GUEST_CR0_MASK						\
+	(KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE)
+#define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST				\
+	(X86_CR0_WP | X86_CR0_NE | X86_CR0_TS | X86_CR0_MP)
 #define KVM_VM_CR0_ALWAYS_ON						\
 #define KVM_VM_CR0_ALWAYS_ON						\
-	(X86_CR0_PG | X86_CR0_PE | X86_CR0_WP | X86_CR0_NE | X86_CR0_TS \
-	 | X86_CR0_MP)
+	(KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE)
 #define KVM_GUEST_CR4_MASK						\
 #define KVM_GUEST_CR4_MASK						\
 	(X86_CR4_VME | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_PGE | X86_CR4_VMXE)
 	(X86_CR4_VME | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_PGE | X86_CR4_VMXE)
 #define KVM_PMODE_VM_CR4_ALWAYS_ON (X86_CR4_PAE | X86_CR4_VMXE)
 #define KVM_PMODE_VM_CR4_ALWAYS_ON (X86_CR4_PAE | X86_CR4_VMXE)
@@ -51,12 +54,12 @@
 #define INVALID_PAGE (~(hpa_t)0)
 #define INVALID_PAGE (~(hpa_t)0)
 #define UNMAPPED_GVA (~(gpa_t)0)
 #define UNMAPPED_GVA (~(gpa_t)0)
 
 
-/* shadow tables are PAE even on non-PAE hosts */
-#define KVM_HPAGE_SHIFT 21
-#define KVM_HPAGE_SIZE (1UL << KVM_HPAGE_SHIFT)
-#define KVM_HPAGE_MASK (~(KVM_HPAGE_SIZE - 1))
-
-#define KVM_PAGES_PER_HPAGE (KVM_HPAGE_SIZE / PAGE_SIZE)
+/* KVM Hugepage definitions for x86 */
+#define KVM_NR_PAGE_SIZES	3
+#define KVM_HPAGE_SHIFT(x)	(PAGE_SHIFT + (((x) - 1) * 9))
+#define KVM_HPAGE_SIZE(x)	(1UL << KVM_HPAGE_SHIFT(x))
+#define KVM_HPAGE_MASK(x)	(~(KVM_HPAGE_SIZE(x) - 1))
+#define KVM_PAGES_PER_HPAGE(x)	(KVM_HPAGE_SIZE(x) / PAGE_SIZE)
 
 
 #define DE_VECTOR 0
 #define DE_VECTOR 0
 #define DB_VECTOR 1
 #define DB_VECTOR 1
@@ -120,6 +123,10 @@ enum kvm_reg {
 	NR_VCPU_REGS
 	NR_VCPU_REGS
 };
 };
 
 
+enum kvm_reg_ex {
+	VCPU_EXREG_PDPTR = NR_VCPU_REGS,
+};
+
 enum {
 enum {
 	VCPU_SREG_ES,
 	VCPU_SREG_ES,
 	VCPU_SREG_CS,
 	VCPU_SREG_CS,
@@ -131,7 +138,7 @@ enum {
 	VCPU_SREG_LDTR,
 	VCPU_SREG_LDTR,
 };
 };
 
 
-#include <asm/kvm_x86_emulate.h>
+#include <asm/kvm_emulate.h>
 
 
 #define KVM_NR_MEM_OBJS 40
 #define KVM_NR_MEM_OBJS 40
 
 
@@ -308,7 +315,6 @@ struct kvm_vcpu_arch {
 	struct {
 	struct {
 		gfn_t gfn;	/* presumed gfn during guest pte update */
 		gfn_t gfn;	/* presumed gfn during guest pte update */
 		pfn_t pfn;	/* pfn corresponding to that gfn */
 		pfn_t pfn;	/* pfn corresponding to that gfn */
-		int largepage;
 		unsigned long mmu_seq;
 		unsigned long mmu_seq;
 	} update_pte;
 	} update_pte;
 
 
@@ -334,16 +340,6 @@ struct kvm_vcpu_arch {
 		u8 nr;
 		u8 nr;
 	} interrupt;
 	} interrupt;
 
 
-	struct {
-		int vm86_active;
-		u8 save_iopl;
-		struct kvm_save_segment {
-			u16 selector;
-			unsigned long base;
-			u32 limit;
-			u32 ar;
-		} tr, es, ds, fs, gs;
-	} rmode;
 	int halt_request; /* real mode on Intel only */
 	int halt_request; /* real mode on Intel only */
 
 
 	int cpuid_nent;
 	int cpuid_nent;
@@ -366,13 +362,15 @@ struct kvm_vcpu_arch {
 	u32 pat;
 	u32 pat;
 
 
 	int switch_db_regs;
 	int switch_db_regs;
-	unsigned long host_db[KVM_NR_DB_REGS];
-	unsigned long host_dr6;
-	unsigned long host_dr7;
 	unsigned long db[KVM_NR_DB_REGS];
 	unsigned long db[KVM_NR_DB_REGS];
 	unsigned long dr6;
 	unsigned long dr6;
 	unsigned long dr7;
 	unsigned long dr7;
 	unsigned long eff_db[KVM_NR_DB_REGS];
 	unsigned long eff_db[KVM_NR_DB_REGS];
+
+	u64 mcg_cap;
+	u64 mcg_status;
+	u64 mcg_ctl;
+	u64 *mce_banks;
 };
 };
 
 
 struct kvm_mem_alias {
 struct kvm_mem_alias {
@@ -409,6 +407,7 @@ struct kvm_arch{
 
 
 	struct page *ept_identity_pagetable;
 	struct page *ept_identity_pagetable;
 	bool ept_identity_pagetable_done;
 	bool ept_identity_pagetable_done;
+	gpa_t ept_identity_map_addr;
 
 
 	unsigned long irq_sources_bitmap;
 	unsigned long irq_sources_bitmap;
 	unsigned long irq_states[KVM_IOAPIC_NUM_PINS];
 	unsigned long irq_states[KVM_IOAPIC_NUM_PINS];
@@ -526,6 +525,9 @@ struct kvm_x86_ops {
 	int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
 	int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
 	int (*get_tdp_level)(void);
 	int (*get_tdp_level)(void);
 	u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
 	u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
+	bool (*gb_page_enable)(void);
+
+	const struct trace_print_flags *exit_reasons_str;
 };
 };
 
 
 extern struct kvm_x86_ops *kvm_x86_ops;
 extern struct kvm_x86_ops *kvm_x86_ops;
@@ -618,6 +620,7 @@ void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr);
 void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code);
 void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code);
 void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long cr2,
 void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long cr2,
 			   u32 error_code);
 			   u32 error_code);
+bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl);
 
 
 int kvm_pic_set_irq(void *opaque, int irq, int level);
 int kvm_pic_set_irq(void *opaque, int irq, int level);
 
 
@@ -752,8 +755,6 @@ static inline void kvm_inject_gp(struct kvm_vcpu *vcpu, u32 error_code)
 	kvm_queue_exception_e(vcpu, GP_VECTOR, error_code);
 	kvm_queue_exception_e(vcpu, GP_VECTOR, error_code);
 }
 }
 
 
-#define MSR_IA32_TIME_STAMP_COUNTER		0x010
-
 #define TSS_IOPB_BASE_OFFSET 0x66
 #define TSS_IOPB_BASE_OFFSET 0x66
 #define TSS_BASE_SIZE 0x68
 #define TSS_BASE_SIZE 0x68
 #define TSS_IOPB_SIZE (65536 / 8)
 #define TSS_IOPB_SIZE (65536 / 8)
@@ -796,5 +797,8 @@ asmlinkage void kvm_handle_fault_on_reboot(void);
 int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
 int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
 int kvm_age_hva(struct kvm *kvm, unsigned long hva);
 int kvm_age_hva(struct kvm *kvm, unsigned long hva);
 int cpuid_maxphyaddr(struct kvm_vcpu *vcpu);
 int cpuid_maxphyaddr(struct kvm_vcpu *vcpu);
+int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu);
+int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu);
+int kvm_cpu_get_interrupt(struct kvm_vcpu *v);
 
 
 #endif /* _ASM_X86_KVM_HOST_H */
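
The new hugepage macros are parameterized by page-table level instead of hard-coding the 2 MB case. With PAGE_SHIFT = 12 they evaluate as follows (worked out here for reference, not part of the patch):

	/* level 1: KVM_HPAGE_SHIFT(1) = 12 -> 4 KB,  KVM_PAGES_PER_HPAGE(1) = 1      */
	/* level 2: KVM_HPAGE_SHIFT(2) = 21 -> 2 MB,  KVM_PAGES_PER_HPAGE(2) = 512    */
	/* level 3: KVM_HPAGE_SHIFT(3) = 30 -> 1 GB,  KVM_PAGES_PER_HPAGE(3) = 262144 */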

+ 2 - 0
arch/x86/include/asm/kvm_para.h

@@ -1,6 +1,8 @@
 #ifndef _ASM_X86_KVM_PARA_H
 #define _ASM_X86_KVM_PARA_H
 
 
+#include <linux/types.h>
+
 /* This CPUID returns the signature 'KVMKVMKVM' in ebx, ecx, and edx.  It
  * should be used to determine that a VM is running under KVM.
  */

+ 1 - 0
arch/x86/include/asm/msr-index.h

@@ -374,6 +374,7 @@
 /* AMD-V MSRs */
 
 
 #define MSR_VM_CR                       0xc0010114
+#define MSR_VM_IGNNE                    0xc0010115
 #define MSR_VM_HSAVE_PA                 0xc0010117

 #endif /* _ASM_X86_MSR_INDEX_H */

+ 8 - 0
arch/x86/include/asm/vmx.h

@@ -55,6 +55,7 @@
 #define SECONDARY_EXEC_ENABLE_EPT               0x00000002
 #define SECONDARY_EXEC_ENABLE_EPT               0x00000002
 #define SECONDARY_EXEC_ENABLE_VPID              0x00000020
 #define SECONDARY_EXEC_ENABLE_VPID              0x00000020
 #define SECONDARY_EXEC_WBINVD_EXITING		0x00000040
 #define SECONDARY_EXEC_WBINVD_EXITING		0x00000040
+#define SECONDARY_EXEC_UNRESTRICTED_GUEST	0x00000080
 
 
 
 
 #define PIN_BASED_EXT_INTR_MASK                 0x00000001
 #define PIN_BASED_EXT_INTR_MASK                 0x00000001
@@ -351,9 +352,16 @@ enum vmcs_field {
 #define VMX_EPT_EXTENT_INDIVIDUAL_ADDR		0
 #define VMX_EPT_EXTENT_INDIVIDUAL_ADDR		0
 #define VMX_EPT_EXTENT_CONTEXT			1
 #define VMX_EPT_EXTENT_CONTEXT			1
 #define VMX_EPT_EXTENT_GLOBAL			2
 #define VMX_EPT_EXTENT_GLOBAL			2
+
+#define VMX_EPT_EXECUTE_ONLY_BIT		(1ull)
+#define VMX_EPT_PAGE_WALK_4_BIT			(1ull << 6)
+#define VMX_EPTP_UC_BIT				(1ull << 8)
+#define VMX_EPTP_WB_BIT				(1ull << 14)
+#define VMX_EPT_2MB_PAGE_BIT			(1ull << 16)
 #define VMX_EPT_EXTENT_INDIVIDUAL_BIT		(1ull << 24)
 #define VMX_EPT_EXTENT_INDIVIDUAL_BIT		(1ull << 24)
 #define VMX_EPT_EXTENT_CONTEXT_BIT		(1ull << 25)
 #define VMX_EPT_EXTENT_CONTEXT_BIT		(1ull << 25)
 #define VMX_EPT_EXTENT_GLOBAL_BIT		(1ull << 26)
 #define VMX_EPT_EXTENT_GLOBAL_BIT		(1ull << 26)
+
 #define VMX_EPT_DEFAULT_GAW			3
 #define VMX_EPT_DEFAULT_GAW			3
 #define VMX_EPT_MAX_GAW				0x4
 #define VMX_EPT_MAX_GAW				0x4
 #define VMX_EPT_MT_EPTE_SHIFT			3
 #define VMX_EPT_MT_EPTE_SHIFT			3

+ 1 - 6
arch/x86/kernel/kvm.c

@@ -34,7 +34,6 @@
 struct kvm_para_state {
 struct kvm_para_state {
 	u8 mmu_queue[MMU_QUEUE_SIZE];
 	u8 mmu_queue[MMU_QUEUE_SIZE];
 	int mmu_queue_len;
 	int mmu_queue_len;
-	enum paravirt_lazy_mode mode;
 };
 };
 
 
 static DEFINE_PER_CPU(struct kvm_para_state, para_state);
 static DEFINE_PER_CPU(struct kvm_para_state, para_state);
@@ -77,7 +76,7 @@ static void kvm_deferred_mmu_op(void *buffer, int len)
 {
 {
 	struct kvm_para_state *state = kvm_para_state();
 	struct kvm_para_state *state = kvm_para_state();
 
 
-	if (state->mode != PARAVIRT_LAZY_MMU) {
+	if (paravirt_get_lazy_mode() != PARAVIRT_LAZY_MMU) {
 		kvm_mmu_op(buffer, len);
 		kvm_mmu_op(buffer, len);
 		return;
 		return;
 	}
 	}
@@ -185,10 +184,7 @@ static void kvm_release_pt(unsigned long pfn)
 
 
 static void kvm_enter_lazy_mmu(void)
 static void kvm_enter_lazy_mmu(void)
 {
 {
-	struct kvm_para_state *state = kvm_para_state();
-
 	paravirt_enter_lazy_mmu();
 	paravirt_enter_lazy_mmu();
-	state->mode = paravirt_get_lazy_mode();
 }
 }
 
 
 static void kvm_leave_lazy_mmu(void)
 static void kvm_leave_lazy_mmu(void)
@@ -197,7 +193,6 @@ static void kvm_leave_lazy_mmu(void)
 
 
 	mmu_queue_flush(state);
 	mmu_queue_flush(state);
 	paravirt_leave_lazy_mmu();
 	paravirt_leave_lazy_mmu();
-	state->mode = paravirt_get_lazy_mode();
 }
 }
 
 
 static void __init paravirt_ops_setup(void)
 static void __init paravirt_ops_setup(void)

+ 2 - 2
arch/x86/kernel/kvmclock.c

@@ -50,8 +50,8 @@ static unsigned long kvm_get_wallclock(void)
 	struct timespec ts;
 	struct timespec ts;
 	int low, high;
 	int low, high;
 
 
-	low = (int)__pa(&wall_clock);
-	high = ((u64)__pa(&wall_clock) >> 32);
+	low = (int)__pa_symbol(&wall_clock);
+	high = ((u64)__pa_symbol(&wall_clock) >> 32);
 	native_write_msr(MSR_KVM_WALL_CLOCK, low, high);
 	native_write_msr(MSR_KVM_WALL_CLOCK, low, high);
 
 
 	vcpu_time = &get_cpu_var(hv_clock);
 	vcpu_time = &get_cpu_var(hv_clock);

+ 4 - 17
arch/x86/kvm/Kconfig

@@ -1,12 +1,8 @@
 #
 #
 # KVM configuration
 # KVM configuration
 #
 #
-config HAVE_KVM
-       bool
 
 
-config HAVE_KVM_IRQCHIP
-       bool
-       default y
+source "virt/kvm/Kconfig"
 
 
 menuconfig VIRTUALIZATION
 menuconfig VIRTUALIZATION
 	bool "Virtualization"
 	bool "Virtualization"
@@ -29,6 +25,9 @@ config KVM
 	select PREEMPT_NOTIFIERS
 	select PREEMPT_NOTIFIERS
 	select MMU_NOTIFIER
 	select MMU_NOTIFIER
 	select ANON_INODES
 	select ANON_INODES
+	select HAVE_KVM_IRQCHIP
+	select HAVE_KVM_EVENTFD
+	select KVM_APIC_ARCHITECTURE
 	---help---
 	---help---
 	  Support hosting fully virtualized guest machines using hardware
 	  Support hosting fully virtualized guest machines using hardware
 	  virtualization extensions.  You will need a fairly recent
 	  virtualization extensions.  You will need a fairly recent
@@ -63,18 +62,6 @@ config KVM_AMD
 	  To compile this as a module, choose M here: the module
 	  To compile this as a module, choose M here: the module
 	  will be called kvm-amd.
 	  will be called kvm-amd.
 
 
-config KVM_TRACE
-	bool "KVM trace support"
-	depends on KVM && SYSFS
-	select MARKERS
-	select RELAY
-	select DEBUG_FS
-	default n
-	---help---
-	  This option allows reading a trace of kvm-related events through
-	  relayfs.  Note the ABI is not considered stable and will be
-	  modified in future updates.
-
 # OK, it's a little counter-intuitive to do this, but it puts it neatly under
 # OK, it's a little counter-intuitive to do this, but it puts it neatly under
 # the virtualization menu.
 # the virtualization menu.
 source drivers/lguest/Kconfig
 source drivers/lguest/Kconfig

+ 16 - 19
arch/x86/kvm/Makefile

@@ -1,22 +1,19 @@
-#
-# Makefile for Kernel-based Virtual Machine module
-#
-
-common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \
-                coalesced_mmio.o irq_comm.o)
-ifeq ($(CONFIG_KVM_TRACE),y)
-common-objs += $(addprefix ../../../virt/kvm/, kvm_trace.o)
-endif
-ifeq ($(CONFIG_IOMMU_API),y)
-common-objs += $(addprefix ../../../virt/kvm/, iommu.o)
-endif
 
 
 EXTRA_CFLAGS += -Ivirt/kvm -Iarch/x86/kvm
 EXTRA_CFLAGS += -Ivirt/kvm -Iarch/x86/kvm
 
 
-kvm-objs := $(common-objs) x86.o mmu.o x86_emulate.o i8259.o irq.o lapic.o \
-	i8254.o timer.o
-obj-$(CONFIG_KVM) += kvm.o
-kvm-intel-objs = vmx.o
-obj-$(CONFIG_KVM_INTEL) += kvm-intel.o
-kvm-amd-objs = svm.o
-obj-$(CONFIG_KVM_AMD) += kvm-amd.o
+CFLAGS_x86.o := -I.
+CFLAGS_svm.o := -I.
+CFLAGS_vmx.o := -I.
+
+kvm-y			+= $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \
+				coalesced_mmio.o irq_comm.o eventfd.o)
+kvm-$(CONFIG_IOMMU_API)	+= $(addprefix ../../../virt/kvm/, iommu.o)
+
+kvm-y			+= x86.o mmu.o emulate.o i8259.o irq.o lapic.o \
+			   i8254.o timer.o
+kvm-intel-y		+= vmx.o
+kvm-amd-y		+= svm.o
+
+obj-$(CONFIG_KVM)	+= kvm.o
+obj-$(CONFIG_KVM_INTEL)	+= kvm-intel.o
+obj-$(CONFIG_KVM_AMD)	+= kvm-amd.o

+ 258 - 7
arch/x86/kvm/x86_emulate.c → arch/x86/kvm/emulate.c

@@ -1,5 +1,5 @@
 /******************************************************************************
 /******************************************************************************
- * x86_emulate.c
+ * emulate.c
  *
  *
  * Generic x86 (32-bit and 64-bit) instruction decoder and emulator.
  * Generic x86 (32-bit and 64-bit) instruction decoder and emulator.
  *
  *
@@ -30,7 +30,9 @@
 #define DPRINTF(x...) do {} while (0)
 #define DPRINTF(x...) do {} while (0)
 #endif
 #endif
 #include <linux/module.h>
 #include <linux/module.h>
-#include <asm/kvm_x86_emulate.h>
+#include <asm/kvm_emulate.h>
+
+#include "mmu.h"		/* for is_long_mode() */
 
 
 /*
 /*
  * Opcode effective-address decode tables.
  * Opcode effective-address decode tables.
@@ -60,6 +62,7 @@
 #define SrcImmByte  (6<<4)	/* 8-bit sign-extended immediate operand. */
 #define SrcImmByte  (6<<4)	/* 8-bit sign-extended immediate operand. */
 #define SrcOne      (7<<4)	/* Implied '1' */
 #define SrcOne      (7<<4)	/* Implied '1' */
 #define SrcImmUByte (8<<4)      /* 8-bit unsigned immediate operand. */
 #define SrcImmUByte (8<<4)      /* 8-bit unsigned immediate operand. */
+#define SrcImmU     (9<<4)      /* Immediate operand, unsigned */
 #define SrcMask     (0xf<<4)
 #define SrcMask     (0xf<<4)
 /* Generic ModRM decode. */
 /* Generic ModRM decode. */
 #define ModRM       (1<<8)
 #define ModRM       (1<<8)
@@ -97,11 +100,11 @@ static u32 opcode_table[256] = {
 	/* 0x10 - 0x17 */
 	/* 0x10 - 0x17 */
 	ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
 	ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
 	ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
 	ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
-	0, 0, 0, 0,
+	ByteOp | DstAcc | SrcImm, DstAcc | SrcImm, 0, 0,
 	/* 0x18 - 0x1F */
 	/* 0x18 - 0x1F */
 	ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
 	ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
 	ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
 	ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
-	0, 0, 0, 0,
+	ByteOp | DstAcc | SrcImm, DstAcc | SrcImm, 0, 0,
 	/* 0x20 - 0x27 */
 	/* 0x20 - 0x27 */
 	ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
 	ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
 	ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
 	ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
@@ -195,7 +198,7 @@ static u32 opcode_table[256] = {
 	ByteOp | SrcImmUByte, SrcImmUByte,
 	ByteOp | SrcImmUByte, SrcImmUByte,
 	/* 0xE8 - 0xEF */
 	/* 0xE8 - 0xEF */
 	SrcImm | Stack, SrcImm | ImplicitOps,
 	SrcImm | Stack, SrcImm | ImplicitOps,
-	SrcImm | Src2Imm16, SrcImmByte | ImplicitOps,
+	SrcImmU | Src2Imm16, SrcImmByte | ImplicitOps,
 	SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps,
 	SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps,
 	SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps,
 	SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps,
 	/* 0xF0 - 0xF7 */
 	/* 0xF0 - 0xF7 */
@@ -208,7 +211,7 @@ static u32 opcode_table[256] = {
 
 
 static u32 twobyte_table[256] = {
 static u32 twobyte_table[256] = {
 	/* 0x00 - 0x0F */
 	/* 0x00 - 0x0F */
-	0, Group | GroupDual | Group7, 0, 0, 0, 0, ImplicitOps, 0,
+	0, Group | GroupDual | Group7, 0, 0, 0, ImplicitOps, ImplicitOps, 0,
 	ImplicitOps, ImplicitOps, 0, 0, 0, ImplicitOps | ModRM, 0, 0,
 	ImplicitOps, ImplicitOps, 0, 0, 0, ImplicitOps | ModRM, 0, 0,
 	/* 0x10 - 0x1F */
 	/* 0x10 - 0x1F */
 	0, 0, 0, 0, 0, 0, 0, 0, ImplicitOps | ModRM, 0, 0, 0, 0, 0, 0, 0,
 	0, 0, 0, 0, 0, 0, 0, 0, ImplicitOps | ModRM, 0, 0, 0, 0, 0, 0, 0,
@@ -216,7 +219,9 @@ static u32 twobyte_table[256] = {
 	ModRM | ImplicitOps, ModRM, ModRM | ImplicitOps, ModRM, 0, 0, 0, 0,
 	ModRM | ImplicitOps, ModRM, ModRM | ImplicitOps, ModRM, 0, 0, 0, 0,
 	0, 0, 0, 0, 0, 0, 0, 0,
 	0, 0, 0, 0, 0, 0, 0, 0,
 	/* 0x30 - 0x3F */
 	/* 0x30 - 0x3F */
-	ImplicitOps, 0, ImplicitOps, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	ImplicitOps, 0, ImplicitOps, 0,
+	ImplicitOps, ImplicitOps, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0,
 	/* 0x40 - 0x47 */
 	/* 0x40 - 0x47 */
 	DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
 	DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
 	DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
 	DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
@@ -319,8 +324,11 @@ static u32 group2_table[] = {
 };
 };
 
 
 /* EFLAGS bit definitions. */
 /* EFLAGS bit definitions. */
+#define EFLG_VM (1<<17)
+#define EFLG_RF (1<<16)
 #define EFLG_OF (1<<11)
 #define EFLG_OF (1<<11)
 #define EFLG_DF (1<<10)
 #define EFLG_DF (1<<10)
+#define EFLG_IF (1<<9)
 #define EFLG_SF (1<<7)
 #define EFLG_SF (1<<7)
 #define EFLG_ZF (1<<6)
 #define EFLG_ZF (1<<6)
 #define EFLG_AF (1<<4)
 #define EFLG_AF (1<<4)
@@ -1027,6 +1035,7 @@ done_prefixes:
 		c->src.type = OP_MEM;
 		c->src.type = OP_MEM;
 		break;
 		break;
 	case SrcImm:
 	case SrcImm:
+	case SrcImmU:
 		c->src.type = OP_IMM;
 		c->src.type = OP_IMM;
 		c->src.ptr = (unsigned long *)c->eip;
 		c->src.ptr = (unsigned long *)c->eip;
 		c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
 		c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
@@ -1044,6 +1053,19 @@ done_prefixes:
 			c->src.val = insn_fetch(s32, 4, c->eip);
 			c->src.val = insn_fetch(s32, 4, c->eip);
 			break;
 			break;
 		}
 		}
+		if ((c->d & SrcMask) == SrcImmU) {
+			switch (c->src.bytes) {
+			case 1:
+				c->src.val &= 0xff;
+				break;
+			case 2:
+				c->src.val &= 0xffff;
+				break;
+			case 4:
+				c->src.val &= 0xffffffff;
+				break;
+			}
+		}
 		break;
 		break;
 	case SrcImmByte:
 	case SrcImmByte:
 	case SrcImmUByte:
 	case SrcImmUByte:
@@ -1375,6 +1397,217 @@ static void toggle_interruptibility(struct x86_emulate_ctxt *ctxt, u32 mask)
 		ctxt->interruptibility = mask;
 		ctxt->interruptibility = mask;
 }
 }
 
 
+static inline void
+setup_syscalls_segments(struct x86_emulate_ctxt *ctxt,
+	struct kvm_segment *cs, struct kvm_segment *ss)
+{
+	memset(cs, 0, sizeof(struct kvm_segment));
+	kvm_x86_ops->get_segment(ctxt->vcpu, cs, VCPU_SREG_CS);
+	memset(ss, 0, sizeof(struct kvm_segment));
+
+	cs->l = 0;		/* will be adjusted later */
+	cs->base = 0;		/* flat segment */
+	cs->g = 1;		/* 4kb granularity */
+	cs->limit = 0xffffffff;	/* 4GB limit */
+	cs->type = 0x0b;	/* Read, Execute, Accessed */
+	cs->s = 1;
+	cs->dpl = 0;		/* will be adjusted later */
+	cs->present = 1;
+	cs->db = 1;
+
+	ss->unusable = 0;
+	ss->base = 0;		/* flat segment */
+	ss->limit = 0xffffffff;	/* 4GB limit */
+	ss->g = 1;		/* 4kb granularity */
+	ss->s = 1;
+	ss->type = 0x03;	/* Read/Write, Accessed */
+	ss->db = 1;		/* 32bit stack segment */
+	ss->dpl = 0;
+	ss->present = 1;
+}
+
+static int
+emulate_syscall(struct x86_emulate_ctxt *ctxt)
+{
+	struct decode_cache *c = &ctxt->decode;
+	struct kvm_segment cs, ss;
+	u64 msr_data;
+
+	/* syscall is not available in real mode */
+	if (c->lock_prefix || ctxt->mode == X86EMUL_MODE_REAL
+		|| !(ctxt->vcpu->arch.cr0 & X86_CR0_PE))
+		return -1;
+
+	setup_syscalls_segments(ctxt, &cs, &ss);
+
+	kvm_x86_ops->get_msr(ctxt->vcpu, MSR_STAR, &msr_data);
+	msr_data >>= 32;
+	cs.selector = (u16)(msr_data & 0xfffc);
+	ss.selector = (u16)(msr_data + 8);
+
+	if (is_long_mode(ctxt->vcpu)) {
+		cs.db = 0;
+		cs.l = 1;
+	}
+	kvm_x86_ops->set_segment(ctxt->vcpu, &cs, VCPU_SREG_CS);
+	kvm_x86_ops->set_segment(ctxt->vcpu, &ss, VCPU_SREG_SS);
+
+	c->regs[VCPU_REGS_RCX] = c->eip;
+	if (is_long_mode(ctxt->vcpu)) {
+#ifdef CONFIG_X86_64
+		c->regs[VCPU_REGS_R11] = ctxt->eflags & ~EFLG_RF;
+
+		kvm_x86_ops->get_msr(ctxt->vcpu,
+			ctxt->mode == X86EMUL_MODE_PROT64 ?
+			MSR_LSTAR : MSR_CSTAR, &msr_data);
+		c->eip = msr_data;
+
+		kvm_x86_ops->get_msr(ctxt->vcpu, MSR_SYSCALL_MASK, &msr_data);
+		ctxt->eflags &= ~(msr_data | EFLG_RF);
+#endif
+	} else {
+		/* legacy mode */
+		kvm_x86_ops->get_msr(ctxt->vcpu, MSR_STAR, &msr_data);
+		c->eip = (u32)msr_data;
+
+		ctxt->eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF);
+	}
+
+	return 0;
+}
+
+static int
+emulate_sysenter(struct x86_emulate_ctxt *ctxt)
+{
+	struct decode_cache *c = &ctxt->decode;
+	struct kvm_segment cs, ss;
+	u64 msr_data;
+
+	/* inject #UD if LOCK prefix is used */
+	if (c->lock_prefix)
+		return -1;
+
+	/* inject #GP if in real mode or paging is disabled */
+	if (ctxt->mode == X86EMUL_MODE_REAL ||
+		!(ctxt->vcpu->arch.cr0 & X86_CR0_PE)) {
+		kvm_inject_gp(ctxt->vcpu, 0);
+		return -1;
+	}
+
+	/* XXX sysenter/sysexit have not been tested in 64bit mode.
+	* Therefore, we inject an #UD.
+	*/
+	if (ctxt->mode == X86EMUL_MODE_PROT64)
+		return -1;
+
+	setup_syscalls_segments(ctxt, &cs, &ss);
+
+	kvm_x86_ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_CS, &msr_data);
+	switch (ctxt->mode) {
+	case X86EMUL_MODE_PROT32:
+		if ((msr_data & 0xfffc) == 0x0) {
+			kvm_inject_gp(ctxt->vcpu, 0);
+			return -1;
+		}
+		break;
+	case X86EMUL_MODE_PROT64:
+		if (msr_data == 0x0) {
+			kvm_inject_gp(ctxt->vcpu, 0);
+			return -1;
+		}
+		break;
+	}
+
+	ctxt->eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF);
+	cs.selector = (u16)msr_data;
+	cs.selector &= ~SELECTOR_RPL_MASK;
+	ss.selector = cs.selector + 8;
+	ss.selector &= ~SELECTOR_RPL_MASK;
+	if (ctxt->mode == X86EMUL_MODE_PROT64
+		|| is_long_mode(ctxt->vcpu)) {
+		cs.db = 0;
+		cs.l = 1;
+	}
+
+	kvm_x86_ops->set_segment(ctxt->vcpu, &cs, VCPU_SREG_CS);
+	kvm_x86_ops->set_segment(ctxt->vcpu, &ss, VCPU_SREG_SS);
+
+	kvm_x86_ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_EIP, &msr_data);
+	c->eip = msr_data;
+
+	kvm_x86_ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_ESP, &msr_data);
+	c->regs[VCPU_REGS_RSP] = msr_data;
+
+	return 0;
+}
+
+static int
+emulate_sysexit(struct x86_emulate_ctxt *ctxt)
+{
+	struct decode_cache *c = &ctxt->decode;
+	struct kvm_segment cs, ss;
+	u64 msr_data;
+	int usermode;
+
+	/* inject #UD if LOCK prefix is used */
+	if (c->lock_prefix)
+		return -1;
+
+	/* inject #GP if in real mode or paging is disabled */
+	if (ctxt->mode == X86EMUL_MODE_REAL
+		|| !(ctxt->vcpu->arch.cr0 & X86_CR0_PE)) {
+		kvm_inject_gp(ctxt->vcpu, 0);
+		return -1;
+	}
+
+	/* sysexit must be called from CPL 0 */
+	if (kvm_x86_ops->get_cpl(ctxt->vcpu) != 0) {
+		kvm_inject_gp(ctxt->vcpu, 0);
+		return -1;
+	}
+
+	setup_syscalls_segments(ctxt, &cs, &ss);
+
+	if ((c->rex_prefix & 0x8) != 0x0)
+		usermode = X86EMUL_MODE_PROT64;
+	else
+		usermode = X86EMUL_MODE_PROT32;
+
+	cs.dpl = 3;
+	ss.dpl = 3;
+	kvm_x86_ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_CS, &msr_data);
+	switch (usermode) {
+	case X86EMUL_MODE_PROT32:
+		cs.selector = (u16)(msr_data + 16);
+		if ((msr_data & 0xfffc) == 0x0) {
+			kvm_inject_gp(ctxt->vcpu, 0);
+			return -1;
+		}
+		ss.selector = (u16)(msr_data + 24);
+		break;
+	case X86EMUL_MODE_PROT64:
+		cs.selector = (u16)(msr_data + 32);
+		if (msr_data == 0x0) {
+			kvm_inject_gp(ctxt->vcpu, 0);
+			return -1;
+		}
+		ss.selector = cs.selector + 8;
+		cs.db = 0;
+		cs.l = 1;
+		break;
+	}
+	cs.selector |= SELECTOR_RPL_MASK;
+	ss.selector |= SELECTOR_RPL_MASK;
+
+	kvm_x86_ops->set_segment(ctxt->vcpu, &cs, VCPU_SREG_CS);
+	kvm_x86_ops->set_segment(ctxt->vcpu, &ss, VCPU_SREG_SS);
+
+	c->eip = ctxt->vcpu->arch.regs[VCPU_REGS_RDX];
+	c->regs[VCPU_REGS_RSP] = ctxt->vcpu->arch.regs[VCPU_REGS_RCX];
+
+	return 0;
+}
+
 int
 int
 x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
 x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
 {
 {
@@ -1970,6 +2203,12 @@ twobyte_insn:
 			goto cannot_emulate;
 			goto cannot_emulate;
 		}
 		}
 		break;
 		break;
+	case 0x05: 		/* syscall */
+		if (emulate_syscall(ctxt) == -1)
+			goto cannot_emulate;
+		else
+			goto writeback;
+		break;
 	case 0x06:
 	case 0x06:
 		emulate_clts(ctxt->vcpu);
 		emulate_clts(ctxt->vcpu);
 		c->dst.type = OP_NONE;
 		c->dst.type = OP_NONE;
@@ -2036,6 +2275,18 @@ twobyte_insn:
 		rc = X86EMUL_CONTINUE;
 		rc = X86EMUL_CONTINUE;
 		c->dst.type = OP_NONE;
 		c->dst.type = OP_NONE;
 		break;
 		break;
+	case 0x34:		/* sysenter */
+		if (emulate_sysenter(ctxt) == -1)
+			goto cannot_emulate;
+		else
+			goto writeback;
+		break;
+	case 0x35:		/* sysexit */
+		if (emulate_sysexit(ctxt) == -1)
+			goto cannot_emulate;
+		else
+			goto writeback;
+		break;
 	case 0x40 ... 0x4f:	/* cmov */
 		c->dst.val = c->dst.orig_val = c->src.val;
 		if (!test_cc(c->b, ctxt->eflags))
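
In the new emulate_syscall() the target segments come straight out of MSR_STAR: bits 47:32 hold the kernel code selector and the stack selector is that value plus 8, mirroring what the hardware fast path does. A worked instance with an illustrative MSR value (not taken from the patch):

	/* STAR = 0x0023001000000000
	 *   msr_data >>= 32       -> 0x00230010
	 *   cs.selector = 0x0010     (0x00230010 & 0xfffc)
	 *   ss.selector = 0x0018     ((u16)(0x00230010 + 8))
	 * and in long mode cs.l = 1, so execution resumes at MSR_LSTAR in 64-bit mode. */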

+ 104 - 56
arch/x86/kvm/i8254.c

@@ -231,7 +231,7 @@ int pit_has_pending_timer(struct kvm_vcpu *vcpu)
 {
 {
 	struct kvm_pit *pit = vcpu->kvm->arch.vpit;
 	struct kvm_pit *pit = vcpu->kvm->arch.vpit;
 
 
-	if (pit && vcpu->vcpu_id == 0 && pit->pit_state.irq_ack)
+	if (pit && kvm_vcpu_is_bsp(vcpu) && pit->pit_state.irq_ack)
 		return atomic_read(&pit->pit_state.pit_timer.pending);
 		return atomic_read(&pit->pit_state.pit_timer.pending);
 	return 0;
 	return 0;
 }
 }
@@ -252,7 +252,7 @@ void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu)
 	struct kvm_pit *pit = vcpu->kvm->arch.vpit;
 	struct kvm_pit *pit = vcpu->kvm->arch.vpit;
 	struct hrtimer *timer;
 	struct hrtimer *timer;
 
 
-	if (vcpu->vcpu_id != 0 || !pit)
+	if (!kvm_vcpu_is_bsp(vcpu) || !pit)
 		return;
 		return;
 
 
 	timer = &pit->pit_state.pit_timer.timer;
 	timer = &pit->pit_state.pit_timer.timer;
@@ -294,7 +294,7 @@ static void create_pit_timer(struct kvm_kpit_state *ps, u32 val, int is_period)
 	pt->timer.function = kvm_timer_fn;
 	pt->timer.function = kvm_timer_fn;
 	pt->t_ops = &kpit_ops;
 	pt->t_ops = &kpit_ops;
 	pt->kvm = ps->pit->kvm;
 	pt->kvm = ps->pit->kvm;
-	pt->vcpu_id = 0;
+	pt->vcpu = pt->kvm->bsp_vcpu;
 
 
 	atomic_set(&pt->pending, 0);
 	atomic_set(&pt->pending, 0);
 	ps->irq_ack = 1;
 	ps->irq_ack = 1;
@@ -332,33 +332,62 @@ static void pit_load_count(struct kvm *kvm, int channel, u32 val)
 	case 1:
 	case 1:
         /* FIXME: enhance mode 4 precision */
         /* FIXME: enhance mode 4 precision */
 	case 4:
 	case 4:
-		create_pit_timer(ps, val, 0);
+		if (!(ps->flags & KVM_PIT_FLAGS_HPET_LEGACY)) {
+			create_pit_timer(ps, val, 0);
+		}
 		break;
 		break;
 	case 2:
 	case 2:
 	case 3:
 	case 3:
-		create_pit_timer(ps, val, 1);
+		if (!(ps->flags & KVM_PIT_FLAGS_HPET_LEGACY)){
+			create_pit_timer(ps, val, 1);
+		}
 		break;
 		break;
 	default:
 	default:
 		destroy_pit_timer(&ps->pit_timer);
 		destroy_pit_timer(&ps->pit_timer);
 	}
 	}
 }
 }
 
 
-void kvm_pit_load_count(struct kvm *kvm, int channel, u32 val)
+void kvm_pit_load_count(struct kvm *kvm, int channel, u32 val, int hpet_legacy_start)
+{
+	u8 saved_mode;
+	if (hpet_legacy_start) {
+		/* save existing mode for later reenablement */
+		saved_mode = kvm->arch.vpit->pit_state.channels[0].mode;
+		kvm->arch.vpit->pit_state.channels[0].mode = 0xff; /* disable timer */
+		pit_load_count(kvm, channel, val);
+		kvm->arch.vpit->pit_state.channels[0].mode = saved_mode;
+	} else {
+		pit_load_count(kvm, channel, val);
+	}
+}
+
+static inline struct kvm_pit *dev_to_pit(struct kvm_io_device *dev)
+{
+	return container_of(dev, struct kvm_pit, dev);
+}
+
+static inline struct kvm_pit *speaker_to_pit(struct kvm_io_device *dev)
 {
-	mutex_lock(&kvm->arch.vpit->pit_state.lock);
-	pit_load_count(kvm, channel, val);
-	mutex_unlock(&kvm->arch.vpit->pit_state.lock);
+	return container_of(dev, struct kvm_pit, speaker_dev);
 }
 
-static void pit_ioport_write(struct kvm_io_device *this,
-			     gpa_t addr, int len, const void *data)
+static inline int pit_in_range(gpa_t addr)
 {
-	struct kvm_pit *pit = (struct kvm_pit *)this->private;
+	return ((addr >= KVM_PIT_BASE_ADDRESS) &&
+		(addr < KVM_PIT_BASE_ADDRESS + KVM_PIT_MEM_LENGTH));
+}
+
+static int pit_ioport_write(struct kvm_io_device *this,
+			    gpa_t addr, int len, const void *data)
+{
+	struct kvm_pit *pit = dev_to_pit(this);
 	struct kvm_kpit_state *pit_state = &pit->pit_state;
 	struct kvm *kvm = pit->kvm;
 	int channel, access;
 	struct kvm_kpit_channel_state *s;
 	u32 val = *(u32 *) data;
+	if (!pit_in_range(addr))
+		return -EOPNOTSUPP;
 
 	val  &= 0xff;
 	addr &= KVM_PIT_CHANNEL_MASK;
@@ -421,16 +450,19 @@ static void pit_ioport_write(struct kvm_io_device *this,
 	}
 
 	mutex_unlock(&pit_state->lock);
+	return 0;
 }
 
-static void pit_ioport_read(struct kvm_io_device *this,
-			    gpa_t addr, int len, void *data)
+static int pit_ioport_read(struct kvm_io_device *this,
+			   gpa_t addr, int len, void *data)
 {
-	struct kvm_pit *pit = (struct kvm_pit *)this->private;
+	struct kvm_pit *pit = dev_to_pit(this);
 	struct kvm_kpit_state *pit_state = &pit->pit_state;
 	struct kvm *kvm = pit->kvm;
 	int ret, count;
 	struct kvm_kpit_channel_state *s;
+	if (!pit_in_range(addr))
+		return -EOPNOTSUPP;
 
 	addr &= KVM_PIT_CHANNEL_MASK;
 	s = &pit_state->channels[addr];
@@ -485,37 +517,36 @@ static void pit_ioport_read(struct kvm_io_device *this,
 	memcpy(data, (char *)&ret, len);
 
 	mutex_unlock(&pit_state->lock);
+	return 0;
 }
 
-static int pit_in_range(struct kvm_io_device *this, gpa_t addr,
-			int len, int is_write)
-{
-	return ((addr >= KVM_PIT_BASE_ADDRESS) &&
-		(addr < KVM_PIT_BASE_ADDRESS + KVM_PIT_MEM_LENGTH));
-}
-
-static void speaker_ioport_write(struct kvm_io_device *this,
-				 gpa_t addr, int len, const void *data)
+static int speaker_ioport_write(struct kvm_io_device *this,
+				gpa_t addr, int len, const void *data)
 {
-	struct kvm_pit *pit = (struct kvm_pit *)this->private;
+	struct kvm_pit *pit = speaker_to_pit(this);
 	struct kvm_kpit_state *pit_state = &pit->pit_state;
 	struct kvm *kvm = pit->kvm;
 	u32 val = *(u32 *) data;
+	if (addr != KVM_SPEAKER_BASE_ADDRESS)
+		return -EOPNOTSUPP;
 
 	mutex_lock(&pit_state->lock);
 	pit_state->speaker_data_on = (val >> 1) & 1;
 	pit_set_gate(kvm, 2, val & 1);
 	mutex_unlock(&pit_state->lock);
+	return 0;
 }
 
-static void speaker_ioport_read(struct kvm_io_device *this,
-				gpa_t addr, int len, void *data)
+static int speaker_ioport_read(struct kvm_io_device *this,
+			       gpa_t addr, int len, void *data)
 {
-	struct kvm_pit *pit = (struct kvm_pit *)this->private;
+	struct kvm_pit *pit = speaker_to_pit(this);
 	struct kvm_kpit_state *pit_state = &pit->pit_state;
 	struct kvm *kvm = pit->kvm;
 	unsigned int refresh_clock;
 	int ret;
+	if (addr != KVM_SPEAKER_BASE_ADDRESS)
+		return -EOPNOTSUPP;
 
 	/* Refresh clock toggles at about 15us. We approximate as 2^14ns. */
 	refresh_clock = ((unsigned int)ktime_to_ns(ktime_get()) >> 14) & 1;
@@ -527,12 +558,7 @@ static void speaker_ioport_read(struct kvm_io_device *this,
 		len = sizeof(ret);
 	memcpy(data, (char *)&ret, len);
 	mutex_unlock(&pit_state->lock);
-}
-
-static int speaker_in_range(struct kvm_io_device *this, gpa_t addr,
-			    int len, int is_write)
-{
-	return (addr == KVM_SPEAKER_BASE_ADDRESS);
+	return 0;
 }
 
 void kvm_pit_reset(struct kvm_pit *pit)
@@ -541,6 +567,7 @@ void kvm_pit_reset(struct kvm_pit *pit)
 	struct kvm_kpit_channel_state *c;
 
 	mutex_lock(&pit->pit_state.lock);
+	pit->pit_state.flags = 0;
 	for (i = 0; i < 3; i++) {
 		c = &pit->pit_state.channels[i];
 		c->mode = 0xff;
@@ -563,10 +590,22 @@ static void pit_mask_notifer(struct kvm_irq_mask_notifier *kimn, bool mask)
 	}
 }
 
-struct kvm_pit *kvm_create_pit(struct kvm *kvm)
+static const struct kvm_io_device_ops pit_dev_ops = {
+	.read     = pit_ioport_read,
+	.write    = pit_ioport_write,
+};
+
+static const struct kvm_io_device_ops speaker_dev_ops = {
+	.read     = speaker_ioport_read,
+	.write    = speaker_ioport_write,
+};
+
+/* Caller must have writers lock on slots_lock */
+struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags)
 {
 	struct kvm_pit *pit;
 	struct kvm_kpit_state *pit_state;
+	int ret;
 
 	pit = kzalloc(sizeof(struct kvm_pit), GFP_KERNEL);
 	if (!pit)
@@ -582,19 +621,6 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm)
 	mutex_lock(&pit->pit_state.lock);
 	spin_lock_init(&pit->pit_state.inject_lock);
 
-	/* Initialize PIO device */
-	pit->dev.read = pit_ioport_read;
-	pit->dev.write = pit_ioport_write;
-	pit->dev.in_range = pit_in_range;
-	pit->dev.private = pit;
-	kvm_io_bus_register_dev(&kvm->pio_bus, &pit->dev);
-
-	pit->speaker_dev.read = speaker_ioport_read;
-	pit->speaker_dev.write = speaker_ioport_write;
-	pit->speaker_dev.in_range = speaker_in_range;
-	pit->speaker_dev.private = pit;
-	kvm_io_bus_register_dev(&kvm->pio_bus, &pit->speaker_dev);
-
 	kvm->arch.vpit = pit;
 	pit->kvm = kvm;
 
@@ -613,7 +639,30 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm)
 	pit->mask_notifier.func = pit_mask_notifer;
 	kvm_register_irq_mask_notifier(kvm, 0, &pit->mask_notifier);
 
+	kvm_iodevice_init(&pit->dev, &pit_dev_ops);
+	ret = __kvm_io_bus_register_dev(&kvm->pio_bus, &pit->dev);
+	if (ret < 0)
+		goto fail;
+
+	if (flags & KVM_PIT_SPEAKER_DUMMY) {
+		kvm_iodevice_init(&pit->speaker_dev, &speaker_dev_ops);
+		ret = __kvm_io_bus_register_dev(&kvm->pio_bus,
+						&pit->speaker_dev);
+		if (ret < 0)
+			goto fail_unregister;
+	}
+
 	return pit;
+
+fail_unregister:
+	__kvm_io_bus_unregister_dev(&kvm->pio_bus, &pit->dev);
+
+fail:
+	if (pit->irq_source_id >= 0)
+		kvm_free_irq_source_id(kvm, pit->irq_source_id);
+
+	kfree(pit);
+	return NULL;
 }
 
 void kvm_free_pit(struct kvm *kvm)
@@ -623,6 +672,8 @@ void kvm_free_pit(struct kvm *kvm)
 	if (kvm->arch.vpit) {
 		kvm_unregister_irq_mask_notifier(kvm, 0,
 					       &kvm->arch.vpit->mask_notifier);
+		kvm_unregister_irq_ack_notifier(kvm,
+				&kvm->arch.vpit->pit_state.irq_ack_notifier);
 		mutex_lock(&kvm->arch.vpit->pit_state.lock);
 		timer = &kvm->arch.vpit->pit_state.pit_timer.timer;
 		hrtimer_cancel(timer);
@@ -637,10 +688,10 @@ static void __inject_pit_timer_intr(struct kvm *kvm)
 	struct kvm_vcpu *vcpu;
 	int i;
 
-	mutex_lock(&kvm->lock);
+	mutex_lock(&kvm->irq_lock);
 	kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1);
 	kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 0);
-	mutex_unlock(&kvm->lock);
+	mutex_unlock(&kvm->irq_lock);
 
 	/*
 	 * Provides NMI watchdog support via Virtual Wire mode.
@@ -652,11 +703,8 @@ static void __inject_pit_timer_intr(struct kvm *kvm)
 	 * VCPU0, and only if its LVT0 is in EXTINT mode.
 	 */
 	if (kvm->arch.vapics_in_nmi_mode > 0)
-		for (i = 0; i < KVM_MAX_VCPUS; ++i) {
-			vcpu = kvm->vcpus[i];
-			if (vcpu)
-				kvm_apic_nmi_wd_deliver(vcpu);
-		}
+		kvm_for_each_vcpu(i, vcpu, kvm)
+			kvm_apic_nmi_wd_deliver(vcpu);
 }
 
 void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu)
@@ -665,7 +713,7 @@ void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu)
 	struct kvm *kvm = vcpu->kvm;
 	struct kvm_kpit_state *ps;
 
-	if (vcpu && pit) {
+	if (pit) {
 		int inject = 0;
 		ps = &pit->pit_state;
 

+ 3 - 2
arch/x86/kvm/i8254.h

@@ -21,6 +21,7 @@ struct kvm_kpit_channel_state {
 
 struct kvm_kpit_state {
 	struct kvm_kpit_channel_state channels[3];
+	u32 flags;
 	struct kvm_timer pit_timer;
 	bool is_periodic;
 	u32    speaker_data_on;
@@ -49,8 +50,8 @@ struct kvm_pit {
 #define KVM_PIT_CHANNEL_MASK	    0x3
 
 void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu);
-void kvm_pit_load_count(struct kvm *kvm, int channel, u32 val);
-struct kvm_pit *kvm_create_pit(struct kvm *kvm);
+void kvm_pit_load_count(struct kvm *kvm, int channel, u32 val, int hpet_legacy_start);
+struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags);
 void kvm_free_pit(struct kvm *kvm);
 void kvm_pit_reset(struct kvm_pit *pit);
 

+ 55 - 61
arch/x86/kvm/i8259.c

@@ -30,50 +30,24 @@
 #include "irq.h"
 
 #include <linux/kvm_host.h>
-
-static void pic_lock(struct kvm_pic *s)
-	__acquires(&s->lock)
-{
-	spin_lock(&s->lock);
-}
-
-static void pic_unlock(struct kvm_pic *s)
-	__releases(&s->lock)
-{
-	struct kvm *kvm = s->kvm;
-	unsigned acks = s->pending_acks;
-	bool wakeup = s->wakeup_needed;
-	struct kvm_vcpu *vcpu;
-
-	s->pending_acks = 0;
-	s->wakeup_needed = false;
-
-	spin_unlock(&s->lock);
-
-	while (acks) {
-		kvm_notify_acked_irq(kvm, SELECT_PIC(__ffs(acks)),
-				     __ffs(acks));
-		acks &= acks - 1;
-	}
-
-	if (wakeup) {
-		vcpu = s->kvm->vcpus[0];
-		if (vcpu)
-			kvm_vcpu_kick(vcpu);
-	}
-}
+#include "trace.h"
 
 static void pic_clear_isr(struct kvm_kpic_state *s, int irq)
 {
 	s->isr &= ~(1 << irq);
 	s->isr_ack |= (1 << irq);
+	if (s != &s->pics_state->pics[0])
+		irq += 8;
+	kvm_notify_acked_irq(s->pics_state->kvm, SELECT_PIC(irq), irq);
 }
 
 void kvm_pic_clear_isr_ack(struct kvm *kvm)
 {
 	struct kvm_pic *s = pic_irqchip(kvm);
+	spin_lock(&s->lock);
 	s->pics[0].isr_ack = 0xff;
 	s->pics[1].isr_ack = 0xff;
+	spin_unlock(&s->lock);
 }
 
 /*
@@ -174,9 +148,9 @@ static void pic_update_irq(struct kvm_pic *s)
 
 void kvm_pic_update_irq(struct kvm_pic *s)
 {
-	pic_lock(s);
+	spin_lock(&s->lock);
 	pic_update_irq(s);
-	pic_unlock(s);
+	spin_unlock(&s->lock);
 }
 
 int kvm_pic_set_irq(void *opaque, int irq, int level)
@@ -184,12 +158,14 @@ int kvm_pic_set_irq(void *opaque, int irq, int level)
 	struct kvm_pic *s = opaque;
 	int ret = -1;
 
-	pic_lock(s);
+	spin_lock(&s->lock);
 	if (irq >= 0 && irq < PIC_NUM_PINS) {
 		ret = pic_set_irq1(&s->pics[irq >> 3], irq & 7, level);
 		pic_update_irq(s);
+		trace_kvm_pic_set_irq(irq >> 3, irq & 7, s->pics[irq >> 3].elcr,
+				      s->pics[irq >> 3].imr, ret == 0);
 	}
-	pic_unlock(s);
+	spin_unlock(&s->lock);
 
 	return ret;
 }
@@ -217,7 +193,7 @@ int kvm_pic_read_irq(struct kvm *kvm)
 	int irq, irq2, intno;
 	struct kvm_pic *s = pic_irqchip(kvm);
 
-	pic_lock(s);
+	spin_lock(&s->lock);
 	irq = pic_get_irq(&s->pics[0]);
 	if (irq >= 0) {
 		pic_intack(&s->pics[0], irq);
@@ -242,8 +218,7 @@ int kvm_pic_read_irq(struct kvm *kvm)
 		intno = s->pics[0].irq_base + irq;
 	}
 	pic_update_irq(s);
-	pic_unlock(s);
-	kvm_notify_acked_irq(kvm, SELECT_PIC(irq), irq);
+	spin_unlock(&s->lock);
 
 	return intno;
 }
@@ -252,7 +227,7 @@ void kvm_pic_reset(struct kvm_kpic_state *s)
 {
 	int irq, irqbase, n;
 	struct kvm *kvm = s->pics_state->irq_request_opaque;
-	struct kvm_vcpu *vcpu0 = kvm->vcpus[0];
+	struct kvm_vcpu *vcpu0 = kvm->bsp_vcpu;
 
 	if (s == &s->pics_state->pics[0])
 		irqbase = 0;
@@ -263,7 +238,7 @@ void kvm_pic_reset(struct kvm_kpic_state *s)
 		if (vcpu0 && kvm_apic_accept_pic_intr(vcpu0))
 			if (s->irr & (1 << irq) || s->isr & (1 << irq)) {
 				n = irq + irqbase;
-				s->pics_state->pending_acks |= 1 << n;
+				kvm_notify_acked_irq(kvm, SELECT_PIC(n), n);
 			}
 	}
 	s->last_irr = 0;
@@ -428,8 +403,7 @@ static u32 elcr_ioport_read(void *opaque, u32 addr1)
 	return s->elcr;
 }
 
-static int picdev_in_range(struct kvm_io_device *this, gpa_t addr,
-			   int len, int is_write)
+static int picdev_in_range(gpa_t addr)
 {
 	switch (addr) {
 	case 0x20:
@@ -444,18 +418,25 @@ static int picdev_in_range(struct kvm_io_device *this, gpa_t addr,
 	}
 }
 
-static void picdev_write(struct kvm_io_device *this,
+static inline struct kvm_pic *to_pic(struct kvm_io_device *dev)
+{
+	return container_of(dev, struct kvm_pic, dev);
+}
+
+static int picdev_write(struct kvm_io_device *this,
 			 gpa_t addr, int len, const void *val)
 {
-	struct kvm_pic *s = this->private;
+	struct kvm_pic *s = to_pic(this);
 	unsigned char data = *(unsigned char *)val;
+	if (!picdev_in_range(addr))
+		return -EOPNOTSUPP;
 
 	if (len != 1) {
 		if (printk_ratelimit())
 			printk(KERN_ERR "PIC: non byte write\n");
-		return;
+		return 0;
 	}
-	pic_lock(s);
+	spin_lock(&s->lock);
 	switch (addr) {
 	case 0x20:
 	case 0x21:
@@ -468,21 +449,24 @@ static void picdev_write(struct kvm_io_device *this,
 		elcr_ioport_write(&s->pics[addr & 1], addr, data);
 		break;
 	}
-	pic_unlock(s);
+	spin_unlock(&s->lock);
+	return 0;
 }
 
-static void picdev_read(struct kvm_io_device *this,
-			gpa_t addr, int len, void *val)
+static int picdev_read(struct kvm_io_device *this,
+		       gpa_t addr, int len, void *val)
 {
-	struct kvm_pic *s = this->private;
+	struct kvm_pic *s = to_pic(this);
 	unsigned char data = 0;
+	if (!picdev_in_range(addr))
+		return -EOPNOTSUPP;
 
 	if (len != 1) {
 		if (printk_ratelimit())
 			printk(KERN_ERR "PIC: non byte read\n");
-		return;
+		return 0;
 	}
-	pic_lock(s);
+	spin_lock(&s->lock);
 	switch (addr) {
 	case 0x20:
 	case 0x21:
@@ -496,7 +480,8 @@ static void picdev_read(struct kvm_io_device *this,
 		break;
 	}
 	*(unsigned char *)val = data;
-	pic_unlock(s);
+	spin_unlock(&s->lock);
+	return 0;
 }
 
 /*
@@ -505,20 +490,27 @@ static void picdev_read(struct kvm_io_device *this,
 static void pic_irq_request(void *opaque, int level)
 {
 	struct kvm *kvm = opaque;
-	struct kvm_vcpu *vcpu = kvm->vcpus[0];
+	struct kvm_vcpu *vcpu = kvm->bsp_vcpu;
 	struct kvm_pic *s = pic_irqchip(kvm);
 	int irq = pic_get_irq(&s->pics[0]);
 
 	s->output = level;
 	if (vcpu && level && (s->pics[0].isr_ack & (1 << irq))) {
 		s->pics[0].isr_ack &= ~(1 << irq);
-		s->wakeup_needed = true;
+		kvm_vcpu_kick(vcpu);
 	}
 }
 
+static const struct kvm_io_device_ops picdev_ops = {
+	.read     = picdev_read,
+	.write    = picdev_write,
+};
+
 struct kvm_pic *kvm_create_pic(struct kvm *kvm)
 {
 	struct kvm_pic *s;
+	int ret;
+
 	s = kzalloc(sizeof(struct kvm_pic), GFP_KERNEL);
 	if (!s)
 		return NULL;
@@ -534,10 +526,12 @@ struct kvm_pic *kvm_create_pic(struct kvm *kvm)
 	/*
 	 * Initialize PIO device
 	 */
-	s->dev.read = picdev_read;
-	s->dev.write = picdev_write;
-	s->dev.in_range = picdev_in_range;
-	s->dev.private = s;
-	kvm_io_bus_register_dev(&kvm->pio_bus, &s->dev);
+	kvm_iodevice_init(&s->dev, &picdev_ops);
+	ret = kvm_io_bus_register_dev(kvm, &kvm->pio_bus, &s->dev);
+	if (ret < 0) {
+		kfree(s);
+		return NULL;
+	}
+
 	return s;
 }

+ 0 - 1
arch/x86/kvm/irq.h

@@ -63,7 +63,6 @@ struct kvm_kpic_state {
 
 struct kvm_pic {
 	spinlock_t lock;
-	bool wakeup_needed;
 	unsigned pending_acks;
 	struct kvm *kvm;
 	struct kvm_kpic_state pics[2]; /* 0 is master pic, 1 is slave pic */

+ 9 - 0
arch/x86/kvm/kvm_cache_regs.h

@@ -29,4 +29,13 @@ static inline void kvm_rip_write(struct kvm_vcpu *vcpu, unsigned long val)
 	kvm_register_write(vcpu, VCPU_REGS_RIP, val);
 }
 
+static inline u64 kvm_pdptr_read(struct kvm_vcpu *vcpu, int index)
+{
+	if (!test_bit(VCPU_EXREG_PDPTR,
+		      (unsigned long *)&vcpu->arch.regs_avail))
+		kvm_x86_ops->cache_reg(vcpu, VCPU_EXREG_PDPTR);
+
+	return vcpu->arch.pdptrs[index];
+}
+
 #endif

+ 0 - 51
arch/x86/kvm/kvm_svm.h

@@ -1,51 +0,0 @@
-#ifndef __KVM_SVM_H
-#define __KVM_SVM_H
-
-#include <linux/kernel.h>
-#include <linux/types.h>
-#include <linux/list.h>
-#include <linux/kvm_host.h>
-#include <asm/msr.h>
-
-#include <asm/svm.h>
-
-static const u32 host_save_user_msrs[] = {
-#ifdef CONFIG_X86_64
-	MSR_STAR, MSR_LSTAR, MSR_CSTAR, MSR_SYSCALL_MASK, MSR_KERNEL_GS_BASE,
-	MSR_FS_BASE,
-#endif
-	MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
-};
-
-#define NR_HOST_SAVE_USER_MSRS ARRAY_SIZE(host_save_user_msrs)
-
-struct kvm_vcpu;
-
-struct vcpu_svm {
-	struct kvm_vcpu vcpu;
-	struct vmcb *vmcb;
-	unsigned long vmcb_pa;
-	struct svm_cpu_data *svm_data;
-	uint64_t asid_generation;
-
-	u64 next_rip;
-
-	u64 host_user_msrs[NR_HOST_SAVE_USER_MSRS];
-	u64 host_gs_base;
-	unsigned long host_cr2;
-
-	u32 *msrpm;
-	struct vmcb *hsave;
-	u64 hsave_msr;
-
-	u64 nested_vmcb;
-
-	/* These are the merged vectors */
-	u32 *nested_msrpm;
-
-	/* gpa pointers to the real vectors */
-	u64 nested_vmcb_msrpm;
-};
-
-#endif
-

+ 1 - 1
arch/x86/kvm/kvm_timer.h

@@ -6,7 +6,7 @@ struct kvm_timer {
 	bool reinject;
 	struct kvm_timer_ops *t_ops;
 	struct kvm *kvm;
-	int vcpu_id;
+	struct kvm_vcpu *vcpu;
 };
 
 struct kvm_timer_ops {

+ 246 - 88
arch/x86/kvm/lapic.c

@@ -32,8 +32,11 @@
 #include <asm/current.h>
 #include <asm/apicdef.h>
 #include <asm/atomic.h>
+#include <asm/apicdef.h>
 #include "kvm_cache_regs.h"
 #include "irq.h"
+#include "trace.h"
+#include "x86.h"
 
 #ifndef CONFIG_X86_64
 #define mod_64(x, y) ((x) - (y) * div64_u64(x, y))
@@ -141,6 +144,26 @@ static inline int apic_lvt_nmi_mode(u32 lvt_val)
 	return (lvt_val & (APIC_MODE_MASK | APIC_LVT_MASKED)) == APIC_DM_NMI;
 }
 
+void kvm_apic_set_version(struct kvm_vcpu *vcpu)
+{
+	struct kvm_lapic *apic = vcpu->arch.apic;
+	struct kvm_cpuid_entry2 *feat;
+	u32 v = APIC_VERSION;
+
+	if (!irqchip_in_kernel(vcpu->kvm))
+		return;
+
+	feat = kvm_find_cpuid_entry(apic->vcpu, 0x1, 0);
+	if (feat && (feat->ecx & (1 << (X86_FEATURE_X2APIC & 31))))
+		v |= APIC_LVR_DIRECTED_EOI;
+	apic_set_reg(apic, APIC_LVR, v);
+}
+
+static inline int apic_x2apic_mode(struct kvm_lapic *apic)
+{
+	return apic->vcpu->arch.apic_base & X2APIC_ENABLE;
+}
+
 static unsigned int apic_lvt_mask[APIC_LVT_NUM] = {
 	LVT_MASK | APIC_LVT_TIMER_PERIODIC,	/* LVTT */
 	LVT_MASK | APIC_MODE_MASK,	/* LVTTHMR */
@@ -165,36 +188,52 @@ static int find_highest_vector(void *bitmap)
 
 static inline int apic_test_and_set_irr(int vec, struct kvm_lapic *apic)
 {
+	apic->irr_pending = true;
 	return apic_test_and_set_vector(vec, apic->regs + APIC_IRR);
 }
 
-static inline void apic_clear_irr(int vec, struct kvm_lapic *apic)
+static inline int apic_search_irr(struct kvm_lapic *apic)
 {
-	apic_clear_vector(vec, apic->regs + APIC_IRR);
+	return find_highest_vector(apic->regs + APIC_IRR);
 }
 
 static inline int apic_find_highest_irr(struct kvm_lapic *apic)
 {
 	int result;
 
-	result = find_highest_vector(apic->regs + APIC_IRR);
+	if (!apic->irr_pending)
+		return -1;
+
+	result = apic_search_irr(apic);
 	ASSERT(result == -1 || result >= 16);
 
 	return result;
 }
 
+static inline void apic_clear_irr(int vec, struct kvm_lapic *apic)
+{
+	apic->irr_pending = false;
+	apic_clear_vector(vec, apic->regs + APIC_IRR);
+	if (apic_search_irr(apic) != -1)
+		apic->irr_pending = true;
+}
+
 int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu)
 {
 	struct kvm_lapic *apic = vcpu->arch.apic;
 	int highest_irr;
 
+	/* This may race with setting of irr in __apic_accept_irq() and
+	 * value returned may be wrong, but kvm_vcpu_kick() in __apic_accept_irq
+	 * will cause vmexit immediately and the value will be recalculated
+	 * on the next vmentry.
+	 */
 	if (!apic)
 		return 0;
 	highest_irr = apic_find_highest_irr(apic);
 
 	return highest_irr;
 }
-EXPORT_SYMBOL_GPL(kvm_lapic_find_highest_irr);
 
 static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
 			     int vector, int level, int trig_mode);
@@ -251,7 +290,12 @@ int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest)
 int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda)
 {
 	int result = 0;
-	u8 logical_id;
+	u32 logical_id;
+
+	if (apic_x2apic_mode(apic)) {
+		logical_id = apic_get_reg(apic, APIC_LDR);
+		return logical_id & mda;
+	}
 
 	logical_id = GET_APIC_LOGICAL_ID(apic_get_reg(apic, APIC_LDR));
 
@@ -331,6 +375,8 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
 			break;
 
 		result = !apic_test_and_set_irr(vector, apic);
+		trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode,
+					  trig_mode, vector, !result);
 		if (!result) {
 			if (trig_mode)
 				apic_debug("level trig mode repeatedly for "
@@ -425,7 +471,11 @@ static void apic_set_eoi(struct kvm_lapic *apic)
 		trigger_mode = IOAPIC_LEVEL_TRIG;
 	else
 		trigger_mode = IOAPIC_EDGE_TRIG;
-	kvm_ioapic_update_eoi(apic->vcpu->kvm, vector, trigger_mode);
+	if (!(apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI)) {
+		mutex_lock(&apic->vcpu->kvm->irq_lock);
+		kvm_ioapic_update_eoi(apic->vcpu->kvm, vector, trigger_mode);
+		mutex_unlock(&apic->vcpu->kvm->irq_lock);
+	}
 }
 
 static void apic_send_ipi(struct kvm_lapic *apic)
@@ -440,7 +490,12 @@ static void apic_send_ipi(struct kvm_lapic *apic)
 	irq.level = icr_low & APIC_INT_ASSERT;
 	irq.trig_mode = icr_low & APIC_INT_LEVELTRIG;
 	irq.shorthand = icr_low & APIC_SHORT_MASK;
-	irq.dest_id = GET_APIC_DEST_FIELD(icr_high);
+	if (apic_x2apic_mode(apic))
+		irq.dest_id = icr_high;
+	else
+		irq.dest_id = GET_APIC_DEST_FIELD(icr_high);
+
+	trace_kvm_apic_ipi(icr_low, irq.dest_id);
 
 	apic_debug("icr_high 0x%x, icr_low 0x%x, "
 		   "short_hand 0x%x, dest 0x%x, trig_mode 0x%x, level 0x%x, "
@@ -449,7 +504,9 @@ static void apic_send_ipi(struct kvm_lapic *apic)
 		   irq.trig_mode, irq.level, irq.dest_mode, irq.delivery_mode,
 		   irq.vector);
 
+	mutex_lock(&apic->vcpu->kvm->irq_lock);
 	kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq);
+	mutex_unlock(&apic->vcpu->kvm->irq_lock);
 }
 
 static u32 apic_get_tmcct(struct kvm_lapic *apic)
@@ -495,12 +552,16 @@ static u32 __apic_read(struct kvm_lapic *apic, unsigned int offset)
 {
 	u32 val = 0;
 
-	KVMTRACE_1D(APIC_ACCESS, apic->vcpu, (u32)offset, handler);
-
 	if (offset >= LAPIC_MMIO_LENGTH)
 		return 0;
 
 	switch (offset) {
+	case APIC_ID:
+		if (apic_x2apic_mode(apic))
+			val = kvm_apic_id(apic);
+		else
+			val = kvm_apic_id(apic) << 24;
+		break;
 	case APIC_ARBPRI:
 		printk(KERN_WARNING "Access APIC ARBPRI register "
 		       "which is for P6\n");
@@ -522,21 +583,35 @@ static u32 __apic_read(struct kvm_lapic *apic, unsigned int offset)
 	return val;
 }
 
-static void apic_mmio_read(struct kvm_io_device *this,
-			   gpa_t address, int len, void *data)
+static inline struct kvm_lapic *to_lapic(struct kvm_io_device *dev)
+{
+	return container_of(dev, struct kvm_lapic, dev);
+}
+
+static int apic_reg_read(struct kvm_lapic *apic, u32 offset, int len,
+		void *data)
 {
-	struct kvm_lapic *apic = (struct kvm_lapic *)this->private;
-	unsigned int offset = address - apic->base_address;
 	unsigned char alignment = offset & 0xf;
 	u32 result;
+	/* this bitmask has a bit cleared for each reserver register */
+	static const u64 rmask = 0x43ff01ffffffe70cULL;
 
 	if ((alignment + len) > 4) {
-		printk(KERN_ERR "KVM_APIC_READ: alignment error %lx %d",
-		       (unsigned long)address, len);
-		return;
+		apic_debug("KVM_APIC_READ: alignment error %x %d\n",
+			   offset, len);
+		return 1;
 	}
+
+	if (offset > 0x3f0 || !(rmask & (1ULL << (offset >> 4)))) {
+		apic_debug("KVM_APIC_READ: read reserved register %x\n",
+			   offset);
+		return 1;
+	}
+
 	result = __apic_read(apic, offset & ~0xf);
 
+	trace_kvm_apic_read(offset, result);
+
 	switch (len) {
 	case 1:
 	case 2:
@@ -548,6 +623,28 @@ static void apic_mmio_read(struct kvm_io_device *this,
 		       "should be 1,2, or 4 instead\n", len);
 		       "should be 1,2, or 4 instead\n", len);
 		break;
 		break;
 	}
 	}
+	return 0;
+}
+
+static int apic_mmio_in_range(struct kvm_lapic *apic, gpa_t addr)
+{
+	return apic_hw_enabled(apic) &&
+	    addr >= apic->base_address &&
+	    addr < apic->base_address + LAPIC_MMIO_LENGTH;
+}
+
+static int apic_mmio_read(struct kvm_io_device *this,
+			   gpa_t address, int len, void *data)
+{
+	struct kvm_lapic *apic = to_lapic(this);
+	u32 offset = address - apic->base_address;
+
+	if (!apic_mmio_in_range(apic, address))
+		return -EOPNOTSUPP;
+
+	apic_reg_read(apic, offset, len, data);
+
+	return 0;
 }
 }
 
 
 static void update_divide_count(struct kvm_lapic *apic)
 static void update_divide_count(struct kvm_lapic *apic)
@@ -573,6 +670,15 @@ static void start_apic_timer(struct kvm_lapic *apic)
 
 
 	if (!apic->lapic_timer.period)
 	if (!apic->lapic_timer.period)
 		return;
 		return;
+	/*
+	 * Do not allow the guest to program periodic timers with small
+	 * interval, since the hrtimers are not throttled by the host
+	 * scheduler.
+	 */
+	if (apic_lvtt_period(apic)) {
+		if (apic->lapic_timer.period < NSEC_PER_MSEC/2)
+			apic->lapic_timer.period = NSEC_PER_MSEC/2;
+	}
 
 
 	hrtimer_start(&apic->lapic_timer.timer,
 	hrtimer_start(&apic->lapic_timer.timer,
 		      ktime_add_ns(now, apic->lapic_timer.period),
 		      ktime_add_ns(now, apic->lapic_timer.period),
@@ -603,40 +709,18 @@ static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val)
 		apic->vcpu->kvm->arch.vapics_in_nmi_mode--;
 		apic->vcpu->kvm->arch.vapics_in_nmi_mode--;
 }
 }
 
 
-static void apic_mmio_write(struct kvm_io_device *this,
-			    gpa_t address, int len, const void *data)
+static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
 {
 {
-	struct kvm_lapic *apic = (struct kvm_lapic *)this->private;
-	unsigned int offset = address - apic->base_address;
-	unsigned char alignment = offset & 0xf;
-	u32 val;
-
-	/*
-	 * APIC register must be aligned on 128-bits boundary.
-	 * 32/64/128 bits registers must be accessed thru 32 bits.
-	 * Refer SDM 8.4.1
-	 */
-	if (len != 4 || alignment) {
-		/* Don't shout loud, $infamous_os would cause only noise. */
-		apic_debug("apic write: bad size=%d %lx\n",
-			   len, (long)address);
-		return;
-	}
-
-	val = *(u32 *) data;
-
-	/* too common printing */
-	if (offset != APIC_EOI)
-		apic_debug("%s: offset 0x%x with length 0x%x, and value is "
-			   "0x%x\n", __func__, offset, len, val);
-
-	offset &= 0xff0;
+	int ret = 0;
 
 
-	KVMTRACE_1D(APIC_ACCESS, apic->vcpu, (u32)offset, handler);
+	trace_kvm_apic_write(reg, val);
 
 
-	switch (offset) {
+	switch (reg) {
 	case APIC_ID:		/* Local APIC ID */
 	case APIC_ID:		/* Local APIC ID */
-		apic_set_reg(apic, APIC_ID, val);
+		if (!apic_x2apic_mode(apic))
+			apic_set_reg(apic, APIC_ID, val);
+		else
+			ret = 1;
 		break;
 		break;
 
 
 	case APIC_TASKPRI:
 	case APIC_TASKPRI:
@@ -649,15 +733,24 @@ static void apic_mmio_write(struct kvm_io_device *this,
 		break;
 		break;
 
 
 	case APIC_LDR:
 	case APIC_LDR:
-		apic_set_reg(apic, APIC_LDR, val & APIC_LDR_MASK);
+		if (!apic_x2apic_mode(apic))
+			apic_set_reg(apic, APIC_LDR, val & APIC_LDR_MASK);
+		else
+			ret = 1;
 		break;
 		break;
 
 
 	case APIC_DFR:
 	case APIC_DFR:
-		apic_set_reg(apic, APIC_DFR, val | 0x0FFFFFFF);
+		if (!apic_x2apic_mode(apic))
+			apic_set_reg(apic, APIC_DFR, val | 0x0FFFFFFF);
+		else
+			ret = 1;
 		break;
 		break;
 
 
-	case APIC_SPIV:
-		apic_set_reg(apic, APIC_SPIV, val & 0x3ff);
+	case APIC_SPIV: {
+		u32 mask = 0x3ff;
+		if (apic_get_reg(apic, APIC_LVR) & APIC_LVR_DIRECTED_EOI)
+			mask |= APIC_SPIV_DIRECTED_EOI;
+		apic_set_reg(apic, APIC_SPIV, val & mask);
 		if (!(val & APIC_SPIV_APIC_ENABLED)) {
 		if (!(val & APIC_SPIV_APIC_ENABLED)) {
 			int i;
 			int i;
 			u32 lvt_val;
 			u32 lvt_val;
@@ -672,7 +765,7 @@ static void apic_mmio_write(struct kvm_io_device *this,
 
 
 		}
 		}
 		break;
 		break;
-
+	}
 	case APIC_ICR:
 	case APIC_ICR:
 		/* No delay here, so we always clear the pending bit */
 		/* No delay here, so we always clear the pending bit */
 		apic_set_reg(apic, APIC_ICR, val & ~(1 << 12));
 		apic_set_reg(apic, APIC_ICR, val & ~(1 << 12));
@@ -680,7 +773,9 @@ static void apic_mmio_write(struct kvm_io_device *this,
 		break;
 		break;
 
 
 	case APIC_ICR2:
 	case APIC_ICR2:
-		apic_set_reg(apic, APIC_ICR2, val & 0xff000000);
+		if (!apic_x2apic_mode(apic))
+			val &= 0xff000000;
+		apic_set_reg(apic, APIC_ICR2, val);
 		break;
 		break;
 
 
 	case APIC_LVT0:
 	case APIC_LVT0:
@@ -694,8 +789,8 @@ static void apic_mmio_write(struct kvm_io_device *this,
 		if (!apic_sw_enabled(apic))
 		if (!apic_sw_enabled(apic))
 			val |= APIC_LVT_MASKED;
 			val |= APIC_LVT_MASKED;
 
 
-		val &= apic_lvt_mask[(offset - APIC_LVTT) >> 4];
-		apic_set_reg(apic, offset, val);
+		val &= apic_lvt_mask[(reg - APIC_LVTT) >> 4];
+		apic_set_reg(apic, reg, val);
 
 
 		break;
 		break;
 
 
@@ -703,7 +798,7 @@ static void apic_mmio_write(struct kvm_io_device *this,
 		hrtimer_cancel(&apic->lapic_timer.timer);
 		hrtimer_cancel(&apic->lapic_timer.timer);
 		apic_set_reg(apic, APIC_TMICT, val);
 		apic_set_reg(apic, APIC_TMICT, val);
 		start_apic_timer(apic);
 		start_apic_timer(apic);
-		return;
+		break;
 
 
 	case APIC_TDCR:
 	case APIC_TDCR:
 		if (val & 4)
 		if (val & 4)
@@ -712,27 +807,59 @@ static void apic_mmio_write(struct kvm_io_device *this,
 		update_divide_count(apic);
 		update_divide_count(apic);
 		break;
 		break;
 
 
+	case APIC_ESR:
+		if (apic_x2apic_mode(apic) && val != 0) {
+			printk(KERN_ERR "KVM_WRITE:ESR not zero %x\n", val);
+			ret = 1;
+		}
+		break;
+
+	case APIC_SELF_IPI:
+		if (apic_x2apic_mode(apic)) {
+			apic_reg_write(apic, APIC_ICR, 0x40000 | (val & 0xff));
+		} else
+			ret = 1;
+		break;
 	default:
 	default:
-		apic_debug("Local APIC Write to read-only register %x\n",
-			   offset);
+		ret = 1;
 		break;
 		break;
 	}
 	}
-
+	if (ret)
+		apic_debug("Local APIC Write to read-only register %x\n", reg);
+	return ret;
 }
 }
 
 
-static int apic_mmio_range(struct kvm_io_device *this, gpa_t addr,
-			   int len, int size)
+static int apic_mmio_write(struct kvm_io_device *this,
+			    gpa_t address, int len, const void *data)
 {
 {
-	struct kvm_lapic *apic = (struct kvm_lapic *)this->private;
-	int ret = 0;
+	struct kvm_lapic *apic = to_lapic(this);
+	unsigned int offset = address - apic->base_address;
+	u32 val;
 
 
+	if (!apic_mmio_in_range(apic, address))
+		return -EOPNOTSUPP;
 
 
-	if (apic_hw_enabled(apic) &&
-	    (addr >= apic->base_address) &&
-	    (addr < (apic->base_address + LAPIC_MMIO_LENGTH)))
-		ret = 1;
+	/*
+	 * APIC register must be aligned on 128-bits boundary.
+	 * 32/64/128 bits registers must be accessed thru 32 bits.
+	 * Refer SDM 8.4.1
+	 */
+	if (len != 4 || (offset & 0xf)) {
+		/* Don't shout loud, $infamous_os would cause only noise. */
+		apic_debug("apic write: bad size=%d %lx\n", len, (long)address);
+		return 0;
+	}
 
 
-	return ret;
+	val = *(u32*)data;
+
+	/* too common printing */
+	if (offset != APIC_EOI)
+		apic_debug("%s: offset 0x%x with length 0x%x, and value is "
+			   "0x%x\n", __func__, offset, len, val);
+
+	apic_reg_write(apic, offset & 0xff0, val);
+
+	return 0;
 }
 }
 
 
 void kvm_free_lapic(struct kvm_vcpu *vcpu)
 void kvm_free_lapic(struct kvm_vcpu *vcpu)
@@ -763,7 +890,6 @@ void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8)
 	apic_set_tpr(apic, ((cr8 & 0x0f) << 4)
 	apic_set_tpr(apic, ((cr8 & 0x0f) << 4)
 		     | (apic_get_reg(apic, APIC_TASKPRI) & 4));
 		     | (apic_get_reg(apic, APIC_TASKPRI) & 4));
 }
 }
-EXPORT_SYMBOL_GPL(kvm_lapic_set_tpr);
 
 
 u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu)
 u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu)
 {
 {
@@ -776,7 +902,6 @@ u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu)
 
 
 	return (tpr & 0xf0) >> 4;
 	return (tpr & 0xf0) >> 4;
 }
 }
-EXPORT_SYMBOL_GPL(kvm_lapic_get_cr8);
 
 
 void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
 void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
 {
 {
@@ -787,10 +912,16 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
 		vcpu->arch.apic_base = value;
 		vcpu->arch.apic_base = value;
 		return;
 		return;
 	}
 	}
-	if (apic->vcpu->vcpu_id)
+
+	if (!kvm_vcpu_is_bsp(apic->vcpu))
 		value &= ~MSR_IA32_APICBASE_BSP;
 		value &= ~MSR_IA32_APICBASE_BSP;
 
 
 	vcpu->arch.apic_base = value;
 	vcpu->arch.apic_base = value;
+	if (apic_x2apic_mode(apic)) {
+		u32 id = kvm_apic_id(apic);
+		u32 ldr = ((id & ~0xf) << 16) | (1 << (id & 0xf));
+		apic_set_reg(apic, APIC_LDR, ldr);
+	}
 	apic->base_address = apic->vcpu->arch.apic_base &
 	apic->base_address = apic->vcpu->arch.apic_base &
 			     MSR_IA32_APICBASE_BASE;
 			     MSR_IA32_APICBASE_BASE;
 
 
@@ -800,12 +931,6 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
 
 
 }
 }
 
 
-u64 kvm_lapic_get_base(struct kvm_vcpu *vcpu)
-{
-	return vcpu->arch.apic_base;
-}
-EXPORT_SYMBOL_GPL(kvm_lapic_get_base);
-
 void kvm_lapic_reset(struct kvm_vcpu *vcpu)
 void kvm_lapic_reset(struct kvm_vcpu *vcpu)
 {
 {
 	struct kvm_lapic *apic;
 	struct kvm_lapic *apic;
@@ -821,7 +946,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu)
 	hrtimer_cancel(&apic->lapic_timer.timer);
 	hrtimer_cancel(&apic->lapic_timer.timer);
 
 
 	apic_set_reg(apic, APIC_ID, vcpu->vcpu_id << 24);
 	apic_set_reg(apic, APIC_ID, vcpu->vcpu_id << 24);
-	apic_set_reg(apic, APIC_LVR, APIC_VERSION);
+	kvm_apic_set_version(apic->vcpu);
 
 
 	for (i = 0; i < APIC_LVT_NUM; i++)
 	for (i = 0; i < APIC_LVT_NUM; i++)
 		apic_set_reg(apic, APIC_LVTT + 0x10 * i, APIC_LVT_MASKED);
 		apic_set_reg(apic, APIC_LVTT + 0x10 * i, APIC_LVT_MASKED);
@@ -842,9 +967,10 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu)
 		apic_set_reg(apic, APIC_ISR + 0x10 * i, 0);
 		apic_set_reg(apic, APIC_ISR + 0x10 * i, 0);
 		apic_set_reg(apic, APIC_TMR + 0x10 * i, 0);
 		apic_set_reg(apic, APIC_TMR + 0x10 * i, 0);
 	}
 	}
+	apic->irr_pending = false;
 	update_divide_count(apic);
 	update_divide_count(apic);
 	atomic_set(&apic->lapic_timer.pending, 0);
 	atomic_set(&apic->lapic_timer.pending, 0);
-	if (vcpu->vcpu_id == 0)
+	if (kvm_vcpu_is_bsp(vcpu))
 		vcpu->arch.apic_base |= MSR_IA32_APICBASE_BSP;
 		vcpu->arch.apic_base |= MSR_IA32_APICBASE_BSP;
 	apic_update_ppr(apic);
 	apic_update_ppr(apic);
 
 
@@ -855,7 +981,6 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu)
 		   vcpu, kvm_apic_id(apic),
 		   vcpu, kvm_apic_id(apic),
 		   vcpu->arch.apic_base, apic->base_address);
 		   vcpu->arch.apic_base, apic->base_address);
 }
 }
-EXPORT_SYMBOL_GPL(kvm_lapic_reset);
 
 
 bool kvm_apic_present(struct kvm_vcpu *vcpu)
 bool kvm_apic_present(struct kvm_vcpu *vcpu)
 {
 {
@@ -866,7 +991,6 @@ int kvm_lapic_enabled(struct kvm_vcpu *vcpu)
 {
 {
 	return kvm_apic_present(vcpu) && apic_sw_enabled(vcpu->arch.apic);
 	return kvm_apic_present(vcpu) && apic_sw_enabled(vcpu->arch.apic);
 }
 }
-EXPORT_SYMBOL_GPL(kvm_lapic_enabled);
 
 
 /*
 /*
  *----------------------------------------------------------------------
  *----------------------------------------------------------------------
@@ -917,6 +1041,11 @@ static struct kvm_timer_ops lapic_timer_ops = {
 	.is_periodic = lapic_is_periodic,
 	.is_periodic = lapic_is_periodic,
 };
 };
 
 
+static const struct kvm_io_device_ops apic_mmio_ops = {
+	.read     = apic_mmio_read,
+	.write    = apic_mmio_write,
+};
+
 int kvm_create_lapic(struct kvm_vcpu *vcpu)
 int kvm_create_lapic(struct kvm_vcpu *vcpu)
 {
 {
 	struct kvm_lapic *apic;
 	struct kvm_lapic *apic;
@@ -945,16 +1074,13 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu)
 	apic->lapic_timer.timer.function = kvm_timer_fn;
 	apic->lapic_timer.timer.function = kvm_timer_fn;
 	apic->lapic_timer.t_ops = &lapic_timer_ops;
 	apic->lapic_timer.t_ops = &lapic_timer_ops;
 	apic->lapic_timer.kvm = vcpu->kvm;
 	apic->lapic_timer.kvm = vcpu->kvm;
-	apic->lapic_timer.vcpu_id = vcpu->vcpu_id;
+	apic->lapic_timer.vcpu = vcpu;
 
 
 	apic->base_address = APIC_DEFAULT_PHYS_BASE;
 	apic->base_address = APIC_DEFAULT_PHYS_BASE;
 	vcpu->arch.apic_base = APIC_DEFAULT_PHYS_BASE;
 	vcpu->arch.apic_base = APIC_DEFAULT_PHYS_BASE;
 
 
 	kvm_lapic_reset(vcpu);
 	kvm_lapic_reset(vcpu);
-	apic->dev.read = apic_mmio_read;
-	apic->dev.write = apic_mmio_write;
-	apic->dev.in_range = apic_mmio_range;
-	apic->dev.private = apic;
+	kvm_iodevice_init(&apic->dev, &apic_mmio_ops);
 
 
 	return 0;
 	return 0;
 nomem_free_apic:
 nomem_free_apic:
@@ -962,7 +1088,6 @@ nomem_free_apic:
 nomem:
 nomem:
 	return -ENOMEM;
 	return -ENOMEM;
 }
 }
-EXPORT_SYMBOL_GPL(kvm_create_lapic);
 
 
 int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu)
 int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu)
 {
 {
@@ -985,7 +1110,7 @@ int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu)
 	u32 lvt0 = apic_get_reg(vcpu->arch.apic, APIC_LVT0);
 	u32 lvt0 = apic_get_reg(vcpu->arch.apic, APIC_LVT0);
 	int r = 0;
 	int r = 0;
 
 
-	if (vcpu->vcpu_id == 0) {
+	if (kvm_vcpu_is_bsp(vcpu)) {
 		if (!apic_hw_enabled(vcpu->arch.apic))
 		if (!apic_hw_enabled(vcpu->arch.apic))
 			r = 1;
 			r = 1;
 		if ((lvt0 & APIC_LVT_MASKED) == 0 &&
 		if ((lvt0 & APIC_LVT_MASKED) == 0 &&
@@ -1025,7 +1150,8 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu)
 
 
 	apic->base_address = vcpu->arch.apic_base &
 	apic->base_address = vcpu->arch.apic_base &
 			     MSR_IA32_APICBASE_BASE;
 			     MSR_IA32_APICBASE_BASE;
-	apic_set_reg(apic, APIC_LVR, APIC_VERSION);
+	kvm_apic_set_version(vcpu);
+
 	apic_update_ppr(apic);
 	apic_update_ppr(apic);
 	hrtimer_cancel(&apic->lapic_timer.timer);
 	hrtimer_cancel(&apic->lapic_timer.timer);
 	update_divide_count(apic);
 	update_divide_count(apic);
@@ -1092,3 +1218,35 @@ void kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr)
 
 
 	vcpu->arch.apic->vapic_addr = vapic_addr;
 	vcpu->arch.apic->vapic_addr = vapic_addr;
 }
 }
+
+int kvm_x2apic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data)
+{
+	struct kvm_lapic *apic = vcpu->arch.apic;
+	u32 reg = (msr - APIC_BASE_MSR) << 4;
+
+	if (!irqchip_in_kernel(vcpu->kvm) || !apic_x2apic_mode(apic))
+		return 1;
+
+	/* if this is ICR write vector before command */
+	if (msr == 0x830)
+		apic_reg_write(apic, APIC_ICR2, (u32)(data >> 32));
+	return apic_reg_write(apic, reg, (u32)data);
+}
+
+int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data)
+{
+	struct kvm_lapic *apic = vcpu->arch.apic;
+	u32 reg = (msr - APIC_BASE_MSR) << 4, low, high = 0;
+
+	if (!irqchip_in_kernel(vcpu->kvm) || !apic_x2apic_mode(apic))
+		return 1;
+
+	if (apic_reg_read(apic, reg, 4, &low))
+		return 1;
+	if (msr == 0x830)
+		apic_reg_read(apic, APIC_ICR2, 4, &high);
+
+	*data = (((u64)high) << 32) | low;
+
+	return 0;
+}

+ 4 - 0
arch/x86/kvm/lapic.h

@@ -12,6 +12,7 @@ struct kvm_lapic {
 	struct kvm_timer lapic_timer;
 	u32 divide_count;
 	struct kvm_vcpu *vcpu;
+	bool irr_pending;
 	struct page *regs_page;
 	void *regs;
 	gpa_t vapic_addr;
@@ -28,6 +29,7 @@ u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu);
 void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8);
 void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value);
 u64 kvm_lapic_get_base(struct kvm_vcpu *vcpu);
+void kvm_apic_set_version(struct kvm_vcpu *vcpu);
 
 int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest);
 int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda);
@@ -44,4 +46,6 @@ void kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr);
 void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu);
 void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu);
 
 
+int kvm_x2apic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data);
+int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data);
 #endif

+ 384 - 203
arch/x86/kvm/mmu.c

@@ -18,6 +18,7 @@
  */
 
 #include "mmu.h"
+#include "kvm_cache_regs.h"
 
 #include <linux/kvm_host.h>
 #include <linux/types.h>
@@ -107,6 +108,9 @@ module_param(oos_shadow, bool, 0644);
 
 #define PT32_LEVEL_MASK(level) \
 		(((1ULL << PT32_LEVEL_BITS) - 1) << PT32_LEVEL_SHIFT(level))
+#define PT32_LVL_OFFSET_MASK(level) \
+	(PT32_BASE_ADDR_MASK & ((1ULL << (PAGE_SHIFT + (((level) - 1) \
+						* PT32_LEVEL_BITS))) - 1))
 
 
 #define PT32_INDEX(address, level)\
 #define PT32_INDEX(address, level)\
 	(((address) >> PT32_LEVEL_SHIFT(level)) & ((1 << PT32_LEVEL_BITS) - 1))
 	(((address) >> PT32_LEVEL_SHIFT(level)) & ((1 << PT32_LEVEL_BITS) - 1))
@@ -115,10 +119,19 @@ module_param(oos_shadow, bool, 0644);
 #define PT64_BASE_ADDR_MASK (((1ULL << 52) - 1) & ~(u64)(PAGE_SIZE-1))
 #define PT64_BASE_ADDR_MASK (((1ULL << 52) - 1) & ~(u64)(PAGE_SIZE-1))
 #define PT64_DIR_BASE_ADDR_MASK \
 #define PT64_DIR_BASE_ADDR_MASK \
 	(PT64_BASE_ADDR_MASK & ~((1ULL << (PAGE_SHIFT + PT64_LEVEL_BITS)) - 1))
 	(PT64_BASE_ADDR_MASK & ~((1ULL << (PAGE_SHIFT + PT64_LEVEL_BITS)) - 1))
+#define PT64_LVL_ADDR_MASK(level) \
+	(PT64_BASE_ADDR_MASK & ~((1ULL << (PAGE_SHIFT + (((level) - 1) \
+						* PT64_LEVEL_BITS))) - 1))
+#define PT64_LVL_OFFSET_MASK(level) \
+	(PT64_BASE_ADDR_MASK & ((1ULL << (PAGE_SHIFT + (((level) - 1) \
+						* PT64_LEVEL_BITS))) - 1))
 
 
 #define PT32_BASE_ADDR_MASK PAGE_MASK
 #define PT32_BASE_ADDR_MASK PAGE_MASK
 #define PT32_DIR_BASE_ADDR_MASK \
 #define PT32_DIR_BASE_ADDR_MASK \
 	(PAGE_MASK & ~((1ULL << (PAGE_SHIFT + PT32_LEVEL_BITS)) - 1))
 	(PAGE_MASK & ~((1ULL << (PAGE_SHIFT + PT32_LEVEL_BITS)) - 1))
+#define PT32_LVL_ADDR_MASK(level) \
+	(PAGE_MASK & ~((1ULL << (PAGE_SHIFT + (((level) - 1) \
+					    * PT32_LEVEL_BITS))) - 1))
 
 
 #define PT64_PERM_MASK (PT_PRESENT_MASK | PT_WRITABLE_MASK | PT_USER_MASK \
 #define PT64_PERM_MASK (PT_PRESENT_MASK | PT_WRITABLE_MASK | PT_USER_MASK \
 			| PT64_NX_MASK)
 			| PT64_NX_MASK)
@@ -129,6 +142,7 @@ module_param(oos_shadow, bool, 0644);
 #define PFERR_RSVD_MASK (1U << 3)
 #define PFERR_RSVD_MASK (1U << 3)
 #define PFERR_FETCH_MASK (1U << 4)
 #define PFERR_FETCH_MASK (1U << 4)
 
 
+#define PT_PDPE_LEVEL 3
 #define PT_DIRECTORY_LEVEL 2
 #define PT_DIRECTORY_LEVEL 2
 #define PT_PAGE_TABLE_LEVEL 1
 #define PT_PAGE_TABLE_LEVEL 1
 
 
@@ -139,10 +153,13 @@ module_param(oos_shadow, bool, 0644);
 #define ACC_USER_MASK    PT_USER_MASK
 #define ACC_USER_MASK    PT_USER_MASK
 #define ACC_ALL          (ACC_EXEC_MASK | ACC_WRITE_MASK | ACC_USER_MASK)
 #define ACC_ALL          (ACC_EXEC_MASK | ACC_WRITE_MASK | ACC_USER_MASK)
 
 
+#define CREATE_TRACE_POINTS
+#include "mmutrace.h"
+
 #define SHADOW_PT_INDEX(addr, level) PT64_INDEX(addr, level)
 #define SHADOW_PT_INDEX(addr, level) PT64_INDEX(addr, level)
 
 
 struct kvm_rmap_desc {
 struct kvm_rmap_desc {
-	u64 *shadow_ptes[RMAP_EXT];
+	u64 *sptes[RMAP_EXT];
 	struct kvm_rmap_desc *more;
 	struct kvm_rmap_desc *more;
 };
 };
 
 
@@ -239,16 +256,25 @@ static int is_writeble_pte(unsigned long pte)
 	return pte & PT_WRITABLE_MASK;
 	return pte & PT_WRITABLE_MASK;
 }
 }
 
 
-static int is_dirty_pte(unsigned long pte)
+static int is_dirty_gpte(unsigned long pte)
 {
 {
-	return pte & shadow_dirty_mask;
+	return pte & PT_DIRTY_MASK;
 }
 }
 
 
-static int is_rmap_pte(u64 pte)
+static int is_rmap_spte(u64 pte)
 {
 {
 	return is_shadow_present_pte(pte);
 	return is_shadow_present_pte(pte);
 }
 }
 
 
+static int is_last_spte(u64 pte, int level)
+{
+	if (level == PT_PAGE_TABLE_LEVEL)
+		return 1;
+	if (is_large_pte(pte))
+		return 1;
+	return 0;
+}
+
 static pfn_t spte_to_pfn(u64 pte)
 static pfn_t spte_to_pfn(u64 pte)
 {
 {
 	return (pte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT;
 	return (pte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT;
@@ -261,7 +287,7 @@ static gfn_t pse36_gfn_delta(u32 gpte)
 	return (gpte & PT32_DIR_PSE36_MASK) << shift;
 	return (gpte & PT32_DIR_PSE36_MASK) << shift;
 }
 }
 
 
-static void set_shadow_pte(u64 *sptep, u64 spte)
+static void __set_spte(u64 *sptep, u64 spte)
 {
 {
 #ifdef CONFIG_X86_64
 #ifdef CONFIG_X86_64
 	set_64bit((unsigned long *)sptep, spte);
 	set_64bit((unsigned long *)sptep, spte);
@@ -380,37 +406,52 @@ static void mmu_free_rmap_desc(struct kvm_rmap_desc *rd)
  * Return the pointer to the largepage write count for a given
  * Return the pointer to the largepage write count for a given
  * gfn, handling slots that are not large page aligned.
  * gfn, handling slots that are not large page aligned.
  */
  */
-static int *slot_largepage_idx(gfn_t gfn, struct kvm_memory_slot *slot)
+static int *slot_largepage_idx(gfn_t gfn,
+			       struct kvm_memory_slot *slot,
+			       int level)
 {
 {
 	unsigned long idx;
 	unsigned long idx;
 
 
-	idx = (gfn / KVM_PAGES_PER_HPAGE) -
-	      (slot->base_gfn / KVM_PAGES_PER_HPAGE);
-	return &slot->lpage_info[idx].write_count;
+	idx = (gfn / KVM_PAGES_PER_HPAGE(level)) -
+	      (slot->base_gfn / KVM_PAGES_PER_HPAGE(level));
+	return &slot->lpage_info[level - 2][idx].write_count;
 }
 }
 
 
 static void account_shadowed(struct kvm *kvm, gfn_t gfn)
 static void account_shadowed(struct kvm *kvm, gfn_t gfn)
 {
 {
+	struct kvm_memory_slot *slot;
 	int *write_count;
 	int *write_count;
+	int i;
 
 
 	gfn = unalias_gfn(kvm, gfn);
 	gfn = unalias_gfn(kvm, gfn);
-	write_count = slot_largepage_idx(gfn,
-					 gfn_to_memslot_unaliased(kvm, gfn));
-	*write_count += 1;
+
+	slot = gfn_to_memslot_unaliased(kvm, gfn);
+	for (i = PT_DIRECTORY_LEVEL;
+	     i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) {
+		write_count   = slot_largepage_idx(gfn, slot, i);
+		*write_count += 1;
+	}
 }
 }
 
 
 static void unaccount_shadowed(struct kvm *kvm, gfn_t gfn)
 static void unaccount_shadowed(struct kvm *kvm, gfn_t gfn)
 {
 {
+	struct kvm_memory_slot *slot;
 	int *write_count;
 	int *write_count;
+	int i;
 
 
 	gfn = unalias_gfn(kvm, gfn);
 	gfn = unalias_gfn(kvm, gfn);
-	write_count = slot_largepage_idx(gfn,
-					 gfn_to_memslot_unaliased(kvm, gfn));
-	*write_count -= 1;
-	WARN_ON(*write_count < 0);
+	for (i = PT_DIRECTORY_LEVEL;
+	     i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) {
+		slot          = gfn_to_memslot_unaliased(kvm, gfn);
+		write_count   = slot_largepage_idx(gfn, slot, i);
+		*write_count -= 1;
+		WARN_ON(*write_count < 0);
+	}
 }
 }
 
 
-static int has_wrprotected_page(struct kvm *kvm, gfn_t gfn)
+static int has_wrprotected_page(struct kvm *kvm,
+				gfn_t gfn,
+				int level)
 {
 {
 	struct kvm_memory_slot *slot;
 	struct kvm_memory_slot *slot;
 	int *largepage_idx;
 	int *largepage_idx;
@@ -418,47 +459,67 @@ static int has_wrprotected_page(struct kvm *kvm, gfn_t gfn)
 	gfn = unalias_gfn(kvm, gfn);
 	gfn = unalias_gfn(kvm, gfn);
 	slot = gfn_to_memslot_unaliased(kvm, gfn);
 	slot = gfn_to_memslot_unaliased(kvm, gfn);
 	if (slot) {
 	if (slot) {
-		largepage_idx = slot_largepage_idx(gfn, slot);
+		largepage_idx = slot_largepage_idx(gfn, slot, level);
 		return *largepage_idx;
 		return *largepage_idx;
 	}
 	}
 
 
 	return 1;
 	return 1;
 }
 }
 
 
-static int host_largepage_backed(struct kvm *kvm, gfn_t gfn)
+static int host_mapping_level(struct kvm *kvm, gfn_t gfn)
 {
 {
+	unsigned long page_size = PAGE_SIZE;
 	struct vm_area_struct *vma;
 	struct vm_area_struct *vma;
 	unsigned long addr;
 	unsigned long addr;
-	int ret = 0;
+	int i, ret = 0;
 
 
 	addr = gfn_to_hva(kvm, gfn);
 	addr = gfn_to_hva(kvm, gfn);
 	if (kvm_is_error_hva(addr))
 	if (kvm_is_error_hva(addr))
-		return ret;
+		return page_size;
 
 
 	down_read(&current->mm->mmap_sem);
 	down_read(&current->mm->mmap_sem);
 	vma = find_vma(current->mm, addr);
 	vma = find_vma(current->mm, addr);
-	if (vma && is_vm_hugetlb_page(vma))
-		ret = 1;
+	if (!vma)
+		goto out;
+
+	page_size = vma_kernel_pagesize(vma);
+
+out:
 	up_read(&current->mm->mmap_sem);
 	up_read(&current->mm->mmap_sem);
 
 
+	for (i = PT_PAGE_TABLE_LEVEL;
+	     i < (PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES); ++i) {
+		if (page_size >= KVM_HPAGE_SIZE(i))
+			ret = i;
+		else
+			break;
+	}
+
 	return ret;
 	return ret;
 }
 }
 
 
-static int is_largepage_backed(struct kvm_vcpu *vcpu, gfn_t large_gfn)
+static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn)
 {
 {
 	struct kvm_memory_slot *slot;
 	struct kvm_memory_slot *slot;
-
-	if (has_wrprotected_page(vcpu->kvm, large_gfn))
-		return 0;
-
-	if (!host_largepage_backed(vcpu->kvm, large_gfn))
-		return 0;
+	int host_level;
+	int level = PT_PAGE_TABLE_LEVEL;
 
 
 	slot = gfn_to_memslot(vcpu->kvm, large_gfn);
 	slot = gfn_to_memslot(vcpu->kvm, large_gfn);
 	if (slot && slot->dirty_bitmap)
 	if (slot && slot->dirty_bitmap)
-		return 0;
+		return PT_PAGE_TABLE_LEVEL;
 
 
-	return 1;
+	host_level = host_mapping_level(vcpu->kvm, large_gfn);
+
+	if (host_level == PT_PAGE_TABLE_LEVEL)
+		return host_level;
+
+	for (level = PT_DIRECTORY_LEVEL; level <= host_level; ++level) {
+
+		if (has_wrprotected_page(vcpu->kvm, large_gfn, level))
+			break;
+	}
+
+	return level - 1;
 }
 }
 
 
 /*
 /*
@@ -466,19 +527,19 @@ static int is_largepage_backed(struct kvm_vcpu *vcpu, gfn_t large_gfn)
  * Note: gfn must be unaliased before this function get called
  * Note: gfn must be unaliased before this function get called
  */
  */
 
 
-static unsigned long *gfn_to_rmap(struct kvm *kvm, gfn_t gfn, int lpage)
+static unsigned long *gfn_to_rmap(struct kvm *kvm, gfn_t gfn, int level)
 {
 {
 	struct kvm_memory_slot *slot;
 	struct kvm_memory_slot *slot;
 	unsigned long idx;
 	unsigned long idx;
 
 
 	slot = gfn_to_memslot(kvm, gfn);
 	slot = gfn_to_memslot(kvm, gfn);
-	if (!lpage)
+	if (likely(level == PT_PAGE_TABLE_LEVEL))
 		return &slot->rmap[gfn - slot->base_gfn];
 
-	idx = (gfn / KVM_PAGES_PER_HPAGE) -
-	      (slot->base_gfn / KVM_PAGES_PER_HPAGE);
+	idx = (gfn / KVM_PAGES_PER_HPAGE(level)) -
+		(slot->base_gfn / KVM_PAGES_PER_HPAGE(level));
 
-	return &slot->lpage_info[idx].rmap_pde;
+	return &slot->lpage_info[level - 2][idx].rmap_pde;
 }
 
 /*
@@ -494,42 +555,42 @@ static unsigned long *gfn_to_rmap(struct kvm *kvm, gfn_t gfn, int lpage)
  * the spte was not added.
  *
  */
-static int rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn, int lpage)
+static int rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn)
 {
 	struct kvm_mmu_page *sp;
 	struct kvm_rmap_desc *desc;
 	unsigned long *rmapp;
 	int i, count = 0;
 
-	if (!is_rmap_pte(*spte))
+	if (!is_rmap_spte(*spte))
 		return count;
 	gfn = unalias_gfn(vcpu->kvm, gfn);
 	sp = page_header(__pa(spte));
 	sp->gfns[spte - sp->spt] = gfn;
-	rmapp = gfn_to_rmap(vcpu->kvm, gfn, lpage);
+	rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp->role.level);
 	if (!*rmapp) {
 		rmap_printk("rmap_add: %p %llx 0->1\n", spte, *spte);
 		*rmapp = (unsigned long)spte;
 	} else if (!(*rmapp & 1)) {
 		rmap_printk("rmap_add: %p %llx 1->many\n", spte, *spte);
 		desc = mmu_alloc_rmap_desc(vcpu);
-		desc->shadow_ptes[0] = (u64 *)*rmapp;
-		desc->shadow_ptes[1] = spte;
+		desc->sptes[0] = (u64 *)*rmapp;
+		desc->sptes[1] = spte;
 		*rmapp = (unsigned long)desc | 1;
 	} else {
 		rmap_printk("rmap_add: %p %llx many->many\n", spte, *spte);
 		desc = (struct kvm_rmap_desc *)(*rmapp & ~1ul);
-		while (desc->shadow_ptes[RMAP_EXT-1] && desc->more) {
+		while (desc->sptes[RMAP_EXT-1] && desc->more) {
 			desc = desc->more;
 			count += RMAP_EXT;
 		}
-		if (desc->shadow_ptes[RMAP_EXT-1]) {
+		if (desc->sptes[RMAP_EXT-1]) {
 			desc->more = mmu_alloc_rmap_desc(vcpu);
 			desc = desc->more;
 		}
-		for (i = 0; desc->shadow_ptes[i]; ++i)
+		for (i = 0; desc->sptes[i]; ++i)
 			;
-		desc->shadow_ptes[i] = spte;
+		desc->sptes[i] = spte;
 	}
 	return count;
 }
@@ -541,14 +602,14 @@ static void rmap_desc_remove_entry(unsigned long *rmapp,
 {
 	int j;
 
-	for (j = RMAP_EXT - 1; !desc->shadow_ptes[j] && j > i; --j)
+	for (j = RMAP_EXT - 1; !desc->sptes[j] && j > i; --j)
 		;
-	desc->shadow_ptes[i] = desc->shadow_ptes[j];
-	desc->shadow_ptes[j] = NULL;
+	desc->sptes[i] = desc->sptes[j];
+	desc->sptes[j] = NULL;
 	if (j != 0)
 		return;
 	if (!prev_desc && !desc->more)
-		*rmapp = (unsigned long)desc->shadow_ptes[0];
+		*rmapp = (unsigned long)desc->sptes[0];
 	else
 		if (prev_desc)
 			prev_desc->more = desc->more;
@@ -566,7 +627,7 @@ static void rmap_remove(struct kvm *kvm, u64 *spte)
 	unsigned long *rmapp;
 	int i;
 
-	if (!is_rmap_pte(*spte))
+	if (!is_rmap_spte(*spte))
 		return;
 	sp = page_header(__pa(spte));
 	pfn = spte_to_pfn(*spte);
@@ -576,7 +637,7 @@ static void rmap_remove(struct kvm *kvm, u64 *spte)
 		kvm_release_pfn_dirty(pfn);
 	else
 		kvm_release_pfn_clean(pfn);
-	rmapp = gfn_to_rmap(kvm, sp->gfns[spte - sp->spt], is_large_pte(*spte));
+	rmapp = gfn_to_rmap(kvm, sp->gfns[spte - sp->spt], sp->role.level);
 	if (!*rmapp) {
 		printk(KERN_ERR "rmap_remove: %p %llx 0->BUG\n", spte, *spte);
 		BUG();
@@ -593,8 +654,8 @@ static void rmap_remove(struct kvm *kvm, u64 *spte)
 		desc = (struct kvm_rmap_desc *)(*rmapp & ~1ul);
 		prev_desc = NULL;
 		while (desc) {
-			for (i = 0; i < RMAP_EXT && desc->shadow_ptes[i]; ++i)
-				if (desc->shadow_ptes[i] == spte) {
+			for (i = 0; i < RMAP_EXT && desc->sptes[i]; ++i)
+				if (desc->sptes[i] == spte) {
 					rmap_desc_remove_entry(rmapp,
 							       desc, i,
 							       prev_desc);
@@ -625,10 +686,10 @@ static u64 *rmap_next(struct kvm *kvm, unsigned long *rmapp, u64 *spte)
 	prev_desc = NULL;
 	prev_spte = NULL;
 	while (desc) {
-		for (i = 0; i < RMAP_EXT && desc->shadow_ptes[i]; ++i) {
+		for (i = 0; i < RMAP_EXT && desc->sptes[i]; ++i) {
 			if (prev_spte == spte)
-				return desc->shadow_ptes[i];
-			prev_spte = desc->shadow_ptes[i];
+				return desc->sptes[i];
+			prev_spte = desc->sptes[i];
 		}
 		desc = desc->more;
 	}
@@ -639,10 +700,10 @@ static int rmap_write_protect(struct kvm *kvm, u64 gfn)
 {
 	unsigned long *rmapp;
 	u64 *spte;
-	int write_protected = 0;
+	int i, write_protected = 0;
 
 	gfn = unalias_gfn(kvm, gfn);
-	rmapp = gfn_to_rmap(kvm, gfn, 0);
+	rmapp = gfn_to_rmap(kvm, gfn, PT_PAGE_TABLE_LEVEL);
 
 	spte = rmap_next(kvm, rmapp, NULL);
 	while (spte) {
@@ -650,7 +711,7 @@ static int rmap_write_protect(struct kvm *kvm, u64 gfn)
 		BUG_ON(!(*spte & PT_PRESENT_MASK));
 		rmap_printk("rmap_write_protect: spte %p %llx\n", spte, *spte);
 		if (is_writeble_pte(*spte)) {
-			set_shadow_pte(spte, *spte & ~PT_WRITABLE_MASK);
+			__set_spte(spte, *spte & ~PT_WRITABLE_MASK);
 			write_protected = 1;
 		}
 		spte = rmap_next(kvm, rmapp, spte);
@@ -664,21 +725,24 @@ static int rmap_write_protect(struct kvm *kvm, u64 gfn)
 	}
 
 	/* check for huge page mappings */
-	rmapp = gfn_to_rmap(kvm, gfn, 1);
-	spte = rmap_next(kvm, rmapp, NULL);
-	while (spte) {
-		BUG_ON(!spte);
-		BUG_ON(!(*spte & PT_PRESENT_MASK));
-		BUG_ON((*spte & (PT_PAGE_SIZE_MASK|PT_PRESENT_MASK)) != (PT_PAGE_SIZE_MASK|PT_PRESENT_MASK));
-		pgprintk("rmap_write_protect(large): spte %p %llx %lld\n", spte, *spte, gfn);
-		if (is_writeble_pte(*spte)) {
-			rmap_remove(kvm, spte);
-			--kvm->stat.lpages;
-			set_shadow_pte(spte, shadow_trap_nonpresent_pte);
-			spte = NULL;
-			write_protected = 1;
+	for (i = PT_DIRECTORY_LEVEL;
+	     i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) {
+		rmapp = gfn_to_rmap(kvm, gfn, i);
+		spte = rmap_next(kvm, rmapp, NULL);
+		while (spte) {
+			BUG_ON(!spte);
+			BUG_ON(!(*spte & PT_PRESENT_MASK));
+			BUG_ON((*spte & (PT_PAGE_SIZE_MASK|PT_PRESENT_MASK)) != (PT_PAGE_SIZE_MASK|PT_PRESENT_MASK));
+			pgprintk("rmap_write_protect(large): spte %p %llx %lld\n", spte, *spte, gfn);
+			if (is_writeble_pte(*spte)) {
+				rmap_remove(kvm, spte);
+				--kvm->stat.lpages;
+				__set_spte(spte, shadow_trap_nonpresent_pte);
+				spte = NULL;
+				write_protected = 1;
+			}
+			spte = rmap_next(kvm, rmapp, spte);
 		}
-		spte = rmap_next(kvm, rmapp, spte);
 	}
 
 	return write_protected;
@@ -693,7 +757,7 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp)
 		BUG_ON(!(*spte & PT_PRESENT_MASK));
 		rmap_printk("kvm_rmap_unmap_hva: spte %p %llx\n", spte, *spte);
 		rmap_remove(kvm, spte);
-		set_shadow_pte(spte, shadow_trap_nonpresent_pte);
+		__set_spte(spte, shadow_trap_nonpresent_pte);
 		need_tlb_flush = 1;
 	}
 	return need_tlb_flush;
@@ -702,7 +766,7 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp)
 static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
 			  int (*handler)(struct kvm *kvm, unsigned long *rmapp))
 {
-	int i;
+	int i, j;
 	int retval = 0;
 
 	/*
@@ -721,11 +785,15 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
 		end = start + (memslot->npages << PAGE_SHIFT);
 		if (hva >= start && hva < end) {
 			gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT;
+
 			retval |= handler(kvm, &memslot->rmap[gfn_offset]);
-			retval |= handler(kvm,
-					  &memslot->lpage_info[
-						  gfn_offset /
-						  KVM_PAGES_PER_HPAGE].rmap_pde);
+
+			for (j = 0; j < KVM_NR_PAGE_SIZES - 1; ++j) {
+				int idx = gfn_offset;
+				idx /= KVM_PAGES_PER_HPAGE(PT_DIRECTORY_LEVEL + j);
+				retval |= handler(kvm,
+					&memslot->lpage_info[j][idx].rmap_pde);
+			}
 		}
 	}
 
@@ -763,12 +831,15 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp)
 
 #define RMAP_RECYCLE_THRESHOLD 1000
 
-static void rmap_recycle(struct kvm_vcpu *vcpu, gfn_t gfn, int lpage)
+static void rmap_recycle(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn)
 {
 	unsigned long *rmapp;
+	struct kvm_mmu_page *sp;
+
+	sp = page_header(__pa(spte));
 
 	gfn = unalias_gfn(vcpu->kvm, gfn);
-	rmapp = gfn_to_rmap(vcpu->kvm, gfn, lpage);
+	rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp->role.level);
 
 	kvm_unmap_rmapp(vcpu->kvm, rmapp);
 	kvm_flush_remote_tlbs(vcpu->kvm);
@@ -1109,6 +1180,7 @@ static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
 		return 1;
 	}
 
+	trace_kvm_mmu_sync_page(sp);
 	if (rmap_write_protect(vcpu->kvm, sp->gfn))
 		kvm_flush_remote_tlbs(vcpu->kvm);
 	kvm_unlink_unsync_page(vcpu->kvm, sp);
@@ -1231,8 +1303,6 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
 		quadrant &= (1 << ((PT32_PT_BITS - PT64_PT_BITS) * level)) - 1;
 		role.quadrant = quadrant;
 	}
-	pgprintk("%s: looking gfn %lx role %x\n", __func__,
-		 gfn, role.word);
 	index = kvm_page_table_hashfn(gfn);
 	bucket = &vcpu->kvm->arch.mmu_page_hash[index];
 	hlist_for_each_entry_safe(sp, node, tmp, bucket, hash_link)
@@ -1249,14 +1319,13 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
 				set_bit(KVM_REQ_MMU_SYNC, &vcpu->requests);
 				kvm_mmu_mark_parents_unsync(vcpu, sp);
 			}
-			pgprintk("%s: found\n", __func__);
+			trace_kvm_mmu_get_page(sp, false);
 			return sp;
 		}
 	++vcpu->kvm->stat.mmu_cache_miss;
 	sp = kvm_mmu_alloc_page(vcpu, parent_pte);
 	if (!sp)
 		return sp;
-	pgprintk("%s: adding gfn %lx role %x\n", __func__, gfn, role.word);
 	sp->gfn = gfn;
 	sp->role = role;
 	hlist_add_head(&sp->hash_link, bucket);
@@ -1269,6 +1338,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
 		vcpu->arch.mmu.prefetch_page(vcpu, sp);
 	else
 		nonpaging_prefetch_page(vcpu, sp);
+	trace_kvm_mmu_get_page(sp, true);
 	return sp;
 }
 
@@ -1292,6 +1362,11 @@ static bool shadow_walk_okay(struct kvm_shadow_walk_iterator *iterator)
 {
 	if (iterator->level < PT_PAGE_TABLE_LEVEL)
 		return false;
+
+	if (iterator->level == PT_PAGE_TABLE_LEVEL)
+		if (is_large_pte(*iterator->sptep))
+			return false;
+
 	iterator->index = SHADOW_PT_INDEX(iterator->addr, iterator->level);
 	iterator->sptep	= ((u64 *)__va(iterator->shadow_addr)) + iterator->index;
 	return true;
@@ -1312,25 +1387,17 @@ static void kvm_mmu_page_unlink_children(struct kvm *kvm,
 
 	pt = sp->spt;
 
-	if (sp->role.level == PT_PAGE_TABLE_LEVEL) {
-		for (i = 0; i < PT64_ENT_PER_PAGE; ++i) {
-			if (is_shadow_present_pte(pt[i]))
-				rmap_remove(kvm, &pt[i]);
-			pt[i] = shadow_trap_nonpresent_pte;
-		}
-		return;
-	}
-
 	for (i = 0; i < PT64_ENT_PER_PAGE; ++i) {
 		ent = pt[i];
 
 		if (is_shadow_present_pte(ent)) {
-			if (!is_large_pte(ent)) {
+			if (!is_last_spte(ent, sp->role.level)) {
 				ent &= PT64_BASE_ADDR_MASK;
 				mmu_page_remove_parent_pte(page_header(ent),
 							   &pt[i]);
 			} else {
-				--kvm->stat.lpages;
+				if (is_large_pte(ent))
+					--kvm->stat.lpages;
 				rmap_remove(kvm, &pt[i]);
 			}
 		}
@@ -1346,10 +1413,10 @@ static void kvm_mmu_put_page(struct kvm_mmu_page *sp, u64 *parent_pte)
 static void kvm_mmu_reset_last_pte_updated(struct kvm *kvm)
 {
 	int i;
+	struct kvm_vcpu *vcpu;
 
-	for (i = 0; i < KVM_MAX_VCPUS; ++i)
-		if (kvm->vcpus[i])
-			kvm->vcpus[i]->arch.last_pte_updated = NULL;
+	kvm_for_each_vcpu(i, vcpu, kvm)
+		vcpu->arch.last_pte_updated = NULL;
 }
 
 static void kvm_mmu_unlink_parents(struct kvm *kvm, struct kvm_mmu_page *sp)
@@ -1368,7 +1435,7 @@ static void kvm_mmu_unlink_parents(struct kvm *kvm, struct kvm_mmu_page *sp)
 		}
 		BUG_ON(!parent_pte);
 		kvm_mmu_put_page(sp, parent_pte);
-		set_shadow_pte(parent_pte, shadow_trap_nonpresent_pte);
+		__set_spte(parent_pte, shadow_trap_nonpresent_pte);
 	}
 }
 
@@ -1400,6 +1467,8 @@ static int mmu_zap_unsync_children(struct kvm *kvm,
 static int kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp)
 {
 	int ret;
+
+	trace_kvm_mmu_zap_page(sp);
 	++kvm->stat.mmu_shadow_zapped;
 	ret = mmu_zap_unsync_children(kvm, sp);
 	kvm_mmu_page_unlink_children(kvm, sp);
@@ -1516,7 +1585,7 @@ static void mmu_convert_notrap(struct kvm_mmu_page *sp)
 
 	for (i = 0; i < PT64_ENT_PER_PAGE; ++i) {
 		if (pt[i] == shadow_notrap_nonpresent_pte)
-			set_shadow_pte(&pt[i], shadow_trap_nonpresent_pte);
+			__set_spte(&pt[i], shadow_trap_nonpresent_pte);
 	}
 }
 
@@ -1646,6 +1715,7 @@ static int kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
 	struct kvm_mmu_page *s;
 	struct hlist_node *node, *n;
 
+	trace_kvm_mmu_unsync_page(sp);
 	index = kvm_page_table_hashfn(sp->gfn);
 	bucket = &vcpu->kvm->arch.mmu_page_hash[index];
 	/* don't unsync if pagetable is shadowed with multiple roles */
@@ -1682,9 +1752,9 @@ static int mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn,
 	return 0;
 }
 
-static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
+static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
 		    unsigned pte_access, int user_fault,
-		    int write_fault, int dirty, int largepage,
+		    int write_fault, int dirty, int level,
 		    gfn_t gfn, pfn_t pfn, bool speculative,
 		    bool can_unsync)
 {
@@ -1707,7 +1777,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
 		spte |= shadow_nx_mask;
 	if (pte_access & ACC_USER_MASK)
 		spte |= shadow_user_mask;
-	if (largepage)
+	if (level > PT_PAGE_TABLE_LEVEL)
 		spte |= PT_PAGE_SIZE_MASK;
 	if (tdp_enabled)
 		spte |= kvm_x86_ops->get_mt_mask(vcpu, gfn,
@@ -1718,7 +1788,8 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
 	if ((pte_access & ACC_WRITE_MASK)
 	    || (write_fault && !is_write_protection(vcpu) && !user_fault)) {
 
-		if (largepage && has_wrprotected_page(vcpu->kvm, gfn)) {
+		if (level > PT_PAGE_TABLE_LEVEL &&
+		    has_wrprotected_page(vcpu->kvm, gfn, level)) {
 			ret = 1;
 			spte = shadow_trap_nonpresent_pte;
 			goto set_pte;
@@ -1732,7 +1803,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
 		 * is responsibility of mmu_get_page / kvm_sync_page.
 		 * Same reasoning can be applied to dirty page accounting.
 		 */
-		if (!can_unsync && is_writeble_pte(*shadow_pte))
+		if (!can_unsync && is_writeble_pte(*sptep))
 			goto set_pte;
 
 		if (mmu_need_write_protect(vcpu, gfn, can_unsync)) {
@@ -1749,65 +1820,67 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
 		mark_page_dirty(vcpu->kvm, gfn);
 
 set_pte:
-	set_shadow_pte(shadow_pte, spte);
+	__set_spte(sptep, spte);
 	return ret;
 	return ret;
 }
 }
 
 
-static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
+static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
 			 unsigned pt_access, unsigned pte_access,
 			 unsigned pt_access, unsigned pte_access,
 			 int user_fault, int write_fault, int dirty,
 			 int user_fault, int write_fault, int dirty,
-			 int *ptwrite, int largepage, gfn_t gfn,
+			 int *ptwrite, int level, gfn_t gfn,
 			 pfn_t pfn, bool speculative)
 			 pfn_t pfn, bool speculative)
 {
 {
 	int was_rmapped = 0;
 	int was_rmapped = 0;
-	int was_writeble = is_writeble_pte(*shadow_pte);
+	int was_writeble = is_writeble_pte(*sptep);
 	int rmap_count;
 	int rmap_count;
 
 
 	pgprintk("%s: spte %llx access %x write_fault %d"
 	pgprintk("%s: spte %llx access %x write_fault %d"
 		 " user_fault %d gfn %lx\n",
 		 " user_fault %d gfn %lx\n",
-		 __func__, *shadow_pte, pt_access,
+		 __func__, *sptep, pt_access,
 		 write_fault, user_fault, gfn);
 		 write_fault, user_fault, gfn);
 
 
-	if (is_rmap_pte(*shadow_pte)) {
+	if (is_rmap_spte(*sptep)) {
 		/*
 		/*
 		 * If we overwrite a PTE page pointer with a 2MB PMD, unlink
 		 * If we overwrite a PTE page pointer with a 2MB PMD, unlink
 		 * the parent of the now unreachable PTE.
 		 * the parent of the now unreachable PTE.
 		 */
 		 */
-		if (largepage && !is_large_pte(*shadow_pte)) {
+		if (level > PT_PAGE_TABLE_LEVEL &&
+		    !is_large_pte(*sptep)) {
 			struct kvm_mmu_page *child;
 			struct kvm_mmu_page *child;
-			u64 pte = *shadow_pte;
+			u64 pte = *sptep;
 
 
 			child = page_header(pte & PT64_BASE_ADDR_MASK);
 			child = page_header(pte & PT64_BASE_ADDR_MASK);
-			mmu_page_remove_parent_pte(child, shadow_pte);
-		} else if (pfn != spte_to_pfn(*shadow_pte)) {
+			mmu_page_remove_parent_pte(child, sptep);
+		} else if (pfn != spte_to_pfn(*sptep)) {
 			pgprintk("hfn old %lx new %lx\n",
 			pgprintk("hfn old %lx new %lx\n",
-				 spte_to_pfn(*shadow_pte), pfn);
-			rmap_remove(vcpu->kvm, shadow_pte);
+				 spte_to_pfn(*sptep), pfn);
+			rmap_remove(vcpu->kvm, sptep);
 		} else
 		} else
 			was_rmapped = 1;
 			was_rmapped = 1;
 	}
 	}
-	if (set_spte(vcpu, shadow_pte, pte_access, user_fault, write_fault,
-		      dirty, largepage, gfn, pfn, speculative, true)) {
+
+	if (set_spte(vcpu, sptep, pte_access, user_fault, write_fault,
+		      dirty, level, gfn, pfn, speculative, true)) {
 		if (write_fault)
 		if (write_fault)
 			*ptwrite = 1;
 			*ptwrite = 1;
 		kvm_x86_ops->tlb_flush(vcpu);
 		kvm_x86_ops->tlb_flush(vcpu);
 	}
 	}
 
 
-	pgprintk("%s: setting spte %llx\n", __func__, *shadow_pte);
+	pgprintk("%s: setting spte %llx\n", __func__, *sptep);
 	pgprintk("instantiating %s PTE (%s) at %ld (%llx) addr %p\n",
 	pgprintk("instantiating %s PTE (%s) at %ld (%llx) addr %p\n",
-		 is_large_pte(*shadow_pte)? "2MB" : "4kB",
-		 is_present_pte(*shadow_pte)?"RW":"R", gfn,
-		 *shadow_pte, shadow_pte);
-	if (!was_rmapped && is_large_pte(*shadow_pte))
+		 is_large_pte(*sptep)? "2MB" : "4kB",
+		 *sptep & PT_PRESENT_MASK ?"RW":"R", gfn,
+		 *sptep, sptep);
+	if (!was_rmapped && is_large_pte(*sptep))
 		++vcpu->kvm->stat.lpages;
 		++vcpu->kvm->stat.lpages;
 
 
-	page_header_update_slot(vcpu->kvm, shadow_pte, gfn);
+	page_header_update_slot(vcpu->kvm, sptep, gfn);
 	if (!was_rmapped) {
 	if (!was_rmapped) {
-		rmap_count = rmap_add(vcpu, shadow_pte, gfn, largepage);
-		if (!is_rmap_pte(*shadow_pte))
+		rmap_count = rmap_add(vcpu, sptep, gfn);
+		if (!is_rmap_spte(*sptep))
 			kvm_release_pfn_clean(pfn);
 			kvm_release_pfn_clean(pfn);
 		if (rmap_count > RMAP_RECYCLE_THRESHOLD)
 		if (rmap_count > RMAP_RECYCLE_THRESHOLD)
-			rmap_recycle(vcpu, gfn, largepage);
+			rmap_recycle(vcpu, sptep, gfn);
 	} else {
 	} else {
 		if (was_writeble)
 		if (was_writeble)
 			kvm_release_pfn_dirty(pfn);
 			kvm_release_pfn_dirty(pfn);
@@ -1815,7 +1888,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
 			kvm_release_pfn_clean(pfn);
 			kvm_release_pfn_clean(pfn);
 	}
 	}
 	if (speculative) {
 	if (speculative) {
-		vcpu->arch.last_pte_updated = shadow_pte;
+		vcpu->arch.last_pte_updated = sptep;
 		vcpu->arch.last_pte_gfn = gfn;
 		vcpu->arch.last_pte_gfn = gfn;
 	}
 	}
 }
 }
@@ -1825,7 +1898,7 @@ static void nonpaging_new_cr3(struct kvm_vcpu *vcpu)
 }
 }
 
 
 static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
 static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
-			int largepage, gfn_t gfn, pfn_t pfn)
+			int level, gfn_t gfn, pfn_t pfn)
 {
 {
 	struct kvm_shadow_walk_iterator iterator;
 	struct kvm_shadow_walk_iterator iterator;
 	struct kvm_mmu_page *sp;
 	struct kvm_mmu_page *sp;
@@ -1833,11 +1906,10 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
 	gfn_t pseudo_gfn;
 	gfn_t pseudo_gfn;
 
 
 	for_each_shadow_entry(vcpu, (u64)gfn << PAGE_SHIFT, iterator) {
 	for_each_shadow_entry(vcpu, (u64)gfn << PAGE_SHIFT, iterator) {
-		if (iterator.level == PT_PAGE_TABLE_LEVEL
-		    || (largepage && iterator.level == PT_DIRECTORY_LEVEL)) {
+		if (iterator.level == level) {
 			mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, ACC_ALL,
 			mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, ACC_ALL,
 				     0, write, 1, &pt_write,
 				     0, write, 1, &pt_write,
-				     largepage, gfn, pfn, false);
+				     level, gfn, pfn, false);
 			++vcpu->stat.pf_fixed;
 			++vcpu->stat.pf_fixed;
 			break;
 			break;
 		}
 		}
@@ -1853,10 +1925,10 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
 				return -ENOMEM;
 				return -ENOMEM;
 			}
 			}
 
 
-			set_shadow_pte(iterator.sptep,
-				       __pa(sp->spt)
-				       | PT_PRESENT_MASK | PT_WRITABLE_MASK
-				       | shadow_user_mask | shadow_x_mask);
+			__set_spte(iterator.sptep,
+				   __pa(sp->spt)
+				   | PT_PRESENT_MASK | PT_WRITABLE_MASK
+				   | shadow_user_mask | shadow_x_mask);
 		}
 		}
 	}
 	}
 	return pt_write;
 	return pt_write;
@@ -1865,14 +1937,20 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
 static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
 static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
 {
 {
 	int r;
 	int r;
-	int largepage = 0;
+	int level;
 	pfn_t pfn;
 	pfn_t pfn;
 	unsigned long mmu_seq;
 	unsigned long mmu_seq;
 
 
-	if (is_largepage_backed(vcpu, gfn & ~(KVM_PAGES_PER_HPAGE-1))) {
-		gfn &= ~(KVM_PAGES_PER_HPAGE-1);
-		largepage = 1;
-	}
+	level = mapping_level(vcpu, gfn);
+
+	/*
+	 * This path builds a PAE pagetable - so we can map 2mb pages at
+	 * maximum. Therefore check if the level is larger than that.
+	 */
+	if (level > PT_DIRECTORY_LEVEL)
+		level = PT_DIRECTORY_LEVEL;
+
+	gfn &= ~(KVM_PAGES_PER_HPAGE(level) - 1);
 
 
 	mmu_seq = vcpu->kvm->mmu_notifier_seq;
 	mmu_seq = vcpu->kvm->mmu_notifier_seq;
 	smp_rmb();
 	smp_rmb();
@@ -1888,7 +1966,7 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
 	if (mmu_notifier_retry(vcpu, mmu_seq))
 	if (mmu_notifier_retry(vcpu, mmu_seq))
 		goto out_unlock;
 		goto out_unlock;
 	kvm_mmu_free_some_pages(vcpu);
 	kvm_mmu_free_some_pages(vcpu);
-	r = __direct_map(vcpu, v, write, largepage, gfn, pfn);
+	r = __direct_map(vcpu, v, write, level, gfn, pfn);
 	spin_unlock(&vcpu->kvm->mmu_lock);
 	spin_unlock(&vcpu->kvm->mmu_lock);
 
 
 
 
@@ -1954,6 +2032,7 @@ static int mmu_alloc_roots(struct kvm_vcpu *vcpu)
 	gfn_t root_gfn;
 	gfn_t root_gfn;
 	struct kvm_mmu_page *sp;
 	struct kvm_mmu_page *sp;
 	int direct = 0;
 	int direct = 0;
+	u64 pdptr;
 
 
 	root_gfn = vcpu->arch.cr3 >> PAGE_SHIFT;
 	root_gfn = vcpu->arch.cr3 >> PAGE_SHIFT;
 
 
@@ -1981,11 +2060,12 @@ static int mmu_alloc_roots(struct kvm_vcpu *vcpu)
 
 
 		ASSERT(!VALID_PAGE(root));
 		ASSERT(!VALID_PAGE(root));
 		if (vcpu->arch.mmu.root_level == PT32E_ROOT_LEVEL) {
 		if (vcpu->arch.mmu.root_level == PT32E_ROOT_LEVEL) {
-			if (!is_present_pte(vcpu->arch.pdptrs[i])) {
+			pdptr = kvm_pdptr_read(vcpu, i);
+			if (!is_present_gpte(pdptr)) {
 				vcpu->arch.mmu.pae_root[i] = 0;
 				vcpu->arch.mmu.pae_root[i] = 0;
 				continue;
 				continue;
 			}
 			}
-			root_gfn = vcpu->arch.pdptrs[i] >> PAGE_SHIFT;
+			root_gfn = pdptr >> PAGE_SHIFT;
 		} else if (vcpu->arch.mmu.root_level == 0)
 		} else if (vcpu->arch.mmu.root_level == 0)
 			root_gfn = 0;
 			root_gfn = 0;
 		if (mmu_check_root(vcpu, root_gfn))
 		if (mmu_check_root(vcpu, root_gfn))
@@ -2062,7 +2142,7 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa,
 {
 {
 	pfn_t pfn;
 	pfn_t pfn;
 	int r;
 	int r;
-	int largepage = 0;
+	int level;
 	gfn_t gfn = gpa >> PAGE_SHIFT;
 	gfn_t gfn = gpa >> PAGE_SHIFT;
 	unsigned long mmu_seq;
 	unsigned long mmu_seq;
 
 
@@ -2073,10 +2153,10 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa,
 	if (r)
 	if (r)
 		return r;
 		return r;
 
 
-	if (is_largepage_backed(vcpu, gfn & ~(KVM_PAGES_PER_HPAGE-1))) {
-		gfn &= ~(KVM_PAGES_PER_HPAGE-1);
-		largepage = 1;
-	}
+	level = mapping_level(vcpu, gfn);
+
+	gfn &= ~(KVM_PAGES_PER_HPAGE(level) - 1);
+
 	mmu_seq = vcpu->kvm->mmu_notifier_seq;
 	mmu_seq = vcpu->kvm->mmu_notifier_seq;
 	smp_rmb();
 	smp_rmb();
 	pfn = gfn_to_pfn(vcpu->kvm, gfn);
 	pfn = gfn_to_pfn(vcpu->kvm, gfn);
@@ -2089,7 +2169,7 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa,
 		goto out_unlock;
 		goto out_unlock;
 	kvm_mmu_free_some_pages(vcpu);
 	kvm_mmu_free_some_pages(vcpu);
 	r = __direct_map(vcpu, gpa, error_code & PFERR_WRITE_MASK,
 	r = __direct_map(vcpu, gpa, error_code & PFERR_WRITE_MASK,
-			 largepage, gfn, pfn);
+			 level, gfn, pfn);
 	spin_unlock(&vcpu->kvm->mmu_lock);
 	spin_unlock(&vcpu->kvm->mmu_lock);
 
 
 	return r;
 	return r;
@@ -2206,7 +2286,9 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, int level)
 		context->rsvd_bits_mask[0][0] = exb_bit_rsvd |
 		context->rsvd_bits_mask[0][0] = exb_bit_rsvd |
 			rsvd_bits(maxphyaddr, 51);
 			rsvd_bits(maxphyaddr, 51);
 		context->rsvd_bits_mask[1][3] = context->rsvd_bits_mask[0][3];
 		context->rsvd_bits_mask[1][3] = context->rsvd_bits_mask[0][3];
-		context->rsvd_bits_mask[1][2] = context->rsvd_bits_mask[0][2];
+		context->rsvd_bits_mask[1][2] = exb_bit_rsvd |
+			rsvd_bits(maxphyaddr, 51) |
+			rsvd_bits(13, 29);
 		context->rsvd_bits_mask[1][1] = exb_bit_rsvd |
 		context->rsvd_bits_mask[1][1] = exb_bit_rsvd |
 			rsvd_bits(maxphyaddr, 51) |
 			rsvd_bits(maxphyaddr, 51) |
 			rsvd_bits(13, 20);		/* large page */
 			rsvd_bits(13, 20);		/* large page */
@@ -2357,8 +2439,8 @@ int kvm_mmu_load(struct kvm_vcpu *vcpu)
 	spin_unlock(&vcpu->kvm->mmu_lock);
 	spin_unlock(&vcpu->kvm->mmu_lock);
 	if (r)
 	if (r)
 		goto out;
 		goto out;
+	/* set_cr3() should ensure TLB has been flushed */
 	kvm_x86_ops->set_cr3(vcpu, vcpu->arch.mmu.root_hpa);
 	kvm_x86_ops->set_cr3(vcpu, vcpu->arch.mmu.root_hpa);
-	kvm_mmu_flush_tlb(vcpu);
 out:
 out:
 	return r;
 	return r;
 }
 }
@@ -2378,15 +2460,14 @@ static void mmu_pte_write_zap_pte(struct kvm_vcpu *vcpu,
 
 
 	pte = *spte;
 	pte = *spte;
 	if (is_shadow_present_pte(pte)) {
 	if (is_shadow_present_pte(pte)) {
-		if (sp->role.level == PT_PAGE_TABLE_LEVEL ||
-		    is_large_pte(pte))
+		if (is_last_spte(pte, sp->role.level))
 			rmap_remove(vcpu->kvm, spte);
 			rmap_remove(vcpu->kvm, spte);
 		else {
 		else {
 			child = page_header(pte & PT64_BASE_ADDR_MASK);
 			child = page_header(pte & PT64_BASE_ADDR_MASK);
 			mmu_page_remove_parent_pte(child, spte);
 			mmu_page_remove_parent_pte(child, spte);
 		}
 		}
 	}
 	}
-	set_shadow_pte(spte, shadow_trap_nonpresent_pte);
+	__set_spte(spte, shadow_trap_nonpresent_pte);
 	if (is_large_pte(pte))
 	if (is_large_pte(pte))
 		--vcpu->kvm->stat.lpages;
 		--vcpu->kvm->stat.lpages;
 }
 }
@@ -2397,11 +2478,8 @@ static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu,
 				  const void *new)
 				  const void *new)
 {
 {
 	if (sp->role.level != PT_PAGE_TABLE_LEVEL) {
 	if (sp->role.level != PT_PAGE_TABLE_LEVEL) {
-		if (!vcpu->arch.update_pte.largepage ||
-		    sp->role.glevels == PT32_ROOT_LEVEL) {
-			++vcpu->kvm->stat.mmu_pde_zapped;
-			return;
-		}
+		++vcpu->kvm->stat.mmu_pde_zapped;
+		return;
         }
         }
 
 
 	++vcpu->kvm->stat.mmu_pte_updated;
 	++vcpu->kvm->stat.mmu_pte_updated;
@@ -2447,8 +2525,6 @@ static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
 	u64 gpte = 0;
 	u64 gpte = 0;
 	pfn_t pfn;
 	pfn_t pfn;
 
 
-	vcpu->arch.update_pte.largepage = 0;
-
 	if (bytes != 4 && bytes != 8)
 	if (bytes != 4 && bytes != 8)
 		return;
 		return;
 
 
@@ -2472,14 +2548,10 @@ static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
 		if ((bytes == 4) && (gpa % 4 == 0))
 		if ((bytes == 4) && (gpa % 4 == 0))
 			memcpy((void *)&gpte, new, 4);
 			memcpy((void *)&gpte, new, 4);
 	}
 	}
-	if (!is_present_pte(gpte))
+	if (!is_present_gpte(gpte))
 		return;
 		return;
 	gfn = (gpte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT;
 	gfn = (gpte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT;
 
 
-	if (is_large_pte(gpte) && is_largepage_backed(vcpu, gfn)) {
-		gfn &= ~(KVM_PAGES_PER_HPAGE-1);
-		vcpu->arch.update_pte.largepage = 1;
-	}
 	vcpu->arch.update_pte.mmu_seq = vcpu->kvm->mmu_notifier_seq;
 	vcpu->arch.update_pte.mmu_seq = vcpu->kvm->mmu_notifier_seq;
 	smp_rmb();
 	smp_rmb();
 	pfn = gfn_to_pfn(vcpu->kvm, gfn);
 	pfn = gfn_to_pfn(vcpu->kvm, gfn);
@@ -2622,6 +2694,9 @@ int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva)
 	gpa_t gpa;
 	gpa_t gpa;
 	int r;
 	int r;
 
 
+	if (tdp_enabled)
+		return 0;
+
 	gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gva);
 	gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gva);
 
 
 	spin_lock(&vcpu->kvm->mmu_lock);
 	spin_lock(&vcpu->kvm->mmu_lock);
@@ -2633,7 +2708,8 @@ EXPORT_SYMBOL_GPL(kvm_mmu_unprotect_page_virt);
 
 
 void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu)
 void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu)
 {
 {
-	while (vcpu->kvm->arch.n_free_mmu_pages < KVM_REFILL_PAGES) {
+	while (vcpu->kvm->arch.n_free_mmu_pages < KVM_REFILL_PAGES &&
+	       !list_empty(&vcpu->kvm->arch.active_mmu_pages)) {
 		struct kvm_mmu_page *sp;
 		struct kvm_mmu_page *sp;
 
 
 		sp = container_of(vcpu->kvm->arch.active_mmu_pages.prev,
 		sp = container_of(vcpu->kvm->arch.active_mmu_pages.prev,
@@ -2670,8 +2746,9 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code)
 		++vcpu->stat.mmio_exits;
 		++vcpu->stat.mmio_exits;
 		return 0;
 		return 0;
 	case EMULATE_FAIL:
 	case EMULATE_FAIL:
-		kvm_report_emulation_failure(vcpu, "pagetable");
-		return 1;
+		vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+		vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
+		return 0;
 	default:
 	default:
 		BUG();
 		BUG();
 	}
 	}
@@ -2712,12 +2789,6 @@ static int alloc_mmu_pages(struct kvm_vcpu *vcpu)
 
 
 	ASSERT(vcpu);
 	ASSERT(vcpu);
 
 
-	if (vcpu->kvm->arch.n_requested_mmu_pages)
-		vcpu->kvm->arch.n_free_mmu_pages =
-					vcpu->kvm->arch.n_requested_mmu_pages;
-	else
-		vcpu->kvm->arch.n_free_mmu_pages =
-					vcpu->kvm->arch.n_alloc_mmu_pages;
 	/*
 	/*
 	 * When emulating 32-bit mode, cr3 is only 32 bits even on x86_64.
 	 * When emulating 32-bit mode, cr3 is only 32 bits even on x86_64.
 	 * Therefore we need to allocate shadow page tables in the first
 	 * Therefore we need to allocate shadow page tables in the first
@@ -3029,6 +3100,24 @@ out:
 	return r;
 	return r;
 }
 }
 
 
+int kvm_mmu_get_spte_hierarchy(struct kvm_vcpu *vcpu, u64 addr, u64 sptes[4])
+{
+	struct kvm_shadow_walk_iterator iterator;
+	int nr_sptes = 0;
+
+	spin_lock(&vcpu->kvm->mmu_lock);
+	for_each_shadow_entry(vcpu, addr, iterator) {
+		sptes[iterator.level-1] = *iterator.sptep;
+		nr_sptes++;
+		if (!is_shadow_present_pte(*iterator.sptep))
+			break;
+	}
+	spin_unlock(&vcpu->kvm->mmu_lock);
+
+	return nr_sptes;
+}
+EXPORT_SYMBOL_GPL(kvm_mmu_get_spte_hierarchy);
+
 #ifdef AUDIT
 #ifdef AUDIT
 
 
 static const char *audit_msg;
 static const char *audit_msg;
@@ -3041,6 +3130,54 @@ static gva_t canonicalize(gva_t gva)
 	return gva;
 	return gva;
 }
 }
 
 
+
+typedef void (*inspect_spte_fn) (struct kvm *kvm, struct kvm_mmu_page *sp,
+				 u64 *sptep);
+
+static void __mmu_spte_walk(struct kvm *kvm, struct kvm_mmu_page *sp,
+			    inspect_spte_fn fn)
+{
+	int i;
+
+	for (i = 0; i < PT64_ENT_PER_PAGE; ++i) {
+		u64 ent = sp->spt[i];
+
+		if (is_shadow_present_pte(ent)) {
+			if (!is_last_spte(ent, sp->role.level)) {
+				struct kvm_mmu_page *child;
+				child = page_header(ent & PT64_BASE_ADDR_MASK);
+				__mmu_spte_walk(kvm, child, fn);
+			} else
+				fn(kvm, sp, &sp->spt[i]);
+		}
+	}
+}
+
+static void mmu_spte_walk(struct kvm_vcpu *vcpu, inspect_spte_fn fn)
+{
+	int i;
+	struct kvm_mmu_page *sp;
+
+	if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
+		return;
+	if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) {
+		hpa_t root = vcpu->arch.mmu.root_hpa;
+		sp = page_header(root);
+		__mmu_spte_walk(vcpu->kvm, sp, fn);
+		return;
+	}
+	for (i = 0; i < 4; ++i) {
+		hpa_t root = vcpu->arch.mmu.pae_root[i];
+
+		if (root && VALID_PAGE(root)) {
+			root &= PT64_BASE_ADDR_MASK;
+			sp = page_header(root);
+			__mmu_spte_walk(vcpu->kvm, sp, fn);
+		}
+	}
+	return;
+}
+
 static void audit_mappings_page(struct kvm_vcpu *vcpu, u64 page_pte,
 static void audit_mappings_page(struct kvm_vcpu *vcpu, u64 page_pte,
 				gva_t va, int level)
 				gva_t va, int level)
 {
 {
@@ -3055,20 +3192,19 @@ static void audit_mappings_page(struct kvm_vcpu *vcpu, u64 page_pte,
 			continue;
 			continue;
 
 
 		va = canonicalize(va);
 		va = canonicalize(va);
-		if (level > 1) {
-			if (ent == shadow_notrap_nonpresent_pte)
-				printk(KERN_ERR "audit: (%s) nontrapping pte"
-				       " in nonleaf level: levels %d gva %lx"
-				       " level %d pte %llx\n", audit_msg,
-				       vcpu->arch.mmu.root_level, va, level, ent);
-			else
-				audit_mappings_page(vcpu, ent, va, level - 1);
-		} else {
+		if (is_shadow_present_pte(ent) && !is_last_spte(ent, level))
+			audit_mappings_page(vcpu, ent, va, level - 1);
+		else {
 			gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, va);
 			gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, va);
 			gfn_t gfn = gpa >> PAGE_SHIFT;
 			gfn_t gfn = gpa >> PAGE_SHIFT;
 			pfn_t pfn = gfn_to_pfn(vcpu->kvm, gfn);
 			pfn_t pfn = gfn_to_pfn(vcpu->kvm, gfn);
 			hpa_t hpa = (hpa_t)pfn << PAGE_SHIFT;
 			hpa_t hpa = (hpa_t)pfn << PAGE_SHIFT;
 
 
+			if (is_error_pfn(pfn)) {
+				kvm_release_pfn_clean(pfn);
+				continue;
+			}
+
 			if (is_shadow_present_pte(ent)
 			if (is_shadow_present_pte(ent)
 			    && (ent & PT64_BASE_ADDR_MASK) != hpa)
 			    && (ent & PT64_BASE_ADDR_MASK) != hpa)
 				printk(KERN_ERR "xx audit error: (%s) levels %d"
 				printk(KERN_ERR "xx audit error: (%s) levels %d"
@@ -3122,7 +3258,7 @@ static int count_rmaps(struct kvm_vcpu *vcpu)
 			d = (struct kvm_rmap_desc *)(*rmapp & ~1ul);
 			d = (struct kvm_rmap_desc *)(*rmapp & ~1ul);
 			while (d) {
 			while (d) {
 				for (k = 0; k < RMAP_EXT; ++k)
 				for (k = 0; k < RMAP_EXT; ++k)
-					if (d->shadow_ptes[k])
+					if (d->sptes[k])
 						++nmaps;
 						++nmaps;
 					else
 					else
 						break;
 						break;
@@ -3133,9 +3269,48 @@ static int count_rmaps(struct kvm_vcpu *vcpu)
 	return nmaps;
 	return nmaps;
 }
 }
 
 
-static int count_writable_mappings(struct kvm_vcpu *vcpu)
+void inspect_spte_has_rmap(struct kvm *kvm, struct kvm_mmu_page *sp, u64 *sptep)
+{
+	unsigned long *rmapp;
+	struct kvm_mmu_page *rev_sp;
+	gfn_t gfn;
+
+	if (*sptep & PT_WRITABLE_MASK) {
+		rev_sp = page_header(__pa(sptep));
+		gfn = rev_sp->gfns[sptep - rev_sp->spt];
+
+		if (!gfn_to_memslot(kvm, gfn)) {
+			if (!printk_ratelimit())
+				return;
+			printk(KERN_ERR "%s: no memslot for gfn %ld\n",
+					 audit_msg, gfn);
+			printk(KERN_ERR "%s: index %ld of sp (gfn=%lx)\n",
+					audit_msg, sptep - rev_sp->spt,
+					rev_sp->gfn);
+			dump_stack();
+			return;
+		}
+
+		rmapp = gfn_to_rmap(kvm, rev_sp->gfns[sptep - rev_sp->spt],
+				    is_large_pte(*sptep));
+		if (!*rmapp) {
+			if (!printk_ratelimit())
+				return;
+			printk(KERN_ERR "%s: no rmap for writable spte %llx\n",
+					 audit_msg, *sptep);
+			dump_stack();
+		}
+	}
+
+}
+
+void audit_writable_sptes_have_rmaps(struct kvm_vcpu *vcpu)
+{
+	mmu_spte_walk(vcpu, inspect_spte_has_rmap);
+}
+
+static void check_writable_mappings_rmap(struct kvm_vcpu *vcpu)
 {
 {
-	int nmaps = 0;
 	struct kvm_mmu_page *sp;
 	struct kvm_mmu_page *sp;
 	int i;
 	int i;
 
 
@@ -3152,20 +3327,16 @@ static int count_writable_mappings(struct kvm_vcpu *vcpu)
 				continue;
 				continue;
 			if (!(ent & PT_WRITABLE_MASK))
 			if (!(ent & PT_WRITABLE_MASK))
 				continue;
 				continue;
-			++nmaps;
+			inspect_spte_has_rmap(vcpu->kvm, sp, &pt[i]);
 		}
 		}
 	}
 	}
-	return nmaps;
+	return;
 }
 }
 
 
 static void audit_rmap(struct kvm_vcpu *vcpu)
 static void audit_rmap(struct kvm_vcpu *vcpu)
 {
 {
-	int n_rmap = count_rmaps(vcpu);
-	int n_actual = count_writable_mappings(vcpu);
-
-	if (n_rmap != n_actual)
-		printk(KERN_ERR "%s: (%s) rmap %d actual %d\n",
-		       __func__, audit_msg, n_rmap, n_actual);
+	check_writable_mappings_rmap(vcpu);
+	count_rmaps(vcpu);
 }
 }
 
 
 static void audit_write_protection(struct kvm_vcpu *vcpu)
 static void audit_write_protection(struct kvm_vcpu *vcpu)
@@ -3173,20 +3344,28 @@ static void audit_write_protection(struct kvm_vcpu *vcpu)
 	struct kvm_mmu_page *sp;
 	struct kvm_mmu_page *sp;
 	struct kvm_memory_slot *slot;
 	struct kvm_memory_slot *slot;
 	unsigned long *rmapp;
 	unsigned long *rmapp;
+	u64 *spte;
 	gfn_t gfn;
 	gfn_t gfn;
 
 
 	list_for_each_entry(sp, &vcpu->kvm->arch.active_mmu_pages, link) {
 	list_for_each_entry(sp, &vcpu->kvm->arch.active_mmu_pages, link) {
 		if (sp->role.direct)
 		if (sp->role.direct)
 			continue;
 			continue;
+		if (sp->unsync)
+			continue;
 
 
 		gfn = unalias_gfn(vcpu->kvm, sp->gfn);
 		gfn = unalias_gfn(vcpu->kvm, sp->gfn);
 		slot = gfn_to_memslot_unaliased(vcpu->kvm, sp->gfn);
 		slot = gfn_to_memslot_unaliased(vcpu->kvm, sp->gfn);
 		rmapp = &slot->rmap[gfn - slot->base_gfn];
 		rmapp = &slot->rmap[gfn - slot->base_gfn];
-		if (*rmapp)
-			printk(KERN_ERR "%s: (%s) shadow page has writable"
-			       " mappings: gfn %lx role %x\n",
+
+		spte = rmap_next(vcpu->kvm, rmapp, NULL);
+		while (spte) {
+			if (*spte & PT_WRITABLE_MASK)
+				printk(KERN_ERR "%s: (%s) shadow page has "
+				"writable mappings: gfn %lx role %x\n",
 			       __func__, audit_msg, sp->gfn,
 			       __func__, audit_msg, sp->gfn,
 			       sp->role.word);
 			       sp->role.word);
+			spte = rmap_next(vcpu->kvm, rmapp, spte);
+		}
 	}
 	}
 }
 }
 
 
@@ -3198,7 +3377,9 @@ static void kvm_mmu_audit(struct kvm_vcpu *vcpu, const char *msg)
 	audit_msg = msg;
 	audit_msg = msg;
 	audit_rmap(vcpu);
 	audit_rmap(vcpu);
 	audit_write_protection(vcpu);
 	audit_write_protection(vcpu);
-	audit_mappings(vcpu);
+	if (strcmp("pre pte write", audit_msg) != 0)
+		audit_mappings(vcpu);
+	audit_writable_sptes_have_rmaps(vcpu);
 	dbg = olddbg;
 	dbg = olddbg;
 }
 }
 
 

+ 3 - 1
arch/x86/kvm/mmu.h

@@ -37,6 +37,8 @@
 #define PT32_ROOT_LEVEL 2
 #define PT32E_ROOT_LEVEL 3
 
+int kvm_mmu_get_spte_hierarchy(struct kvm_vcpu *vcpu, u64 addr, u64 sptes[4]);
+
 static inline void kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu)
 {
 	if (unlikely(vcpu->kvm->arch.n_free_mmu_pages < KVM_MIN_FREE_MMU_PAGES))
@@ -75,7 +77,7 @@ static inline int is_paging(struct kvm_vcpu *vcpu)
 	return vcpu->arch.cr0 & X86_CR0_PG;
 }
 
-static inline int is_present_pte(unsigned long pte)
+static inline int is_present_gpte(unsigned long pte)
 {
 	return pte & PT_PRESENT_MASK;
 }

+ 220 - 0
arch/x86/kvm/mmutrace.h

@@ -0,0 +1,220 @@
+#if !defined(_TRACE_KVMMMU_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_KVMMMU_H
+
+#include <linux/tracepoint.h>
+#include <linux/ftrace_event.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM kvmmmu
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE mmutrace
+
+#define KVM_MMU_PAGE_FIELDS \
+	__field(__u64, gfn) \
+	__field(__u32, role) \
+	__field(__u32, root_count) \
+	__field(__u32, unsync)
+
+#define KVM_MMU_PAGE_ASSIGN(sp)			     \
+	__entry->gfn = sp->gfn;			     \
+	__entry->role = sp->role.word;		     \
+	__entry->root_count = sp->root_count;        \
+	__entry->unsync = sp->unsync;
+
+#define KVM_MMU_PAGE_PRINTK() ({				        \
+	const char *ret = p->buffer + p->len;				\
+	static const char *access_str[] = {			        \
+		"---", "--x", "w--", "w-x", "-u-", "-ux", "wu-", "wux"  \
+	};							        \
+	union kvm_mmu_page_role role;				        \
+								        \
+	role.word = __entry->role;					\
+									\
+	trace_seq_printf(p, "sp gfn %llx %u/%u q%u%s %s%s %spge"	\
+			 " %snxe root %u %s%c",				\
+			 __entry->gfn, role.level, role.glevels,	\
+			 role.quadrant,					\
+			 role.direct ? " direct" : "",			\
+			 access_str[role.access],			\
+			 role.invalid ? " invalid" : "",		\
+			 role.cr4_pge ? "" : "!",			\
+			 role.nxe ? "" : "!",				\
+			 __entry->root_count,				\
+			 __entry->unsync ? "unsync" : "sync", 0);	\
+	ret;								\
+		})
+
+#define kvm_mmu_trace_pferr_flags       \
+	{ PFERR_PRESENT_MASK, "P" },	\
+	{ PFERR_WRITE_MASK, "W" },	\
+	{ PFERR_USER_MASK, "U" },	\
+	{ PFERR_RSVD_MASK, "RSVD" },	\
+	{ PFERR_FETCH_MASK, "F" }
+
+/*
+ * A pagetable walk has started
+ */
+TRACE_EVENT(
+	kvm_mmu_pagetable_walk,
+	TP_PROTO(u64 addr, int write_fault, int user_fault, int fetch_fault),
+	TP_ARGS(addr, write_fault, user_fault, fetch_fault),
+
+	TP_STRUCT__entry(
+		__field(__u64, addr)
+		__field(__u32, pferr)
+	),
+
+	TP_fast_assign(
+		__entry->addr = addr;
+		__entry->pferr = (!!write_fault << 1) | (!!user_fault << 2)
+		                 | (!!fetch_fault << 4);
+	),
+
+	TP_printk("addr %llx pferr %x %s", __entry->addr, __entry->pferr,
+		  __print_flags(__entry->pferr, "|", kvm_mmu_trace_pferr_flags))
+);
+
+
+/* We just walked a paging element */
+TRACE_EVENT(
+	kvm_mmu_paging_element,
+	TP_PROTO(u64 pte, int level),
+	TP_ARGS(pte, level),
+
+	TP_STRUCT__entry(
+		__field(__u64, pte)
+		__field(__u32, level)
+		),
+
+	TP_fast_assign(
+		__entry->pte = pte;
+		__entry->level = level;
+		),
+
+	TP_printk("pte %llx level %u", __entry->pte, __entry->level)
+);
+
+/* We set a pte accessed bit */
+TRACE_EVENT(
+	kvm_mmu_set_accessed_bit,
+	TP_PROTO(unsigned long table_gfn, unsigned index, unsigned size),
+	TP_ARGS(table_gfn, index, size),
+
+	TP_STRUCT__entry(
+		__field(__u64, gpa)
+		),
+
+	TP_fast_assign(
+		__entry->gpa = ((u64)table_gfn << PAGE_SHIFT)
+				+ index * size;
+		),
+
+	TP_printk("gpa %llx", __entry->gpa)
+);
+
+/* We set a pte dirty bit */
+TRACE_EVENT(
+	kvm_mmu_set_dirty_bit,
+	TP_PROTO(unsigned long table_gfn, unsigned index, unsigned size),
+	TP_ARGS(table_gfn, index, size),
+
+	TP_STRUCT__entry(
+		__field(__u64, gpa)
+		),
+
+	TP_fast_assign(
+		__entry->gpa = ((u64)table_gfn << PAGE_SHIFT)
+				+ index * size;
+		),
+
+	TP_printk("gpa %llx", __entry->gpa)
+);
+
+TRACE_EVENT(
+	kvm_mmu_walker_error,
+	TP_PROTO(u32 pferr),
+	TP_ARGS(pferr),
+
+	TP_STRUCT__entry(
+		__field(__u32, pferr)
+		),
+
+	TP_fast_assign(
+		__entry->pferr = pferr;
+		),
+
+	TP_printk("pferr %x %s", __entry->pferr,
+		  __print_flags(__entry->pferr, "|", kvm_mmu_trace_pferr_flags))
+);
+
+TRACE_EVENT(
+	kvm_mmu_get_page,
+	TP_PROTO(struct kvm_mmu_page *sp, bool created),
+	TP_ARGS(sp, created),
+
+	TP_STRUCT__entry(
+		KVM_MMU_PAGE_FIELDS
+		__field(bool, created)
+		),
+
+	TP_fast_assign(
+		KVM_MMU_PAGE_ASSIGN(sp)
+		__entry->created = created;
+		),
+
+	TP_printk("%s %s", KVM_MMU_PAGE_PRINTK(),
+		  __entry->created ? "new" : "existing")
+);
+
+TRACE_EVENT(
+	kvm_mmu_sync_page,
+	TP_PROTO(struct kvm_mmu_page *sp),
+	TP_ARGS(sp),
+
+	TP_STRUCT__entry(
+		KVM_MMU_PAGE_FIELDS
+		),
+
+	TP_fast_assign(
+		KVM_MMU_PAGE_ASSIGN(sp)
+		),
+
+	TP_printk("%s", KVM_MMU_PAGE_PRINTK())
+);
+
+TRACE_EVENT(
+	kvm_mmu_unsync_page,
+	TP_PROTO(struct kvm_mmu_page *sp),
+	TP_ARGS(sp),
+
+	TP_STRUCT__entry(
+		KVM_MMU_PAGE_FIELDS
+		),
+
+	TP_fast_assign(
+		KVM_MMU_PAGE_ASSIGN(sp)
+		),
+
+	TP_printk("%s", KVM_MMU_PAGE_PRINTK())
+);
+
+TRACE_EVENT(
+	kvm_mmu_zap_page,
+	TP_PROTO(struct kvm_mmu_page *sp),
+	TP_ARGS(sp),
+
+	TP_STRUCT__entry(
+		KVM_MMU_PAGE_FIELDS
+		),
+
+	TP_fast_assign(
+		KVM_MMU_PAGE_ASSIGN(sp)
+		),
+
+	TP_printk("%s", KVM_MMU_PAGE_PRINTK())
+);
+
+#endif /* _TRACE_KVMMMU_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>

+ 74 - 67
arch/x86/kvm/paging_tmpl.h

@@ -27,7 +27,8 @@
 	#define guest_walker guest_walker64
 	#define guest_walker guest_walker64
 	#define FNAME(name) paging##64_##name
 	#define FNAME(name) paging##64_##name
 	#define PT_BASE_ADDR_MASK PT64_BASE_ADDR_MASK
 	#define PT_BASE_ADDR_MASK PT64_BASE_ADDR_MASK
-	#define PT_DIR_BASE_ADDR_MASK PT64_DIR_BASE_ADDR_MASK
+	#define PT_LVL_ADDR_MASK(lvl) PT64_LVL_ADDR_MASK(lvl)
+	#define PT_LVL_OFFSET_MASK(lvl) PT64_LVL_OFFSET_MASK(lvl)
 	#define PT_INDEX(addr, level) PT64_INDEX(addr, level)
 	#define PT_INDEX(addr, level) PT64_INDEX(addr, level)
 	#define PT_LEVEL_MASK(level) PT64_LEVEL_MASK(level)
 	#define PT_LEVEL_MASK(level) PT64_LEVEL_MASK(level)
 	#define PT_LEVEL_BITS PT64_LEVEL_BITS
 	#define PT_LEVEL_BITS PT64_LEVEL_BITS
@@ -43,7 +44,8 @@
 	#define guest_walker guest_walker32
 	#define guest_walker guest_walker32
 	#define FNAME(name) paging##32_##name
 	#define FNAME(name) paging##32_##name
 	#define PT_BASE_ADDR_MASK PT32_BASE_ADDR_MASK
 	#define PT_BASE_ADDR_MASK PT32_BASE_ADDR_MASK
-	#define PT_DIR_BASE_ADDR_MASK PT32_DIR_BASE_ADDR_MASK
+	#define PT_LVL_ADDR_MASK(lvl) PT32_LVL_ADDR_MASK(lvl)
+	#define PT_LVL_OFFSET_MASK(lvl) PT32_LVL_OFFSET_MASK(lvl)
 	#define PT_INDEX(addr, level) PT32_INDEX(addr, level)
 	#define PT_INDEX(addr, level) PT32_INDEX(addr, level)
 	#define PT_LEVEL_MASK(level) PT32_LEVEL_MASK(level)
 	#define PT_LEVEL_MASK(level) PT32_LEVEL_MASK(level)
 	#define PT_LEVEL_BITS PT32_LEVEL_BITS
 	#define PT_LEVEL_BITS PT32_LEVEL_BITS
@@ -53,8 +55,8 @@
 	#error Invalid PTTYPE value
 	#error Invalid PTTYPE value
 #endif
 #endif
 
 
-#define gpte_to_gfn FNAME(gpte_to_gfn)
-#define gpte_to_gfn_pde FNAME(gpte_to_gfn_pde)
+#define gpte_to_gfn_lvl FNAME(gpte_to_gfn_lvl)
+#define gpte_to_gfn(pte) gpte_to_gfn_lvl((pte), PT_PAGE_TABLE_LEVEL)
 
 
 /*
 /*
  * The guest_walker structure emulates the behavior of the hardware page
  * The guest_walker structure emulates the behavior of the hardware page
@@ -71,14 +73,9 @@ struct guest_walker {
 	u32 error_code;
 	u32 error_code;
 };
 };
 
 
-static gfn_t gpte_to_gfn(pt_element_t gpte)
+static gfn_t gpte_to_gfn_lvl(pt_element_t gpte, int lvl)
 {
 {
-	return (gpte & PT_BASE_ADDR_MASK) >> PAGE_SHIFT;
-}
-
-static gfn_t gpte_to_gfn_pde(pt_element_t gpte)
-{
-	return (gpte & PT_DIR_BASE_ADDR_MASK) >> PAGE_SHIFT;
+	return (gpte & PT_LVL_ADDR_MASK(lvl)) >> PAGE_SHIFT;
 }
 }
 
 
 static bool FNAME(cmpxchg_gpte)(struct kvm *kvm,
 static bool FNAME(cmpxchg_gpte)(struct kvm *kvm,
@@ -125,14 +122,16 @@ static int FNAME(walk_addr)(struct guest_walker *walker,
 	gpa_t pte_gpa;
 	gpa_t pte_gpa;
 	int rsvd_fault = 0;
 	int rsvd_fault = 0;
 
 
-	pgprintk("%s: addr %lx\n", __func__, addr);
+	trace_kvm_mmu_pagetable_walk(addr, write_fault, user_fault,
+				     fetch_fault);
 walk:
 walk:
 	walker->level = vcpu->arch.mmu.root_level;
 	walker->level = vcpu->arch.mmu.root_level;
 	pte = vcpu->arch.cr3;
 	pte = vcpu->arch.cr3;
 #if PTTYPE == 64
 #if PTTYPE == 64
 	if (!is_long_mode(vcpu)) {
 	if (!is_long_mode(vcpu)) {
-		pte = vcpu->arch.pdptrs[(addr >> 30) & 3];
-		if (!is_present_pte(pte))
+		pte = kvm_pdptr_read(vcpu, (addr >> 30) & 3);
+		trace_kvm_mmu_paging_element(pte, walker->level);
+		if (!is_present_gpte(pte))
 			goto not_present;
 			goto not_present;
 		--walker->level;
 		--walker->level;
 	}
 	}
@@ -150,12 +149,11 @@ walk:
 		pte_gpa += index * sizeof(pt_element_t);
 		pte_gpa += index * sizeof(pt_element_t);
 		walker->table_gfn[walker->level - 1] = table_gfn;
 		walker->table_gfn[walker->level - 1] = table_gfn;
 		walker->pte_gpa[walker->level - 1] = pte_gpa;
 		walker->pte_gpa[walker->level - 1] = pte_gpa;
-		pgprintk("%s: table_gfn[%d] %lx\n", __func__,
-			 walker->level - 1, table_gfn);
 
 
 		kvm_read_guest(vcpu->kvm, pte_gpa, &pte, sizeof(pte));
 		kvm_read_guest(vcpu->kvm, pte_gpa, &pte, sizeof(pte));
+		trace_kvm_mmu_paging_element(pte, walker->level);
 
 
-		if (!is_present_pte(pte))
+		if (!is_present_gpte(pte))
 			goto not_present;
 			goto not_present;
 
 
 		rsvd_fault = is_rsvd_bits_set(vcpu, pte, walker->level);
 		rsvd_fault = is_rsvd_bits_set(vcpu, pte, walker->level);
@@ -175,6 +173,8 @@ walk:
 #endif
 #endif
 
 
 		if (!(pte & PT_ACCESSED_MASK)) {
 		if (!(pte & PT_ACCESSED_MASK)) {
+			trace_kvm_mmu_set_accessed_bit(table_gfn, index,
+						       sizeof(pte));
 			mark_page_dirty(vcpu->kvm, table_gfn);
 			mark_page_dirty(vcpu->kvm, table_gfn);
 			if (FNAME(cmpxchg_gpte)(vcpu->kvm, table_gfn,
 			if (FNAME(cmpxchg_gpte)(vcpu->kvm, table_gfn,
 			    index, pte, pte|PT_ACCESSED_MASK))
 			    index, pte, pte|PT_ACCESSED_MASK))
@@ -186,18 +186,24 @@ walk:
 
 
 		walker->ptes[walker->level - 1] = pte;
 		walker->ptes[walker->level - 1] = pte;
 
 
-		if (walker->level == PT_PAGE_TABLE_LEVEL) {
-			walker->gfn = gpte_to_gfn(pte);
-			break;
-		}
-
-		if (walker->level == PT_DIRECTORY_LEVEL
-		    && (pte & PT_PAGE_SIZE_MASK)
-		    && (PTTYPE == 64 || is_pse(vcpu))) {
-			walker->gfn = gpte_to_gfn_pde(pte);
-			walker->gfn += PT_INDEX(addr, PT_PAGE_TABLE_LEVEL);
-			if (PTTYPE == 32 && is_cpuid_PSE36())
+		if ((walker->level == PT_PAGE_TABLE_LEVEL) ||
+		    ((walker->level == PT_DIRECTORY_LEVEL) &&
+				(pte & PT_PAGE_SIZE_MASK)  &&
+				(PTTYPE == 64 || is_pse(vcpu))) ||
+		    ((walker->level == PT_PDPE_LEVEL) &&
+				(pte & PT_PAGE_SIZE_MASK)  &&
+				is_long_mode(vcpu))) {
+			int lvl = walker->level;
+
+			walker->gfn = gpte_to_gfn_lvl(pte, lvl);
+			walker->gfn += (addr & PT_LVL_OFFSET_MASK(lvl))
+					>> PAGE_SHIFT;
+
+			if (PTTYPE == 32 &&
+			    walker->level == PT_DIRECTORY_LEVEL &&
+			    is_cpuid_PSE36())
 				walker->gfn += pse36_gfn_delta(pte);
 				walker->gfn += pse36_gfn_delta(pte);
+
 			break;
 			break;
 		}
 		}
 
 
@@ -205,9 +211,10 @@ walk:
 		--walker->level;
 		--walker->level;
 	}
 	}
 
 
-	if (write_fault && !is_dirty_pte(pte)) {
+	if (write_fault && !is_dirty_gpte(pte)) {
 		bool ret;
 		bool ret;
 
 
+		trace_kvm_mmu_set_dirty_bit(table_gfn, index, sizeof(pte));
 		mark_page_dirty(vcpu->kvm, table_gfn);
 		mark_page_dirty(vcpu->kvm, table_gfn);
 		ret = FNAME(cmpxchg_gpte)(vcpu->kvm, table_gfn, index, pte,
 		ret = FNAME(cmpxchg_gpte)(vcpu->kvm, table_gfn, index, pte,
 			    pte|PT_DIRTY_MASK);
 			    pte|PT_DIRTY_MASK);
@@ -239,6 +246,7 @@ err:
 		walker->error_code |= PFERR_FETCH_MASK;
 		walker->error_code |= PFERR_FETCH_MASK;
 	if (rsvd_fault)
 	if (rsvd_fault)
 		walker->error_code |= PFERR_RSVD_MASK;
 		walker->error_code |= PFERR_RSVD_MASK;
+	trace_kvm_mmu_walker_error(walker->error_code);
 	return 0;
 	return 0;
 }
 }
 
 
@@ -248,12 +256,11 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,
 	pt_element_t gpte;
 	pt_element_t gpte;
 	unsigned pte_access;
 	unsigned pte_access;
 	pfn_t pfn;
 	pfn_t pfn;
-	int largepage = vcpu->arch.update_pte.largepage;
 
 
 	gpte = *(const pt_element_t *)pte;
 	gpte = *(const pt_element_t *)pte;
 	if (~gpte & (PT_PRESENT_MASK | PT_ACCESSED_MASK)) {
 	if (~gpte & (PT_PRESENT_MASK | PT_ACCESSED_MASK)) {
-		if (!is_present_pte(gpte))
-			set_shadow_pte(spte, shadow_notrap_nonpresent_pte);
+		if (!is_present_gpte(gpte))
+			__set_spte(spte, shadow_notrap_nonpresent_pte);
 		return;
 		return;
 	}
 	}
 	pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte);
 	pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte);
@@ -267,7 +274,7 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,
 		return;
 		return;
 	kvm_get_pfn(pfn);
 	kvm_get_pfn(pfn);
 	mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0,
 	mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0,
-		     gpte & PT_DIRTY_MASK, NULL, largepage,
+		     gpte & PT_DIRTY_MASK, NULL, PT_PAGE_TABLE_LEVEL,
 		     gpte_to_gfn(gpte), pfn, true);
 		     gpte_to_gfn(gpte), pfn, true);
 }
 }
 
 
@@ -276,7 +283,7 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,
  */
 static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
 			 struct guest_walker *gw,
-			 int user_fault, int write_fault, int largepage,
+			 int user_fault, int write_fault, int hlevel,
 			 int *ptwrite, pfn_t pfn)
 {
 	unsigned access = gw->pt_access;
@@ -289,19 +296,18 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
 	pt_element_t curr_pte;
 	struct kvm_shadow_walk_iterator iterator;
 
-	if (!is_present_pte(gw->ptes[gw->level - 1]))
+	if (!is_present_gpte(gw->ptes[gw->level - 1]))
 		return NULL;
 
 	for_each_shadow_entry(vcpu, addr, iterator) {
 		level = iterator.level;
 		sptep = iterator.sptep;
-		if (level == PT_PAGE_TABLE_LEVEL
-		    || (largepage && level == PT_DIRECTORY_LEVEL)) {
+		if (iterator.level == hlevel) {
 			mmu_set_spte(vcpu, sptep, access,
 				     gw->pte_access & access,
 				     user_fault, write_fault,
 				     gw->ptes[gw->level-1] & PT_DIRTY_MASK,
-				     ptwrite, largepage,
+				     ptwrite, level,
 				     gw->gfn, pfn, false);
 			break;
 		}
@@ -311,16 +317,19 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
 
 		if (is_large_pte(*sptep)) {
 			rmap_remove(vcpu->kvm, sptep);
-			set_shadow_pte(sptep, shadow_trap_nonpresent_pte);
+			__set_spte(sptep, shadow_trap_nonpresent_pte);
 			kvm_flush_remote_tlbs(vcpu->kvm);
 		}
 
-		if (level == PT_DIRECTORY_LEVEL
-		    && gw->level == PT_DIRECTORY_LEVEL) {
+		if (level <= gw->level) {
+			int delta = level - gw->level + 1;
 			direct = 1;
-			if (!is_dirty_pte(gw->ptes[level - 1]))
+			if (!is_dirty_gpte(gw->ptes[level - delta]))
 				access &= ~ACC_WRITE_MASK;
-			table_gfn = gpte_to_gfn(gw->ptes[level - 1]);
+			table_gfn = gpte_to_gfn(gw->ptes[level - delta]);
+			/* advance table_gfn when emulating 1gb pages with 4k */
+			if (delta == 0)
+				table_gfn += PT_INDEX(addr, level);
 		} else {
 			direct = 0;
 			table_gfn = gw->table_gfn[level - 2];
@@ -369,11 +378,11 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
 	int user_fault = error_code & PFERR_USER_MASK;
 	int fetch_fault = error_code & PFERR_FETCH_MASK;
 	struct guest_walker walker;
-	u64 *shadow_pte;
+	u64 *sptep;
 	int write_pt = 0;
 	int r;
 	pfn_t pfn;
-	int largepage = 0;
+	int level = PT_PAGE_TABLE_LEVEL;
 	unsigned long mmu_seq;
 
 	pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code);
@@ -399,14 +408,11 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
 		return 0;
 	}
 
-	if (walker.level == PT_DIRECTORY_LEVEL) {
-		gfn_t large_gfn;
-		large_gfn = walker.gfn & ~(KVM_PAGES_PER_HPAGE-1);
-		if (is_largepage_backed(vcpu, large_gfn)) {
-			walker.gfn = large_gfn;
-			largepage = 1;
-		}
+	if (walker.level >= PT_DIRECTORY_LEVEL) {
+		level = min(walker.level, mapping_level(vcpu, walker.gfn));
+		walker.gfn = walker.gfn & ~(KVM_PAGES_PER_HPAGE(level) - 1);
 	}
+
 	mmu_seq = vcpu->kvm->mmu_notifier_seq;
 	smp_rmb();
 	pfn = gfn_to_pfn(vcpu->kvm, walker.gfn);
@@ -422,11 +428,10 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
 	if (mmu_notifier_retry(vcpu, mmu_seq))
 		goto out_unlock;
 	kvm_mmu_free_some_pages(vcpu);
-	shadow_pte = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault,
-				  largepage, &write_pt, pfn);
-
+	sptep = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault,
+			     level, &write_pt, pfn);
 	pgprintk("%s: shadow pte %p %llx ptwrite %d\n", __func__,
-		 shadow_pte, *shadow_pte, write_pt);
+		 sptep, *sptep, write_pt);
 
 	if (!write_pt)
 		vcpu->arch.last_pt_write_count = 0; /* reset fork detector */
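The page-fault path above now picks the smaller of the guest's own mapping level and what the host backing allows, then rounds walker.gfn down to that level's boundary. A minimal sketch of the rounding, assuming KVM_PAGES_PER_HPAGE(level) is 512^(level-1) as on x86-64 (the helper name here is illustrative, not the kernel macro):

/* Illustrative only -- not part of the patch. */
#include <stdint.h>
#include <stdio.h>

/* 4KB pages per huge page at a given level: 1, 512, 512*512 */
static uint64_t pages_per_hpage(int level)
{
	uint64_t n = 1;

	while (--level > 0)
		n *= 512;
	return n;
}

int main(void)
{
	uint64_t gfn   = 0x80121;	/* faulting 4KB frame              */
	int      level = 2;		/* host backs this with a 2MB page */
	uint64_t start = gfn & ~(pages_per_hpage(level) - 1);

	printf("mapping starts at gfn 0x%llx\n", (unsigned long long)start);	/* 0x80000 */
	return 0;
}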
@@ -459,8 +464,9 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
 		sptep = iterator.sptep;
 
 		/* FIXME: properly handle invlpg on large guest pages */
-		if (level == PT_PAGE_TABLE_LEVEL ||
-		    ((level == PT_DIRECTORY_LEVEL) && is_large_pte(*sptep))) {
+		if (level == PT_PAGE_TABLE_LEVEL  ||
+		    ((level == PT_DIRECTORY_LEVEL && is_large_pte(*sptep))) ||
+		    ((level == PT_PDPE_LEVEL && is_large_pte(*sptep)))) {
 			struct kvm_mmu_page *sp = page_header(__pa(sptep));
 
 			pte_gpa = (sp->gfn << PAGE_SHIFT);
@@ -472,7 +478,7 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
 					--vcpu->kvm->stat.lpages;
 				need_flush = 1;
 			}
-			set_shadow_pte(sptep, shadow_trap_nonpresent_pte);
+			__set_spte(sptep, shadow_trap_nonpresent_pte);
 			break;
 		}
 
@@ -489,7 +495,7 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
 	if (kvm_read_guest_atomic(vcpu->kvm, pte_gpa, &gpte,
 				  sizeof(pt_element_t)))
 		return;
-	if (is_present_pte(gpte) && (gpte & PT_ACCESSED_MASK)) {
+	if (is_present_gpte(gpte) && (gpte & PT_ACCESSED_MASK)) {
 		if (mmu_topup_memory_caches(vcpu))
 			return;
 		kvm_mmu_pte_write(vcpu, pte_gpa, (const u8 *)&gpte,
@@ -536,7 +542,7 @@ static void FNAME(prefetch_page)(struct kvm_vcpu *vcpu,
 		r = kvm_read_guest_atomic(vcpu->kvm, pte_gpa, pt, sizeof pt);
 		pte_gpa += ARRAY_SIZE(pt) * sizeof(pt_element_t);
 		for (j = 0; j < ARRAY_SIZE(pt); ++j)
-			if (r || is_present_pte(pt[j]))
+			if (r || is_present_gpte(pt[j]))
 				sp->spt[i+j] = shadow_trap_nonpresent_pte;
 			else
 				sp->spt[i+j] = shadow_notrap_nonpresent_pte;
@@ -574,23 +580,23 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
 					  sizeof(pt_element_t)))
 			return -EINVAL;
 
-		if (gpte_to_gfn(gpte) != gfn || !is_present_pte(gpte) ||
+		if (gpte_to_gfn(gpte) != gfn || !is_present_gpte(gpte) ||
 		    !(gpte & PT_ACCESSED_MASK)) {
 			u64 nonpresent;
 
 			rmap_remove(vcpu->kvm, &sp->spt[i]);
-			if (is_present_pte(gpte))
+			if (is_present_gpte(gpte))
 				nonpresent = shadow_trap_nonpresent_pte;
 			else
 				nonpresent = shadow_notrap_nonpresent_pte;
-			set_shadow_pte(&sp->spt[i], nonpresent);
+			__set_spte(&sp->spt[i], nonpresent);
 			continue;
 		}
 
 		nr_present++;
 		pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte);
 		set_spte(vcpu, &sp->spt[i], pte_access, 0, 0,
-			 is_dirty_pte(gpte), 0, gfn,
+			 is_dirty_gpte(gpte), PT_PAGE_TABLE_LEVEL, gfn,
 			 spte_to_pfn(sp->spt[i]), true, false);
 	}
 
@@ -603,9 +609,10 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
 #undef PT_BASE_ADDR_MASK
 #undef PT_INDEX
 #undef PT_LEVEL_MASK
-#undef PT_DIR_BASE_ADDR_MASK
+#undef PT_LVL_ADDR_MASK
+#undef PT_LVL_OFFSET_MASK
 #undef PT_LEVEL_BITS
 #undef PT_MAX_FULL_LEVELS
 #undef gpte_to_gfn
-#undef gpte_to_gfn_pde
+#undef gpte_to_gfn_lvl
 #undef CMPXCHG

523 366	arch/x86/kvm/svm.c

@@ -15,7 +15,6 @@
  */
 #include <linux/kvm_host.h>
 
-#include "kvm_svm.h"
 #include "irq.h"
 #include "mmu.h"
 #include "kvm_cache_regs.h"
@@ -26,10 +25,12 @@
 #include <linux/vmalloc.h>
 #include <linux/highmem.h>
 #include <linux/sched.h>
+#include <linux/ftrace_event.h>
 
 #include <asm/desc.h>
 
 #include <asm/virtext.h>
+#include "trace.h"
 
 #define __ex(x) __kvm_handle_fault_on_reboot(x)
 
@@ -46,6 +47,10 @@ MODULE_LICENSE("GPL");
 #define SVM_FEATURE_LBRV (1 << 1)
 #define SVM_FEATURE_SVML (1 << 2)
 
+#define NESTED_EXIT_HOST	0	/* Exit handled on host level */
+#define NESTED_EXIT_DONE	1	/* Exit caused nested vmexit  */
+#define NESTED_EXIT_CONTINUE	2	/* Further checks needed      */
+
 #define DEBUGCTL_RESERVED_BITS (~(0x3fULL))
 
 /* Turn on to get debugging output*/
@@ -57,6 +62,58 @@ MODULE_LICENSE("GPL");
 #define nsvm_printk(fmt, args...) do {} while(0)
 #endif
 
+static const u32 host_save_user_msrs[] = {
+#ifdef CONFIG_X86_64
+	MSR_STAR, MSR_LSTAR, MSR_CSTAR, MSR_SYSCALL_MASK, MSR_KERNEL_GS_BASE,
+	MSR_FS_BASE,
+#endif
+	MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
+};
+
+#define NR_HOST_SAVE_USER_MSRS ARRAY_SIZE(host_save_user_msrs)
+
+struct kvm_vcpu;
+
+struct nested_state {
+	struct vmcb *hsave;
+	u64 hsave_msr;
+	u64 vmcb;
+
+	/* These are the merged vectors */
+	u32 *msrpm;
+
+	/* gpa pointers to the real vectors */
+	u64 vmcb_msrpm;
+
+	/* cache for intercepts of the guest */
+	u16 intercept_cr_read;
+	u16 intercept_cr_write;
+	u16 intercept_dr_read;
+	u16 intercept_dr_write;
+	u32 intercept_exceptions;
+	u64 intercept;
+
+};
+
+struct vcpu_svm {
+	struct kvm_vcpu vcpu;
+	struct vmcb *vmcb;
+	unsigned long vmcb_pa;
+	struct svm_cpu_data *svm_data;
+	uint64_t asid_generation;
+	uint64_t sysenter_esp;
+	uint64_t sysenter_eip;
+
+	u64 next_rip;
+
+	u64 host_user_msrs[NR_HOST_SAVE_USER_MSRS];
+	u64 host_gs_base;
+
+	u32 *msrpm;
+
+	struct nested_state nested;
+};
+
 /* enable NPT for AMD64 and X86 with PAE */
 #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
 static bool npt_enabled = true;
@@ -67,15 +124,14 @@ static int npt = 1;
 
 module_param(npt, int, S_IRUGO);
 
-static int nested = 0;
+static int nested = 1;
 module_param(nested, int, S_IRUGO);
 
 static void svm_flush_tlb(struct kvm_vcpu *vcpu);
+static void svm_complete_interrupts(struct vcpu_svm *svm);
 
-static int nested_svm_exit_handled(struct vcpu_svm *svm, bool kvm_override);
+static int nested_svm_exit_handled(struct vcpu_svm *svm);
 static int nested_svm_vmexit(struct vcpu_svm *svm);
-static int nested_svm_vmsave(struct vcpu_svm *svm, void *nested_vmcb,
-			     void *arg2, void *opaque);
 static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
 				      bool has_error_code, u32 error_code);
 
@@ -86,7 +142,22 @@ static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu)
 
 static inline bool is_nested(struct vcpu_svm *svm)
 {
-	return svm->nested_vmcb;
+	return svm->nested.vmcb;
+}
+
+static inline void enable_gif(struct vcpu_svm *svm)
+{
+	svm->vcpu.arch.hflags |= HF_GIF_MASK;
+}
+
+static inline void disable_gif(struct vcpu_svm *svm)
+{
+	svm->vcpu.arch.hflags &= ~HF_GIF_MASK;
+}
+
+static inline bool gif_set(struct vcpu_svm *svm)
+{
+	return !!(svm->vcpu.arch.hflags & HF_GIF_MASK);
 }
 
 static unsigned long iopm_base;
@@ -147,19 +218,6 @@ static inline void invlpga(unsigned long addr, u32 asid)
 	asm volatile (__ex(SVM_INVLPGA) :: "a"(addr), "c"(asid));
 }
 
-static inline unsigned long kvm_read_cr2(void)
-{
-	unsigned long cr2;
-
-	asm volatile ("mov %%cr2, %0" : "=r" (cr2));
-	return cr2;
-}
-
-static inline void kvm_write_cr2(unsigned long val)
-{
-	asm volatile ("mov %0, %%cr2" :: "r" (val));
-}
-
 static inline void force_new_asid(struct kvm_vcpu *vcpu)
 {
 	to_svm(vcpu)->asid_generation--;
@@ -263,7 +321,7 @@ static void svm_hardware_enable(void *garbage)
 
 	struct svm_cpu_data *svm_data;
 	uint64_t efer;
-	struct desc_ptr gdt_descr;
+	struct descriptor_table gdt_descr;
 	struct desc_struct *gdt;
 	int me = raw_smp_processor_id();
 
@@ -283,8 +341,8 @@ static void svm_hardware_enable(void *garbage)
 	svm_data->max_asid = cpuid_ebx(SVM_CPUID_FUNC) - 1;
 	svm_data->next_asid = svm_data->max_asid + 1;
 
-	asm volatile ("sgdt %0" : "=m"(gdt_descr));
-	gdt = (struct desc_struct *)gdt_descr.address;
+	kvm_get_gdt(&gdt_descr);
+	gdt = (struct desc_struct *)gdt_descr.base;
 	svm_data->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS);
 
 	rdmsrl(MSR_EFER, efer);
@@ -367,8 +425,6 @@ static void svm_vcpu_init_msrpm(u32 *msrpm)
 #endif
 	set_msr_interception(msrpm, MSR_K6_STAR, 1, 1);
 	set_msr_interception(msrpm, MSR_IA32_SYSENTER_CS, 1, 1);
-	set_msr_interception(msrpm, MSR_IA32_SYSENTER_ESP, 1, 1);
-	set_msr_interception(msrpm, MSR_IA32_SYSENTER_EIP, 1, 1);
 }
 
 static void svm_enable_lbrv(struct vcpu_svm *svm)
@@ -595,8 +651,10 @@ static void init_vmcb(struct vcpu_svm *svm)
 	}
 	force_new_asid(&svm->vcpu);
 
-	svm->nested_vmcb = 0;
-	svm->vcpu.arch.hflags = HF_GIF_MASK;
+	svm->nested.vmcb = 0;
+	svm->vcpu.arch.hflags = 0;
+
+	enable_gif(svm);
 }
 
 static int svm_vcpu_reset(struct kvm_vcpu *vcpu)
@@ -605,7 +663,7 @@ static int svm_vcpu_reset(struct kvm_vcpu *vcpu)
 
 	init_vmcb(svm);
 
-	if (vcpu->vcpu_id != 0) {
+	if (!kvm_vcpu_is_bsp(vcpu)) {
 		kvm_rip_write(vcpu, 0);
 		svm->vmcb->save.cs.base = svm->vcpu.arch.sipi_vector << 12;
 		svm->vmcb->save.cs.selector = svm->vcpu.arch.sipi_vector << 8;
@@ -656,9 +714,9 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
 	hsave_page = alloc_page(GFP_KERNEL);
 	if (!hsave_page)
 		goto uninit;
-	svm->hsave = page_address(hsave_page);
+	svm->nested.hsave = page_address(hsave_page);
 
-	svm->nested_msrpm = page_address(nested_msrpm_pages);
+	svm->nested.msrpm = page_address(nested_msrpm_pages);
 
 	svm->vmcb = page_address(page);
 	clear_page(svm->vmcb);
@@ -669,7 +727,7 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
 	fx_init(&svm->vcpu);
 	svm->vcpu.fpu_active = 1;
 	svm->vcpu.arch.apic_base = 0xfee00000 | MSR_IA32_APICBASE_ENABLE;
-	if (svm->vcpu.vcpu_id == 0)
+	if (kvm_vcpu_is_bsp(&svm->vcpu))
 		svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP;
 
 	return &svm->vcpu;
@@ -688,8 +746,8 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu)
 
 	__free_page(pfn_to_page(svm->vmcb_pa >> PAGE_SHIFT));
 	__free_pages(virt_to_page(svm->msrpm), MSRPM_ALLOC_ORDER);
-	__free_page(virt_to_page(svm->hsave));
-	__free_pages(virt_to_page(svm->nested_msrpm), MSRPM_ALLOC_ORDER);
+	__free_page(virt_to_page(svm->nested.hsave));
+	__free_pages(virt_to_page(svm->nested.msrpm), MSRPM_ALLOC_ORDER);
 	kvm_vcpu_uninit(vcpu);
 	kmem_cache_free(kvm_vcpu_cache, svm);
 }
@@ -740,6 +798,18 @@ static void svm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
 	to_svm(vcpu)->vmcb->save.rflags = rflags;
 }
 
+static void svm_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
+{
+	switch (reg) {
+	case VCPU_EXREG_PDPTR:
+		BUG_ON(!npt_enabled);
+		load_pdptrs(vcpu, vcpu->arch.cr3);
+		break;
+	default:
+		BUG();
+	}
+}
+
 static void svm_set_vintr(struct vcpu_svm *svm)
 {
 	svm->vmcb->control.intercept |= 1ULL << INTERCEPT_VINTR;
@@ -1061,7 +1131,6 @@ static unsigned long svm_get_dr(struct kvm_vcpu *vcpu, int dr)
 		val = 0;
 	}
 
-	KVMTRACE_2D(DR_READ, vcpu, (u32)dr, (u32)val, handler);
 	return val;
 }
 
@@ -1070,8 +1139,6 @@ static void svm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long value,
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
 
-	KVMTRACE_2D(DR_WRITE, vcpu, (u32)dr, (u32)value, handler);
-
 	*exception = 0;
 
 	switch (dr) {
@@ -1119,25 +1186,9 @@ static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 	fault_address  = svm->vmcb->control.exit_info_2;
 	error_code = svm->vmcb->control.exit_info_1;
 
-	if (!npt_enabled)
-		KVMTRACE_3D(PAGE_FAULT, &svm->vcpu, error_code,
-			    (u32)fault_address, (u32)(fault_address >> 32),
-			    handler);
-	else
-		KVMTRACE_3D(TDP_FAULT, &svm->vcpu, error_code,
-			    (u32)fault_address, (u32)(fault_address >> 32),
-			    handler);
-	/*
-	 * FIXME: Tis shouldn't be necessary here, but there is a flush
-	 * missing in the MMU code. Until we find this bug, flush the
-	 * complete TLB here on an NPF
-	 */
-	if (npt_enabled)
-		svm_flush_tlb(&svm->vcpu);
-	else {
-		if (kvm_event_needs_reinjection(&svm->vcpu))
-			kvm_mmu_unprotect_page_virt(&svm->vcpu, fault_address);
-	}
+	trace_kvm_page_fault(fault_address, error_code);
+	if (!npt_enabled && kvm_event_needs_reinjection(&svm->vcpu))
+		kvm_mmu_unprotect_page_virt(&svm->vcpu, fault_address);
 	return kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code);
 }
 
@@ -1253,14 +1304,12 @@ static int io_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 
 static int nmi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 {
-	KVMTRACE_0D(NMI, &svm->vcpu, handler);
 	return 1;
 }
 
 static int intr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 {
 	++svm->vcpu.stat.irq_exits;
-	KVMTRACE_0D(INTR, &svm->vcpu, handler);
 	return 1;
 }
 
@@ -1303,44 +1352,39 @@ static int nested_svm_check_permissions(struct vcpu_svm *svm)
 static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
 static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
 				      bool has_error_code, u32 error_code)
 				      bool has_error_code, u32 error_code)
 {
 {
-	if (is_nested(svm)) {
-		svm->vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + nr;
-		svm->vmcb->control.exit_code_hi = 0;
-		svm->vmcb->control.exit_info_1 = error_code;
-		svm->vmcb->control.exit_info_2 = svm->vcpu.arch.cr2;
-		if (nested_svm_exit_handled(svm, false)) {
-			nsvm_printk("VMexit -> EXCP 0x%x\n", nr);
-
-			nested_svm_vmexit(svm);
-			return 1;
-		}
-	}
+	if (!is_nested(svm))
+		return 0;
 
 
-	return 0;
+	svm->vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + nr;
+	svm->vmcb->control.exit_code_hi = 0;
+	svm->vmcb->control.exit_info_1 = error_code;
+	svm->vmcb->control.exit_info_2 = svm->vcpu.arch.cr2;
+
+	return nested_svm_exit_handled(svm);
 }
 }
 
 
 static inline int nested_svm_intr(struct vcpu_svm *svm)
 static inline int nested_svm_intr(struct vcpu_svm *svm)
 {
 {
-	if (is_nested(svm)) {
-		if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK))
-			return 0;
+	if (!is_nested(svm))
+		return 0;
 
 
-		if (!(svm->vcpu.arch.hflags & HF_HIF_MASK))
-			return 0;
+	if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK))
+		return 0;
 
 
-		svm->vmcb->control.exit_code = SVM_EXIT_INTR;
+	if (!(svm->vcpu.arch.hflags & HF_HIF_MASK))
+		return 0;
 
 
-		if (nested_svm_exit_handled(svm, false)) {
-			nsvm_printk("VMexit -> INTR\n");
-			nested_svm_vmexit(svm);
-			return 1;
-		}
+	svm->vmcb->control.exit_code = SVM_EXIT_INTR;
+
+	if (nested_svm_exit_handled(svm)) {
+		nsvm_printk("VMexit -> INTR\n");
+		return 1;
 	}
 	}
 
 
 	return 0;
 	return 0;
 }
 }
 
 
-static struct page *nested_svm_get_page(struct vcpu_svm *svm, u64 gpa)
+static void *nested_svm_map(struct vcpu_svm *svm, u64 gpa, enum km_type idx)
 {
 {
 	struct page *page;
 	struct page *page;
 
 
@@ -1348,236 +1392,246 @@ static struct page *nested_svm_get_page(struct vcpu_svm *svm, u64 gpa)
 	page = gfn_to_page(svm->vcpu.kvm, gpa >> PAGE_SHIFT);
 	page = gfn_to_page(svm->vcpu.kvm, gpa >> PAGE_SHIFT);
 	up_read(&current->mm->mmap_sem);
 	up_read(&current->mm->mmap_sem);
 
 
-	if (is_error_page(page)) {
-		printk(KERN_INFO "%s: could not find page at 0x%llx\n",
-		       __func__, gpa);
-		kvm_release_page_clean(page);
-		kvm_inject_gp(&svm->vcpu, 0);
-		return NULL;
-	}
-	return page;
+	if (is_error_page(page))
+		goto error;
+
+	return kmap_atomic(page, idx);
+
+error:
+	kvm_release_page_clean(page);
+	kvm_inject_gp(&svm->vcpu, 0);
+
+	return NULL;
 }
 }
 
 
-static int nested_svm_do(struct vcpu_svm *svm,
-			 u64 arg1_gpa, u64 arg2_gpa, void *opaque,
-			 int (*handler)(struct vcpu_svm *svm,
-					void *arg1,
-					void *arg2,
-					void *opaque))
+static void nested_svm_unmap(void *addr, enum km_type idx)
 {
 {
-	struct page *arg1_page;
-	struct page *arg2_page = NULL;
-	void *arg1;
-	void *arg2 = NULL;
-	int retval;
+	struct page *page;
 
 
-	arg1_page = nested_svm_get_page(svm, arg1_gpa);
-	if(arg1_page == NULL)
-		return 1;
+	if (!addr)
+		return;
 
 
-	if (arg2_gpa) {
-		arg2_page = nested_svm_get_page(svm, arg2_gpa);
-		if(arg2_page == NULL) {
-			kvm_release_page_clean(arg1_page);
-			return 1;
-		}
-	}
+	page = kmap_atomic_to_page(addr);
+
+	kunmap_atomic(addr, idx);
+	kvm_release_page_dirty(page);
+}
+
+static bool nested_svm_exit_handled_msr(struct vcpu_svm *svm)
+{
+	u32 param = svm->vmcb->control.exit_info_1 & 1;
+	u32 msr = svm->vcpu.arch.regs[VCPU_REGS_RCX];
+	bool ret = false;
+	u32 t0, t1;
+	u8 *msrpm;
 
 
-	arg1 = kmap_atomic(arg1_page, KM_USER0);
-	if (arg2_gpa)
-		arg2 = kmap_atomic(arg2_page, KM_USER1);
+	if (!(svm->nested.intercept & (1ULL << INTERCEPT_MSR_PROT)))
+		return false;
 
 
-	retval = handler(svm, arg1, arg2, opaque);
+	msrpm = nested_svm_map(svm, svm->nested.vmcb_msrpm, KM_USER0);
+
+	if (!msrpm)
+		goto out;
+
+	switch (msr) {
+	case 0 ... 0x1fff:
+		t0 = (msr * 2) % 8;
+		t1 = msr / 8;
+		break;
+	case 0xc0000000 ... 0xc0001fff:
+		t0 = (8192 + msr - 0xc0000000) * 2;
+		t1 = (t0 / 8);
+		t0 %= 8;
+		break;
+	case 0xc0010000 ... 0xc0011fff:
+		t0 = (16384 + msr - 0xc0010000) * 2;
+		t1 = (t0 / 8);
+		t0 %= 8;
+		break;
+	default:
+		ret = true;
+		goto out;
+	}
 
 
-	kunmap_atomic(arg1, KM_USER0);
-	if (arg2_gpa)
-		kunmap_atomic(arg2, KM_USER1);
+	ret = msrpm[t1] & ((1 << param) << t0);
 
 
-	kvm_release_page_dirty(arg1_page);
-	if (arg2_gpa)
-		kvm_release_page_dirty(arg2_page);
+out:
+	nested_svm_unmap(msrpm, KM_USER0);
 
 
-	return retval;
+	return ret;
 }
 }
 
 
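nested_svm_exit_handled_msr() above consults the guest's MSR permission bitmap directly instead of going through the old nested_svm_do() callback plumbing. The byte/bit lookup it performs (two intercept bits per MSR, split over three MSR ranges) can be traced in isolation; the sketch below only reproduces that lookup in userspace and is not kernel code:

/* Illustrative only -- a userspace rendering of the bitmap lookup above. */
#include <stdint.h>
#include <stdio.h>

/*
 * Byte and bit the patch tests in the SVM MSR permission bitmap for a
 * given MSR.  Returns 0 on success, -1 for MSRs outside the mapped ranges.
 */
static int msrpm_offset(uint32_t msr, uint32_t *byte, uint32_t *bit)
{
	uint32_t t0, t1;

	if (msr <= 0x1fff) {
		t0 = (msr * 2) % 8;
		t1 = msr / 8;
	} else if (msr >= 0xc0000000 && msr <= 0xc0001fff) {
		t0 = (8192 + msr - 0xc0000000) * 2;
		t1 = t0 / 8;
		t0 %= 8;
	} else if (msr >= 0xc0010000 && msr <= 0xc0011fff) {
		t0 = (16384 + msr - 0xc0010000) * 2;
		t1 = t0 / 8;
		t0 %= 8;
	} else {
		return -1;
	}

	*byte = t1;
	*bit  = t0;
	return 0;
}

int main(void)
{
	uint32_t byte, bit;

	if (!msrpm_offset(0xc0000080 /* MSR_EFER */, &byte, &bit))
		printf("EFER read-intercept bit: byte %u, bit %u\n", byte, bit);
	return 0;
}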
-static int nested_svm_exit_handled_real(struct vcpu_svm *svm,
-					void *arg1,
-					void *arg2,
-					void *opaque)
+static int nested_svm_exit_special(struct vcpu_svm *svm)
 {
 {
-	struct vmcb *nested_vmcb = (struct vmcb *)arg1;
-	bool kvm_overrides = *(bool *)opaque;
 	u32 exit_code = svm->vmcb->control.exit_code;
 	u32 exit_code = svm->vmcb->control.exit_code;
 
 
-	if (kvm_overrides) {
-		switch (exit_code) {
-		case SVM_EXIT_INTR:
-		case SVM_EXIT_NMI:
-			return 0;
+	switch (exit_code) {
+	case SVM_EXIT_INTR:
+	case SVM_EXIT_NMI:
+		return NESTED_EXIT_HOST;
 		/* For now we are always handling NPFs when using them */
 		/* For now we are always handling NPFs when using them */
-		case SVM_EXIT_NPF:
-			if (npt_enabled)
-				return 0;
-			break;
-		/* When we're shadowing, trap PFs */
-		case SVM_EXIT_EXCP_BASE + PF_VECTOR:
-			if (!npt_enabled)
-				return 0;
-			break;
-		default:
-			break;
-		}
+	case SVM_EXIT_NPF:
+		if (npt_enabled)
+			return NESTED_EXIT_HOST;
+		break;
+	/* When we're shadowing, trap PFs */
+	case SVM_EXIT_EXCP_BASE + PF_VECTOR:
+		if (!npt_enabled)
+			return NESTED_EXIT_HOST;
+		break;
+	default:
+		break;
 	}
 	}
 
 
+	return NESTED_EXIT_CONTINUE;
+}
+
+/*
+ * If this function returns true, this #vmexit was already handled
+ */
+static int nested_svm_exit_handled(struct vcpu_svm *svm)
+{
+	u32 exit_code = svm->vmcb->control.exit_code;
+	int vmexit = NESTED_EXIT_HOST;
+
 	switch (exit_code) {
 	switch (exit_code) {
+	case SVM_EXIT_MSR:
+		vmexit = nested_svm_exit_handled_msr(svm);
+		break;
 	case SVM_EXIT_READ_CR0 ... SVM_EXIT_READ_CR8: {
 	case SVM_EXIT_READ_CR0 ... SVM_EXIT_READ_CR8: {
 		u32 cr_bits = 1 << (exit_code - SVM_EXIT_READ_CR0);
 		u32 cr_bits = 1 << (exit_code - SVM_EXIT_READ_CR0);
-		if (nested_vmcb->control.intercept_cr_read & cr_bits)
-			return 1;
+		if (svm->nested.intercept_cr_read & cr_bits)
+			vmexit = NESTED_EXIT_DONE;
 		break;
 		break;
 	}
 	}
 	case SVM_EXIT_WRITE_CR0 ... SVM_EXIT_WRITE_CR8: {
 	case SVM_EXIT_WRITE_CR0 ... SVM_EXIT_WRITE_CR8: {
 		u32 cr_bits = 1 << (exit_code - SVM_EXIT_WRITE_CR0);
 		u32 cr_bits = 1 << (exit_code - SVM_EXIT_WRITE_CR0);
-		if (nested_vmcb->control.intercept_cr_write & cr_bits)
-			return 1;
+		if (svm->nested.intercept_cr_write & cr_bits)
+			vmexit = NESTED_EXIT_DONE;
 		break;
 		break;
 	}
 	}
 	case SVM_EXIT_READ_DR0 ... SVM_EXIT_READ_DR7: {
 	case SVM_EXIT_READ_DR0 ... SVM_EXIT_READ_DR7: {
 		u32 dr_bits = 1 << (exit_code - SVM_EXIT_READ_DR0);
 		u32 dr_bits = 1 << (exit_code - SVM_EXIT_READ_DR0);
-		if (nested_vmcb->control.intercept_dr_read & dr_bits)
-			return 1;
+		if (svm->nested.intercept_dr_read & dr_bits)
+			vmexit = NESTED_EXIT_DONE;
 		break;
 		break;
 	}
 	}
 	case SVM_EXIT_WRITE_DR0 ... SVM_EXIT_WRITE_DR7: {
 	case SVM_EXIT_WRITE_DR0 ... SVM_EXIT_WRITE_DR7: {
 		u32 dr_bits = 1 << (exit_code - SVM_EXIT_WRITE_DR0);
 		u32 dr_bits = 1 << (exit_code - SVM_EXIT_WRITE_DR0);
-		if (nested_vmcb->control.intercept_dr_write & dr_bits)
-			return 1;
+		if (svm->nested.intercept_dr_write & dr_bits)
+			vmexit = NESTED_EXIT_DONE;
 		break;
 		break;
 	}
 	}
 	case SVM_EXIT_EXCP_BASE ... SVM_EXIT_EXCP_BASE + 0x1f: {
 	case SVM_EXIT_EXCP_BASE ... SVM_EXIT_EXCP_BASE + 0x1f: {
 		u32 excp_bits = 1 << (exit_code - SVM_EXIT_EXCP_BASE);
 		u32 excp_bits = 1 << (exit_code - SVM_EXIT_EXCP_BASE);
-		if (nested_vmcb->control.intercept_exceptions & excp_bits)
-			return 1;
+		if (svm->nested.intercept_exceptions & excp_bits)
+			vmexit = NESTED_EXIT_DONE;
 		break;
 		break;
 	}
 	}
 	default: {
 	default: {
 		u64 exit_bits = 1ULL << (exit_code - SVM_EXIT_INTR);
 		u64 exit_bits = 1ULL << (exit_code - SVM_EXIT_INTR);
 		nsvm_printk("exit code: 0x%x\n", exit_code);
 		nsvm_printk("exit code: 0x%x\n", exit_code);
-		if (nested_vmcb->control.intercept & exit_bits)
-			return 1;
+		if (svm->nested.intercept & exit_bits)
+			vmexit = NESTED_EXIT_DONE;
 	}
 	}
 	}
 	}
 
 
-	return 0;
-}
-
-static int nested_svm_exit_handled_msr(struct vcpu_svm *svm,
-				       void *arg1, void *arg2,
-				       void *opaque)
-{
-	struct vmcb *nested_vmcb = (struct vmcb *)arg1;
-	u8 *msrpm = (u8 *)arg2;
-        u32 t0, t1;
-	u32 msr = svm->vcpu.arch.regs[VCPU_REGS_RCX];
-	u32 param = svm->vmcb->control.exit_info_1 & 1;
-
-	if (!(nested_vmcb->control.intercept & (1ULL << INTERCEPT_MSR_PROT)))
-		return 0;
-
-	switch(msr) {
-	case 0 ... 0x1fff:
-		t0 = (msr * 2) % 8;
-		t1 = msr / 8;
-		break;
-	case 0xc0000000 ... 0xc0001fff:
-		t0 = (8192 + msr - 0xc0000000) * 2;
-		t1 = (t0 / 8);
-		t0 %= 8;
-		break;
-	case 0xc0010000 ... 0xc0011fff:
-		t0 = (16384 + msr - 0xc0010000) * 2;
-		t1 = (t0 / 8);
-		t0 %= 8;
-		break;
-	default:
-		return 1;
-		break;
+	if (vmexit == NESTED_EXIT_DONE) {
+		nsvm_printk("#VMEXIT reason=%04x\n", exit_code);
+		nested_svm_vmexit(svm);
 	}
 	}
-	if (msrpm[t1] & ((1 << param) << t0))
-		return 1;
 
 
-	return 0;
+	return vmexit;
+}
+
+static inline void copy_vmcb_control_area(struct vmcb *dst_vmcb, struct vmcb *from_vmcb)
+{
+	struct vmcb_control_area *dst  = &dst_vmcb->control;
+	struct vmcb_control_area *from = &from_vmcb->control;
+
+	dst->intercept_cr_read    = from->intercept_cr_read;
+	dst->intercept_cr_write   = from->intercept_cr_write;
+	dst->intercept_dr_read    = from->intercept_dr_read;
+	dst->intercept_dr_write   = from->intercept_dr_write;
+	dst->intercept_exceptions = from->intercept_exceptions;
+	dst->intercept            = from->intercept;
+	dst->iopm_base_pa         = from->iopm_base_pa;
+	dst->msrpm_base_pa        = from->msrpm_base_pa;
+	dst->tsc_offset           = from->tsc_offset;
+	dst->asid                 = from->asid;
+	dst->tlb_ctl              = from->tlb_ctl;
+	dst->int_ctl              = from->int_ctl;
+	dst->int_vector           = from->int_vector;
+	dst->int_state            = from->int_state;
+	dst->exit_code            = from->exit_code;
+	dst->exit_code_hi         = from->exit_code_hi;
+	dst->exit_info_1          = from->exit_info_1;
+	dst->exit_info_2          = from->exit_info_2;
+	dst->exit_int_info        = from->exit_int_info;
+	dst->exit_int_info_err    = from->exit_int_info_err;
+	dst->nested_ctl           = from->nested_ctl;
+	dst->event_inj            = from->event_inj;
+	dst->event_inj_err        = from->event_inj_err;
+	dst->nested_cr3           = from->nested_cr3;
+	dst->lbr_ctl              = from->lbr_ctl;
 }
 }
 
 
-static int nested_svm_exit_handled(struct vcpu_svm *svm, bool kvm_override)
+static int nested_svm_vmexit(struct vcpu_svm *svm)
 {
 {
-	bool k = kvm_override;
-
-	switch (svm->vmcb->control.exit_code) {
-	case SVM_EXIT_MSR:
-		return nested_svm_do(svm, svm->nested_vmcb,
-				     svm->nested_vmcb_msrpm, NULL,
-				     nested_svm_exit_handled_msr);
-	default: break;
-	}
+	struct vmcb *nested_vmcb;
+	struct vmcb *hsave = svm->nested.hsave;
+	struct vmcb *vmcb = svm->vmcb;
 
 
-	return nested_svm_do(svm, svm->nested_vmcb, 0, &k,
-			     nested_svm_exit_handled_real);
-}
-
-static int nested_svm_vmexit_real(struct vcpu_svm *svm, void *arg1,
-				  void *arg2, void *opaque)
-{
-	struct vmcb *nested_vmcb = (struct vmcb *)arg1;
-	struct vmcb *hsave = svm->hsave;
-	u64 nested_save[] = { nested_vmcb->save.cr0,
-			      nested_vmcb->save.cr3,
-			      nested_vmcb->save.cr4,
-			      nested_vmcb->save.efer,
-			      nested_vmcb->control.intercept_cr_read,
-			      nested_vmcb->control.intercept_cr_write,
-			      nested_vmcb->control.intercept_dr_read,
-			      nested_vmcb->control.intercept_dr_write,
-			      nested_vmcb->control.intercept_exceptions,
-			      nested_vmcb->control.intercept,
-			      nested_vmcb->control.msrpm_base_pa,
-			      nested_vmcb->control.iopm_base_pa,
-			      nested_vmcb->control.tsc_offset };
+	nested_vmcb = nested_svm_map(svm, svm->nested.vmcb, KM_USER0);
+	if (!nested_vmcb)
+		return 1;
 
 
 	/* Give the current vmcb to the guest */
 	/* Give the current vmcb to the guest */
-	memcpy(nested_vmcb, svm->vmcb, sizeof(struct vmcb));
-	nested_vmcb->save.cr0 = nested_save[0];
-	if (!npt_enabled)
-		nested_vmcb->save.cr3 = nested_save[1];
-	nested_vmcb->save.cr4 = nested_save[2];
-	nested_vmcb->save.efer = nested_save[3];
-	nested_vmcb->control.intercept_cr_read = nested_save[4];
-	nested_vmcb->control.intercept_cr_write = nested_save[5];
-	nested_vmcb->control.intercept_dr_read = nested_save[6];
-	nested_vmcb->control.intercept_dr_write = nested_save[7];
-	nested_vmcb->control.intercept_exceptions = nested_save[8];
-	nested_vmcb->control.intercept = nested_save[9];
-	nested_vmcb->control.msrpm_base_pa = nested_save[10];
-	nested_vmcb->control.iopm_base_pa = nested_save[11];
-	nested_vmcb->control.tsc_offset = nested_save[12];
+	disable_gif(svm);
+
+	nested_vmcb->save.es     = vmcb->save.es;
+	nested_vmcb->save.cs     = vmcb->save.cs;
+	nested_vmcb->save.ss     = vmcb->save.ss;
+	nested_vmcb->save.ds     = vmcb->save.ds;
+	nested_vmcb->save.gdtr   = vmcb->save.gdtr;
+	nested_vmcb->save.idtr   = vmcb->save.idtr;
+	if (npt_enabled)
+		nested_vmcb->save.cr3    = vmcb->save.cr3;
+	nested_vmcb->save.cr2    = vmcb->save.cr2;
+	nested_vmcb->save.rflags = vmcb->save.rflags;
+	nested_vmcb->save.rip    = vmcb->save.rip;
+	nested_vmcb->save.rsp    = vmcb->save.rsp;
+	nested_vmcb->save.rax    = vmcb->save.rax;
+	nested_vmcb->save.dr7    = vmcb->save.dr7;
+	nested_vmcb->save.dr6    = vmcb->save.dr6;
+	nested_vmcb->save.cpl    = vmcb->save.cpl;
+
+	nested_vmcb->control.int_ctl           = vmcb->control.int_ctl;
+	nested_vmcb->control.int_vector        = vmcb->control.int_vector;
+	nested_vmcb->control.int_state         = vmcb->control.int_state;
+	nested_vmcb->control.exit_code         = vmcb->control.exit_code;
+	nested_vmcb->control.exit_code_hi      = vmcb->control.exit_code_hi;
+	nested_vmcb->control.exit_info_1       = vmcb->control.exit_info_1;
+	nested_vmcb->control.exit_info_2       = vmcb->control.exit_info_2;
+	nested_vmcb->control.exit_int_info     = vmcb->control.exit_int_info;
+	nested_vmcb->control.exit_int_info_err = vmcb->control.exit_int_info_err;
+	nested_vmcb->control.tlb_ctl           = 0;
+	nested_vmcb->control.event_inj         = 0;
+	nested_vmcb->control.event_inj_err     = 0;
 
 
 	/* We always set V_INTR_MASKING and remember the old value in hflags */
 	/* We always set V_INTR_MASKING and remember the old value in hflags */
 	if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK))
 	if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK))
 		nested_vmcb->control.int_ctl &= ~V_INTR_MASKING_MASK;
 		nested_vmcb->control.int_ctl &= ~V_INTR_MASKING_MASK;
 
 
-	if ((nested_vmcb->control.int_ctl & V_IRQ_MASK) &&
-	    (nested_vmcb->control.int_vector)) {
-		nsvm_printk("WARNING: IRQ 0x%x still enabled on #VMEXIT\n",
-				nested_vmcb->control.int_vector);
-	}
-
 	/* Restore the original control entries */
 	/* Restore the original control entries */
-	svm->vmcb->control = hsave->control;
+	copy_vmcb_control_area(vmcb, hsave);
 
 
 	/* Kill any pending exceptions */
 	/* Kill any pending exceptions */
 	if (svm->vcpu.arch.exception.pending == true)
 	if (svm->vcpu.arch.exception.pending == true)
 		nsvm_printk("WARNING: Pending Exception\n");
 		nsvm_printk("WARNING: Pending Exception\n");
-	svm->vcpu.arch.exception.pending = false;
+
+	kvm_clear_exception_queue(&svm->vcpu);
+	kvm_clear_interrupt_queue(&svm->vcpu);
 
 
 	/* Restore selected save entries */
 	/* Restore selected save entries */
 	svm->vmcb->save.es = hsave->save.es;
 	svm->vmcb->save.es = hsave->save.es;
@@ -1603,19 +1657,10 @@ static int nested_svm_vmexit_real(struct vcpu_svm *svm, void *arg1,
 	svm->vmcb->save.cpl = 0;
 	svm->vmcb->save.cpl = 0;
 	svm->vmcb->control.exit_int_info = 0;
 	svm->vmcb->control.exit_int_info = 0;
 
 
-	svm->vcpu.arch.hflags &= ~HF_GIF_MASK;
 	/* Exit nested SVM mode */
 	/* Exit nested SVM mode */
-	svm->nested_vmcb = 0;
+	svm->nested.vmcb = 0;
 
 
-	return 0;
-}
-
-static int nested_svm_vmexit(struct vcpu_svm *svm)
-{
-	nsvm_printk("VMexit\n");
-	if (nested_svm_do(svm, svm->nested_vmcb, 0,
-			  NULL, nested_svm_vmexit_real))
-		return 1;
+	nested_svm_unmap(nested_vmcb, KM_USER0);
 
 
 	kvm_mmu_reset_context(&svm->vcpu);
 	kvm_mmu_reset_context(&svm->vcpu);
 	kvm_mmu_load(&svm->vcpu);
 	kvm_mmu_load(&svm->vcpu);
@@ -1623,38 +1668,63 @@ static int nested_svm_vmexit(struct vcpu_svm *svm)
 	return 0;
 	return 0;
 }
 }
 
 
-static int nested_svm_vmrun_msrpm(struct vcpu_svm *svm, void *arg1,
-				  void *arg2, void *opaque)
+static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm)
 {
 {
+	u32 *nested_msrpm;
 	int i;
 	int i;
-	u32 *nested_msrpm = (u32*)arg1;
+
+	nested_msrpm = nested_svm_map(svm, svm->nested.vmcb_msrpm, KM_USER0);
+	if (!nested_msrpm)
+		return false;
+
 	for (i=0; i< PAGE_SIZE * (1 << MSRPM_ALLOC_ORDER) / 4; i++)
 	for (i=0; i< PAGE_SIZE * (1 << MSRPM_ALLOC_ORDER) / 4; i++)
-		svm->nested_msrpm[i] = svm->msrpm[i] | nested_msrpm[i];
-	svm->vmcb->control.msrpm_base_pa = __pa(svm->nested_msrpm);
+		svm->nested.msrpm[i] = svm->msrpm[i] | nested_msrpm[i];
 
 
-	return 0;
+	svm->vmcb->control.msrpm_base_pa = __pa(svm->nested.msrpm);
+
+	nested_svm_unmap(nested_msrpm, KM_USER0);
+
+	return true;
 }
 }
 
 
-static int nested_svm_vmrun(struct vcpu_svm *svm, void *arg1,
-			    void *arg2, void *opaque)
+static bool nested_svm_vmrun(struct vcpu_svm *svm)
 {
 {
-	struct vmcb *nested_vmcb = (struct vmcb *)arg1;
-	struct vmcb *hsave = svm->hsave;
+	struct vmcb *nested_vmcb;
+	struct vmcb *hsave = svm->nested.hsave;
+	struct vmcb *vmcb = svm->vmcb;
+
+	nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, KM_USER0);
+	if (!nested_vmcb)
+		return false;
 
 
 	/* nested_vmcb is our indicator if nested SVM is activated */
 	/* nested_vmcb is our indicator if nested SVM is activated */
-	svm->nested_vmcb = svm->vmcb->save.rax;
+	svm->nested.vmcb = svm->vmcb->save.rax;
 
 
 	/* Clear internal status */
 	/* Clear internal status */
-	svm->vcpu.arch.exception.pending = false;
+	kvm_clear_exception_queue(&svm->vcpu);
+	kvm_clear_interrupt_queue(&svm->vcpu);
 
 
 	/* Save the old vmcb, so we don't need to pick what we save, but
 	/* Save the old vmcb, so we don't need to pick what we save, but
 	   can restore everything when a VMEXIT occurs */
 	   can restore everything when a VMEXIT occurs */
-	memcpy(hsave, svm->vmcb, sizeof(struct vmcb));
-	/* We need to remember the original CR3 in the SPT case */
-	if (!npt_enabled)
-		hsave->save.cr3 = svm->vcpu.arch.cr3;
-	hsave->save.cr4 = svm->vcpu.arch.cr4;
-	hsave->save.rip = svm->next_rip;
+	hsave->save.es     = vmcb->save.es;
+	hsave->save.cs     = vmcb->save.cs;
+	hsave->save.ss     = vmcb->save.ss;
+	hsave->save.ds     = vmcb->save.ds;
+	hsave->save.gdtr   = vmcb->save.gdtr;
+	hsave->save.idtr   = vmcb->save.idtr;
+	hsave->save.efer   = svm->vcpu.arch.shadow_efer;
+	hsave->save.cr0    = svm->vcpu.arch.cr0;
+	hsave->save.cr4    = svm->vcpu.arch.cr4;
+	hsave->save.rflags = vmcb->save.rflags;
+	hsave->save.rip    = svm->next_rip;
+	hsave->save.rsp    = vmcb->save.rsp;
+	hsave->save.rax    = vmcb->save.rax;
+	if (npt_enabled)
+		hsave->save.cr3    = vmcb->save.cr3;
+	else
+		hsave->save.cr3    = svm->vcpu.arch.cr3;
+
+	copy_vmcb_control_area(hsave, vmcb);
 
 
 	if (svm->vmcb->save.rflags & X86_EFLAGS_IF)
 	if (svm->vmcb->save.rflags & X86_EFLAGS_IF)
 		svm->vcpu.arch.hflags |= HF_HIF_MASK;
 		svm->vcpu.arch.hflags |= HF_HIF_MASK;
@@ -1679,7 +1749,7 @@ static int nested_svm_vmrun(struct vcpu_svm *svm, void *arg1,
 		kvm_set_cr3(&svm->vcpu, nested_vmcb->save.cr3);
 		kvm_set_cr3(&svm->vcpu, nested_vmcb->save.cr3);
 		kvm_mmu_reset_context(&svm->vcpu);
 		kvm_mmu_reset_context(&svm->vcpu);
 	}
 	}
-	svm->vmcb->save.cr2 = nested_vmcb->save.cr2;
+	svm->vmcb->save.cr2 = svm->vcpu.arch.cr2 = nested_vmcb->save.cr2;
 	kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, nested_vmcb->save.rax);
 	kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, nested_vmcb->save.rax);
 	kvm_register_write(&svm->vcpu, VCPU_REGS_RSP, nested_vmcb->save.rsp);
 	kvm_register_write(&svm->vcpu, VCPU_REGS_RSP, nested_vmcb->save.rsp);
 	kvm_register_write(&svm->vcpu, VCPU_REGS_RIP, nested_vmcb->save.rip);
 	kvm_register_write(&svm->vcpu, VCPU_REGS_RIP, nested_vmcb->save.rip);
@@ -1706,7 +1776,15 @@ static int nested_svm_vmrun(struct vcpu_svm *svm, void *arg1,
 
 
 	svm->vmcb->control.intercept |= nested_vmcb->control.intercept;
 	svm->vmcb->control.intercept |= nested_vmcb->control.intercept;
 
 
-	svm->nested_vmcb_msrpm = nested_vmcb->control.msrpm_base_pa;
+	svm->nested.vmcb_msrpm = nested_vmcb->control.msrpm_base_pa;
+
+	/* cache intercepts */
+	svm->nested.intercept_cr_read    = nested_vmcb->control.intercept_cr_read;
+	svm->nested.intercept_cr_write   = nested_vmcb->control.intercept_cr_write;
+	svm->nested.intercept_dr_read    = nested_vmcb->control.intercept_dr_read;
+	svm->nested.intercept_dr_write   = nested_vmcb->control.intercept_dr_write;
+	svm->nested.intercept_exceptions = nested_vmcb->control.intercept_exceptions;
+	svm->nested.intercept            = nested_vmcb->control.intercept;
 
 
 	force_new_asid(&svm->vcpu);
 	force_new_asid(&svm->vcpu);
 	svm->vmcb->control.exit_int_info = nested_vmcb->control.exit_int_info;
 	svm->vmcb->control.exit_int_info = nested_vmcb->control.exit_int_info;
@@ -1734,12 +1812,14 @@ static int nested_svm_vmrun(struct vcpu_svm *svm, void *arg1,
 	svm->vmcb->control.event_inj = nested_vmcb->control.event_inj;
 	svm->vmcb->control.event_inj = nested_vmcb->control.event_inj;
 	svm->vmcb->control.event_inj_err = nested_vmcb->control.event_inj_err;
 	svm->vmcb->control.event_inj_err = nested_vmcb->control.event_inj_err;
 
 
-	svm->vcpu.arch.hflags |= HF_GIF_MASK;
+	nested_svm_unmap(nested_vmcb, KM_USER0);
 
 
-	return 0;
+	enable_gif(svm);
+
+	return true;
 }
 }
 
 
-static int nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb)
+static void nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb)
 {
 {
 	to_vmcb->save.fs = from_vmcb->save.fs;
 	to_vmcb->save.fs = from_vmcb->save.fs;
 	to_vmcb->save.gs = from_vmcb->save.gs;
 	to_vmcb->save.gs = from_vmcb->save.gs;
@@ -1753,44 +1833,44 @@ static int nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb)
 	to_vmcb->save.sysenter_cs = from_vmcb->save.sysenter_cs;
 	to_vmcb->save.sysenter_cs = from_vmcb->save.sysenter_cs;
 	to_vmcb->save.sysenter_esp = from_vmcb->save.sysenter_esp;
 	to_vmcb->save.sysenter_esp = from_vmcb->save.sysenter_esp;
 	to_vmcb->save.sysenter_eip = from_vmcb->save.sysenter_eip;
 	to_vmcb->save.sysenter_eip = from_vmcb->save.sysenter_eip;
-
-	return 1;
-}
-
-static int nested_svm_vmload(struct vcpu_svm *svm, void *nested_vmcb,
-			     void *arg2, void *opaque)
-{
-	return nested_svm_vmloadsave((struct vmcb *)nested_vmcb, svm->vmcb);
-}
-
-static int nested_svm_vmsave(struct vcpu_svm *svm, void *nested_vmcb,
-			     void *arg2, void *opaque)
-{
-	return nested_svm_vmloadsave(svm->vmcb, (struct vmcb *)nested_vmcb);
 }
 }
 
 
 static int vmload_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 static int vmload_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 {
 {
+	struct vmcb *nested_vmcb;
+
 	if (nested_svm_check_permissions(svm))
 	if (nested_svm_check_permissions(svm))
 		return 1;
 		return 1;
 
 
 	svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
 	svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
 	skip_emulated_instruction(&svm->vcpu);
 	skip_emulated_instruction(&svm->vcpu);
 
 
-	nested_svm_do(svm, svm->vmcb->save.rax, 0, NULL, nested_svm_vmload);
+	nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, KM_USER0);
+	if (!nested_vmcb)
+		return 1;
+
+	nested_svm_vmloadsave(nested_vmcb, svm->vmcb);
+	nested_svm_unmap(nested_vmcb, KM_USER0);
 
 
 	return 1;
 	return 1;
 }
 }
 
 
 static int vmsave_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 static int vmsave_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 {
 {
+	struct vmcb *nested_vmcb;
+
 	if (nested_svm_check_permissions(svm))
 	if (nested_svm_check_permissions(svm))
 		return 1;
 		return 1;
 
 
 	svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
 	svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
 	skip_emulated_instruction(&svm->vcpu);
 	skip_emulated_instruction(&svm->vcpu);
 
 
-	nested_svm_do(svm, svm->vmcb->save.rax, 0, NULL, nested_svm_vmsave);
+	nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, KM_USER0);
+	if (!nested_vmcb)
+		return 1;
+
+	nested_svm_vmloadsave(svm->vmcb, nested_vmcb);
+	nested_svm_unmap(nested_vmcb, KM_USER0);
 
 
 	return 1;
 	return 1;
 }
 }
@@ -1798,19 +1878,29 @@ static int vmsave_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 static int vmrun_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 static int vmrun_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 {
 {
 	nsvm_printk("VMrun\n");
 	nsvm_printk("VMrun\n");
+
 	if (nested_svm_check_permissions(svm))
 	if (nested_svm_check_permissions(svm))
 		return 1;
 		return 1;
 
 
 	svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
 	svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
 	skip_emulated_instruction(&svm->vcpu);
 	skip_emulated_instruction(&svm->vcpu);
 
 
-	if (nested_svm_do(svm, svm->vmcb->save.rax, 0,
-			  NULL, nested_svm_vmrun))
+	if (!nested_svm_vmrun(svm))
 		return 1;
 		return 1;
 
 
-	if (nested_svm_do(svm, svm->nested_vmcb_msrpm, 0,
-		      NULL, nested_svm_vmrun_msrpm))
-		return 1;
+	if (!nested_svm_vmrun_msrpm(svm))
+		goto failed;
+
+	return 1;
+
+failed:
+
+	svm->vmcb->control.exit_code    = SVM_EXIT_ERR;
+	svm->vmcb->control.exit_code_hi = 0;
+	svm->vmcb->control.exit_info_1  = 0;
+	svm->vmcb->control.exit_info_2  = 0;
+
+	nested_svm_vmexit(svm);
 
 
 	return 1;
 	return 1;
 }
 }
@@ -1823,7 +1913,7 @@ static int stgi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 	svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
 	svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
 	skip_emulated_instruction(&svm->vcpu);
 	skip_emulated_instruction(&svm->vcpu);
 
 
-	svm->vcpu.arch.hflags |= HF_GIF_MASK;
+	enable_gif(svm);
 
 
 	return 1;
 	return 1;
 }
 }
@@ -1836,7 +1926,7 @@ static int clgi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 	svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
 	svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
 	skip_emulated_instruction(&svm->vcpu);
 	skip_emulated_instruction(&svm->vcpu);
 
 
-	svm->vcpu.arch.hflags &= ~HF_GIF_MASK;
+	disable_gif(svm);
 
 
 	/* After a CLGI no interrupts should come */
 	/* After a CLGI no interrupts should come */
 	svm_clear_vintr(svm);
 	svm_clear_vintr(svm);
@@ -1845,6 +1935,19 @@ static int clgi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 	return 1;
 	return 1;
 }
 }
 
 
+static int invlpga_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+{
+	struct kvm_vcpu *vcpu = &svm->vcpu;
+	nsvm_printk("INVLPGA\n");
+
+	/* Let's treat INVLPGA the same as INVLPG (can be optimized!) */
+	kvm_mmu_invlpg(vcpu, vcpu->arch.regs[VCPU_REGS_RAX]);
+
+	svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
+	skip_emulated_instruction(&svm->vcpu);
+	return 1;
+}
+
 static int invalid_op_interception(struct vcpu_svm *svm,
 static int invalid_op_interception(struct vcpu_svm *svm,
 				   struct kvm_run *kvm_run)
 				   struct kvm_run *kvm_run)
 {
 {
@@ -1953,7 +2056,7 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data)
 	struct vcpu_svm *svm = to_svm(vcpu);
 	struct vcpu_svm *svm = to_svm(vcpu);
 
 
 	switch (ecx) {
 	switch (ecx) {
-	case MSR_IA32_TIME_STAMP_COUNTER: {
+	case MSR_IA32_TSC: {
 		u64 tsc;
 		u64 tsc;
 
 
 		rdtscll(tsc);
 		rdtscll(tsc);
@@ -1981,10 +2084,10 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data)
 		*data = svm->vmcb->save.sysenter_cs;
 		*data = svm->vmcb->save.sysenter_cs;
 		break;
 		break;
 	case MSR_IA32_SYSENTER_EIP:
 	case MSR_IA32_SYSENTER_EIP:
-		*data = svm->vmcb->save.sysenter_eip;
+		*data = svm->sysenter_eip;
 		break;
 		break;
 	case MSR_IA32_SYSENTER_ESP:
 	case MSR_IA32_SYSENTER_ESP:
-		*data = svm->vmcb->save.sysenter_esp;
+		*data = svm->sysenter_esp;
 		break;
 		break;
 	/* Nobody will change the following 5 values in the VMCB so
 	/* Nobody will change the following 5 values in the VMCB so
 	   we can safely return them on rdmsr. They will always be 0
 	   we can safely return them on rdmsr. They will always be 0
@@ -2005,7 +2108,7 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data)
 		*data = svm->vmcb->save.last_excp_to;
 		*data = svm->vmcb->save.last_excp_to;
 		break;
 		break;
 	case MSR_VM_HSAVE_PA:
 	case MSR_VM_HSAVE_PA:
-		*data = svm->hsave_msr;
+		*data = svm->nested.hsave_msr;
 		break;
 		break;
 	case MSR_VM_CR:
 	case MSR_VM_CR:
 		*data = 0;
 		*data = 0;
@@ -2027,8 +2130,7 @@ static int rdmsr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 	if (svm_get_msr(&svm->vcpu, ecx, &data))
 	if (svm_get_msr(&svm->vcpu, ecx, &data))
 		kvm_inject_gp(&svm->vcpu, 0);
 		kvm_inject_gp(&svm->vcpu, 0);
 	else {
 	else {
-		KVMTRACE_3D(MSR_READ, &svm->vcpu, ecx, (u32)data,
-			    (u32)(data >> 32), handler);
+		trace_kvm_msr_read(ecx, data);
 
 
 		svm->vcpu.arch.regs[VCPU_REGS_RAX] = data & 0xffffffff;
 		svm->vcpu.arch.regs[VCPU_REGS_RAX] = data & 0xffffffff;
 		svm->vcpu.arch.regs[VCPU_REGS_RDX] = data >> 32;
 		svm->vcpu.arch.regs[VCPU_REGS_RDX] = data >> 32;
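rdmsr_interception() above hands the 64-bit MSR value back to the guest split across EDX:EAX, and wrmsr_interception() below reassembles it from the same register pair. For reference, the split and reassembly are simply:

/* Illustrative only. */
#include <stdint.h>
#include <assert.h>

int main(void)
{
	uint64_t data = 0x123456789abcdef0ULL;
	uint32_t eax  = data & 0xffffffff;	/* low half, as on rdmsr */
	uint32_t edx  = data >> 32;		/* high half             */

	/* wrmsr path: rebuild the value from the two registers */
	assert((((uint64_t)edx << 32) | eax) == data);
	return 0;
}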
@@ -2043,7 +2145,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data)
 	struct vcpu_svm *svm = to_svm(vcpu);
 	struct vcpu_svm *svm = to_svm(vcpu);
 
 
 	switch (ecx) {
 	switch (ecx) {
-	case MSR_IA32_TIME_STAMP_COUNTER: {
+	case MSR_IA32_TSC: {
 		u64 tsc;
 		u64 tsc;
 
 
 		rdtscll(tsc);
 		rdtscll(tsc);
@@ -2071,9 +2173,11 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data)
 		svm->vmcb->save.sysenter_cs = data;
 		svm->vmcb->save.sysenter_cs = data;
 		break;
 		break;
 	case MSR_IA32_SYSENTER_EIP:
 	case MSR_IA32_SYSENTER_EIP:
+		svm->sysenter_eip = data;
 		svm->vmcb->save.sysenter_eip = data;
 		svm->vmcb->save.sysenter_eip = data;
 		break;
 		break;
 	case MSR_IA32_SYSENTER_ESP:
 	case MSR_IA32_SYSENTER_ESP:
+		svm->sysenter_esp = data;
 		svm->vmcb->save.sysenter_esp = data;
 		svm->vmcb->save.sysenter_esp = data;
 		break;
 		break;
 	case MSR_IA32_DEBUGCTLMSR:
 	case MSR_IA32_DEBUGCTLMSR:
@@ -2091,24 +2195,12 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data)
 		else
 		else
 			svm_disable_lbrv(svm);
 			svm_disable_lbrv(svm);
 		break;
 		break;
-	case MSR_K7_EVNTSEL0:
-	case MSR_K7_EVNTSEL1:
-	case MSR_K7_EVNTSEL2:
-	case MSR_K7_EVNTSEL3:
-	case MSR_K7_PERFCTR0:
-	case MSR_K7_PERFCTR1:
-	case MSR_K7_PERFCTR2:
-	case MSR_K7_PERFCTR3:
-		/*
-		 * Just discard all writes to the performance counters; this
-		 * should keep both older linux and windows 64-bit guests
-		 * happy
-		 */
-		pr_unimpl(vcpu, "unimplemented perfctr wrmsr: 0x%x data 0x%llx\n", ecx, data);
-
-		break;
 	case MSR_VM_HSAVE_PA:
 	case MSR_VM_HSAVE_PA:
-		svm->hsave_msr = data;
+		svm->nested.hsave_msr = data;
+		break;
+	case MSR_VM_CR:
+	case MSR_VM_IGNNE:
+		pr_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data);
 		break;
 		break;
 	default:
 	default:
 		return kvm_set_msr_common(vcpu, ecx, data);
 		return kvm_set_msr_common(vcpu, ecx, data);
@@ -2122,8 +2214,7 @@ static int wrmsr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 	u64 data = (svm->vcpu.arch.regs[VCPU_REGS_RAX] & -1u)
 	u64 data = (svm->vcpu.arch.regs[VCPU_REGS_RAX] & -1u)
 		| ((u64)(svm->vcpu.arch.regs[VCPU_REGS_RDX] & -1u) << 32);
 		| ((u64)(svm->vcpu.arch.regs[VCPU_REGS_RDX] & -1u) << 32);
 
 
-	KVMTRACE_3D(MSR_WRITE, &svm->vcpu, ecx, (u32)data, (u32)(data >> 32),
-		    handler);
+	trace_kvm_msr_write(ecx, data);
 
 
 	svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
 	svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
 	if (svm_set_msr(&svm->vcpu, ecx, data))
 	if (svm_set_msr(&svm->vcpu, ecx, data))
@@ -2144,8 +2235,6 @@ static int msr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 static int interrupt_window_interception(struct vcpu_svm *svm,
 static int interrupt_window_interception(struct vcpu_svm *svm,
 				   struct kvm_run *kvm_run)
 				   struct kvm_run *kvm_run)
 {
 {
-	KVMTRACE_0D(PEND_INTR, &svm->vcpu, handler);
-
 	svm_clear_vintr(svm);
 	svm_clear_vintr(svm);
 	svm->vmcb->control.int_ctl &= ~V_IRQ_MASK;
 	svm->vmcb->control.int_ctl &= ~V_IRQ_MASK;
 	/*
 	/*
@@ -2201,7 +2290,7 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm,
 	[SVM_EXIT_INVD]                         = emulate_on_interception,
 	[SVM_EXIT_INVD]                         = emulate_on_interception,
 	[SVM_EXIT_HLT]				= halt_interception,
 	[SVM_EXIT_HLT]				= halt_interception,
 	[SVM_EXIT_INVLPG]			= invlpg_interception,
 	[SVM_EXIT_INVLPG]			= invlpg_interception,
-	[SVM_EXIT_INVLPGA]			= invalid_op_interception,
+	[SVM_EXIT_INVLPGA]			= invlpga_interception,
 	[SVM_EXIT_IOIO] 		  	= io_interception,
 	[SVM_EXIT_IOIO] 		  	= io_interception,
 	[SVM_EXIT_MSR]				= msr_interception,
 	[SVM_EXIT_MSR]				= msr_interception,
 	[SVM_EXIT_TASK_SWITCH]			= task_switch_interception,
 	[SVM_EXIT_TASK_SWITCH]			= task_switch_interception,
@@ -2224,20 +2313,26 @@ static int handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 	struct vcpu_svm *svm = to_svm(vcpu);
 	struct vcpu_svm *svm = to_svm(vcpu);
 	u32 exit_code = svm->vmcb->control.exit_code;
 	u32 exit_code = svm->vmcb->control.exit_code;
 
 
-	KVMTRACE_3D(VMEXIT, vcpu, exit_code, (u32)svm->vmcb->save.rip,
-		    (u32)((u64)svm->vmcb->save.rip >> 32), entryexit);
+	trace_kvm_exit(exit_code, svm->vmcb->save.rip);
 
 
 	if (is_nested(svm)) {
 	if (is_nested(svm)) {
+		int vmexit;
+
 		nsvm_printk("nested handle_exit: 0x%x | 0x%lx | 0x%lx | 0x%lx\n",
 		nsvm_printk("nested handle_exit: 0x%x | 0x%lx | 0x%lx | 0x%lx\n",
 			    exit_code, svm->vmcb->control.exit_info_1,
 			    exit_code, svm->vmcb->control.exit_info_1,
 			    svm->vmcb->control.exit_info_2, svm->vmcb->save.rip);
 			    svm->vmcb->control.exit_info_2, svm->vmcb->save.rip);
-		if (nested_svm_exit_handled(svm, true)) {
-			nested_svm_vmexit(svm);
-			nsvm_printk("-> #VMEXIT\n");
+
+		vmexit = nested_svm_exit_special(svm);
+
+		if (vmexit == NESTED_EXIT_CONTINUE)
+			vmexit = nested_svm_exit_handled(svm);
+
+		if (vmexit == NESTED_EXIT_DONE)
 			return 1;
 			return 1;
-		}
 	}
 	}
 
 
+	svm_complete_interrupts(svm);
+
 	if (npt_enabled) {
 	if (npt_enabled) {
 		int mmu_reload = 0;
 		int mmu_reload = 0;
 		if ((vcpu->arch.cr0 ^ svm->vmcb->save.cr0) & X86_CR0_PG) {
 		if ((vcpu->arch.cr0 ^ svm->vmcb->save.cr0) & X86_CR0_PG) {
@@ -2246,12 +2341,6 @@ static int handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 		}
 		}
 		vcpu->arch.cr0 = svm->vmcb->save.cr0;
 		vcpu->arch.cr0 = svm->vmcb->save.cr0;
 		vcpu->arch.cr3 = svm->vmcb->save.cr3;
 		vcpu->arch.cr3 = svm->vmcb->save.cr3;
-		if (is_paging(vcpu) && is_pae(vcpu) && !is_long_mode(vcpu)) {
-			if (!load_pdptrs(vcpu, vcpu->arch.cr3)) {
-				kvm_inject_gp(vcpu, 0);
-				return 1;
-			}
-		}
 		if (mmu_reload) {
 		if (mmu_reload) {
 			kvm_mmu_reset_context(vcpu);
 			kvm_mmu_reset_context(vcpu);
 			kvm_mmu_load(vcpu);
 			kvm_mmu_load(vcpu);
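handle_exit() above now splits the nested decision in two stages: nested_svm_exit_special() first filters exits the host must always own, and only NESTED_EXIT_CONTINUE falls through to the cached-intercept check, which itself emulates the #VMEXIT when it returns NESTED_EXIT_DONE. A compact userspace model of that control flow, with the real checks stubbed out (a sketch, not the kernel logic):

/* Illustrative only -- models the two-stage nested exit decision. */
#include <stdbool.h>
#include <stdio.h>

enum { NESTED_EXIT_HOST, NESTED_EXIT_DONE, NESTED_EXIT_CONTINUE };

#define SVM_EXIT_INTR	0x60
#define SVM_EXIT_NMI	0x61

/* stand-in for nested_svm_exit_special(): host always owns INTR/NMI */
static int exit_special(int exit_code)
{
	if (exit_code == SVM_EXIT_INTR || exit_code == SVM_EXIT_NMI)
		return NESTED_EXIT_HOST;
	return NESTED_EXIT_CONTINUE;
}

/* stand-in for nested_svm_exit_handled(): pretend L1 intercepts the rest */
static int exit_handled(int exit_code)
{
	(void)exit_code;
	return NESTED_EXIT_DONE;	/* would also emulate the #VMEXIT */
}

static bool reflected_to_l1(int exit_code)
{
	int vmexit = exit_special(exit_code);

	if (vmexit == NESTED_EXIT_CONTINUE)
		vmexit = exit_handled(exit_code);

	return vmexit == NESTED_EXIT_DONE;
}

int main(void)
{
	printf("INTR reflected to L1? %d\n", reflected_to_l1(SVM_EXIT_INTR));	/* 0 */
	printf("MSR  reflected to L1? %d\n", reflected_to_l1(0x7c));		/* 1 */
	return 0;
}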
@@ -2319,7 +2408,7 @@ static inline void svm_inject_irq(struct vcpu_svm *svm, int irq)
 {
 {
 	struct vmcb_control_area *control;
 	struct vmcb_control_area *control;
 
 
-	KVMTRACE_1D(INJ_VIRQ, &svm->vcpu, (u32)irq, handler);
+	trace_kvm_inj_virq(irq);
 
 	++svm->vcpu.stat.irq_injections;
 	control = &svm->vmcb->control;
@@ -2329,21 +2418,14 @@ static inline void svm_inject_irq(struct vcpu_svm *svm, int irq)
 		((/*control->int_vector >> 4*/ 0xf) << V_INTR_PRIO_SHIFT);
 }
 
-static void svm_queue_irq(struct kvm_vcpu *vcpu, unsigned nr)
-{
-	struct vcpu_svm *svm = to_svm(vcpu);
-
-	svm->vmcb->control.event_inj = nr |
-		SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR;
-}
-
 static void svm_set_irq(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
 
-	nested_svm_intr(svm);
+	BUG_ON(!(gif_set(svm)));
 
-	svm_queue_irq(vcpu, vcpu->arch.interrupt.nr);
+	svm->vmcb->control.event_inj = vcpu->arch.interrupt.nr |
+		SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR;
 }
 
 static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
@@ -2371,13 +2453,25 @@ static int svm_interrupt_allowed(struct kvm_vcpu *vcpu)
 	struct vmcb *vmcb = svm->vmcb;
 	return (vmcb->save.rflags & X86_EFLAGS_IF) &&
 		!(vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) &&
-		(svm->vcpu.arch.hflags & HF_GIF_MASK);
+		gif_set(svm) &&
+		!(is_nested(svm) && (svm->vcpu.arch.hflags & HF_VINTR_MASK));
 }
 
 static void enable_irq_window(struct kvm_vcpu *vcpu)
 {
-	svm_set_vintr(to_svm(vcpu));
-	svm_inject_irq(to_svm(vcpu), 0x0);
+	struct vcpu_svm *svm = to_svm(vcpu);
+	nsvm_printk("Trying to open IRQ window\n");
+
+	nested_svm_intr(svm);
+
+	/* In case GIF=0 we can't rely on the CPU to tell us when
+	 * GIF becomes 1, because that's a separate STGI/VMRUN intercept.
+	 * The next time we get that intercept, this function will be
+	 * called again though and we'll get the vintr intercept. */
+	if (gif_set(svm)) {
+		svm_set_vintr(svm);
+		svm_inject_irq(svm, 0x0);
+	}
 }
 
 static void enable_nmi_window(struct kvm_vcpu *vcpu)
@@ -2456,6 +2550,8 @@ static void svm_complete_interrupts(struct vcpu_svm *svm)
 	case SVM_EXITINTINFO_TYPE_EXEPT:
 		/* In case of software exception do not reinject an exception
 		   vector, but re-execute and instruction instead */
+		if (is_nested(svm))
+			break;
 		if (kvm_exception_is_soft(vector))
 			break;
 		if (exitintinfo & SVM_EXITINTINFO_VALID_ERR) {
@@ -2498,9 +2594,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	fs_selector = kvm_read_fs();
 	gs_selector = kvm_read_gs();
 	ldt_selector = kvm_read_ldt();
-	svm->host_cr2 = kvm_read_cr2();
-	if (!is_nested(svm))
-		svm->vmcb->save.cr2 = vcpu->arch.cr2;
+	svm->vmcb->save.cr2 = vcpu->arch.cr2;
 	/* required for live migration with NPT */
 	if (npt_enabled)
 		svm->vmcb->save.cr3 = vcpu->arch.cr3;
@@ -2585,8 +2679,6 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp;
 	vcpu->arch.regs[VCPU_REGS_RIP] = svm->vmcb->save.rip;
 
-	kvm_write_cr2(svm->host_cr2);
-
 	kvm_load_fs(fs_selector);
 	kvm_load_gs(gs_selector);
 	kvm_load_ldt(ldt_selector);
@@ -2602,7 +2694,10 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 
 	svm->next_rip = 0;
 
-	svm_complete_interrupts(svm);
+	if (npt_enabled) {
+		vcpu->arch.regs_avail &= ~(1 << VCPU_EXREG_PDPTR);
+		vcpu->arch.regs_dirty &= ~(1 << VCPU_EXREG_PDPTR);
+	}
 }
 
 #undef R
@@ -2673,6 +2768,64 @@ static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
 	return 0;
 }
 
+static const struct trace_print_flags svm_exit_reasons_str[] = {
+	{ SVM_EXIT_READ_CR0,           		"read_cr0" },
+	{ SVM_EXIT_READ_CR3,	      		"read_cr3" },
+	{ SVM_EXIT_READ_CR4,	      		"read_cr4" },
+	{ SVM_EXIT_READ_CR8,  	      		"read_cr8" },
+	{ SVM_EXIT_WRITE_CR0,          		"write_cr0" },
+	{ SVM_EXIT_WRITE_CR3,	      		"write_cr3" },
+	{ SVM_EXIT_WRITE_CR4,          		"write_cr4" },
+	{ SVM_EXIT_WRITE_CR8, 	      		"write_cr8" },
+	{ SVM_EXIT_READ_DR0, 	      		"read_dr0" },
+	{ SVM_EXIT_READ_DR1,	      		"read_dr1" },
+	{ SVM_EXIT_READ_DR2,	      		"read_dr2" },
+	{ SVM_EXIT_READ_DR3,	      		"read_dr3" },
+	{ SVM_EXIT_WRITE_DR0,	      		"write_dr0" },
+	{ SVM_EXIT_WRITE_DR1,	      		"write_dr1" },
+	{ SVM_EXIT_WRITE_DR2,	      		"write_dr2" },
+	{ SVM_EXIT_WRITE_DR3,	      		"write_dr3" },
+	{ SVM_EXIT_WRITE_DR5,	      		"write_dr5" },
+	{ SVM_EXIT_WRITE_DR7,	      		"write_dr7" },
+	{ SVM_EXIT_EXCP_BASE + DB_VECTOR,	"DB excp" },
+	{ SVM_EXIT_EXCP_BASE + BP_VECTOR,	"BP excp" },
+	{ SVM_EXIT_EXCP_BASE + UD_VECTOR,	"UD excp" },
+	{ SVM_EXIT_EXCP_BASE + PF_VECTOR,	"PF excp" },
+	{ SVM_EXIT_EXCP_BASE + NM_VECTOR,	"NM excp" },
+	{ SVM_EXIT_EXCP_BASE + MC_VECTOR,	"MC excp" },
+	{ SVM_EXIT_INTR,			"interrupt" },
+	{ SVM_EXIT_NMI,				"nmi" },
+	{ SVM_EXIT_SMI,				"smi" },
+	{ SVM_EXIT_INIT,			"init" },
+	{ SVM_EXIT_VINTR,			"vintr" },
+	{ SVM_EXIT_CPUID,			"cpuid" },
+	{ SVM_EXIT_INVD,			"invd" },
+	{ SVM_EXIT_HLT,				"hlt" },
+	{ SVM_EXIT_INVLPG,			"invlpg" },
+	{ SVM_EXIT_INVLPGA,			"invlpga" },
+	{ SVM_EXIT_IOIO,			"io" },
+	{ SVM_EXIT_MSR,				"msr" },
+	{ SVM_EXIT_TASK_SWITCH,			"task_switch" },
+	{ SVM_EXIT_SHUTDOWN,			"shutdown" },
+	{ SVM_EXIT_VMRUN,			"vmrun" },
+	{ SVM_EXIT_VMMCALL,			"hypercall" },
+	{ SVM_EXIT_VMLOAD,			"vmload" },
+	{ SVM_EXIT_VMSAVE,			"vmsave" },
+	{ SVM_EXIT_STGI,			"stgi" },
+	{ SVM_EXIT_CLGI,			"clgi" },
+	{ SVM_EXIT_SKINIT,			"skinit" },
+	{ SVM_EXIT_WBINVD,			"wbinvd" },
+	{ SVM_EXIT_MONITOR,			"monitor" },
+	{ SVM_EXIT_MWAIT,			"mwait" },
+	{ SVM_EXIT_NPF,				"npf" },
+	{ -1, NULL }
+};
+
+static bool svm_gb_page_enable(void)
+{
+	return true;
+}
+
 static struct kvm_x86_ops svm_x86_ops = {
 	.cpu_has_kvm_support = has_svm,
 	.disabled_by_bios = is_disabled,
@@ -2710,6 +2863,7 @@ static struct kvm_x86_ops svm_x86_ops = {
 	.set_gdt = svm_set_gdt,
 	.get_dr = svm_get_dr,
 	.set_dr = svm_set_dr,
+	.cache_reg = svm_cache_reg,
 	.get_rflags = svm_get_rflags,
 	.set_rflags = svm_set_rflags,
 
@@ -2733,6 +2887,9 @@ static struct kvm_x86_ops svm_x86_ops = {
 	.set_tss_addr = svm_set_tss_addr,
 	.get_tdp_level = get_npt_level,
 	.get_mt_mask = svm_get_mt_mask,
+
+	.exit_reasons_str = svm_exit_reasons_str,
+	.gb_page_enable = svm_gb_page_enable,
 };
 
 static int __init svm_init(void)

+ 10 - 6
arch/x86/kvm/timer.c

@@ -9,12 +9,16 @@ static int __kvm_timer_fn(struct kvm_vcpu *vcpu, struct kvm_timer *ktimer)
 	int restart_timer = 0;
 	wait_queue_head_t *q = &vcpu->wq;
 
-	/* FIXME: this code should not know anything about vcpus */
-	if (!atomic_inc_and_test(&ktimer->pending))
+	/*
+	 * There is a race window between reading and incrementing, but we do
+	 * not care about potentially loosing timer events in the !reinject
+	 * case anyway.
+	 */
+	if (ktimer->reinject || !atomic_read(&ktimer->pending)) {
+		atomic_inc(&ktimer->pending);
+		/* FIXME: this code should not know anything about vcpus */
 		set_bit(KVM_REQ_PENDING_TIMER, &vcpu->requests);
 		set_bit(KVM_REQ_PENDING_TIMER, &vcpu->requests);
-	if (!ktimer->reinject)
-		atomic_set(&ktimer->pending, 1);
+	}
 
 
 	if (waitqueue_active(q))
 	if (waitqueue_active(q))
 		wake_up_interruptible(q);
 		wake_up_interruptible(q);
@@ -33,7 +37,7 @@ enum hrtimer_restart kvm_timer_fn(struct hrtimer *data)
 	struct kvm_vcpu *vcpu;
 	struct kvm_vcpu *vcpu;
 	struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer);
 
+	vcpu = ktimer->vcpu;
 	if (!vcpu)
 	if (!vcpu)
 		return HRTIMER_NORESTART;
 
+ 355 - 0
arch/x86/kvm/trace.h

@@ -0,0 +1,355 @@
+#if !defined(_TRACE_KVM_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_KVM_H
+
+#include <linux/tracepoint.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM kvm
+#define TRACE_INCLUDE_PATH arch/x86/kvm
+#define TRACE_INCLUDE_FILE trace
+
+/*
+ * Tracepoint for guest mode entry.
+ */
+TRACE_EVENT(kvm_entry,
+	TP_PROTO(unsigned int vcpu_id),
+	TP_ARGS(vcpu_id),
+
+	TP_STRUCT__entry(
+		__field(	unsigned int,	vcpu_id		)
+	),
+
+	TP_fast_assign(
+		__entry->vcpu_id	= vcpu_id;
+	),
+
+	TP_printk("vcpu %u", __entry->vcpu_id)
+);
+
+/*
+ * Tracepoint for hypercall.
+ */
+TRACE_EVENT(kvm_hypercall,
+	TP_PROTO(unsigned long nr, unsigned long a0, unsigned long a1,
+		 unsigned long a2, unsigned long a3),
+	TP_ARGS(nr, a0, a1, a2, a3),
+
+	TP_STRUCT__entry(
+		__field(	unsigned long, 	nr		)
+		__field(	unsigned long,	a0		)
+		__field(	unsigned long,	a1		)
+		__field(	unsigned long,	a2		)
+		__field(	unsigned long,	a3		)
+	),
+
+	TP_fast_assign(
+		__entry->nr		= nr;
+		__entry->a0		= a0;
+		__entry->a1		= a1;
+		__entry->a2		= a2;
+		__entry->a3		= a3;
+	),
+
+	TP_printk("nr 0x%lx a0 0x%lx a1 0x%lx a2 0x%lx a3 0x%lx",
+		 __entry->nr, __entry->a0, __entry->a1,  __entry->a2,
+		 __entry->a3)
+);
+
+/*
+ * Tracepoint for PIO.
+ */
+TRACE_EVENT(kvm_pio,
+	TP_PROTO(unsigned int rw, unsigned int port, unsigned int size,
+		 unsigned int count),
+	TP_ARGS(rw, port, size, count),
+
+	TP_STRUCT__entry(
+		__field(	unsigned int, 	rw		)
+		__field(	unsigned int, 	port		)
+		__field(	unsigned int, 	size		)
+		__field(	unsigned int,	count		)
+	),
+
+	TP_fast_assign(
+		__entry->rw		= rw;
+		__entry->port		= port;
+		__entry->size		= size;
+		__entry->count		= count;
+	),
+
+	TP_printk("pio_%s at 0x%x size %d count %d",
+		  __entry->rw ? "write" : "read",
+		  __entry->port, __entry->size, __entry->count)
+);
+
+/*
+ * Tracepoint for cpuid.
+ */
+TRACE_EVENT(kvm_cpuid,
+	TP_PROTO(unsigned int function, unsigned long rax, unsigned long rbx,
+		 unsigned long rcx, unsigned long rdx),
+	TP_ARGS(function, rax, rbx, rcx, rdx),
+
+	TP_STRUCT__entry(
+		__field(	unsigned int,	function	)
+		__field(	unsigned long,	rax		)
+		__field(	unsigned long,	rbx		)
+		__field(	unsigned long,	rcx		)
+		__field(	unsigned long,	rdx		)
+	),
+
+	TP_fast_assign(
+		__entry->function	= function;
+		__entry->rax		= rax;
+		__entry->rbx		= rbx;
+		__entry->rcx		= rcx;
+		__entry->rdx		= rdx;
+	),
+
+	TP_printk("func %x rax %lx rbx %lx rcx %lx rdx %lx",
+		  __entry->function, __entry->rax,
+		  __entry->rbx, __entry->rcx, __entry->rdx)
+);
+
+#define AREG(x) { APIC_##x, "APIC_" #x }
+
+#define kvm_trace_symbol_apic						    \
+	AREG(ID), AREG(LVR), AREG(TASKPRI), AREG(ARBPRI), AREG(PROCPRI),    \
+	AREG(EOI), AREG(RRR), AREG(LDR), AREG(DFR), AREG(SPIV), AREG(ISR),  \
+	AREG(TMR), AREG(IRR), AREG(ESR), AREG(ICR), AREG(ICR2), AREG(LVTT), \
+	AREG(LVTTHMR), AREG(LVTPC), AREG(LVT0), AREG(LVT1), AREG(LVTERR),   \
+	AREG(TMICT), AREG(TMCCT), AREG(TDCR), AREG(SELF_IPI), AREG(EFEAT),  \
+	AREG(ECTRL)
+/*
+ * Tracepoint for apic access.
+ */
+TRACE_EVENT(kvm_apic,
+	TP_PROTO(unsigned int rw, unsigned int reg, unsigned int val),
+	TP_ARGS(rw, reg, val),
+
+	TP_STRUCT__entry(
+		__field(	unsigned int,	rw		)
+		__field(	unsigned int,	reg		)
+		__field(	unsigned int,	val		)
+	),
+
+	TP_fast_assign(
+		__entry->rw		= rw;
+		__entry->reg		= reg;
+		__entry->val		= val;
+	),
+
+	TP_printk("apic_%s %s = 0x%x",
+		  __entry->rw ? "write" : "read",
+		  __print_symbolic(__entry->reg, kvm_trace_symbol_apic),
+		  __entry->val)
+);
+
+#define trace_kvm_apic_read(reg, val)		trace_kvm_apic(0, reg, val)
+#define trace_kvm_apic_write(reg, val)		trace_kvm_apic(1, reg, val)
+
+/*
+ * Tracepoint for kvm guest exit:
+ */
+TRACE_EVENT(kvm_exit,
+	TP_PROTO(unsigned int exit_reason, unsigned long guest_rip),
+	TP_ARGS(exit_reason, guest_rip),
+
+	TP_STRUCT__entry(
+		__field(	unsigned int,	exit_reason	)
+		__field(	unsigned long,	guest_rip	)
+	),
+
+	TP_fast_assign(
+		__entry->exit_reason	= exit_reason;
+		__entry->guest_rip	= guest_rip;
+	),
+
+	TP_printk("reason %s rip 0x%lx",
+		 ftrace_print_symbols_seq(p, __entry->exit_reason,
+					  kvm_x86_ops->exit_reasons_str),
+		 __entry->guest_rip)
+);
+
+/*
+ * Tracepoint for kvm interrupt injection:
+ */
+TRACE_EVENT(kvm_inj_virq,
+	TP_PROTO(unsigned int irq),
+	TP_ARGS(irq),
+
+	TP_STRUCT__entry(
+		__field(	unsigned int,	irq		)
+	),
+
+	TP_fast_assign(
+		__entry->irq		= irq;
+	),
+
+	TP_printk("irq %u", __entry->irq)
+);
+
+/*
+ * Tracepoint for page fault.
+ */
+TRACE_EVENT(kvm_page_fault,
+	TP_PROTO(unsigned long fault_address, unsigned int error_code),
+	TP_ARGS(fault_address, error_code),
+
+	TP_STRUCT__entry(
+		__field(	unsigned long,	fault_address	)
+		__field(	unsigned int,	error_code	)
+	),
+
+	TP_fast_assign(
+		__entry->fault_address	= fault_address;
+		__entry->error_code	= error_code;
+	),
+
+	TP_printk("address %lx error_code %x",
+		  __entry->fault_address, __entry->error_code)
+);
+
+/*
+ * Tracepoint for guest MSR access.
+ */
+TRACE_EVENT(kvm_msr,
+	TP_PROTO(unsigned int rw, unsigned int ecx, unsigned long data),
+	TP_ARGS(rw, ecx, data),
+
+	TP_STRUCT__entry(
+		__field(	unsigned int,	rw		)
+		__field(	unsigned int,	ecx		)
+		__field(	unsigned long,	data		)
+	),
+
+	TP_fast_assign(
+		__entry->rw		= rw;
+		__entry->ecx		= ecx;
+		__entry->data		= data;
+	),
+
+	TP_printk("msr_%s %x = 0x%lx",
+		  __entry->rw ? "write" : "read",
+		  __entry->ecx, __entry->data)
+);
+
+#define trace_kvm_msr_read(ecx, data)		trace_kvm_msr(0, ecx, data)
+#define trace_kvm_msr_write(ecx, data)		trace_kvm_msr(1, ecx, data)
+
+/*
+ * Tracepoint for guest CR access.
+ */
+TRACE_EVENT(kvm_cr,
+	TP_PROTO(unsigned int rw, unsigned int cr, unsigned long val),
+	TP_ARGS(rw, cr, val),
+
+	TP_STRUCT__entry(
+		__field(	unsigned int,	rw		)
+		__field(	unsigned int,	cr		)
+		__field(	unsigned long,	val		)
+	),
+
+	TP_fast_assign(
+		__entry->rw		= rw;
+		__entry->cr		= cr;
+		__entry->val		= val;
+	),
+
+	TP_printk("cr_%s %x = 0x%lx",
+		  __entry->rw ? "write" : "read",
+		  __entry->cr, __entry->val)
+);
+
+#define trace_kvm_cr_read(cr, val)		trace_kvm_cr(0, cr, val)
+#define trace_kvm_cr_write(cr, val)		trace_kvm_cr(1, cr, val)
+
+TRACE_EVENT(kvm_pic_set_irq,
+	    TP_PROTO(__u8 chip, __u8 pin, __u8 elcr, __u8 imr, bool coalesced),
+	    TP_ARGS(chip, pin, elcr, imr, coalesced),
+
+	TP_STRUCT__entry(
+		__field(	__u8,		chip		)
+		__field(	__u8,		pin		)
+		__field(	__u8,		elcr		)
+		__field(	__u8,		imr		)
+		__field(	bool,		coalesced	)
+	),
+
+	TP_fast_assign(
+		__entry->chip		= chip;
+		__entry->pin		= pin;
+		__entry->elcr		= elcr;
+		__entry->imr		= imr;
+		__entry->coalesced	= coalesced;
+	),
+
+	TP_printk("chip %u pin %u (%s%s)%s",
+		  __entry->chip, __entry->pin,
+		  (__entry->elcr & (1 << __entry->pin)) ? "level":"edge",
+		  (__entry->imr & (1 << __entry->pin)) ? "|masked":"",
+		  __entry->coalesced ? " (coalesced)" : "")
+);
+
+#define kvm_apic_dst_shorthand		\
+	{0x0, "dst"},			\
+	{0x1, "self"},			\
+	{0x2, "all"},			\
+	{0x3, "all-but-self"}
+
+TRACE_EVENT(kvm_apic_ipi,
+	    TP_PROTO(__u32 icr_low, __u32 dest_id),
+	    TP_ARGS(icr_low, dest_id),
+
+	TP_STRUCT__entry(
+		__field(	__u32,		icr_low		)
+		__field(	__u32,		dest_id		)
+	),
+
+	TP_fast_assign(
+		__entry->icr_low	= icr_low;
+		__entry->dest_id	= dest_id;
+	),
+
+	TP_printk("dst %x vec %u (%s|%s|%s|%s|%s)",
+		  __entry->dest_id, (u8)__entry->icr_low,
+		  __print_symbolic((__entry->icr_low >> 8 & 0x7),
+				   kvm_deliver_mode),
+		  (__entry->icr_low & (1<<11)) ? "logical" : "physical",
+		  (__entry->icr_low & (1<<14)) ? "assert" : "de-assert",
+		  (__entry->icr_low & (1<<15)) ? "level" : "edge",
+		  __print_symbolic((__entry->icr_low >> 18 & 0x3),
+				   kvm_apic_dst_shorthand))
+);
+
+TRACE_EVENT(kvm_apic_accept_irq,
+	    TP_PROTO(__u32 apicid, __u16 dm, __u8 tm, __u8 vec, bool coalesced),
+	    TP_ARGS(apicid, dm, tm, vec, coalesced),
+
+	TP_STRUCT__entry(
+		__field(	__u32,		apicid		)
+		__field(	__u16,		dm		)
+		__field(	__u8,		tm		)
+		__field(	__u8,		vec		)
+		__field(	bool,		coalesced	)
+	),
+
+	TP_fast_assign(
+		__entry->apicid		= apicid;
+		__entry->dm		= dm;
+		__entry->tm		= tm;
+		__entry->vec		= vec;
+		__entry->coalesced	= coalesced;
+	),
+
+	TP_printk("apicid %x vec %u (%s|%s)%s",
+		  __entry->apicid, __entry->vec,
+		  __print_symbolic((__entry->dm >> 8 & 0x7), kvm_deliver_mode),
+		  __entry->tm ? "level" : "edge",
+		  __entry->coalesced ? " (coalesced)" : "")
+);
+
+#endif /* _TRACE_KVM_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>

+ 361 - 136
arch/x86/kvm/vmx.c

@@ -25,6 +25,7 @@
 #include <linux/highmem.h>
 #include <linux/sched.h>
 #include <linux/moduleparam.h>
+#include <linux/ftrace_event.h>
 #include "kvm_cache_regs.h"
 #include "x86.h"
 
@@ -34,6 +35,8 @@
 #include <asm/virtext.h>
 #include <asm/mce.h>
 
+#include "trace.h"
+
 #define __ex(x) __kvm_handle_fault_on_reboot(x)
 
 MODULE_AUTHOR("Qumranet");
@@ -51,6 +54,10 @@ module_param_named(flexpriority, flexpriority_enabled, bool, S_IRUGO);
 static int __read_mostly enable_ept = 1;
 module_param_named(ept, enable_ept, bool, S_IRUGO);
 
+static int __read_mostly enable_unrestricted_guest = 1;
+module_param_named(unrestricted_guest,
+			enable_unrestricted_guest, bool, S_IRUGO);
+
 static int __read_mostly emulate_invalid_guest_state = 0;
 module_param(emulate_invalid_guest_state, bool, S_IRUGO);
 
@@ -84,6 +91,14 @@ struct vcpu_vmx {
 		int           guest_efer_loaded;
 	} host_state;
 	struct {
+		int vm86_active;
+		u8 save_iopl;
+		struct kvm_save_segment {
+			u16 selector;
+			unsigned long base;
+			u32 limit;
+			u32 ar;
+		} tr, es, ds, fs, gs;
 		struct {
 			bool pending;
 			u8 vector;
@@ -161,6 +176,8 @@ static struct kvm_vmx_segment_field {
 	VMX_SEGMENT_FIELD(LDTR),
 };
 
+static void ept_save_pdptrs(struct kvm_vcpu *vcpu);
+
 /*
  * Keep MSR_K6_STAR at the end, as setup_msrs() will try to optimize it
  * away by decrementing the array size.
@@ -256,6 +273,26 @@ static inline bool cpu_has_vmx_flexpriority(void)
 		cpu_has_vmx_virtualize_apic_accesses();
 }
 
+static inline bool cpu_has_vmx_ept_execute_only(void)
+{
+	return !!(vmx_capability.ept & VMX_EPT_EXECUTE_ONLY_BIT);
+}
+
+static inline bool cpu_has_vmx_eptp_uncacheable(void)
+{
+	return !!(vmx_capability.ept & VMX_EPTP_UC_BIT);
+}
+
+static inline bool cpu_has_vmx_eptp_writeback(void)
+{
+	return !!(vmx_capability.ept & VMX_EPTP_WB_BIT);
+}
+
+static inline bool cpu_has_vmx_ept_2m_page(void)
+{
+	return !!(vmx_capability.ept & VMX_EPT_2MB_PAGE_BIT);
+}
+
 static inline int cpu_has_vmx_invept_individual_addr(void)
 {
 	return !!(vmx_capability.ept & VMX_EPT_EXTENT_INDIVIDUAL_BIT);
@@ -277,6 +314,12 @@ static inline int cpu_has_vmx_ept(void)
 		SECONDARY_EXEC_ENABLE_EPT;
 }
 
+static inline int cpu_has_vmx_unrestricted_guest(void)
+{
+	return vmcs_config.cpu_based_2nd_exec_ctrl &
+		SECONDARY_EXEC_UNRESTRICTED_GUEST;
+}
+
 static inline int vm_need_virtualize_apic_accesses(struct kvm *kvm)
 {
 	return flexpriority_enabled &&
@@ -497,14 +540,16 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu)
 	eb = (1u << PF_VECTOR) | (1u << UD_VECTOR) | (1u << MC_VECTOR);
 	if (!vcpu->fpu_active)
 		eb |= 1u << NM_VECTOR;
+	/*
+	 * Unconditionally intercept #DB so we can maintain dr6 without
+	 * reading it every exit.
+	 */
+	eb |= 1u << DB_VECTOR;
 	if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) {
-		if (vcpu->guest_debug &
-		    (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))
-			eb |= 1u << DB_VECTOR;
 		if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
 			eb |= 1u << BP_VECTOR;
 	}
-	if (vcpu->arch.rmode.vm86_active)
+	if (to_vmx(vcpu)->rmode.vm86_active)
 		eb = ~0;
 	if (enable_ept)
 		eb &= ~(1u << PF_VECTOR); /* bypass_guest_pf = 0 */
@@ -528,12 +573,15 @@ static void reload_tss(void)
 static void load_transition_efer(struct vcpu_vmx *vmx)
 {
 	int efer_offset = vmx->msr_offset_efer;
-	u64 host_efer = vmx->host_msrs[efer_offset].data;
-	u64 guest_efer = vmx->guest_msrs[efer_offset].data;
+	u64 host_efer;
+	u64 guest_efer;
 	u64 ignore_bits;
 
 	if (efer_offset < 0)
 		return;
+	host_efer = vmx->host_msrs[efer_offset].data;
+	guest_efer = vmx->guest_msrs[efer_offset].data;
+
 	/*
 	 * NX is emulated; LMA and LME handled by hardware; SCE meaninless
 	 * outside long mode
@@ -735,12 +783,17 @@ static void vmx_fpu_deactivate(struct kvm_vcpu *vcpu)
 
 static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu)
 {
-	return vmcs_readl(GUEST_RFLAGS);
+	unsigned long rflags;
+
+	rflags = vmcs_readl(GUEST_RFLAGS);
+	if (to_vmx(vcpu)->rmode.vm86_active)
+		rflags &= ~(unsigned long)(X86_EFLAGS_IOPL | X86_EFLAGS_VM);
+	return rflags;
 }
 
 static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
 {
-	if (vcpu->arch.rmode.vm86_active)
+	if (to_vmx(vcpu)->rmode.vm86_active)
 		rflags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM;
 	vmcs_writel(GUEST_RFLAGS, rflags);
 }
@@ -797,12 +850,13 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
 		intr_info |= INTR_INFO_DELIVER_CODE_MASK;
 	}
 
-	if (vcpu->arch.rmode.vm86_active) {
+	if (vmx->rmode.vm86_active) {
 		vmx->rmode.irq.pending = true;
 		vmx->rmode.irq.vector = nr;
 		vmx->rmode.irq.rip = kvm_rip_read(vcpu);
-		if (nr == BP_VECTOR || nr == OF_VECTOR)
-			vmx->rmode.irq.rip++;
+		if (kvm_exception_is_soft(nr))
+			vmx->rmode.irq.rip +=
+				vmx->vcpu.arch.event_exit_inst_len;
 		intr_info |= INTR_TYPE_SOFT_INTR;
 		vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info);
 		vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 1);
@@ -940,7 +994,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
 	case MSR_EFER:
 		return kvm_get_msr_common(vcpu, msr_index, pdata);
 #endif
-	case MSR_IA32_TIME_STAMP_COUNTER:
+	case MSR_IA32_TSC:
 		data = guest_read_tsc();
 		break;
 	case MSR_IA32_SYSENTER_CS:
@@ -953,9 +1007,9 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
 		data = vmcs_readl(GUEST_SYSENTER_ESP);
 		break;
 	default:
-		vmx_load_host_state(to_vmx(vcpu));
 		msr = find_msr_entry(to_vmx(vcpu), msr_index);
 		if (msr) {
+			vmx_load_host_state(to_vmx(vcpu));
 			data = msr->data;
 			break;
 		}
@@ -1000,21 +1054,9 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
 	case MSR_IA32_SYSENTER_ESP:
 		vmcs_writel(GUEST_SYSENTER_ESP, data);
 		break;
-	case MSR_IA32_TIME_STAMP_COUNTER:
+	case MSR_IA32_TSC:
 		rdtscll(host_tsc);
 		guest_write_tsc(data, host_tsc);
-		break;
-	case MSR_P6_PERFCTR0:
-	case MSR_P6_PERFCTR1:
-	case MSR_P6_EVNTSEL0:
-	case MSR_P6_EVNTSEL1:
-		/*
-		 * Just discard all writes to the performance counters; this
-		 * should keep both older linux and windows 64-bit guests
-		 * happy
-		 */
-		pr_unimpl(vcpu, "unimplemented perfctr wrmsr: 0x%x data 0x%llx\n", msr_index, data);
-
 		break;
 	case MSR_IA32_CR_PAT:
 		if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) {
@@ -1024,9 +1066,9 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
 		}
 		/* Otherwise falls through to kvm_set_msr_common */
 	default:
-		vmx_load_host_state(vmx);
 		msr = find_msr_entry(vmx, msr_index);
 		if (msr) {
+			vmx_load_host_state(vmx);
 			msr->data = data;
 			break;
 		}
@@ -1046,6 +1088,10 @@ static void vmx_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
 	case VCPU_REGS_RIP:
 		vcpu->arch.regs[VCPU_REGS_RIP] = vmcs_readl(GUEST_RIP);
 		break;
+	case VCPU_EXREG_PDPTR:
+		if (enable_ept)
+			ept_save_pdptrs(vcpu);
+		break;
 	default:
 		break;
 	}
@@ -1203,7 +1249,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
 		opt2 = SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
 			SECONDARY_EXEC_WBINVD_EXITING |
 			SECONDARY_EXEC_ENABLE_VPID |
-			SECONDARY_EXEC_ENABLE_EPT;
+			SECONDARY_EXEC_ENABLE_EPT |
+			SECONDARY_EXEC_UNRESTRICTED_GUEST;
 		if (adjust_vmx_controls(min2, opt2,
 					MSR_IA32_VMX_PROCBASED_CTLS2,
 					&_cpu_based_2nd_exec_control) < 0)
@@ -1217,12 +1264,9 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
 	if (_cpu_based_2nd_exec_control & SECONDARY_EXEC_ENABLE_EPT) {
 		/* CR3 accesses and invlpg don't need to cause VM Exits when EPT
 		   enabled */
-		min &= ~(CPU_BASED_CR3_LOAD_EXITING |
-			 CPU_BASED_CR3_STORE_EXITING |
-			 CPU_BASED_INVLPG_EXITING);
-		if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PROCBASED_CTLS,
-					&_cpu_based_exec_control) < 0)
-			return -EIO;
+		_cpu_based_exec_control &= ~(CPU_BASED_CR3_LOAD_EXITING |
+					     CPU_BASED_CR3_STORE_EXITING |
+					     CPU_BASED_INVLPG_EXITING);
 		rdmsr(MSR_IA32_VMX_EPT_VPID_CAP,
 		      vmx_capability.ept, vmx_capability.vpid);
 	}
@@ -1333,8 +1377,13 @@ static __init int hardware_setup(void)
 	if (!cpu_has_vmx_vpid())
 		enable_vpid = 0;
 
-	if (!cpu_has_vmx_ept())
+	if (!cpu_has_vmx_ept()) {
 		enable_ept = 0;
+		enable_unrestricted_guest = 0;
+	}
+
+	if (!cpu_has_vmx_unrestricted_guest())
+		enable_unrestricted_guest = 0;
 
 	if (!cpu_has_vmx_flexpriority())
 		flexpriority_enabled = 0;
@@ -1342,6 +1391,9 @@ static __init int hardware_setup(void)
 	if (!cpu_has_vmx_tpr_shadow())
 		kvm_x86_ops->update_cr8_intercept = NULL;
 
+	if (enable_ept && !cpu_has_vmx_ept_2m_page())
+		kvm_disable_largepages();
+
 	return alloc_kvm_area();
 }
 
@@ -1372,15 +1424,15 @@ static void enter_pmode(struct kvm_vcpu *vcpu)
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 
 	vmx->emulation_required = 1;
-	vcpu->arch.rmode.vm86_active = 0;
+	vmx->rmode.vm86_active = 0;
 
-	vmcs_writel(GUEST_TR_BASE, vcpu->arch.rmode.tr.base);
-	vmcs_write32(GUEST_TR_LIMIT, vcpu->arch.rmode.tr.limit);
-	vmcs_write32(GUEST_TR_AR_BYTES, vcpu->arch.rmode.tr.ar);
+	vmcs_writel(GUEST_TR_BASE, vmx->rmode.tr.base);
+	vmcs_write32(GUEST_TR_LIMIT, vmx->rmode.tr.limit);
+	vmcs_write32(GUEST_TR_AR_BYTES, vmx->rmode.tr.ar);
 
 	flags = vmcs_readl(GUEST_RFLAGS);
 	flags &= ~(X86_EFLAGS_IOPL | X86_EFLAGS_VM);
-	flags |= (vcpu->arch.rmode.save_iopl << IOPL_SHIFT);
+	flags |= (vmx->rmode.save_iopl << IOPL_SHIFT);
 	vmcs_writel(GUEST_RFLAGS, flags);
 
 	vmcs_writel(GUEST_CR4, (vmcs_readl(GUEST_CR4) & ~X86_CR4_VME) |
@@ -1391,10 +1443,10 @@ static void enter_pmode(struct kvm_vcpu *vcpu)
 	if (emulate_invalid_guest_state)
 		return;
 
-	fix_pmode_dataseg(VCPU_SREG_ES, &vcpu->arch.rmode.es);
-	fix_pmode_dataseg(VCPU_SREG_DS, &vcpu->arch.rmode.ds);
-	fix_pmode_dataseg(VCPU_SREG_GS, &vcpu->arch.rmode.gs);
-	fix_pmode_dataseg(VCPU_SREG_FS, &vcpu->arch.rmode.fs);
+	fix_pmode_dataseg(VCPU_SREG_ES, &vmx->rmode.es);
+	fix_pmode_dataseg(VCPU_SREG_DS, &vmx->rmode.ds);
+	fix_pmode_dataseg(VCPU_SREG_GS, &vmx->rmode.gs);
+	fix_pmode_dataseg(VCPU_SREG_FS, &vmx->rmode.fs);
 
 	vmcs_write16(GUEST_SS_SELECTOR, 0);
 	vmcs_write32(GUEST_SS_AR_BYTES, 0x93);
@@ -1433,20 +1485,23 @@ static void enter_rmode(struct kvm_vcpu *vcpu)
 	unsigned long flags;
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 
+	if (enable_unrestricted_guest)
+		return;
+
 	vmx->emulation_required = 1;
-	vcpu->arch.rmode.vm86_active = 1;
+	vmx->rmode.vm86_active = 1;
 
-	vcpu->arch.rmode.tr.base = vmcs_readl(GUEST_TR_BASE);
+	vmx->rmode.tr.base = vmcs_readl(GUEST_TR_BASE);
 	vmcs_writel(GUEST_TR_BASE, rmode_tss_base(vcpu->kvm));
 
-	vcpu->arch.rmode.tr.limit = vmcs_read32(GUEST_TR_LIMIT);
+	vmx->rmode.tr.limit = vmcs_read32(GUEST_TR_LIMIT);
 	vmcs_write32(GUEST_TR_LIMIT, RMODE_TSS_SIZE - 1);
 
-	vcpu->arch.rmode.tr.ar = vmcs_read32(GUEST_TR_AR_BYTES);
+	vmx->rmode.tr.ar = vmcs_read32(GUEST_TR_AR_BYTES);
 	vmcs_write32(GUEST_TR_AR_BYTES, 0x008b);
 
 	flags = vmcs_readl(GUEST_RFLAGS);
-	vcpu->arch.rmode.save_iopl
+	vmx->rmode.save_iopl
 		= (flags & X86_EFLAGS_IOPL) >> IOPL_SHIFT;
 
 	flags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM;
@@ -1468,10 +1523,10 @@ static void enter_rmode(struct kvm_vcpu *vcpu)
 		vmcs_writel(GUEST_CS_BASE, 0xf0000);
 	vmcs_write16(GUEST_CS_SELECTOR, vmcs_readl(GUEST_CS_BASE) >> 4);
 
-	fix_rmode_seg(VCPU_SREG_ES, &vcpu->arch.rmode.es);
-	fix_rmode_seg(VCPU_SREG_DS, &vcpu->arch.rmode.ds);
-	fix_rmode_seg(VCPU_SREG_GS, &vcpu->arch.rmode.gs);
-	fix_rmode_seg(VCPU_SREG_FS, &vcpu->arch.rmode.fs);
+	fix_rmode_seg(VCPU_SREG_ES, &vmx->rmode.es);
+	fix_rmode_seg(VCPU_SREG_DS, &vmx->rmode.ds);
+	fix_rmode_seg(VCPU_SREG_GS, &vmx->rmode.gs);
+	fix_rmode_seg(VCPU_SREG_FS, &vmx->rmode.fs);
 
 continue_rmode:
 	kvm_mmu_reset_context(vcpu);
@@ -1545,11 +1600,11 @@ static void vmx_decache_cr4_guest_bits(struct kvm_vcpu *vcpu)
 
 static void ept_load_pdptrs(struct kvm_vcpu *vcpu)
 {
+	if (!test_bit(VCPU_EXREG_PDPTR,
+		      (unsigned long *)&vcpu->arch.regs_dirty))
+		return;
+
 	if (is_paging(vcpu) && is_pae(vcpu) && !is_long_mode(vcpu)) {
-		if (!load_pdptrs(vcpu, vcpu->arch.cr3)) {
-			printk(KERN_ERR "EPT: Fail to load pdptrs!\n");
-			return;
-		}
 		vmcs_write64(GUEST_PDPTR0, vcpu->arch.pdptrs[0]);
 		vmcs_write64(GUEST_PDPTR1, vcpu->arch.pdptrs[1]);
 		vmcs_write64(GUEST_PDPTR2, vcpu->arch.pdptrs[2]);
@@ -1557,6 +1612,21 @@ static void ept_load_pdptrs(struct kvm_vcpu *vcpu)
 	}
 }
 
+static void ept_save_pdptrs(struct kvm_vcpu *vcpu)
+{
+	if (is_paging(vcpu) && is_pae(vcpu) && !is_long_mode(vcpu)) {
+		vcpu->arch.pdptrs[0] = vmcs_read64(GUEST_PDPTR0);
+		vcpu->arch.pdptrs[1] = vmcs_read64(GUEST_PDPTR1);
+		vcpu->arch.pdptrs[2] = vmcs_read64(GUEST_PDPTR2);
+		vcpu->arch.pdptrs[3] = vmcs_read64(GUEST_PDPTR3);
+	}
+
+	__set_bit(VCPU_EXREG_PDPTR,
+		  (unsigned long *)&vcpu->arch.regs_avail);
+	__set_bit(VCPU_EXREG_PDPTR,
+		  (unsigned long *)&vcpu->arch.regs_dirty);
+}
+
 static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4);
 
 static void ept_update_paging_mode_cr0(unsigned long *hw_cr0,
@@ -1571,8 +1641,6 @@ static void ept_update_paging_mode_cr0(unsigned long *hw_cr0,
 			      CPU_BASED_CR3_STORE_EXITING));
 		vcpu->arch.cr0 = cr0;
 		vmx_set_cr4(vcpu, vcpu->arch.cr4);
-		*hw_cr0 |= X86_CR0_PE | X86_CR0_PG;
-		*hw_cr0 &= ~X86_CR0_WP;
 	} else if (!is_paging(vcpu)) {
 		/* From nonpaging to paging */
 		vmcs_write32(CPU_BASED_VM_EXEC_CONTROL,
@@ -1581,9 +1649,10 @@ static void ept_update_paging_mode_cr0(unsigned long *hw_cr0,
 			       CPU_BASED_CR3_STORE_EXITING));
 		vcpu->arch.cr0 = cr0;
 		vmx_set_cr4(vcpu, vcpu->arch.cr4);
-		if (!(vcpu->arch.cr0 & X86_CR0_WP))
-			*hw_cr0 &= ~X86_CR0_WP;
 	}
+
+	if (!(cr0 & X86_CR0_WP))
+		*hw_cr0 &= ~X86_CR0_WP;
 }
 
 static void ept_update_paging_mode_cr4(unsigned long *hw_cr4,
@@ -1598,15 +1667,21 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
 
 static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
 {
-	unsigned long hw_cr0 = (cr0 & ~KVM_GUEST_CR0_MASK) |
-				KVM_VM_CR0_ALWAYS_ON;
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	unsigned long hw_cr0;
+
+	if (enable_unrestricted_guest)
+		hw_cr0 = (cr0 & ~KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST)
+			| KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST;
+	else
+		hw_cr0 = (cr0 & ~KVM_GUEST_CR0_MASK) | KVM_VM_CR0_ALWAYS_ON;
 
 	vmx_fpu_deactivate(vcpu);
 
-	if (vcpu->arch.rmode.vm86_active && (cr0 & X86_CR0_PE))
+	if (vmx->rmode.vm86_active && (cr0 & X86_CR0_PE))
 		enter_pmode(vcpu);
 
-	if (!vcpu->arch.rmode.vm86_active && !(cr0 & X86_CR0_PE))
+	if (!vmx->rmode.vm86_active && !(cr0 & X86_CR0_PE))
 		enter_rmode(vcpu);
 
 #ifdef CONFIG_X86_64
@@ -1650,10 +1725,8 @@ static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
 	if (enable_ept) {
 		eptp = construct_eptp(cr3);
 		vmcs_write64(EPT_POINTER, eptp);
-		ept_sync_context(eptp);
-		ept_load_pdptrs(vcpu);
 		guest_cr3 = is_paging(vcpu) ? vcpu->arch.cr3 :
-			VMX_EPT_IDENTITY_PAGETABLE_ADDR;
+			vcpu->kvm->arch.ept_identity_map_addr;
 	}
 
 	vmx_flush_tlb(vcpu);
@@ -1664,7 +1737,7 @@ static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
 
 static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 {
-	unsigned long hw_cr4 = cr4 | (vcpu->arch.rmode.vm86_active ?
+	unsigned long hw_cr4 = cr4 | (to_vmx(vcpu)->rmode.vm86_active ?
 		    KVM_RMODE_VM_CR4_ALWAYS_ON : KVM_PMODE_VM_CR4_ALWAYS_ON);
 
 	vcpu->arch.cr4 = cr4;
@@ -1707,16 +1780,13 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu,
 
 static int vmx_get_cpl(struct kvm_vcpu *vcpu)
 {
-	struct kvm_segment kvm_seg;
-
 	if (!(vcpu->arch.cr0 & X86_CR0_PE)) /* if real mode */
 		return 0;
 
 	if (vmx_get_rflags(vcpu) & X86_EFLAGS_VM) /* if virtual 8086 */
 		return 3;
 
-	vmx_get_segment(vcpu, &kvm_seg, VCPU_SREG_CS);
-	return kvm_seg.selector & 3;
+	return vmcs_read16(GUEST_CS_SELECTOR) & 3;
 }
 
 static u32 vmx_segment_access_rights(struct kvm_segment *var)
@@ -1744,20 +1814,21 @@ static u32 vmx_segment_access_rights(struct kvm_segment *var)
 static void vmx_set_segment(struct kvm_vcpu *vcpu,
 			    struct kvm_segment *var, int seg)
 {
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
 	u32 ar;
 
-	if (vcpu->arch.rmode.vm86_active && seg == VCPU_SREG_TR) {
-		vcpu->arch.rmode.tr.selector = var->selector;
-		vcpu->arch.rmode.tr.base = var->base;
-		vcpu->arch.rmode.tr.limit = var->limit;
-		vcpu->arch.rmode.tr.ar = vmx_segment_access_rights(var);
+	if (vmx->rmode.vm86_active && seg == VCPU_SREG_TR) {
+		vmx->rmode.tr.selector = var->selector;
+		vmx->rmode.tr.base = var->base;
+		vmx->rmode.tr.limit = var->limit;
+		vmx->rmode.tr.ar = vmx_segment_access_rights(var);
 		return;
 	}
 	vmcs_writel(sf->base, var->base);
 	vmcs_write32(sf->limit, var->limit);
 	vmcs_write16(sf->selector, var->selector);
-	if (vcpu->arch.rmode.vm86_active && var->s) {
+	if (vmx->rmode.vm86_active && var->s) {
 		/*
 		 * Hack real-mode segments into vm86 compatibility.
 		 */
@@ -1766,6 +1837,21 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu,
 		ar = 0xf3;
 	} else
 		ar = vmx_segment_access_rights(var);
+
+	/*
+	 *   Fix the "Accessed" bit in AR field of segment registers for older
+	 * qemu binaries.
+	 *   IA32 arch specifies that at the time of processor reset the
+	 * "Accessed" bit in the AR field of segment registers is 1. And qemu
+	 * is setting it to 0 in the usedland code. This causes invalid guest
+	 * state vmexit when "unrestricted guest" mode is turned on.
+	 *    Fix for this setup issue in cpu_reset is being pushed in the qemu
+	 * tree. Newer qemu binaries with that qemu fix would not need this
+	 * kvm hack.
+	 */
+	if (enable_unrestricted_guest && (seg != VCPU_SREG_LDTR))
+		ar |= 0x1; /* Accessed */
+
 	vmcs_write32(sf->ar_bytes, ar);
 }
 
@@ -2040,7 +2126,7 @@ static int init_rmode_identity_map(struct kvm *kvm)
 	if (likely(kvm->arch.ept_identity_pagetable_done))
 		return 1;
 	ret = 0;
-	identity_map_pfn = VMX_EPT_IDENTITY_PAGETABLE_ADDR >> PAGE_SHIFT;
+	identity_map_pfn = kvm->arch.ept_identity_map_addr >> PAGE_SHIFT;
 	r = kvm_clear_guest_page(kvm, identity_map_pfn, 0, PAGE_SIZE);
 	if (r < 0)
 		goto out;
@@ -2062,11 +2148,19 @@ out:
 static void seg_setup(int seg)
 {
 	struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
+	unsigned int ar;
 
 	vmcs_write16(sf->selector, 0);
 	vmcs_writel(sf->base, 0);
 	vmcs_write32(sf->limit, 0xffff);
-	vmcs_write32(sf->ar_bytes, 0xf3);
+	if (enable_unrestricted_guest) {
+		ar = 0x93;
+		if (seg == VCPU_SREG_CS)
+			ar |= 0x08; /* code segment */
+	} else
+		ar = 0xf3;
+
+	vmcs_write32(sf->ar_bytes, ar);
 }
 
 static int alloc_apic_access_page(struct kvm *kvm)
@@ -2101,14 +2195,15 @@ static int alloc_identity_pagetable(struct kvm *kvm)
 		goto out;
 	kvm_userspace_mem.slot = IDENTITY_PAGETABLE_PRIVATE_MEMSLOT;
 	kvm_userspace_mem.flags = 0;
-	kvm_userspace_mem.guest_phys_addr = VMX_EPT_IDENTITY_PAGETABLE_ADDR;
+	kvm_userspace_mem.guest_phys_addr =
+		kvm->arch.ept_identity_map_addr;
 	kvm_userspace_mem.memory_size = PAGE_SIZE;
 	r = __kvm_set_memory_region(kvm, &kvm_userspace_mem, 0);
 	if (r)
 		goto out;
 
 	kvm->arch.ept_identity_pagetable = gfn_to_page(kvm,
-			VMX_EPT_IDENTITY_PAGETABLE_ADDR >> PAGE_SHIFT);
+			kvm->arch.ept_identity_map_addr >> PAGE_SHIFT);
 out:
 	up_write(&kvm->slots_lock);
 	return r;
@@ -2209,6 +2304,8 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
 			exec_control &= ~SECONDARY_EXEC_ENABLE_VPID;
 		if (!enable_ept)
 			exec_control &= ~SECONDARY_EXEC_ENABLE_EPT;
+		if (!enable_unrestricted_guest)
+			exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST;
 		vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control);
 	}
 
@@ -2326,14 +2423,14 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
 		goto out;
 	}
 
-	vmx->vcpu.arch.rmode.vm86_active = 0;
+	vmx->rmode.vm86_active = 0;
 
 	vmx->soft_vnmi_blocked = 0;
 
 	vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val();
 	kvm_set_cr8(&vmx->vcpu, 0);
 	msr = 0xfee00000 | MSR_IA32_APICBASE_ENABLE;
-	if (vmx->vcpu.vcpu_id == 0)
+	if (kvm_vcpu_is_bsp(&vmx->vcpu))
 		msr |= MSR_IA32_APICBASE_BSP;
 	kvm_set_apic_base(&vmx->vcpu, msr);
 
@@ -2344,7 +2441,7 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
 	 * GUEST_CS_BASE should really be 0xffff0000, but VT vm86 mode
 	 * insists on having GUEST_CS_BASE == GUEST_CS_SELECTOR << 4.  Sigh.
 	 */
-	if (vmx->vcpu.vcpu_id == 0) {
+	if (kvm_vcpu_is_bsp(&vmx->vcpu)) {
 		vmcs_write16(GUEST_CS_SELECTOR, 0xf000);
 		vmcs_writel(GUEST_CS_BASE, 0x000f0000);
 	} else {
@@ -2373,7 +2470,7 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
 	vmcs_writel(GUEST_SYSENTER_EIP, 0);
 
 	vmcs_writel(GUEST_RFLAGS, 0x02);
-	if (vmx->vcpu.vcpu_id == 0)
+	if (kvm_vcpu_is_bsp(&vmx->vcpu))
 		kvm_rip_write(vcpu, 0xfff0);
 	else
 		kvm_rip_write(vcpu, 0);
@@ -2461,13 +2558,16 @@ static void vmx_inject_irq(struct kvm_vcpu *vcpu)
 	uint32_t intr;
 	int irq = vcpu->arch.interrupt.nr;
 
-	KVMTRACE_1D(INJ_VIRQ, vcpu, (u32)irq, handler);
+	trace_kvm_inj_virq(irq);
 
 	++vcpu->stat.irq_injections;
-	if (vcpu->arch.rmode.vm86_active) {
+	if (vmx->rmode.vm86_active) {
 		vmx->rmode.irq.pending = true;
 		vmx->rmode.irq.vector = irq;
 		vmx->rmode.irq.rip = kvm_rip_read(vcpu);
+		if (vcpu->arch.interrupt.soft)
+			vmx->rmode.irq.rip +=
+				vmx->vcpu.arch.event_exit_inst_len;
 		vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
 			     irq | INTR_TYPE_SOFT_INTR | INTR_INFO_VALID_MASK);
 		vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 1);
@@ -2502,7 +2602,7 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
 	}
 
 	++vcpu->stat.nmi_injections;
-	if (vcpu->arch.rmode.vm86_active) {
+	if (vmx->rmode.vm86_active) {
 		vmx->rmode.irq.pending = true;
 		vmx->rmode.irq.vector = NMI_VECTOR;
 		vmx->rmode.irq.rip = kvm_rip_read(vcpu);
@@ -2659,14 +2759,14 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 		if (enable_ept)
 			BUG();
 		cr2 = vmcs_readl(EXIT_QUALIFICATION);
-		KVMTRACE_3D(PAGE_FAULT, vcpu, error_code, (u32)cr2,
-			    (u32)((u64)cr2 >> 32), handler);
+		trace_kvm_page_fault(cr2, error_code);
+
 		if (kvm_event_needs_reinjection(vcpu))
 			kvm_mmu_unprotect_page_virt(vcpu, cr2);
 		return kvm_mmu_page_fault(vcpu, cr2, error_code);
 	}
 
-	if (vcpu->arch.rmode.vm86_active &&
+	if (vmx->rmode.vm86_active &&
 	    handle_rmode_exception(vcpu, intr_info & INTR_INFO_VECTOR_MASK,
 								error_code)) {
 		if (vcpu->arch.halt_request) {
@@ -2707,7 +2807,6 @@ static int handle_external_interrupt(struct kvm_vcpu *vcpu,
 				     struct kvm_run *kvm_run)
 {
 	++vcpu->stat.irq_exits;
-	KVMTRACE_1D(INTR, vcpu, vmcs_read32(VM_EXIT_INTR_INFO), handler);
 	return 1;
 }
 
@@ -2755,7 +2854,7 @@ vmx_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall)
 
 static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
-	unsigned long exit_qualification;
+	unsigned long exit_qualification, val;
 	int cr;
 	int reg;
 
@@ -2764,21 +2863,19 @@ static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	reg = (exit_qualification >> 8) & 15;
 	switch ((exit_qualification >> 4) & 3) {
 	case 0: /* mov to cr */
-		KVMTRACE_3D(CR_WRITE, vcpu, (u32)cr,
-			    (u32)kvm_register_read(vcpu, reg),
-			    (u32)((u64)kvm_register_read(vcpu, reg) >> 32),
-			    handler);
+		val = kvm_register_read(vcpu, reg);
+		trace_kvm_cr_write(cr, val);
 		switch (cr) {
 		case 0:
-			kvm_set_cr0(vcpu, kvm_register_read(vcpu, reg));
+			kvm_set_cr0(vcpu, val);
 			skip_emulated_instruction(vcpu);
 			return 1;
 		case 3:
-			kvm_set_cr3(vcpu, kvm_register_read(vcpu, reg));
+			kvm_set_cr3(vcpu, val);
 			skip_emulated_instruction(vcpu);
 			return 1;
 		case 4:
-			kvm_set_cr4(vcpu, kvm_register_read(vcpu, reg));
+			kvm_set_cr4(vcpu, val);
 			skip_emulated_instruction(vcpu);
 			return 1;
 		case 8: {
@@ -2800,23 +2897,19 @@ static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 		vcpu->arch.cr0 &= ~X86_CR0_TS;
 		vmcs_writel(CR0_READ_SHADOW, vcpu->arch.cr0);
 		vmx_fpu_activate(vcpu);
-		KVMTRACE_0D(CLTS, vcpu, handler);
 		skip_emulated_instruction(vcpu);
 		return 1;
 	case 1: /*mov from cr*/
 		switch (cr) {
 		case 3:
 			kvm_register_write(vcpu, reg, vcpu->arch.cr3);
-			KVMTRACE_3D(CR_READ, vcpu, (u32)cr,
-				    (u32)kvm_register_read(vcpu, reg),
-				    (u32)((u64)kvm_register_read(vcpu, reg) >> 32),
-				    handler);
+			trace_kvm_cr_read(cr, vcpu->arch.cr3);
 			skip_emulated_instruction(vcpu);
 			return 1;
 		case 8:
-			kvm_register_write(vcpu, reg, kvm_get_cr8(vcpu));
-			KVMTRACE_2D(CR_READ, vcpu, (u32)cr,
-				    (u32)kvm_register_read(vcpu, reg), handler);
+			val = kvm_get_cr8(vcpu);
+			kvm_register_write(vcpu, reg, val);
+			trace_kvm_cr_read(cr, val);
 			skip_emulated_instruction(vcpu);
 			return 1;
 		}
@@ -2841,6 +2934,8 @@ static int handle_dr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	unsigned long val;
 	int dr, reg;
 
+	if (!kvm_require_cpl(vcpu, 0))
+		return 1;
 	dr = vmcs_readl(GUEST_DR7);
 	if (dr & DR7_GD) {
 		/*
@@ -2884,7 +2979,6 @@ static int handle_dr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 			val = 0;
 		}
 		kvm_register_write(vcpu, reg, val);
-		KVMTRACE_2D(DR_READ, vcpu, (u32)dr, (u32)val, handler);
 	} else {
 		val = vcpu->arch.regs[reg];
 		switch (dr) {
@@ -2917,7 +3011,6 @@ static int handle_dr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 			}
 			break;
 		}
-		KVMTRACE_2D(DR_WRITE, vcpu, (u32)dr, (u32)val, handler);
 	}
 	skip_emulated_instruction(vcpu);
 	return 1;
@@ -2939,8 +3032,7 @@ static int handle_rdmsr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 		return 1;
 	}
 
-	KVMTRACE_3D(MSR_READ, vcpu, ecx, (u32)data, (u32)(data >> 32),
-		    handler);
+	trace_kvm_msr_read(ecx, data);
 
 	/* FIXME: handling of bits 32:63 of rax, rdx */
 	vcpu->arch.regs[VCPU_REGS_RAX] = data & -1u;
@@ -2955,8 +3047,7 @@ static int handle_wrmsr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	u64 data = (vcpu->arch.regs[VCPU_REGS_RAX] & -1u)
 		| ((u64)(vcpu->arch.regs[VCPU_REGS_RDX] & -1u) << 32);
 
-	KVMTRACE_3D(MSR_WRITE, vcpu, ecx, (u32)data, (u32)(data >> 32),
-		    handler);
+	trace_kvm_msr_write(ecx, data);
 
 	if (vmx_set_msr(vcpu, ecx, data) != 0) {
 		kvm_inject_gp(vcpu, 0);
@@ -2983,7 +3074,6 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu,
 	cpu_based_vm_exec_control &= ~CPU_BASED_VIRTUAL_INTR_PENDING;
 	vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
 
-	KVMTRACE_0D(PEND_INTR, vcpu, handler);
 	++vcpu->stat.irq_window_exits;
 
 	/*
@@ -3049,7 +3139,7 @@ static int handle_apic_access(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 		printk(KERN_ERR
 		       "Fail to handle apic access vmexit! Offset is 0x%lx\n",
 		       offset);
-		return -ENOTSUPP;
+		return -ENOEXEC;
 	}
 	return 1;
 }
@@ -3118,7 +3208,7 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 
 	if (exit_qualification & (1 << 6)) {
 		printk(KERN_ERR "EPT: GPA exceeds GAW!\n");
-		return -ENOTSUPP;
+		return -EINVAL;
 	}
 
 	gla_validity = (exit_qualification >> 7) & 0x3;
@@ -3130,14 +3220,98 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 		printk(KERN_ERR "EPT: Exit qualification is 0x%lx\n",
 			(long unsigned int)exit_qualification);
 		kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
-		kvm_run->hw.hardware_exit_reason = 0;
-		return -ENOTSUPP;
+		kvm_run->hw.hardware_exit_reason = EXIT_REASON_EPT_VIOLATION;
+		return 0;
 	}
 
 	gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS);
+	trace_kvm_page_fault(gpa, exit_qualification);
 	return kvm_mmu_page_fault(vcpu, gpa & PAGE_MASK, 0);
 }
 
+static u64 ept_rsvd_mask(u64 spte, int level)
+{
+	int i;
+	u64 mask = 0;
+
+	for (i = 51; i > boot_cpu_data.x86_phys_bits; i--)
+		mask |= (1ULL << i);
+
+	if (level > 2)
+		/* bits 7:3 reserved */
+		mask |= 0xf8;
+	else if (level == 2) {
+		if (spte & (1ULL << 7))
+			/* 2MB ref, bits 20:12 reserved */
+			mask |= 0x1ff000;
+		else
+			/* bits 6:3 reserved */
+			mask |= 0x78;
+	}
+
+	return mask;
+}
+
+static void ept_misconfig_inspect_spte(struct kvm_vcpu *vcpu, u64 spte,
+				       int level)
+{
+	printk(KERN_ERR "%s: spte 0x%llx level %d\n", __func__, spte, level);
+
+	/* 010b (write-only) */
+	WARN_ON((spte & 0x7) == 0x2);
+
+	/* 110b (write/execute) */
+	WARN_ON((spte & 0x7) == 0x6);
+
+	/* 100b (execute-only) and value not supported by logical processor */
+	if (!cpu_has_vmx_ept_execute_only())
+		WARN_ON((spte & 0x7) == 0x4);
+
+	/* not 000b */
+	if ((spte & 0x7)) {
+		u64 rsvd_bits = spte & ept_rsvd_mask(spte, level);
+
+		if (rsvd_bits != 0) {
+			printk(KERN_ERR "%s: rsvd_bits = 0x%llx\n",
+					 __func__, rsvd_bits);
+			WARN_ON(1);
+		}
+
+		if (level == 1 || (level == 2 && (spte & (1ULL << 7)))) {
+			u64 ept_mem_type = (spte & 0x38) >> 3;
+
+			if (ept_mem_type == 2 || ept_mem_type == 3 ||
+			    ept_mem_type == 7) {
+				printk(KERN_ERR "%s: ept_mem_type=0x%llx\n",
+						__func__, ept_mem_type);
+				WARN_ON(1);
+			}
+		}
+	}
+}
+
+static int handle_ept_misconfig(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+	u64 sptes[4];
+	int nr_sptes, i;
+	gpa_t gpa;
+
+	gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS);
+
+	printk(KERN_ERR "EPT: Misconfiguration.\n");
+	printk(KERN_ERR "EPT: GPA: 0x%llx\n", gpa);
+
+	nr_sptes = kvm_mmu_get_spte_hierarchy(vcpu, gpa, sptes);
+
+	for (i = PT64_ROOT_LEVEL; i > PT64_ROOT_LEVEL - nr_sptes; --i)
+		ept_misconfig_inspect_spte(vcpu, sptes[i-1], i);
+
+	kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
+	kvm_run->hw.hardware_exit_reason = EXIT_REASON_EPT_MISCONFIG;
+
+	return 0;
+}
+
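The new handle_ept_misconfig() above dumps the spte hierarchy with kvm_mmu_get_spte_hierarchy() and warns on combinations the architecture treats as misconfigurations: write-only (and, without the execute-only capability, execute-only) permissions, reserved bits above the host's physical address width, and invalid EPT memory types. A standalone sketch of the reserved-bit calculation follows; the 40-bit MAXPHYADDR and the test entry are made-up example values, not taken from the patch.

#include <stdio.h>
#include <stdint.h>

/* Mirrors the mask construction in ept_rsvd_mask() above, for a host with
 * a hypothetical 40-bit physical address width. */
static uint64_t ept_rsvd_mask(uint64_t spte, int level, int phys_bits)
{
	uint64_t mask = 0;
	int i;

	for (i = 51; i > phys_bits; i--)	/* bits above MAXPHYADDR */
		mask |= 1ULL << i;

	if (level > 2)				/* upper levels: bits 7:3 reserved */
		mask |= 0xf8;
	else if (level == 2) {
		if (spte & (1ULL << 7))		/* 2MB page: bits 20:12 reserved */
			mask |= 0x1ff000;
		else				/* page-directory entry: bits 6:3 */
			mask |= 0x78;
	}
	return mask;
}

int main(void)
{
	/* A made-up 2MB mapping with a stray bit set in the 20:12 range. */
	uint64_t spte = (1ULL << 7) | (1ULL << 13) | 0x7;

	printf("reserved bits hit: 0x%llx\n",
	       (unsigned long long)(spte & ept_rsvd_mask(spte, 2, 40)));
	return 0;
}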
 static int handle_nmi_window(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 static int handle_nmi_window(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
 {
 	u32 cpu_based_vm_exec_control;
 	u32 cpu_based_vm_exec_control;
@@ -3217,8 +3391,9 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu,
 	[EXIT_REASON_APIC_ACCESS]             = handle_apic_access,
 	[EXIT_REASON_APIC_ACCESS]             = handle_apic_access,
 	[EXIT_REASON_WBINVD]                  = handle_wbinvd,
 	[EXIT_REASON_WBINVD]                  = handle_wbinvd,
 	[EXIT_REASON_TASK_SWITCH]             = handle_task_switch,
 	[EXIT_REASON_TASK_SWITCH]             = handle_task_switch,
-	[EXIT_REASON_EPT_VIOLATION]	      = handle_ept_violation,
 	[EXIT_REASON_MCE_DURING_VMENTRY]      = handle_machine_check,
 	[EXIT_REASON_MCE_DURING_VMENTRY]      = handle_machine_check,
+	[EXIT_REASON_EPT_VIOLATION]	      = handle_ept_violation,
+	[EXIT_REASON_EPT_MISCONFIG]           = handle_ept_misconfig,
 };
 };
 
 
 static const int kvm_vmx_max_exit_handlers =
 static const int kvm_vmx_max_exit_handlers =
@@ -3234,8 +3409,7 @@ static int vmx_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 	u32 exit_reason = vmx->exit_reason;
 	u32 exit_reason = vmx->exit_reason;
 	u32 vectoring_info = vmx->idt_vectoring_info;
 	u32 vectoring_info = vmx->idt_vectoring_info;
 
 
-	KVMTRACE_3D(VMEXIT, vcpu, exit_reason, (u32)kvm_rip_read(vcpu),
-		    (u32)((u64)kvm_rip_read(vcpu) >> 32), entryexit);
+	trace_kvm_exit(exit_reason, kvm_rip_read(vcpu));
 
 
 	/* If we need to emulate an MMIO from handle_invalid_guest_state
 	/* If we need to emulate an MMIO from handle_invalid_guest_state
 	 * we just return 0 */
 	 * we just return 0 */
@@ -3247,10 +3421,8 @@ static int vmx_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 
 
 	/* Accessing CR3 doesn't cause a VM exit in paging mode, so we need
 	 * to sync with the guest's real CR3. */
-	if (enable_ept && is_paging(vcpu)) {
+	if (enable_ept && is_paging(vcpu))
 		vcpu->arch.cr3 = vmcs_readl(GUEST_CR3);
 		vcpu->arch.cr3 = vmcs_readl(GUEST_CR3);
-		ept_load_pdptrs(vcpu);
-	}
 
 
 	if (unlikely(vmx->fail)) {
 	if (unlikely(vmx->fail)) {
 		kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY;
 		kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY;
@@ -3326,10 +3498,8 @@ static void vmx_complete_interrupts(struct vcpu_vmx *vmx)
 
 
 	/* We need to handle NMIs before interrupts are enabled */
 	/* We need to handle NMIs before interrupts are enabled */
 	if ((exit_intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR &&
 	if ((exit_intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR &&
-	    (exit_intr_info & INTR_INFO_VALID_MASK)) {
-		KVMTRACE_0D(NMI, &vmx->vcpu, handler);
+	    (exit_intr_info & INTR_INFO_VALID_MASK))
 		asm("int $2");
 		asm("int $2");
-	}
 
 
 	idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK;
 	idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK;
 
 
@@ -3434,6 +3604,10 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 
 
+	if (enable_ept && is_paging(vcpu)) {
+		vmcs_writel(GUEST_CR3, vcpu->arch.cr3);
+		ept_load_pdptrs(vcpu);
+	}
 	/* Record the guest's net vcpu time for enforced NMI injections. */
 	/* Record the guest's net vcpu time for enforced NMI injections. */
 	if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked))
 	if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked))
 		vmx->entry_time = ktime_get();
 		vmx->entry_time = ktime_get();
@@ -3449,12 +3623,21 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	if (test_bit(VCPU_REGS_RIP, (unsigned long *)&vcpu->arch.regs_dirty))
 	if (test_bit(VCPU_REGS_RIP, (unsigned long *)&vcpu->arch.regs_dirty))
 		vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]);
 		vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]);
 
 
+	/* When single-stepping over STI and MOV SS, we must clear the
+	 * corresponding interruptibility bits in the guest state.  Otherwise
+	 * vmentry fails, because it then expects bit 14 (BS) of the pending
+	 * debug exceptions field to be set, which is wrong for the guest
+	 * debugging case. */
+	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
+		vmx_set_interrupt_shadow(vcpu, 0);
+
 	/*
 	/*
 	 * Loading guest fpu may have cleared host cr0.ts
 	 * Loading guest fpu may have cleared host cr0.ts
 	 */
 	 */
 	vmcs_writel(HOST_CR0, read_cr0());
 	vmcs_writel(HOST_CR0, read_cr0());
 
 
-	set_debugreg(vcpu->arch.dr6, 6);
+	if (vcpu->arch.switch_db_regs)
+		set_debugreg(vcpu->arch.dr6, 6);
 
 
 	asm(
 	asm(
 		/* Store host registers */
 		/* Store host registers */
@@ -3465,11 +3648,16 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 		"mov %%"R"sp, %c[host_rsp](%0) \n\t"
 		"mov %%"R"sp, %c[host_rsp](%0) \n\t"
 		__ex(ASM_VMX_VMWRITE_RSP_RDX) "\n\t"
 		__ex(ASM_VMX_VMWRITE_RSP_RDX) "\n\t"
 		"1: \n\t"
 		"1: \n\t"
+		/* Reload cr2 if changed */
+		"mov %c[cr2](%0), %%"R"ax \n\t"
+		"mov %%cr2, %%"R"dx \n\t"
+		"cmp %%"R"ax, %%"R"dx \n\t"
+		"je 2f \n\t"
+		"mov %%"R"ax, %%cr2 \n\t"
+		"2: \n\t"
 		/* Check if vmlaunch or vmresume is needed */
 		"cmpl $0, %c[launched](%0) \n\t"
 		"cmpl $0, %c[launched](%0) \n\t"
 		/* Load guest registers.  Don't clobber flags. */
 		/* Load guest registers.  Don't clobber flags. */
-		"mov %c[cr2](%0), %%"R"ax \n\t"
-		"mov %%"R"ax, %%cr2 \n\t"
 		"mov %c[rax](%0), %%"R"ax \n\t"
 		"mov %c[rax](%0), %%"R"ax \n\t"
 		"mov %c[rbx](%0), %%"R"bx \n\t"
 		"mov %c[rbx](%0), %%"R"bx \n\t"
 		"mov %c[rdx](%0), %%"R"dx \n\t"
 		"mov %c[rdx](%0), %%"R"dx \n\t"
@@ -3547,10 +3735,12 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 #endif
 #endif
 	      );
 	      );
 
 
-	vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP));
+	vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP)
+				  | (1 << VCPU_EXREG_PDPTR));
 	vcpu->arch.regs_dirty = 0;
 	vcpu->arch.regs_dirty = 0;
 
 
-	get_debugreg(vcpu->arch.dr6, 6);
+	if (vcpu->arch.switch_db_regs)
+		get_debugreg(vcpu->arch.dr6, 6);
 
 
 	vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
 	vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
 	if (vmx->rmode.irq.pending)
 	if (vmx->rmode.irq.pending)
@@ -3633,9 +3823,13 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
 		if (alloc_apic_access_page(kvm) != 0)
 		if (alloc_apic_access_page(kvm) != 0)
 			goto free_vmcs;
 			goto free_vmcs;
 
 
-	if (enable_ept)
+	if (enable_ept) {
+		if (!kvm->arch.ept_identity_map_addr)
+			kvm->arch.ept_identity_map_addr =
+				VMX_EPT_IDENTITY_PAGETABLE_ADDR;
 		if (alloc_identity_pagetable(kvm) != 0)
 		if (alloc_identity_pagetable(kvm) != 0)
 			goto free_vmcs;
 			goto free_vmcs;
+	}
 
 
 	return &vmx->vcpu;
 	return &vmx->vcpu;
 
 
@@ -3699,6 +3893,34 @@ static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
 	return ret;
 	return ret;
 }
 }
 
 
+static const struct trace_print_flags vmx_exit_reasons_str[] = {
+	{ EXIT_REASON_EXCEPTION_NMI,           "exception" },
+	{ EXIT_REASON_EXTERNAL_INTERRUPT,      "ext_irq" },
+	{ EXIT_REASON_TRIPLE_FAULT,            "triple_fault" },
+	{ EXIT_REASON_NMI_WINDOW,              "nmi_window" },
+	{ EXIT_REASON_IO_INSTRUCTION,          "io_instruction" },
+	{ EXIT_REASON_CR_ACCESS,               "cr_access" },
+	{ EXIT_REASON_DR_ACCESS,               "dr_access" },
+	{ EXIT_REASON_CPUID,                   "cpuid" },
+	{ EXIT_REASON_MSR_READ,                "rdmsr" },
+	{ EXIT_REASON_MSR_WRITE,               "wrmsr" },
+	{ EXIT_REASON_PENDING_INTERRUPT,       "interrupt_window" },
+	{ EXIT_REASON_HLT,                     "halt" },
+	{ EXIT_REASON_INVLPG,                  "invlpg" },
+	{ EXIT_REASON_VMCALL,                  "hypercall" },
+	{ EXIT_REASON_TPR_BELOW_THRESHOLD,     "tpr_below_thres" },
+	{ EXIT_REASON_APIC_ACCESS,             "apic_access" },
+	{ EXIT_REASON_WBINVD,                  "wbinvd" },
+	{ EXIT_REASON_TASK_SWITCH,             "task_switch" },
+	{ EXIT_REASON_EPT_VIOLATION,           "ept_violation" },
+	{ -1, NULL }
+};
+
+static bool vmx_gb_page_enable(void)
+{
+	return false;
+}
+
 static struct kvm_x86_ops vmx_x86_ops = {
 static struct kvm_x86_ops vmx_x86_ops = {
 	.cpu_has_kvm_support = cpu_has_kvm_support,
 	.cpu_has_kvm_support = cpu_has_kvm_support,
 	.disabled_by_bios = vmx_disabled_by_bios,
 	.disabled_by_bios = vmx_disabled_by_bios,
@@ -3758,6 +3980,9 @@ static struct kvm_x86_ops vmx_x86_ops = {
 	.set_tss_addr = vmx_set_tss_addr,
 	.set_tss_addr = vmx_set_tss_addr,
 	.get_tdp_level = get_ept_level,
 	.get_tdp_level = get_ept_level,
 	.get_mt_mask = vmx_get_mt_mask,
 	.get_mt_mask = vmx_get_mt_mask,
+
+	.exit_reasons_str = vmx_exit_reasons_str,
+	.gb_page_enable = vmx_gb_page_enable,
 };
 };
 
 
 static int __init vmx_init(void)
 static int __init vmx_init(void)

+ 576 - 239
arch/x86/kvm/x86.c

@@ -37,11 +37,16 @@
 #include <linux/iommu.h>
 #include <linux/iommu.h>
 #include <linux/intel-iommu.h>
 #include <linux/intel-iommu.h>
 #include <linux/cpufreq.h>
 #include <linux/cpufreq.h>
+#include <trace/events/kvm.h>
+#undef TRACE_INCLUDE_FILE
+#define CREATE_TRACE_POINTS
+#include "trace.h"
 
 
 #include <asm/uaccess.h>
 #include <asm/uaccess.h>
 #include <asm/msr.h>
 #include <asm/msr.h>
 #include <asm/desc.h>
 #include <asm/desc.h>
 #include <asm/mtrr.h>
 #include <asm/mtrr.h>
+#include <asm/mce.h>
 
 
 #define MAX_IO_MSRS 256
 #define MAX_IO_MSRS 256
 #define CR0_RESERVED_BITS						\
 #define CR0_RESERVED_BITS						\
@@ -55,6 +60,10 @@
 			  | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE))
 			  | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE))
 
 
 #define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR)
 #define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR)
+
+#define KVM_MAX_MCE_BANKS 32
+#define KVM_MCE_CAP_SUPPORTED MCG_CTL_P
+
 /* EFER defaults:
 /* EFER defaults:
  * - enable syscall per default because its emulated by KVM
  * - enable syscall per default because its emulated by KVM
  * - enable LME and LMA per default on 64 bit KVM
  * - enable LME and LMA per default on 64 bit KVM
@@ -68,14 +77,16 @@ static u64 __read_mostly efer_reserved_bits = 0xfffffffffffffffeULL;
 #define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
 #define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
 
 
+static void update_cr8_intercept(struct kvm_vcpu *vcpu);
 static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
 static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
 				    struct kvm_cpuid_entry2 __user *entries);
 				    struct kvm_cpuid_entry2 __user *entries);
-struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
-					      u32 function, u32 index);
 
 
 struct kvm_x86_ops *kvm_x86_ops;
 struct kvm_x86_ops *kvm_x86_ops;
 EXPORT_SYMBOL_GPL(kvm_x86_ops);
 EXPORT_SYMBOL_GPL(kvm_x86_ops);
 
 
+int ignore_msrs = 0;
+module_param_named(ignore_msrs, ignore_msrs, bool, S_IRUGO | S_IWUSR);
+
 struct kvm_stats_debugfs_item debugfs_entries[] = {
 struct kvm_stats_debugfs_item debugfs_entries[] = {
 	{ "pf_fixed", VCPU_STAT(pf_fixed) },
 	{ "pf_fixed", VCPU_STAT(pf_fixed) },
 	{ "pf_guest", VCPU_STAT(pf_guest) },
 	{ "pf_guest", VCPU_STAT(pf_guest) },
@@ -122,18 +133,16 @@ unsigned long segment_base(u16 selector)
 	if (selector == 0)
 	if (selector == 0)
 		return 0;
 		return 0;
 
 
-	asm("sgdt %0" : "=m"(gdt));
+	kvm_get_gdt(&gdt);
 	table_base = gdt.base;
 	table_base = gdt.base;
 
 
 	if (selector & 4) {           /* from ldt */
 	if (selector & 4) {           /* from ldt */
-		u16 ldt_selector;
+		u16 ldt_selector = kvm_read_ldt();
 
 
-		asm("sldt %0" : "=g"(ldt_selector));
 		table_base = segment_base(ldt_selector);
 		table_base = segment_base(ldt_selector);
 	}
 	}
 	d = (struct desc_struct *)(table_base + (selector & ~7));
 	d = (struct desc_struct *)(table_base + (selector & ~7));
-	v = d->base0 | ((unsigned long)d->base1 << 16) |
-		((unsigned long)d->base2 << 24);
+	v = get_desc_base(d);
 #ifdef CONFIG_X86_64
 #ifdef CONFIG_X86_64
 	if (d->s == 0 && (d->type == 2 || d->type == 9 || d->type == 11))
 	if (d->s == 0 && (d->type == 2 || d->type == 9 || d->type == 11))
 		v |= ((unsigned long)((struct ldttss_desc64 *)d)->base3) << 32;
 		v |= ((unsigned long)((struct ldttss_desc64 *)d)->base3) << 32;
@@ -176,16 +185,22 @@ void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long addr,
 	++vcpu->stat.pf_guest;
 	++vcpu->stat.pf_guest;
 
 
 	if (vcpu->arch.exception.pending) {
 	if (vcpu->arch.exception.pending) {
-		if (vcpu->arch.exception.nr == PF_VECTOR) {
-			printk(KERN_DEBUG "kvm: inject_page_fault:"
-					" double fault 0x%lx\n", addr);
-			vcpu->arch.exception.nr = DF_VECTOR;
-			vcpu->arch.exception.error_code = 0;
-		} else if (vcpu->arch.exception.nr == DF_VECTOR) {
+		switch(vcpu->arch.exception.nr) {
+		case DF_VECTOR:
 			/* triple fault -> shutdown */
 			/* triple fault -> shutdown */
 			set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests);
 			set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests);
+			return;
+		case PF_VECTOR:
+			vcpu->arch.exception.nr = DF_VECTOR;
+			vcpu->arch.exception.error_code = 0;
+			return;
+		default:
+			/* replace previous exception with a new one in a hope
+			   that instruction re-execution will regenerate lost
+			   exception */
+			vcpu->arch.exception.pending = false;
+			break;
 		}
 		}
-		return;
 	}
 	}
 	vcpu->arch.cr2 = addr;
 	vcpu->arch.cr2 = addr;
 	kvm_queue_exception_e(vcpu, PF_VECTOR, error_code);
 	kvm_queue_exception_e(vcpu, PF_VECTOR, error_code);
@@ -207,12 +222,18 @@ void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code)
 }
 }
 EXPORT_SYMBOL_GPL(kvm_queue_exception_e);
 EXPORT_SYMBOL_GPL(kvm_queue_exception_e);
 
 
-static void __queue_exception(struct kvm_vcpu *vcpu)
+/*
+ * Checks if cpl <= required_cpl; if true, return true.  Otherwise queue
+ * a #GP and return false.
+ */
+bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl)
 {
 {
-	kvm_x86_ops->queue_exception(vcpu, vcpu->arch.exception.nr,
-				     vcpu->arch.exception.has_error_code,
-				     vcpu->arch.exception.error_code);
+	if (kvm_x86_ops->get_cpl(vcpu) <= required_cpl)
+		return true;
+	kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
+	return false;
 }
 }
+EXPORT_SYMBOL_GPL(kvm_require_cpl);
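kvm_require_cpl() gives emulation paths a one-line privilege check that queues the #GP itself; the "VMX: Check cpl before emulating debug register access" change in this same series is a typical caller. An illustrative in-kernel usage sketch (the handler name is invented, the calling pattern is the intended one):

/* Sketch only: guard a privileged emulation path with the new helper. */
static int handle_some_privileged_exit(struct kvm_vcpu *vcpu)
{
	if (!kvm_require_cpl(vcpu, 0))
		return 1;	/* #GP already queued, just re-enter the guest */

	/* ... emulate the privileged operation here ... */
	return 1;
}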
 
 
 /*
  * Load the pae pdptrs.  Return true if they are all valid.
@@ -232,7 +253,7 @@ int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3)
 		goto out;
 		goto out;
 	}
 	}
 	for (i = 0; i < ARRAY_SIZE(pdpte); ++i) {
 	for (i = 0; i < ARRAY_SIZE(pdpte); ++i) {
-		if (is_present_pte(pdpte[i]) &&
+		if (is_present_gpte(pdpte[i]) &&
 		    (pdpte[i] & vcpu->arch.mmu.rsvd_bits_mask[0][2])) {
 		    (pdpte[i] & vcpu->arch.mmu.rsvd_bits_mask[0][2])) {
 			ret = 0;
 			ret = 0;
 			goto out;
 			goto out;
@@ -241,6 +262,10 @@ int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3)
 	ret = 1;
 	ret = 1;
 
 
 	memcpy(vcpu->arch.pdptrs, pdpte, sizeof(vcpu->arch.pdptrs));
 	memcpy(vcpu->arch.pdptrs, pdpte, sizeof(vcpu->arch.pdptrs));
+	__set_bit(VCPU_EXREG_PDPTR,
+		  (unsigned long *)&vcpu->arch.regs_avail);
+	__set_bit(VCPU_EXREG_PDPTR,
+		  (unsigned long *)&vcpu->arch.regs_dirty);
 out:
 out:
 
 
 	return ret;
 	return ret;
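With VCPU_EXREG_PDPTR now tracked in regs_avail/regs_dirty, readers are meant to go through a cache-aware accessor instead of touching vcpu->arch.pdptrs directly; the series adds such a helper to kvm_cache_regs.h. A sketch of the expected shape (treat the exact form here as illustrative):

static inline u64 kvm_pdptr_read(struct kvm_vcpu *vcpu, int index)
{
	/* Refill the cached pdptrs from the VMCS/VMCB if they are stale. */
	if (!test_bit(VCPU_EXREG_PDPTR,
		      (unsigned long *)&vcpu->arch.regs_avail))
		kvm_x86_ops->cache_reg(vcpu, VCPU_EXREG_PDPTR);

	return vcpu->arch.pdptrs[index];
}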
@@ -256,6 +281,10 @@ static bool pdptrs_changed(struct kvm_vcpu *vcpu)
 	if (is_long_mode(vcpu) || !is_pae(vcpu))
 	if (is_long_mode(vcpu) || !is_pae(vcpu))
 		return false;
 		return false;
 
 
+	if (!test_bit(VCPU_EXREG_PDPTR,
+		      (unsigned long *)&vcpu->arch.regs_avail))
+		return true;
+
 	r = kvm_read_guest(vcpu->kvm, vcpu->arch.cr3 & ~31u, pdpte, sizeof(pdpte));
 	r = kvm_read_guest(vcpu->kvm, vcpu->arch.cr3 & ~31u, pdpte, sizeof(pdpte));
 	if (r < 0)
 	if (r < 0)
 		goto out;
 		goto out;
@@ -328,9 +357,6 @@ EXPORT_SYMBOL_GPL(kvm_set_cr0);
 void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw)
 void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw)
 {
 {
 	kvm_set_cr0(vcpu, (vcpu->arch.cr0 & ~0x0ful) | (msw & 0x0f));
 	kvm_set_cr0(vcpu, (vcpu->arch.cr0 & ~0x0ful) | (msw & 0x0f));
-	KVMTRACE_1D(LMSW, vcpu,
-		    (u32)((vcpu->arch.cr0 & ~0x0ful) | (msw & 0x0f)),
-		    handler);
 }
 }
 EXPORT_SYMBOL_GPL(kvm_lmsw);
 EXPORT_SYMBOL_GPL(kvm_lmsw);
 
 
@@ -466,7 +492,7 @@ static u32 msrs_to_save[] = {
 #ifdef CONFIG_X86_64
 #ifdef CONFIG_X86_64
 	MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
 	MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
 #endif
 #endif
-	MSR_IA32_TIME_STAMP_COUNTER, MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
+	MSR_IA32_TSC, MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
 	MSR_IA32_PERF_STATUS, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA
 	MSR_IA32_PERF_STATUS, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA
 };
 };
 
 
@@ -644,8 +670,7 @@ static void kvm_write_guest_time(struct kvm_vcpu *v)
 
 
 	/* Keep irq disabled to prevent changes to the clock */
 	/* Keep irq disabled to prevent changes to the clock */
 	local_irq_save(flags);
 	local_irq_save(flags);
-	kvm_get_msr(v, MSR_IA32_TIME_STAMP_COUNTER,
-			  &vcpu->hv_clock.tsc_timestamp);
+	kvm_get_msr(v, MSR_IA32_TSC, &vcpu->hv_clock.tsc_timestamp);
 	ktime_get_ts(&ts);
 	ktime_get_ts(&ts);
 	local_irq_restore(flags);
 	local_irq_restore(flags);
 
 
@@ -778,23 +803,60 @@ static int set_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 	return 0;
 	return 0;
 }
 }
 
 
+static int set_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 data)
+{
+	u64 mcg_cap = vcpu->arch.mcg_cap;
+	unsigned bank_num = mcg_cap & 0xff;
+
+	switch (msr) {
+	case MSR_IA32_MCG_STATUS:
+		vcpu->arch.mcg_status = data;
+		break;
+	case MSR_IA32_MCG_CTL:
+		if (!(mcg_cap & MCG_CTL_P))
+			return 1;
+		if (data != 0 && data != ~(u64)0)
+			return -1;
+		vcpu->arch.mcg_ctl = data;
+		break;
+	default:
+		if (msr >= MSR_IA32_MC0_CTL &&
+		    msr < MSR_IA32_MC0_CTL + 4 * bank_num) {
+			u32 offset = msr - MSR_IA32_MC0_CTL;
+			/* only 0 or all 1s can be written to IA32_MCi_CTL */
+			if ((offset & 0x3) == 0 &&
+			    data != 0 && data != ~(u64)0)
+				return -1;
+			vcpu->arch.mce_banks[offset] = data;
+			break;
+		}
+		return 1;
+	}
+	return 0;
+}
+
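set_msr_mce() above indexes the flat mce_banks[] array with (msr - MSR_IA32_MC0_CTL): every bank owns four consecutive MSRs (CTL, STATUS, ADDR, MISC), and only the CTL slot is restricted to 0 or all-ones. A tiny standalone sketch of that decoding, with an example MSR number:

#include <stdio.h>

#define MSR_IA32_MC0_CTL 0x400	/* architectural base of the per-bank MSRs */

int main(void)
{
	unsigned msr = 0x409;	/* example: IA32_MC2_STATUS */
	unsigned offset = msr - MSR_IA32_MC0_CTL;
	static const char *field[] = { "CTL", "STATUS", "ADDR", "MISC" };

	printf("bank %u, %s\n", offset / 4, field[offset & 3]);
	return 0;
}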
 int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 {
 {
 	switch (msr) {
 	switch (msr) {
 	case MSR_EFER:
 	case MSR_EFER:
 		set_efer(vcpu, data);
 		set_efer(vcpu, data);
 		break;
 		break;
-	case MSR_IA32_MC0_STATUS:
-		pr_unimpl(vcpu, "%s: MSR_IA32_MC0_STATUS 0x%llx, nop\n",
-		       __func__, data);
+	case MSR_K7_HWCR:
+		data &= ~(u64)0x40;	/* ignore flush filter disable */
+		if (data != 0) {
+			pr_unimpl(vcpu, "unimplemented HWCR wrmsr: 0x%llx\n",
+				data);
+			return 1;
+		}
 		break;
 		break;
-	case MSR_IA32_MCG_STATUS:
-		pr_unimpl(vcpu, "%s: MSR_IA32_MCG_STATUS 0x%llx, nop\n",
-			__func__, data);
+	case MSR_FAM10H_MMIO_CONF_BASE:
+		if (data != 0) {
+			pr_unimpl(vcpu, "unimplemented MMIO_CONF_BASE wrmsr: "
+				"0x%llx\n", data);
+			return 1;
+		}
 		break;
 		break;
-	case MSR_IA32_MCG_CTL:
-		pr_unimpl(vcpu, "%s: MSR_IA32_MCG_CTL 0x%llx, nop\n",
-			__func__, data);
+	case MSR_AMD64_NB_CFG:
 		break;
 		break;
 	case MSR_IA32_DEBUGCTLMSR:
 	case MSR_IA32_DEBUGCTLMSR:
 		if (!data) {
 		if (!data) {
@@ -811,12 +873,15 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 	case MSR_IA32_UCODE_REV:
 	case MSR_IA32_UCODE_REV:
 	case MSR_IA32_UCODE_WRITE:
 	case MSR_IA32_UCODE_WRITE:
 	case MSR_VM_HSAVE_PA:
 	case MSR_VM_HSAVE_PA:
+	case MSR_AMD64_PATCH_LOADER:
 		break;
 		break;
 	case 0x200 ... 0x2ff:
 	case 0x200 ... 0x2ff:
 		return set_msr_mtrr(vcpu, msr, data);
 		return set_msr_mtrr(vcpu, msr, data);
 	case MSR_IA32_APICBASE:
 	case MSR_IA32_APICBASE:
 		kvm_set_apic_base(vcpu, data);
 		kvm_set_apic_base(vcpu, data);
 		break;
 		break;
+	case APIC_BASE_MSR ... APIC_BASE_MSR + 0x3ff:
+		return kvm_x2apic_msr_write(vcpu, msr, data);
 	case MSR_IA32_MISC_ENABLE:
 	case MSR_IA32_MISC_ENABLE:
 		vcpu->arch.ia32_misc_enable_msr = data;
 		vcpu->arch.ia32_misc_enable_msr = data;
 		break;
 		break;
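The APIC_BASE_MSR ... APIC_BASE_MSR + 0x3ff case added above is the x2APIC MSR window: each MSR corresponds to the APIC register at sixteen times its offset from the base, which is how the new kvm_x2apic_msr_{read,write}() helpers are expected to translate the index. A minimal sketch of the mapping:

#include <stdio.h>

#define APIC_BASE_MSR 0x800	/* first x2APIC MSR */

int main(void)
{
	unsigned msr = 0x802;	/* x2APIC ID */

	/* Prints 0x20, the APIC ID register offset in the MMIO layout. */
	printf("APIC register offset: 0x%x\n", (msr - APIC_BASE_MSR) << 4);
	return 0;
}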
@@ -850,9 +915,50 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 		kvm_request_guest_time_update(vcpu);
 		kvm_request_guest_time_update(vcpu);
 		break;
 		break;
 	}
 	}
+	case MSR_IA32_MCG_CTL:
+	case MSR_IA32_MCG_STATUS:
+	case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1:
+		return set_msr_mce(vcpu, msr, data);
+
+	/* Performance counters are not protected by a CPUID bit,
+	 * so we should check all of them in the generic path for the sake of
+	 * cross vendor migration.
+	 * Writing a zero into the event select MSRs disables them,
+	 * which we perfectly emulate ;-). Any other value should be at least
+	 * reported; some guests depend on them.
+	 */
+	case MSR_P6_EVNTSEL0:
+	case MSR_P6_EVNTSEL1:
+	case MSR_K7_EVNTSEL0:
+	case MSR_K7_EVNTSEL1:
+	case MSR_K7_EVNTSEL2:
+	case MSR_K7_EVNTSEL3:
+		if (data != 0)
+			pr_unimpl(vcpu, "unimplemented perfctr wrmsr: "
+				"0x%x data 0x%llx\n", msr, data);
+		break;
+	/* at least RHEL 4 unconditionally writes to the perfctr registers,
+	 * so we ignore writes to make it happy.
+	 */
+	case MSR_P6_PERFCTR0:
+	case MSR_P6_PERFCTR1:
+	case MSR_K7_PERFCTR0:
+	case MSR_K7_PERFCTR1:
+	case MSR_K7_PERFCTR2:
+	case MSR_K7_PERFCTR3:
+		pr_unimpl(vcpu, "unimplemented perfctr wrmsr: "
+			"0x%x data 0x%llx\n", msr, data);
+		break;
 	default:
 	default:
-		pr_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n", msr, data);
-		return 1;
+		if (!ignore_msrs) {
+			pr_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n",
+				msr, data);
+			return 1;
+		} else {
+			pr_unimpl(vcpu, "ignored wrmsr: 0x%x data %llx\n",
+				msr, data);
+			break;
+		}
 	}
 	}
 	return 0;
 	return 0;
 }
 }
@@ -905,26 +1011,47 @@ static int get_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
 	return 0;
 	return 0;
 }
 }
 
 
-int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
+static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
 {
 {
 	u64 data;
 	u64 data;
+	u64 mcg_cap = vcpu->arch.mcg_cap;
+	unsigned bank_num = mcg_cap & 0xff;
 
 
 	switch (msr) {
 	switch (msr) {
-	case 0xc0010010: /* SYSCFG */
-	case 0xc0010015: /* HWCR */
-	case MSR_IA32_PLATFORM_ID:
 	case MSR_IA32_P5_MC_ADDR:
 	case MSR_IA32_P5_MC_ADDR:
 	case MSR_IA32_P5_MC_TYPE:
 	case MSR_IA32_P5_MC_TYPE:
-	case MSR_IA32_MC0_CTL:
-	case MSR_IA32_MCG_STATUS:
+		data = 0;
+		break;
 	case MSR_IA32_MCG_CAP:
 	case MSR_IA32_MCG_CAP:
+		data = vcpu->arch.mcg_cap;
+		break;
 	case MSR_IA32_MCG_CTL:
 	case MSR_IA32_MCG_CTL:
-	case MSR_IA32_MC0_MISC:
-	case MSR_IA32_MC0_MISC+4:
-	case MSR_IA32_MC0_MISC+8:
-	case MSR_IA32_MC0_MISC+12:
-	case MSR_IA32_MC0_MISC+16:
-	case MSR_IA32_MC0_MISC+20:
+		if (!(mcg_cap & MCG_CTL_P))
+			return 1;
+		data = vcpu->arch.mcg_ctl;
+		break;
+	case MSR_IA32_MCG_STATUS:
+		data = vcpu->arch.mcg_status;
+		break;
+	default:
+		if (msr >= MSR_IA32_MC0_CTL &&
+		    msr < MSR_IA32_MC0_CTL + 4 * bank_num) {
+			u32 offset = msr - MSR_IA32_MC0_CTL;
+			data = vcpu->arch.mce_banks[offset];
+			break;
+		}
+		return 1;
+	}
+	*pdata = data;
+	return 0;
+}
+
+int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
+{
+	u64 data;
+
+	switch (msr) {
+	case MSR_IA32_PLATFORM_ID:
 	case MSR_IA32_UCODE_REV:
 	case MSR_IA32_UCODE_REV:
 	case MSR_IA32_EBL_CR_POWERON:
 	case MSR_IA32_EBL_CR_POWERON:
 	case MSR_IA32_DEBUGCTLMSR:
 	case MSR_IA32_DEBUGCTLMSR:
@@ -932,10 +1059,18 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
 	case MSR_IA32_LASTBRANCHTOIP:
 	case MSR_IA32_LASTBRANCHTOIP:
 	case MSR_IA32_LASTINTFROMIP:
 	case MSR_IA32_LASTINTFROMIP:
 	case MSR_IA32_LASTINTTOIP:
 	case MSR_IA32_LASTINTTOIP:
+	case MSR_K8_SYSCFG:
+	case MSR_K7_HWCR:
 	case MSR_VM_HSAVE_PA:
 	case MSR_VM_HSAVE_PA:
+	case MSR_P6_PERFCTR0:
+	case MSR_P6_PERFCTR1:
 	case MSR_P6_EVNTSEL0:
 	case MSR_P6_EVNTSEL0:
 	case MSR_P6_EVNTSEL1:
 	case MSR_P6_EVNTSEL1:
 	case MSR_K7_EVNTSEL0:
 	case MSR_K7_EVNTSEL0:
+	case MSR_K7_PERFCTR0:
+	case MSR_K8_INT_PENDING_MSG:
+	case MSR_AMD64_NB_CFG:
+	case MSR_FAM10H_MMIO_CONF_BASE:
 		data = 0;
 		data = 0;
 		break;
 		break;
 	case MSR_MTRRcap:
 	case MSR_MTRRcap:
@@ -949,6 +1084,9 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
 	case MSR_IA32_APICBASE:
 	case MSR_IA32_APICBASE:
 		data = kvm_get_apic_base(vcpu);
 		data = kvm_get_apic_base(vcpu);
 		break;
 		break;
+	case APIC_BASE_MSR ... APIC_BASE_MSR + 0x3ff:
+		return kvm_x2apic_msr_read(vcpu, msr, pdata);
+		break;
 	case MSR_IA32_MISC_ENABLE:
 	case MSR_IA32_MISC_ENABLE:
 		data = vcpu->arch.ia32_misc_enable_msr;
 		data = vcpu->arch.ia32_misc_enable_msr;
 		break;
 		break;
@@ -967,9 +1105,22 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
 	case MSR_KVM_SYSTEM_TIME:
 	case MSR_KVM_SYSTEM_TIME:
 		data = vcpu->arch.time;
 		data = vcpu->arch.time;
 		break;
 		break;
+	case MSR_IA32_P5_MC_ADDR:
+	case MSR_IA32_P5_MC_TYPE:
+	case MSR_IA32_MCG_CAP:
+	case MSR_IA32_MCG_CTL:
+	case MSR_IA32_MCG_STATUS:
+	case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1:
+		return get_msr_mce(vcpu, msr, pdata);
 	default:
 	default:
-		pr_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr);
-		return 1;
+		if (!ignore_msrs) {
+			pr_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr);
+			return 1;
+		} else {
+			pr_unimpl(vcpu, "ignored rdmsr: 0x%x\n", msr);
+			data = 0;
+		}
+		break;
 	}
 	}
 	*pdata = data;
 	*pdata = data;
 	return 0;
 	return 0;
@@ -1068,6 +1219,11 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_REINJECT_CONTROL:
 	case KVM_CAP_REINJECT_CONTROL:
 	case KVM_CAP_IRQ_INJECT_STATUS:
 	case KVM_CAP_IRQ_INJECT_STATUS:
 	case KVM_CAP_ASSIGN_DEV_IRQ:
 	case KVM_CAP_ASSIGN_DEV_IRQ:
+	case KVM_CAP_IRQFD:
+	case KVM_CAP_IOEVENTFD:
+	case KVM_CAP_PIT2:
+	case KVM_CAP_PIT_STATE2:
+	case KVM_CAP_SET_IDENTITY_MAP_ADDR:
 		r = 1;
 		r = 1;
 		break;
 		break;
 	case KVM_CAP_COALESCED_MMIO:
 	case KVM_CAP_COALESCED_MMIO:
@@ -1088,6 +1244,9 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_IOMMU:
 	case KVM_CAP_IOMMU:
 		r = iommu_found();
 		r = iommu_found();
 		break;
 		break;
+	case KVM_CAP_MCE:
+		r = KVM_MAX_MCE_BANKS;
+		break;
 	default:
 	default:
 		r = 0;
 		r = 0;
 		break;
 		break;
@@ -1147,6 +1306,16 @@ long kvm_arch_dev_ioctl(struct file *filp,
 		r = 0;
 		r = 0;
 		break;
 		break;
 	}
 	}
+	case KVM_X86_GET_MCE_CAP_SUPPORTED: {
+		u64 mce_cap;
+
+		mce_cap = KVM_MCE_CAP_SUPPORTED;
+		r = -EFAULT;
+		if (copy_to_user(argp, &mce_cap, sizeof mce_cap))
+			goto out;
+		r = 0;
+		break;
+	}
 	default:
 	default:
 		r = -EINVAL;
 		r = -EINVAL;
 	}
 	}
@@ -1227,6 +1396,7 @@ static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
 	vcpu->arch.cpuid_nent = cpuid->nent;
 	vcpu->arch.cpuid_nent = cpuid->nent;
 	cpuid_fix_nx_cap(vcpu);
 	cpuid_fix_nx_cap(vcpu);
 	r = 0;
 	r = 0;
+	kvm_apic_set_version(vcpu);
 
 
 out_free:
 out_free:
 	vfree(cpuid_entries);
 	vfree(cpuid_entries);
@@ -1248,6 +1418,7 @@ static int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu,
 			   cpuid->nent * sizeof(struct kvm_cpuid_entry2)))
 			   cpuid->nent * sizeof(struct kvm_cpuid_entry2)))
 		goto out;
 		goto out;
 	vcpu->arch.cpuid_nent = cpuid->nent;
 	vcpu->arch.cpuid_nent = cpuid->nent;
+	kvm_apic_set_version(vcpu);
 	return 0;
 	return 0;
 
 
 out:
 out:
@@ -1290,6 +1461,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 			 u32 index, int *nent, int maxnent)
 			 u32 index, int *nent, int maxnent)
 {
 {
 	unsigned f_nx = is_efer_nx() ? F(NX) : 0;
 	unsigned f_nx = is_efer_nx() ? F(NX) : 0;
+	unsigned f_gbpages = kvm_x86_ops->gb_page_enable() ? F(GBPAGES) : 0;
 #ifdef CONFIG_X86_64
 #ifdef CONFIG_X86_64
 	unsigned f_lm = F(LM);
 	unsigned f_lm = F(LM);
 #else
 #else
@@ -1314,7 +1486,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 		F(MTRR) | F(PGE) | F(MCA) | F(CMOV) |
 		F(MTRR) | F(PGE) | F(MCA) | F(CMOV) |
 		F(PAT) | F(PSE36) | 0 /* Reserved */ |
 		F(PAT) | F(PSE36) | 0 /* Reserved */ |
 		f_nx | 0 /* Reserved */ | F(MMXEXT) | F(MMX) |
 		f_nx | 0 /* Reserved */ | F(MMXEXT) | F(MMX) |
-		F(FXSR) | F(FXSR_OPT) | 0 /* GBPAGES */ | 0 /* RDTSCP */ |
+		F(FXSR) | F(FXSR_OPT) | f_gbpages | 0 /* RDTSCP */ |
 		0 /* Reserved */ | f_lm | F(3DNOWEXT) | F(3DNOW);
 		0 /* Reserved */ | f_lm | F(3DNOWEXT) | F(3DNOW);
 	/* cpuid 1.ecx */
 	/* cpuid 1.ecx */
 	const u32 kvm_supported_word4_x86_features =
 	const u32 kvm_supported_word4_x86_features =
@@ -1323,7 +1495,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 		0 /* TM2 */ | F(SSSE3) | 0 /* CNXT-ID */ | 0 /* Reserved */ |
 		0 /* TM2 */ | F(SSSE3) | 0 /* CNXT-ID */ | 0 /* Reserved */ |
 		0 /* Reserved */ | F(CX16) | 0 /* xTPR Update, PDCM */ |
 		0 /* Reserved */ | F(CX16) | 0 /* xTPR Update, PDCM */ |
 		0 /* Reserved, DCA */ | F(XMM4_1) |
 		0 /* Reserved, DCA */ | F(XMM4_1) |
-		F(XMM4_2) | 0 /* x2APIC */ | F(MOVBE) | F(POPCNT) |
+		F(XMM4_2) | F(X2APIC) | F(MOVBE) | F(POPCNT) |
 		0 /* Reserved, XSAVE, OSXSAVE */;
 		0 /* Reserved, XSAVE, OSXSAVE */;
 	/* cpuid 0x80000001.ecx */
 	/* cpuid 0x80000001.ecx */
 	const u32 kvm_supported_word6_x86_features =
 	const u32 kvm_supported_word6_x86_features =
@@ -1344,6 +1516,9 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 	case 1:
 	case 1:
 		entry->edx &= kvm_supported_word0_x86_features;
 		entry->edx &= kvm_supported_word0_x86_features;
 		entry->ecx &= kvm_supported_word4_x86_features;
 		entry->ecx &= kvm_supported_word4_x86_features;
+		/* we support x2apic emulation even if host does not support
+		 * it since we emulate x2apic in software */
+		entry->ecx |= F(X2APIC);
 		break;
 		break;
 	/* function 2 entries are STATEFUL. That is, repeated cpuid commands
 	/* function 2 entries are STATEFUL. That is, repeated cpuid commands
 	 * may return different values. This forces us to get_cpu() before
 	 * may return different values. This forces us to get_cpu() before
@@ -1435,6 +1610,10 @@ static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
 	for (func = 0x80000001; func <= limit && nent < cpuid->nent; ++func)
 	for (func = 0x80000001; func <= limit && nent < cpuid->nent; ++func)
 		do_cpuid_ent(&cpuid_entries[nent], func, 0,
 		do_cpuid_ent(&cpuid_entries[nent], func, 0,
 			     &nent, cpuid->nent);
 			     &nent, cpuid->nent);
+	r = -E2BIG;
+	if (nent >= cpuid->nent)
+		goto out_free;
+
 	r = -EFAULT;
 	r = -EFAULT;
 	if (copy_to_user(entries, cpuid_entries,
 	if (copy_to_user(entries, cpuid_entries,
 			 nent * sizeof(struct kvm_cpuid_entry2)))
 			 nent * sizeof(struct kvm_cpuid_entry2)))
@@ -1464,6 +1643,7 @@ static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu,
 	vcpu_load(vcpu);
 	vcpu_load(vcpu);
 	memcpy(vcpu->arch.apic->regs, s->regs, sizeof *s);
 	memcpy(vcpu->arch.apic->regs, s->regs, sizeof *s);
 	kvm_apic_post_state_restore(vcpu);
 	kvm_apic_post_state_restore(vcpu);
+	update_cr8_intercept(vcpu);
 	vcpu_put(vcpu);
 	vcpu_put(vcpu);
 
 
 	return 0;
 	return 0;
@@ -1503,6 +1683,80 @@ static int vcpu_ioctl_tpr_access_reporting(struct kvm_vcpu *vcpu,
 	return 0;
 	return 0;
 }
 }
 
 
+static int kvm_vcpu_ioctl_x86_setup_mce(struct kvm_vcpu *vcpu,
+					u64 mcg_cap)
+{
+	int r;
+	unsigned bank_num = mcg_cap & 0xff, bank;
+
+	r = -EINVAL;
+	if (!bank_num)
+		goto out;
+	if (mcg_cap & ~(KVM_MCE_CAP_SUPPORTED | 0xff | 0xff0000))
+		goto out;
+	r = 0;
+	vcpu->arch.mcg_cap = mcg_cap;
+	/* Init IA32_MCG_CTL to all 1s */
+	if (mcg_cap & MCG_CTL_P)
+		vcpu->arch.mcg_ctl = ~(u64)0;
+	/* Init IA32_MCi_CTL to all 1s */
+	for (bank = 0; bank < bank_num; bank++)
+		vcpu->arch.mce_banks[bank*4] = ~(u64)0;
+out:
+	return r;
+}
+
+static int kvm_vcpu_ioctl_x86_set_mce(struct kvm_vcpu *vcpu,
+				      struct kvm_x86_mce *mce)
+{
+	u64 mcg_cap = vcpu->arch.mcg_cap;
+	unsigned bank_num = mcg_cap & 0xff;
+	u64 *banks = vcpu->arch.mce_banks;
+
+	if (mce->bank >= bank_num || !(mce->status & MCI_STATUS_VAL))
+		return -EINVAL;
+	/*
+	 * if IA32_MCG_CTL is not all 1s, the uncorrected error
+	 * reporting is disabled
+	 */
+	if ((mce->status & MCI_STATUS_UC) && (mcg_cap & MCG_CTL_P) &&
+	    vcpu->arch.mcg_ctl != ~(u64)0)
+		return 0;
+	banks += 4 * mce->bank;
+	/*
+	 * if IA32_MCi_CTL is not all 1s, the uncorrected error
+	 * reporting is disabled for the bank
+	 */
+	if ((mce->status & MCI_STATUS_UC) && banks[0] != ~(u64)0)
+		return 0;
+	if (mce->status & MCI_STATUS_UC) {
+		if ((vcpu->arch.mcg_status & MCG_STATUS_MCIP) ||
+		    !(vcpu->arch.cr4 & X86_CR4_MCE)) {
+			printk(KERN_DEBUG "kvm: set_mce: "
+			       "injects mce exception while "
+			       "previous one is in progress!\n");
+			set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests);
+			return 0;
+		}
+		if (banks[1] & MCI_STATUS_VAL)
+			mce->status |= MCI_STATUS_OVER;
+		banks[2] = mce->addr;
+		banks[3] = mce->misc;
+		vcpu->arch.mcg_status = mce->mcg_status;
+		banks[1] = mce->status;
+		kvm_queue_exception(vcpu, MC_VECTOR);
+	} else if (!(banks[1] & MCI_STATUS_VAL)
+		   || !(banks[1] & MCI_STATUS_UC)) {
+		if (banks[1] & MCI_STATUS_VAL)
+			mce->status |= MCI_STATUS_OVER;
+		banks[2] = mce->addr;
+		banks[3] = mce->misc;
+		banks[1] = mce->status;
+	} else
+		banks[1] |= MCI_STATUS_OVER;
+	return 0;
+}
+
 long kvm_arch_vcpu_ioctl(struct file *filp,
 long kvm_arch_vcpu_ioctl(struct file *filp,
 			 unsigned int ioctl, unsigned long arg)
 			 unsigned int ioctl, unsigned long arg)
 {
 {
@@ -1636,6 +1890,24 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 		kvm_lapic_set_vapic_addr(vcpu, va.vapic_addr);
 		kvm_lapic_set_vapic_addr(vcpu, va.vapic_addr);
 		break;
 		break;
 	}
 	}
+	case KVM_X86_SETUP_MCE: {
+		u64 mcg_cap;
+
+		r = -EFAULT;
+		if (copy_from_user(&mcg_cap, argp, sizeof mcg_cap))
+			goto out;
+		r = kvm_vcpu_ioctl_x86_setup_mce(vcpu, mcg_cap);
+		break;
+	}
+	case KVM_X86_SET_MCE: {
+		struct kvm_x86_mce mce;
+
+		r = -EFAULT;
+		if (copy_from_user(&mce, argp, sizeof mce))
+			goto out;
+		r = kvm_vcpu_ioctl_x86_set_mce(vcpu, &mce);
+		break;
+	}
 	default:
 	default:
 		r = -EINVAL;
 		r = -EINVAL;
 	}
 	}
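Taken together, KVM_X86_GET_MCE_CAP_SUPPORTED (a /dev/kvm ioctl, handled in kvm_arch_dev_ioctl() earlier in this diff), KVM_X86_SETUP_MCE and KVM_X86_SET_MCE let userspace forward machine checks into a guest. A hedged userspace sketch; the bank number, address and six-bank choice are invented, and error handling is omitted:

#include <fcntl.h>
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

#define MCI_STATUS_VAL	(1ULL << 63)	/* error in this bank is valid */
#define MCI_STATUS_UC	(1ULL << 61)	/* uncorrected error */

int main(void)
{
	int kvm = open("/dev/kvm", O_RDWR);
	int vm = ioctl(kvm, KVM_CREATE_VM, 0);
	int vcpu = ioctl(vm, KVM_CREATE_VCPU, 0);
	uint64_t mcg_cap = 0;
	struct kvm_x86_mce mce;

	/* Capability bits the host is willing to pass through. */
	ioctl(kvm, KVM_X86_GET_MCE_CAP_SUPPORTED, &mcg_cap);

	/* Low byte is the bank count the guest will see; 6 is arbitrary. */
	mcg_cap = (mcg_cap & ~0xffULL) | 6;
	ioctl(vcpu, KVM_X86_SETUP_MCE, &mcg_cap);

	/* Report an uncorrected error in bank 2 at a made-up address.  If
	 * the guest has CR4.MCE clear this becomes a triple fault, as
	 * kvm_vcpu_ioctl_x86_set_mce() above spells out. */
	memset(&mce, 0, sizeof(mce));
	mce.bank = 2;
	mce.status = MCI_STATUS_VAL | MCI_STATUS_UC;
	mce.addr = 0x1234000;
	return ioctl(vcpu, KVM_X86_SET_MCE, &mce);
}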
@@ -1654,6 +1926,13 @@ static int kvm_vm_ioctl_set_tss_addr(struct kvm *kvm, unsigned long addr)
 	return ret;
 	return ret;
 }
 }
 
 
+static int kvm_vm_ioctl_set_identity_map_addr(struct kvm *kvm,
+					      u64 ident_addr)
+{
+	kvm->arch.ept_identity_map_addr = ident_addr;
+	return 0;
+}
+
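kvm_vm_ioctl_set_identity_map_addr() only records the address; the consumer is the vmx_create_vcpu() hunk earlier in this diff, which falls back to VMX_EPT_IDENTITY_PAGETABLE_ADDR once the first vcpu is created. A minimal userspace sketch, with an arbitrary example address and an assumed VM descriptor:

#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Must run before the first KVM_CREATE_VCPU on this VM; afterwards the
 * default identity-map address has already been latched. */
static int set_identity_map(int vm_fd)
{
	uint64_t ident_addr = 0xfeffc000;	/* example address only */

	return ioctl(vm_fd, KVM_SET_IDENTITY_MAP_ADDR, &ident_addr);
}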
 static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm,
 static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm,
 					  u32 kvm_nr_mmu_pages)
 					  u32 kvm_nr_mmu_pages)
 {
 {
@@ -1775,19 +2054,25 @@ static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
 	r = 0;
 	r = 0;
 	switch (chip->chip_id) {
 	switch (chip->chip_id) {
 	case KVM_IRQCHIP_PIC_MASTER:
 	case KVM_IRQCHIP_PIC_MASTER:
+		spin_lock(&pic_irqchip(kvm)->lock);
 		memcpy(&pic_irqchip(kvm)->pics[0],
 		memcpy(&pic_irqchip(kvm)->pics[0],
 			&chip->chip.pic,
 			&chip->chip.pic,
 			sizeof(struct kvm_pic_state));
 			sizeof(struct kvm_pic_state));
+		spin_unlock(&pic_irqchip(kvm)->lock);
 		break;
 		break;
 	case KVM_IRQCHIP_PIC_SLAVE:
 	case KVM_IRQCHIP_PIC_SLAVE:
+		spin_lock(&pic_irqchip(kvm)->lock);
 		memcpy(&pic_irqchip(kvm)->pics[1],
 		memcpy(&pic_irqchip(kvm)->pics[1],
 			&chip->chip.pic,
 			&chip->chip.pic,
 			sizeof(struct kvm_pic_state));
 			sizeof(struct kvm_pic_state));
+		spin_unlock(&pic_irqchip(kvm)->lock);
 		break;
 		break;
 	case KVM_IRQCHIP_IOAPIC:
 	case KVM_IRQCHIP_IOAPIC:
+		mutex_lock(&kvm->irq_lock);
 		memcpy(ioapic_irqchip(kvm),
 		memcpy(ioapic_irqchip(kvm),
 			&chip->chip.ioapic,
 			&chip->chip.ioapic,
 			sizeof(struct kvm_ioapic_state));
 			sizeof(struct kvm_ioapic_state));
+		mutex_unlock(&kvm->irq_lock);
 		break;
 		break;
 	default:
 	default:
 		r = -EINVAL;
 		r = -EINVAL;
@@ -1801,7 +2086,9 @@ static int kvm_vm_ioctl_get_pit(struct kvm *kvm, struct kvm_pit_state *ps)
 {
 {
 	int r = 0;
 	int r = 0;
 
 
+	mutex_lock(&kvm->arch.vpit->pit_state.lock);
 	memcpy(ps, &kvm->arch.vpit->pit_state, sizeof(struct kvm_pit_state));
 	memcpy(ps, &kvm->arch.vpit->pit_state, sizeof(struct kvm_pit_state));
+	mutex_unlock(&kvm->arch.vpit->pit_state.lock);
 	return r;
 	return r;
 }
 }
 
 
@@ -1809,8 +2096,39 @@ static int kvm_vm_ioctl_set_pit(struct kvm *kvm, struct kvm_pit_state *ps)
 {
 {
 	int r = 0;
 	int r = 0;
 
 
+	mutex_lock(&kvm->arch.vpit->pit_state.lock);
 	memcpy(&kvm->arch.vpit->pit_state, ps, sizeof(struct kvm_pit_state));
 	memcpy(&kvm->arch.vpit->pit_state, ps, sizeof(struct kvm_pit_state));
-	kvm_pit_load_count(kvm, 0, ps->channels[0].count);
+	kvm_pit_load_count(kvm, 0, ps->channels[0].count, 0);
+	mutex_unlock(&kvm->arch.vpit->pit_state.lock);
+	return r;
+}
+
+static int kvm_vm_ioctl_get_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps)
+{
+	int r = 0;
+
+	mutex_lock(&kvm->arch.vpit->pit_state.lock);
+	memcpy(ps->channels, &kvm->arch.vpit->pit_state.channels,
+		sizeof(ps->channels));
+	ps->flags = kvm->arch.vpit->pit_state.flags;
+	mutex_unlock(&kvm->arch.vpit->pit_state.lock);
+	return r;
+}
+
+static int kvm_vm_ioctl_set_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps)
+{
+	int r = 0, start = 0;
+	u32 prev_legacy, cur_legacy;
+	mutex_lock(&kvm->arch.vpit->pit_state.lock);
+	prev_legacy = kvm->arch.vpit->pit_state.flags & KVM_PIT_FLAGS_HPET_LEGACY;
+	cur_legacy = ps->flags & KVM_PIT_FLAGS_HPET_LEGACY;
+	if (!prev_legacy && cur_legacy)
+		start = 1;
+	memcpy(&kvm->arch.vpit->pit_state.channels, &ps->channels,
+	       sizeof(kvm->arch.vpit->pit_state.channels));
+	kvm->arch.vpit->pit_state.flags = ps->flags;
+	kvm_pit_load_count(kvm, 0, kvm->arch.vpit->pit_state.channels[0].count, start);
+	mutex_unlock(&kvm->arch.vpit->pit_state.lock);
 	return r;
 	return r;
 }
 }
 
 
@@ -1819,7 +2137,9 @@ static int kvm_vm_ioctl_reinject(struct kvm *kvm,
 {
 {
 	if (!kvm->arch.vpit)
 	if (!kvm->arch.vpit)
 		return -ENXIO;
 		return -ENXIO;
+	mutex_lock(&kvm->arch.vpit->pit_state.lock);
 	kvm->arch.vpit->pit_state.pit_timer.reinject = control->pit_reinject;
 	kvm->arch.vpit->pit_state.pit_timer.reinject = control->pit_reinject;
+	mutex_unlock(&kvm->arch.vpit->pit_state.lock);
 	return 0;
 	return 0;
 }
 }
 
 
@@ -1845,7 +2165,6 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 		spin_lock(&kvm->mmu_lock);
 		spin_lock(&kvm->mmu_lock);
 		kvm_mmu_slot_remove_write_access(kvm, log->slot);
 		kvm_mmu_slot_remove_write_access(kvm, log->slot);
 		spin_unlock(&kvm->mmu_lock);
 		spin_unlock(&kvm->mmu_lock);
-		kvm_flush_remote_tlbs(kvm);
 		memslot = &kvm->memslots[log->slot];
 		memslot = &kvm->memslots[log->slot];
 		n = ALIGN(memslot->npages, BITS_PER_LONG) / 8;
 		n = ALIGN(memslot->npages, BITS_PER_LONG) / 8;
 		memset(memslot->dirty_bitmap, 0, n);
 		memset(memslot->dirty_bitmap, 0, n);
@@ -1869,7 +2188,9 @@ long kvm_arch_vm_ioctl(struct file *filp,
 	 */
 	 */
 	union {
 	union {
 		struct kvm_pit_state ps;
 		struct kvm_pit_state ps;
+		struct kvm_pit_state2 ps2;
 		struct kvm_memory_alias alias;
 		struct kvm_memory_alias alias;
+		struct kvm_pit_config pit_config;
 	} u;
 	} u;
 
 
 	switch (ioctl) {
 	switch (ioctl) {
@@ -1878,6 +2199,17 @@ long kvm_arch_vm_ioctl(struct file *filp,
 		if (r < 0)
 		if (r < 0)
 			goto out;
 			goto out;
 		break;
 		break;
+	case KVM_SET_IDENTITY_MAP_ADDR: {
+		u64 ident_addr;
+
+		r = -EFAULT;
+		if (copy_from_user(&ident_addr, argp, sizeof ident_addr))
+			goto out;
+		r = kvm_vm_ioctl_set_identity_map_addr(kvm, ident_addr);
+		if (r < 0)
+			goto out;
+		break;
+	}
 	case KVM_SET_MEMORY_REGION: {
 	case KVM_SET_MEMORY_REGION: {
 		struct kvm_memory_region kvm_mem;
 		struct kvm_memory_region kvm_mem;
 		struct kvm_userspace_memory_region kvm_userspace_mem;
 		struct kvm_userspace_memory_region kvm_userspace_mem;
@@ -1930,16 +2262,24 @@ long kvm_arch_vm_ioctl(struct file *filp,
 		}
 		}
 		break;
 		break;
 	case KVM_CREATE_PIT:
 	case KVM_CREATE_PIT:
-		mutex_lock(&kvm->lock);
+		u.pit_config.flags = KVM_PIT_SPEAKER_DUMMY;
+		goto create_pit;
+	case KVM_CREATE_PIT2:
+		r = -EFAULT;
+		if (copy_from_user(&u.pit_config, argp,
+				   sizeof(struct kvm_pit_config)))
+			goto out;
+	create_pit:
+		down_write(&kvm->slots_lock);
 		r = -EEXIST;
 		r = -EEXIST;
 		if (kvm->arch.vpit)
 		if (kvm->arch.vpit)
 			goto create_pit_unlock;
 			goto create_pit_unlock;
 		r = -ENOMEM;
 		r = -ENOMEM;
-		kvm->arch.vpit = kvm_create_pit(kvm);
+		kvm->arch.vpit = kvm_create_pit(kvm, u.pit_config.flags);
 		if (kvm->arch.vpit)
 		if (kvm->arch.vpit)
 			r = 0;
 			r = 0;
 	create_pit_unlock:
 	create_pit_unlock:
-		mutex_unlock(&kvm->lock);
+		up_write(&kvm->slots_lock);
 		break;
 		break;
 	case KVM_IRQ_LINE_STATUS:
 	case KVM_IRQ_LINE_STATUS:
 	case KVM_IRQ_LINE: {
 	case KVM_IRQ_LINE: {
@@ -1950,10 +2290,10 @@ long kvm_arch_vm_ioctl(struct file *filp,
 			goto out;
 			goto out;
 		if (irqchip_in_kernel(kvm)) {
 		if (irqchip_in_kernel(kvm)) {
 			__s32 status;
 			__s32 status;
-			mutex_lock(&kvm->lock);
+			mutex_lock(&kvm->irq_lock);
 			status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
 			status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
 					irq_event.irq, irq_event.level);
 					irq_event.irq, irq_event.level);
-			mutex_unlock(&kvm->lock);
+			mutex_unlock(&kvm->irq_lock);
 			if (ioctl == KVM_IRQ_LINE_STATUS) {
 			if (ioctl == KVM_IRQ_LINE_STATUS) {
 				irq_event.status = status;
 				irq_event.status = status;
 				if (copy_to_user(argp, &irq_event,
 				if (copy_to_user(argp, &irq_event,
@@ -2042,6 +2382,32 @@ long kvm_arch_vm_ioctl(struct file *filp,
 		r = 0;
 		r = 0;
 		break;
 		break;
 	}
 	}
+	case KVM_GET_PIT2: {
+		r = -ENXIO;
+		if (!kvm->arch.vpit)
+			goto out;
+		r = kvm_vm_ioctl_get_pit2(kvm, &u.ps2);
+		if (r)
+			goto out;
+		r = -EFAULT;
+		if (copy_to_user(argp, &u.ps2, sizeof(u.ps2)))
+			goto out;
+		r = 0;
+		break;
+	}
+	case KVM_SET_PIT2: {
+		r = -EFAULT;
+		if (copy_from_user(&u.ps2, argp, sizeof(u.ps2)))
+			goto out;
+		r = -ENXIO;
+		if (!kvm->arch.vpit)
+			goto out;
+		r = kvm_vm_ioctl_set_pit2(kvm, &u.ps2);
+		if (r)
+			goto out;
+		r = 0;
+		break;
+	}
 	case KVM_REINJECT_CONTROL: {
 	case KVM_REINJECT_CONTROL: {
 		struct kvm_reinject_control control;
 		struct kvm_reinject_control control;
 		r =  -EFAULT;
 		r =  -EFAULT;
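KVM_CREATE_PIT2, KVM_GET_PIT2 and KVM_SET_PIT2 expose the new kvm_pit_state2 layout (the three channels plus a flags word) to userspace. A hedged sketch of a round trip; the VM descriptor and an already-created in-kernel irqchip are assumed, and error paths are trimmed:

#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int pit2_roundtrip(int vm_fd)
{
	struct kvm_pit_config cfg;
	struct kvm_pit_state2 state;

	memset(&cfg, 0, sizeof(cfg));
	cfg.flags = KVM_PIT_SPEAKER_DUMMY;
	if (ioctl(vm_fd, KVM_CREATE_PIT2, &cfg) < 0)
		return -1;

	if (ioctl(vm_fd, KVM_GET_PIT2, &state) < 0)
		return -1;

	/* A 0 -> 1 transition of the HPET-legacy flag makes the kernel
	 * reload channel 0's counter, per kvm_vm_ioctl_set_pit2() above. */
	state.flags |= KVM_PIT_FLAGS_HPET_LEGACY;
	return ioctl(vm_fd, KVM_SET_PIT2, &state);
}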
@@ -2075,35 +2441,23 @@ static void kvm_init_msr_list(void)
 	num_msrs_to_save = j;
 	num_msrs_to_save = j;
 }
 }
 
 
-/*
- * Only apic need an MMIO device hook, so shortcut now..
- */
-static struct kvm_io_device *vcpu_find_pervcpu_dev(struct kvm_vcpu *vcpu,
-						gpa_t addr, int len,
-						int is_write)
+static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len,
+			   const void *v)
 {
 {
-	struct kvm_io_device *dev;
+	if (vcpu->arch.apic &&
+	    !kvm_iodevice_write(&vcpu->arch.apic->dev, addr, len, v))
+		return 0;
 
 
-	if (vcpu->arch.apic) {
-		dev = &vcpu->arch.apic->dev;
-		if (dev->in_range(dev, addr, len, is_write))
-			return dev;
-	}
-	return NULL;
+	return kvm_io_bus_write(&vcpu->kvm->mmio_bus, addr, len, v);
 }
 }
 
 
-
-static struct kvm_io_device *vcpu_find_mmio_dev(struct kvm_vcpu *vcpu,
-						gpa_t addr, int len,
-						int is_write)
+static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v)
 {
 {
-	struct kvm_io_device *dev;
+	if (vcpu->arch.apic &&
+	    !kvm_iodevice_read(&vcpu->arch.apic->dev, addr, len, v))
+		return 0;
 
 
-	dev = vcpu_find_pervcpu_dev(vcpu, addr, len, is_write);
-	if (dev == NULL)
-		dev = kvm_io_bus_find_dev(&vcpu->kvm->mmio_bus, addr, len,
-					  is_write);
-	return dev;
+	return kvm_io_bus_read(&vcpu->kvm->mmio_bus, addr, len, v);
 }
 }
 
 
 static int kvm_read_guest_virt(gva_t addr, void *val, unsigned int bytes,
 static int kvm_read_guest_virt(gva_t addr, void *val, unsigned int bytes,
@@ -2172,11 +2526,12 @@ static int emulator_read_emulated(unsigned long addr,
 				  unsigned int bytes,
 				  unsigned int bytes,
 				  struct kvm_vcpu *vcpu)
 				  struct kvm_vcpu *vcpu)
 {
 {
-	struct kvm_io_device *mmio_dev;
 	gpa_t                 gpa;
 	gpa_t                 gpa;
 
 
 	if (vcpu->mmio_read_completed) {
 	if (vcpu->mmio_read_completed) {
 		memcpy(val, vcpu->mmio_data, bytes);
 		memcpy(val, vcpu->mmio_data, bytes);
+		trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes,
+			       vcpu->mmio_phys_addr, *(u64 *)val);
 		vcpu->mmio_read_completed = 0;
 		vcpu->mmio_read_completed = 0;
 		return X86EMUL_CONTINUE;
 		return X86EMUL_CONTINUE;
 	}
 	}
@@ -2197,14 +2552,12 @@ mmio:
 	/*
 	/*
 	 * Is this MMIO handled locally?
 	 * Is this MMIO handled locally?
 	 */
 	 */
-	mutex_lock(&vcpu->kvm->lock);
-	mmio_dev = vcpu_find_mmio_dev(vcpu, gpa, bytes, 0);
-	if (mmio_dev) {
-		kvm_iodevice_read(mmio_dev, gpa, bytes, val);
-		mutex_unlock(&vcpu->kvm->lock);
+	if (!vcpu_mmio_read(vcpu, gpa, bytes, val)) {
+		trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes, gpa, *(u64 *)val);
 		return X86EMUL_CONTINUE;
 		return X86EMUL_CONTINUE;
 	}
 	}
-	mutex_unlock(&vcpu->kvm->lock);
+
+	trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, 0);
 
 
 	vcpu->mmio_needed = 1;
 	vcpu->mmio_needed = 1;
 	vcpu->mmio_phys_addr = gpa;
 	vcpu->mmio_phys_addr = gpa;
@@ -2231,7 +2584,6 @@ static int emulator_write_emulated_onepage(unsigned long addr,
 					   unsigned int bytes,
 					   unsigned int bytes,
 					   struct kvm_vcpu *vcpu)
 					   struct kvm_vcpu *vcpu)
 {
 {
-	struct kvm_io_device *mmio_dev;
 	gpa_t                 gpa;
 	gpa_t                 gpa;
 
 
 	gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);
 	gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);
@@ -2249,17 +2601,12 @@ static int emulator_write_emulated_onepage(unsigned long addr,
 		return X86EMUL_CONTINUE;
 		return X86EMUL_CONTINUE;
 
 
 mmio:
 mmio:
+	trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, *(u64 *)val);
 	/*
 	/*
 	 * Is this MMIO handled locally?
 	 * Is this MMIO handled locally?
 	 */
 	 */
-	mutex_lock(&vcpu->kvm->lock);
-	mmio_dev = vcpu_find_mmio_dev(vcpu, gpa, bytes, 1);
-	if (mmio_dev) {
-		kvm_iodevice_write(mmio_dev, gpa, bytes, val);
-		mutex_unlock(&vcpu->kvm->lock);
+	if (!vcpu_mmio_write(vcpu, gpa, bytes, val))
 		return X86EMUL_CONTINUE;
 		return X86EMUL_CONTINUE;
-	}
-	mutex_unlock(&vcpu->kvm->lock);
 
 
 	vcpu->mmio_needed = 1;
 	vcpu->mmio_needed = 1;
 	vcpu->mmio_phys_addr = gpa;
 	vcpu->mmio_phys_addr = gpa;
@@ -2343,7 +2690,6 @@ int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address)
 
 
 int emulate_clts(struct kvm_vcpu *vcpu)
 int emulate_clts(struct kvm_vcpu *vcpu)
 {
 {
-	KVMTRACE_0D(CLTS, vcpu, handler);
 	kvm_x86_ops->set_cr0(vcpu, vcpu->arch.cr0 & ~X86_CR0_TS);
 	kvm_x86_ops->set_cr0(vcpu, vcpu->arch.cr0 & ~X86_CR0_TS);
 	return X86EMUL_CONTINUE;
 	return X86EMUL_CONTINUE;
 }
 }
@@ -2420,7 +2766,7 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
 	kvm_clear_exception_queue(vcpu);
 	kvm_clear_exception_queue(vcpu);
 	vcpu->arch.mmio_fault_cr2 = cr2;
 	vcpu->arch.mmio_fault_cr2 = cr2;
 	/*
 	/*
-	 * TODO: fix x86_emulate.c to use guest_read/write_register
+	 * TODO: fix emulate.c to use guest_read/write_register
 	 * instead of direct ->regs accesses; this can save hundreds of cycles
 	 * on Intel for instructions that don't read/change RSP, for example.
@@ -2444,14 +2790,33 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
 
 
 		r = x86_decode_insn(&vcpu->arch.emulate_ctxt, &emulate_ops);
 		r = x86_decode_insn(&vcpu->arch.emulate_ctxt, &emulate_ops);
 
 
-		/* Reject the instructions other than VMCALL/VMMCALL when
-		 * try to emulate invalid opcode */
+		/* Only allow emulation of specific instructions on #UD
+		 * (namely VMMCALL, sysenter, sysexit, syscall) */
 		c = &vcpu->arch.emulate_ctxt.decode;
 		c = &vcpu->arch.emulate_ctxt.decode;
-		if ((emulation_type & EMULTYPE_TRAP_UD) &&
-		    (!(c->twobyte && c->b == 0x01 &&
-		      (c->modrm_reg == 0 || c->modrm_reg == 3) &&
-		       c->modrm_mod == 3 && c->modrm_rm == 1)))
-			return EMULATE_FAIL;
+		if (emulation_type & EMULTYPE_TRAP_UD) {
+			if (!c->twobyte)
+				return EMULATE_FAIL;
+			switch (c->b) {
+			case 0x01: /* VMMCALL */
+				if (c->modrm_mod != 3 || c->modrm_rm != 1)
+					return EMULATE_FAIL;
+				break;
+			case 0x34: /* sysenter */
+			case 0x35: /* sysexit */
+				if (c->modrm_mod != 0 || c->modrm_rm != 0)
+					return EMULATE_FAIL;
+				break;
+			case 0x05: /* syscall */
+				if (c->modrm_mod != 0 || c->modrm_rm != 0)
+					return EMULATE_FAIL;
+				break;
+			default:
+				return EMULATE_FAIL;
+			}
+
+			if (!(c->modrm_reg == 0 || c->modrm_reg == 3))
+				return EMULATE_FAIL;
+		}
 
 
 		++vcpu->stat.insn_emulation;
 		++vcpu->stat.insn_emulation;
 		if (r)  {
 		if (r)  {
@@ -2571,52 +2936,40 @@ int complete_pio(struct kvm_vcpu *vcpu)
 	return 0;
 	return 0;
 }
 }
 
 
-static void kernel_pio(struct kvm_io_device *pio_dev,
-		       struct kvm_vcpu *vcpu,
-		       void *pd)
+static int kernel_pio(struct kvm_vcpu *vcpu, void *pd)
 {
 {
 	/* TODO: String I/O for in kernel device */
 	/* TODO: String I/O for in kernel device */
+	int r;
 
 
-	mutex_lock(&vcpu->kvm->lock);
 	if (vcpu->arch.pio.in)
 	if (vcpu->arch.pio.in)
-		kvm_iodevice_read(pio_dev, vcpu->arch.pio.port,
-				  vcpu->arch.pio.size,
-				  pd);
+		r = kvm_io_bus_read(&vcpu->kvm->pio_bus, vcpu->arch.pio.port,
+				    vcpu->arch.pio.size, pd);
 	else
 	else
-		kvm_iodevice_write(pio_dev, vcpu->arch.pio.port,
-				   vcpu->arch.pio.size,
-				   pd);
-	mutex_unlock(&vcpu->kvm->lock);
+		r = kvm_io_bus_write(&vcpu->kvm->pio_bus, vcpu->arch.pio.port,
+				     vcpu->arch.pio.size, pd);
+	return r;
 }
 }
 
 
-static void pio_string_write(struct kvm_io_device *pio_dev,
-			     struct kvm_vcpu *vcpu)
+static int pio_string_write(struct kvm_vcpu *vcpu)
 {
 {
 	struct kvm_pio_request *io = &vcpu->arch.pio;
 	struct kvm_pio_request *io = &vcpu->arch.pio;
 	void *pd = vcpu->arch.pio_data;
 	void *pd = vcpu->arch.pio_data;
-	int i;
+	int i, r = 0;
 
 
-	mutex_lock(&vcpu->kvm->lock);
 	for (i = 0; i < io->cur_count; i++) {
 	for (i = 0; i < io->cur_count; i++) {
-		kvm_iodevice_write(pio_dev, io->port,
-				   io->size,
-				   pd);
+		if (kvm_io_bus_write(&vcpu->kvm->pio_bus,
+				     io->port, io->size, pd)) {
+			r = -EOPNOTSUPP;
+			break;
+		}
 		pd += io->size;
 		pd += io->size;
 	}
 	}
-	mutex_unlock(&vcpu->kvm->lock);
-}
-
-static struct kvm_io_device *vcpu_find_pio_dev(struct kvm_vcpu *vcpu,
-					       gpa_t addr, int len,
-					       int is_write)
-{
-	return kvm_io_bus_find_dev(&vcpu->kvm->pio_bus, addr, len, is_write);
+	return r;
 }
 }
 
 
 int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
 int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
 		  int size, unsigned port)
 		  int size, unsigned port)
 {
 {
-	struct kvm_io_device *pio_dev;
 	unsigned long val;
 	unsigned long val;
 
 
 	vcpu->run->exit_reason = KVM_EXIT_IO;
 	vcpu->run->exit_reason = KVM_EXIT_IO;
@@ -2630,19 +2983,13 @@ int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
 	vcpu->arch.pio.down = 0;
 	vcpu->arch.pio.down = 0;
 	vcpu->arch.pio.rep = 0;
 	vcpu->arch.pio.rep = 0;
 
 
-	if (vcpu->run->io.direction == KVM_EXIT_IO_IN)
-		KVMTRACE_2D(IO_READ, vcpu, vcpu->run->io.port, (u32)size,
-			    handler);
-	else
-		KVMTRACE_2D(IO_WRITE, vcpu, vcpu->run->io.port, (u32)size,
-			    handler);
+	trace_kvm_pio(vcpu->run->io.direction == KVM_EXIT_IO_OUT, port,
+		      size, 1);
 
 
 	val = kvm_register_read(vcpu, VCPU_REGS_RAX);
 	val = kvm_register_read(vcpu, VCPU_REGS_RAX);
 	memcpy(vcpu->arch.pio_data, &val, 4);
 	memcpy(vcpu->arch.pio_data, &val, 4);
 
 
-	pio_dev = vcpu_find_pio_dev(vcpu, port, size, !in);
-	if (pio_dev) {
-		kernel_pio(pio_dev, vcpu, vcpu->arch.pio_data);
+	if (!kernel_pio(vcpu, vcpu->arch.pio_data)) {
 		complete_pio(vcpu);
 		complete_pio(vcpu);
 		return 1;
 		return 1;
 	}
 	}
@@ -2656,7 +3003,6 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
 {
 {
 	unsigned now, in_page;
 	unsigned now, in_page;
 	int ret = 0;
 	int ret = 0;
-	struct kvm_io_device *pio_dev;
 
 
 	vcpu->run->exit_reason = KVM_EXIT_IO;
 	vcpu->run->exit_reason = KVM_EXIT_IO;
 	vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT;
 	vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT;
@@ -2669,12 +3015,8 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
 	vcpu->arch.pio.down = down;
 	vcpu->arch.pio.down = down;
 	vcpu->arch.pio.rep = rep;
 	vcpu->arch.pio.rep = rep;
 
 
-	if (vcpu->run->io.direction == KVM_EXIT_IO_IN)
-		KVMTRACE_2D(IO_READ, vcpu, vcpu->run->io.port, (u32)size,
-			    handler);
-	else
-		KVMTRACE_2D(IO_WRITE, vcpu, vcpu->run->io.port, (u32)size,
-			    handler);
+	trace_kvm_pio(vcpu->run->io.direction == KVM_EXIT_IO_OUT, port,
+		      size, count);
 
 
 	if (!count) {
 	if (!count) {
 		kvm_x86_ops->skip_emulated_instruction(vcpu);
 		kvm_x86_ops->skip_emulated_instruction(vcpu);
@@ -2704,9 +3046,6 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
 
 
 	vcpu->arch.pio.guest_gva = address;
 	vcpu->arch.pio.guest_gva = address;
 
 
-	pio_dev = vcpu_find_pio_dev(vcpu, port,
-				    vcpu->arch.pio.cur_count,
-				    !vcpu->arch.pio.in);
 	if (!vcpu->arch.pio.in) {
 	if (!vcpu->arch.pio.in) {
 		/* string PIO write */
 		/* string PIO write */
 		ret = pio_copy_data(vcpu);
 		ret = pio_copy_data(vcpu);
@@ -2714,16 +3053,13 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
 			kvm_inject_gp(vcpu, 0);
 			kvm_inject_gp(vcpu, 0);
 			return 1;
 			return 1;
 		}
 		}
-		if (ret == 0 && pio_dev) {
-			pio_string_write(pio_dev, vcpu);
+		if (ret == 0 && !pio_string_write(vcpu)) {
 			complete_pio(vcpu);
 			complete_pio(vcpu);
 			if (vcpu->arch.pio.count == 0)
 			if (vcpu->arch.pio.count == 0)
 				ret = 1;
 				ret = 1;
 		}
 		}
-	} else if (pio_dev)
-		pr_unimpl(vcpu, "no string pio read support yet, "
-		       "port %x size %d count %ld\n",
-			port, size, count);
+	}
+	/* no string PIO read support yet */
 
 
 	return ret;
 	return ret;
 }
 }
@@ -2756,10 +3092,7 @@ static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long va
 
 
 	spin_lock(&kvm_lock);
 	spin_lock(&kvm_lock);
 	list_for_each_entry(kvm, &vm_list, vm_list) {
 	list_for_each_entry(kvm, &vm_list, vm_list) {
-		for (i = 0; i < KVM_MAX_VCPUS; ++i) {
-			vcpu = kvm->vcpus[i];
-			if (!vcpu)
-				continue;
+		kvm_for_each_vcpu(i, vcpu, kvm) {
 			if (vcpu->cpu != freq->cpu)
 			if (vcpu->cpu != freq->cpu)
 				continue;
 				continue;
 			if (!kvm_request_guest_time_update(vcpu))
 			if (!kvm_request_guest_time_update(vcpu))
@@ -2852,7 +3185,6 @@ void kvm_arch_exit(void)
 int kvm_emulate_halt(struct kvm_vcpu *vcpu)
 int kvm_emulate_halt(struct kvm_vcpu *vcpu)
 {
 {
 	++vcpu->stat.halt_exits;
 	++vcpu->stat.halt_exits;
-	KVMTRACE_0D(HLT, vcpu, handler);
 	if (irqchip_in_kernel(vcpu->kvm)) {
 	if (irqchip_in_kernel(vcpu->kvm)) {
 		vcpu->arch.mp_state = KVM_MP_STATE_HALTED;
 		vcpu->arch.mp_state = KVM_MP_STATE_HALTED;
 		return 1;
 		return 1;
@@ -2883,7 +3215,7 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
 	a2 = kvm_register_read(vcpu, VCPU_REGS_RDX);
 	a2 = kvm_register_read(vcpu, VCPU_REGS_RDX);
 	a3 = kvm_register_read(vcpu, VCPU_REGS_RSI);
 	a3 = kvm_register_read(vcpu, VCPU_REGS_RSI);
 
 
-	KVMTRACE_1D(VMMCALL, vcpu, (u32)nr, handler);
+	trace_kvm_hypercall(nr, a0, a1, a2, a3);
 
 
 	if (!is_long_mode(vcpu)) {
 	if (!is_long_mode(vcpu)) {
 		nr &= 0xFFFFFFFF;
 		nr &= 0xFFFFFFFF;
@@ -2893,6 +3225,11 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
 		a3 &= 0xFFFFFFFF;
 	}
 
+	if (kvm_x86_ops->get_cpl(vcpu) != 0) {
+		ret = -KVM_EPERM;
+		goto out;
+	}
+
 	switch (nr) {
 	case KVM_HC_VAPIC_POLL_IRQ:
 		ret = 0;
@@ -2904,6 +3241,7 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
 		ret = -KVM_ENOSYS;
 		break;
 	}
+out:
 	kvm_register_write(vcpu, VCPU_REGS_RAX, ret);
 	++vcpu->stat.hypercalls;
 	return r;
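With the new CPL check above, a hypercall issued from guest user mode now returns -KVM_EPERM instead of being serviced. A minimal guest-side sketch of the convention assumed here (hypercall number in RAX, result back in RAX, vmcall on Intel / vmmcall on AMD); the helper name is illustrative and not part of this patch:

    /* Illustrative guest-side helper, mirroring the kvm_hypercall0() pattern
     * from asm/kvm_para.h.  After this change only code running at CPL 0
     * gets a meaningful result; user-mode callers see -KVM_EPERM in RAX. */
    static inline long example_kvm_hypercall0(unsigned int nr)
    {
        long ret;

        asm volatile("vmcall"          /* "vmmcall" on AMD hardware */
                     : "=a"(ret)
                     : "a"(nr)
                     : "memory");
        return ret;
    }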
@@ -2983,8 +3321,6 @@ unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr)
 		vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr);
 		vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr);
 		return 0;
 		return 0;
 	}
 	}
-	KVMTRACE_3D(CR_READ, vcpu, (u32)cr, (u32)value,
-		    (u32)((u64)value >> 32), handler);
 
 
 	return value;
 	return value;
 }
 }
@@ -2992,9 +3328,6 @@ unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr)
 void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long val,
 void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long val,
 		     unsigned long *rflags)
 		     unsigned long *rflags)
 {
 {
-	KVMTRACE_3D(CR_WRITE, vcpu, (u32)cr, (u32)val,
-		    (u32)((u64)val >> 32), handler);
-
 	switch (cr) {
 	switch (cr) {
 	case 0:
 	case 0:
 		kvm_set_cr0(vcpu, mk_cr_64(vcpu->arch.cr0, val));
 		kvm_set_cr0(vcpu, mk_cr_64(vcpu->arch.cr0, val));
@@ -3104,11 +3437,11 @@ void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
 		kvm_register_write(vcpu, VCPU_REGS_RDX, best->edx);
 	}
 	kvm_x86_ops->skip_emulated_instruction(vcpu);
-	KVMTRACE_5D(CPUID, vcpu, function,
-		    (u32)kvm_register_read(vcpu, VCPU_REGS_RAX),
-		    (u32)kvm_register_read(vcpu, VCPU_REGS_RBX),
-		    (u32)kvm_register_read(vcpu, VCPU_REGS_RCX),
-		    (u32)kvm_register_read(vcpu, VCPU_REGS_RDX), handler);
+	trace_kvm_cpuid(function,
+			kvm_register_read(vcpu, VCPU_REGS_RAX),
+			kvm_register_read(vcpu, VCPU_REGS_RBX),
+			kvm_register_read(vcpu, VCPU_REGS_RCX),
+			kvm_register_read(vcpu, VCPU_REGS_RDX));
 }
 EXPORT_SYMBOL_GPL(kvm_emulate_cpuid);
 
 
@@ -3174,6 +3507,9 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu)
 	if (!kvm_x86_ops->update_cr8_intercept)
 		return;
 
+	if (!vcpu->arch.apic)
+		return;
+
 	if (!vcpu->arch.apic->vapic_addr)
 		max_irr = kvm_lapic_find_highest_irr(vcpu);
 	else
@@ -3187,12 +3523,16 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu)
 	kvm_x86_ops->update_cr8_intercept(vcpu, tpr, max_irr);
 	kvm_x86_ops->update_cr8_intercept(vcpu, tpr, max_irr);
 }
 }
 
 
-static void inject_pending_irq(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+static void inject_pending_event(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
 {
-	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
-		kvm_x86_ops->set_interrupt_shadow(vcpu, 0);
-
 	/* try to reinject previous events if any */
 	/* try to reinject previous events if any */
+	if (vcpu->arch.exception.pending) {
+		kvm_x86_ops->queue_exception(vcpu, vcpu->arch.exception.nr,
+					  vcpu->arch.exception.has_error_code,
+					  vcpu->arch.exception.error_code);
+		return;
+	}
+
 	if (vcpu->arch.nmi_injected) {
 	if (vcpu->arch.nmi_injected) {
 		kvm_x86_ops->set_nmi(vcpu);
 		kvm_x86_ops->set_nmi(vcpu);
 		return;
 		return;
@@ -3266,16 +3606,14 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	smp_mb__after_clear_bit();
 	smp_mb__after_clear_bit();
 
 
 	if (vcpu->requests || need_resched() || signal_pending(current)) {
 	if (vcpu->requests || need_resched() || signal_pending(current)) {
+		set_bit(KVM_REQ_KICK, &vcpu->requests);
 		local_irq_enable();
 		local_irq_enable();
 		preempt_enable();
 		preempt_enable();
 		r = 1;
 		r = 1;
 		goto out;
 		goto out;
 	}
 	}
 
 
-	if (vcpu->arch.exception.pending)
-		__queue_exception(vcpu);
-	else
-		inject_pending_irq(vcpu, kvm_run);
+	inject_pending_event(vcpu, kvm_run);
 
 
 	/* enable NMI/IRQ window open exits if needed */
 	/* enable NMI/IRQ window open exits if needed */
 	if (vcpu->arch.nmi_pending)
 	if (vcpu->arch.nmi_pending)
@@ -3292,14 +3630,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 
 
 	kvm_guest_enter();
 	kvm_guest_enter();
 
 
-	get_debugreg(vcpu->arch.host_dr6, 6);
-	get_debugreg(vcpu->arch.host_dr7, 7);
 	if (unlikely(vcpu->arch.switch_db_regs)) {
 	if (unlikely(vcpu->arch.switch_db_regs)) {
-		get_debugreg(vcpu->arch.host_db[0], 0);
-		get_debugreg(vcpu->arch.host_db[1], 1);
-		get_debugreg(vcpu->arch.host_db[2], 2);
-		get_debugreg(vcpu->arch.host_db[3], 3);
-
 		set_debugreg(0, 7);
 		set_debugreg(0, 7);
 		set_debugreg(vcpu->arch.eff_db[0], 0);
 		set_debugreg(vcpu->arch.eff_db[0], 0);
 		set_debugreg(vcpu->arch.eff_db[1], 1);
 		set_debugreg(vcpu->arch.eff_db[1], 1);
@@ -3307,18 +3638,17 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 		set_debugreg(vcpu->arch.eff_db[3], 3);
 		set_debugreg(vcpu->arch.eff_db[3], 3);
 	}
 	}
 
 
-	KVMTRACE_0D(VMENTRY, vcpu, entryexit);
+	trace_kvm_entry(vcpu->vcpu_id);
 	kvm_x86_ops->run(vcpu, kvm_run);
 	kvm_x86_ops->run(vcpu, kvm_run);
 
 
-	if (unlikely(vcpu->arch.switch_db_regs)) {
-		set_debugreg(0, 7);
-		set_debugreg(vcpu->arch.host_db[0], 0);
-		set_debugreg(vcpu->arch.host_db[1], 1);
-		set_debugreg(vcpu->arch.host_db[2], 2);
-		set_debugreg(vcpu->arch.host_db[3], 3);
+	if (unlikely(vcpu->arch.switch_db_regs || test_thread_flag(TIF_DEBUG))) {
+		set_debugreg(current->thread.debugreg0, 0);
+		set_debugreg(current->thread.debugreg1, 1);
+		set_debugreg(current->thread.debugreg2, 2);
+		set_debugreg(current->thread.debugreg3, 3);
+		set_debugreg(current->thread.debugreg6, 6);
+		set_debugreg(current->thread.debugreg7, 7);
 	}
 	}
-	set_debugreg(vcpu->arch.host_dr6, 6);
-	set_debugreg(vcpu->arch.host_dr7, 7);
 
 
 	set_bit(KVM_REQ_KICK, &vcpu->requests);
 	set_bit(KVM_REQ_KICK, &vcpu->requests);
 	local_irq_enable();
 	local_irq_enable();
@@ -3648,11 +3978,8 @@ static void kvm_set_segment(struct kvm_vcpu *vcpu,
 static void seg_desct_to_kvm_desct(struct desc_struct *seg_desc, u16 selector,
 				   struct kvm_segment *kvm_desct)
 {
-	kvm_desct->base = seg_desc->base0;
-	kvm_desct->base |= seg_desc->base1 << 16;
-	kvm_desct->base |= seg_desc->base2 << 24;
-	kvm_desct->limit = seg_desc->limit0;
-	kvm_desct->limit |= seg_desc->limit << 16;
+	kvm_desct->base = get_desc_base(seg_desc);
+	kvm_desct->limit = get_desc_limit(seg_desc);
 	if (seg_desc->g) {
 		kvm_desct->limit <<= 12;
 		kvm_desct->limit |= 0xfff;
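get_desc_base() and get_desc_limit() (asm/desc_defs.h helpers) replace the open-coded field assembly deleted above. A rough sketch of what they compute from the scattered descriptor fields, named differently here to make clear it is illustrative rather than the kernel's own implementation:

    /* A segment descriptor scatters the 32-bit base across base0/base1/base2
     * and the 20-bit limit across limit0 plus a 4-bit limit field; the
     * helpers simply reassemble them, as the removed lines did by hand. */
    static unsigned long example_desc_base(const struct desc_struct *d)
    {
        return d->base0 | ((unsigned long)d->base1 << 16) |
               ((unsigned long)d->base2 << 24);
    }

    static unsigned int example_desc_limit(const struct desc_struct *d)
    {
        return d->limit0 | ((unsigned int)d->limit << 16);
    }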
@@ -3696,7 +4023,6 @@ static void get_segment_descriptor_dtable(struct kvm_vcpu *vcpu,
 static int load_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
 static int load_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
 					 struct desc_struct *seg_desc)
 					 struct desc_struct *seg_desc)
 {
 {
-	gpa_t gpa;
 	struct descriptor_table dtable;
 	struct descriptor_table dtable;
 	u16 index = selector >> 3;
 	u16 index = selector >> 3;
 
 
@@ -3706,16 +4032,13 @@ static int load_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
 		kvm_queue_exception_e(vcpu, GP_VECTOR, selector & 0xfffc);
 		kvm_queue_exception_e(vcpu, GP_VECTOR, selector & 0xfffc);
 		return 1;
 		return 1;
 	}
 	}
-	gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, dtable.base);
-	gpa += index * 8;
-	return kvm_read_guest(vcpu->kvm, gpa, seg_desc, 8);
+	return kvm_read_guest_virt(dtable.base + index*8, seg_desc, sizeof(*seg_desc), vcpu);
 }
 }
 
 
 /* allowed just for 8 bytes segments */
 /* allowed just for 8 bytes segments */
 static int save_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
 static int save_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
 					 struct desc_struct *seg_desc)
 					 struct desc_struct *seg_desc)
 {
 {
-	gpa_t gpa;
 	struct descriptor_table dtable;
 	struct descriptor_table dtable;
 	u16 index = selector >> 3;
 	u16 index = selector >> 3;
 
 
@@ -3723,19 +4046,13 @@ static int save_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
 
 
 	if (dtable.limit < index * 8 + 7)
 	if (dtable.limit < index * 8 + 7)
 		return 1;
 		return 1;
-	gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, dtable.base);
-	gpa += index * 8;
-	return kvm_write_guest(vcpu->kvm, gpa, seg_desc, 8);
+	return kvm_write_guest_virt(dtable.base + index*8, seg_desc, sizeof(*seg_desc), vcpu);
 }
 }
 
 
 static u32 get_tss_base_addr(struct kvm_vcpu *vcpu,
 static u32 get_tss_base_addr(struct kvm_vcpu *vcpu,
 			     struct desc_struct *seg_desc)
 			     struct desc_struct *seg_desc)
 {
 {
-	u32 base_addr;
-
-	base_addr = seg_desc->base0;
-	base_addr |= (seg_desc->base1 << 16);
-	base_addr |= (seg_desc->base2 << 24);
+	u32 base_addr = get_desc_base(seg_desc);
 
 
 	return vcpu->arch.mmu.gva_to_gpa(vcpu, base_addr);
 	return vcpu->arch.mmu.gva_to_gpa(vcpu, base_addr);
 }
 }
@@ -3780,12 +4097,19 @@ static int kvm_load_realmode_segment(struct kvm_vcpu *vcpu, u16 selector, int se
 	return 0;
 	return 0;
 }
 }
 
 
+static int is_vm86_segment(struct kvm_vcpu *vcpu, int seg)
+{
+	return (seg != VCPU_SREG_LDTR) &&
+		(seg != VCPU_SREG_TR) &&
+		(kvm_x86_ops->get_rflags(vcpu) & X86_EFLAGS_VM);
+}
+
 int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
 int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
 				int type_bits, int seg)
 				int type_bits, int seg)
 {
 {
 	struct kvm_segment kvm_seg;
 	struct kvm_segment kvm_seg;
 
 
-	if (!(vcpu->arch.cr0 & X86_CR0_PE))
+	if (is_vm86_segment(vcpu, seg) || !(vcpu->arch.cr0 & X86_CR0_PE))
 		return kvm_load_realmode_segment(vcpu, selector, seg);
 		return kvm_load_realmode_segment(vcpu, selector, seg);
 	if (load_segment_descriptor_to_kvm_desct(vcpu, selector, &kvm_seg))
 	if (load_segment_descriptor_to_kvm_desct(vcpu, selector, &kvm_seg))
 		return 1;
 		return 1;
@@ -4024,7 +4348,7 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason)
 		}
 		}
 	}
 	}
 
 
-	if (!nseg_desc.p || (nseg_desc.limit0 | nseg_desc.limit << 16) < 0x67) {
+	if (!nseg_desc.p || get_desc_limit(&nseg_desc) < 0x67) {
 		kvm_queue_exception_e(vcpu, TS_VECTOR, tss_selector & 0xfffc);
 		kvm_queue_exception_e(vcpu, TS_VECTOR, tss_selector & 0xfffc);
 		return 1;
 		return 1;
 	}
 	}
@@ -4094,13 +4418,7 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
 
 
 	vcpu->arch.cr2 = sregs->cr2;
 	vcpu->arch.cr2 = sregs->cr2;
 	mmu_reset_needed |= vcpu->arch.cr3 != sregs->cr3;
 	mmu_reset_needed |= vcpu->arch.cr3 != sregs->cr3;
-
-	down_read(&vcpu->kvm->slots_lock);
-	if (gfn_to_memslot(vcpu->kvm, sregs->cr3 >> PAGE_SHIFT))
-		vcpu->arch.cr3 = sregs->cr3;
-	else
-		set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests);
-	up_read(&vcpu->kvm->slots_lock);
+	vcpu->arch.cr3 = sregs->cr3;
 
 
 	kvm_set_cr8(vcpu, sregs->cr8);
 	kvm_set_cr8(vcpu, sregs->cr8);
 
 
@@ -4142,8 +4460,10 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
 	kvm_set_segment(vcpu, &sregs->tr, VCPU_SREG_TR);
 	kvm_set_segment(vcpu, &sregs->tr, VCPU_SREG_TR);
 	kvm_set_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR);
 	kvm_set_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR);
 
 
+	update_cr8_intercept(vcpu);
+
 	/* Older userspace won't unhalt the vcpu on reset. */
 	/* Older userspace won't unhalt the vcpu on reset. */
-	if (vcpu->vcpu_id == 0 && kvm_rip_read(vcpu) == 0xfff0 &&
+	if (kvm_vcpu_is_bsp(vcpu) && kvm_rip_read(vcpu) == 0xfff0 &&
 	    sregs->cs.selector == 0xf000 && sregs->cs.base == 0xffff0000 &&
 	    sregs->cs.selector == 0xf000 && sregs->cs.base == 0xffff0000 &&
 	    !(vcpu->arch.cr0 & X86_CR0_PE))
 	    !(vcpu->arch.cr0 & X86_CR0_PE))
 		vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
 		vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
@@ -4414,7 +4734,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 	kvm = vcpu->kvm;
 	kvm = vcpu->kvm;
 
 
 	vcpu->arch.mmu.root_hpa = INVALID_PAGE;
 	vcpu->arch.mmu.root_hpa = INVALID_PAGE;
-	if (!irqchip_in_kernel(kvm) || vcpu->vcpu_id == 0)
+	if (!irqchip_in_kernel(kvm) || kvm_vcpu_is_bsp(vcpu))
 		vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
 		vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
 	else
 	else
 		vcpu->arch.mp_state = KVM_MP_STATE_UNINITIALIZED;
 		vcpu->arch.mp_state = KVM_MP_STATE_UNINITIALIZED;
@@ -4436,6 +4756,14 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 			goto fail_mmu_destroy;
 			goto fail_mmu_destroy;
 	}
 	}
 
 
+	vcpu->arch.mce_banks = kzalloc(KVM_MAX_MCE_BANKS * sizeof(u64) * 4,
+				       GFP_KERNEL);
+	if (!vcpu->arch.mce_banks) {
+		r = -ENOMEM;
+		goto fail_mmu_destroy;
+	}
+	vcpu->arch.mcg_cap = KVM_MAX_MCE_BANKS;
+
 	return 0;
 	return 0;
 
 
 fail_mmu_destroy:
 fail_mmu_destroy:
@@ -4483,20 +4811,22 @@ static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu)
 static void kvm_free_vcpus(struct kvm *kvm)
 static void kvm_free_vcpus(struct kvm *kvm)
 {
 	unsigned int i;
+	struct kvm_vcpu *vcpu;
 
 	/*
 	 * Unpin any mmu pages first.
 	 */
-	for (i = 0; i < KVM_MAX_VCPUS; ++i)
-		if (kvm->vcpus[i])
-			kvm_unload_vcpu_mmu(kvm->vcpus[i]);
-	for (i = 0; i < KVM_MAX_VCPUS; ++i) {
-		if (kvm->vcpus[i]) {
-			kvm_arch_vcpu_free(kvm->vcpus[i]);
-			kvm->vcpus[i] = NULL;
-		}
-	}
+	kvm_for_each_vcpu(i, vcpu, kvm)
+		kvm_unload_vcpu_mmu(vcpu);
+	kvm_for_each_vcpu(i, vcpu, kvm)
+		kvm_arch_vcpu_free(vcpu);
+
+	mutex_lock(&kvm->lock);
+	for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
+		kvm->vcpus[i] = NULL;
 
+	atomic_set(&kvm->online_vcpus, 0);
+	mutex_unlock(&kvm->lock);
 }
 
 
 void kvm_arch_sync_events(struct kvm *kvm)
 void kvm_arch_sync_events(struct kvm *kvm)
@@ -4573,7 +4903,6 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
 
 
 	kvm_mmu_slot_remove_write_access(kvm, mem->slot);
 	kvm_mmu_slot_remove_write_access(kvm, mem->slot);
 	spin_unlock(&kvm->mmu_lock);
 	spin_unlock(&kvm->mmu_lock);
-	kvm_flush_remote_tlbs(kvm);
 
 
 	return 0;
 	return 0;
 }
 }
@@ -4587,8 +4916,10 @@ void kvm_arch_flush_shadow(struct kvm *kvm)
 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
 {
 {
 	return vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE
 	return vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE
-	       || vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED
-	       || vcpu->arch.nmi_pending;
+		|| vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED
+		|| vcpu->arch.nmi_pending ||
+		(kvm_arch_interrupt_allowed(vcpu) &&
+		 kvm_cpu_has_interrupt(vcpu));
 }
 }
 
 
 void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
 void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
@@ -4612,3 +4943,9 @@ int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu)
 {
 	return kvm_x86_ops->interrupt_allowed(vcpu);
 }
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit);
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq);
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault);
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_msr);
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_cr);

+ 4 - 0
arch/x86/kvm/x86.h

@@ -31,4 +31,8 @@ static inline bool kvm_exception_is_soft(unsigned int nr)
 {
 	return (nr == BP_VECTOR) || (nr == OF_VECTOR);
 }
+
+struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
+                                             u32 function, u32 index);
+
 #endif
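Exposing kvm_find_cpuid_entry() in x86.h lets other x86 KVM code look up guest CPUID leaves directly. A hedged usage sketch (the wrapper name and the feature bit chosen are only an example):

    /* Illustrative only: fetch the guest's CPUID leaf 1 and test an ECX bit. */
    static bool example_guest_has_x2apic(struct kvm_vcpu *vcpu)
    {
        struct kvm_cpuid_entry2 *best;

        best = kvm_find_cpuid_entry(vcpu, 1, 0);
        return best && (best->ecx & (1u << 21));   /* CPUID.1:ECX bit 21 = x2APIC */
    }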

+ 1 - 0
arch/x86/mm/highmem_32.c

@@ -104,6 +104,7 @@ EXPORT_SYMBOL(kunmap);
 EXPORT_SYMBOL(kmap_atomic);
 EXPORT_SYMBOL(kmap_atomic);
 EXPORT_SYMBOL(kunmap_atomic);
 EXPORT_SYMBOL(kunmap_atomic);
 EXPORT_SYMBOL(kmap_atomic_prot);
 EXPORT_SYMBOL(kmap_atomic_prot);
+EXPORT_SYMBOL(kmap_atomic_to_page);
 
 
 void __init set_highmem_pages_init(void)
 void __init set_highmem_pages_init(void)
 {
 {

+ 5 - 0
include/asm-generic/Kbuild.asm

@@ -3,6 +3,11 @@ ifneq ($(wildcard $(srctree)/arch/$(SRCARCH)/include/asm/kvm.h \
 header-y  += kvm.h
 header-y  += kvm.h
 endif
 endif
 
 
+ifneq ($(wildcard $(srctree)/arch/$(SRCARCH)/include/asm/kvm_para.h \
+		  $(srctree)/include/asm-$(SRCARCH)/kvm_para.h),)
+header-y  += kvm_para.h
+endif
+
 ifneq ($(wildcard $(srctree)/arch/$(SRCARCH)/include/asm/a.out.h \
 ifneq ($(wildcard $(srctree)/arch/$(SRCARCH)/include/asm/a.out.h \
       		  $(srctree)/include/asm-$(SRCARCH)/a.out.h),)
       		  $(srctree)/include/asm-$(SRCARCH)/a.out.h),)
 unifdef-y += a.out.h
 unifdef-y += a.out.h

+ 4 - 0
include/linux/Kbuild

@@ -268,6 +268,10 @@ ifneq ($(wildcard $(srctree)/arch/$(SRCARCH)/include/asm/kvm.h \
       		  $(srctree)/include/asm-$(SRCARCH)/kvm.h),)
       		  $(srctree)/include/asm-$(SRCARCH)/kvm.h),)
 unifdef-y += kvm.h
 unifdef-y += kvm.h
 endif
 endif
+ifneq ($(wildcard $(srctree)/arch/$(SRCARCH)/include/asm/kvm_para.h \
+		  $(srctree)/include/asm-$(SRCARCH)/kvm_para.h),)
+unifdef-y += kvm_para.h
+endif
 unifdef-y += llc.h
 unifdef-y += llc.h
 unifdef-y += loop.h
 unifdef-y += loop.h
 unifdef-y += lp.h
 unifdef-y += lp.h

+ 91 - 36
include/linux/kvm.h

@@ -14,7 +14,7 @@
 
 
 #define KVM_API_VERSION 12
 #define KVM_API_VERSION 12
 
 
-/* for KVM_TRACE_ENABLE */
+/* for KVM_TRACE_ENABLE, deprecated */
 struct kvm_user_trace_setup {
 struct kvm_user_trace_setup {
 	__u32 buf_size; /* sub_buffer size of each per-cpu */
 	__u32 buf_size; /* sub_buffer size of each per-cpu */
 	__u32 buf_nr; /* the number of sub_buffers of each per-cpu */
 	__u32 buf_nr; /* the number of sub_buffers of each per-cpu */
@@ -70,6 +70,14 @@ struct kvm_irqchip {
 	} chip;
 	} chip;
 };
 };
 
 
+/* for KVM_CREATE_PIT2 */
+struct kvm_pit_config {
+	__u32 flags;
+	__u32 pad[15];
+};
+
+#define KVM_PIT_SPEAKER_DUMMY     1
+
 #define KVM_EXIT_UNKNOWN          0
 #define KVM_EXIT_UNKNOWN          0
 #define KVM_EXIT_EXCEPTION        1
 #define KVM_EXIT_EXCEPTION        1
 #define KVM_EXIT_IO               2
 #define KVM_EXIT_IO               2
@@ -87,6 +95,10 @@ struct kvm_irqchip {
 #define KVM_EXIT_S390_RESET       14
 #define KVM_EXIT_S390_RESET       14
 #define KVM_EXIT_DCR              15
 #define KVM_EXIT_DCR              15
 #define KVM_EXIT_NMI              16
 #define KVM_EXIT_NMI              16
+#define KVM_EXIT_INTERNAL_ERROR   17
+
+/* For KVM_EXIT_INTERNAL_ERROR */
+#define KVM_INTERNAL_ERROR_EMULATION 1
 
 
 /* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */
 /* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */
 struct kvm_run {
 struct kvm_run {
@@ -173,6 +185,9 @@ struct kvm_run {
 			__u32 data;
 			__u32 data;
 			__u8  is_write;
 			__u8  is_write;
 		} dcr;
 		} dcr;
+		struct {
+			__u32 suberror;
+		} internal;
 		/* Fix the size of the union. */
 		/* Fix the size of the union. */
 		char padding[256];
 		char padding[256];
 	};
 	};
@@ -292,6 +307,28 @@ struct kvm_guest_debug {
 	struct kvm_guest_debug_arch arch;
 	struct kvm_guest_debug_arch arch;
 };
 };
 
 
+enum {
+	kvm_ioeventfd_flag_nr_datamatch,
+	kvm_ioeventfd_flag_nr_pio,
+	kvm_ioeventfd_flag_nr_deassign,
+	kvm_ioeventfd_flag_nr_max,
+};
+
+#define KVM_IOEVENTFD_FLAG_DATAMATCH (1 << kvm_ioeventfd_flag_nr_datamatch)
+#define KVM_IOEVENTFD_FLAG_PIO       (1 << kvm_ioeventfd_flag_nr_pio)
+#define KVM_IOEVENTFD_FLAG_DEASSIGN  (1 << kvm_ioeventfd_flag_nr_deassign)
+
+#define KVM_IOEVENTFD_VALID_FLAG_MASK  ((1 << kvm_ioeventfd_flag_nr_max) - 1)
+
+struct kvm_ioeventfd {
+	__u64 datamatch;
+	__u64 addr;        /* legal pio/mmio address */
+	__u32 len;         /* 1, 2, 4, or 8 bytes    */
+	__s32 fd;
+	__u32 flags;
+	__u8  pad[36];
+};
+
 #define KVM_TRC_SHIFT           16
 #define KVM_TRC_SHIFT           16
 /*
 /*
  * kvm trace categories
  * kvm trace categories
@@ -310,35 +347,6 @@ struct kvm_guest_debug {
 #define KVM_TRC_CYCLE_SIZE      8
 #define KVM_TRC_CYCLE_SIZE      8
 #define KVM_TRC_EXTRA_MAX       7
 #define KVM_TRC_EXTRA_MAX       7
 
 
-/* This structure represents a single trace buffer record. */
-struct kvm_trace_rec {
-	/* variable rec_val
-	 * is split into:
-	 * bits 0 - 27  -> event id
-	 * bits 28 -30  -> number of extra data args of size u32
-	 * bits 31      -> binary indicator for if tsc is in record
-	 */
-	__u32 rec_val;
-	__u32 pid;
-	__u32 vcpu_id;
-	union {
-		struct {
-			__u64 timestamp;
-			__u32 extra_u32[KVM_TRC_EXTRA_MAX];
-		} __attribute__((packed)) timestamp;
-		struct {
-			__u32 extra_u32[KVM_TRC_EXTRA_MAX];
-		} notimestamp;
-	} u;
-};
-
-#define TRACE_REC_EVENT_ID(val) \
-		(0x0fffffff & (val))
-#define TRACE_REC_NUM_DATA_ARGS(val) \
-		(0x70000000 & ((val) << 28))
-#define TRACE_REC_TCS(val) \
-		(0x80000000 & ((val) << 31))
-
 #define KVMIO 0xAE
 #define KVMIO 0xAE
 
 
 /*
 /*
@@ -415,6 +423,19 @@ struct kvm_trace_rec {
 #define KVM_CAP_ASSIGN_DEV_IRQ 29
 #define KVM_CAP_ASSIGN_DEV_IRQ 29
 /* Another bug in KVM_SET_USER_MEMORY_REGION fixed: */
 /* Another bug in KVM_SET_USER_MEMORY_REGION fixed: */
 #define KVM_CAP_JOIN_MEMORY_REGIONS_WORKS 30
 #define KVM_CAP_JOIN_MEMORY_REGIONS_WORKS 30
+#ifdef __KVM_HAVE_MCE
+#define KVM_CAP_MCE 31
+#endif
+#define KVM_CAP_IRQFD 32
+#ifdef __KVM_HAVE_PIT
+#define KVM_CAP_PIT2 33
+#endif
+#define KVM_CAP_SET_BOOT_CPU_ID 34
+#ifdef __KVM_HAVE_PIT_STATE2
+#define KVM_CAP_PIT_STATE2 35
+#endif
+#define KVM_CAP_IOEVENTFD 36
+#define KVM_CAP_SET_IDENTITY_MAP_ADDR 37
 
 
 #ifdef KVM_CAP_IRQ_ROUTING
 #ifdef KVM_CAP_IRQ_ROUTING
 
 
@@ -454,15 +475,32 @@ struct kvm_irq_routing {
 
 
 #endif
 #endif
 
 
+#ifdef KVM_CAP_MCE
+/* x86 MCE */
+struct kvm_x86_mce {
+	__u64 status;
+	__u64 addr;
+	__u64 misc;
+	__u64 mcg_status;
+	__u8 bank;
+	__u8 pad1[7];
+	__u64 pad2[3];
+};
+#endif
+
+#define KVM_IRQFD_FLAG_DEASSIGN (1 << 0)
+
+struct kvm_irqfd {
+	__u32 fd;
+	__u32 gsi;
+	__u32 flags;
+	__u8  pad[20];
+};
+
 /*
 /*
  * ioctls for VM fds
  * ioctls for VM fds
  */
  */
 #define KVM_SET_MEMORY_REGION     _IOW(KVMIO, 0x40, struct kvm_memory_region)
 #define KVM_SET_MEMORY_REGION     _IOW(KVMIO, 0x40, struct kvm_memory_region)
-#define KVM_SET_NR_MMU_PAGES      _IO(KVMIO, 0x44)
-#define KVM_GET_NR_MMU_PAGES      _IO(KVMIO, 0x45)
-#define KVM_SET_USER_MEMORY_REGION _IOW(KVMIO, 0x46,\
-					struct kvm_userspace_memory_region)
-#define KVM_SET_TSS_ADDR          _IO(KVMIO, 0x47)
 /*
 /*
  * KVM_CREATE_VCPU receives as a parameter the vcpu slot, and returns
  * KVM_CREATE_VCPU receives as a parameter the vcpu slot, and returns
  * a vcpu fd.
  * a vcpu fd.
@@ -470,6 +508,12 @@ struct kvm_irq_routing {
 #define KVM_CREATE_VCPU           _IO(KVMIO,  0x41)
 #define KVM_CREATE_VCPU           _IO(KVMIO,  0x41)
 #define KVM_GET_DIRTY_LOG         _IOW(KVMIO, 0x42, struct kvm_dirty_log)
 #define KVM_GET_DIRTY_LOG         _IOW(KVMIO, 0x42, struct kvm_dirty_log)
 #define KVM_SET_MEMORY_ALIAS      _IOW(KVMIO, 0x43, struct kvm_memory_alias)
 #define KVM_SET_MEMORY_ALIAS      _IOW(KVMIO, 0x43, struct kvm_memory_alias)
+#define KVM_SET_NR_MMU_PAGES      _IO(KVMIO, 0x44)
+#define KVM_GET_NR_MMU_PAGES      _IO(KVMIO, 0x45)
+#define KVM_SET_USER_MEMORY_REGION _IOW(KVMIO, 0x46,\
+					struct kvm_userspace_memory_region)
+#define KVM_SET_TSS_ADDR          _IO(KVMIO, 0x47)
+#define KVM_SET_IDENTITY_MAP_ADDR _IOW(KVMIO, 0x48, __u64)
 /* Device model IOC */
 /* Device model IOC */
 #define KVM_CREATE_IRQCHIP	  _IO(KVMIO,  0x60)
 #define KVM_CREATE_IRQCHIP	  _IO(KVMIO,  0x60)
 #define KVM_IRQ_LINE		  _IOW(KVMIO, 0x61, struct kvm_irq_level)
 #define KVM_IRQ_LINE		  _IOW(KVMIO, 0x61, struct kvm_irq_level)
@@ -498,6 +542,10 @@ struct kvm_irq_routing {
 #define KVM_ASSIGN_SET_MSIX_ENTRY \
 #define KVM_ASSIGN_SET_MSIX_ENTRY \
 			_IOW(KVMIO, 0x74, struct kvm_assigned_msix_entry)
 			_IOW(KVMIO, 0x74, struct kvm_assigned_msix_entry)
 #define KVM_DEASSIGN_DEV_IRQ       _IOW(KVMIO, 0x75, struct kvm_assigned_irq)
 #define KVM_DEASSIGN_DEV_IRQ       _IOW(KVMIO, 0x75, struct kvm_assigned_irq)
+#define KVM_IRQFD                  _IOW(KVMIO, 0x76, struct kvm_irqfd)
+#define KVM_CREATE_PIT2		   _IOW(KVMIO, 0x77, struct kvm_pit_config)
+#define KVM_SET_BOOT_CPU_ID        _IO(KVMIO, 0x78)
+#define KVM_IOEVENTFD             _IOW(KVMIO, 0x79, struct kvm_ioeventfd)
 
 
 /*
 /*
  * ioctls for vcpu fds
  * ioctls for vcpu fds
@@ -541,6 +589,10 @@ struct kvm_irq_routing {
 #define KVM_NMI                   _IO(KVMIO,  0x9a)
 #define KVM_NMI                   _IO(KVMIO,  0x9a)
 /* Available with KVM_CAP_SET_GUEST_DEBUG */
 /* Available with KVM_CAP_SET_GUEST_DEBUG */
 #define KVM_SET_GUEST_DEBUG       _IOW(KVMIO,  0x9b, struct kvm_guest_debug)
 #define KVM_SET_GUEST_DEBUG       _IOW(KVMIO,  0x9b, struct kvm_guest_debug)
+/* MCE for x86 */
+#define KVM_X86_SETUP_MCE         _IOW(KVMIO,  0x9c, __u64)
+#define KVM_X86_GET_MCE_CAP_SUPPORTED _IOR(KVMIO,  0x9d, __u64)
+#define KVM_X86_SET_MCE           _IOW(KVMIO,  0x9e, struct kvm_x86_mce)
 
 
 /*
 /*
  * Deprecated interfaces
  * Deprecated interfaces
@@ -563,6 +615,9 @@ struct kvm_debug_guest {
 #define KVM_IA64_VCPU_GET_STACK   _IOR(KVMIO,  0x9a, void *)
 #define KVM_IA64_VCPU_GET_STACK   _IOR(KVMIO,  0x9a, void *)
 #define KVM_IA64_VCPU_SET_STACK   _IOW(KVMIO,  0x9b, void *)
 #define KVM_IA64_VCPU_SET_STACK   _IOW(KVMIO,  0x9b, void *)
 
 
+#define KVM_GET_PIT2   _IOR(KVMIO,   0x9f, struct kvm_pit_state2)
+#define KVM_SET_PIT2   _IOW(KVMIO,   0xa0, struct kvm_pit_state2)
+
 #define KVM_TRC_INJ_VIRQ         (KVM_TRC_HANDLER + 0x02)
 #define KVM_TRC_INJ_VIRQ         (KVM_TRC_HANDLER + 0x02)
 #define KVM_TRC_REDELIVER_EVT    (KVM_TRC_HANDLER + 0x03)
 #define KVM_TRC_REDELIVER_EVT    (KVM_TRC_HANDLER + 0x03)
 #define KVM_TRC_PEND_INTR        (KVM_TRC_HANDLER + 0x04)
 #define KVM_TRC_PEND_INTR        (KVM_TRC_HANDLER + 0x04)
@@ -633,7 +688,7 @@ struct kvm_assigned_msix_nr {
 	__u16 padding;
 	__u16 padding;
 };
 };
 
 
-#define KVM_MAX_MSIX_PER_DEV		512
+#define KVM_MAX_MSIX_PER_DEV		256
 struct kvm_assigned_msix_entry {
 struct kvm_assigned_msix_entry {
 	__u32 assigned_dev_id;
 	__u32 assigned_dev_id;
 	__u32 gsi;
 	__u32 gsi;

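The new KVM_IOEVENTFD ioctl and struct kvm_ioeventfd above let userspace attach an eventfd to a guest PIO/MMIO address, so that matching guest writes signal the eventfd instead of forcing a full exit to userspace. A minimal userspace sketch, assuming vm_fd came from KVM_CREATE_VM; the port, length and datamatch values are made up for illustration:

    /* Illustrative snippet: signal the returned eventfd whenever the guest
     * writes the 2-byte value 0x1 to PIO port 0x510. */
    #include <sys/eventfd.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    int setup_example_ioeventfd(int vm_fd)
    {
        int efd = eventfd(0, 0);
        struct kvm_ioeventfd args = {
            .datamatch = 0x1,
            .addr      = 0x510,
            .len       = 2,
            .fd        = efd,
            .flags     = KVM_IOEVENTFD_FLAG_PIO | KVM_IOEVENTFD_FLAG_DATAMATCH,
        };

        if (efd < 0 || ioctl(vm_fd, KVM_IOEVENTFD, &args) < 0)
            return -1;
        return efd;   /* poll/read this fd to observe guest doorbell writes */
    }

Polling the returned eventfd then tells the VMM that the guest touched the "doorbell" address without paying for a userspace exit on every write.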
+ 71 - 43
include/linux/kvm_host.h

@@ -42,6 +42,7 @@
 
 
 #define KVM_USERSPACE_IRQ_SOURCE_ID	0
 #define KVM_USERSPACE_IRQ_SOURCE_ID	0
 
 
+struct kvm;
 struct kvm_vcpu;
 struct kvm_vcpu;
 extern struct kmem_cache *kvm_vcpu_cache;
 extern struct kmem_cache *kvm_vcpu_cache;
 
 
@@ -59,10 +60,18 @@ struct kvm_io_bus {
 
 
 void kvm_io_bus_init(struct kvm_io_bus *bus);
 void kvm_io_bus_init(struct kvm_io_bus *bus);
 void kvm_io_bus_destroy(struct kvm_io_bus *bus);
 void kvm_io_bus_destroy(struct kvm_io_bus *bus);
-struct kvm_io_device *kvm_io_bus_find_dev(struct kvm_io_bus *bus,
-					  gpa_t addr, int len, int is_write);
-void kvm_io_bus_register_dev(struct kvm_io_bus *bus,
-			     struct kvm_io_device *dev);
+int kvm_io_bus_write(struct kvm_io_bus *bus, gpa_t addr, int len,
+		     const void *val);
+int kvm_io_bus_read(struct kvm_io_bus *bus, gpa_t addr, int len,
+		    void *val);
+int __kvm_io_bus_register_dev(struct kvm_io_bus *bus,
+			       struct kvm_io_device *dev);
+int kvm_io_bus_register_dev(struct kvm *kvm, struct kvm_io_bus *bus,
+			    struct kvm_io_device *dev);
+void __kvm_io_bus_unregister_dev(struct kvm_io_bus *bus,
+				 struct kvm_io_device *dev);
+void kvm_io_bus_unregister_dev(struct kvm *kvm, struct kvm_io_bus *bus,
+			       struct kvm_io_device *dev);
 
 
 struct kvm_vcpu {
 struct kvm_vcpu {
 	struct kvm *kvm;
 	struct kvm *kvm;
@@ -103,7 +112,7 @@ struct kvm_memory_slot {
 	struct {
 	struct {
 		unsigned long rmap_pde;
 		unsigned long rmap_pde;
 		int write_count;
 		int write_count;
-	} *lpage_info;
+	} *lpage_info[KVM_NR_PAGE_SIZES - 1];
 	unsigned long userspace_addr;
 	unsigned long userspace_addr;
 	int user_alloc;
 	int user_alloc;
 };
 };
@@ -124,7 +133,6 @@ struct kvm_kernel_irq_routing_entry {
 };
 };
 
 
 struct kvm {
 struct kvm {
-	struct mutex lock; /* protects the vcpus array and APIC accesses */
 	spinlock_t mmu_lock;
 	spinlock_t mmu_lock;
 	spinlock_t requests_lock;
 	spinlock_t requests_lock;
 	struct rw_semaphore slots_lock;
 	struct rw_semaphore slots_lock;
@@ -132,10 +140,23 @@ struct kvm {
 	int nmemslots;
 	int nmemslots;
 	struct kvm_memory_slot memslots[KVM_MEMORY_SLOTS +
 	struct kvm_memory_slot memslots[KVM_MEMORY_SLOTS +
 					KVM_PRIVATE_MEM_SLOTS];
 					KVM_PRIVATE_MEM_SLOTS];
+#ifdef CONFIG_KVM_APIC_ARCHITECTURE
+	u32 bsp_vcpu_id;
+	struct kvm_vcpu *bsp_vcpu;
+#endif
 	struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];
 	struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];
+	atomic_t online_vcpus;
 	struct list_head vm_list;
 	struct list_head vm_list;
+	struct mutex lock;
 	struct kvm_io_bus mmio_bus;
 	struct kvm_io_bus mmio_bus;
 	struct kvm_io_bus pio_bus;
 	struct kvm_io_bus pio_bus;
+#ifdef CONFIG_HAVE_KVM_EVENTFD
+	struct {
+		spinlock_t        lock;
+		struct list_head  items;
+	} irqfds;
+	struct list_head ioeventfds;
+#endif
 	struct kvm_vm_stat stat;
 	struct kvm_vm_stat stat;
 	struct kvm_arch arch;
 	struct kvm_arch arch;
 	atomic_t users_count;
 	atomic_t users_count;
@@ -144,6 +165,7 @@ struct kvm {
 	struct kvm_coalesced_mmio_ring *coalesced_mmio_ring;
 	struct kvm_coalesced_mmio_ring *coalesced_mmio_ring;
 #endif
 #endif
 
 
+	struct mutex irq_lock;
 #ifdef CONFIG_HAVE_KVM_IRQCHIP
 #ifdef CONFIG_HAVE_KVM_IRQCHIP
 	struct list_head irq_routing; /* of kvm_kernel_irq_routing_entry */
 	struct list_head irq_routing; /* of kvm_kernel_irq_routing_entry */
 	struct hlist_head mask_notifier_list;
 	struct hlist_head mask_notifier_list;
@@ -167,6 +189,17 @@ struct kvm {
 #define kvm_printf(kvm, fmt ...) printk(KERN_DEBUG fmt)
 #define kvm_printf(kvm, fmt ...) printk(KERN_DEBUG fmt)
 #define vcpu_printf(vcpu, fmt...) kvm_printf(vcpu->kvm, fmt)
 #define vcpu_printf(vcpu, fmt...) kvm_printf(vcpu->kvm, fmt)
 
 
+static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i)
+{
+	smp_rmb();
+	return kvm->vcpus[i];
+}
+
+#define kvm_for_each_vcpu(idx, vcpup, kvm) \
+	for (idx = 0, vcpup = kvm_get_vcpu(kvm, idx); \
+	     idx < atomic_read(&kvm->online_vcpus) && vcpup; \
+	     vcpup = kvm_get_vcpu(kvm, ++idx))
+
 int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id);
 int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id);
 void kvm_vcpu_uninit(struct kvm_vcpu *vcpu);
 void kvm_vcpu_uninit(struct kvm_vcpu *vcpu);
 
 
@@ -201,6 +234,7 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
 				struct kvm_userspace_memory_region *mem,
 				struct kvm_userspace_memory_region *mem,
 				struct kvm_memory_slot old,
 				struct kvm_memory_slot old,
 				int user_alloc);
 				int user_alloc);
+void kvm_disable_largepages(void);
 void kvm_arch_flush_shadow(struct kvm *kvm);
 void kvm_arch_flush_shadow(struct kvm *kvm);
 gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn);
 gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn);
 struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn);
 struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn);
@@ -243,8 +277,6 @@ long kvm_arch_dev_ioctl(struct file *filp,
 			unsigned int ioctl, unsigned long arg);
 			unsigned int ioctl, unsigned long arg);
 long kvm_arch_vcpu_ioctl(struct file *filp,
 long kvm_arch_vcpu_ioctl(struct file *filp,
 			 unsigned int ioctl, unsigned long arg);
 			 unsigned int ioctl, unsigned long arg);
-void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu);
-void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu);
 
 
 int kvm_dev_ioctl_check_extension(long ext);
 int kvm_dev_ioctl_check_extension(long ext);
 
 
@@ -300,7 +332,6 @@ int kvm_arch_hardware_setup(void);
 void kvm_arch_hardware_unsetup(void);
 void kvm_arch_hardware_unsetup(void);
 void kvm_arch_check_processor_compat(void *rtn);
 void kvm_arch_check_processor_compat(void *rtn);
 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu);
 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu);
-int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu);
 
 
 void kvm_free_physmem(struct kvm *kvm);
 void kvm_free_physmem(struct kvm *kvm);
 
 
@@ -309,8 +340,6 @@ void kvm_arch_destroy_vm(struct kvm *kvm);
 void kvm_free_all_assigned_devices(struct kvm *kvm);
 void kvm_free_all_assigned_devices(struct kvm *kvm);
 void kvm_arch_sync_events(struct kvm *kvm);
 void kvm_arch_sync_events(struct kvm *kvm);
 
 
-int kvm_cpu_get_interrupt(struct kvm_vcpu *v);
-int kvm_cpu_has_interrupt(struct kvm_vcpu *v);
 int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu);
 int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu);
 void kvm_vcpu_kick(struct kvm_vcpu *vcpu);
 void kvm_vcpu_kick(struct kvm_vcpu *vcpu);
 
 
@@ -366,7 +395,8 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level);
 void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin);
 void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin);
 void kvm_register_irq_ack_notifier(struct kvm *kvm,
 void kvm_register_irq_ack_notifier(struct kvm *kvm,
 				   struct kvm_irq_ack_notifier *kian);
 				   struct kvm_irq_ack_notifier *kian);
-void kvm_unregister_irq_ack_notifier(struct kvm_irq_ack_notifier *kian);
+void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
+				   struct kvm_irq_ack_notifier *kian);
 int kvm_request_irq_source_id(struct kvm *kvm);
 int kvm_request_irq_source_id(struct kvm *kvm);
 void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id);
 void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id);
 
 
@@ -459,37 +489,6 @@ struct kvm_stats_debugfs_item {
 extern struct kvm_stats_debugfs_item debugfs_entries[];
 extern struct kvm_stats_debugfs_item debugfs_entries[];
 extern struct dentry *kvm_debugfs_dir;
 extern struct dentry *kvm_debugfs_dir;
 
 
-#define KVMTRACE_5D(evt, vcpu, d1, d2, d3, d4, d5, name) \
-	trace_mark(kvm_trace_##name, "%u %p %u %u %u %u %u %u", KVM_TRC_##evt, \
-						vcpu, 5, d1, d2, d3, d4, d5)
-#define KVMTRACE_4D(evt, vcpu, d1, d2, d3, d4, name) \
-	trace_mark(kvm_trace_##name, "%u %p %u %u %u %u %u %u", KVM_TRC_##evt, \
-						vcpu, 4, d1, d2, d3, d4, 0)
-#define KVMTRACE_3D(evt, vcpu, d1, d2, d3, name) \
-	trace_mark(kvm_trace_##name, "%u %p %u %u %u %u %u %u", KVM_TRC_##evt, \
-						vcpu, 3, d1, d2, d3, 0, 0)
-#define KVMTRACE_2D(evt, vcpu, d1, d2, name) \
-	trace_mark(kvm_trace_##name, "%u %p %u %u %u %u %u %u", KVM_TRC_##evt, \
-						vcpu, 2, d1, d2, 0, 0, 0)
-#define KVMTRACE_1D(evt, vcpu, d1, name) \
-	trace_mark(kvm_trace_##name, "%u %p %u %u %u %u %u %u", KVM_TRC_##evt, \
-						vcpu, 1, d1, 0, 0, 0, 0)
-#define KVMTRACE_0D(evt, vcpu, name) \
-	trace_mark(kvm_trace_##name, "%u %p %u %u %u %u %u %u", KVM_TRC_##evt, \
-						vcpu, 0, 0, 0, 0, 0, 0)
-
-#ifdef CONFIG_KVM_TRACE
-int kvm_trace_ioctl(unsigned int ioctl, unsigned long arg);
-void kvm_trace_cleanup(void);
-#else
-static inline
-int kvm_trace_ioctl(unsigned int ioctl, unsigned long arg)
-{
-	return -EINVAL;
-}
-#define kvm_trace_cleanup() ((void)0)
-#endif
-
 #ifdef KVM_ARCH_WANT_MMU_NOTIFIER
 #ifdef KVM_ARCH_WANT_MMU_NOTIFIER
 static inline int mmu_notifier_retry(struct kvm_vcpu *vcpu, unsigned long mmu_seq)
 static inline int mmu_notifier_retry(struct kvm_vcpu *vcpu, unsigned long mmu_seq)
 {
 {
@@ -525,4 +524,33 @@ static inline void kvm_free_irq_routing(struct kvm *kvm) {}
 
 
 #endif
 #endif
 
 
+#ifdef CONFIG_HAVE_KVM_EVENTFD
+
+void kvm_eventfd_init(struct kvm *kvm);
+int kvm_irqfd(struct kvm *kvm, int fd, int gsi, int flags);
+void kvm_irqfd_release(struct kvm *kvm);
+int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args);
+
+#else
+
+static inline void kvm_eventfd_init(struct kvm *kvm) {}
+static inline int kvm_irqfd(struct kvm *kvm, int fd, int gsi, int flags)
+{
+	return -EINVAL;
+}
+
+static inline void kvm_irqfd_release(struct kvm *kvm) {}
+static inline int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
+{
+	return -ENOSYS;
+}
+
+#endif /* CONFIG_HAVE_KVM_EVENTFD */
+
+#ifdef CONFIG_KVM_APIC_ARCHITECTURE
+static inline bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu)
+{
+	return vcpu->kvm->bsp_vcpu_id == vcpu->vcpu_id;
+}
+#endif
 #endif
 #endif
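kvm_for_each_vcpu() and kvm_vcpu_is_bsp() added above replace the old "scan vcpus[0..KVM_MAX_VCPUS-1] and skip NULLs" loops and the hard-coded vcpu_id == 0 checks that this merge converts elsewhere (kvm_free_vcpus(), the kvmclock cpufreq notifier, set_sregs). A hedged in-kernel sketch of the new idiom; the function itself is invented for illustration:

    /* Illustrative only: kick every online vcpu of a VM except the boot
     * processor, using the iteration helpers declared above. */
    static void example_kick_all_ap_vcpus(struct kvm *kvm)
    {
        struct kvm_vcpu *vcpu;
        int i;

        kvm_for_each_vcpu(i, vcpu, kvm) {
            if (kvm_vcpu_is_bsp(vcpu))
                continue;      /* leave the BSP alone in this example */
            kvm_vcpu_kick(vcpu);
        }
    }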

+ 1 - 0
include/linux/kvm_para.h

@@ -13,6 +13,7 @@
 #define KVM_ENOSYS		1000
 #define KVM_ENOSYS		1000
 #define KVM_EFAULT		EFAULT
 #define KVM_EFAULT		EFAULT
 #define KVM_E2BIG		E2BIG
 #define KVM_E2BIG		E2BIG
+#define KVM_EPERM		EPERM
 
 
 #define KVM_HC_VAPIC_POLL_IRQ		1
 #define KVM_HC_VAPIC_POLL_IRQ		1
 #define KVM_HC_MMU_OP			2
 #define KVM_HC_MMU_OP			2

+ 151 - 0
include/trace/events/kvm.h

@@ -0,0 +1,151 @@
+#if !defined(_TRACE_KVM_MAIN_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_KVM_MAIN_H
+
+#include <linux/tracepoint.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM kvm
+#define TRACE_INCLUDE_FILE kvm
+
+#if defined(__KVM_HAVE_IOAPIC)
+TRACE_EVENT(kvm_set_irq,
+	TP_PROTO(unsigned int gsi, int level, int irq_source_id),
+	TP_ARGS(gsi, level, irq_source_id),
+
+	TP_STRUCT__entry(
+		__field(	unsigned int,	gsi		)
+		__field(	int,		level		)
+		__field(	int,		irq_source_id	)
+	),
+
+	TP_fast_assign(
+		__entry->gsi		= gsi;
+		__entry->level		= level;
+		__entry->irq_source_id	= irq_source_id;
+	),
+
+	TP_printk("gsi %u level %d source %d",
+		  __entry->gsi, __entry->level, __entry->irq_source_id)
+);
+
+#define kvm_deliver_mode		\
+	{0x0, "Fixed"},			\
+	{0x1, "LowPrio"},		\
+	{0x2, "SMI"},			\
+	{0x3, "Res3"},			\
+	{0x4, "NMI"},			\
+	{0x5, "INIT"},			\
+	{0x6, "SIPI"},			\
+	{0x7, "ExtINT"}
+
+TRACE_EVENT(kvm_ioapic_set_irq,
+	    TP_PROTO(__u64 e, int pin, bool coalesced),
+	    TP_ARGS(e, pin, coalesced),
+
+	TP_STRUCT__entry(
+		__field(	__u64,		e		)
+		__field(	int,		pin		)
+		__field(	bool,		coalesced	)
+	),
+
+	TP_fast_assign(
+		__entry->e		= e;
+		__entry->pin		= pin;
+		__entry->coalesced	= coalesced;
+	),
+
+	TP_printk("pin %u dst %x vec=%u (%s|%s|%s%s)%s",
+		  __entry->pin, (u8)(__entry->e >> 56), (u8)__entry->e,
+		  __print_symbolic((__entry->e >> 8 & 0x7), kvm_deliver_mode),
+		  (__entry->e & (1<<11)) ? "logical" : "physical",
+		  (__entry->e & (1<<15)) ? "level" : "edge",
+		  (__entry->e & (1<<16)) ? "|masked" : "",
+		  __entry->coalesced ? " (coalesced)" : "")
+);
+
+TRACE_EVENT(kvm_msi_set_irq,
+	    TP_PROTO(__u64 address, __u64 data),
+	    TP_ARGS(address, data),
+
+	TP_STRUCT__entry(
+		__field(	__u64,		address		)
+		__field(	__u64,		data		)
+	),
+
+	TP_fast_assign(
+		__entry->address	= address;
+		__entry->data		= data;
+	),
+
+	TP_printk("dst %u vec %x (%s|%s|%s%s)",
+		  (u8)(__entry->address >> 12), (u8)__entry->data,
+		  __print_symbolic((__entry->data >> 8 & 0x7), kvm_deliver_mode),
+		  (__entry->address & (1<<2)) ? "logical" : "physical",
+		  (__entry->data & (1<<15)) ? "level" : "edge",
+		  (__entry->address & (1<<3)) ? "|rh" : "")
+);
+
+#define kvm_irqchips						\
+	{KVM_IRQCHIP_PIC_MASTER,	"PIC master"},		\
+	{KVM_IRQCHIP_PIC_SLAVE,		"PIC slave"},		\
+	{KVM_IRQCHIP_IOAPIC,		"IOAPIC"}
+
+TRACE_EVENT(kvm_ack_irq,
+	TP_PROTO(unsigned int irqchip, unsigned int pin),
+	TP_ARGS(irqchip, pin),
+
+	TP_STRUCT__entry(
+		__field(	unsigned int,	irqchip		)
+		__field(	unsigned int,	pin		)
+	),
+
+	TP_fast_assign(
+		__entry->irqchip	= irqchip;
+		__entry->pin		= pin;
+	),
+
+	TP_printk("irqchip %s pin %u",
+		  __print_symbolic(__entry->irqchip, kvm_irqchips),
+		 __entry->pin)
+);
+
+
+
+#endif /* defined(__KVM_HAVE_IOAPIC) */
+
+#define KVM_TRACE_MMIO_READ_UNSATISFIED 0
+#define KVM_TRACE_MMIO_READ 1
+#define KVM_TRACE_MMIO_WRITE 2
+
+#define kvm_trace_symbol_mmio \
+	{ KVM_TRACE_MMIO_READ_UNSATISFIED, "unsatisfied-read" }, \
+	{ KVM_TRACE_MMIO_READ, "read" }, \
+	{ KVM_TRACE_MMIO_WRITE, "write" }
+
+TRACE_EVENT(kvm_mmio,
+	TP_PROTO(int type, int len, u64 gpa, u64 val),
+	TP_ARGS(type, len, gpa, val),
+
+	TP_STRUCT__entry(
+		__field(	u32,	type		)
+		__field(	u32,	len		)
+		__field(	u64,	gpa		)
+		__field(	u64,	val		)
+	),
+
+	TP_fast_assign(
+		__entry->type		= type;
+		__entry->len		= len;
+		__entry->gpa		= gpa;
+		__entry->val		= val;
+	),
+
+	TP_printk("mmio %s len %u gpa 0x%llx val 0x%llx",
+		  __print_symbolic(__entry->type, kvm_trace_symbol_mmio),
+		  __entry->len, __entry->gpa, __entry->val)
+);
+
+#endif /* _TRACE_KVM_MAIN_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
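Each TRACE_EVENT() above expands into a trace_<name>() function that KVM calls at the instrumented site; the events then appear under the "kvm" system in the tracing debugfs directory. A hedged sketch of emitting kvm_mmio (the wrapper function is invented for illustration):

    /* Illustrative caller: record an emulated 4-byte MMIO write.  The real
     * call sites live in the arch MMIO emulation paths. */
    static void example_note_mmio_write(u64 gpa, u32 val)
    {
        trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, 4, gpa, val);
    }

The event can then be enabled at run time through the kvm/kvm_mmio entry under /sys/kernel/debug/tracing/events.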

+ 1 - 0
mm/hugetlb.c

@@ -234,6 +234,7 @@ unsigned long vma_kernel_pagesize(struct vm_area_struct *vma)
 
 
 	return 1UL << (hstate->order + PAGE_SHIFT);
 	return 1UL << (hstate->order + PAGE_SHIFT);
 }
 }
+EXPORT_SYMBOL_GPL(vma_kernel_pagesize);
 
 
 /*
 /*
  * Return the page size being used by the MMU to back a VMA. In the majority
  * Return the page size being used by the MMU to back a VMA. In the majority

+ 14 - 0
virt/kvm/Kconfig

@@ -0,0 +1,14 @@
+# KVM common configuration items and defaults
+
+config HAVE_KVM
+       bool
+
+config HAVE_KVM_IRQCHIP
+       bool
+
+config HAVE_KVM_EVENTFD
+       bool
+       select EVENTFD
+
+config KVM_APIC_ARCHITECTURE
+       bool

+ 41 - 33
virt/kvm/coalesced_mmio.c

@@ -14,32 +14,28 @@
 
 
 #include "coalesced_mmio.h"
 #include "coalesced_mmio.h"
 
 
-static int coalesced_mmio_in_range(struct kvm_io_device *this,
-				   gpa_t addr, int len, int is_write)
+static inline struct kvm_coalesced_mmio_dev *to_mmio(struct kvm_io_device *dev)
+{
+	return container_of(dev, struct kvm_coalesced_mmio_dev, dev);
+}
+
+static int coalesced_mmio_in_range(struct kvm_coalesced_mmio_dev *dev,
+				   gpa_t addr, int len)
 {
 {
-	struct kvm_coalesced_mmio_dev *dev =
-				(struct kvm_coalesced_mmio_dev*)this->private;
 	struct kvm_coalesced_mmio_zone *zone;
 	struct kvm_coalesced_mmio_zone *zone;
-	int next;
+	struct kvm_coalesced_mmio_ring *ring;
+	unsigned avail;
 	int i;
 	int i;
 
 
-	if (!is_write)
-		return 0;
-
-	/* kvm->lock is taken by the caller and must be not released before
-         * dev.read/write
-         */
-
 	/* Are we able to batch it ? */
 	/* Are we able to batch it ? */
 
 
 	/* last is the first free entry
 	/* last is the first free entry
 	 * check if we don't meet the first used entry
 	 * check if we don't meet the first used entry
 	 * there is always one unused entry in the buffer
 	 * there is always one unused entry in the buffer
 	 */
 	 */
-
-	next = (dev->kvm->coalesced_mmio_ring->last + 1) %
-							KVM_COALESCED_MMIO_MAX;
-	if (next == dev->kvm->coalesced_mmio_ring->first) {
+	ring = dev->kvm->coalesced_mmio_ring;
+	avail = (ring->first - ring->last - 1) % KVM_COALESCED_MMIO_MAX;
+	if (avail < KVM_MAX_VCPUS) {
 		/* full */
 		/* full */
 		return 0;
 		return 0;
 	}
 	}
@@ -60,14 +56,15 @@ static int coalesced_mmio_in_range(struct kvm_io_device *this,
 	return 0;
 	return 0;
 }
 }
 
 
-static void coalesced_mmio_write(struct kvm_io_device *this,
-				 gpa_t addr, int len, const void *val)
+static int coalesced_mmio_write(struct kvm_io_device *this,
+				gpa_t addr, int len, const void *val)
 {
 {
-	struct kvm_coalesced_mmio_dev *dev =
-				(struct kvm_coalesced_mmio_dev*)this->private;
+	struct kvm_coalesced_mmio_dev *dev = to_mmio(this);
 	struct kvm_coalesced_mmio_ring *ring = dev->kvm->coalesced_mmio_ring;
 	struct kvm_coalesced_mmio_ring *ring = dev->kvm->coalesced_mmio_ring;
+	if (!coalesced_mmio_in_range(dev, addr, len))
+		return -EOPNOTSUPP;
 
 
-	/* kvm->lock must be taken by caller before call to in_range()*/
+	spin_lock(&dev->lock);
 
 
 	/* copy data in first free entry of the ring */
 	/* copy data in first free entry of the ring */
 
 
@@ -76,29 +73,40 @@ static void coalesced_mmio_write(struct kvm_io_device *this,
 	memcpy(ring->coalesced_mmio[ring->last].data, val, len);
 	memcpy(ring->coalesced_mmio[ring->last].data, val, len);
 	smp_wmb();
 	smp_wmb();
 	ring->last = (ring->last + 1) % KVM_COALESCED_MMIO_MAX;
 	ring->last = (ring->last + 1) % KVM_COALESCED_MMIO_MAX;
+	spin_unlock(&dev->lock);
+	return 0;
 }
 }
 
 
 static void coalesced_mmio_destructor(struct kvm_io_device *this)
 static void coalesced_mmio_destructor(struct kvm_io_device *this)
 {
 {
-	kfree(this);
+	struct kvm_coalesced_mmio_dev *dev = to_mmio(this);
+
+	kfree(dev);
 }
 }
 
 
+static const struct kvm_io_device_ops coalesced_mmio_ops = {
+	.write      = coalesced_mmio_write,
+	.destructor = coalesced_mmio_destructor,
+};
+
 int kvm_coalesced_mmio_init(struct kvm *kvm)
 int kvm_coalesced_mmio_init(struct kvm *kvm)
 {
 {
 	struct kvm_coalesced_mmio_dev *dev;
 	struct kvm_coalesced_mmio_dev *dev;
+	int ret;
 
 
 	dev = kzalloc(sizeof(struct kvm_coalesced_mmio_dev), GFP_KERNEL);
 	dev = kzalloc(sizeof(struct kvm_coalesced_mmio_dev), GFP_KERNEL);
 	if (!dev)
 	if (!dev)
 		return -ENOMEM;
 		return -ENOMEM;
-	dev->dev.write  = coalesced_mmio_write;
-	dev->dev.in_range  = coalesced_mmio_in_range;
-	dev->dev.destructor  = coalesced_mmio_destructor;
-	dev->dev.private  = dev;
+	spin_lock_init(&dev->lock);
+	kvm_iodevice_init(&dev->dev, &coalesced_mmio_ops);
 	dev->kvm = kvm;
 	dev->kvm = kvm;
 	kvm->coalesced_mmio_dev = dev;
 	kvm->coalesced_mmio_dev = dev;
-	kvm_io_bus_register_dev(&kvm->mmio_bus, &dev->dev);
 
 
-	return 0;
+	ret = kvm_io_bus_register_dev(kvm, &kvm->mmio_bus, &dev->dev);
+	if (ret < 0)
+		kfree(dev);
+
+	return ret;
 }
 }
 
 
 int kvm_vm_ioctl_register_coalesced_mmio(struct kvm *kvm,
 int kvm_vm_ioctl_register_coalesced_mmio(struct kvm *kvm,
@@ -109,16 +117,16 @@ int kvm_vm_ioctl_register_coalesced_mmio(struct kvm *kvm,
 	if (dev == NULL)
 	if (dev == NULL)
 		return -EINVAL;
 		return -EINVAL;
 
 
-	mutex_lock(&kvm->lock);
+	down_write(&kvm->slots_lock);
 	if (dev->nb_zones >= KVM_COALESCED_MMIO_ZONE_MAX) {
 	if (dev->nb_zones >= KVM_COALESCED_MMIO_ZONE_MAX) {
-		mutex_unlock(&kvm->lock);
+		up_write(&kvm->slots_lock);
 		return -ENOBUFS;
 		return -ENOBUFS;
 	}
 	}
 
 
 	dev->zone[dev->nb_zones] = *zone;
 	dev->zone[dev->nb_zones] = *zone;
 	dev->nb_zones++;
 	dev->nb_zones++;
 
 
-	mutex_unlock(&kvm->lock);
+	up_write(&kvm->slots_lock);
 	return 0;
 	return 0;
 }
 }
 
 
@@ -132,7 +140,7 @@ int kvm_vm_ioctl_unregister_coalesced_mmio(struct kvm *kvm,
 	if (dev == NULL)
 	if (dev == NULL)
 		return -EINVAL;
 		return -EINVAL;
 
 
-	mutex_lock(&kvm->lock);
+	down_write(&kvm->slots_lock);
 
 
 	i = dev->nb_zones;
 	i = dev->nb_zones;
 	while(i) {
 	while(i) {
@@ -150,7 +158,7 @@ int kvm_vm_ioctl_unregister_coalesced_mmio(struct kvm *kvm,
 		i--;
 		i--;
 	}
 	}
 
 
-	mutex_unlock(&kvm->lock);
+	up_write(&kvm->slots_lock);
 
 
 	return 0;
 	return 0;
 }
 }

+ 1 - 0
virt/kvm/coalesced_mmio.h

@@ -12,6 +12,7 @@
 struct kvm_coalesced_mmio_dev {
 struct kvm_coalesced_mmio_dev {
 	struct kvm_io_device dev;
 	struct kvm_io_device dev;
 	struct kvm *kvm;
 	struct kvm *kvm;
+	spinlock_t lock;
 	int nb_zones;
 	int nb_zones;
 	struct kvm_coalesced_mmio_zone zone[KVM_COALESCED_MMIO_ZONE_MAX];
 	struct kvm_coalesced_mmio_zone zone[KVM_COALESCED_MMIO_ZONE_MAX];
 };
 };

+ 578 - 0
virt/kvm/eventfd.c

@@ -0,0 +1,578 @@
+/*
+ * kvm eventfd support - use eventfd objects to signal various KVM events
+ *
+ * Copyright 2009 Novell.  All Rights Reserved.
+ *
+ * Author:
+ *	Gregory Haskins <ghaskins@novell.com>
+ *
+ * This file is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include <linux/kvm_host.h>
+#include <linux/kvm.h>
+#include <linux/workqueue.h>
+#include <linux/syscalls.h>
+#include <linux/wait.h>
+#include <linux/poll.h>
+#include <linux/file.h>
+#include <linux/list.h>
+#include <linux/eventfd.h>
+#include <linux/kernel.h>
+
+#include "iodev.h"
+
+/*
+ * --------------------------------------------------------------------
+ * irqfd: Allows an fd to be used to inject an interrupt to the guest
+ *
+ * Credit goes to Avi Kivity for the original idea.
+ * --------------------------------------------------------------------
+ */
+
+struct _irqfd {
+	struct kvm               *kvm;
+	struct eventfd_ctx       *eventfd;
+	int                       gsi;
+	struct list_head          list;
+	poll_table                pt;
+	wait_queue_head_t        *wqh;
+	wait_queue_t              wait;
+	struct work_struct        inject;
+	struct work_struct        shutdown;
+};
+
+static struct workqueue_struct *irqfd_cleanup_wq;
+
+static void
+irqfd_inject(struct work_struct *work)
+{
+	struct _irqfd *irqfd = container_of(work, struct _irqfd, inject);
+	struct kvm *kvm = irqfd->kvm;
+
+	mutex_lock(&kvm->irq_lock);
+	kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 1);
+	kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0);
+	mutex_unlock(&kvm->irq_lock);
+}
+
+/*
+ * Race-free decouple logic (ordering is critical)
+ */
+static void
+irqfd_shutdown(struct work_struct *work)
+{
+	struct _irqfd *irqfd = container_of(work, struct _irqfd, shutdown);
+
+	/*
+	 * Synchronize with the wait-queue and unhook ourselves to prevent
+	 * further events.
+	 */
+	remove_wait_queue(irqfd->wqh, &irqfd->wait);
+
+	/*
+	 * We know no new events will be scheduled at this point, so block
+	 * until all previously outstanding events have completed
+	 */
+	flush_work(&irqfd->inject);
+
+	/*
+	 * It is now safe to release the object's resources
+	 */
+	eventfd_ctx_put(irqfd->eventfd);
+	kfree(irqfd);
+}
+
+
+/* assumes kvm->irqfds.lock is held */
+static bool
+irqfd_is_active(struct _irqfd *irqfd)
+{
+	return list_empty(&irqfd->list) ? false : true;
+}
+
+/*
+ * Mark the irqfd as inactive and schedule it for removal
+ *
+ * assumes kvm->irqfds.lock is held
+ */
+static void
+irqfd_deactivate(struct _irqfd *irqfd)
+{
+	BUG_ON(!irqfd_is_active(irqfd));
+
+	list_del_init(&irqfd->list);
+
+	queue_work(irqfd_cleanup_wq, &irqfd->shutdown);
+}
+
+/*
+ * Called with wqh->lock held and interrupts disabled
+ */
+static int
+irqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key)
+{
+	struct _irqfd *irqfd = container_of(wait, struct _irqfd, wait);
+	unsigned long flags = (unsigned long)key;
+
+	if (flags & POLLIN)
+		/* An event has been signaled, inject an interrupt */
+		schedule_work(&irqfd->inject);
+
+	if (flags & POLLHUP) {
+		/* The eventfd is closing, detach from KVM */
+		struct kvm *kvm = irqfd->kvm;
+		unsigned long flags;
+
+		spin_lock_irqsave(&kvm->irqfds.lock, flags);
+
+		/*
+		 * We must check if someone deactivated the irqfd before
+		 * we could acquire the irqfds.lock since the item is
+		 * deactivated from the KVM side before it is unhooked from
+		 * the wait-queue.  If it is already deactivated, we can
+		 * simply return knowing the other side will cleanup for us.
+		 * We cannot race against the irqfd going away since the
+		 * other side is required to acquire wqh->lock, which we hold
+		 */
+		if (irqfd_is_active(irqfd))
+			irqfd_deactivate(irqfd);
+
+		spin_unlock_irqrestore(&kvm->irqfds.lock, flags);
+	}
+
+	return 0;
+}
+
+static void
+irqfd_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh,
+			poll_table *pt)
+{
+	struct _irqfd *irqfd = container_of(pt, struct _irqfd, pt);
+
+	irqfd->wqh = wqh;
+	add_wait_queue(wqh, &irqfd->wait);
+}
+
+static int
+kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi)
+{
+	struct _irqfd *irqfd;
+	struct file *file = NULL;
+	struct eventfd_ctx *eventfd = NULL;
+	int ret;
+	unsigned int events;
+
+	irqfd = kzalloc(sizeof(*irqfd), GFP_KERNEL);
+	if (!irqfd)
+		return -ENOMEM;
+
+	irqfd->kvm = kvm;
+	irqfd->gsi = gsi;
+	INIT_LIST_HEAD(&irqfd->list);
+	INIT_WORK(&irqfd->inject, irqfd_inject);
+	INIT_WORK(&irqfd->shutdown, irqfd_shutdown);
+
+	file = eventfd_fget(fd);
+	if (IS_ERR(file)) {
+		ret = PTR_ERR(file);
+		goto fail;
+	}
+
+	eventfd = eventfd_ctx_fileget(file);
+	if (IS_ERR(eventfd)) {
+		ret = PTR_ERR(eventfd);
+		goto fail;
+	}
+
+	irqfd->eventfd = eventfd;
+
+	/*
+	 * Install our own custom wake-up handling so we are notified via
+	 * a callback whenever someone signals the underlying eventfd
+	 */
+	init_waitqueue_func_entry(&irqfd->wait, irqfd_wakeup);
+	init_poll_funcptr(&irqfd->pt, irqfd_ptable_queue_proc);
+
+	events = file->f_op->poll(file, &irqfd->pt);
+
+	spin_lock_irq(&kvm->irqfds.lock);
+	list_add_tail(&irqfd->list, &kvm->irqfds.items);
+	spin_unlock_irq(&kvm->irqfds.lock);
+
+	/*
+	 * Check if there was an event already pending on the eventfd
+	 * before we registered, and trigger it as if we didn't miss it.
+	 */
+	if (events & POLLIN)
+		schedule_work(&irqfd->inject);
+
+	/*
+	 * do not drop the file until the irqfd is fully initialized, otherwise
+	 * we might race against the POLLHUP
+	 */
+	fput(file);
+
+	return 0;
+
+fail:
+	if (eventfd && !IS_ERR(eventfd))
+		eventfd_ctx_put(eventfd);
+
+	if (!IS_ERR(file))
+		fput(file);
+
+	kfree(irqfd);
+	return ret;
+}
+
+void
+kvm_eventfd_init(struct kvm *kvm)
+{
+	spin_lock_init(&kvm->irqfds.lock);
+	INIT_LIST_HEAD(&kvm->irqfds.items);
+	INIT_LIST_HEAD(&kvm->ioeventfds);
+}
+
+/*
+ * shutdown any irqfd's that match fd+gsi
+ */
+static int
+kvm_irqfd_deassign(struct kvm *kvm, int fd, int gsi)
+{
+	struct _irqfd *irqfd, *tmp;
+	struct eventfd_ctx *eventfd;
+
+	eventfd = eventfd_ctx_fdget(fd);
+	if (IS_ERR(eventfd))
+		return PTR_ERR(eventfd);
+
+	spin_lock_irq(&kvm->irqfds.lock);
+
+	list_for_each_entry_safe(irqfd, tmp, &kvm->irqfds.items, list) {
+		if (irqfd->eventfd == eventfd && irqfd->gsi == gsi)
+			irqfd_deactivate(irqfd);
+	}
+
+	spin_unlock_irq(&kvm->irqfds.lock);
+	eventfd_ctx_put(eventfd);
+
+	/*
+	 * Block until we know all outstanding shutdown jobs have completed
+	 * so that we guarantee there will not be any more interrupts on this
+	 * gsi once this deassign function returns.
+	 */
+	flush_workqueue(irqfd_cleanup_wq);
+
+	return 0;
+}
+
+int
+kvm_irqfd(struct kvm *kvm, int fd, int gsi, int flags)
+{
+	if (flags & KVM_IRQFD_FLAG_DEASSIGN)
+		return kvm_irqfd_deassign(kvm, fd, gsi);
+
+	return kvm_irqfd_assign(kvm, fd, gsi);
+}
+
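From userspace, the assign/deassign paths above are driven by the KVM_IRQFD ioctl declared earlier in include/linux/kvm.h. A minimal sketch, assuming the VM already has an in-kernel irqchip (KVM_CREATE_IRQCHIP); the GSI number is arbitrary:

    /* Illustrative snippet: route signals on an eventfd to guest GSI 10.
     * Any host thread can then inject the interrupt by writing the eventfd. */
    #include <sys/eventfd.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    int setup_example_irqfd(int vm_fd)
    {
        int efd = eventfd(0, 0);
        struct kvm_irqfd args = { .fd = efd, .gsi = 10, .flags = 0 };

        if (efd < 0 || ioctl(vm_fd, KVM_IRQFD, &args) < 0)
            return -1;
        return efd;   /* eventfd_write(efd, 1) triggers the injection */
    }

Detaching uses the same ioctl with KVM_IRQFD_FLAG_DEASSIGN set in flags.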
+/*
+ * This function is called as the kvm VM fd is being released. Shutdown all
+ * irqfds that still remain open
+ */
+void
+kvm_irqfd_release(struct kvm *kvm)
+{
+	struct _irqfd *irqfd, *tmp;
+
+	spin_lock_irq(&kvm->irqfds.lock);
+
+	list_for_each_entry_safe(irqfd, tmp, &kvm->irqfds.items, list)
+		irqfd_deactivate(irqfd);
+
+	spin_unlock_irq(&kvm->irqfds.lock);
+
+	/*
+	 * Block until we know all outstanding shutdown jobs have completed
+	 * since we do not take a kvm* reference.
+	 */
+	flush_workqueue(irqfd_cleanup_wq);
+
+}
+
+/*
+ * create a host-wide workqueue for issuing deferred shutdown requests
+ * aggregated from all vm* instances. We need our own isolated single-thread
+ * queue to prevent deadlock against flushing the normal work-queue.
+ */
+static int __init irqfd_module_init(void)
+{
+	irqfd_cleanup_wq = create_singlethread_workqueue("kvm-irqfd-cleanup");
+	if (!irqfd_cleanup_wq)
+		return -ENOMEM;
+
+	return 0;
+}
+
+static void __exit irqfd_module_exit(void)
+{
+	destroy_workqueue(irqfd_cleanup_wq);
+}
+
+module_init(irqfd_module_init);
+module_exit(irqfd_module_exit);
+
+/*
+ * --------------------------------------------------------------------
+ * ioeventfd: translate a PIO/MMIO memory write to an eventfd signal.
+ *
+ * userspace can register a PIO/MMIO address with an eventfd for receiving
+ * notification when the memory has been touched.
+ * --------------------------------------------------------------------
+ */
+
+struct _ioeventfd {
+	struct list_head     list;
+	u64                  addr;
+	int                  length;
+	struct eventfd_ctx  *eventfd;
+	u64                  datamatch;
+	struct kvm_io_device dev;
+	bool                 wildcard;
+};
+
+static inline struct _ioeventfd *
+to_ioeventfd(struct kvm_io_device *dev)
+{
+	return container_of(dev, struct _ioeventfd, dev);
+}
+
+static void
+ioeventfd_release(struct _ioeventfd *p)
+{
+	eventfd_ctx_put(p->eventfd);
+	list_del(&p->list);
+	kfree(p);
+}
+
+static bool
+ioeventfd_in_range(struct _ioeventfd *p, gpa_t addr, int len, const void *val)
+{
+	u64 _val;
+
+	if (!(addr == p->addr && len == p->length))
+		/* address-range must be precise for a hit */
+		return false;
+
+	if (p->wildcard)
+		/* all else equal, wildcard is always a hit */
+		return true;
+
+	/* otherwise, we have to actually compare the data */
+
+	BUG_ON(!IS_ALIGNED((unsigned long)val, len));
+
+	switch (len) {
+	case 1:
+		_val = *(u8 *)val;
+		break;
+	case 2:
+		_val = *(u16 *)val;
+		break;
+	case 4:
+		_val = *(u32 *)val;
+		break;
+	case 8:
+		_val = *(u64 *)val;
+		break;
+	default:
+		return false;
+	}
+
+	return _val == p->datamatch ? true : false;
+}
+
+/* MMIO/PIO writes trigger an event if the addr/val match */
+static int
+ioeventfd_write(struct kvm_io_device *this, gpa_t addr, int len,
+		const void *val)
+{
+	struct _ioeventfd *p = to_ioeventfd(this);
+
+	if (!ioeventfd_in_range(p, addr, len, val))
+		return -EOPNOTSUPP;
+
+	eventfd_signal(p->eventfd, 1);
+	return 0;
+}
+
+/*
+ * This function is called as KVM is completely shutting down.  We do not
+ * need to worry about locking; just nuke anything we have as quickly as possible.
+ */
+static void
+ioeventfd_destructor(struct kvm_io_device *this)
+{
+	struct _ioeventfd *p = to_ioeventfd(this);
+
+	ioeventfd_release(p);
+}
+
+static const struct kvm_io_device_ops ioeventfd_ops = {
+	.write      = ioeventfd_write,
+	.destructor = ioeventfd_destructor,
+};
+
+/* assumes kvm->slots_lock held */
+static bool
+ioeventfd_check_collision(struct kvm *kvm, struct _ioeventfd *p)
+{
+	struct _ioeventfd *_p;
+
+	list_for_each_entry(_p, &kvm->ioeventfds, list)
+		if (_p->addr == p->addr && _p->length == p->length &&
+		    (_p->wildcard || p->wildcard ||
+		     _p->datamatch == p->datamatch))
+			return true;
+
+	return false;
+}
+
+static int
+kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
+{
+	int                       pio = args->flags & KVM_IOEVENTFD_FLAG_PIO;
+	struct kvm_io_bus        *bus = pio ? &kvm->pio_bus : &kvm->mmio_bus;
+	struct _ioeventfd        *p;
+	struct eventfd_ctx       *eventfd;
+	int                       ret;
+
+	/* must be natural-word sized */
+	switch (args->len) {
+	case 1:
+	case 2:
+	case 4:
+	case 8:
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	/* check for range overflow */
+	if (args->addr + args->len < args->addr)
+		return -EINVAL;
+
+	/* check for extra flags that we don't understand */
+	if (args->flags & ~KVM_IOEVENTFD_VALID_FLAG_MASK)
+		return -EINVAL;
+
+	eventfd = eventfd_ctx_fdget(args->fd);
+	if (IS_ERR(eventfd))
+		return PTR_ERR(eventfd);
+
+	p = kzalloc(sizeof(*p), GFP_KERNEL);
+	if (!p) {
+		ret = -ENOMEM;
+		goto fail;
+	}
+
+	INIT_LIST_HEAD(&p->list);
+	p->addr    = args->addr;
+	p->length  = args->len;
+	p->eventfd = eventfd;
+
+	/* The datamatch feature is optional; otherwise this is a wildcard */
+	if (args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH)
+		p->datamatch = args->datamatch;
+	else
+		p->wildcard = true;
+
+	down_write(&kvm->slots_lock);
+
+	/* Verify that there isn't a match already */
+	if (ioeventfd_check_collision(kvm, p)) {
+		ret = -EEXIST;
+		goto unlock_fail;
+	}
+
+	kvm_iodevice_init(&p->dev, &ioeventfd_ops);
+
+	ret = __kvm_io_bus_register_dev(bus, &p->dev);
+	if (ret < 0)
+		goto unlock_fail;
+
+	list_add_tail(&p->list, &kvm->ioeventfds);
+
+	up_write(&kvm->slots_lock);
+
+	return 0;
+
+unlock_fail:
+	up_write(&kvm->slots_lock);
+
+fail:
+	kfree(p);
+	eventfd_ctx_put(eventfd);
+
+	return ret;
+}
+
+static int
+kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
+{
+	int                       pio = args->flags & KVM_IOEVENTFD_FLAG_PIO;
+	struct kvm_io_bus        *bus = pio ? &kvm->pio_bus : &kvm->mmio_bus;
+	struct _ioeventfd        *p, *tmp;
+	struct eventfd_ctx       *eventfd;
+	int                       ret = -ENOENT;
+
+	eventfd = eventfd_ctx_fdget(args->fd);
+	if (IS_ERR(eventfd))
+		return PTR_ERR(eventfd);
+
+	down_write(&kvm->slots_lock);
+
+	list_for_each_entry_safe(p, tmp, &kvm->ioeventfds, list) {
+		bool wildcard = !(args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH);
+
+		if (p->eventfd != eventfd  ||
+		    p->addr != args->addr  ||
+		    p->length != args->len ||
+		    p->wildcard != wildcard)
+			continue;
+
+		if (!p->wildcard && p->datamatch != args->datamatch)
+			continue;
+
+		__kvm_io_bus_unregister_dev(bus, &p->dev);
+		ioeventfd_release(p);
+		ret = 0;
+		break;
+	}
+
+	up_write(&kvm->slots_lock);
+
+	eventfd_ctx_put(eventfd);
+
+	return ret;
+}
+
+int
+kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
+{
+	if (args->flags & KVM_IOEVENTFD_FLAG_DEASSIGN)
+		return kvm_deassign_ioeventfd(kvm, args);
+
+	return kvm_assign_ioeventfd(kvm, args);
+}

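For context, a hedged userspace sketch of the matching KVM_IOEVENTFD registration (the port, length, data value and helper name are purely illustrative, and error handling is omitted):

#include <sys/eventfd.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Ask KVM to signal an eventfd when the guest writes 0x1 (2 bytes) to PIO port 0x510 */
static int watch_pio_doorbell(int vm_fd)
{
	int efd = eventfd(0, 0);
	struct kvm_ioeventfd ioev = {
		.datamatch = 0x1,
		.addr      = 0x510,
		.len       = 2,
		.fd        = efd,
		.flags     = KVM_IOEVENTFD_FLAG_PIO |
			     KVM_IOEVENTFD_FLAG_DATAMATCH,
	};

	ioctl(vm_fd, KVM_IOEVENTFD, &ioev);	/* kvm_assign_ioeventfd() path */
	return efd;				/* read()/poll() this fd for doorbells */
}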
+ 53 - 25
virt/kvm/ioapic.c

@@ -36,6 +36,7 @@
 #include <asm/processor.h>
 #include <asm/processor.h>
 #include <asm/page.h>
 #include <asm/page.h>
 #include <asm/current.h>
 #include <asm/current.h>
+#include <trace/events/kvm.h>
 
 
 #include "ioapic.h"
 #include "ioapic.h"
 #include "lapic.h"
 #include "lapic.h"
@@ -103,6 +104,7 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
 {
 {
 	unsigned index;
 	unsigned index;
 	bool mask_before, mask_after;
 	bool mask_before, mask_after;
+	union kvm_ioapic_redirect_entry *e;
 
 
 	switch (ioapic->ioregsel) {
 	switch (ioapic->ioregsel) {
 	case IOAPIC_REG_VERSION:
 	case IOAPIC_REG_VERSION:
@@ -122,19 +124,20 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
 		ioapic_debug("change redir index %x val %x\n", index, val);
 		ioapic_debug("change redir index %x val %x\n", index, val);
 		if (index >= IOAPIC_NUM_PINS)
 		if (index >= IOAPIC_NUM_PINS)
 			return;
 			return;
-		mask_before = ioapic->redirtbl[index].fields.mask;
+		e = &ioapic->redirtbl[index];
+		mask_before = e->fields.mask;
 		if (ioapic->ioregsel & 1) {
 		if (ioapic->ioregsel & 1) {
-			ioapic->redirtbl[index].bits &= 0xffffffff;
-			ioapic->redirtbl[index].bits |= (u64) val << 32;
+			e->bits &= 0xffffffff;
+			e->bits |= (u64) val << 32;
 		} else {
 		} else {
-			ioapic->redirtbl[index].bits &= ~0xffffffffULL;
-			ioapic->redirtbl[index].bits |= (u32) val;
-			ioapic->redirtbl[index].fields.remote_irr = 0;
+			e->bits &= ~0xffffffffULL;
+			e->bits |= (u32) val;
+			e->fields.remote_irr = 0;
 		}
 		}
-		mask_after = ioapic->redirtbl[index].fields.mask;
+		mask_after = e->fields.mask;
 		if (mask_before != mask_after)
 		if (mask_before != mask_after)
 			kvm_fire_mask_notifiers(ioapic->kvm, index, mask_after);
 			kvm_fire_mask_notifiers(ioapic->kvm, index, mask_after);
-		if (ioapic->redirtbl[index].fields.trig_mode == IOAPIC_LEVEL_TRIG
+		if (e->fields.trig_mode == IOAPIC_LEVEL_TRIG
 		    && ioapic->irr & (1 << index))
 		    && ioapic->irr & (1 << index))
 			ioapic_service(ioapic, index);
 			ioapic_service(ioapic, index);
 		break;
 		break;
@@ -164,7 +167,9 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq)
 	/* Always delivery PIT interrupt to vcpu 0 */
 	/* Always delivery PIT interrupt to vcpu 0 */
 	if (irq == 0) {
 	if (irq == 0) {
 		irqe.dest_mode = 0; /* Physical mode. */
 		irqe.dest_mode = 0; /* Physical mode. */
-		irqe.dest_id = ioapic->kvm->vcpus[0]->vcpu_id;
+		/* need to read apic_id from the apic register since
+		 * it can be rewritten */
+		irqe.dest_id = ioapic->kvm->bsp_vcpu->vcpu_id;
 	}
 	}
 #endif
 #endif
 	return kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe);
 	return kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe);
@@ -188,7 +193,10 @@ int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level)
 			if ((edge && old_irr != ioapic->irr) ||
 			if ((edge && old_irr != ioapic->irr) ||
 			    (!edge && !entry.fields.remote_irr))
 			    (!edge && !entry.fields.remote_irr))
 				ret = ioapic_service(ioapic, irq);
 				ret = ioapic_service(ioapic, irq);
+			else
+				ret = 0; /* report coalesced interrupt */
 		}
 		}
+		trace_kvm_ioapic_set_irq(entry.bits, irq, ret == 0);
 	}
 	}
 	return ret;
 	return ret;
 }
 }
@@ -220,24 +228,29 @@ void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode)
 			__kvm_ioapic_update_eoi(ioapic, i, trigger_mode);
 			__kvm_ioapic_update_eoi(ioapic, i, trigger_mode);
 }
 }
 
 
-static int ioapic_in_range(struct kvm_io_device *this, gpa_t addr,
-			   int len, int is_write)
+static inline struct kvm_ioapic *to_ioapic(struct kvm_io_device *dev)
 {
 {
-	struct kvm_ioapic *ioapic = (struct kvm_ioapic *)this->private;
+	return container_of(dev, struct kvm_ioapic, dev);
+}
 
 
+static inline int ioapic_in_range(struct kvm_ioapic *ioapic, gpa_t addr)
+{
 	return ((addr >= ioapic->base_address &&
 	return ((addr >= ioapic->base_address &&
 		 (addr < ioapic->base_address + IOAPIC_MEM_LENGTH)));
 		 (addr < ioapic->base_address + IOAPIC_MEM_LENGTH)));
 }
 }
 
 
-static void ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len,
-			     void *val)
+static int ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len,
+			    void *val)
 {
 {
-	struct kvm_ioapic *ioapic = (struct kvm_ioapic *)this->private;
+	struct kvm_ioapic *ioapic = to_ioapic(this);
 	u32 result;
 	u32 result;
+	if (!ioapic_in_range(ioapic, addr))
+		return -EOPNOTSUPP;
 
 
 	ioapic_debug("addr %lx\n", (unsigned long)addr);
 	ioapic_debug("addr %lx\n", (unsigned long)addr);
 	ASSERT(!(addr & 0xf));	/* check alignment */
 	ASSERT(!(addr & 0xf));	/* check alignment */
 
 
+	mutex_lock(&ioapic->kvm->irq_lock);
 	addr &= 0xff;
 	addr &= 0xff;
 	switch (addr) {
 	switch (addr) {
 	case IOAPIC_REG_SELECT:
 	case IOAPIC_REG_SELECT:
@@ -264,22 +277,28 @@ static void ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len,
 	default:
 	default:
 		printk(KERN_WARNING "ioapic: wrong length %d\n", len);
 		printk(KERN_WARNING "ioapic: wrong length %d\n", len);
 	}
 	}
+	mutex_unlock(&ioapic->kvm->irq_lock);
+	return 0;
 }
 }
 
 
-static void ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len,
-			      const void *val)
+static int ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len,
+			     const void *val)
 {
 {
-	struct kvm_ioapic *ioapic = (struct kvm_ioapic *)this->private;
+	struct kvm_ioapic *ioapic = to_ioapic(this);
 	u32 data;
 	u32 data;
+	if (!ioapic_in_range(ioapic, addr))
+		return -EOPNOTSUPP;
 
 
 	ioapic_debug("ioapic_mmio_write addr=%p len=%d val=%p\n",
 	ioapic_debug("ioapic_mmio_write addr=%p len=%d val=%p\n",
 		     (void*)addr, len, val);
 		     (void*)addr, len, val);
 	ASSERT(!(addr & 0xf));	/* check alignment */
 	ASSERT(!(addr & 0xf));	/* check alignment */
+
+	mutex_lock(&ioapic->kvm->irq_lock);
 	if (len == 4 || len == 8)
 	if (len == 4 || len == 8)
 		data = *(u32 *) val;
 		data = *(u32 *) val;
 	else {
 	else {
 		printk(KERN_WARNING "ioapic: Unsupported size %d\n", len);
 		printk(KERN_WARNING "ioapic: Unsupported size %d\n", len);
-		return;
+		goto unlock;
 	}
 	}
 
 
 	addr &= 0xff;
 	addr &= 0xff;
@@ -300,6 +319,9 @@ static void ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len,
 	default:
 	default:
 		break;
 		break;
 	}
 	}
+unlock:
+	mutex_unlock(&ioapic->kvm->irq_lock);
+	return 0;
 }
 }
 
 
 void kvm_ioapic_reset(struct kvm_ioapic *ioapic)
 void kvm_ioapic_reset(struct kvm_ioapic *ioapic)
@@ -314,21 +336,27 @@ void kvm_ioapic_reset(struct kvm_ioapic *ioapic)
 	ioapic->id = 0;
 	ioapic->id = 0;
 }
 }
 
 
+static const struct kvm_io_device_ops ioapic_mmio_ops = {
+	.read     = ioapic_mmio_read,
+	.write    = ioapic_mmio_write,
+};
+
 int kvm_ioapic_init(struct kvm *kvm)
 int kvm_ioapic_init(struct kvm *kvm)
 {
 {
 	struct kvm_ioapic *ioapic;
 	struct kvm_ioapic *ioapic;
+	int ret;
 
 
 	ioapic = kzalloc(sizeof(struct kvm_ioapic), GFP_KERNEL);
 	ioapic = kzalloc(sizeof(struct kvm_ioapic), GFP_KERNEL);
 	if (!ioapic)
 	if (!ioapic)
 		return -ENOMEM;
 		return -ENOMEM;
 	kvm->arch.vioapic = ioapic;
 	kvm->arch.vioapic = ioapic;
 	kvm_ioapic_reset(ioapic);
 	kvm_ioapic_reset(ioapic);
-	ioapic->dev.read = ioapic_mmio_read;
-	ioapic->dev.write = ioapic_mmio_write;
-	ioapic->dev.in_range = ioapic_in_range;
-	ioapic->dev.private = ioapic;
+	kvm_iodevice_init(&ioapic->dev, &ioapic_mmio_ops);
 	ioapic->kvm = kvm;
 	ioapic->kvm = kvm;
-	kvm_io_bus_register_dev(&kvm->mmio_bus, &ioapic->dev);
-	return 0;
+	ret = kvm_io_bus_register_dev(kvm, &kvm->mmio_bus, &ioapic->dev);
+	if (ret < 0)
+		kfree(ioapic);
+
+	return ret;
 }
 }
 
 

+ 30 - 25
virt/kvm/iodev.h

@@ -17,49 +17,54 @@
 #define __KVM_IODEV_H__
 #define __KVM_IODEV_H__
 
 
 #include <linux/kvm_types.h>
 #include <linux/kvm_types.h>
+#include <asm/errno.h>
 
 
-struct kvm_io_device {
-	void (*read)(struct kvm_io_device *this,
+struct kvm_io_device;
+
+/**
+ * kvm_io_device_ops are called under kvm slots_lock.
+ * read and write handlers return 0 if the transaction has been handled,
+ * or non-zero to have it passed to the next device.
+ **/
+struct kvm_io_device_ops {
+	int (*read)(struct kvm_io_device *this,
+		    gpa_t addr,
+		    int len,
+		    void *val);
+	int (*write)(struct kvm_io_device *this,
 		     gpa_t addr,
 		     gpa_t addr,
 		     int len,
 		     int len,
-		     void *val);
-	void (*write)(struct kvm_io_device *this,
-		      gpa_t addr,
-		      int len,
-		      const void *val);
-	int (*in_range)(struct kvm_io_device *this, gpa_t addr, int len,
-			int is_write);
+		     const void *val);
 	void (*destructor)(struct kvm_io_device *this);
 	void (*destructor)(struct kvm_io_device *this);
+};
 
 
-	void             *private;
+
+struct kvm_io_device {
+	const struct kvm_io_device_ops *ops;
 };
 };
 
 
-static inline void kvm_iodevice_read(struct kvm_io_device *dev,
-				     gpa_t addr,
-				     int len,
-				     void *val)
+static inline void kvm_iodevice_init(struct kvm_io_device *dev,
+				     const struct kvm_io_device_ops *ops)
 {
 {
-	dev->read(dev, addr, len, val);
+	dev->ops = ops;
 }
 }
 
 
-static inline void kvm_iodevice_write(struct kvm_io_device *dev,
-				      gpa_t addr,
-				      int len,
-				      const void *val)
+static inline int kvm_iodevice_read(struct kvm_io_device *dev,
+				    gpa_t addr, int l, void *v)
 {
 {
-	dev->write(dev, addr, len, val);
+	return dev->ops->read ? dev->ops->read(dev, addr, l, v) : -EOPNOTSUPP;
 }
 }
 
 
-static inline int kvm_iodevice_inrange(struct kvm_io_device *dev,
-				       gpa_t addr, int len, int is_write)
+static inline int kvm_iodevice_write(struct kvm_io_device *dev,
+				     gpa_t addr, int l, const void *v)
 {
 {
-	return dev->in_range(dev, addr, len, is_write);
+	return dev->ops->write ? dev->ops->write(dev, addr, l, v) : -EOPNOTSUPP;
 }
 }
 
 
 static inline void kvm_iodevice_destructor(struct kvm_io_device *dev)
 static inline void kvm_iodevice_destructor(struct kvm_io_device *dev)
 {
 {
-	if (dev->destructor)
-		dev->destructor(dev);
+	if (dev->ops->destructor)
+		dev->ops->destructor(dev);
 }
 }
 
 
 #endif /* __KVM_IODEV_H__ */
 #endif /* __KVM_IODEV_H__ */

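To make the new ops-table contract concrete, here is a minimal kernel-side sketch (the device, its single 32-bit register and its address check are invented for illustration, and it assumes the virt/kvm build context for "iodev.h"):

#include <linux/kvm_host.h>
#include "iodev.h"

struct demo_dev {
	struct kvm_io_device dev;
	gpa_t base;
	u32 reg;
};

static int demo_write(struct kvm_io_device *this, gpa_t addr, int len,
		      const void *val)
{
	struct demo_dev *d = container_of(this, struct demo_dev, dev);

	if (addr != d->base || len != 4)
		return -EOPNOTSUPP;	/* not ours: let the bus try the next device */

	d->reg = *(const u32 *)val;
	return 0;			/* handled */
}

static const struct kvm_io_device_ops demo_ops = {
	.write = demo_write,		/* no .read: reads fall through as -EOPNOTSUPP */
};

/*
 * Registration, assuming a live struct kvm *kvm and struct demo_dev *d:
 *
 *	kvm_iodevice_init(&d->dev, &demo_ops);
 *	kvm_io_bus_register_dev(kvm, &kvm->mmio_bus, &d->dev);
 */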
+ 40 - 11
virt/kvm/irq_comm.c

@@ -20,6 +20,7 @@
  */
  */
 
 
 #include <linux/kvm_host.h>
 #include <linux/kvm_host.h>
+#include <trace/events/kvm.h>
 
 
 #include <asm/msidef.h>
 #include <asm/msidef.h>
 #ifdef CONFIG_IA64
 #ifdef CONFIG_IA64
@@ -62,14 +63,14 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
 	int i, r = -1;
 	int i, r = -1;
 	struct kvm_vcpu *vcpu, *lowest = NULL;
 	struct kvm_vcpu *vcpu, *lowest = NULL;
 
 
+	WARN_ON(!mutex_is_locked(&kvm->irq_lock));
+
 	if (irq->dest_mode == 0 && irq->dest_id == 0xff &&
 	if (irq->dest_mode == 0 && irq->dest_id == 0xff &&
 			kvm_is_dm_lowest_prio(irq))
 			kvm_is_dm_lowest_prio(irq))
 		printk(KERN_INFO "kvm: apic: phys broadcast and lowest prio\n");
 		printk(KERN_INFO "kvm: apic: phys broadcast and lowest prio\n");
 
 
-	for (i = 0; i < KVM_MAX_VCPUS; i++) {
-		vcpu = kvm->vcpus[i];
-
-		if (!vcpu || !kvm_apic_present(vcpu))
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		if (!kvm_apic_present(vcpu))
 			continue;
 			continue;
 
 
 		if (!kvm_apic_match_dest(vcpu, src, irq->shorthand,
 		if (!kvm_apic_match_dest(vcpu, src, irq->shorthand,
@@ -99,6 +100,8 @@ static int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
 {
 {
 	struct kvm_lapic_irq irq;
 	struct kvm_lapic_irq irq;
 
 
+	trace_kvm_msi_set_irq(e->msi.address_lo, e->msi.data);
+
 	irq.dest_id = (e->msi.address_lo &
 	irq.dest_id = (e->msi.address_lo &
 			MSI_ADDR_DEST_ID_MASK) >> MSI_ADDR_DEST_ID_SHIFT;
 			MSI_ADDR_DEST_ID_MASK) >> MSI_ADDR_DEST_ID_SHIFT;
 	irq.vector = (e->msi.data &
 	irq.vector = (e->msi.data &
@@ -113,7 +116,7 @@ static int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
 	return kvm_irq_delivery_to_apic(kvm, NULL, &irq);
 	return kvm_irq_delivery_to_apic(kvm, NULL, &irq);
 }
 }
 
 
-/* This should be called with the kvm->lock mutex held
+/* This should be called with the kvm->irq_lock mutex held
  * Return value:
  * Return value:
  *  < 0   Interrupt was ignored (masked or not delivered for other reasons)
  *  < 0   Interrupt was ignored (masked or not delivered for other reasons)
  *  = 0   Interrupt was coalesced (previous irq is still pending)
  *  = 0   Interrupt was coalesced (previous irq is still pending)
@@ -125,6 +128,10 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level)
 	unsigned long *irq_state, sig_level;
 	unsigned long *irq_state, sig_level;
 	int ret = -1;
 	int ret = -1;
 
 
+	trace_kvm_set_irq(irq, level, irq_source_id);
+
+	WARN_ON(!mutex_is_locked(&kvm->irq_lock));
+
 	if (irq < KVM_IOAPIC_NUM_PINS) {
 	if (irq < KVM_IOAPIC_NUM_PINS) {
 		irq_state = (unsigned long *)&kvm->arch.irq_states[irq];
 		irq_state = (unsigned long *)&kvm->arch.irq_states[irq];
 
 
@@ -134,7 +141,9 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level)
 		else
 		else
 			clear_bit(irq_source_id, irq_state);
 			clear_bit(irq_source_id, irq_state);
 		sig_level = !!(*irq_state);
 		sig_level = !!(*irq_state);
-	} else /* Deal with MSI/MSI-X */
+	} else if (!level)
+		return ret;
+	else /* Deal with MSI/MSI-X */
 		sig_level = 1;
 		sig_level = 1;
 
 
 	/* Not possible to detect if the guest uses the PIC or the
 	/* Not possible to detect if the guest uses the PIC or the
@@ -159,6 +168,8 @@ void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
 	struct hlist_node *n;
 	struct hlist_node *n;
 	unsigned gsi = pin;
 	unsigned gsi = pin;
 
 
+	trace_kvm_ack_irq(irqchip, pin);
+
 	list_for_each_entry(e, &kvm->irq_routing, link)
 	list_for_each_entry(e, &kvm->irq_routing, link)
 		if (e->type == KVM_IRQ_ROUTING_IRQCHIP &&
 		if (e->type == KVM_IRQ_ROUTING_IRQCHIP &&
 		    e->irqchip.irqchip == irqchip &&
 		    e->irqchip.irqchip == irqchip &&
@@ -175,19 +186,26 @@ void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
 void kvm_register_irq_ack_notifier(struct kvm *kvm,
 void kvm_register_irq_ack_notifier(struct kvm *kvm,
 				   struct kvm_irq_ack_notifier *kian)
 				   struct kvm_irq_ack_notifier *kian)
 {
 {
+	mutex_lock(&kvm->irq_lock);
 	hlist_add_head(&kian->link, &kvm->arch.irq_ack_notifier_list);
 	hlist_add_head(&kian->link, &kvm->arch.irq_ack_notifier_list);
+	mutex_unlock(&kvm->irq_lock);
 }
 }
 
 
-void kvm_unregister_irq_ack_notifier(struct kvm_irq_ack_notifier *kian)
+void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
+				    struct kvm_irq_ack_notifier *kian)
 {
 {
+	mutex_lock(&kvm->irq_lock);
 	hlist_del_init(&kian->link);
 	hlist_del_init(&kian->link);
+	mutex_unlock(&kvm->irq_lock);
 }
 }
 
 
-/* The caller must hold kvm->lock mutex */
 int kvm_request_irq_source_id(struct kvm *kvm)
 int kvm_request_irq_source_id(struct kvm *kvm)
 {
 {
 	unsigned long *bitmap = &kvm->arch.irq_sources_bitmap;
 	unsigned long *bitmap = &kvm->arch.irq_sources_bitmap;
-	int irq_source_id = find_first_zero_bit(bitmap,
+	int irq_source_id;
+
+	mutex_lock(&kvm->irq_lock);
+	irq_source_id = find_first_zero_bit(bitmap,
 				sizeof(kvm->arch.irq_sources_bitmap));
 				sizeof(kvm->arch.irq_sources_bitmap));
 
 
 	if (irq_source_id >= sizeof(kvm->arch.irq_sources_bitmap)) {
 	if (irq_source_id >= sizeof(kvm->arch.irq_sources_bitmap)) {
@@ -197,6 +215,7 @@ int kvm_request_irq_source_id(struct kvm *kvm)
 
 
 	ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID);
 	ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID);
 	set_bit(irq_source_id, bitmap);
 	set_bit(irq_source_id, bitmap);
+	mutex_unlock(&kvm->irq_lock);
 
 
 	return irq_source_id;
 	return irq_source_id;
 }
 }
@@ -207,6 +226,7 @@ void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id)
 
 
 	ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID);
 	ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID);
 
 
+	mutex_lock(&kvm->irq_lock);
 	if (irq_source_id < 0 ||
 	if (irq_source_id < 0 ||
 	    irq_source_id >= sizeof(kvm->arch.irq_sources_bitmap)) {
 	    irq_source_id >= sizeof(kvm->arch.irq_sources_bitmap)) {
 		printk(KERN_ERR "kvm: IRQ source ID out of range!\n");
 		printk(KERN_ERR "kvm: IRQ source ID out of range!\n");
@@ -215,19 +235,24 @@ void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id)
 	for (i = 0; i < KVM_IOAPIC_NUM_PINS; i++)
 	for (i = 0; i < KVM_IOAPIC_NUM_PINS; i++)
 		clear_bit(irq_source_id, &kvm->arch.irq_states[i]);
 		clear_bit(irq_source_id, &kvm->arch.irq_states[i]);
 	clear_bit(irq_source_id, &kvm->arch.irq_sources_bitmap);
 	clear_bit(irq_source_id, &kvm->arch.irq_sources_bitmap);
+	mutex_unlock(&kvm->irq_lock);
 }
 }
 
 
 void kvm_register_irq_mask_notifier(struct kvm *kvm, int irq,
 void kvm_register_irq_mask_notifier(struct kvm *kvm, int irq,
 				    struct kvm_irq_mask_notifier *kimn)
 				    struct kvm_irq_mask_notifier *kimn)
 {
 {
+	mutex_lock(&kvm->irq_lock);
 	kimn->irq = irq;
 	kimn->irq = irq;
 	hlist_add_head(&kimn->link, &kvm->mask_notifier_list);
 	hlist_add_head(&kimn->link, &kvm->mask_notifier_list);
+	mutex_unlock(&kvm->irq_lock);
 }
 }
 
 
 void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
 void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
 				      struct kvm_irq_mask_notifier *kimn)
 				      struct kvm_irq_mask_notifier *kimn)
 {
 {
+	mutex_lock(&kvm->irq_lock);
 	hlist_del(&kimn->link);
 	hlist_del(&kimn->link);
+	mutex_unlock(&kvm->irq_lock);
 }
 }
 
 
 void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask)
 void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask)
@@ -235,6 +260,8 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask)
 	struct kvm_irq_mask_notifier *kimn;
 	struct kvm_irq_mask_notifier *kimn;
 	struct hlist_node *n;
 	struct hlist_node *n;
 
 
+	WARN_ON(!mutex_is_locked(&kvm->irq_lock));
+
 	hlist_for_each_entry(kimn, n, &kvm->mask_notifier_list, link)
 	hlist_for_each_entry(kimn, n, &kvm->mask_notifier_list, link)
 		if (kimn->irq == irq)
 		if (kimn->irq == irq)
 			kimn->func(kimn, mask);
 			kimn->func(kimn, mask);
@@ -250,7 +277,9 @@ static void __kvm_free_irq_routing(struct list_head *irq_routing)
 
 
 void kvm_free_irq_routing(struct kvm *kvm)
 void kvm_free_irq_routing(struct kvm *kvm)
 {
 {
+	mutex_lock(&kvm->irq_lock);
 	__kvm_free_irq_routing(&kvm->irq_routing);
 	__kvm_free_irq_routing(&kvm->irq_routing);
+	mutex_unlock(&kvm->irq_lock);
 }
 }
 
 
 static int setup_routing_entry(struct kvm_kernel_irq_routing_entry *e,
 static int setup_routing_entry(struct kvm_kernel_irq_routing_entry *e,
@@ -325,13 +354,13 @@ int kvm_set_irq_routing(struct kvm *kvm,
 		e = NULL;
 		e = NULL;
 	}
 	}
 
 
-	mutex_lock(&kvm->lock);
+	mutex_lock(&kvm->irq_lock);
 	list_splice(&kvm->irq_routing, &tmp);
 	list_splice(&kvm->irq_routing, &tmp);
 	INIT_LIST_HEAD(&kvm->irq_routing);
 	INIT_LIST_HEAD(&kvm->irq_routing);
 	list_splice(&irq_list, &kvm->irq_routing);
 	list_splice(&irq_list, &kvm->irq_routing);
 	INIT_LIST_HEAD(&irq_list);
 	INIT_LIST_HEAD(&irq_list);
 	list_splice(&tmp, &irq_list);
 	list_splice(&tmp, &irq_list);
-	mutex_unlock(&kvm->lock);
+	mutex_unlock(&kvm->irq_lock);
 
 
 	r = 0;
 	r = 0;
 
 

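An illustrative caller, not taken from this patch, showing how an interrupt source is expected to take kvm->irq_lock and interpret kvm_set_irq()'s tri-state return value; the function name is made up:

#include <linux/kvm_host.h>

static void demo_raise_gsi(struct kvm *kvm, int source_id, int gsi)
{
	int ret;

	mutex_lock(&kvm->irq_lock);		/* kvm_set_irq() now asserts this */
	ret = kvm_set_irq(kvm, source_id, gsi, 1);
	mutex_unlock(&kvm->irq_lock);

	if (ret < 0)
		pr_debug("gsi %d ignored (masked or unroutable)\n", gsi);
	else if (ret == 0)
		pr_debug("gsi %d coalesced with a pending interrupt\n", gsi);
	else
		pr_debug("gsi %d delivered to %d vcpu(s)\n", gsi, ret);
}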
+ 206 - 92
virt/kvm/kvm_main.c

@@ -59,9 +59,18 @@
 #include "irq.h"
 #include "irq.h"
 #endif
 #endif
 
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/kvm.h>
+
 MODULE_AUTHOR("Qumranet");
 MODULE_AUTHOR("Qumranet");
 MODULE_LICENSE("GPL");
 MODULE_LICENSE("GPL");
 
 
+/*
+ * Ordering of locks:
+ *
+ * 		kvm->slots_lock --> kvm->lock --> kvm->irq_lock
+ */
+
 DEFINE_SPINLOCK(kvm_lock);
 DEFINE_SPINLOCK(kvm_lock);
 LIST_HEAD(vm_list);
 LIST_HEAD(vm_list);
 
 
@@ -79,6 +88,8 @@ static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl,
 
 
 static bool kvm_rebooting;
 static bool kvm_rebooting;
 
 
+static bool largepages_enabled = true;
+
 #ifdef KVM_CAP_DEVICE_ASSIGNMENT
 #ifdef KVM_CAP_DEVICE_ASSIGNMENT
 static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head,
 static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head,
 						      int assigned_dev_id)
 						      int assigned_dev_id)
@@ -120,17 +131,13 @@ static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work)
 {
 {
 	struct kvm_assigned_dev_kernel *assigned_dev;
 	struct kvm_assigned_dev_kernel *assigned_dev;
 	struct kvm *kvm;
 	struct kvm *kvm;
-	int irq, i;
+	int i;
 
 
 	assigned_dev = container_of(work, struct kvm_assigned_dev_kernel,
 	assigned_dev = container_of(work, struct kvm_assigned_dev_kernel,
 				    interrupt_work);
 				    interrupt_work);
 	kvm = assigned_dev->kvm;
 	kvm = assigned_dev->kvm;
 
 
-	/* This is taken to safely inject irq inside the guest. When
-	 * the interrupt injection (or the ioapic code) uses a
-	 * finer-grained lock, update this
-	 */
-	mutex_lock(&kvm->lock);
+	mutex_lock(&kvm->irq_lock);
 	spin_lock_irq(&assigned_dev->assigned_dev_lock);
 	spin_lock_irq(&assigned_dev->assigned_dev_lock);
 	if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) {
 	if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) {
 		struct kvm_guest_msix_entry *guest_entries =
 		struct kvm_guest_msix_entry *guest_entries =
@@ -143,23 +150,13 @@ static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work)
 			kvm_set_irq(assigned_dev->kvm,
 			kvm_set_irq(assigned_dev->kvm,
 				    assigned_dev->irq_source_id,
 				    assigned_dev->irq_source_id,
 				    guest_entries[i].vector, 1);
 				    guest_entries[i].vector, 1);
-			irq = assigned_dev->host_msix_entries[i].vector;
-			if (irq != 0)
-				enable_irq(irq);
-			assigned_dev->host_irq_disabled = false;
 		}
 		}
-	} else {
+	} else
 		kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id,
 		kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id,
 			    assigned_dev->guest_irq, 1);
 			    assigned_dev->guest_irq, 1);
-		if (assigned_dev->irq_requested_type &
-				KVM_DEV_IRQ_GUEST_MSI) {
-			enable_irq(assigned_dev->host_irq);
-			assigned_dev->host_irq_disabled = false;
-		}
-	}
 
 
 	spin_unlock_irq(&assigned_dev->assigned_dev_lock);
 	spin_unlock_irq(&assigned_dev->assigned_dev_lock);
-	mutex_unlock(&assigned_dev->kvm->lock);
+	mutex_unlock(&assigned_dev->kvm->irq_lock);
 }
 }
 
 
 static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id)
 static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id)
@@ -179,8 +176,10 @@ static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id)
 
 
 	schedule_work(&assigned_dev->interrupt_work);
 	schedule_work(&assigned_dev->interrupt_work);
 
 
-	disable_irq_nosync(irq);
-	assigned_dev->host_irq_disabled = true;
+	if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_GUEST_INTX) {
+		disable_irq_nosync(irq);
+		assigned_dev->host_irq_disabled = true;
+	}
 
 
 out:
 out:
 	spin_unlock_irqrestore(&assigned_dev->assigned_dev_lock, flags);
 	spin_unlock_irqrestore(&assigned_dev->assigned_dev_lock, flags);
@@ -215,7 +214,7 @@ static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian)
 static void deassign_guest_irq(struct kvm *kvm,
 static void deassign_guest_irq(struct kvm *kvm,
 			       struct kvm_assigned_dev_kernel *assigned_dev)
 			       struct kvm_assigned_dev_kernel *assigned_dev)
 {
 {
-	kvm_unregister_irq_ack_notifier(&assigned_dev->ack_notifier);
+	kvm_unregister_irq_ack_notifier(kvm, &assigned_dev->ack_notifier);
 	assigned_dev->ack_notifier.gsi = -1;
 	assigned_dev->ack_notifier.gsi = -1;
 
 
 	if (assigned_dev->irq_source_id != -1)
 	if (assigned_dev->irq_source_id != -1)
@@ -417,6 +416,7 @@ static int assigned_device_enable_guest_msi(struct kvm *kvm,
 {
 {
 	dev->guest_irq = irq->guest_irq;
 	dev->guest_irq = irq->guest_irq;
 	dev->ack_notifier.gsi = -1;
 	dev->ack_notifier.gsi = -1;
+	dev->host_irq_disabled = false;
 	return 0;
 	return 0;
 }
 }
 #endif
 #endif
@@ -427,6 +427,7 @@ static int assigned_device_enable_guest_msix(struct kvm *kvm,
 {
 {
 	dev->guest_irq = irq->guest_irq;
 	dev->guest_irq = irq->guest_irq;
 	dev->ack_notifier.gsi = -1;
 	dev->ack_notifier.gsi = -1;
+	dev->host_irq_disabled = false;
 	return 0;
 	return 0;
 }
 }
 #endif
 #endif
@@ -693,11 +694,6 @@ out:
 }
 }
 #endif
 #endif
 
 
-static inline int valid_vcpu(int n)
-{
-	return likely(n >= 0 && n < KVM_MAX_VCPUS);
-}
-
 inline int kvm_is_mmio_pfn(pfn_t pfn)
 inline int kvm_is_mmio_pfn(pfn_t pfn)
 {
 {
 	if (pfn_valid(pfn)) {
 	if (pfn_valid(pfn)) {
@@ -745,12 +741,9 @@ static bool make_all_cpus_request(struct kvm *kvm, unsigned int req)
 	if (alloc_cpumask_var(&cpus, GFP_ATOMIC))
 	if (alloc_cpumask_var(&cpus, GFP_ATOMIC))
 		cpumask_clear(cpus);
 		cpumask_clear(cpus);
 
 
-	me = get_cpu();
 	spin_lock(&kvm->requests_lock);
 	spin_lock(&kvm->requests_lock);
-	for (i = 0; i < KVM_MAX_VCPUS; ++i) {
-		vcpu = kvm->vcpus[i];
-		if (!vcpu)
-			continue;
+	me = smp_processor_id();
+	kvm_for_each_vcpu(i, vcpu, kvm) {
 		if (test_and_set_bit(req, &vcpu->requests))
 		if (test_and_set_bit(req, &vcpu->requests))
 			continue;
 			continue;
 		cpu = vcpu->cpu;
 		cpu = vcpu->cpu;
@@ -764,7 +757,6 @@ static bool make_all_cpus_request(struct kvm *kvm, unsigned int req)
 	else
 	else
 		called = false;
 		called = false;
 	spin_unlock(&kvm->requests_lock);
 	spin_unlock(&kvm->requests_lock);
-	put_cpu();
 	free_cpumask_var(cpus);
 	free_cpumask_var(cpus);
 	return called;
 	return called;
 }
 }
@@ -986,7 +978,9 @@ static struct kvm *kvm_create_vm(void)
 	spin_lock_init(&kvm->mmu_lock);
 	spin_lock_init(&kvm->mmu_lock);
 	spin_lock_init(&kvm->requests_lock);
 	spin_lock_init(&kvm->requests_lock);
 	kvm_io_bus_init(&kvm->pio_bus);
 	kvm_io_bus_init(&kvm->pio_bus);
+	kvm_eventfd_init(kvm);
 	mutex_init(&kvm->lock);
 	mutex_init(&kvm->lock);
+	mutex_init(&kvm->irq_lock);
 	kvm_io_bus_init(&kvm->mmio_bus);
 	kvm_io_bus_init(&kvm->mmio_bus);
 	init_rwsem(&kvm->slots_lock);
 	init_rwsem(&kvm->slots_lock);
 	atomic_set(&kvm->users_count, 1);
 	atomic_set(&kvm->users_count, 1);
@@ -1006,19 +1000,25 @@ out:
 static void kvm_free_physmem_slot(struct kvm_memory_slot *free,
 static void kvm_free_physmem_slot(struct kvm_memory_slot *free,
 				  struct kvm_memory_slot *dont)
 				  struct kvm_memory_slot *dont)
 {
 {
+	int i;
+
 	if (!dont || free->rmap != dont->rmap)
 	if (!dont || free->rmap != dont->rmap)
 		vfree(free->rmap);
 		vfree(free->rmap);
 
 
 	if (!dont || free->dirty_bitmap != dont->dirty_bitmap)
 	if (!dont || free->dirty_bitmap != dont->dirty_bitmap)
 		vfree(free->dirty_bitmap);
 		vfree(free->dirty_bitmap);
 
 
-	if (!dont || free->lpage_info != dont->lpage_info)
-		vfree(free->lpage_info);
+
+	for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) {
+		if (!dont || free->lpage_info[i] != dont->lpage_info[i]) {
+			vfree(free->lpage_info[i]);
+			free->lpage_info[i] = NULL;
+		}
+	}
 
 
 	free->npages = 0;
 	free->npages = 0;
 	free->dirty_bitmap = NULL;
 	free->dirty_bitmap = NULL;
 	free->rmap = NULL;
 	free->rmap = NULL;
-	free->lpage_info = NULL;
 }
 }
 
 
 void kvm_free_physmem(struct kvm *kvm)
 void kvm_free_physmem(struct kvm *kvm)
@@ -1071,6 +1071,8 @@ static int kvm_vm_release(struct inode *inode, struct file *filp)
 {
 {
 	struct kvm *kvm = filp->private_data;
 	struct kvm *kvm = filp->private_data;
 
 
+	kvm_irqfd_release(kvm);
+
 	kvm_put_kvm(kvm);
 	kvm_put_kvm(kvm);
 	return 0;
 	return 0;
 }
 }
@@ -1089,8 +1091,8 @@ int __kvm_set_memory_region(struct kvm *kvm,
 {
 {
 	int r;
 	int r;
 	gfn_t base_gfn;
 	gfn_t base_gfn;
-	unsigned long npages, ugfn;
-	unsigned long largepages, i;
+	unsigned long npages;
+	unsigned long i;
 	struct kvm_memory_slot *memslot;
 	struct kvm_memory_slot *memslot;
 	struct kvm_memory_slot old, new;
 	struct kvm_memory_slot old, new;
 
 
@@ -1164,31 +1166,51 @@ int __kvm_set_memory_region(struct kvm *kvm,
 		else
 		else
 			new.userspace_addr = 0;
 			new.userspace_addr = 0;
 	}
 	}
-	if (npages && !new.lpage_info) {
-		largepages = 1 + (base_gfn + npages - 1) / KVM_PAGES_PER_HPAGE;
-		largepages -= base_gfn / KVM_PAGES_PER_HPAGE;
+	if (!npages)
+		goto skip_lpage;
 
 
-		new.lpage_info = vmalloc(largepages * sizeof(*new.lpage_info));
+	for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) {
+		unsigned long ugfn;
+		unsigned long j;
+		int lpages;
+		int level = i + 2;
 
 
-		if (!new.lpage_info)
+		/* Avoid unused variable warning if no large pages */
+		(void)level;
+
+		if (new.lpage_info[i])
+			continue;
+
+		lpages = 1 + (base_gfn + npages - 1) /
+			     KVM_PAGES_PER_HPAGE(level);
+		lpages -= base_gfn / KVM_PAGES_PER_HPAGE(level);
+
+		new.lpage_info[i] = vmalloc(lpages * sizeof(*new.lpage_info[i]));
+
+		if (!new.lpage_info[i])
 			goto out_free;
 			goto out_free;
 
 
-		memset(new.lpage_info, 0, largepages * sizeof(*new.lpage_info));
+		memset(new.lpage_info[i], 0,
+		       lpages * sizeof(*new.lpage_info[i]));
 
 
-		if (base_gfn % KVM_PAGES_PER_HPAGE)
-			new.lpage_info[0].write_count = 1;
-		if ((base_gfn+npages) % KVM_PAGES_PER_HPAGE)
-			new.lpage_info[largepages-1].write_count = 1;
+		if (base_gfn % KVM_PAGES_PER_HPAGE(level))
+			new.lpage_info[i][0].write_count = 1;
+		if ((base_gfn+npages) % KVM_PAGES_PER_HPAGE(level))
+			new.lpage_info[i][lpages - 1].write_count = 1;
 		ugfn = new.userspace_addr >> PAGE_SHIFT;
 		ugfn = new.userspace_addr >> PAGE_SHIFT;
 		/*
 		/*
 		 * If the gfn and userspace address are not aligned wrt each
 		 * If the gfn and userspace address are not aligned wrt each
-		 * other, disable large page support for this slot
+		 * other, or if explicitly asked to, disable large page
+		 * support for this slot
 		 */
 		 */
-		if ((base_gfn ^ ugfn) & (KVM_PAGES_PER_HPAGE - 1))
-			for (i = 0; i < largepages; ++i)
-				new.lpage_info[i].write_count = 1;
+		if ((base_gfn ^ ugfn) & (KVM_PAGES_PER_HPAGE(level) - 1) ||
+		    !largepages_enabled)
+			for (j = 0; j < lpages; ++j)
+				new.lpage_info[i][j].write_count = 1;
 	}
 	}
 
 
+skip_lpage:
+
 	/* Allocate page dirty bitmap if needed */
 	/* Allocate page dirty bitmap if needed */
 	if ((new.flags & KVM_MEM_LOG_DIRTY_PAGES) && !new.dirty_bitmap) {
 	if ((new.flags & KVM_MEM_LOG_DIRTY_PAGES) && !new.dirty_bitmap) {
 		unsigned dirty_bytes = ALIGN(npages, BITS_PER_LONG) / 8;
 		unsigned dirty_bytes = ALIGN(npages, BITS_PER_LONG) / 8;
@@ -1200,6 +1222,10 @@ int __kvm_set_memory_region(struct kvm *kvm,
 		if (old.npages)
 		if (old.npages)
 			kvm_arch_flush_shadow(kvm);
 			kvm_arch_flush_shadow(kvm);
 	}
 	}
+#else  /* not defined CONFIG_S390 */
+	new.user_alloc = user_alloc;
+	if (user_alloc)
+		new.userspace_addr = mem->userspace_addr;
 #endif /* not defined CONFIG_S390 */
 #endif /* not defined CONFIG_S390 */
 
 
 	if (!npages)
 	if (!npages)
@@ -1299,6 +1325,12 @@ out:
 	return r;
 	return r;
 }
 }
 
 
+void kvm_disable_largepages(void)
+{
+	largepages_enabled = false;
+}
+EXPORT_SYMBOL_GPL(kvm_disable_largepages);
+
 int is_error_page(struct page *page)
 int is_error_page(struct page *page)
 {
 {
 	return page == bad_page;
 	return page == bad_page;
@@ -1635,9 +1667,7 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
 	for (;;) {
 	for (;;) {
 		prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE);
 		prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE);
 
 
-		if ((kvm_arch_interrupt_allowed(vcpu) &&
-					kvm_cpu_has_interrupt(vcpu)) ||
-				kvm_arch_vcpu_runnable(vcpu)) {
+		if (kvm_arch_vcpu_runnable(vcpu)) {
 			set_bit(KVM_REQ_UNHALT, &vcpu->requests);
 			set_bit(KVM_REQ_UNHALT, &vcpu->requests);
 			break;
 			break;
 		}
 		}
@@ -1714,24 +1744,18 @@ static struct file_operations kvm_vcpu_fops = {
  */
  */
 static int create_vcpu_fd(struct kvm_vcpu *vcpu)
 static int create_vcpu_fd(struct kvm_vcpu *vcpu)
 {
 {
-	int fd = anon_inode_getfd("kvm-vcpu", &kvm_vcpu_fops, vcpu, 0);
-	if (fd < 0)
-		kvm_put_kvm(vcpu->kvm);
-	return fd;
+	return anon_inode_getfd("kvm-vcpu", &kvm_vcpu_fops, vcpu, 0);
 }
 }
 
 
 /*
 /*
  * Creates some virtual cpus.  Good luck creating more than one.
  * Creates some virtual cpus.  Good luck creating more than one.
  */
  */
-static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int n)
+static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id)
 {
 {
 	int r;
 	int r;
-	struct kvm_vcpu *vcpu;
-
-	if (!valid_vcpu(n))
-		return -EINVAL;
+	struct kvm_vcpu *vcpu, *v;
 
 
-	vcpu = kvm_arch_vcpu_create(kvm, n);
+	vcpu = kvm_arch_vcpu_create(kvm, id);
 	if (IS_ERR(vcpu))
 	if (IS_ERR(vcpu))
 		return PTR_ERR(vcpu);
 		return PTR_ERR(vcpu);
 
 
@@ -1742,23 +1766,38 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int n)
 		return r;
 		return r;
 
 
 	mutex_lock(&kvm->lock);
 	mutex_lock(&kvm->lock);
-	if (kvm->vcpus[n]) {
-		r = -EEXIST;
+	if (atomic_read(&kvm->online_vcpus) == KVM_MAX_VCPUS) {
+		r = -EINVAL;
 		goto vcpu_destroy;
 		goto vcpu_destroy;
 	}
 	}
-	kvm->vcpus[n] = vcpu;
-	mutex_unlock(&kvm->lock);
+
+	kvm_for_each_vcpu(r, v, kvm)
+		if (v->vcpu_id == id) {
+			r = -EEXIST;
+			goto vcpu_destroy;
+		}
+
+	BUG_ON(kvm->vcpus[atomic_read(&kvm->online_vcpus)]);
 
 
 	/* Now it's all set up, let userspace reach it */
 	/* Now it's all set up, let userspace reach it */
 	kvm_get_kvm(kvm);
 	kvm_get_kvm(kvm);
 	r = create_vcpu_fd(vcpu);
 	r = create_vcpu_fd(vcpu);
-	if (r < 0)
-		goto unlink;
+	if (r < 0) {
+		kvm_put_kvm(kvm);
+		goto vcpu_destroy;
+	}
+
+	kvm->vcpus[atomic_read(&kvm->online_vcpus)] = vcpu;
+	smp_wmb();
+	atomic_inc(&kvm->online_vcpus);
+
+#ifdef CONFIG_KVM_APIC_ARCHITECTURE
+	if (kvm->bsp_vcpu_id == id)
+		kvm->bsp_vcpu = vcpu;
+#endif
+	mutex_unlock(&kvm->lock);
 	return r;
 	return r;
 
 
-unlink:
-	mutex_lock(&kvm->lock);
-	kvm->vcpus[n] = NULL;
 vcpu_destroy:
 vcpu_destroy:
 	mutex_unlock(&kvm->lock);
 	mutex_unlock(&kvm->lock);
 	kvm_arch_vcpu_destroy(vcpu);
 	kvm_arch_vcpu_destroy(vcpu);
@@ -2199,6 +2238,7 @@ static long kvm_vm_ioctl(struct file *filp,
 		vfree(entries);
 		vfree(entries);
 		break;
 		break;
 	}
 	}
+#endif /* KVM_CAP_IRQ_ROUTING */
 #ifdef __KVM_HAVE_MSIX
 #ifdef __KVM_HAVE_MSIX
 	case KVM_ASSIGN_SET_MSIX_NR: {
 	case KVM_ASSIGN_SET_MSIX_NR: {
 		struct kvm_assigned_msix_nr entry_nr;
 		struct kvm_assigned_msix_nr entry_nr;
@@ -2221,7 +2261,35 @@ static long kvm_vm_ioctl(struct file *filp,
 		break;
 		break;
 	}
 	}
 #endif
 #endif
-#endif /* KVM_CAP_IRQ_ROUTING */
+	case KVM_IRQFD: {
+		struct kvm_irqfd data;
+
+		r = -EFAULT;
+		if (copy_from_user(&data, argp, sizeof data))
+			goto out;
+		r = kvm_irqfd(kvm, data.fd, data.gsi, data.flags);
+		break;
+	}
+	case KVM_IOEVENTFD: {
+		struct kvm_ioeventfd data;
+
+		r = -EFAULT;
+		if (copy_from_user(&data, argp, sizeof data))
+			goto out;
+		r = kvm_ioeventfd(kvm, &data);
+		break;
+	}
+#ifdef CONFIG_KVM_APIC_ARCHITECTURE
+	case KVM_SET_BOOT_CPU_ID:
+		r = 0;
+		mutex_lock(&kvm->lock);
+		if (atomic_read(&kvm->online_vcpus) != 0)
+			r = -EBUSY;
+		else
+			kvm->bsp_vcpu_id = arg;
+		mutex_unlock(&kvm->lock);
+		break;
+#endif
 	default:
 	default:
 		r = kvm_arch_vm_ioctl(filp, ioctl, arg);
 		r = kvm_arch_vm_ioctl(filp, ioctl, arg);
 	}
 	}
@@ -2288,6 +2356,9 @@ static long kvm_dev_ioctl_check_extension_generic(long arg)
 	case KVM_CAP_USER_MEMORY:
 	case KVM_CAP_USER_MEMORY:
 	case KVM_CAP_DESTROY_MEMORY_REGION_WORKS:
 	case KVM_CAP_DESTROY_MEMORY_REGION_WORKS:
 	case KVM_CAP_JOIN_MEMORY_REGIONS_WORKS:
 	case KVM_CAP_JOIN_MEMORY_REGIONS_WORKS:
+#ifdef CONFIG_KVM_APIC_ARCHITECTURE
+	case KVM_CAP_SET_BOOT_CPU_ID:
+#endif
 		return 1;
 		return 1;
 #ifdef CONFIG_HAVE_KVM_IRQCHIP
 #ifdef CONFIG_HAVE_KVM_IRQCHIP
 	case KVM_CAP_IRQ_ROUTING:
 	case KVM_CAP_IRQ_ROUTING:
@@ -2335,7 +2406,7 @@ static long kvm_dev_ioctl(struct file *filp,
 	case KVM_TRACE_ENABLE:
 	case KVM_TRACE_ENABLE:
 	case KVM_TRACE_PAUSE:
 	case KVM_TRACE_PAUSE:
 	case KVM_TRACE_DISABLE:
 	case KVM_TRACE_DISABLE:
-		r = kvm_trace_ioctl(ioctl, arg);
+		r = -EOPNOTSUPP;
 		break;
 		break;
 	default:
 	default:
 		return kvm_arch_dev_ioctl(filp, ioctl, arg);
 		return kvm_arch_dev_ioctl(filp, ioctl, arg);
@@ -2449,26 +2520,71 @@ void kvm_io_bus_destroy(struct kvm_io_bus *bus)
 	}
 	}
 }
 }
 
 
-struct kvm_io_device *kvm_io_bus_find_dev(struct kvm_io_bus *bus,
-					  gpa_t addr, int len, int is_write)
+/* kvm_io_bus_write - called under kvm->slots_lock */
+int kvm_io_bus_write(struct kvm_io_bus *bus, gpa_t addr,
+		     int len, const void *val)
 {
 {
 	int i;
 	int i;
+	for (i = 0; i < bus->dev_count; i++)
+		if (!kvm_iodevice_write(bus->devs[i], addr, len, val))
+			return 0;
+	return -EOPNOTSUPP;
+}
 
 
-	for (i = 0; i < bus->dev_count; i++) {
-		struct kvm_io_device *pos = bus->devs[i];
+/* kvm_io_bus_read - called under kvm->slots_lock */
+int kvm_io_bus_read(struct kvm_io_bus *bus, gpa_t addr, int len, void *val)
+{
+	int i;
+	for (i = 0; i < bus->dev_count; i++)
+		if (!kvm_iodevice_read(bus->devs[i], addr, len, val))
+			return 0;
+	return -EOPNOTSUPP;
+}
 
 
-		if (pos->in_range(pos, addr, len, is_write))
-			return pos;
-	}
+int kvm_io_bus_register_dev(struct kvm *kvm, struct kvm_io_bus *bus,
+			     struct kvm_io_device *dev)
+{
+	int ret;
 
 
-	return NULL;
+	down_write(&kvm->slots_lock);
+	ret = __kvm_io_bus_register_dev(bus, dev);
+	up_write(&kvm->slots_lock);
+
+	return ret;
 }
 }
 
 
-void kvm_io_bus_register_dev(struct kvm_io_bus *bus, struct kvm_io_device *dev)
+/* An unlocked version. Caller must have write lock on slots_lock. */
+int __kvm_io_bus_register_dev(struct kvm_io_bus *bus,
+			      struct kvm_io_device *dev)
 {
 {
-	BUG_ON(bus->dev_count > (NR_IOBUS_DEVS-1));
+	if (bus->dev_count > NR_IOBUS_DEVS-1)
+		return -ENOSPC;
 
 
 	bus->devs[bus->dev_count++] = dev;
 	bus->devs[bus->dev_count++] = dev;
+
+	return 0;
+}
+
+void kvm_io_bus_unregister_dev(struct kvm *kvm,
+			       struct kvm_io_bus *bus,
+			       struct kvm_io_device *dev)
+{
+	down_write(&kvm->slots_lock);
+	__kvm_io_bus_unregister_dev(bus, dev);
+	up_write(&kvm->slots_lock);
+}
+
+/* An unlocked version. Caller must have write lock on slots_lock. */
+void __kvm_io_bus_unregister_dev(struct kvm_io_bus *bus,
+				 struct kvm_io_device *dev)
+{
+	int i;
+
+	for (i = 0; i < bus->dev_count; i++)
+		if (bus->devs[i] == dev) {
+			bus->devs[i] = bus->devs[--bus->dev_count];
+			break;
+		}
 }
 }
 
 
 static struct notifier_block kvm_cpu_notifier = {
 static struct notifier_block kvm_cpu_notifier = {
@@ -2501,11 +2617,9 @@ static int vcpu_stat_get(void *_offset, u64 *val)
 	*val = 0;
 	*val = 0;
 	spin_lock(&kvm_lock);
 	spin_lock(&kvm_lock);
 	list_for_each_entry(kvm, &vm_list, vm_list)
 	list_for_each_entry(kvm, &vm_list, vm_list)
-		for (i = 0; i < KVM_MAX_VCPUS; ++i) {
-			vcpu = kvm->vcpus[i];
-			if (vcpu)
-				*val += *(u32 *)((void *)vcpu + offset);
-		}
+		kvm_for_each_vcpu(i, vcpu, kvm)
+			*val += *(u32 *)((void *)vcpu + offset);
+
 	spin_unlock(&kvm_lock);
 	spin_unlock(&kvm_lock);
 	return 0;
 	return 0;
 }
 }
@@ -2679,15 +2793,15 @@ out_free_0:
 	__free_page(bad_page);
 	__free_page(bad_page);
 out:
 out:
 	kvm_arch_exit();
 	kvm_arch_exit();
-	kvm_exit_debug();
 out_fail:
 out_fail:
+	kvm_exit_debug();
 	return r;
 	return r;
 }
 }
 EXPORT_SYMBOL_GPL(kvm_init);
 EXPORT_SYMBOL_GPL(kvm_init);
 
 
 void kvm_exit(void)
 void kvm_exit(void)
 {
 {
-	kvm_trace_cleanup();
+	tracepoint_synchronize_unregister();
 	misc_deregister(&kvm_dev);
 	misc_deregister(&kvm_dev);
 	kmem_cache_destroy(kvm_vcpu_cache);
 	kmem_cache_destroy(kvm_vcpu_cache);
 	sysdev_unregister(&kvm_sysdev);
 	sysdev_unregister(&kvm_sysdev);

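A hypothetical userspace sketch for the KVM_SET_BOOT_CPU_ID ioctl added above (only available where CONFIG_KVM_APIC_ARCHITECTURE is set; the helper name is invented and the returned vcpu fds are dropped for brevity):

#include <sys/ioctl.h>
#include <linux/kvm.h>

static int create_vcpus(int vm_fd, int nr_vcpus, int bsp_id)
{
	int i;

	/* must run before any vcpu exists; fails with EBUSY afterwards */
	if (ioctl(vm_fd, KVM_SET_BOOT_CPU_ID, (unsigned long)bsp_id) < 0)
		return -1;

	for (i = 0; i < nr_vcpus; i++) {
		int vcpu_fd = ioctl(vm_fd, KVM_CREATE_VCPU, (unsigned long)i);

		if (vcpu_fd < 0)
			return -1;
		/* a real VMM would keep vcpu_fd; it is discarded here for brevity */
	}
	return 0;
}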
+ 0 - 285
virt/kvm/kvm_trace.c

@@ -1,285 +0,0 @@
-/*
- * kvm trace
- *
- * It is designed to allow debugging traces of kvm to be generated
- * on UP / SMP machines.  Each trace entry can be timestamped so that
- * it's possible to reconstruct a chronological record of trace events.
- * The implementation refers to blktrace kernel support.
- *
- * Copyright (c) 2008 Intel Corporation
- * Copyright (C) 2006 Jens Axboe <axboe@kernel.dk>
- *
- * Authors: Feng(Eric) Liu, eric.e.liu@intel.com
- *
- * Date:    Feb 2008
- */
-
-#include <linux/module.h>
-#include <linux/relay.h>
-#include <linux/debugfs.h>
-#include <linux/ktime.h>
-
-#include <linux/kvm_host.h>
-
-#define KVM_TRACE_STATE_RUNNING 	(1 << 0)
-#define KVM_TRACE_STATE_PAUSE 		(1 << 1)
-#define KVM_TRACE_STATE_CLEARUP 	(1 << 2)
-
-struct kvm_trace {
-	int trace_state;
-	struct rchan *rchan;
-	struct dentry *lost_file;
-	atomic_t lost_records;
-};
-static struct kvm_trace *kvm_trace;
-
-struct kvm_trace_probe {
-	const char *name;
-	const char *format;
-	u32 timestamp_in;
-	marker_probe_func *probe_func;
-};
-
-static inline int calc_rec_size(int timestamp, int extra)
-{
-	int rec_size = KVM_TRC_HEAD_SIZE;
-
-	rec_size += extra;
-	return timestamp ? rec_size += KVM_TRC_CYCLE_SIZE : rec_size;
-}
-
-static void kvm_add_trace(void *probe_private, void *call_data,
-			  const char *format, va_list *args)
-{
-	struct kvm_trace_probe *p = probe_private;
-	struct kvm_trace *kt = kvm_trace;
-	struct kvm_trace_rec rec;
-	struct kvm_vcpu *vcpu;
-	int    i, size;
-	u32    extra;
-
-	if (unlikely(kt->trace_state != KVM_TRACE_STATE_RUNNING))
-		return;
-
-	rec.rec_val	= TRACE_REC_EVENT_ID(va_arg(*args, u32));
-	vcpu		= va_arg(*args, struct kvm_vcpu *);
-	rec.pid		= current->tgid;
-	rec.vcpu_id	= vcpu->vcpu_id;
-
-	extra   	= va_arg(*args, u32);
-	WARN_ON(!(extra <= KVM_TRC_EXTRA_MAX));
-	extra 		= min_t(u32, extra, KVM_TRC_EXTRA_MAX);
-
-	rec.rec_val |= TRACE_REC_TCS(p->timestamp_in)
-			| TRACE_REC_NUM_DATA_ARGS(extra);
-
-	if (p->timestamp_in) {
-		rec.u.timestamp.timestamp = ktime_to_ns(ktime_get());
-
-		for (i = 0; i < extra; i++)
-			rec.u.timestamp.extra_u32[i] = va_arg(*args, u32);
-	} else {
-		for (i = 0; i < extra; i++)
-			rec.u.notimestamp.extra_u32[i] = va_arg(*args, u32);
-	}
-
-	size = calc_rec_size(p->timestamp_in, extra * sizeof(u32));
-	relay_write(kt->rchan, &rec, size);
-}
-
-static struct kvm_trace_probe kvm_trace_probes[] = {
-	{ "kvm_trace_entryexit", "%u %p %u %u %u %u %u %u", 1, kvm_add_trace },
-	{ "kvm_trace_handler", "%u %p %u %u %u %u %u %u", 0, kvm_add_trace },
-};
-
-static int lost_records_get(void *data, u64 *val)
-{
-	struct kvm_trace *kt = data;
-
-	*val = atomic_read(&kt->lost_records);
-	return 0;
-}
-
-DEFINE_SIMPLE_ATTRIBUTE(kvm_trace_lost_ops, lost_records_get, NULL, "%llu\n");
-
-/*
- *  The relay channel is used in "no-overwrite" mode, it keeps trace of how
- *  many times we encountered a full subbuffer, to tell user space app the
- *  lost records there were.
- */
-static int kvm_subbuf_start_callback(struct rchan_buf *buf, void *subbuf,
-				     void *prev_subbuf, size_t prev_padding)
-{
-	struct kvm_trace *kt;
-
-	if (!relay_buf_full(buf)) {
-		if (!prev_subbuf) {
-			/*
-			 * executed only once when the channel is opened
-			 * save metadata as first record
-			 */
-			subbuf_start_reserve(buf, sizeof(u32));
-			*(u32 *)subbuf = 0x12345678;
-		}
-
-		return 1;
-	}
-
-	kt = buf->chan->private_data;
-	atomic_inc(&kt->lost_records);
-
-	return 0;
-}
-
-static struct dentry *kvm_create_buf_file_callack(const char *filename,
-						 struct dentry *parent,
-						 int mode,
-						 struct rchan_buf *buf,
-						 int *is_global)
-{
-	return debugfs_create_file(filename, mode, parent, buf,
-				   &relay_file_operations);
-}
-
-static int kvm_remove_buf_file_callback(struct dentry *dentry)
-{
-	debugfs_remove(dentry);
-	return 0;
-}
-
-static struct rchan_callbacks kvm_relay_callbacks = {
-	.subbuf_start 		= kvm_subbuf_start_callback,
-	.create_buf_file 	= kvm_create_buf_file_callack,
-	.remove_buf_file 	= kvm_remove_buf_file_callback,
-};
-
-static int do_kvm_trace_enable(struct kvm_user_trace_setup *kuts)
-{
-	struct kvm_trace *kt;
-	int i, r = -ENOMEM;
-
-	if (!kuts->buf_size || !kuts->buf_nr)
-		return -EINVAL;
-
-	kt = kzalloc(sizeof(*kt), GFP_KERNEL);
-	if (!kt)
-		goto err;
-
-	r = -EIO;
-	atomic_set(&kt->lost_records, 0);
-	kt->lost_file = debugfs_create_file("lost_records", 0444, kvm_debugfs_dir,
-					    kt, &kvm_trace_lost_ops);
-	if (!kt->lost_file)
-		goto err;
-
-	kt->rchan = relay_open("trace", kvm_debugfs_dir, kuts->buf_size,
-				kuts->buf_nr, &kvm_relay_callbacks, kt);
-	if (!kt->rchan)
-		goto err;
-
-	kvm_trace = kt;
-
-	for (i = 0; i < ARRAY_SIZE(kvm_trace_probes); i++) {
-		struct kvm_trace_probe *p = &kvm_trace_probes[i];
-
-		r = marker_probe_register(p->name, p->format, p->probe_func, p);
-		if (r)
-			printk(KERN_INFO "Unable to register probe %s\n",
-			       p->name);
-	}
-
-	kvm_trace->trace_state = KVM_TRACE_STATE_RUNNING;
-
-	return 0;
-err:
-	if (kt) {
-		if (kt->lost_file)
-			debugfs_remove(kt->lost_file);
-		if (kt->rchan)
-			relay_close(kt->rchan);
-		kfree(kt);
-	}
-	return r;
-}
-
-static int kvm_trace_enable(char __user *arg)
-{
-	struct kvm_user_trace_setup kuts;
-	int ret;
-
-	ret = copy_from_user(&kuts, arg, sizeof(kuts));
-	if (ret)
-		return -EFAULT;
-
-	ret = do_kvm_trace_enable(&kuts);
-	if (ret)
-		return ret;
-
-	return 0;
-}
-
-static int kvm_trace_pause(void)
-{
-	struct kvm_trace *kt = kvm_trace;
-	int r = -EINVAL;
-
-	if (kt == NULL)
-		return r;
-
-	if (kt->trace_state == KVM_TRACE_STATE_RUNNING) {
-		kt->trace_state = KVM_TRACE_STATE_PAUSE;
-		relay_flush(kt->rchan);
-		r = 0;
-	}
-
-	return r;
-}
-
-void kvm_trace_cleanup(void)
-{
-	struct kvm_trace *kt = kvm_trace;
-	int i;
-
-	if (kt == NULL)
-		return;
-
-	if (kt->trace_state == KVM_TRACE_STATE_RUNNING ||
-	    kt->trace_state == KVM_TRACE_STATE_PAUSE) {
-
-		kt->trace_state = KVM_TRACE_STATE_CLEARUP;
-
-		for (i = 0; i < ARRAY_SIZE(kvm_trace_probes); i++) {
-			struct kvm_trace_probe *p = &kvm_trace_probes[i];
-			marker_probe_unregister(p->name, p->probe_func, p);
-		}
-		marker_synchronize_unregister();
-
-		relay_close(kt->rchan);
-		debugfs_remove(kt->lost_file);
-		kfree(kt);
-	}
-}
-
-int kvm_trace_ioctl(unsigned int ioctl, unsigned long arg)
-{
-	void __user *argp = (void __user *)arg;
-	long r = -EINVAL;
-
-	if (!capable(CAP_SYS_ADMIN))
-		return -EPERM;
-
-	switch (ioctl) {
-	case KVM_TRACE_ENABLE:
-		r = kvm_trace_enable(argp);
-		break;
-	case KVM_TRACE_PAUSE:
-		r = kvm_trace_pause();
-		break;
-	case KVM_TRACE_DISABLE:
-		r = 0;
-		kvm_trace_cleanup();
-		break;
-	}
-
-	return r;
-}