Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull KVM updates from Paolo Bonzini:
 "First batch of KVM changes for 4.4.

  s390:
     A bunch of fixes and optimizations for interrupt and time handling.

  PPC:
     Mostly bug fixes.

  ARM:
     No big features, but many small fixes and prerequisites including:

      - a number of fixes for the arch-timer

      - introducing proper level-triggered semantics for the arch-timers

      - a series of patches to synchronously halt a guest (prerequisite
        for IRQ forwarding)

      - some tracepoint improvements

      - a tweak for the EL2 panic handlers

      - some more VGIC cleanups getting rid of redundant state

  x86:
     Quite a few changes:

      - support for VT-d posted interrupts (i.e. PCI devices can inject
        interrupts directly into vCPUs).  This introduces a new
        component (in virt/lib/) that connects VFIO and KVM together.
        The same infrastructure will be used for ARM interrupt
        forwarding as well.

      - more Hyper-V features, though the main one, the Hyper-V synthetic
        interrupt controller, will have to wait for 4.5.  These will let
        KVM expose Hyper-V devices.

      - nested virtualization now supports VPID (same as PCID but for
        vCPUs) which makes it quite a bit faster

      - for future hardware that supports NVDIMM, there is support for
        clflushopt, clwb, pcommit

      - support for "split irqchip", i.e.  LAPIC in kernel +
        IOAPIC/PIC/PIT in userspace, which reduces the attack surface of
        the hypervisor

      - obligatory smattering of SMM fixes

      - on the guest side, stable scheduler clock support was rewritten
        to not require help from the hypervisor"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (123 commits)
  KVM: VMX: Fix commit which broke PML
  KVM: x86: obey KVM_X86_QUIRK_CD_NW_CLEARED in kvm_set_cr0()
  KVM: x86: allow RSM from 64-bit mode
  KVM: VMX: fix SMEP and SMAP without EPT
  KVM: x86: move kvm_set_irq_inatomic to legacy device assignment
  KVM: device assignment: remove pointless #ifdefs
  KVM: x86: merge kvm_arch_set_irq with kvm_set_msi_inatomic
  KVM: x86: zero apic_arb_prio on reset
  drivers/hv: share Hyper-V SynIC constants with userspace
  KVM: x86: handle SMBASE as physical address in RSM
  KVM: x86: add read_phys to x86_emulate_ops
  KVM: x86: removing unused variable
  KVM: don't pointlessly leave KVM_COMPAT=y in non-KVM configs
  KVM: arm/arm64: Merge vgic_set_lr() and vgic_sync_lr_elrsr()
  KVM: arm/arm64: Clean up vgic_retire_lr() and surroundings
  KVM: arm/arm64: Optimize away redundant LR tracking
  KVM: s390: use simple switch statement as multiplexer
  KVM: s390: drop useless newline in debugging data
  KVM: s390: SCA must not cross page boundaries
  KVM: arm: Do not indent the arguments of DECLARE_BITMAP
  ...
Linus Torvalds, 9 years ago
commit 933425fb00
89 changed files with 2956 additions and 1029 deletions
   1. Documentation/kernel-parameters.txt (+1 -0)
   2. Documentation/virtual/kvm/api.txt (+47 -5)
   3. Documentation/virtual/kvm/arm/vgic-mapped-irqs.txt (+187 -0)
   4. Documentation/virtual/kvm/devices/arm-vgic.txt (+10 -8)
   5. Documentation/virtual/kvm/locking.txt (+12 -0)
   6. MAINTAINERS (+7 -0)
   7. Makefile (+6 -4)
   8. arch/arm/include/asm/kvm_arm.h (+20 -0)
   9. arch/arm/include/asm/kvm_host.h (+4 -1)
  10. arch/arm/kvm/Kconfig (+2 -0)
  11. arch/arm/kvm/arm.c (+60 -16)
  12. arch/arm/kvm/psci.c (+5 -5)
  13. arch/arm/kvm/trace.h (+7 -3)
  14. arch/arm64/include/asm/kvm_arm.h (+16 -0)
  15. arch/arm64/include/asm/kvm_host.h (+4 -1)
  16. arch/arm64/kvm/Kconfig (+2 -0)
  17. arch/arm64/kvm/hyp.S (+8 -0)
  18. arch/mips/include/asm/kvm_host.h (+2 -0)
  19. arch/powerpc/include/asm/disassemble.h (+5 -0)
  20. arch/powerpc/include/asm/kvm_host.h (+2 -0)
  21. arch/powerpc/include/asm/reg_booke.h (+6 -0)
  22. arch/powerpc/kvm/book3s_64_mmu_hv.c (+2 -1)
  23. arch/powerpc/kvm/book3s_hv_rm_mmu.c (+2 -0)
  24. arch/powerpc/kvm/book3s_hv_rmhandlers.S (+22 -7)
  25. arch/powerpc/kvm/e500.c (+2 -1)
  26. arch/powerpc/kvm/e500_emulate.c (+19 -0)
  27. arch/powerpc/kvm/e500_mmu_host.c (+2 -2)
  28. arch/powerpc/kvm/powerpc.c (+3 -0)
  29. arch/s390/include/asm/kvm_host.h (+2 -0)
  30. arch/s390/kvm/intercept.c (+21 -21)
  31. arch/s390/kvm/interrupt.c (+43 -73)
  32. arch/s390/kvm/kvm-s390.c (+27 -31)
  33. arch/s390/kvm/kvm-s390.h (+31 -4)
  34. arch/s390/kvm/priv.c (+3 -16)
  35. arch/x86/include/asm/irq_remapping.h (+5 -5)
  36. arch/x86/include/asm/kvm_emulate.h (+10 -0)
  37. arch/x86/include/asm/kvm_host.h (+36 -2)
  38. arch/x86/include/asm/vmx.h (+2 -1)
  39. arch/x86/include/uapi/asm/hyperv.h (+18 -0)
  40. arch/x86/include/uapi/asm/vmx.h (+3 -1)
  41. arch/x86/kernel/kvmclock.c (+35 -11)
  42. arch/x86/kvm/Kconfig (+2 -0)
  43. arch/x86/kvm/assigned-dev.c (+37 -25)
  44. arch/x86/kvm/cpuid.c (+1 -1)
  45. arch/x86/kvm/cpuid.h (+37 -0)
  46. arch/x86/kvm/emulate.c (+27 -8)
  47. arch/x86/kvm/hyperv.c (+29 -2)
  48. arch/x86/kvm/i8254.c (+3 -1)
  49. arch/x86/kvm/ioapic.c (+6 -23)
  50. arch/x86/kvm/ioapic.h (+8 -7)
  51. arch/x86/kvm/irq.c (+30 -10)
  52. arch/x86/kvm/irq.h (+26 -1)
  53. arch/x86/kvm/irq_comm.c (+88 -41)
  54. arch/x86/kvm/lapic.c (+104 -23)
  55. arch/x86/kvm/lapic.h (+4 -3)
  56. arch/x86/kvm/mmu.c (+53 -38)
  57. arch/x86/kvm/paging_tmpl.h (+9 -10)
  58. arch/x86/kvm/svm.c (+19 -24)
  59. arch/x86/kvm/trace.h (+51 -0)
  60. arch/x86/kvm/vmx.c (+605 -145)
  61. arch/x86/kvm/x86.c (+197 -59)
  62. drivers/hv/hyperv_vmbus.h (+0 -5)
  63. drivers/iommu/irq_remapping.c (+8 -4)
  64. drivers/vfio/Kconfig (+1 -0)
  65. drivers/vfio/pci/Kconfig (+1 -0)
  66. drivers/vfio/pci/vfio_pci_intrs.c (+9 -0)
  67. drivers/vfio/pci/vfio_pci_private.h (+2 -0)
  68. include/kvm/arm_arch_timer.h (+3 -1)
  69. include/kvm/arm_vgic.h (+3 -13)
  70. include/linux/hyperv.h (+1 -0)
  71. include/linux/irqbypass.h (+90 -0)
  72. include/linux/kvm_host.h (+40 -2)
  73. include/linux/kvm_irqfd.h (+71 -0)
  74. include/uapi/linux/kvm.h (+7 -0)
  75. kernel/sched/cputime.c (+2 -0)
  76. virt/Makefile (+1 -0)
  77. virt/kvm/Kconfig (+4 -1)
  78. virt/kvm/arm/arch_timer.c (+117 -56)
  79. virt/kvm/arm/trace.h (+63 -0)
  80. virt/kvm/arm/vgic-v2.c (+1 -5)
  81. virt/kvm/arm/vgic-v3.c (+1 -5)
  82. virt/kvm/arm/vgic.c (+119 -189)
  83. virt/kvm/async_pf.c (+4 -0)
  84. virt/kvm/eventfd.c (+97 -93)
  85. virt/kvm/irqchip.c (+5 -13)
  86. virt/kvm/kvm_main.c (+9 -2)
  87. virt/lib/Kconfig (+2 -0)
  88. virt/lib/Makefile (+1 -0)
  89. virt/lib/irqbypass.c (+257 -0)

+ 1 - 0
Documentation/kernel-parameters.txt

@@ -1585,6 +1585,7 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 			nosid	disable Source ID checking
 			no_x2apic_optout
 				BIOS x2APIC opt-out request will be ignored
+			nopost	disable Interrupt Posting
 
 	iomem=		Disable strict checking of access to MMIO memory
 		strict	regions from userspace.

+ 47 - 5
Documentation/virtual/kvm/api.txt

@@ -401,10 +401,9 @@ Capability: basic
 Architectures: x86, ppc, mips
 Type: vcpu ioctl
 Parameters: struct kvm_interrupt (in)
-Returns: 0 on success, -1 on error
+Returns: 0 on success, negative on failure.
 
-Queues a hardware interrupt vector to be injected.  This is only
-useful if in-kernel local APIC or equivalent is not used.
+Queues a hardware interrupt vector to be injected.
 
 /* for KVM_INTERRUPT */
 struct kvm_interrupt {
@@ -414,7 +413,14 @@ struct kvm_interrupt {
 
 X86:
 
-Note 'irq' is an interrupt vector, not an interrupt pin or line.
+Returns: 0 on success,
+	 -EEXIST if an interrupt is already enqueued
+	 -EINVAL the the irq number is invalid
+	 -ENXIO if the PIC is in the kernel
+	 -EFAULT if the pointer is invalid
+
+Note 'irq' is an interrupt vector, not an interrupt pin or line. This
+ioctl is useful if the in-kernel PIC is not used.
 
 PPC:
 
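A minimal userspace sketch of the x86 KVM_INTERRUPT usage documented above; vcpu_fd is assumed to be a vCPU file descriptor from KVM_CREATE_VCPU, and the vector value is only an example:

    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    /* Queue hardware interrupt vector 0x20 on a vCPU.  Only meaningful when
     * the in-kernel PIC is not used, as the note above explains. */
    static int inject_vector(int vcpu_fd)
    {
            struct kvm_interrupt irq = { .irq = 0x20 };

            return ioctl(vcpu_fd, KVM_INTERRUPT, &irq);  /* < 0 e.g. on -EEXIST */
    }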
@@ -1598,7 +1604,7 @@ provided event instead of triggering an exit.
 struct kvm_ioeventfd {
 	__u64 datamatch;
 	__u64 addr;        /* legal pio/mmio address */
-	__u32 len;         /* 1, 2, 4, or 8 bytes    */
+	__u32 len;         /* 0, 1, 2, 4, or 8 bytes    */
 	__s32 fd;
 	__u32 flags;
 	__u8  pad[36];
@@ -1621,6 +1627,10 @@ to the registered address is equal to datamatch in struct kvm_ioeventfd.
 For virtio-ccw devices, addr contains the subchannel id and datamatch the
 virtqueue index.
 
+With KVM_CAP_IOEVENTFD_ANY_LENGTH, a zero length ioeventfd is allowed, and
+the kernel will ignore the length of guest write and may get a faster vmexit.
+The speedup may only apply to specific architectures, but the ioeventfd will
+work anyway.
 
 4.60 KVM_DIRTY_TLB
 
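A short sketch of the zero-length ioeventfd described above, assuming the host kernel advertises KVM_CAP_IOEVENTFD_ANY_LENGTH; vm_fd and the doorbell address are illustrative:

    #include <stdint.h>
    #include <sys/eventfd.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    static int add_any_length_ioeventfd(int vm_fd, uint64_t doorbell_addr)
    {
            int efd = eventfd(0, EFD_NONBLOCK);
            struct kvm_ioeventfd ioev = {
                    .addr = doorbell_addr,
                    .len  = 0,      /* 0 = ignore the width of the guest write */
                    .fd   = efd,
            };

            if (ioctl(vm_fd, KVM_IOEVENTFD, &ioev) < 0)
                    return -1;
            return efd;     /* becomes readable on every guest write to the address */
    }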
@@ -3309,6 +3319,18 @@ Valid values for 'type' are:
    to ignore the request, or to gather VM memory core dump and/or
    reset/shutdown of the VM.
 
+		/* KVM_EXIT_IOAPIC_EOI */
+		struct {
+			__u8 vector;
+		} eoi;
+
+Indicates that the VCPU's in-kernel local APIC received an EOI for a
+level-triggered IOAPIC interrupt.  This exit only triggers when the
+IOAPIC is implemented in userspace (i.e. KVM_CAP_SPLIT_IRQCHIP is enabled);
+the userspace IOAPIC should process the EOI and retrigger the interrupt if
+it is still asserted.  Vector is the LAPIC interrupt vector for which the
+EOI was received.
+
 		/* Fix the size of the union. */
 		char padding[256];
 	};
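A hedged sketch of how a userspace IOAPIC model might consume the KVM_EXIT_IOAPIC_EOI exit above after KVM_RUN returns; ioapic_handle_eoi() is a hypothetical helper, not part of the KVM API:

    #include <linux/kvm.h>

    void ioapic_handle_eoi(unsigned char vector);  /* hypothetical userspace IOAPIC hook */

    static void handle_exit(struct kvm_run *run)   /* run = mmap()ed vcpu run area */
    {
            switch (run->exit_reason) {
            case KVM_EXIT_IOAPIC_EOI:
                    /* Clear Remote IRR for the pin routed to this vector and
                     * re-inject it if the line is still asserted. */
                    ioapic_handle_eoi(run->eoi.vector);
                    break;
            default:
                    break;
            }
    }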
@@ -3627,6 +3649,26 @@ struct {
 
 KVM handlers should exit to userspace with rc = -EREMOTE.
 
+7.5 KVM_CAP_SPLIT_IRQCHIP
+
+Architectures: x86
+Parameters: args[0] - number of routes reserved for userspace IOAPICs
+Returns: 0 on success, -1 on error
+
+Create a local apic for each processor in the kernel. This can be used
+instead of KVM_CREATE_IRQCHIP if the userspace VMM wishes to emulate the
+IOAPIC and PIC (and also the PIT, even though this has to be enabled
+separately).
+
+This capability also enables in kernel routing of interrupt requests;
+when KVM_CAP_SPLIT_IRQCHIP only routes of KVM_IRQ_ROUTING_MSI type are
+used in the IRQ routing table.  The first args[0] MSI routes are reserved
+for the IOAPIC pins.  Whenever the LAPIC receives an EOI for these routes,
+a KVM_EXIT_IOAPIC_EOI vmexit will be reported to userspace.
+
+Fails if VCPU has already been created, or if the irqchip is already in the
+kernel (i.e. KVM_CREATE_IRQCHIP has already been called).
+
 
 8. Other capabilities.
 ----------------------
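A minimal sketch of enabling the capability described in 7.5 above; the value 24 simply mirrors the usual number of IOAPIC pins and is an assumption of this example, not a requirement of the API:

    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    static int enable_split_irqchip(int vm_fd)
    {
            struct kvm_enable_cap cap = {
                    .cap = KVM_CAP_SPLIT_IRQCHIP,
                    .args[0] = 24,  /* MSI routes reserved for userspace IOAPIC pins */
            };

            /* Must run before any vCPU exists and without KVM_CREATE_IRQCHIP. */
            return ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
    }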

+ 187 - 0
Documentation/virtual/kvm/arm/vgic-mapped-irqs.txt

@@ -0,0 +1,187 @@
+KVM/ARM VGIC Forwarded Physical Interrupts
+==========================================
+
+The KVM/ARM code implements software support for the ARM Generic
+Interrupt Controller's (GIC's) hardware support for virtualization by
+allowing software to inject virtual interrupts to a VM, which the guest
+OS sees as regular interrupts.  The code is famously known as the VGIC.
+
+Some of these virtual interrupts, however, correspond to physical
+interrupts from real physical devices.  One example could be the
+architected timer, which itself supports virtualization, and therefore
+lets a guest OS program the hardware device directly to raise an
+interrupt at some point in time.  When such an interrupt is raised, the
+host OS initially handles the interrupt and must somehow signal this
+event as a virtual interrupt to the guest.  Another example could be a
+passthrough device, where the physical interrupts are initially handled
+by the host, but the device driver for the device lives in the guest OS
+and KVM must therefore somehow inject a virtual interrupt on behalf of
+the physical one to the guest OS.
+
+These virtual interrupts corresponding to a physical interrupt on the
+host are called forwarded physical interrupts, but are also sometimes
+referred to as 'virtualized physical interrupts' and 'mapped interrupts'.
+
+Forwarded physical interrupts are handled slightly differently compared
+to virtual interrupts generated purely by a software emulated device.
+
+
+The HW bit
+----------
+Virtual interrupts are signalled to the guest by programming the List
+Registers (LRs) on the GIC before running a VCPU.  The LR is programmed
+with the virtual IRQ number and the state of the interrupt (Pending,
+Active, or Pending+Active).  When the guest ACKs and EOIs a virtual
+interrupt, the LR state moves from Pending to Active, and finally to
+inactive.
+
+The LRs include an extra bit, called the HW bit.  When this bit is set,
+KVM must also program an additional field in the LR, the physical IRQ
+number, to link the virtual with the physical IRQ.
+
+When the HW bit is set, KVM must EITHER set the Pending OR the Active
+bit, never both at the same time.
+
+Setting the HW bit causes the hardware to deactivate the physical
+interrupt on the physical distributor when the guest deactivates the
+corresponding virtual interrupt.
+
+
+Forwarded Physical Interrupts Life Cycle
+----------------------------------------
+
+The state of forwarded physical interrupts is managed in the following way:
+
+  - The physical interrupt is acked by the host, and becomes active on
+    the physical distributor (*).
+  - KVM sets the LR.Pending bit, because this is the only way the GICV
+    interface is going to present it to the guest.
+  - LR.Pending will stay set as long as the guest has not acked the interrupt.
+  - LR.Pending transitions to LR.Active on the guest read of the IAR, as
+    expected.
+  - On guest EOI, the *physical distributor* active bit gets cleared,
+    but the LR.Active is left untouched (set).
+  - KVM clears the LR on VM exits when the physical distributor
+    active state has been cleared.
+
+(*): The host handling is slightly more complicated.  For some forwarded
+interrupts (shared), KVM directly sets the active state on the physical
+distributor before entering the guest, because the interrupt is never actually
+handled on the host (see details on the timer as an example below).  For other
+forwarded interrupts (non-shared) the host does not deactivate the interrupt
+when the host ISR completes, but leaves the interrupt active until the guest
+deactivates it.  Leaving the interrupt active is allowed, because Linux
+configures the physical GIC with EOIMode=1, which causes EOI operations to
+perform a priority drop allowing the GIC to receive other interrupts of the
+default priority.
+
+
+Forwarded Edge and Level Triggered PPIs and SPIs
+------------------------------------------------
+Forwarded physical interrupts injected should always be active on the
+physical distributor when injected to a guest.
+
+Level-triggered interrupts will keep the interrupt line to the GIC
+asserted, typically until the guest programs the device to deassert the
+line.  This means that the interrupt will remain pending on the physical
+distributor until the guest has reprogrammed the device.  Since we
+always run the VM with interrupts enabled on the CPU, a pending
+interrupt will exit the guest as soon as we switch into the guest,
+preventing the guest from ever making progress as the process repeats
+over and over.  Therefore, the active state on the physical distributor
+must be set when entering the guest, preventing the GIC from forwarding
+the pending interrupt to the CPU.  As soon as the guest deactivates the
+interrupt, the physical line is sampled by the hardware again and the host
+takes a new interrupt if and only if the physical line is still asserted.
+
+Edge-triggered interrupts do not exhibit the same problem with
+preventing guest execution that level-triggered interrupts do.  One
+option is to not use HW bit at all, and inject edge-triggered interrupts
+from a physical device as pure virtual interrupts.  But that would
+potentially slow down handling of the interrupt in the guest, because a
+physical interrupt occurring in the middle of the guest ISR would
+preempt the guest for the host to handle the interrupt.  Additionally,
+if you configure the system to handle interrupts on a separate physical
+core from that running your VCPU, you still have to interrupt the VCPU
+to queue the pending state onto the LR, even though the guest won't use
+this information until the guest ISR completes.  Therefore, the HW
+bit should always be set for forwarded edge-triggered interrupts.  With
+the HW bit set, the virtual interrupt is injected and additional
+physical interrupts occurring before the guest deactivates the interrupt
+simply mark the state on the physical distributor as Pending+Active.  As
+soon as the guest deactivates the interrupt, the host takes another
+interrupt if and only if there was a physical interrupt between injecting
+the forwarded interrupt to the guest and the guest deactivating the
+interrupt.
+
+Consequently, whenever we schedule a VCPU with one or more LRs with the
+HW bit set, the interrupt must also be active on the physical
+distributor.
+
+
+Forwarded LPIs
+--------------
+LPIs, introduced in GICv3, are always edge-triggered and do not have an
+active state.  They become pending when a device signal them, and as
+soon as they are acked by the CPU, they are inactive again.
+
+It therefore doesn't make sense, and is not supported, to set the HW bit
+for physical LPIs that are forwarded to a VM as virtual interrupts,
+typically virtual SPIs.
+
+For LPIs, there is no other choice than to preempt the VCPU thread if
+necessary, and queue the pending state onto the LR.
+
+
+Putting It Together: The Architected Timer
+------------------------------------------
+The architected timer is a device that signals interrupts with level
+triggered semantics.  The timer hardware is directly accessed by VCPUs
+which program the timer to fire at some point in time.  Each VCPU on a
+system programs the timer to fire at different times, and therefore the
+hardware is multiplexed between multiple VCPUs.  This is implemented by
+context-switching the timer state along with each VCPU thread.
+
+However, this means that a scenario like the following is entirely
+possible, and in fact, typical:
+
+1.  KVM runs the VCPU
+2.  The guest programs the time to fire in T+100
+3.  The guest is idle and calls WFI (wait-for-interrupts)
+4.  The hardware traps to the host
+5.  KVM stores the timer state to memory and disables the hardware timer
+6.  KVM schedules a soft timer to fire in T+(100 - time since step 2)
+7.  KVM puts the VCPU thread to sleep (on a waitqueue)
+8.  The soft timer fires, waking up the VCPU thread
+9.  KVM reprograms the timer hardware with the VCPU's values
+10. KVM marks the timer interrupt as active on the physical distributor
+11. KVM injects a forwarded physical interrupt to the guest
+12. KVM runs the VCPU
+
+Notice that KVM injects a forwarded physical interrupt in step 11 without
+the corresponding interrupt having actually fired on the host.  That is
+exactly why we mark the timer interrupt as active in step 10, because
+the active state on the physical distributor is part of the state
+belonging to the timer hardware, which is context-switched along with
+the VCPU thread.
+
+If the guest does not idle because it is busy, the flow looks like this
+instead:
+
+1.  KVM runs the VCPU
+2.  The guest programs the time to fire in T+100
+4.  At T+100 the timer fires and a physical IRQ causes the VM to exit
+    (note that this initially only traps to EL2 and does not run the host ISR
+    until KVM has returned to the host).
+5.  With interrupts still disabled on the CPU coming back from the guest, KVM
+    stores the virtual timer state to memory and disables the virtual hw timer.
+6.  KVM looks at the timer state (in memory) and injects a forwarded physical
+    interrupt because it concludes the timer has expired.
+7.  KVM marks the timer interrupt as active on the physical distributor
+7.  KVM enables the timer, enables interrupts, and runs the VCPU
+
+Notice that again the forwarded physical interrupt is injected to the
+guest without having actually been handled on the host.  In this case it
+is because the physical interrupt is never actually seen by the host because the
+timer is disabled upon guest return, and the virtual forwarded interrupt is
+injected on the KVM guest entry path.
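For orientation, a rough sketch of the GICv2 list-register encoding the document above keeps referring to.  The field layout comes from the GICv2 architecture specification (GICH_LR), not from this patch, so treat it purely as an illustration:

    #include <stdint.h>

    #define GICH_LR_VIRTUAL_ID(v)   ((uint32_t)(v) & 0x3ff)          /* bits  9:0  */
    #define GICH_LR_PHYSICAL_ID(p)  (((uint32_t)(p) & 0x3ff) << 10)  /* bits 19:10 */
    #define GICH_LR_STATE_PENDING   (1u << 28)
    #define GICH_LR_STATE_ACTIVE    (1u << 29)
    #define GICH_LR_HW              (1u << 31)

    /* A forwarded physical interrupt: HW=1 links virtual IRQ 27 (the virtual
     * timer PPI) to physical IRQ 27, and exactly one of Pending/Active is set,
     * as the rules above require. */
    uint32_t lr = GICH_LR_HW | GICH_LR_STATE_PENDING |
                  GICH_LR_PHYSICAL_ID(27) | GICH_LR_VIRTUAL_ID(27);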

+ 10 - 8
Documentation/virtual/kvm/devices/arm-vgic.txt

@@ -44,28 +44,29 @@ Groups:
   Attributes:
     The attr field of kvm_device_attr encodes two values:
     bits:     | 63   ....  40 | 39 ..  32  |  31   ....    0 |
-    values:   |    reserved   |   cpu id   |      offset     |
+    values:   |    reserved   | vcpu_index |      offset     |
 
     All distributor regs are (rw, 32-bit)
 
     The offset is relative to the "Distributor base address" as defined in the
     GICv2 specs.  Getting or setting such a register has the same effect as
-    reading or writing the register on the actual hardware from the cpu
-    specified with cpu id field.  Note that most distributor fields are not
-    banked, but return the same value regardless of the cpu id used to access
-    the register.
+    reading or writing the register on the actual hardware from the cpu whose
+    index is specified with the vcpu_index field.  Note that most distributor
+    fields are not banked, but return the same value regardless of the
+    vcpu_index used to access the register.
   Limitations:
     - Priorities are not implemented, and registers are RAZ/WI
     - Currently only implemented for KVM_DEV_TYPE_ARM_VGIC_V2.
   Errors:
-    -ENODEV: Getting or setting this register is not yet supported
+    -ENXIO: Getting or setting this register is not yet supported
     -EBUSY: One or more VCPUs are running
+    -EINVAL: Invalid vcpu_index supplied
 
   KVM_DEV_ARM_VGIC_GRP_CPU_REGS
   Attributes:
     The attr field of kvm_device_attr encodes two values:
     bits:     | 63   ....  40 | 39 ..  32  |  31   ....    0 |
-    values:   |    reserved   |   cpu id   |      offset     |
+    values:   |    reserved   | vcpu_index |      offset     |
 
     All CPU interface regs are (rw, 32-bit)
 
@@ -91,8 +92,9 @@ Groups:
     - Priorities are not implemented, and registers are RAZ/WI
     - Currently only implemented for KVM_DEV_TYPE_ARM_VGIC_V2.
   Errors:
-    -ENODEV: Getting or setting this register is not yet supported
+    -ENXIO: Getting or setting this register is not yet supported
     -EBUSY: One or more VCPUs are running
+    -EINVAL: Invalid vcpu_index supplied
 
   KVM_DEV_ARM_VGIC_GRP_NR_IRQS
   Attributes:
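A brief sketch of composing the DIST_REGS/CPU_REGS attr encoding shown above from userspace on an ARM host; vgic_fd is assumed to come from KVM_CREATE_DEVICE with KVM_DEV_TYPE_ARM_VGIC_V2, and the register offset is an example:

    #include <stdint.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    static int vgic_read_dist_reg(int vgic_fd, uint32_t vcpu_index,
                                  uint32_t offset, uint32_t *val)
    {
            struct kvm_device_attr attr = {
                    .group = KVM_DEV_ARM_VGIC_GRP_DIST_REGS,
                    /* bits 39..32 = vcpu_index, bits 31..0 = register offset */
                    .attr  = ((uint64_t)vcpu_index << 32) | offset,
                    .addr  = (uint64_t)(uintptr_t)val,
            };

            return ioctl(vgic_fd, KVM_GET_DEVICE_ATTR, &attr);
    }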

+ 12 - 0
Documentation/virtual/kvm/locking.txt

@@ -166,3 +166,15 @@ Comment:	The srcu read lock must be held while accessing memslots (e.g.
 		MMIO/PIO address->device structure mapping (kvm->buses).
 		The srcu index can be stored in kvm_vcpu->srcu_idx per vcpu
 		if it is needed by multiple functions.
+
+Name:		blocked_vcpu_on_cpu_lock
+Type:		spinlock_t
+Arch:		x86
+Protects:	blocked_vcpu_on_cpu
+Comment:	This is a per-CPU lock and it is used for VT-d posted-interrupts.
+		When VT-d posted-interrupts is supported and the VM has assigned
+		devices, we put the blocked vCPU on the list blocked_vcpu_on_cpu
+		protected by blocked_vcpu_on_cpu_lock, when VT-d hardware issues
+		wakeup notification event since external interrupts from the
+		assigned devices happens, we will find the vCPU on the list to
+		wakeup.

+ 7 - 0
MAINTAINERS

@@ -11348,6 +11348,13 @@ L:	netdev@vger.kernel.org
 S:	Maintained
 F:	drivers/net/ethernet/via/via-velocity.*
 
+VIRT LIB
+M:	Alex Williamson <alex.williamson@redhat.com>
+M:	Paolo Bonzini <pbonzini@redhat.com>
+L:	kvm@vger.kernel.org
+S:	Supported
+F:	virt/lib/
+
 VIVID VIRTUAL VIDEO DRIVER
 M:	Hans Verkuil <hverkuil@xs4all.nl>
 L:	linux-media@vger.kernel.org

+ 6 - 4
Makefile

@@ -550,6 +550,7 @@ drivers-y	:= drivers/ sound/ firmware/
 net-y		:= net/
 libs-y		:= lib/
 core-y		:= usr/
+virt-y		:= virt/
 endif # KBUILD_EXTMOD
 
 ifeq ($(dot-config),1)
@@ -882,10 +883,10 @@ core-y		+= kernel/ certs/ mm/ fs/ ipc/ security/ crypto/ block/
 
 vmlinux-dirs	:= $(patsubst %/,%,$(filter %/, $(init-y) $(init-m) \
 		     $(core-y) $(core-m) $(drivers-y) $(drivers-m) \
-		     $(net-y) $(net-m) $(libs-y) $(libs-m)))
+		     $(net-y) $(net-m) $(libs-y) $(libs-m) $(virt-y)))
 
 vmlinux-alldirs	:= $(sort $(vmlinux-dirs) $(patsubst %/,%,$(filter %/, \
-		     $(init-) $(core-) $(drivers-) $(net-) $(libs-))))
+		     $(init-) $(core-) $(drivers-) $(net-) $(libs-) $(virt-))))
 
 init-y		:= $(patsubst %/, %/built-in.o, $(init-y))
 core-y		:= $(patsubst %/, %/built-in.o, $(core-y))
@@ -894,14 +895,15 @@ net-y		:= $(patsubst %/, %/built-in.o, $(net-y))
 libs-y1		:= $(patsubst %/, %/lib.a, $(libs-y))
 libs-y2		:= $(patsubst %/, %/built-in.o, $(libs-y))
 libs-y		:= $(libs-y1) $(libs-y2)
+virt-y		:= $(patsubst %/, %/built-in.o, $(virt-y))
 
 # Externally visible symbols (used by link-vmlinux.sh)
 export KBUILD_VMLINUX_INIT := $(head-y) $(init-y)
-export KBUILD_VMLINUX_MAIN := $(core-y) $(libs-y) $(drivers-y) $(net-y)
+export KBUILD_VMLINUX_MAIN := $(core-y) $(libs-y) $(drivers-y) $(net-y) $(virt-y)
 export KBUILD_LDS          := arch/$(SRCARCH)/kernel/vmlinux.lds
 export LDFLAGS_vmlinux
 # used by scripts/pacmage/Makefile
-export KBUILD_ALLDIRS := $(sort $(filter-out arch/%,$(vmlinux-alldirs)) arch Documentation include samples scripts tools virt)
+export KBUILD_ALLDIRS := $(sort $(filter-out arch/%,$(vmlinux-alldirs)) arch Documentation include samples scripts tools)
 
 vmlinux-deps := $(KBUILD_LDS) $(KBUILD_VMLINUX_INIT) $(KBUILD_VMLINUX_MAIN)
 

+ 20 - 0
arch/arm/include/asm/kvm_arm.h

@@ -218,4 +218,24 @@
 #define HSR_DABT_CM		(1U << 8)
 #define HSR_DABT_EA		(1U << 9)
 
+#define kvm_arm_exception_type	\
+	{0, "RESET" }, 		\
+	{1, "UNDEFINED" },	\
+	{2, "SOFTWARE" },	\
+	{3, "PREF_ABORT" },	\
+	{4, "DATA_ABORT" },	\
+	{5, "IRQ" },		\
+	{6, "FIQ" },		\
+	{7, "HVC" }
+
+#define HSRECN(x) { HSR_EC_##x, #x }
+
+#define kvm_arm_exception_class \
+	HSRECN(UNKNOWN), HSRECN(WFI), HSRECN(CP15_32), HSRECN(CP15_64), \
+	HSRECN(CP14_MR), HSRECN(CP14_LS), HSRECN(CP_0_13), HSRECN(CP10_ID), \
+	HSRECN(JAZELLE), HSRECN(BXJ), HSRECN(CP14_64), HSRECN(SVC_HYP), \
+	HSRECN(HVC), HSRECN(SMC), HSRECN(IABT), HSRECN(IABT_HYP), \
+	HSRECN(DABT), HSRECN(DABT_HYP)
+
+
 #endif /* __ARM_KVM_ARM_H__ */

+ 4 - 1
arch/arm/include/asm/kvm_host.h

@@ -126,7 +126,10 @@ struct kvm_vcpu_arch {
 	 * here.
 	 */
 
-	/* Don't run the guest on this vcpu */
+	/* vcpu power-off state */
+	bool power_off;
+
+	 /* Don't run the guest (internal implementation need) */
 	bool pause;
 
 	/* IO related fields */

+ 2 - 0
arch/arm/kvm/Kconfig

@@ -46,4 +46,6 @@ config KVM_ARM_HOST
 	---help---
 	  Provides host support for ARM processors.
 
+source drivers/vhost/Kconfig
+
 endif # VIRTUALIZATION

+ 60 - 16
arch/arm/kvm/arm.c

@@ -271,6 +271,16 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
 	return kvm_timer_should_fire(vcpu);
 }
 
+void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu)
+{
+	kvm_timer_schedule(vcpu);
+}
+
+void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu)
+{
+	kvm_timer_unschedule(vcpu);
+}
+
 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 {
 	/* Force users to call KVM_ARM_VCPU_INIT */
@@ -308,7 +318,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
 				    struct kvm_mp_state *mp_state)
 {
-	if (vcpu->arch.pause)
+	if (vcpu->arch.power_off)
 		mp_state->mp_state = KVM_MP_STATE_STOPPED;
 	else
 		mp_state->mp_state = KVM_MP_STATE_RUNNABLE;
@@ -321,10 +331,10 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
 {
 	switch (mp_state->mp_state) {
 	case KVM_MP_STATE_RUNNABLE:
-		vcpu->arch.pause = false;
+		vcpu->arch.power_off = false;
 		break;
 	case KVM_MP_STATE_STOPPED:
-		vcpu->arch.pause = true;
+		vcpu->arch.power_off = true;
 		break;
 	default:
 		return -EINVAL;
@@ -342,7 +352,8 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
  */
 int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
 {
-	return !!v->arch.irq_lines || kvm_vgic_vcpu_pending_irq(v);
+	return ((!!v->arch.irq_lines || kvm_vgic_vcpu_pending_irq(v))
+		&& !v->arch.power_off && !v->arch.pause);
 }
 
 /* Just ensure a guest exit from a particular CPU */
@@ -468,11 +479,38 @@ bool kvm_arch_intc_initialized(struct kvm *kvm)
 	return vgic_initialized(kvm);
 }
 
-static void vcpu_pause(struct kvm_vcpu *vcpu)
+static void kvm_arm_halt_guest(struct kvm *kvm) __maybe_unused;
+static void kvm_arm_resume_guest(struct kvm *kvm) __maybe_unused;
+
+static void kvm_arm_halt_guest(struct kvm *kvm)
+{
+	int i;
+	struct kvm_vcpu *vcpu;
+
+	kvm_for_each_vcpu(i, vcpu, kvm)
+		vcpu->arch.pause = true;
+	force_vm_exit(cpu_all_mask);
+}
+
+static void kvm_arm_resume_guest(struct kvm *kvm)
+{
+	int i;
+	struct kvm_vcpu *vcpu;
+
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		wait_queue_head_t *wq = kvm_arch_vcpu_wq(vcpu);
+
+		vcpu->arch.pause = false;
+		wake_up_interruptible(wq);
+	}
+}
+
+static void vcpu_sleep(struct kvm_vcpu *vcpu)
 {
 	wait_queue_head_t *wq = kvm_arch_vcpu_wq(vcpu);
 
-	wait_event_interruptible(*wq, !vcpu->arch.pause);
+	wait_event_interruptible(*wq, ((!vcpu->arch.power_off) &&
+				       (!vcpu->arch.pause)));
 }
 
 static int kvm_vcpu_initialized(struct kvm_vcpu *vcpu)
@@ -522,8 +560,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
 
 		update_vttbr(vcpu->kvm);
 
-		if (vcpu->arch.pause)
-			vcpu_pause(vcpu);
+		if (vcpu->arch.power_off || vcpu->arch.pause)
+			vcpu_sleep(vcpu);
 
 		/*
 		 * Disarming the background timer must be done in a
@@ -549,11 +587,12 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
 			run->exit_reason = KVM_EXIT_INTR;
 		}
 
-		if (ret <= 0 || need_new_vmid_gen(vcpu->kvm)) {
+		if (ret <= 0 || need_new_vmid_gen(vcpu->kvm) ||
+			vcpu->arch.power_off || vcpu->arch.pause) {
 			local_irq_enable();
+			kvm_timer_sync_hwstate(vcpu);
 			kvm_vgic_sync_hwstate(vcpu);
 			preempt_enable();
-			kvm_timer_sync_hwstate(vcpu);
 			continue;
 		}
 
@@ -596,14 +635,19 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
 		 * guest time.
 		 */
 		kvm_guest_exit();
-		trace_kvm_exit(kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu));
+		trace_kvm_exit(ret, kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu));
+
+		/*
+		 * We must sync the timer state before the vgic state so that
+		 * the vgic can properly sample the updated state of the
+		 * interrupt line.
+		 */
+		kvm_timer_sync_hwstate(vcpu);
 
 		kvm_vgic_sync_hwstate(vcpu);
 
 		preempt_enable();
 
-		kvm_timer_sync_hwstate(vcpu);
-
 		ret = handle_exit(vcpu, run, ret);
 	}
 
@@ -765,12 +809,12 @@ static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu,
 	vcpu_reset_hcr(vcpu);
 
 	/*
-	 * Handle the "start in power-off" case by marking the VCPU as paused.
+	 * Handle the "start in power-off" case.
 	 */
 	if (test_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features))
-		vcpu->arch.pause = true;
+		vcpu->arch.power_off = true;
 	else
-		vcpu->arch.pause = false;
+		vcpu->arch.power_off = false;
 
 	return 0;
 }

+ 5 - 5
arch/arm/kvm/psci.c

@@ -63,7 +63,7 @@ static unsigned long kvm_psci_vcpu_suspend(struct kvm_vcpu *vcpu)
 
 static void kvm_psci_vcpu_off(struct kvm_vcpu *vcpu)
 {
-	vcpu->arch.pause = true;
+	vcpu->arch.power_off = true;
 }
 
 static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
@@ -87,7 +87,7 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
 	 */
 	if (!vcpu)
 		return PSCI_RET_INVALID_PARAMS;
-	if (!vcpu->arch.pause) {
+	if (!vcpu->arch.power_off) {
 		if (kvm_psci_version(source_vcpu) != KVM_ARM_PSCI_0_1)
 			return PSCI_RET_ALREADY_ON;
 		else
@@ -115,7 +115,7 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
 	 * the general puspose registers are undefined upon CPU_ON.
 	 */
 	*vcpu_reg(vcpu, 0) = context_id;
-	vcpu->arch.pause = false;
+	vcpu->arch.power_off = false;
 	smp_mb();		/* Make sure the above is visible */
 
 	wq = kvm_arch_vcpu_wq(vcpu);
@@ -153,7 +153,7 @@ static unsigned long kvm_psci_vcpu_affinity_info(struct kvm_vcpu *vcpu)
 		mpidr = kvm_vcpu_get_mpidr_aff(tmp);
 		if ((mpidr & target_affinity_mask) == target_affinity) {
 			matching_cpus++;
-			if (!tmp->arch.pause)
+			if (!tmp->arch.power_off)
 				return PSCI_0_2_AFFINITY_LEVEL_ON;
 		}
 	}
@@ -179,7 +179,7 @@ static void kvm_prepare_system_event(struct kvm_vcpu *vcpu, u32 type)
 	 * re-initialized.
 	 */
 	kvm_for_each_vcpu(i, tmp, vcpu->kvm) {
-		tmp->arch.pause = true;
+		tmp->arch.power_off = true;
 		kvm_vcpu_kick(tmp);
 	}
 

+ 7 - 3
arch/arm/kvm/trace.h

@@ -25,21 +25,25 @@ TRACE_EVENT(kvm_entry,
 );
 
 TRACE_EVENT(kvm_exit,
-	TP_PROTO(unsigned int exit_reason, unsigned long vcpu_pc),
-	TP_ARGS(exit_reason, vcpu_pc),
+	TP_PROTO(int idx, unsigned int exit_reason, unsigned long vcpu_pc),
+	TP_ARGS(idx, exit_reason, vcpu_pc),
 
 	TP_STRUCT__entry(
+		__field(	int,		idx		)
 		__field(	unsigned int,	exit_reason	)
 		__field(	unsigned long,	vcpu_pc		)
 	),
 
 	TP_fast_assign(
+		__entry->idx			= idx;
 		__entry->exit_reason		= exit_reason;
 		__entry->vcpu_pc		= vcpu_pc;
 	),
 
-	TP_printk("HSR_EC: 0x%04x, PC: 0x%08lx",
+	TP_printk("%s: HSR_EC: 0x%04x (%s), PC: 0x%08lx",
+		  __print_symbolic(__entry->idx, kvm_arm_exception_type),
 		  __entry->exit_reason,
+		  __print_symbolic(__entry->exit_reason, kvm_arm_exception_class),
 		  __entry->vcpu_pc)
 );
 

+ 16 - 0
arch/arm64/include/asm/kvm_arm.h

@@ -200,4 +200,20 @@
 /* Hyp Prefetch Fault Address Register (HPFAR/HDFAR) */
 #define HPFAR_MASK	(~UL(0xf))
 
+#define kvm_arm_exception_type	\
+	{0, "IRQ" }, 		\
+	{1, "TRAP" }
+
+#define ECN(x) { ESR_ELx_EC_##x, #x }
+
+#define kvm_arm_exception_class \
+	ECN(UNKNOWN), ECN(WFx), ECN(CP15_32), ECN(CP15_64), ECN(CP14_MR), \
+	ECN(CP14_LS), ECN(FP_ASIMD), ECN(CP10_ID), ECN(CP14_64), ECN(SVC64), \
+	ECN(HVC64), ECN(SMC64), ECN(SYS64), ECN(IMP_DEF), ECN(IABT_LOW), \
+	ECN(IABT_CUR), ECN(PC_ALIGN), ECN(DABT_LOW), ECN(DABT_CUR), \
+	ECN(SP_ALIGN), ECN(FP_EXC32), ECN(FP_EXC64), ECN(SERROR), \
+	ECN(BREAKPT_LOW), ECN(BREAKPT_CUR), ECN(SOFTSTP_LOW), \
+	ECN(SOFTSTP_CUR), ECN(WATCHPT_LOW), ECN(WATCHPT_CUR), \
+	ECN(BKPT32), ECN(VECTOR32), ECN(BRK64)
+
 #endif /* __ARM64_KVM_ARM_H__ */

+ 4 - 1
arch/arm64/include/asm/kvm_host.h

@@ -149,7 +149,10 @@ struct kvm_vcpu_arch {
 		u32	mdscr_el1;
 	} guest_debug_preserved;
 
-	/* Don't run the guest */
+	/* vcpu power-off state */
+	bool power_off;
+
+	/* Don't run the guest (internal implementation need) */
 	bool pause;
 
 	/* IO related fields */

+ 2 - 0
arch/arm64/kvm/Kconfig

@@ -48,4 +48,6 @@ config KVM_ARM_HOST
 	---help---
 	  Provides host support for ARM processors.
 
+source drivers/vhost/Kconfig
+
 endif # VIRTUALIZATION

+ 8 - 0
arch/arm64/kvm/hyp.S

@@ -880,6 +880,14 @@ __kvm_hyp_panic:
 
 	bl __restore_sysregs
 
+	/*
+	 * Make sure we have a valid host stack, and don't leave junk in the
+	 * frame pointer that will give us a misleading host stack unwinding.
+	 */
+	ldr	x22, [x2, #CPU_GP_REG_OFFSET(CPU_SP_EL1)]
+	msr	sp_el1, x22
+	mov	x29, xzr
+
 1:	adr	x0, __hyp_panic_str
 	adr	x1, 2f
 	ldp	x2, x3, [x1]

+ 2 - 0
arch/mips/include/asm/kvm_host.h

@@ -847,5 +847,7 @@ static inline void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
 		struct kvm_memory_slot *slot) {}
 static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
 static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
+static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {}
+static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {}
 
 #endif /* __MIPS_KVM_HOST_H__ */

+ 5 - 0
arch/powerpc/include/asm/disassemble.h

@@ -42,6 +42,11 @@ static inline unsigned int get_dcrn(u32 inst)
 	return ((inst >> 16) & 0x1f) | ((inst >> 6) & 0x3e0);
 }
 
+static inline unsigned int get_tmrn(u32 inst)
+{
+	return ((inst >> 16) & 0x1f) | ((inst >> 6) & 0x3e0);
+}
+
 static inline unsigned int get_rt(u32 inst)
 {
 	return (inst >> 21) & 0x1f;

+ 2 - 0
arch/powerpc/include/asm/kvm_host.h

@@ -716,5 +716,7 @@ static inline void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslot
 static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {}
 static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
 static inline void kvm_arch_exit(void) {}
+static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {}
+static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {}
 
 #endif /* __POWERPC_KVM_HOST_H__ */

+ 6 - 0
arch/powerpc/include/asm/reg_booke.h

@@ -742,6 +742,12 @@
 #define MMUBE1_VBE4		0x00000002
 #define MMUBE1_VBE5		0x00000001
 
+#define TMRN_TMCFG0      16	/* Thread Management Configuration Register 0 */
+#define TMRN_TMCFG0_NPRIBITS       0x003f0000 /* Bits of thread priority */
+#define TMRN_TMCFG0_NPRIBITS_SHIFT 16
+#define TMRN_TMCFG0_NATHRD         0x00003f00 /* Number of active threads */
+#define TMRN_TMCFG0_NATHRD_SHIFT   8
+#define TMRN_TMCFG0_NTHRD          0x0000003f /* Number of threads */
 #define TMRN_IMSR0	0x120	/* Initial MSR Register 0 (e6500) */
 #define TMRN_IMSR1	0x121	/* Initial MSR Register 1 (e6500) */
 #define TMRN_INIA0	0x140	/* Next Instruction Address Register 0 */

+ 2 - 1
arch/powerpc/kvm/book3s_64_mmu_hv.c

@@ -70,7 +70,8 @@ long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp)
 	}
 
 	/* Lastly try successively smaller sizes from the page allocator */
-	while (!hpt && order > PPC_MIN_HPT_ORDER) {
+	/* Only do this if userspace didn't specify a size via ioctl */
+	while (!hpt && order > PPC_MIN_HPT_ORDER && !htab_orderp) {
 		hpt = __get_free_pages(GFP_KERNEL|__GFP_ZERO|__GFP_REPEAT|
 				       __GFP_NOWARN, order - PAGE_SHIFT);
 		if (!hpt)

+ 2 - 0
arch/powerpc/kvm/book3s_hv_rm_mmu.c

@@ -470,6 +470,8 @@ long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags,
 	note_hpte_modification(kvm, rev);
 	unlock_hpte(hpte, 0);
 
+	if (v & HPTE_V_ABSENT)
+		v = (v & ~HPTE_V_ABSENT) | HPTE_V_VALID;
 	hpret[0] = v;
 	hpret[1] = r;
 	return H_SUCCESS;

+ 22 - 7
arch/powerpc/kvm/book3s_hv_rmhandlers.S

@@ -150,6 +150,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 	cmpwi	cr1, r12, BOOK3S_INTERRUPT_MACHINE_CHECK
 	cmpwi	r12, BOOK3S_INTERRUPT_EXTERNAL
 	beq	11f
+	cmpwi	r12, BOOK3S_INTERRUPT_H_DOORBELL
+	beq 	15f	/* Invoke the H_DOORBELL handler */
 	cmpwi	cr2, r12, BOOK3S_INTERRUPT_HMI
 	beq	cr2, 14f			/* HMI check */
 
@@ -174,6 +176,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 	mtspr	SPRN_HSRR1, r7
 	b	hmi_exception_after_realmode
 
+15:	mtspr SPRN_HSRR0, r8
+	mtspr SPRN_HSRR1, r7
+	ba    0xe80
+
 kvmppc_primary_no_guest:
 	/* We handle this much like a ceded vcpu */
 	/* put the HDEC into the DEC, since HDEC interrupts don't wake us */
@@ -2377,7 +2383,6 @@ machine_check_realmode:
 	mr	r3, r9		/* get vcpu pointer */
 	bl	kvmppc_realmode_machine_check
 	nop
-	cmpdi	r3, 0		/* Did we handle MCE ? */
 	ld	r9, HSTATE_KVM_VCPU(r13)
 	li	r12, BOOK3S_INTERRUPT_MACHINE_CHECK
 	/*
@@ -2390,13 +2395,18 @@ machine_check_realmode:
 	 * The old code used to return to host for unhandled errors which
 	 * was causing guest to hang with soft lockups inside guest and
 	 * makes it difficult to recover guest instance.
+	 *
+	 * if we receive machine check with MSR(RI=0) then deliver it to
+	 * guest as machine check causing guest to crash.
 	 */
-	ld	r10, VCPU_PC(r9)
 	ld	r11, VCPU_MSR(r9)
+	andi.	r10, r11, MSR_RI	/* check for unrecoverable exception */
+	beq	1f			/* Deliver a machine check to guest */
+	ld	r10, VCPU_PC(r9)
+	cmpdi	r3, 0		/* Did we handle MCE ? */
 	bne	2f	/* Continue guest execution. */
 	/* If not, deliver a machine check.  SRR0/1 are already set */
-	li	r10, BOOK3S_INTERRUPT_MACHINE_CHECK
-	ld	r11, VCPU_MSR(r9)
+1:	li	r10, BOOK3S_INTERRUPT_MACHINE_CHECK
 	bl	kvmppc_msr_interrupt
 2:	b	fast_interrupt_c_return
 
@@ -2436,14 +2446,19 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 
 	/* hypervisor doorbell */
 3:	li	r12, BOOK3S_INTERRUPT_H_DOORBELL
+
+	/*
+	 * Clear the doorbell as we will invoke the handler
+	 * explicitly in the guest exit path.
+	 */
+	lis	r6, (PPC_DBELL_SERVER << (63-36))@h
+	PPC_MSGCLR(6)
 	/* see if it's a host IPI */
 	li	r3, 1
 	lbz	r0, HSTATE_HOST_IPI(r13)
 	cmpwi	r0, 0
 	bnelr
-	/* if not, clear it and return -1 */
-	lis	r6, (PPC_DBELL_SERVER << (63-36))@h
-	PPC_MSGCLR(6)
+	/* if not, return -1 */
 	li	r3, -1
 	blr
 

+ 2 - 1
arch/powerpc/kvm/e500.c

@@ -237,7 +237,8 @@ void kvmppc_e500_tlbil_one(struct kvmppc_vcpu_e500 *vcpu_e500,
                            struct kvm_book3e_206_tlb_entry *gtlbe)
 {
 	struct vcpu_id_table *idt = vcpu_e500->idt;
-	unsigned int pr, tid, ts, pid;
+	unsigned int pr, tid, ts;
+	int pid;
 	u32 val, eaddr;
 	unsigned long flags;
 

+ 19 - 0
arch/powerpc/kvm/e500_emulate.c

@@ -15,6 +15,7 @@
 #include <asm/kvm_ppc.h>
 #include <asm/disassemble.h>
 #include <asm/dbell.h>
+#include <asm/reg_booke.h>
 
 #include "booke.h"
 #include "e500.h"
@@ -22,6 +23,7 @@
 #define XOP_DCBTLS  166
 #define XOP_MSGSND  206
 #define XOP_MSGCLR  238
+#define XOP_MFTMR   366
 #define XOP_TLBIVAX 786
 #define XOP_TLBSX   914
 #define XOP_TLBRE   946
@@ -113,6 +115,19 @@ static int kvmppc_e500_emul_dcbtls(struct kvm_vcpu *vcpu)
 	return EMULATE_DONE;
 }
 
+static int kvmppc_e500_emul_mftmr(struct kvm_vcpu *vcpu, unsigned int inst,
+				  int rt)
+{
+	/* Expose one thread per vcpu */
+	if (get_tmrn(inst) == TMRN_TMCFG0) {
+		kvmppc_set_gpr(vcpu, rt,
+			       1 | (1 << TMRN_TMCFG0_NATHRD_SHIFT));
+		return EMULATE_DONE;
+	}
+
+	return EMULATE_FAIL;
+}
+
 int kvmppc_core_emulate_op_e500(struct kvm_run *run, struct kvm_vcpu *vcpu,
 				unsigned int inst, int *advance)
 {
@@ -165,6 +180,10 @@ int kvmppc_core_emulate_op_e500(struct kvm_run *run, struct kvm_vcpu *vcpu,
 			emulated = kvmppc_e500_emul_tlbivax(vcpu, ea);
 			break;
 
+		case XOP_MFTMR:
+			emulated = kvmppc_e500_emul_mftmr(vcpu, inst, rt);
+			break;
+
 		case XOP_EHPRIV:
 			emulated = kvmppc_e500_emul_ehpriv(run, vcpu, inst,
 							   advance);

+ 2 - 2
arch/powerpc/kvm/e500_mmu_host.c

@@ -406,7 +406,7 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
 
 			for (; tsize > BOOK3E_PAGESZ_4K; tsize -= 2) {
 				unsigned long gfn_start, gfn_end;
-				tsize_pages = 1 << (tsize - 2);
+				tsize_pages = 1UL << (tsize - 2);
 
 				gfn_start = gfn & ~(tsize_pages - 1);
 				gfn_end = gfn_start + tsize_pages;
@@ -447,7 +447,7 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
 	}
 
 	if (likely(!pfnmap)) {
-		tsize_pages = 1 << (tsize + 10 - PAGE_SHIFT);
+		tsize_pages = 1UL << (tsize + 10 - PAGE_SHIFT);
 		pfn = gfn_to_pfn_memslot(slot, gfn);
 		if (is_error_noslot_pfn(pfn)) {
 			if (printk_ratelimit())

+ 3 - 0
arch/powerpc/kvm/powerpc.c

@@ -559,6 +559,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 		else
 			r = num_online_cpus();
 		break;
+	case KVM_CAP_NR_MEMSLOTS:
+		r = KVM_USER_MEM_SLOTS;
+		break;
 	case KVM_CAP_MAX_VCPUS:
 		r = KVM_MAX_VCPUS;
 		break;
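For completeness, a small sketch of how userspace would probe the capability added in this hunk; kvm_fd is assumed to be the /dev/kvm file descriptor, and the fallback value is arbitrary:

    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    static int query_max_memslots(int kvm_fd)
    {
            int n = ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_NR_MEMSLOTS);

            return n > 0 ? n : 32;  /* older kernels report 0: assume a modest default */
    }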

+ 2 - 0
arch/s390/include/asm/kvm_host.h

@@ -644,5 +644,7 @@ static inline void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslot
 static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {}
 static inline void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
 		struct kvm_memory_slot *slot) {}
+static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {}
+static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {}
 
 #endif

+ 21 - 21
arch/s390/kvm/intercept.c

@@ -336,28 +336,28 @@ static int handle_partial_execution(struct kvm_vcpu *vcpu)
 	return -EOPNOTSUPP;
 }
 
-static const intercept_handler_t intercept_funcs[] = {
-	[0x00 >> 2] = handle_noop,
-	[0x04 >> 2] = handle_instruction,
-	[0x08 >> 2] = handle_prog,
-	[0x10 >> 2] = handle_noop,
-	[0x14 >> 2] = handle_external_interrupt,
-	[0x18 >> 2] = handle_noop,
-	[0x1C >> 2] = kvm_s390_handle_wait,
-	[0x20 >> 2] = handle_validity,
-	[0x28 >> 2] = handle_stop,
-	[0x38 >> 2] = handle_partial_execution,
-};
-
 int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu)
 {
-	intercept_handler_t func;
-	u8 code = vcpu->arch.sie_block->icptcode;
-
-	if (code & 3 || (code >> 2) >= ARRAY_SIZE(intercept_funcs))
+	switch (vcpu->arch.sie_block->icptcode) {
+	case 0x00:
+	case 0x10:
+	case 0x18:
+		return handle_noop(vcpu);
+	case 0x04:
+		return handle_instruction(vcpu);
+	case 0x08:
+		return handle_prog(vcpu);
+	case 0x14:
+		return handle_external_interrupt(vcpu);
+	case 0x1c:
+		return kvm_s390_handle_wait(vcpu);
+	case 0x20:
+		return handle_validity(vcpu);
+	case 0x28:
+		return handle_stop(vcpu);
+	case 0x38:
+		return handle_partial_execution(vcpu);
+	default:
 		return -EOPNOTSUPP;
-	func = intercept_funcs[code >> 2];
-	if (func)
-		return func(vcpu);
-	return -EOPNOTSUPP;
+	}
 }

+ 43 - 73
arch/s390/kvm/interrupt.c

@@ -51,11 +51,9 @@ static int psw_mchk_disabled(struct kvm_vcpu *vcpu)
 
 
 static int psw_interrupts_disabled(struct kvm_vcpu *vcpu)
 static int psw_interrupts_disabled(struct kvm_vcpu *vcpu)
 {
 {
-	if ((vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PER) ||
-	    (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_IO) ||
-	    (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_EXT))
-		return 0;
-	return 1;
+	return psw_extint_disabled(vcpu) &&
+	       psw_ioint_disabled(vcpu) &&
+	       psw_mchk_disabled(vcpu);
 }
 }
 
 
 static int ckc_interrupts_enabled(struct kvm_vcpu *vcpu)
 static int ckc_interrupts_enabled(struct kvm_vcpu *vcpu)
@@ -71,13 +69,8 @@ static int ckc_interrupts_enabled(struct kvm_vcpu *vcpu)
 
 
 static int ckc_irq_pending(struct kvm_vcpu *vcpu)
 static int ckc_irq_pending(struct kvm_vcpu *vcpu)
 {
 {
-	preempt_disable();
-	if (!(vcpu->arch.sie_block->ckc <
-	      get_tod_clock_fast() + vcpu->arch.sie_block->epoch)) {
-		preempt_enable();
+	if (vcpu->arch.sie_block->ckc >= kvm_s390_get_tod_clock_fast(vcpu->kvm))
 		return 0;
 		return 0;
-	}
-	preempt_enable();
 	return ckc_interrupts_enabled(vcpu);
 	return ckc_interrupts_enabled(vcpu);
 }
 }
 
 
@@ -109,14 +102,10 @@ static inline u8 int_word_to_isc(u32 int_word)
 	return (int_word & 0x38000000) >> 27;
 	return (int_word & 0x38000000) >> 27;
 }
 }
 
 
-static inline unsigned long pending_floating_irqs(struct kvm_vcpu *vcpu)
+static inline unsigned long pending_irqs(struct kvm_vcpu *vcpu)
 {
 {
-	return vcpu->kvm->arch.float_int.pending_irqs;
-}
-
-static inline unsigned long pending_local_irqs(struct kvm_vcpu *vcpu)
-{
-	return vcpu->arch.local_int.pending_irqs;
+	return vcpu->kvm->arch.float_int.pending_irqs |
+	       vcpu->arch.local_int.pending_irqs;
 }
 }
 
 
 static unsigned long disable_iscs(struct kvm_vcpu *vcpu,
 static unsigned long disable_iscs(struct kvm_vcpu *vcpu,
@@ -135,8 +124,7 @@ static unsigned long deliverable_irqs(struct kvm_vcpu *vcpu)
 {
 {
 	unsigned long active_mask;
 	unsigned long active_mask;
 
 
-	active_mask = pending_local_irqs(vcpu);
-	active_mask |= pending_floating_irqs(vcpu);
+	active_mask = pending_irqs(vcpu);
 	if (!active_mask)
 	if (!active_mask)
 		return 0;
 		return 0;
 
 
@@ -204,7 +192,7 @@ static void __set_cpuflag(struct kvm_vcpu *vcpu, u32 flag)
 
 
 static void set_intercept_indicators_io(struct kvm_vcpu *vcpu)
 static void set_intercept_indicators_io(struct kvm_vcpu *vcpu)
 {
 {
-	if (!(pending_floating_irqs(vcpu) & IRQ_PEND_IO_MASK))
+	if (!(pending_irqs(vcpu) & IRQ_PEND_IO_MASK))
 		return;
 	else if (psw_ioint_disabled(vcpu))
 		__set_cpuflag(vcpu, CPUSTAT_IO_INT);
@@ -214,7 +202,7 @@ static void set_intercept_indicators_io(struct kvm_vcpu *vcpu)
 
 
 static void set_intercept_indicators_ext(struct kvm_vcpu *vcpu)
 static void set_intercept_indicators_ext(struct kvm_vcpu *vcpu)
 {
 {
-	if (!(pending_local_irqs(vcpu) & IRQ_PEND_EXT_MASK))
+	if (!(pending_irqs(vcpu) & IRQ_PEND_EXT_MASK))
 		return;
 	if (psw_extint_disabled(vcpu))
 		__set_cpuflag(vcpu, CPUSTAT_EXT_INT);
@@ -224,7 +212,7 @@ static void set_intercept_indicators_ext(struct kvm_vcpu *vcpu)
 
 
 static void set_intercept_indicators_mchk(struct kvm_vcpu *vcpu)
 static void set_intercept_indicators_mchk(struct kvm_vcpu *vcpu)
 {
 {
-	if (!(pending_local_irqs(vcpu) & IRQ_PEND_MCHK_MASK))
+	if (!(pending_irqs(vcpu) & IRQ_PEND_MCHK_MASK))
 		return;
 	if (psw_mchk_disabled(vcpu))
 		vcpu->arch.sie_block->ictl |= ICTL_LPSW;
@@ -815,23 +803,21 @@ int kvm_s390_ext_call_pending(struct kvm_vcpu *vcpu)
 
 
 int kvm_s390_vcpu_has_irq(struct kvm_vcpu *vcpu, int exclude_stop)
 int kvm_s390_vcpu_has_irq(struct kvm_vcpu *vcpu, int exclude_stop)
 {
 {
-	int rc;
+	if (deliverable_irqs(vcpu))
+		return 1;
 
 
-	rc = !!deliverable_irqs(vcpu);
-
-	if (!rc && kvm_cpu_has_pending_timer(vcpu))
-		rc = 1;
+	if (kvm_cpu_has_pending_timer(vcpu))
+		return 1;
 
 
 	/* external call pending and deliverable */
 	/* external call pending and deliverable */
-	if (!rc && kvm_s390_ext_call_pending(vcpu) &&
+	if (kvm_s390_ext_call_pending(vcpu) &&
 	    !psw_extint_disabled(vcpu) &&
 	    (vcpu->arch.sie_block->gcr[0] & 0x2000ul))
-		rc = 1;
-
-	if (!rc && !exclude_stop && kvm_s390_is_stop_irq_pending(vcpu))
-		rc = 1;
+		return 1;
 
 
-	return rc;
+	if (!exclude_stop && kvm_s390_is_stop_irq_pending(vcpu))
+		return 1;
+	return 0;
 }
 }
 
 
 int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
 int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
@@ -846,7 +832,7 @@ int kvm_s390_handle_wait(struct kvm_vcpu *vcpu)
 	vcpu->stat.exit_wait_state++;
 	vcpu->stat.exit_wait_state++;
 
 
 	/* fast path */
 	/* fast path */
-	if (kvm_cpu_has_pending_timer(vcpu) || kvm_arch_vcpu_runnable(vcpu))
+	if (kvm_arch_vcpu_runnable(vcpu))
 		return 0;
 
 
 	if (psw_interrupts_disabled(vcpu)) {
 	if (psw_interrupts_disabled(vcpu)) {
@@ -860,9 +846,7 @@ int kvm_s390_handle_wait(struct kvm_vcpu *vcpu)
 		goto no_timer;
 		goto no_timer;
 	}
 	}
 
 
-	preempt_disable();
-	now = get_tod_clock_fast() + vcpu->arch.sie_block->epoch;
-	preempt_enable();
+	now = kvm_s390_get_tod_clock_fast(vcpu->kvm);
 	sltime = tod_to_ns(vcpu->arch.sie_block->ckc - now);
 
 
 	/* underflow */
 	/* underflow */
@@ -901,9 +885,7 @@ enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer)
 	u64 now, sltime;
 	u64 now, sltime;
 
 
 	vcpu = container_of(timer, struct kvm_vcpu, arch.ckc_timer);
 	vcpu = container_of(timer, struct kvm_vcpu, arch.ckc_timer);
-	preempt_disable();
-	now = get_tod_clock_fast() + vcpu->arch.sie_block->epoch;
-	preempt_enable();
+	now = kvm_s390_get_tod_clock_fast(vcpu->kvm);
 	sltime = tod_to_ns(vcpu->arch.sie_block->ckc - now);
 
 
 	/*
 	/*
@@ -981,39 +963,30 @@ static int __inject_prog(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
 	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_PROGRAM_INT,
 	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_PROGRAM_INT,
 				   irq->u.pgm.code, 0);
 				   irq->u.pgm.code, 0);
 
 
-	li->irq.pgm = irq->u.pgm;
+	if (irq->u.pgm.code == PGM_PER) {
+		li->irq.pgm.code |= PGM_PER;
+		/* only modify PER related information */
+		li->irq.pgm.per_address = irq->u.pgm.per_address;
+		li->irq.pgm.per_code = irq->u.pgm.per_code;
+		li->irq.pgm.per_atmid = irq->u.pgm.per_atmid;
+		li->irq.pgm.per_access_id = irq->u.pgm.per_access_id;
+	} else if (!(irq->u.pgm.code & PGM_PER)) {
+		li->irq.pgm.code = (li->irq.pgm.code & PGM_PER) |
+				   irq->u.pgm.code;
+		/* only modify non-PER information */
+		li->irq.pgm.trans_exc_code = irq->u.pgm.trans_exc_code;
+		li->irq.pgm.mon_code = irq->u.pgm.mon_code;
+		li->irq.pgm.data_exc_code = irq->u.pgm.data_exc_code;
+		li->irq.pgm.mon_class_nr = irq->u.pgm.mon_class_nr;
+		li->irq.pgm.exc_access_id = irq->u.pgm.exc_access_id;
+		li->irq.pgm.op_access_id = irq->u.pgm.op_access_id;
+	} else {
+		li->irq.pgm = irq->u.pgm;
+	}
 	set_bit(IRQ_PEND_PROG, &li->pending_irqs);
 	return 0;
 }
 
 
-int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code)
-{
-	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
-	struct kvm_s390_irq irq;
-
-	spin_lock(&li->lock);
-	irq.u.pgm.code = code;
-	__inject_prog(vcpu, &irq);
-	BUG_ON(waitqueue_active(li->wq));
-	spin_unlock(&li->lock);
-	return 0;
-}
-
-int kvm_s390_inject_prog_irq(struct kvm_vcpu *vcpu,
-			     struct kvm_s390_pgm_info *pgm_info)
-{
-	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
-	struct kvm_s390_irq irq;
-	int rc;
-
-	spin_lock(&li->lock);
-	irq.u.pgm = *pgm_info;
-	rc = __inject_prog(vcpu, &irq);
-	BUG_ON(waitqueue_active(li->wq));
-	spin_unlock(&li->lock);
-	return rc;
-}
-
 static int __inject_pfault_init(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
 static int __inject_pfault_init(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
 {
 {
 	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
 	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
@@ -1390,12 +1363,9 @@ static void __floating_irq_kick(struct kvm *kvm, u64 type)
 
 
 static int __inject_vm(struct kvm *kvm, struct kvm_s390_interrupt_info *inti)
 static int __inject_vm(struct kvm *kvm, struct kvm_s390_interrupt_info *inti)
 {
 {
-	struct kvm_s390_float_interrupt *fi;
 	u64 type = READ_ONCE(inti->type);
 	u64 type = READ_ONCE(inti->type);
 	int rc;
 	int rc;
 
 
-	fi = &kvm->arch.float_int;
-
 	switch (type) {
 	switch (type) {
 	case KVM_S390_MCHK:
 	case KVM_S390_MCHK:
 		rc = __inject_float_mchk(kvm, inti);
 		rc = __inject_float_mchk(kvm, inti);

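The interrupt.c changes above fold the separate floating and local pending-IRQ accessors into one pending_irqs() that ORs the two bitmasks, so every caller tests a single mask against IRQ_PEND_*_MASK. A minimal sketch of that idea follows; the mask values and struct layout are invented for the example and are not the kernel's.

#include <stdio.h>

/* Hypothetical mask layout, for illustration only. */
#define IRQ_PEND_IO_MASK   0x00ffUL
#define IRQ_PEND_EXT_MASK  0x0f00UL

struct vm_state   { unsigned long float_pending; };
struct vcpu_state { struct vm_state *vm; unsigned long local_pending; };

/* One accessor over both interrupt sources, as in the new pending_irqs(). */
static unsigned long pending_irqs(struct vcpu_state *v)
{
	return v->vm->float_pending | v->local_pending;
}

int main(void)
{
	struct vm_state vm = { .float_pending = 0x0004 };	/* a floating I/O irq */
	struct vcpu_state v = { .vm = &vm, .local_pending = 0x0100 };	/* a local ext irq */

	printf("io pending:  %s\n", (pending_irqs(&v) & IRQ_PEND_IO_MASK)  ? "yes" : "no");
	printf("ext pending: %s\n", (pending_irqs(&v) & IRQ_PEND_EXT_MASK) ? "yes" : "no");
	return 0;
}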
+ 27 - 31
arch/s390/kvm/kvm-s390.c

@@ -514,35 +514,20 @@ static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
 
 
 	if (gtod_high != 0)
 	if (gtod_high != 0)
 		return -EINVAL;
 		return -EINVAL;
-	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x\n", gtod_high);
+	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
 
 
 	return 0;
 	return 0;
 }
 }
 
 
 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
 {
 {
-	struct kvm_vcpu *cur_vcpu;
-	unsigned int vcpu_idx;
-	u64 host_tod, gtod;
-	int r;
+	u64 gtod;
 
 
 	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
 	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
 		return -EFAULT;
 		return -EFAULT;
 
 
-	r = store_tod_clock(&host_tod);
-	if (r)
-		return r;
-
-	mutex_lock(&kvm->lock);
-	preempt_disable();
-	kvm->arch.epoch = gtod - host_tod;
-	kvm_s390_vcpu_block_all(kvm);
-	kvm_for_each_vcpu(vcpu_idx, cur_vcpu, kvm)
-		cur_vcpu->arch.sie_block->epoch = kvm->arch.epoch;
-	kvm_s390_vcpu_unblock_all(kvm);
-	preempt_enable();
-	mutex_unlock(&kvm->lock);
-	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx\n", gtod);
+	kvm_s390_set_tod_clock(kvm, gtod);
+	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);
 	return 0;
 }
 
 
@@ -574,26 +559,19 @@ static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
 	if (copy_to_user((void __user *)attr->addr, &gtod_high,
 	if (copy_to_user((void __user *)attr->addr, &gtod_high,
 					 sizeof(gtod_high)))
 					 sizeof(gtod_high)))
 		return -EFAULT;
 		return -EFAULT;
-	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x\n", gtod_high);
+	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
 
 
 	return 0;
 	return 0;
 }
 }
 
 
 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
 {
 {
-	u64 host_tod, gtod;
-	int r;
+	u64 gtod;
 
 
-	r = store_tod_clock(&host_tod);
-	if (r)
-		return r;
-
-	preempt_disable();
-	gtod = host_tod + kvm->arch.epoch;
-	preempt_enable();
+	gtod = kvm_s390_get_tod_clock_fast(kvm);
 	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
 		return -EFAULT;
-	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx\n", gtod);
+	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
 
 
 	return 0;
 	return 0;
 }
 }
@@ -1120,7 +1098,9 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 	if (!kvm->arch.sca)
 	if (!kvm->arch.sca)
 		goto out_err;
 		goto out_err;
 	spin_lock(&kvm_lock);
 	spin_lock(&kvm_lock);
-	sca_offset = (sca_offset + 16) & 0x7f0;
+	sca_offset += 16;
+	if (sca_offset + sizeof(struct sca_block) > PAGE_SIZE)
+		sca_offset = 0;
 	kvm->arch.sca = (struct sca_block *) ((char *) kvm->arch.sca + sca_offset);
 	spin_unlock(&kvm_lock);
 	spin_unlock(&kvm_lock);
 
 
@@ -1911,6 +1891,22 @@ retry:
 	return 0;
 	return 0;
 }
 }
 
 
+void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
+{
+	struct kvm_vcpu *vcpu;
+	int i;
+
+	mutex_lock(&kvm->lock);
+	preempt_disable();
+	kvm->arch.epoch = tod - get_tod_clock();
+	kvm_s390_vcpu_block_all(kvm);
+	kvm_for_each_vcpu(i, vcpu, kvm)
+		vcpu->arch.sie_block->epoch = kvm->arch.epoch;
+	kvm_s390_vcpu_unblock_all(kvm);
+	preempt_enable();
+	mutex_unlock(&kvm->lock);
+}
+
 /**
 /**
  * kvm_arch_fault_in_page - fault-in guest page if necessary
  * kvm_arch_fault_in_page - fault-in guest page if necessary
  * @vcpu: The corresponding virtual cpu
  * @vcpu: The corresponding virtual cpu

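The new kvm_s390_set_tod_clock() records the difference between the requested guest TOD and the current host TOD as the per-VM epoch and copies it into every vCPU's SIE block; reads then reconstruct the guest TOD as host TOD plus epoch. The arithmetic in isolation (the host clock value is a made-up constant, not get_tod_clock()):

#include <stdio.h>
#include <stdint.h>

/* Stand-in for the host TOD clock; a fixed value keeps the example deterministic. */
static uint64_t host_tod(void) { return 1000000; }

int main(void)
{
	uint64_t wanted_guest_tod = 5000000;	/* example value */

	/* SET: epoch = requested guest TOD - current host TOD */
	uint64_t epoch = wanted_guest_tod - host_tod();

	/* GET: guest TOD = current host TOD + epoch */
	uint64_t guest_tod = host_tod() + epoch;

	printf("epoch = %llu, guest TOD = %llu\n",
	       (unsigned long long)epoch, (unsigned long long)guest_tod);
	return 0;
}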
+ 31 - 4
arch/s390/kvm/kvm-s390.h

@@ -175,6 +175,7 @@ static inline int kvm_s390_user_cpu_state_ctrl(struct kvm *kvm)
 	return kvm->arch.user_cpu_state_ctrl != 0;
 	return kvm->arch.user_cpu_state_ctrl != 0;
 }
 }
 
 
+/* implemented in interrupt.c */
 int kvm_s390_handle_wait(struct kvm_vcpu *vcpu);
 int kvm_s390_handle_wait(struct kvm_vcpu *vcpu);
 void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu);
 void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu);
 enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer);
 enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer);
@@ -185,7 +186,25 @@ int __must_check kvm_s390_inject_vm(struct kvm *kvm,
 				    struct kvm_s390_interrupt *s390int);
 				    struct kvm_s390_interrupt *s390int);
 int __must_check kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
 int __must_check kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
 				      struct kvm_s390_irq *irq);
 				      struct kvm_s390_irq *irq);
-int __must_check kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code);
+static inline int kvm_s390_inject_prog_irq(struct kvm_vcpu *vcpu,
+					   struct kvm_s390_pgm_info *pgm_info)
+{
+	struct kvm_s390_irq irq = {
+		.type = KVM_S390_PROGRAM_INT,
+		.u.pgm = *pgm_info,
+	};
+
+	return kvm_s390_inject_vcpu(vcpu, &irq);
+}
+static inline int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code)
+{
+	struct kvm_s390_irq irq = {
+		.type = KVM_S390_PROGRAM_INT,
+		.u.pgm.code = code,
+	};
+
+	return kvm_s390_inject_vcpu(vcpu, &irq);
+}
 struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm,
 struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm,
 						    u64 isc_mask, u32 schid);
 						    u64 isc_mask, u32 schid);
 int kvm_s390_reinject_io_int(struct kvm *kvm,
 int kvm_s390_reinject_io_int(struct kvm *kvm,
@@ -212,6 +231,7 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu);
 int kvm_s390_handle_sigp_pei(struct kvm_vcpu *vcpu);
 int kvm_s390_handle_sigp_pei(struct kvm_vcpu *vcpu);
 
 
 /* implemented in kvm-s390.c */
 /* implemented in kvm-s390.c */
+void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod);
 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable);
 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable);
 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long addr);
 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long addr);
 int kvm_s390_store_adtl_status_unloaded(struct kvm_vcpu *vcpu,
 int kvm_s390_store_adtl_status_unloaded(struct kvm_vcpu *vcpu,
@@ -231,9 +251,6 @@ extern unsigned long kvm_s390_fac_list_mask[];
 
 
 /* implemented in diag.c */
 /* implemented in diag.c */
 int kvm_s390_handle_diag(struct kvm_vcpu *vcpu);
 int kvm_s390_handle_diag(struct kvm_vcpu *vcpu);
-/* implemented in interrupt.c */
-int kvm_s390_inject_prog_irq(struct kvm_vcpu *vcpu,
-			     struct kvm_s390_pgm_info *pgm_info);
 
 
 static inline void kvm_s390_vcpu_block_all(struct kvm *kvm)
 static inline void kvm_s390_vcpu_block_all(struct kvm *kvm)
 {
 {
@@ -254,6 +271,16 @@ static inline void kvm_s390_vcpu_unblock_all(struct kvm *kvm)
 		kvm_s390_vcpu_unblock(vcpu);
 		kvm_s390_vcpu_unblock(vcpu);
 }
 }
 
 
+static inline u64 kvm_s390_get_tod_clock_fast(struct kvm *kvm)
+{
+	u64 rc;
+
+	preempt_disable();
+	rc = get_tod_clock_fast() + kvm->arch.epoch;
+	preempt_enable();
+	return rc;
+}
+
 /**
 /**
  * kvm_s390_inject_prog_cond - conditionally inject a program check
  * kvm_s390_inject_prog_cond - conditionally inject a program check
  * @vcpu: virtual cpu
  * @vcpu: virtual cpu

+ 3 - 16
arch/s390/kvm/priv.c

@@ -33,11 +33,9 @@
 /* Handle SCK (SET CLOCK) interception */
 /* Handle SCK (SET CLOCK) interception */
 static int handle_set_clock(struct kvm_vcpu *vcpu)
 static int handle_set_clock(struct kvm_vcpu *vcpu)
 {
 {
-	struct kvm_vcpu *cpup;
-	s64 hostclk, val;
-	int i, rc;
+	int rc;
 	ar_t ar;
 	ar_t ar;
-	u64 op2;
+	u64 op2, val;
 
 
 	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
 	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
 		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
 		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
@@ -49,19 +47,8 @@ static int handle_set_clock(struct kvm_vcpu *vcpu)
 	if (rc)
 	if (rc)
 		return kvm_s390_inject_prog_cond(vcpu, rc);
 		return kvm_s390_inject_prog_cond(vcpu, rc);
 
 
-	if (store_tod_clock(&hostclk)) {
-		kvm_s390_set_psw_cc(vcpu, 3);
-		return 0;
-	}
 	VCPU_EVENT(vcpu, 3, "SCK: setting guest TOD to 0x%llx", val);
-	val = (val - hostclk) & ~0x3fUL;
-
-	mutex_lock(&vcpu->kvm->lock);
-	preempt_disable();
-	kvm_for_each_vcpu(i, cpup, vcpu->kvm)
-		cpup->arch.sie_block->epoch = val;
-	preempt_enable();
-	mutex_unlock(&vcpu->kvm->lock);
+	kvm_s390_set_tod_clock(vcpu->kvm, val);
 
 
 	kvm_s390_set_psw_cc(vcpu, 0);
 	return 0;

+ 5 - 5
arch/x86/include/asm/irq_remapping.h

@@ -33,6 +33,11 @@ enum irq_remap_cap {
 	IRQ_POSTING_CAP = 0,
 	IRQ_POSTING_CAP = 0,
 };
 };
 
 
+struct vcpu_data {
+	u64 pi_desc_addr;	/* Physical address of PI Descriptor */
+	u32 vector;		/* Guest vector of the interrupt */
+};
+
 #ifdef CONFIG_IRQ_REMAP
 #ifdef CONFIG_IRQ_REMAP
 
 
 extern bool irq_remapping_cap(enum irq_remap_cap cap);
 extern bool irq_remapping_cap(enum irq_remap_cap cap);
@@ -58,11 +63,6 @@ static inline struct irq_domain *arch_get_ir_parent_domain(void)
 	return x86_vector_domain;
 	return x86_vector_domain;
 }
 }
 
 
-struct vcpu_data {
-	u64 pi_desc_addr;	/* Physical address of PI Descriptor */
-	u32 vector;		/* Guest vector of the interrupt */
-};
-
 #else  /* CONFIG_IRQ_REMAP */
 #else  /* CONFIG_IRQ_REMAP */
 
 
 static inline bool irq_remapping_cap(enum irq_remap_cap cap) { return 0; }
 static inline bool irq_remapping_cap(enum irq_remap_cap cap) { return 0; }

+ 10 - 0
arch/x86/include/asm/kvm_emulate.h

@@ -111,6 +111,16 @@ struct x86_emulate_ops {
 			unsigned int bytes,
 			unsigned int bytes,
 			struct x86_exception *fault);
 			struct x86_exception *fault);
 
 
+	/*
+	 * read_phys: Read bytes of standard (non-emulated/special) memory.
+	 *            Used for descriptor reading.
+	 *  @addr:  [IN ] Physical address from which to read.
+	 *  @val:   [OUT] Value read from memory.
+	 *  @bytes: [IN ] Number of bytes to read from memory.
+	 */
+	int (*read_phys)(struct x86_emulate_ctxt *ctxt, unsigned long addr,
+			void *val, unsigned int bytes);
+
 	/*
 	/*
 	 * write_std: Write bytes of standard (non-emulated/special) memory.
 	 * write_std: Write bytes of standard (non-emulated/special) memory.
 	 *            Used for descriptor writing.
 	 *            Used for descriptor writing.

+ 36 - 2
arch/x86/include/asm/kvm_host.h

@@ -24,6 +24,7 @@
 #include <linux/perf_event.h>
 #include <linux/perf_event.h>
 #include <linux/pvclock_gtod.h>
 #include <linux/pvclock_gtod.h>
 #include <linux/clocksource.h>
 #include <linux/clocksource.h>
+#include <linux/irqbypass.h>
 
 
 #include <asm/pvclock-abi.h>
 #include <asm/pvclock-abi.h>
 #include <asm/desc.h>
 #include <asm/desc.h>
@@ -176,6 +177,8 @@ enum {
  */
  */
 #define KVM_APIC_PV_EOI_PENDING	1
 #define KVM_APIC_PV_EOI_PENDING	1
 
 
+struct kvm_kernel_irq_routing_entry;
+
 /*
 /*
  * We don't want allocation failures within the mmu code, so we preallocate
  * We don't want allocation failures within the mmu code, so we preallocate
  * enough memory for a single page fault in a cache.
  * enough memory for a single page fault in a cache.
@@ -374,6 +377,7 @@ struct kvm_mtrr {
 /* Hyper-V per vcpu emulation context */
 /* Hyper-V per vcpu emulation context */
 struct kvm_vcpu_hv {
 struct kvm_vcpu_hv {
 	u64 hv_vapic;
 	u64 hv_vapic;
+	s64 runtime_offset;
 };
 };
 
 
 struct kvm_vcpu_arch {
 struct kvm_vcpu_arch {
@@ -396,6 +400,7 @@ struct kvm_vcpu_arch {
 	u64 efer;
 	u64 efer;
 	u64 apic_base;
 	u64 apic_base;
 	struct kvm_lapic *apic;    /* kernel irqchip context */
 	struct kvm_lapic *apic;    /* kernel irqchip context */
+	u64 eoi_exit_bitmap[4];
 	unsigned long apic_attention;
 	unsigned long apic_attention;
 	int32_t apic_arb_prio;
 	int32_t apic_arb_prio;
 	int mp_state;
 	int mp_state;
@@ -573,6 +578,9 @@ struct kvm_vcpu_arch {
 	struct {
 	struct {
 		bool pv_unhalted;
 		bool pv_unhalted;
 	} pv;
 	} pv;
+
+	int pending_ioapic_eoi;
+	int pending_external_vector;
 };
 };
 
 
 struct kvm_lpage_info {
 struct kvm_lpage_info {
@@ -683,6 +691,9 @@ struct kvm_arch {
 	u32 bsp_vcpu_id;
 	u32 bsp_vcpu_id;
 
 
 	u64 disabled_quirks;
 	u64 disabled_quirks;
+
+	bool irqchip_split;
+	u8 nr_reserved_ioapic_pins;
 };
 };
 
 
 struct kvm_vm_stat {
 struct kvm_vm_stat {
@@ -819,10 +830,10 @@ struct kvm_x86_ops {
 	void (*enable_nmi_window)(struct kvm_vcpu *vcpu);
 	void (*enable_nmi_window)(struct kvm_vcpu *vcpu);
 	void (*enable_irq_window)(struct kvm_vcpu *vcpu);
 	void (*enable_irq_window)(struct kvm_vcpu *vcpu);
 	void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr);
 	void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr);
-	int (*vm_has_apicv)(struct kvm *kvm);
+	int (*cpu_uses_apicv)(struct kvm_vcpu *vcpu);
 	void (*hwapic_irr_update)(struct kvm_vcpu *vcpu, int max_irr);
 	void (*hwapic_irr_update)(struct kvm_vcpu *vcpu, int max_irr);
 	void (*hwapic_isr_update)(struct kvm *kvm, int isr);
 	void (*hwapic_isr_update)(struct kvm *kvm, int isr);
-	void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
+	void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu);
 	void (*set_virtual_x2apic_mode)(struct kvm_vcpu *vcpu, bool set);
 	void (*set_virtual_x2apic_mode)(struct kvm_vcpu *vcpu, bool set);
 	void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu, hpa_t hpa);
 	void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu, hpa_t hpa);
 	void (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector);
 	void (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector);
@@ -887,6 +898,20 @@ struct kvm_x86_ops {
 					   gfn_t offset, unsigned long mask);
 					   gfn_t offset, unsigned long mask);
 	/* pmu operations of sub-arch */
 	/* pmu operations of sub-arch */
 	const struct kvm_pmu_ops *pmu_ops;
 	const struct kvm_pmu_ops *pmu_ops;
+
+	/*
+	 * Architecture specific hooks for vCPU blocking due to
+	 * HLT instruction.
+	 * Returns for .pre_block():
+	 *    - 0 means continue to block the vCPU.
+	 *    - 1 means we cannot block the vCPU since some event
+	 *        happens during this period, such as, 'ON' bit in
+	 *        posted-interrupts descriptor is set.
+	 */
+	int (*pre_block)(struct kvm_vcpu *vcpu);
+	void (*post_block)(struct kvm_vcpu *vcpu);
+	int (*update_pi_irte)(struct kvm *kvm, unsigned int host_irq,
+			      uint32_t guest_irq, bool set);
 };
 };
 
 
 struct kvm_arch_async_pf {
 struct kvm_arch_async_pf {
@@ -1231,4 +1256,13 @@ int x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size);
 bool kvm_vcpu_is_reset_bsp(struct kvm_vcpu *vcpu);
 bool kvm_vcpu_is_reset_bsp(struct kvm_vcpu *vcpu);
 bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu);
 bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu);
 
 
+bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq,
+			     struct kvm_vcpu **dest_vcpu);
+
+void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e,
+		     struct kvm_lapic_irq *irq);
+
+static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {}
+static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {}
+
 #endif /* _ASM_X86_KVM_HOST_H */
 #endif /* _ASM_X86_KVM_HOST_H */

+ 2 - 1
arch/x86/include/asm/vmx.h

@@ -72,7 +72,7 @@
 #define SECONDARY_EXEC_SHADOW_VMCS              0x00004000
 #define SECONDARY_EXEC_SHADOW_VMCS              0x00004000
 #define SECONDARY_EXEC_ENABLE_PML               0x00020000
 #define SECONDARY_EXEC_ENABLE_PML               0x00020000
 #define SECONDARY_EXEC_XSAVES			0x00100000
 #define SECONDARY_EXEC_XSAVES			0x00100000
-
+#define SECONDARY_EXEC_PCOMMIT			0x00200000
 
 
 #define PIN_BASED_EXT_INTR_MASK                 0x00000001
 #define PIN_BASED_EXT_INTR_MASK                 0x00000001
 #define PIN_BASED_NMI_EXITING                   0x00000008
 #define PIN_BASED_NMI_EXITING                   0x00000008
@@ -416,6 +416,7 @@ enum vmcs_field {
 #define VMX_EPT_EXTENT_CONTEXT_BIT		(1ull << 25)
 #define VMX_EPT_EXTENT_CONTEXT_BIT		(1ull << 25)
 #define VMX_EPT_EXTENT_GLOBAL_BIT		(1ull << 26)
 #define VMX_EPT_EXTENT_GLOBAL_BIT		(1ull << 26)
 
 
+#define VMX_VPID_INVVPID_BIT                    (1ull << 0) /* (32 - 32) */
 #define VMX_VPID_EXTENT_SINGLE_CONTEXT_BIT      (1ull << 9) /* (41 - 32) */
 #define VMX_VPID_EXTENT_SINGLE_CONTEXT_BIT      (1ull << 9) /* (41 - 32) */
 #define VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT      (1ull << 10) /* (42 - 32) */
 #define VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT      (1ull << 10) /* (42 - 32) */
 
 

+ 18 - 0
arch/x86/include/uapi/asm/hyperv.h

@@ -153,6 +153,12 @@
 /* MSR used to provide vcpu index */
 /* MSR used to provide vcpu index */
 #define HV_X64_MSR_VP_INDEX			0x40000002
 #define HV_X64_MSR_VP_INDEX			0x40000002
 
 
+/* MSR used to reset the guest OS. */
+#define HV_X64_MSR_RESET			0x40000003
+
+/* MSR used to provide vcpu runtime in 100ns units */
+#define HV_X64_MSR_VP_RUNTIME			0x40000010
+
 /* MSR used to read the per-partition time reference counter */
 /* MSR used to read the per-partition time reference counter */
 #define HV_X64_MSR_TIME_REF_COUNT		0x40000020
 #define HV_X64_MSR_TIME_REF_COUNT		0x40000020
 
 
@@ -251,4 +257,16 @@ typedef struct _HV_REFERENCE_TSC_PAGE {
 	__s64 tsc_offset;
 	__s64 tsc_offset;
 } HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE;
 } HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE;
 
 
+/* Define the number of synthetic interrupt sources. */
+#define HV_SYNIC_SINT_COUNT		(16)
+/* Define the expected SynIC version. */
+#define HV_SYNIC_VERSION_1		(0x1)
+
+#define HV_SYNIC_CONTROL_ENABLE		(1ULL << 0)
+#define HV_SYNIC_SIMP_ENABLE		(1ULL << 0)
+#define HV_SYNIC_SIEFP_ENABLE		(1ULL << 0)
+#define HV_SYNIC_SINT_MASKED		(1ULL << 16)
+#define HV_SYNIC_SINT_AUTO_EOI		(1ULL << 17)
+#define HV_SYNIC_SINT_VECTOR_MASK	(0xFF)
+
 #endif
 #endif

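The SynIC constants exported above describe the layout of a SINT register: an 8-bit vector plus masked and auto-EOI flag bits. A small decoder using only those constants (the sample register value is invented):

#include <stdio.h>
#include <stdint.h>

/* Constants as added to arch/x86/include/uapi/asm/hyperv.h */
#define HV_SYNIC_SINT_MASKED		(1ULL << 16)
#define HV_SYNIC_SINT_AUTO_EOI		(1ULL << 17)
#define HV_SYNIC_SINT_VECTOR_MASK	(0xFF)

int main(void)
{
	uint64_t sint = 0x00000000000200b0ULL;	/* example: vector 0xb0, auto-EOI set */

	printf("vector   : 0x%llx\n",
	       (unsigned long long)(sint & HV_SYNIC_SINT_VECTOR_MASK));
	printf("masked   : %s\n", (sint & HV_SYNIC_SINT_MASKED)   ? "yes" : "no");
	printf("auto-EOI : %s\n", (sint & HV_SYNIC_SINT_AUTO_EOI) ? "yes" : "no");
	return 0;
}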
+ 3 - 1
arch/x86/include/uapi/asm/vmx.h

@@ -78,6 +78,7 @@
 #define EXIT_REASON_PML_FULL            62
 #define EXIT_REASON_PML_FULL            62
 #define EXIT_REASON_XSAVES              63
 #define EXIT_REASON_XSAVES              63
 #define EXIT_REASON_XRSTORS             64
 #define EXIT_REASON_XRSTORS             64
+#define EXIT_REASON_PCOMMIT             65
 
 
 #define VMX_EXIT_REASONS \
 #define VMX_EXIT_REASONS \
 	{ EXIT_REASON_EXCEPTION_NMI,         "EXCEPTION_NMI" }, \
 	{ EXIT_REASON_EXCEPTION_NMI,         "EXCEPTION_NMI" }, \
@@ -126,7 +127,8 @@
 	{ EXIT_REASON_INVVPID,               "INVVPID" }, \
 	{ EXIT_REASON_INVVPID,               "INVVPID" }, \
 	{ EXIT_REASON_INVPCID,               "INVPCID" }, \
 	{ EXIT_REASON_INVPCID,               "INVPCID" }, \
 	{ EXIT_REASON_XSAVES,                "XSAVES" }, \
 	{ EXIT_REASON_XSAVES,                "XSAVES" }, \
-	{ EXIT_REASON_XRSTORS,               "XRSTORS" }
+	{ EXIT_REASON_XRSTORS,               "XRSTORS" }, \
+	{ EXIT_REASON_PCOMMIT,               "PCOMMIT" }
 
 
 #define VMX_ABORT_SAVE_GUEST_MSR_FAIL        1
 #define VMX_ABORT_SAVE_GUEST_MSR_FAIL        1
 #define VMX_ABORT_LOAD_HOST_MSR_FAIL         4
 #define VMX_ABORT_LOAD_HOST_MSR_FAIL         4

+ 35 - 11
arch/x86/kernel/kvmclock.c

@@ -32,6 +32,7 @@
 static int kvmclock = 1;
 static int kvmclock = 1;
 static int msr_kvm_system_time = MSR_KVM_SYSTEM_TIME;
 static int msr_kvm_system_time = MSR_KVM_SYSTEM_TIME;
 static int msr_kvm_wall_clock = MSR_KVM_WALL_CLOCK;
 static int msr_kvm_wall_clock = MSR_KVM_WALL_CLOCK;
+static cycle_t kvm_sched_clock_offset;
 
 
 static int parse_no_kvmclock(char *arg)
 static int parse_no_kvmclock(char *arg)
 {
 {
@@ -92,6 +93,29 @@ static cycle_t kvm_clock_get_cycles(struct clocksource *cs)
 	return kvm_clock_read();
 	return kvm_clock_read();
 }
 }
 
 
+static cycle_t kvm_sched_clock_read(void)
+{
+	return kvm_clock_read() - kvm_sched_clock_offset;
+}
+
+static inline void kvm_sched_clock_init(bool stable)
+{
+	if (!stable) {
+		pv_time_ops.sched_clock = kvm_clock_read;
+		return;
+	}
+
+	kvm_sched_clock_offset = kvm_clock_read();
+	pv_time_ops.sched_clock = kvm_sched_clock_read;
+	set_sched_clock_stable();
+
+	printk(KERN_INFO "kvm-clock: using sched offset of %llu cycles\n",
+			kvm_sched_clock_offset);
+
+	BUILD_BUG_ON(sizeof(kvm_sched_clock_offset) >
+	         sizeof(((struct pvclock_vcpu_time_info *)NULL)->system_time));
+}
+
 /*
 /*
  * If we don't do that, there is the possibility that the guest
  * If we don't do that, there is the possibility that the guest
  * will calibrate under heavy load - thus, getting a lower lpj -
  * will calibrate under heavy load - thus, getting a lower lpj -
@@ -248,7 +272,17 @@ void __init kvmclock_init(void)
 		memblock_free(mem, size);
 		memblock_free(mem, size);
 		return;
 		return;
 	}
 	}
-	pv_time_ops.sched_clock = kvm_clock_read;
+
+	if (kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE_STABLE_BIT))
+		pvclock_set_flags(PVCLOCK_TSC_STABLE_BIT);
+
+	cpu = get_cpu();
+	vcpu_time = &hv_clock[cpu].pvti;
+	flags = pvclock_read_flags(vcpu_time);
+
+	kvm_sched_clock_init(flags & PVCLOCK_TSC_STABLE_BIT);
+	put_cpu();
+
 	x86_platform.calibrate_tsc = kvm_get_tsc_khz;
 	x86_platform.calibrate_tsc = kvm_get_tsc_khz;
 	x86_platform.get_wallclock = kvm_get_wallclock;
 	x86_platform.get_wallclock = kvm_get_wallclock;
 	x86_platform.set_wallclock = kvm_set_wallclock;
 	x86_platform.set_wallclock = kvm_set_wallclock;
@@ -265,16 +299,6 @@ void __init kvmclock_init(void)
 	kvm_get_preset_lpj();
 	kvm_get_preset_lpj();
 	clocksource_register_hz(&kvm_clock, NSEC_PER_SEC);
 	clocksource_register_hz(&kvm_clock, NSEC_PER_SEC);
 	pv_info.name = "KVM";
 	pv_info.name = "KVM";
-
-	if (kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE_STABLE_BIT))
-		pvclock_set_flags(~0);
-
-	cpu = get_cpu();
-	vcpu_time = &hv_clock[cpu].pvti;
-	flags = pvclock_read_flags(vcpu_time);
-	if (flags & PVCLOCK_COUNTS_FROM_ZERO)
-		set_sched_clock_stable();
-	put_cpu();
 }
 }
 
 
 int __init kvm_setup_vsyscall_timeinfo(void)
 int __init kvm_setup_vsyscall_timeinfo(void)

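The kvmclock change records the clock value at init and reports scheduler time relative to that snapshot, so sched_clock() starts near zero when the clocksource is stable. The same idea, stripped of the paravirt plumbing (the clock here is just a fake counter):

#include <stdio.h>
#include <stdint.h>

static uint64_t fake_clock = 123456789;	/* pretend kvm_clock_read() value at boot */

static uint64_t clock_read(void) { return fake_clock; }

static uint64_t sched_clock_offset;

/* Mirrors the shape of kvm_sched_clock_read(): time relative to the init snapshot. */
static uint64_t sched_clock_read(void) { return clock_read() - sched_clock_offset; }

int main(void)
{
	sched_clock_offset = clock_read();	/* snapshot taken once at init */

	fake_clock += 1000;			/* 1000 "cycles" later */
	printf("sched_clock = %llu\n", (unsigned long long)sched_clock_read());
	return 0;
}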
+ 2 - 0
arch/x86/kvm/Kconfig

@@ -28,6 +28,8 @@ config KVM
 	select ANON_INODES
 	select ANON_INODES
 	select HAVE_KVM_IRQCHIP
 	select HAVE_KVM_IRQCHIP
 	select HAVE_KVM_IRQFD
 	select HAVE_KVM_IRQFD
+	select IRQ_BYPASS_MANAGER
+	select HAVE_KVM_IRQ_BYPASS
 	select HAVE_KVM_IRQ_ROUTING
 	select HAVE_KVM_IRQ_ROUTING
 	select HAVE_KVM_EVENTFD
 	select HAVE_KVM_EVENTFD
 	select KVM_APIC_ARCHITECTURE
 	select KVM_APIC_ARCHITECTURE

+ 37 - 25
arch/x86/kvm/assigned-dev.c

@@ -21,6 +21,7 @@
 #include <linux/fs.h>
 #include <linux/fs.h>
 #include "irq.h"
 #include "irq.h"
 #include "assigned-dev.h"
 #include "assigned-dev.h"
+#include "trace/events/kvm.h"
 
 
 struct kvm_assigned_dev_kernel {
 struct kvm_assigned_dev_kernel {
 	struct kvm_irq_ack_notifier ack_notifier;
 	struct kvm_irq_ack_notifier ack_notifier;
@@ -131,7 +132,42 @@ static irqreturn_t kvm_assigned_dev_thread_intx(int irq, void *dev_id)
 	return IRQ_HANDLED;
 	return IRQ_HANDLED;
 }
 }
 
 
-#ifdef __KVM_HAVE_MSI
+/*
+ * Deliver an IRQ in an atomic context if we can, or return a failure,
+ * user can retry in a process context.
+ * Return value:
+ *  -EWOULDBLOCK - Can't deliver in atomic context: retry in a process context.
+ *  Other values - No need to retry.
+ */
+static int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq,
+				int level)
+{
+	struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS];
+	struct kvm_kernel_irq_routing_entry *e;
+	int ret = -EINVAL;
+	int idx;
+
+	trace_kvm_set_irq(irq, level, irq_source_id);
+
+	/*
+	 * Injection into either PIC or IOAPIC might need to scan all CPUs,
+	 * which would need to be retried from thread context;  when same GSI
+	 * is connected to both PIC and IOAPIC, we'd have to report a
+	 * partial failure here.
+	 * Since there's no easy way to do this, we only support injecting MSI
+	 * which is limited to 1:1 GSI mapping.
+	 */
+	idx = srcu_read_lock(&kvm->irq_srcu);
+	if (kvm_irq_map_gsi(kvm, entries, irq) > 0) {
+		e = &entries[0];
+		ret = kvm_arch_set_irq_inatomic(e, kvm, irq_source_id,
+						irq, level);
+	}
+	srcu_read_unlock(&kvm->irq_srcu, idx);
+	return ret;
+}
+
+
 static irqreturn_t kvm_assigned_dev_msi(int irq, void *dev_id)
 static irqreturn_t kvm_assigned_dev_msi(int irq, void *dev_id)
 {
 {
 	struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
 	struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
@@ -150,9 +186,7 @@ static irqreturn_t kvm_assigned_dev_thread_msi(int irq, void *dev_id)
 
 
 	return IRQ_HANDLED;
 	return IRQ_HANDLED;
 }
 }
-#endif
 
 
-#ifdef __KVM_HAVE_MSIX
 static irqreturn_t kvm_assigned_dev_msix(int irq, void *dev_id)
 static irqreturn_t kvm_assigned_dev_msix(int irq, void *dev_id)
 {
 {
 	struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
 	struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
@@ -183,7 +217,6 @@ static irqreturn_t kvm_assigned_dev_thread_msix(int irq, void *dev_id)
 
 
 	return IRQ_HANDLED;
 	return IRQ_HANDLED;
 }
 }
-#endif
 
 
 /* Ack the irq line for an assigned device */
 /* Ack the irq line for an assigned device */
 static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian)
 static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian)
@@ -386,7 +419,6 @@ static int assigned_device_enable_host_intx(struct kvm *kvm,
 	return 0;
 	return 0;
 }
 }
 
 
-#ifdef __KVM_HAVE_MSI
 static int assigned_device_enable_host_msi(struct kvm *kvm,
 static int assigned_device_enable_host_msi(struct kvm *kvm,
 					   struct kvm_assigned_dev_kernel *dev)
 					   struct kvm_assigned_dev_kernel *dev)
 {
 {
@@ -408,9 +440,7 @@ static int assigned_device_enable_host_msi(struct kvm *kvm,
 
 
 	return 0;
 	return 0;
 }
 }
-#endif
 
 
-#ifdef __KVM_HAVE_MSIX
 static int assigned_device_enable_host_msix(struct kvm *kvm,
 static int assigned_device_enable_host_msix(struct kvm *kvm,
 					    struct kvm_assigned_dev_kernel *dev)
 					    struct kvm_assigned_dev_kernel *dev)
 {
 {
@@ -443,8 +473,6 @@ err:
 	return r;
 	return r;
 }
 }
 
 
-#endif
-
 static int assigned_device_enable_guest_intx(struct kvm *kvm,
 static int assigned_device_enable_guest_intx(struct kvm *kvm,
 				struct kvm_assigned_dev_kernel *dev,
 				struct kvm_assigned_dev_kernel *dev,
 				struct kvm_assigned_irq *irq)
 				struct kvm_assigned_irq *irq)
@@ -454,7 +482,6 @@ static int assigned_device_enable_guest_intx(struct kvm *kvm,
 	return 0;
 	return 0;
 }
 }
 
 
-#ifdef __KVM_HAVE_MSI
 static int assigned_device_enable_guest_msi(struct kvm *kvm,
 static int assigned_device_enable_guest_msi(struct kvm *kvm,
 			struct kvm_assigned_dev_kernel *dev,
 			struct kvm_assigned_dev_kernel *dev,
 			struct kvm_assigned_irq *irq)
 			struct kvm_assigned_irq *irq)
@@ -463,9 +490,7 @@ static int assigned_device_enable_guest_msi(struct kvm *kvm,
 	dev->ack_notifier.gsi = -1;
 	dev->ack_notifier.gsi = -1;
 	return 0;
 	return 0;
 }
 }
-#endif
 
 
-#ifdef __KVM_HAVE_MSIX
 static int assigned_device_enable_guest_msix(struct kvm *kvm,
 static int assigned_device_enable_guest_msix(struct kvm *kvm,
 			struct kvm_assigned_dev_kernel *dev,
 			struct kvm_assigned_dev_kernel *dev,
 			struct kvm_assigned_irq *irq)
 			struct kvm_assigned_irq *irq)
@@ -474,7 +499,6 @@ static int assigned_device_enable_guest_msix(struct kvm *kvm,
 	dev->ack_notifier.gsi = -1;
 	dev->ack_notifier.gsi = -1;
 	return 0;
 	return 0;
 }
 }
-#endif
 
 
 static int assign_host_irq(struct kvm *kvm,
 static int assign_host_irq(struct kvm *kvm,
 			   struct kvm_assigned_dev_kernel *dev,
 			   struct kvm_assigned_dev_kernel *dev,
@@ -492,16 +516,12 @@ static int assign_host_irq(struct kvm *kvm,
 	case KVM_DEV_IRQ_HOST_INTX:
 	case KVM_DEV_IRQ_HOST_INTX:
 		r = assigned_device_enable_host_intx(kvm, dev);
 		r = assigned_device_enable_host_intx(kvm, dev);
 		break;
 		break;
-#ifdef __KVM_HAVE_MSI
 	case KVM_DEV_IRQ_HOST_MSI:
 	case KVM_DEV_IRQ_HOST_MSI:
 		r = assigned_device_enable_host_msi(kvm, dev);
 		r = assigned_device_enable_host_msi(kvm, dev);
 		break;
 		break;
-#endif
-#ifdef __KVM_HAVE_MSIX
 	case KVM_DEV_IRQ_HOST_MSIX:
 	case KVM_DEV_IRQ_HOST_MSIX:
 		r = assigned_device_enable_host_msix(kvm, dev);
 		r = assigned_device_enable_host_msix(kvm, dev);
 		break;
 		break;
-#endif
 	default:
 	default:
 		r = -EINVAL;
 		r = -EINVAL;
 	}
 	}
@@ -534,16 +554,12 @@ static int assign_guest_irq(struct kvm *kvm,
 	case KVM_DEV_IRQ_GUEST_INTX:
 	case KVM_DEV_IRQ_GUEST_INTX:
 		r = assigned_device_enable_guest_intx(kvm, dev, irq);
 		r = assigned_device_enable_guest_intx(kvm, dev, irq);
 		break;
 		break;
-#ifdef __KVM_HAVE_MSI
 	case KVM_DEV_IRQ_GUEST_MSI:
 	case KVM_DEV_IRQ_GUEST_MSI:
 		r = assigned_device_enable_guest_msi(kvm, dev, irq);
 		r = assigned_device_enable_guest_msi(kvm, dev, irq);
 		break;
 		break;
-#endif
-#ifdef __KVM_HAVE_MSIX
 	case KVM_DEV_IRQ_GUEST_MSIX:
 	case KVM_DEV_IRQ_GUEST_MSIX:
 		r = assigned_device_enable_guest_msix(kvm, dev, irq);
 		r = assigned_device_enable_guest_msix(kvm, dev, irq);
 		break;
 		break;
-#endif
 	default:
 	default:
 		r = -EINVAL;
 		r = -EINVAL;
 	}
 	}
@@ -826,7 +842,6 @@ out:
 }
 }
 
 
 
 
-#ifdef __KVM_HAVE_MSIX
 static int kvm_vm_ioctl_set_msix_nr(struct kvm *kvm,
 static int kvm_vm_ioctl_set_msix_nr(struct kvm *kvm,
 				    struct kvm_assigned_msix_nr *entry_nr)
 				    struct kvm_assigned_msix_nr *entry_nr)
 {
 {
@@ -906,7 +921,6 @@ msix_entry_out:
 
 
 	return r;
 	return r;
 }
 }
-#endif
 
 
 static int kvm_vm_ioctl_set_pci_irq_mask(struct kvm *kvm,
 static int kvm_vm_ioctl_set_pci_irq_mask(struct kvm *kvm,
 		struct kvm_assigned_pci_dev *assigned_dev)
 		struct kvm_assigned_pci_dev *assigned_dev)
@@ -1012,7 +1026,6 @@ long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
 			goto out;
 			goto out;
 		break;
 		break;
 	}
 	}
-#ifdef __KVM_HAVE_MSIX
 	case KVM_ASSIGN_SET_MSIX_NR: {
 	case KVM_ASSIGN_SET_MSIX_NR: {
 		struct kvm_assigned_msix_nr entry_nr;
 		struct kvm_assigned_msix_nr entry_nr;
 		r = -EFAULT;
 		r = -EFAULT;
@@ -1033,7 +1046,6 @@ long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
 			goto out;
 			goto out;
 		break;
 		break;
 	}
 	}
-#endif
 	case KVM_ASSIGN_SET_INTX_MASK: {
 	case KVM_ASSIGN_SET_INTX_MASK: {
 		struct kvm_assigned_pci_dev assigned_dev;
 		struct kvm_assigned_pci_dev assigned_dev;
 
 

+ 1 - 1
arch/x86/kvm/cpuid.c

@@ -348,7 +348,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 		F(FSGSBASE) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) |
 		F(FSGSBASE) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) |
 		F(BMI2) | F(ERMS) | f_invpcid | F(RTM) | f_mpx | F(RDSEED) |
 		F(BMI2) | F(ERMS) | f_invpcid | F(RTM) | f_mpx | F(RDSEED) |
 		F(ADX) | F(SMAP) | F(AVX512F) | F(AVX512PF) | F(AVX512ER) |
 		F(ADX) | F(SMAP) | F(AVX512F) | F(AVX512PF) | F(AVX512ER) |
-		F(AVX512CD);
+		F(AVX512CD) | F(CLFLUSHOPT) | F(CLWB) | F(PCOMMIT);
 
 
 	/* cpuid 0xD.1.eax */
 	/* cpuid 0xD.1.eax */
 	const u32 kvm_supported_word10_x86_features =
 	const u32 kvm_supported_word10_x86_features =

+ 37 - 0
arch/x86/kvm/cpuid.h

@@ -133,4 +133,41 @@ static inline bool guest_cpuid_has_mpx(struct kvm_vcpu *vcpu)
 	best = kvm_find_cpuid_entry(vcpu, 7, 0);
 	best = kvm_find_cpuid_entry(vcpu, 7, 0);
 	return best && (best->ebx & bit(X86_FEATURE_MPX));
 	return best && (best->ebx & bit(X86_FEATURE_MPX));
 }
 }
+
+static inline bool guest_cpuid_has_pcommit(struct kvm_vcpu *vcpu)
+{
+	struct kvm_cpuid_entry2 *best;
+
+	best = kvm_find_cpuid_entry(vcpu, 7, 0);
+	return best && (best->ebx & bit(X86_FEATURE_PCOMMIT));
+}
+
+static inline bool guest_cpuid_has_rdtscp(struct kvm_vcpu *vcpu)
+{
+	struct kvm_cpuid_entry2 *best;
+
+	best = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
+	return best && (best->edx & bit(X86_FEATURE_RDTSCP));
+}
+
+/*
+ * NRIPS is provided through cpuidfn 0x8000000a.edx bit 3
+ */
+#define BIT_NRIPS	3
+
+static inline bool guest_cpuid_has_nrips(struct kvm_vcpu *vcpu)
+{
+	struct kvm_cpuid_entry2 *best;
+
+	best = kvm_find_cpuid_entry(vcpu, 0x8000000a, 0);
+
+	/*
+	 * NRIPS is a scattered cpuid feature, so we can't use
+	 * X86_FEATURE_NRIPS here (X86_FEATURE_NRIPS would be bit
+	 * position 8, not 3).
+	 */
+	return best && (best->edx & bit(BIT_NRIPS));
+}
+#undef BIT_NRIPS
+
 #endif
 #endif

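The guest_cpuid_has_*() helpers added above all follow one pattern: find the relevant CPUID leaf in the guest's table and test a single feature bit in one register. A sketch of that pattern without the KVM types (the guest table below is invented; RDTSCP is bit 27 of leaf 0x80000001 EDX):

#include <stdio.h>
#include <stdint.h>
#include <stddef.h>

#define BIT(x) (1u << (x))

struct cpuid_entry { uint32_t function, index, eax, ebx, ecx, edx; };

/* Tiny stand-in for kvm_find_cpuid_entry(). */
static struct cpuid_entry *find_entry(struct cpuid_entry *tbl, size_t n,
				      uint32_t fn, uint32_t idx)
{
	for (size_t i = 0; i < n; i++)
		if (tbl[i].function == fn && tbl[i].index == idx)
			return &tbl[i];
	return NULL;
}

static int has_rdtscp(struct cpuid_entry *tbl, size_t n)
{
	struct cpuid_entry *best = find_entry(tbl, n, 0x80000001, 0);

	return best && (best->edx & BIT(27));	/* RDTSCP feature bit */
}

int main(void)
{
	struct cpuid_entry guest[] = {
		{ .function = 0x80000001, .edx = BIT(27) },	/* invented leaf contents */
	};

	printf("guest has RDTSCP: %s\n",
	       has_rdtscp(guest, sizeof(guest) / sizeof(guest[0])) ? "yes" : "no");
	return 0;
}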
+ 27 - 8
arch/x86/kvm/emulate.c

@@ -2272,8 +2272,8 @@ static int emulator_has_longmode(struct x86_emulate_ctxt *ctxt)
 #define GET_SMSTATE(type, smbase, offset)				  \
 #define GET_SMSTATE(type, smbase, offset)				  \
 	({								  \
 	({								  \
 	 type __val;							  \
 	 type __val;							  \
-	 int r = ctxt->ops->read_std(ctxt, smbase + offset, &__val,       \
-				     sizeof(__val), NULL);		  \
+	 int r = ctxt->ops->read_phys(ctxt, smbase + offset, &__val,      \
+				      sizeof(__val));			  \
 	 if (r != X86EMUL_CONTINUE)					  \
 	 if (r != X86EMUL_CONTINUE)					  \
 		 return X86EMUL_UNHANDLEABLE;				  \
 		 return X86EMUL_UNHANDLEABLE;				  \
 	 __val;								  \
 	 __val;								  \
@@ -2484,17 +2484,36 @@ static int em_rsm(struct x86_emulate_ctxt *ctxt)
 
 
 	/*
 	/*
 	 * Get back to real mode, to prepare a safe state in which to load
 	 * Get back to real mode, to prepare a safe state in which to load
-	 * CR0/CR3/CR4/EFER.  Also this will ensure that addresses passed
-	 * to read_std/write_std are not virtual.
-	 *
-	 * CR4.PCIDE must be zero, because it is a 64-bit mode only feature.
+	 * CR0/CR3/CR4/EFER.  It's all a bit more complicated if the vCPU
+	 * supports long mode.
 	 */
 	 */
+	cr4 = ctxt->ops->get_cr(ctxt, 4);
+	if (emulator_has_longmode(ctxt)) {
+		struct desc_struct cs_desc;
+
+		/* Zero CR4.PCIDE before CR0.PG.  */
+		if (cr4 & X86_CR4_PCIDE) {
+			ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PCIDE);
+			cr4 &= ~X86_CR4_PCIDE;
+		}
+
+		/* A 32-bit code segment is required to clear EFER.LMA.  */
+		memset(&cs_desc, 0, sizeof(cs_desc));
+		cs_desc.type = 0xb;
+		cs_desc.s = cs_desc.g = cs_desc.p = 1;
+		ctxt->ops->set_segment(ctxt, 0, &cs_desc, 0, VCPU_SREG_CS);
+	}
+
+	/* For the 64-bit case, this will clear EFER.LMA.  */
 	cr0 = ctxt->ops->get_cr(ctxt, 0);
 	if (cr0 & X86_CR0_PE)
 		ctxt->ops->set_cr(ctxt, 0, cr0 & ~(X86_CR0_PG | X86_CR0_PE));
-	cr4 = ctxt->ops->get_cr(ctxt, 4);
+
+	/* Now clear CR4.PAE (which must be done before clearing EFER.LME).  */
 	if (cr4 & X86_CR4_PAE)
 		ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PAE);
+
+	/* And finally go back to 32-bit mode.  */
 	efer = 0;
 	ctxt->ops->set_msr(ctxt, MSR_EFER, efer);
 
 
@@ -4455,7 +4474,7 @@ static const struct opcode twobyte_table[256] = {
 	F(DstMem | SrcReg | Src2CL | ModRM, em_shld), N, N,
 	F(DstMem | SrcReg | Src2CL | ModRM, em_shld), N, N,
 	/* 0xA8 - 0xAF */
 	/* 0xA8 - 0xAF */
 	I(Stack | Src2GS, em_push_sreg), I(Stack | Src2GS, em_pop_sreg),
 	I(Stack | Src2GS, em_push_sreg), I(Stack | Src2GS, em_pop_sreg),
-	II(No64 | EmulateOnUD | ImplicitOps, em_rsm, rsm),
+	II(EmulateOnUD | ImplicitOps, em_rsm, rsm),
 	F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_bts),
 	F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_bts),
 	F(DstMem | SrcReg | Src2ImmByte | ModRM, em_shrd),
 	F(DstMem | SrcReg | Src2ImmByte | ModRM, em_shrd),
 	F(DstMem | SrcReg | Src2CL | ModRM, em_shrd),
 	F(DstMem | SrcReg | Src2CL | ModRM, em_shrd),

+ 29 - 2
arch/x86/kvm/hyperv.c

@@ -41,6 +41,7 @@ static bool kvm_hv_msr_partition_wide(u32 msr)
 	case HV_X64_MSR_TIME_REF_COUNT:
 	case HV_X64_MSR_TIME_REF_COUNT:
 	case HV_X64_MSR_CRASH_CTL:
 	case HV_X64_MSR_CRASH_CTL:
 	case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
 	case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
+	case HV_X64_MSR_RESET:
 		r = true;
 		r = true;
 		break;
 		break;
 	}
 	}
@@ -163,6 +164,12 @@ static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data,
 						 data);
 						 data);
 	case HV_X64_MSR_CRASH_CTL:
 	case HV_X64_MSR_CRASH_CTL:
 		return kvm_hv_msr_set_crash_ctl(vcpu, data, host);
 		return kvm_hv_msr_set_crash_ctl(vcpu, data, host);
+	case HV_X64_MSR_RESET:
+		if (data == 1) {
+			vcpu_debug(vcpu, "hyper-v reset requested\n");
+			kvm_make_request(KVM_REQ_HV_RESET, vcpu);
+		}
+		break;
 	default:
 	default:
 		vcpu_unimpl(vcpu, "Hyper-V uhandled wrmsr: 0x%x data 0x%llx\n",
 		vcpu_unimpl(vcpu, "Hyper-V uhandled wrmsr: 0x%x data 0x%llx\n",
 			    msr, data);
 			    msr, data);
@@ -171,7 +178,16 @@ static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data,
 	return 0;
 	return 0;
 }
 }
 
 
-static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
+/* Calculate cpu time spent by current task in 100ns units */
+static u64 current_task_runtime_100ns(void)
+{
+	cputime_t utime, stime;
+
+	task_cputime_adjusted(current, &utime, &stime);
+	return div_u64(cputime_to_nsecs(utime + stime), 100);
+}
+
+static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
 {
 {
 	struct kvm_vcpu_hv *hv = &vcpu->arch.hyperv;
 	struct kvm_vcpu_hv *hv = &vcpu->arch.hyperv;
 
 
@@ -205,6 +221,11 @@ static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 		return kvm_hv_vapic_msr_write(vcpu, APIC_ICR, data);
 		return kvm_hv_vapic_msr_write(vcpu, APIC_ICR, data);
 	case HV_X64_MSR_TPR:
 	case HV_X64_MSR_TPR:
 		return kvm_hv_vapic_msr_write(vcpu, APIC_TASKPRI, data);
 		return kvm_hv_vapic_msr_write(vcpu, APIC_TASKPRI, data);
+	case HV_X64_MSR_VP_RUNTIME:
+		if (!host)
+			return 1;
+		hv->runtime_offset = data - current_task_runtime_100ns();
+		break;
 	default:
 	default:
 		vcpu_unimpl(vcpu, "Hyper-V uhandled wrmsr: 0x%x data 0x%llx\n",
 		vcpu_unimpl(vcpu, "Hyper-V uhandled wrmsr: 0x%x data 0x%llx\n",
 			    msr, data);
 			    msr, data);
@@ -241,6 +262,9 @@ static int kvm_hv_get_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
 						 pdata);
 						 pdata);
 	case HV_X64_MSR_CRASH_CTL:
 	case HV_X64_MSR_CRASH_CTL:
 		return kvm_hv_msr_get_crash_ctl(vcpu, pdata);
 		return kvm_hv_msr_get_crash_ctl(vcpu, pdata);
+	case HV_X64_MSR_RESET:
+		data = 0;
+		break;
 	default:
 	default:
 		vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
 		vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
 		return 1;
 		return 1;
@@ -277,6 +301,9 @@ static int kvm_hv_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
 	case HV_X64_MSR_APIC_ASSIST_PAGE:
 	case HV_X64_MSR_APIC_ASSIST_PAGE:
 		data = hv->hv_vapic;
 		data = hv->hv_vapic;
 		break;
 		break;
+	case HV_X64_MSR_VP_RUNTIME:
+		data = current_task_runtime_100ns() + hv->runtime_offset;
+		break;
 	default:
 	default:
 		vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
 		vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
 		return 1;
 		return 1;
@@ -295,7 +322,7 @@ int kvm_hv_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
 		mutex_unlock(&vcpu->kvm->lock);
 		mutex_unlock(&vcpu->kvm->lock);
 		return r;
 		return r;
 	} else
 	} else
-		return kvm_hv_set_msr(vcpu, msr, data);
+		return kvm_hv_set_msr(vcpu, msr, data, host);
 }
 }
 
 
 int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
 int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)

+ 3 - 1
arch/x86/kvm/i8254.c

@@ -35,6 +35,7 @@
 #include <linux/kvm_host.h>
 #include <linux/kvm_host.h>
 #include <linux/slab.h>
 #include <linux/slab.h>
 
 
+#include "ioapic.h"
 #include "irq.h"
 #include "irq.h"
 #include "i8254.h"
 #include "i8254.h"
 #include "x86.h"
 #include "x86.h"
@@ -333,7 +334,8 @@ static void create_pit_timer(struct kvm *kvm, u32 val, int is_period)
 	struct kvm_kpit_state *ps = &kvm->arch.vpit->pit_state;
 	struct kvm_kpit_state *ps = &kvm->arch.vpit->pit_state;
 	s64 interval;
 	s64 interval;
 
 
-	if (!irqchip_in_kernel(kvm) || ps->flags & KVM_PIT_FLAGS_HPET_LEGACY)
+	if (!ioapic_in_kernel(kvm) ||
+	    ps->flags & KVM_PIT_FLAGS_HPET_LEGACY)
 		return;
 		return;
 
 
 	interval = muldiv64(val, NSEC_PER_SEC, KVM_PIT_FREQ);
 	interval = muldiv64(val, NSEC_PER_SEC, KVM_PIT_FREQ);

+ 6 - 23
arch/x86/kvm/ioapic.c

@@ -233,21 +233,7 @@ static void kvm_ioapic_inject_all(struct kvm_ioapic *ioapic, unsigned long irr)
 }
 }
 
 
 
 
-static void update_handled_vectors(struct kvm_ioapic *ioapic)
-{
-	DECLARE_BITMAP(handled_vectors, 256);
-	int i;
-
-	memset(handled_vectors, 0, sizeof(handled_vectors));
-	for (i = 0; i < IOAPIC_NUM_PINS; ++i)
-		__set_bit(ioapic->redirtbl[i].fields.vector, handled_vectors);
-	memcpy(ioapic->handled_vectors, handled_vectors,
-	       sizeof(handled_vectors));
-	smp_wmb();
-}
-
-void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap,
-			u32 *tmr)
+void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
 {
 {
 	struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic;
 	struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic;
 	union kvm_ioapic_redirect_entry *e;
 	union kvm_ioapic_redirect_entry *e;
@@ -260,13 +246,11 @@ void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap,
 		    kvm_irq_has_notifier(ioapic->kvm, KVM_IRQCHIP_IOAPIC, index) ||
 		    kvm_irq_has_notifier(ioapic->kvm, KVM_IRQCHIP_IOAPIC, index) ||
 		    index == RTC_GSI) {
 		    index == RTC_GSI) {
 			if (kvm_apic_match_dest(vcpu, NULL, 0,
 			if (kvm_apic_match_dest(vcpu, NULL, 0,
-				e->fields.dest_id, e->fields.dest_mode)) {
+			             e->fields.dest_id, e->fields.dest_mode) ||
+			    (e->fields.trig_mode == IOAPIC_EDGE_TRIG &&
+			     kvm_apic_pending_eoi(vcpu, e->fields.vector)))
 				__set_bit(e->fields.vector,
 				__set_bit(e->fields.vector,
 					(unsigned long *)eoi_exit_bitmap);
 					(unsigned long *)eoi_exit_bitmap);
-				if (e->fields.trig_mode == IOAPIC_LEVEL_TRIG)
-					__set_bit(e->fields.vector,
-						(unsigned long *)tmr);
-			}
 		}
 		}
 	}
 	}
 	spin_unlock(&ioapic->lock);
 	spin_unlock(&ioapic->lock);
@@ -315,7 +299,6 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
 			e->bits |= (u32) val;
 			e->bits |= (u32) val;
 			e->fields.remote_irr = 0;
 			e->fields.remote_irr = 0;
 		}
 		}
-		update_handled_vectors(ioapic);
 		mask_after = e->fields.mask;
 		mask_after = e->fields.mask;
 		if (mask_before != mask_after)
 		if (mask_before != mask_after)
 			kvm_fire_mask_notifiers(ioapic->kvm, KVM_IRQCHIP_IOAPIC, index, mask_after);
 			kvm_fire_mask_notifiers(ioapic->kvm, KVM_IRQCHIP_IOAPIC, index, mask_after);
@@ -599,7 +582,6 @@ static void kvm_ioapic_reset(struct kvm_ioapic *ioapic)
 	ioapic->id = 0;
 	ioapic->id = 0;
 	memset(ioapic->irq_eoi, 0x00, IOAPIC_NUM_PINS);
 	memset(ioapic->irq_eoi, 0x00, IOAPIC_NUM_PINS);
 	rtc_irq_eoi_tracking_reset(ioapic);
 	rtc_irq_eoi_tracking_reset(ioapic);
-	update_handled_vectors(ioapic);
 }
 }
 
 
 static const struct kvm_io_device_ops ioapic_mmio_ops = {
 static const struct kvm_io_device_ops ioapic_mmio_ops = {
@@ -628,8 +610,10 @@ int kvm_ioapic_init(struct kvm *kvm)
 	if (ret < 0) {
 	if (ret < 0) {
 		kvm->arch.vioapic = NULL;
 		kvm->arch.vioapic = NULL;
 		kfree(ioapic);
 		kfree(ioapic);
+		return ret;
 	}
 	}
 
 
+	kvm_vcpu_request_scan_ioapic(kvm);
 	return ret;
 	return ret;
 }
 }
 
 
@@ -666,7 +650,6 @@ int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state)
 	memcpy(ioapic, state, sizeof(struct kvm_ioapic_state));
 	memcpy(ioapic, state, sizeof(struct kvm_ioapic_state));
 	ioapic->irr = 0;
 	ioapic->irr = 0;
 	ioapic->irr_delivered = 0;
 	ioapic->irr_delivered = 0;
-	update_handled_vectors(ioapic);
 	kvm_vcpu_request_scan_ioapic(kvm);
 	kvm_vcpu_request_scan_ioapic(kvm);
 	kvm_ioapic_inject_all(ioapic, state->irr);
 	kvm_ioapic_inject_all(ioapic, state->irr);
 	spin_unlock(&ioapic->lock);
 	spin_unlock(&ioapic->lock);

+ 8 - 7
arch/x86/kvm/ioapic.h

@@ -9,6 +9,7 @@ struct kvm;
 struct kvm_vcpu;
 struct kvm_vcpu;
 
 
 #define IOAPIC_NUM_PINS  KVM_IOAPIC_NUM_PINS
 #define IOAPIC_NUM_PINS  KVM_IOAPIC_NUM_PINS
+#define MAX_NR_RESERVED_IOAPIC_PINS KVM_MAX_IRQ_ROUTES
 #define IOAPIC_VERSION_ID 0x11	/* IOAPIC version */
 #define IOAPIC_VERSION_ID 0x11	/* IOAPIC version */
 #define IOAPIC_EDGE_TRIG  0
 #define IOAPIC_EDGE_TRIG  0
 #define IOAPIC_LEVEL_TRIG 1
 #define IOAPIC_LEVEL_TRIG 1
@@ -73,7 +74,6 @@ struct kvm_ioapic {
 	struct kvm *kvm;
 	struct kvm *kvm;
 	void (*ack_notifier)(void *opaque, int irq);
 	void (*ack_notifier)(void *opaque, int irq);
 	spinlock_t lock;
 	spinlock_t lock;
-	DECLARE_BITMAP(handled_vectors, 256);
 	struct rtc_status rtc_status;
 	struct rtc_status rtc_status;
 	struct delayed_work eoi_inject;
 	struct delayed_work eoi_inject;
 	u32 irq_eoi[IOAPIC_NUM_PINS];
 	u32 irq_eoi[IOAPIC_NUM_PINS];
@@ -98,11 +98,12 @@ static inline struct kvm_ioapic *ioapic_irqchip(struct kvm *kvm)
 	return kvm->arch.vioapic;
 	return kvm->arch.vioapic;
 }
 }
 
 
-static inline bool kvm_ioapic_handles_vector(struct kvm *kvm, int vector)
+static inline int ioapic_in_kernel(struct kvm *kvm)
 {
 {
-	struct kvm_ioapic *ioapic = kvm->arch.vioapic;
-	smp_rmb();
-	return test_bit(vector, ioapic->handled_vectors);
+	int ret;
+
+	ret = (ioapic_irqchip(kvm) != NULL);
+	return ret;
 }
 }
 
 
 void kvm_rtc_eoi_tracking_restore_one(struct kvm_vcpu *vcpu);
 void kvm_rtc_eoi_tracking_restore_one(struct kvm_vcpu *vcpu);
@@ -120,7 +121,7 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
 		struct kvm_lapic_irq *irq, unsigned long *dest_map);
 		struct kvm_lapic_irq *irq, unsigned long *dest_map);
 int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state);
 int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state);
 int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state);
 int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state);
-void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap,
-			u32 *tmr);
+void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
+void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
 
 
 #endif
 #endif

+ 30 - 10
arch/x86/kvm/irq.c

@@ -37,15 +37,28 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
 }
 }
 EXPORT_SYMBOL(kvm_cpu_has_pending_timer);
 EXPORT_SYMBOL(kvm_cpu_has_pending_timer);
 
 
+/*
+ * check if there is a pending userspace external interrupt
+ */
+static int pending_userspace_extint(struct kvm_vcpu *v)
+{
+	return v->arch.pending_external_vector != -1;
+}
+
 /*
 /*
  * check if there is pending interrupt from
  * check if there is pending interrupt from
  * non-APIC source without intack.
  * non-APIC source without intack.
  */
  */
 static int kvm_cpu_has_extint(struct kvm_vcpu *v)
 static int kvm_cpu_has_extint(struct kvm_vcpu *v)
 {
 {
-	if (kvm_apic_accept_pic_intr(v))
-		return pic_irqchip(v->kvm)->output;	/* PIC */
-	else
+	u8 accept = kvm_apic_accept_pic_intr(v);
+
+	if (accept) {
+		if (irqchip_split(v->kvm))
+			return pending_userspace_extint(v);
+		else
+			return pic_irqchip(v->kvm)->output;
+	} else
 		return 0;
 }
 
 
@@ -57,13 +70,13 @@ static int kvm_cpu_has_extint(struct kvm_vcpu *v)
  */
  */
 int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v)
 int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v)
 {
 {
-	if (!irqchip_in_kernel(v->kvm))
+	if (!lapic_in_kernel(v))
 		return v->arch.interrupt.pending;
 		return v->arch.interrupt.pending;
 
 
 	if (kvm_cpu_has_extint(v))
 	if (kvm_cpu_has_extint(v))
 		return 1;
 		return 1;
 
 
-	if (kvm_apic_vid_enabled(v->kvm))
+	if (kvm_vcpu_apic_vid_enabled(v))
 		return 0;
 		return 0;
 
 
 	return kvm_apic_has_interrupt(v) != -1; /* LAPIC */
 	return kvm_apic_has_interrupt(v) != -1; /* LAPIC */
@@ -75,7 +88,7 @@ int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v)
  */
  */
 int kvm_cpu_has_interrupt(struct kvm_vcpu *v)
 int kvm_cpu_has_interrupt(struct kvm_vcpu *v)
 {
 {
-	if (!irqchip_in_kernel(v->kvm))
+	if (!lapic_in_kernel(v))
 		return v->arch.interrupt.pending;
 		return v->arch.interrupt.pending;
 
 
 	if (kvm_cpu_has_extint(v))
 	if (kvm_cpu_has_extint(v))
@@ -91,9 +104,16 @@ EXPORT_SYMBOL_GPL(kvm_cpu_has_interrupt);
  */
  */
 static int kvm_cpu_get_extint(struct kvm_vcpu *v)
 static int kvm_cpu_get_extint(struct kvm_vcpu *v)
 {
 {
-	if (kvm_cpu_has_extint(v))
-		return kvm_pic_read_irq(v->kvm); /* PIC */
-	return -1;
+	if (kvm_cpu_has_extint(v)) {
+		if (irqchip_split(v->kvm)) {
+			int vector = v->arch.pending_external_vector;
+
+			v->arch.pending_external_vector = -1;
+			return vector;
+		} else
+			return kvm_pic_read_irq(v->kvm); /* PIC */
+	} else
+		return -1;
 }
 
 
 /*
 /*
@@ -103,7 +123,7 @@ int kvm_cpu_get_interrupt(struct kvm_vcpu *v)
 {
 {
 	int vector;
 	int vector;
 
 
-	if (!irqchip_in_kernel(v->kvm))
+	if (!lapic_in_kernel(v))
 		return v->arch.interrupt.nr;
 		return v->arch.interrupt.nr;
 
 
 	vector = kvm_cpu_get_extint(v);
 	vector = kvm_cpu_get_extint(v);

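With a split irqchip the PIC lives in userspace, so an ExtINT shows up as a vector parked in pending_external_vector; kvm_cpu_get_extint() hands it out once and resets the slot to -1. In miniature (the struct and vector value are illustrative only):

#include <stdio.h>

struct vcpu { int pending_external_vector; };

static int has_extint(struct vcpu *v) { return v->pending_external_vector != -1; }

/* Consume the parked vector exactly once, mirroring the split-irqchip
 * branch of kvm_cpu_get_extint(). */
static int get_extint(struct vcpu *v)
{
	if (!has_extint(v))
		return -1;

	int vector = v->pending_external_vector;
	v->pending_external_vector = -1;
	return vector;
}

int main(void)
{
	struct vcpu v = { .pending_external_vector = 0x20 };	/* example vector */

	printf("first read : %d\n", get_extint(&v));	/* 32 */
	printf("second read: %d\n", get_extint(&v));	/* -1, already consumed */
	return 0;
}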
+ 26 - 1
arch/x86/kvm/irq.h

@@ -83,13 +83,38 @@ static inline struct kvm_pic *pic_irqchip(struct kvm *kvm)
 	return kvm->arch.vpic;
 	return kvm->arch.vpic;
 }
 }
 
 
+static inline int pic_in_kernel(struct kvm *kvm)
+{
+	int ret;
+
+	ret = (pic_irqchip(kvm) != NULL);
+	return ret;
+}
+
+static inline int irqchip_split(struct kvm *kvm)
+{
+	return kvm->arch.irqchip_split;
+}
+
 static inline int irqchip_in_kernel(struct kvm *kvm)
 static inline int irqchip_in_kernel(struct kvm *kvm)
 {
 {
 	struct kvm_pic *vpic = pic_irqchip(kvm);
 	struct kvm_pic *vpic = pic_irqchip(kvm);
+	bool ret;
+
+	ret = (vpic != NULL);
+	ret |= irqchip_split(kvm);
 
 
 	/* Read vpic before kvm->irq_routing.  */
 	/* Read vpic before kvm->irq_routing.  */
 	smp_rmb();
 	smp_rmb();
-	return vpic != NULL;
+	return ret;
+}
+
+static inline int lapic_in_kernel(struct kvm_vcpu *vcpu)
+{
+	/* Same as irqchip_in_kernel(vcpu->kvm), but with less
+	 * pointer chasing and no unnecessary memory barriers.
+	 */
+	return vcpu->arch.apic != NULL;
 }
 }
 
 
 void kvm_pic_reset(struct kvm_kpic_state *s);
 void kvm_pic_reset(struct kvm_kpic_state *s);

+ 88 - 41
arch/x86/kvm/irq_comm.c

@@ -91,8 +91,8 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
 	return r;
 	return r;
 }
 }
 
 
-static inline void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e,
-				   struct kvm_lapic_irq *irq)
+void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e,
+		     struct kvm_lapic_irq *irq)
 {
 {
 	trace_kvm_msi_set_irq(e->msi.address_lo, e->msi.data);
 	trace_kvm_msi_set_irq(e->msi.address_lo, e->msi.data);
 
 
@@ -108,6 +108,7 @@ static inline void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e,
 	irq->level = 1;
 	irq->level = 1;
 	irq->shorthand = 0;
 	irq->shorthand = 0;
 }
 }
+EXPORT_SYMBOL_GPL(kvm_set_msi_irq);
 
 
 int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
 int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
 		struct kvm *kvm, int irq_source_id, int level, bool line_status)
 		struct kvm *kvm, int irq_source_id, int level, bool line_status)
@@ -123,12 +124,16 @@ int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
 }
 }
 
 
 
 
-static int kvm_set_msi_inatomic(struct kvm_kernel_irq_routing_entry *e,
-			 struct kvm *kvm)
+int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e,
+			      struct kvm *kvm, int irq_source_id, int level,
+			      bool line_status)
 {
 {
 	struct kvm_lapic_irq irq;
 	struct kvm_lapic_irq irq;
 	int r;
 	int r;
 
 
+	if (unlikely(e->type != KVM_IRQ_ROUTING_MSI))
+		return -EWOULDBLOCK;
+
 	kvm_set_msi_irq(e, &irq);
 	kvm_set_msi_irq(e, &irq);
 
 
 	if (kvm_irq_delivery_to_apic_fast(kvm, NULL, &irq, &r, NULL))
 	if (kvm_irq_delivery_to_apic_fast(kvm, NULL, &irq, &r, NULL))
@@ -137,42 +142,6 @@ static int kvm_set_msi_inatomic(struct kvm_kernel_irq_routing_entry *e,
 		return -EWOULDBLOCK;
 		return -EWOULDBLOCK;
 }
 }
 
 
-/*
- * Deliver an IRQ in an atomic context if we can, or return a failure,
- * user can retry in a process context.
- * Return value:
- *  -EWOULDBLOCK - Can't deliver in atomic context: retry in a process context.
- *  Other values - No need to retry.
- */
-int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level)
-{
-	struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS];
-	struct kvm_kernel_irq_routing_entry *e;
-	int ret = -EINVAL;
-	int idx;
-
-	trace_kvm_set_irq(irq, level, irq_source_id);
-
-	/*
-	 * Injection into either PIC or IOAPIC might need to scan all CPUs,
-	 * which would need to be retried from thread context;  when same GSI
-	 * is connected to both PIC and IOAPIC, we'd have to report a
-	 * partial failure here.
-	 * Since there's no easy way to do this, we only support injecting MSI
-	 * which is limited to 1:1 GSI mapping.
-	 */
-	idx = srcu_read_lock(&kvm->irq_srcu);
-	if (kvm_irq_map_gsi(kvm, entries, irq) > 0) {
-		e = &entries[0];
-		if (likely(e->type == KVM_IRQ_ROUTING_MSI))
-			ret = kvm_set_msi_inatomic(e, kvm);
-		else
-			ret = -EWOULDBLOCK;
-	}
-	srcu_read_unlock(&kvm->irq_srcu, idx);
-	return ret;
-}
-
 int kvm_request_irq_source_id(struct kvm *kvm)
 int kvm_request_irq_source_id(struct kvm *kvm)
 {
 {
 	unsigned long *bitmap = &kvm->arch.irq_sources_bitmap;
 	unsigned long *bitmap = &kvm->arch.irq_sources_bitmap;
@@ -208,7 +177,7 @@ void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id)
 		goto unlock;
 		goto unlock;
 	}
 	}
 	clear_bit(irq_source_id, &kvm->arch.irq_sources_bitmap);
 	clear_bit(irq_source_id, &kvm->arch.irq_sources_bitmap);
-	if (!irqchip_in_kernel(kvm))
+	if (!ioapic_in_kernel(kvm))
 		goto unlock;
 		goto unlock;
 
 
 	kvm_ioapic_clear_all(kvm->arch.vioapic, irq_source_id);
 	kvm_ioapic_clear_all(kvm->arch.vioapic, irq_source_id);
@@ -297,6 +266,33 @@ out:
 	return r;
 	return r;
 }
 }
 
 
+bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq,
+			     struct kvm_vcpu **dest_vcpu)
+{
+	int i, r = 0;
+	struct kvm_vcpu *vcpu;
+
+	if (kvm_intr_is_single_vcpu_fast(kvm, irq, dest_vcpu))
+		return true;
+
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		if (!kvm_apic_present(vcpu))
+			continue;
+
+		if (!kvm_apic_match_dest(vcpu, NULL, irq->shorthand,
+					irq->dest_id, irq->dest_mode))
+			continue;
+
+		if (++r == 2)
+			return false;
+
+		*dest_vcpu = vcpu;
+	}
+
+	return r == 1;
+}
+EXPORT_SYMBOL_GPL(kvm_intr_is_single_vcpu);
+
 #define IOAPIC_ROUTING_ENTRY(irq) \
 #define IOAPIC_ROUTING_ENTRY(irq) \
 	{ .gsi = irq, .type = KVM_IRQ_ROUTING_IRQCHIP,	\
 	{ .gsi = irq, .type = KVM_IRQ_ROUTING_IRQCHIP,	\
 	  .u.irqchip = { .irqchip = KVM_IRQCHIP_IOAPIC, .pin = (irq) } }
 	  .u.irqchip = { .irqchip = KVM_IRQCHIP_IOAPIC, .pin = (irq) } }
@@ -328,3 +324,54 @@ int kvm_setup_default_irq_routing(struct kvm *kvm)
 	return kvm_set_irq_routing(kvm, default_routing,
 	return kvm_set_irq_routing(kvm, default_routing,
 				   ARRAY_SIZE(default_routing), 0);
 				   ARRAY_SIZE(default_routing), 0);
 }
 }
+
+static const struct kvm_irq_routing_entry empty_routing[] = {};
+
+int kvm_setup_empty_irq_routing(struct kvm *kvm)
+{
+	return kvm_set_irq_routing(kvm, empty_routing, 0, 0);
+}
+
+void kvm_arch_irq_routing_update(struct kvm *kvm)
+{
+	if (ioapic_in_kernel(kvm) || !irqchip_in_kernel(kvm))
+		return;
+	kvm_make_scan_ioapic_request(kvm);
+}
+
+void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
+{
+	struct kvm *kvm = vcpu->kvm;
+	struct kvm_kernel_irq_routing_entry *entry;
+	struct kvm_irq_routing_table *table;
+	u32 i, nr_ioapic_pins;
+	int idx;
+
+	/* kvm->irq_routing must be read after clearing
+	 * KVM_SCAN_IOAPIC. */
+	smp_mb();
+	idx = srcu_read_lock(&kvm->irq_srcu);
+	table = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
+	nr_ioapic_pins = min_t(u32, table->nr_rt_entries,
+			       kvm->arch.nr_reserved_ioapic_pins);
+	for (i = 0; i < nr_ioapic_pins; ++i) {
+		hlist_for_each_entry(entry, &table->map[i], link) {
+			u32 dest_id, dest_mode;
+			bool level;
+
+			if (entry->type != KVM_IRQ_ROUTING_MSI)
+				continue;
+			dest_id = (entry->msi.address_lo >> 12) & 0xff;
+			dest_mode = (entry->msi.address_lo >> 2) & 0x1;
+			level = entry->msi.data & MSI_DATA_TRIGGER_LEVEL;
+			if (level && kvm_apic_match_dest(vcpu, NULL, 0,
+						dest_id, dest_mode)) {
+				u32 vector = entry->msi.data & 0xff;
+
+				__set_bit(vector,
+					  (unsigned long *) eoi_exit_bitmap);
+			}
+		}
+	}
+	srcu_read_unlock(&kvm->irq_srcu, idx);
+}
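
kvm_scan_ioapic_routes() above pulls the destination ID (address bits 19:12), destination mode (address bit 2) and vector (data bits 7:0) out of each MSI routing entry and, for level-triggered entries that target this vCPU, sets the vector in a 256-bit EOI-exit bitmap. A standalone sketch of that field extraction, assuming a plain struct in place of the KVM routing entry:

/* Standalone sketch of the MSI field extraction done in
 * kvm_scan_ioapic_routes() above.  The entry layout mirrors the x86 MSI
 * address/data format; the helpers are illustrative only. */
#include <stdint.h>
#include <stdio.h>

#define MSI_DATA_TRIGGER_LEVEL	(1u << 15)

struct toy_msi {
	uint32_t address_lo;
	uint32_t data;
};

static void set_eoi_exit_bit(uint64_t bitmap[4], unsigned int vector)
{
	bitmap[vector / 64] |= 1ull << (vector % 64);
}

int main(void)
{
	/* Level-triggered MSI to physical APIC ID 3, vector 0xb1. */
	struct toy_msi e = {
		.address_lo = 0xfee00000u | (3u << 12),
		.data       = MSI_DATA_TRIGGER_LEVEL | 0xb1,
	};
	uint64_t eoi_exit_bitmap[4] = { 0 };

	uint32_t dest_id   = (e.address_lo >> 12) & 0xff;
	uint32_t dest_mode = (e.address_lo >> 2) & 0x1;
	uint32_t vector    = e.data & 0xff;
	int level          = !!(e.data & MSI_DATA_TRIGGER_LEVEL);

	if (level)
		set_eoi_exit_bit(eoi_exit_bitmap, vector);

	printf("dest_id=%u dest_mode=%u vector=0x%x level=%d word2=0x%llx\n",
	       dest_id, dest_mode, vector, level,
	       (unsigned long long)eoi_exit_bitmap[vector / 64]);
	return 0;
}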

+ 104 - 23
arch/x86/kvm/lapic.c

@@ -209,7 +209,7 @@ out:
 	if (old)
 	if (old)
 		kfree_rcu(old, rcu);
 		kfree_rcu(old, rcu);
 
 
-	kvm_vcpu_request_scan_ioapic(kvm);
+	kvm_make_scan_ioapic_request(kvm);
 }
 }
 
 
 static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val)
 static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val)
@@ -348,6 +348,8 @@ void kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir)
 	struct kvm_lapic *apic = vcpu->arch.apic;
 	struct kvm_lapic *apic = vcpu->arch.apic;
 
 
 	__kvm_apic_update_irr(pir, apic->regs);
 	__kvm_apic_update_irr(pir, apic->regs);
+
+	kvm_make_request(KVM_REQ_EVENT, vcpu);
 }
 }
 EXPORT_SYMBOL_GPL(kvm_apic_update_irr);
 EXPORT_SYMBOL_GPL(kvm_apic_update_irr);
 
 
@@ -390,7 +392,7 @@ static inline void apic_clear_irr(int vec, struct kvm_lapic *apic)
 
 
 	vcpu = apic->vcpu;
 	vcpu = apic->vcpu;
 
 
-	if (unlikely(kvm_apic_vid_enabled(vcpu->kvm))) {
+	if (unlikely(kvm_vcpu_apic_vid_enabled(vcpu))) {
 		/* try to update RVI */
 		/* try to update RVI */
 		apic_clear_vector(vec, apic->regs + APIC_IRR);
 		apic_clear_vector(vec, apic->regs + APIC_IRR);
 		kvm_make_request(KVM_REQ_EVENT, vcpu);
 		kvm_make_request(KVM_REQ_EVENT, vcpu);
@@ -551,15 +553,6 @@ static void pv_eoi_clr_pending(struct kvm_vcpu *vcpu)
 	__clear_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention);
 	__clear_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention);
 }
 }
 
 
-void kvm_apic_update_tmr(struct kvm_vcpu *vcpu, u32 *tmr)
-{
-	struct kvm_lapic *apic = vcpu->arch.apic;
-	int i;
-
-	for (i = 0; i < 8; i++)
-		apic_set_reg(apic, APIC_TMR + 0x10 * i, tmr[i]);
-}
-
 static void apic_update_ppr(struct kvm_lapic *apic)
 static void apic_update_ppr(struct kvm_lapic *apic)
 {
 {
 	u32 tpr, isrv, ppr, old_ppr;
 	u32 tpr, isrv, ppr, old_ppr;
@@ -764,6 +757,65 @@ out:
 	return ret;
 	return ret;
 }
 }
 
 
+bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq,
+			struct kvm_vcpu **dest_vcpu)
+{
+	struct kvm_apic_map *map;
+	bool ret = false;
+	struct kvm_lapic *dst = NULL;
+
+	if (irq->shorthand)
+		return false;
+
+	rcu_read_lock();
+	map = rcu_dereference(kvm->arch.apic_map);
+
+	if (!map)
+		goto out;
+
+	if (irq->dest_mode == APIC_DEST_PHYSICAL) {
+		if (irq->dest_id == 0xFF)
+			goto out;
+
+		if (irq->dest_id >= ARRAY_SIZE(map->phys_map))
+			goto out;
+
+		dst = map->phys_map[irq->dest_id];
+		if (dst && kvm_apic_present(dst->vcpu))
+			*dest_vcpu = dst->vcpu;
+		else
+			goto out;
+	} else {
+		u16 cid;
+		unsigned long bitmap = 1;
+		int i, r = 0;
+
+		if (!kvm_apic_logical_map_valid(map))
+			goto out;
+
+		apic_logical_id(map, irq->dest_id, &cid, (u16 *)&bitmap);
+
+		if (cid >= ARRAY_SIZE(map->logical_map))
+			goto out;
+
+		for_each_set_bit(i, &bitmap, 16) {
+			dst = map->logical_map[cid][i];
+			if (++r == 2)
+				goto out;
+		}
+
+		if (dst && kvm_apic_present(dst->vcpu))
+			*dest_vcpu = dst->vcpu;
+		else
+			goto out;
+	}
+
+	ret = true;
+out:
+	rcu_read_unlock();
+	return ret;
+}
+
 /*
 /*
  * Add a pending IRQ into lapic.
  * Add a pending IRQ into lapic.
  * Return 1 if successfully added and 0 if discarded.
  * Return 1 if successfully added and 0 if discarded.
@@ -781,6 +833,9 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
 	case APIC_DM_LOWEST:
 	case APIC_DM_LOWEST:
 		vcpu->arch.apic_arb_prio++;
 		vcpu->arch.apic_arb_prio++;
 	case APIC_DM_FIXED:
 	case APIC_DM_FIXED:
+		if (unlikely(trig_mode && !level))
+			break;
+
 		/* FIXME add logic for vcpu on reset */
 		/* FIXME add logic for vcpu on reset */
 		if (unlikely(!apic_enabled(apic)))
 		if (unlikely(!apic_enabled(apic)))
 			break;
 			break;
@@ -790,6 +845,13 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
 		if (dest_map)
 		if (dest_map)
 			__set_bit(vcpu->vcpu_id, dest_map);
 			__set_bit(vcpu->vcpu_id, dest_map);
 
 
+		if (apic_test_vector(vector, apic->regs + APIC_TMR) != !!trig_mode) {
+			if (trig_mode)
+				apic_set_vector(vector, apic->regs + APIC_TMR);
+			else
+				apic_clear_vector(vector, apic->regs + APIC_TMR);
+		}
+
 		if (kvm_x86_ops->deliver_posted_interrupt)
 		if (kvm_x86_ops->deliver_posted_interrupt)
 			kvm_x86_ops->deliver_posted_interrupt(vcpu, vector);
 			kvm_x86_ops->deliver_posted_interrupt(vcpu, vector);
 		else {
 		else {
@@ -868,16 +930,32 @@ int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2)
 	return vcpu1->arch.apic_arb_prio - vcpu2->arch.apic_arb_prio;
 	return vcpu1->arch.apic_arb_prio - vcpu2->arch.apic_arb_prio;
 }
 }
 
 
+static bool kvm_ioapic_handles_vector(struct kvm_lapic *apic, int vector)
+{
+	return test_bit(vector, (ulong *)apic->vcpu->arch.eoi_exit_bitmap);
+}
+
 static void kvm_ioapic_send_eoi(struct kvm_lapic *apic, int vector)
 static void kvm_ioapic_send_eoi(struct kvm_lapic *apic, int vector)
 {
 {
-	if (kvm_ioapic_handles_vector(apic->vcpu->kvm, vector)) {
-		int trigger_mode;
-		if (apic_test_vector(vector, apic->regs + APIC_TMR))
-			trigger_mode = IOAPIC_LEVEL_TRIG;
-		else
-			trigger_mode = IOAPIC_EDGE_TRIG;
-		kvm_ioapic_update_eoi(apic->vcpu, vector, trigger_mode);
+	int trigger_mode;
+
+	/* Eoi the ioapic only if the ioapic doesn't own the vector. */
+	if (!kvm_ioapic_handles_vector(apic, vector))
+		return;
+
+	/* Request a KVM exit to inform the userspace IOAPIC. */
+	if (irqchip_split(apic->vcpu->kvm)) {
+		apic->vcpu->arch.pending_ioapic_eoi = vector;
+		kvm_make_request(KVM_REQ_IOAPIC_EOI_EXIT, apic->vcpu);
+		return;
 	}
 	}
+
+	if (apic_test_vector(vector, apic->regs + APIC_TMR))
+		trigger_mode = IOAPIC_LEVEL_TRIG;
+	else
+		trigger_mode = IOAPIC_EDGE_TRIG;
+
+	kvm_ioapic_update_eoi(apic->vcpu, vector, trigger_mode);
 }
 }
 
 
 static int apic_set_eoi(struct kvm_lapic *apic)
 static int apic_set_eoi(struct kvm_lapic *apic)
@@ -1615,7 +1693,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event)
 		apic_set_reg(apic, APIC_ISR + 0x10 * i, 0);
 		apic_set_reg(apic, APIC_ISR + 0x10 * i, 0);
 		apic_set_reg(apic, APIC_TMR + 0x10 * i, 0);
 		apic_set_reg(apic, APIC_TMR + 0x10 * i, 0);
 	}
 	}
-	apic->irr_pending = kvm_apic_vid_enabled(vcpu->kvm);
+	apic->irr_pending = kvm_vcpu_apic_vid_enabled(vcpu);
 	apic->isr_count = kvm_x86_ops->hwapic_isr_update ? 1 : 0;
 	apic->isr_count = kvm_x86_ops->hwapic_isr_update ? 1 : 0;
 	apic->highest_isr_cache = -1;
 	apic->highest_isr_cache = -1;
 	update_divide_count(apic);
 	update_divide_count(apic);
@@ -1838,7 +1916,10 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu,
 		kvm_x86_ops->hwapic_isr_update(vcpu->kvm,
 		kvm_x86_ops->hwapic_isr_update(vcpu->kvm,
 				apic_find_highest_isr(apic));
 				apic_find_highest_isr(apic));
 	kvm_make_request(KVM_REQ_EVENT, vcpu);
 	kvm_make_request(KVM_REQ_EVENT, vcpu);
-	kvm_rtc_eoi_tracking_restore_one(vcpu);
+	if (ioapic_in_kernel(vcpu->kvm))
+		kvm_rtc_eoi_tracking_restore_one(vcpu);
+
+	vcpu->arch.apic_arb_prio = 0;
 }
 }
 
 
 void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
 void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
@@ -1922,7 +2003,7 @@ static void apic_sync_pv_eoi_to_guest(struct kvm_vcpu *vcpu,
 	    /* Cache not set: could be safe but we don't bother. */
 	    /* Cache not set: could be safe but we don't bother. */
 	    apic->highest_isr_cache == -1 ||
 	    apic->highest_isr_cache == -1 ||
 	    /* Need EOI to update ioapic. */
 	    /* Need EOI to update ioapic. */
-	    kvm_ioapic_handles_vector(vcpu->kvm, apic->highest_isr_cache)) {
+	    kvm_ioapic_handles_vector(apic, apic->highest_isr_cache)) {
 		/*
 		/*
 		 * PV EOI was disabled by apic_sync_pv_eoi_from_guest
 		 * PV EOI was disabled by apic_sync_pv_eoi_from_guest
 		 * so we need not do anything here.
 		 * so we need not do anything here.
@@ -1978,7 +2059,7 @@ int kvm_x2apic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 	struct kvm_lapic *apic = vcpu->arch.apic;
 	struct kvm_lapic *apic = vcpu->arch.apic;
 	u32 reg = (msr - APIC_BASE_MSR) << 4;
 	u32 reg = (msr - APIC_BASE_MSR) << 4;
 
 
-	if (!irqchip_in_kernel(vcpu->kvm) || !apic_x2apic_mode(apic))
+	if (!lapic_in_kernel(vcpu) || !apic_x2apic_mode(apic))
 		return 1;
 		return 1;
 
 
 	if (reg == APIC_ICR2)
 	if (reg == APIC_ICR2)
@@ -1995,7 +2076,7 @@ int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data)
 	struct kvm_lapic *apic = vcpu->arch.apic;
 	struct kvm_lapic *apic = vcpu->arch.apic;
 	u32 reg = (msr - APIC_BASE_MSR) << 4, low, high = 0;
 	u32 reg = (msr - APIC_BASE_MSR) << 4, low, high = 0;
 
 
-	if (!irqchip_in_kernel(vcpu->kvm) || !apic_x2apic_mode(apic))
+	if (!lapic_in_kernel(vcpu) || !apic_x2apic_mode(apic))
 		return 1;
 		return 1;
 
 
 	if (reg == APIC_DFR || reg == APIC_ICR2) {
 	if (reg == APIC_DFR || reg == APIC_ICR2) {
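
kvm_intr_is_single_vcpu() above falls back to a linear scan and succeeds only when exactly one vCPU matches the destination, bailing out as soon as a second match is found. The sketch below shows that "exactly one" pattern with an illustrative match predicate standing in for kvm_apic_match_dest():

/* Sketch of the "exactly one match" scan used by kvm_intr_is_single_vcpu()
 * above.  The vCPU array and the match predicate are illustrative. */
#include <stdbool.h>
#include <stdio.h>

struct toy_vcpu { int apic_id; };

static bool matches_dest(const struct toy_vcpu *v, int dest_id)
{
	return v->apic_id == dest_id;	/* stand-in for kvm_apic_match_dest() */
}

static bool intr_is_single_vcpu(struct toy_vcpu *vcpus, int n, int dest_id,
				struct toy_vcpu **dest)
{
	int matches = 0;

	for (int i = 0; i < n; i++) {
		if (!matches_dest(&vcpus[i], dest_id))
			continue;
		if (++matches == 2)
			return false;	/* more than one target: not "single" */
		*dest = &vcpus[i];
	}
	return matches == 1;
}

int main(void)
{
	struct toy_vcpu vcpus[] = { { 0 }, { 1 }, { 2 }, { 1 } };
	struct toy_vcpu *dest = NULL;

	printf("dest 2 single: %d\n", intr_is_single_vcpu(vcpus, 4, 2, &dest)); /* 1 */
	printf("dest 1 single: %d\n", intr_is_single_vcpu(vcpus, 4, 1, &dest)); /* 0 */
	printf("dest 7 single: %d\n", intr_is_single_vcpu(vcpus, 4, 7, &dest)); /* 0 */
	return 0;
}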

+ 4 - 3
arch/x86/kvm/lapic.h

@@ -57,7 +57,6 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value);
 u64 kvm_lapic_get_base(struct kvm_vcpu *vcpu);
 void kvm_apic_set_version(struct kvm_vcpu *vcpu);

-void kvm_apic_update_tmr(struct kvm_vcpu *vcpu, u32 *tmr);
 void __kvm_apic_update_irr(u32 *pir, void *regs);
 void kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir);
 int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq,
@@ -144,9 +143,9 @@ static inline int apic_x2apic_mode(struct kvm_lapic *apic)
 	return apic->vcpu->arch.apic_base & X2APIC_ENABLE;
 }

-static inline bool kvm_apic_vid_enabled(struct kvm *kvm)
+static inline bool kvm_vcpu_apic_vid_enabled(struct kvm_vcpu *vcpu)
 {
-	return kvm_x86_ops->vm_has_apicv(kvm);
+	return kvm_x86_ops->cpu_uses_apicv(vcpu);
 }

 static inline bool kvm_apic_has_events(struct kvm_vcpu *vcpu)
@@ -169,4 +168,6 @@ bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector);

 void wait_lapic_expire(struct kvm_vcpu *vcpu);

+bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq,
+			struct kvm_vcpu **dest_vcpu);
 #endif

+ 53 - 38
arch/x86/kvm/mmu.c

@@ -818,14 +818,11 @@ static void unaccount_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp)
 	kvm->arch.indirect_shadow_pages--;
 	kvm->arch.indirect_shadow_pages--;
 }
 }
 
 
-static int has_wrprotected_page(struct kvm_vcpu *vcpu,
-				gfn_t gfn,
-				int level)
+static int __has_wrprotected_page(gfn_t gfn, int level,
+				  struct kvm_memory_slot *slot)
 {
 {
-	struct kvm_memory_slot *slot;
 	struct kvm_lpage_info *linfo;
 	struct kvm_lpage_info *linfo;
 
 
-	slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
 	if (slot) {
 	if (slot) {
 		linfo = lpage_info_slot(gfn, slot, level);
 		linfo = lpage_info_slot(gfn, slot, level);
 		return linfo->write_count;
 		return linfo->write_count;
@@ -834,6 +831,14 @@ static int has_wrprotected_page(struct kvm_vcpu *vcpu,
 	return 1;
 	return 1;
 }
 }
 
 
+static int has_wrprotected_page(struct kvm_vcpu *vcpu, gfn_t gfn, int level)
+{
+	struct kvm_memory_slot *slot;
+
+	slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
+	return __has_wrprotected_page(gfn, level, slot);
+}
+
 static int host_mapping_level(struct kvm *kvm, gfn_t gfn)
 static int host_mapping_level(struct kvm *kvm, gfn_t gfn)
 {
 {
 	unsigned long page_size;
 	unsigned long page_size;
@@ -851,6 +856,17 @@ static int host_mapping_level(struct kvm *kvm, gfn_t gfn)
 	return ret;
 	return ret;
 }
 }
 
 
+static inline bool memslot_valid_for_gpte(struct kvm_memory_slot *slot,
+					  bool no_dirty_log)
+{
+	if (!slot || slot->flags & KVM_MEMSLOT_INVALID)
+		return false;
+	if (no_dirty_log && slot->dirty_bitmap)
+		return false;
+
+	return true;
+}
+
 static struct kvm_memory_slot *
 static struct kvm_memory_slot *
 gfn_to_memslot_dirty_bitmap(struct kvm_vcpu *vcpu, gfn_t gfn,
 gfn_to_memslot_dirty_bitmap(struct kvm_vcpu *vcpu, gfn_t gfn,
 			    bool no_dirty_log)
 			    bool no_dirty_log)
@@ -858,21 +874,25 @@ gfn_to_memslot_dirty_bitmap(struct kvm_vcpu *vcpu, gfn_t gfn,
 	struct kvm_memory_slot *slot;
 	struct kvm_memory_slot *slot;
 
 
 	slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
 	slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
-	if (!slot || slot->flags & KVM_MEMSLOT_INVALID ||
-	      (no_dirty_log && slot->dirty_bitmap))
+	if (!memslot_valid_for_gpte(slot, no_dirty_log))
 		slot = NULL;
 		slot = NULL;
 
 
 	return slot;
 	return slot;
 }
 }
 
 
-static bool mapping_level_dirty_bitmap(struct kvm_vcpu *vcpu, gfn_t large_gfn)
-{
-	return !gfn_to_memslot_dirty_bitmap(vcpu, large_gfn, true);
-}
-
-static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn)
+static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn,
+			 bool *force_pt_level)
 {
 {
 	int host_level, level, max_level;
 	int host_level, level, max_level;
+	struct kvm_memory_slot *slot;
+
+	if (unlikely(*force_pt_level))
+		return PT_PAGE_TABLE_LEVEL;
+
+	slot = kvm_vcpu_gfn_to_memslot(vcpu, large_gfn);
+	*force_pt_level = !memslot_valid_for_gpte(slot, true);
+	if (unlikely(*force_pt_level))
+		return PT_PAGE_TABLE_LEVEL;
 
 
 	host_level = host_mapping_level(vcpu->kvm, large_gfn);
 	host_level = host_mapping_level(vcpu->kvm, large_gfn);
 
 
@@ -882,7 +902,7 @@ static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn)
 	max_level = min(kvm_x86_ops->get_lpage_level(), host_level);
 	max_level = min(kvm_x86_ops->get_lpage_level(), host_level);
 
 
 	for (level = PT_DIRECTORY_LEVEL; level <= max_level; ++level)
 	for (level = PT_DIRECTORY_LEVEL; level <= max_level; ++level)
-		if (has_wrprotected_page(vcpu, large_gfn, level))
+		if (__has_wrprotected_page(large_gfn, level, slot))
 			break;
 			break;
 
 
 	return level - 1;
 	return level - 1;
@@ -2962,14 +2982,13 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code,
 {
 {
 	int r;
 	int r;
 	int level;
 	int level;
-	int force_pt_level;
+	bool force_pt_level = false;
 	pfn_t pfn;
 	pfn_t pfn;
 	unsigned long mmu_seq;
 	unsigned long mmu_seq;
 	bool map_writable, write = error_code & PFERR_WRITE_MASK;
 	bool map_writable, write = error_code & PFERR_WRITE_MASK;
 
 
-	force_pt_level = mapping_level_dirty_bitmap(vcpu, gfn);
+	level = mapping_level(vcpu, gfn, &force_pt_level);
 	if (likely(!force_pt_level)) {
 	if (likely(!force_pt_level)) {
-		level = mapping_level(vcpu, gfn);
 		/*
 		/*
 		 * This path builds a PAE pagetable - so we can map
 		 * This path builds a PAE pagetable - so we can map
 		 * 2mb pages at maximum. Therefore check if the level
 		 * 2mb pages at maximum. Therefore check if the level
@@ -2979,8 +2998,7 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code,
 			level = PT_DIRECTORY_LEVEL;
 			level = PT_DIRECTORY_LEVEL;
 
 
 		gfn &= ~(KVM_PAGES_PER_HPAGE(level) - 1);
 		gfn &= ~(KVM_PAGES_PER_HPAGE(level) - 1);
-	} else
-		level = PT_PAGE_TABLE_LEVEL;
+	}
 
 
 	if (fast_page_fault(vcpu, v, level, error_code))
 	if (fast_page_fault(vcpu, v, level, error_code))
 		return 0;
 		return 0;
@@ -3427,7 +3445,7 @@ static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn)
 
 
 static bool can_do_async_pf(struct kvm_vcpu *vcpu)
 static bool can_do_async_pf(struct kvm_vcpu *vcpu)
 {
 {
-	if (unlikely(!irqchip_in_kernel(vcpu->kvm) ||
+	if (unlikely(!lapic_in_kernel(vcpu) ||
 		     kvm_event_needs_reinjection(vcpu)))
 		     kvm_event_needs_reinjection(vcpu)))
 		return false;
 		return false;
 
 
@@ -3476,7 +3494,7 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code,
 	pfn_t pfn;
 	pfn_t pfn;
 	int r;
 	int r;
 	int level;
 	int level;
-	int force_pt_level;
+	bool force_pt_level;
 	gfn_t gfn = gpa >> PAGE_SHIFT;
 	gfn_t gfn = gpa >> PAGE_SHIFT;
 	unsigned long mmu_seq;
 	unsigned long mmu_seq;
 	int write = error_code & PFERR_WRITE_MASK;
 	int write = error_code & PFERR_WRITE_MASK;
@@ -3495,20 +3513,15 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code,
 	if (r)
 	if (r)
 		return r;
 		return r;
 
 
-	if (mapping_level_dirty_bitmap(vcpu, gfn) ||
-	    !check_hugepage_cache_consistency(vcpu, gfn, PT_DIRECTORY_LEVEL))
-		force_pt_level = 1;
-	else
-		force_pt_level = 0;
-
+	force_pt_level = !check_hugepage_cache_consistency(vcpu, gfn,
+							   PT_DIRECTORY_LEVEL);
+	level = mapping_level(vcpu, gfn, &force_pt_level);
 	if (likely(!force_pt_level)) {
 	if (likely(!force_pt_level)) {
-		level = mapping_level(vcpu, gfn);
 		if (level > PT_DIRECTORY_LEVEL &&
 		if (level > PT_DIRECTORY_LEVEL &&
 		    !check_hugepage_cache_consistency(vcpu, gfn, level))
 		    !check_hugepage_cache_consistency(vcpu, gfn, level))
 			level = PT_DIRECTORY_LEVEL;
 			level = PT_DIRECTORY_LEVEL;
 		gfn &= ~(KVM_PAGES_PER_HPAGE(level) - 1);
 		gfn &= ~(KVM_PAGES_PER_HPAGE(level) - 1);
-	} else
-		level = PT_PAGE_TABLE_LEVEL;
+	}
 
 
 	if (fast_page_fault(vcpu, gpa, level, error_code))
 	if (fast_page_fault(vcpu, gpa, level, error_code))
 		return 0;
 		return 0;
@@ -3706,7 +3719,7 @@ static void
 __reset_rsvds_bits_mask_ept(struct rsvd_bits_validate *rsvd_check,
 __reset_rsvds_bits_mask_ept(struct rsvd_bits_validate *rsvd_check,
 			    int maxphyaddr, bool execonly)
 			    int maxphyaddr, bool execonly)
 {
 {
-	int pte;
+	u64 bad_mt_xwr;
 
 
 	rsvd_check->rsvd_bits_mask[0][3] =
 	rsvd_check->rsvd_bits_mask[0][3] =
 		rsvd_bits(maxphyaddr, 51) | rsvd_bits(3, 7);
 		rsvd_bits(maxphyaddr, 51) | rsvd_bits(3, 7);
@@ -3724,14 +3737,16 @@ __reset_rsvds_bits_mask_ept(struct rsvd_bits_validate *rsvd_check,
 		rsvd_bits(maxphyaddr, 51) | rsvd_bits(12, 20);
 		rsvd_bits(maxphyaddr, 51) | rsvd_bits(12, 20);
 	rsvd_check->rsvd_bits_mask[1][0] = rsvd_check->rsvd_bits_mask[0][0];
 	rsvd_check->rsvd_bits_mask[1][0] = rsvd_check->rsvd_bits_mask[0][0];
 
 
-	for (pte = 0; pte < 64; pte++) {
-		int rwx_bits = pte & 7;
-		int mt = pte >> 3;
-		if (mt == 0x2 || mt == 0x3 || mt == 0x7 ||
-				rwx_bits == 0x2 || rwx_bits == 0x6 ||
-				(rwx_bits == 0x4 && !execonly))
-			rsvd_check->bad_mt_xwr |= (1ull << pte);
+	bad_mt_xwr = 0xFFull << (2 * 8);	/* bits 3..5 must not be 2 */
+	bad_mt_xwr |= 0xFFull << (3 * 8);	/* bits 3..5 must not be 3 */
+	bad_mt_xwr |= 0xFFull << (7 * 8);	/* bits 3..5 must not be 7 */
+	bad_mt_xwr |= REPEAT_BYTE(1ull << 2);	/* bits 0..2 must not be 010 */
+	bad_mt_xwr |= REPEAT_BYTE(1ull << 6);	/* bits 0..2 must not be 110 */
+	if (!execonly) {
+		/* bits 0..2 must not be 100 unless VMX capabilities allow it */
+		bad_mt_xwr |= REPEAT_BYTE(1ull << 4);
 	}
 	}
+	rsvd_check->bad_mt_xwr = bad_mt_xwr;
 }
 }
 
 
 static void reset_rsvds_bits_mask_ept(struct kvm_vcpu *vcpu,
 static void reset_rsvds_bits_mask_ept(struct kvm_vcpu *vcpu,
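
The rewritten __reset_rsvds_bits_mask_ept() builds the 64-bit bad_mt_xwr mask directly, one rule per byte lane or bit column, instead of looping over all 64 low-bit combinations of an EPT PTE. The user-space check below recomputes the mask both ways and verifies they agree; REPEAT_BYTE() is reproduced from the kernel's definition, the rest is a self-contained test, not kernel code.

/* Check that the closed-form bad_mt_xwr construction above matches the
 * per-PTE loop it replaces. */
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define REPEAT_BYTE(x)	((~0ull / 0xff) * (x))

static uint64_t bad_mt_xwr_loop(bool execonly)
{
	uint64_t mask = 0;

	for (int pte = 0; pte < 64; pte++) {
		int rwx_bits = pte & 7;
		int mt = pte >> 3;

		if (mt == 0x2 || mt == 0x3 || mt == 0x7 ||
		    rwx_bits == 0x2 || rwx_bits == 0x6 ||
		    (rwx_bits == 0x4 && !execonly))
			mask |= 1ull << pte;
	}
	return mask;
}

static uint64_t bad_mt_xwr_direct(bool execonly)
{
	uint64_t mask;

	mask  = 0xFFull << (2 * 8);		/* memory type 2 is reserved   */
	mask |= 0xFFull << (3 * 8);		/* memory type 3 is reserved   */
	mask |= 0xFFull << (7 * 8);		/* memory type 7 is reserved   */
	mask |= REPEAT_BYTE(1ull << 2);		/* XWR == 010 is reserved      */
	mask |= REPEAT_BYTE(1ull << 6);		/* XWR == 110 is reserved      */
	if (!execonly)
		mask |= REPEAT_BYTE(1ull << 4);	/* XWR == 100 needs execonly   */
	return mask;
}

int main(void)
{
	for (int execonly = 0; execonly <= 1; execonly++) {
		uint64_t a = bad_mt_xwr_loop(execonly);
		uint64_t b = bad_mt_xwr_direct(execonly);

		printf("execonly=%d loop=%016llx direct=%016llx\n", execonly,
		       (unsigned long long)a, (unsigned long long)b);
		assert(a == b);
	}
	return 0;
}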

+ 9 - 10
arch/x86/kvm/paging_tmpl.h

@@ -698,7 +698,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
 	int r;
 	pfn_t pfn;
 	int level = PT_PAGE_TABLE_LEVEL;
-	int force_pt_level;
+	bool force_pt_level = false;
 	unsigned long mmu_seq;
 	bool map_writable, is_self_change_mapping;

@@ -743,15 +743,14 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
 	is_self_change_mapping = FNAME(is_self_change_mapping)(vcpu,
 	      &walker, user_fault, &vcpu->arch.write_fault_to_shadow_pgtable);

-	if (walker.level >= PT_DIRECTORY_LEVEL)
-		force_pt_level = mapping_level_dirty_bitmap(vcpu, walker.gfn)
-		   || is_self_change_mapping;
-	else
-		force_pt_level = 1;
-	if (!force_pt_level) {
-		level = min(walker.level, mapping_level(vcpu, walker.gfn));
-		walker.gfn = walker.gfn & ~(KVM_PAGES_PER_HPAGE(level) - 1);
-	}
+	if (walker.level >= PT_DIRECTORY_LEVEL && !is_self_change_mapping) {
+		level = mapping_level(vcpu, walker.gfn, &force_pt_level);
+		if (likely(!force_pt_level)) {
+			level = min(walker.level, level);
+			walker.gfn = walker.gfn & ~(KVM_PAGES_PER_HPAGE(level) - 1);
+		}
+	} else
+		force_pt_level = true;

 	mmu_seq = vcpu->kvm->mmu_notifier_seq;
 	smp_rmb();

+ 19 - 24
arch/x86/kvm/svm.c

@@ -159,6 +159,9 @@ struct vcpu_svm {
 	u32 apf_reason;
 	u32 apf_reason;
 
 
 	u64  tsc_ratio;
 	u64  tsc_ratio;
+
+	/* cached guest cpuid flags for faster access */
+	bool nrips_enabled	: 1;
 };
 };
 
 
 static DEFINE_PER_CPU(u64, current_tsc_ratio);
 static DEFINE_PER_CPU(u64, current_tsc_ratio);
@@ -1086,7 +1089,7 @@ static u64 svm_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc)
 	return target_tsc - tsc;
 	return target_tsc - tsc;
 }
 }
 
 
-static void init_vmcb(struct vcpu_svm *svm, bool init_event)
+static void init_vmcb(struct vcpu_svm *svm)
 {
 {
 	struct vmcb_control_area *control = &svm->vmcb->control;
 	struct vmcb_control_area *control = &svm->vmcb->control;
 	struct vmcb_save_area *save = &svm->vmcb->save;
 	struct vmcb_save_area *save = &svm->vmcb->save;
@@ -1157,8 +1160,7 @@ static void init_vmcb(struct vcpu_svm *svm, bool init_event)
 	init_sys_seg(&save->ldtr, SEG_TYPE_LDT);
 	init_sys_seg(&save->ldtr, SEG_TYPE_LDT);
 	init_sys_seg(&save->tr, SEG_TYPE_BUSY_TSS16);
 	init_sys_seg(&save->tr, SEG_TYPE_BUSY_TSS16);
 
 
-	if (!init_event)
-		svm_set_efer(&svm->vcpu, 0);
+	svm_set_efer(&svm->vcpu, 0);
 	save->dr6 = 0xffff0ff0;
 	save->dr6 = 0xffff0ff0;
 	kvm_set_rflags(&svm->vcpu, 2);
 	kvm_set_rflags(&svm->vcpu, 2);
 	save->rip = 0x0000fff0;
 	save->rip = 0x0000fff0;
@@ -1212,7 +1214,7 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
 		if (kvm_vcpu_is_reset_bsp(&svm->vcpu))
 		if (kvm_vcpu_is_reset_bsp(&svm->vcpu))
 			svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP;
 			svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP;
 	}
 	}
-	init_vmcb(svm, init_event);
+	init_vmcb(svm);
 
 
 	kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy);
 	kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy);
 	kvm_register_write(vcpu, VCPU_REGS_RDX, eax);
 	kvm_register_write(vcpu, VCPU_REGS_RDX, eax);
@@ -1268,7 +1270,7 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
 	clear_page(svm->vmcb);
 	clear_page(svm->vmcb);
 	svm->vmcb_pa = page_to_pfn(page) << PAGE_SHIFT;
 	svm->vmcb_pa = page_to_pfn(page) << PAGE_SHIFT;
 	svm->asid_generation = 0;
 	svm->asid_generation = 0;
-	init_vmcb(svm, false);
+	init_vmcb(svm);
 
 
 	svm_init_osvw(&svm->vcpu);
 	svm_init_osvw(&svm->vcpu);
 
 
@@ -1890,7 +1892,7 @@ static int shutdown_interception(struct vcpu_svm *svm)
 	 * so reinitialize it.
 	 * so reinitialize it.
 	 */
 	 */
 	clear_page(svm->vmcb);
 	clear_page(svm->vmcb);
-	init_vmcb(svm, false);
+	init_vmcb(svm);
 
 
 	kvm_run->exit_reason = KVM_EXIT_SHUTDOWN;
 	kvm_run->exit_reason = KVM_EXIT_SHUTDOWN;
 	return 0;
 	return 0;
@@ -2365,7 +2367,9 @@ static int nested_svm_vmexit(struct vcpu_svm *svm)
 	nested_vmcb->control.exit_info_2       = vmcb->control.exit_info_2;
 	nested_vmcb->control.exit_info_2       = vmcb->control.exit_info_2;
 	nested_vmcb->control.exit_int_info     = vmcb->control.exit_int_info;
 	nested_vmcb->control.exit_int_info     = vmcb->control.exit_int_info;
 	nested_vmcb->control.exit_int_info_err = vmcb->control.exit_int_info_err;
 	nested_vmcb->control.exit_int_info_err = vmcb->control.exit_int_info_err;
-	nested_vmcb->control.next_rip          = vmcb->control.next_rip;
+
+	if (svm->nrips_enabled)
+		nested_vmcb->control.next_rip  = vmcb->control.next_rip;
 
 
 	/*
 	/*
 	 * If we emulate a VMRUN/#VMEXIT in the same host #vmexit cycle we have
 	 * If we emulate a VMRUN/#VMEXIT in the same host #vmexit cycle we have
@@ -3060,7 +3064,7 @@ static int cr8_write_interception(struct vcpu_svm *svm)
 	u8 cr8_prev = kvm_get_cr8(&svm->vcpu);
 	u8 cr8_prev = kvm_get_cr8(&svm->vcpu);
 	/* instruction emulation calls kvm_set_cr8() */
 	/* instruction emulation calls kvm_set_cr8() */
 	r = cr_interception(svm);
 	r = cr_interception(svm);
-	if (irqchip_in_kernel(svm->vcpu.kvm))
+	if (lapic_in_kernel(&svm->vcpu))
 		return r;
 		return r;
 	if (cr8_prev <= kvm_get_cr8(&svm->vcpu))
 	if (cr8_prev <= kvm_get_cr8(&svm->vcpu))
 		return r;
 		return r;
@@ -3294,24 +3298,11 @@ static int msr_interception(struct vcpu_svm *svm)
 
 
 static int interrupt_window_interception(struct vcpu_svm *svm)
 static int interrupt_window_interception(struct vcpu_svm *svm)
 {
 {
-	struct kvm_run *kvm_run = svm->vcpu.run;
-
 	kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
 	kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
 	svm_clear_vintr(svm);
 	svm_clear_vintr(svm);
 	svm->vmcb->control.int_ctl &= ~V_IRQ_MASK;
 	svm->vmcb->control.int_ctl &= ~V_IRQ_MASK;
 	mark_dirty(svm->vmcb, VMCB_INTR);
 	mark_dirty(svm->vmcb, VMCB_INTR);
 	++svm->vcpu.stat.irq_window_exits;
 	++svm->vcpu.stat.irq_window_exits;
-	/*
-	 * If the user space waits to inject interrupts, exit as soon as
-	 * possible
-	 */
-	if (!irqchip_in_kernel(svm->vcpu.kvm) &&
-	    kvm_run->request_interrupt_window &&
-	    !kvm_cpu_has_interrupt(&svm->vcpu)) {
-		kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
-		return 0;
-	}
-
 	return 1;
 	return 1;
 }
 }
 
 
@@ -3659,12 +3650,12 @@ static void svm_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set)
 	return;
 	return;
 }
 }
 
 
-static int svm_vm_has_apicv(struct kvm *kvm)
+static int svm_cpu_uses_apicv(struct kvm_vcpu *vcpu)
 {
 {
 	return 0;
 	return 0;
 }
 }
 
 
-static void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
+static void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu)
 {
 {
 	return;
 	return;
 }
 }
@@ -4098,6 +4089,10 @@ static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
 
 
 static void svm_cpuid_update(struct kvm_vcpu *vcpu)
 static void svm_cpuid_update(struct kvm_vcpu *vcpu)
 {
 {
+	struct vcpu_svm *svm = to_svm(vcpu);
+
+	/* Update nrips enabled cache */
+	svm->nrips_enabled = !!guest_cpuid_has_nrips(&svm->vcpu);
 }
 }
 
 
 static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
 static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
@@ -4425,7 +4420,7 @@ static struct kvm_x86_ops svm_x86_ops = {
 	.enable_irq_window = enable_irq_window,
 	.enable_irq_window = enable_irq_window,
 	.update_cr8_intercept = update_cr8_intercept,
 	.update_cr8_intercept = update_cr8_intercept,
 	.set_virtual_x2apic_mode = svm_set_virtual_x2apic_mode,
 	.set_virtual_x2apic_mode = svm_set_virtual_x2apic_mode,
-	.vm_has_apicv = svm_vm_has_apicv,
+	.cpu_uses_apicv = svm_cpu_uses_apicv,
 	.load_eoi_exitmap = svm_load_eoi_exitmap,
 	.load_eoi_exitmap = svm_load_eoi_exitmap,
 	.sync_pir_to_irr = svm_sync_pir_to_irr,
 	.sync_pir_to_irr = svm_sync_pir_to_irr,
 
 

+ 51 - 0
arch/x86/kvm/trace.h

@@ -128,6 +128,24 @@ TRACE_EVENT(kvm_pio,
 		  __entry->count > 1 ? "(...)" : "")
 );

+/*
+ * Tracepoint for fast mmio.
+ */
+TRACE_EVENT(kvm_fast_mmio,
+	TP_PROTO(u64 gpa),
+	TP_ARGS(gpa),
+
+	TP_STRUCT__entry(
+		__field(u64,	gpa)
+	),
+
+	TP_fast_assign(
+		__entry->gpa		= gpa;
+	),
+
+	TP_printk("fast mmio at gpa 0x%llx", __entry->gpa)
+);
+
 /*
  * Tracepoint for cpuid.
  */
@@ -974,6 +992,39 @@ TRACE_EVENT(kvm_enter_smm,
 		  __entry->smbase)
 );

+/*
+ * Tracepoint for VT-d posted-interrupts.
+ */
+TRACE_EVENT(kvm_pi_irte_update,
+	TP_PROTO(unsigned int vcpu_id, unsigned int gsi,
+		 unsigned int gvec, u64 pi_desc_addr, bool set),
+	TP_ARGS(vcpu_id, gsi, gvec, pi_desc_addr, set),
+
+	TP_STRUCT__entry(
+		__field(	unsigned int,	vcpu_id		)
+		__field(	unsigned int,	gsi		)
+		__field(	unsigned int,	gvec		)
+		__field(	u64,		pi_desc_addr	)
+		__field(	bool,		set		)
+	),
+
+	TP_fast_assign(
+		__entry->vcpu_id	= vcpu_id;
+		__entry->gsi		= gsi;
+		__entry->gvec		= gvec;
+		__entry->pi_desc_addr	= pi_desc_addr;
+		__entry->set		= set;
+	),
+
+	TP_printk("VT-d PI is %s for this irq, vcpu %u, gsi: 0x%x, "
+		  "gvec: 0x%x, pi_desc_addr: 0x%llx",
+		  __entry->set ? "enabled and being updated" : "disabled",
+		  __entry->vcpu_id,
+		  __entry->gsi,
+		  __entry->gvec,
+		  __entry->pi_desc_addr)
+);
+
 #endif /* _TRACE_KVM_H */

 #undef TRACE_INCLUDE_PATH

+ 605 - 145
arch/x86/kvm/vmx.c

@@ -35,6 +35,7 @@
 #include "kvm_cache_regs.h"
 #include "kvm_cache_regs.h"
 #include "x86.h"
 #include "x86.h"
 
 
+#include <asm/cpu.h>
 #include <asm/io.h>
 #include <asm/io.h>
 #include <asm/desc.h>
 #include <asm/desc.h>
 #include <asm/vmx.h>
 #include <asm/vmx.h>
@@ -45,6 +46,7 @@
 #include <asm/debugreg.h>
 #include <asm/debugreg.h>
 #include <asm/kexec.h>
 #include <asm/kexec.h>
 #include <asm/apic.h>
 #include <asm/apic.h>
+#include <asm/irq_remapping.h>
 
 
 #include "trace.h"
 #include "trace.h"
 #include "pmu.h"
 #include "pmu.h"
@@ -424,6 +426,9 @@ struct nested_vmx {
 	/* to migrate it to L2 if VM_ENTRY_LOAD_DEBUG_CONTROLS is off */
 	/* to migrate it to L2 if VM_ENTRY_LOAD_DEBUG_CONTROLS is off */
 	u64 vmcs01_debugctl;
 	u64 vmcs01_debugctl;
 
 
+	u16 vpid02;
+	u16 last_vpid;
+
 	u32 nested_vmx_procbased_ctls_low;
 	u32 nested_vmx_procbased_ctls_low;
 	u32 nested_vmx_procbased_ctls_high;
 	u32 nested_vmx_procbased_ctls_high;
 	u32 nested_vmx_true_procbased_ctls_low;
 	u32 nested_vmx_true_procbased_ctls_low;
@@ -440,14 +445,33 @@ struct nested_vmx {
 	u32 nested_vmx_misc_low;
 	u32 nested_vmx_misc_low;
 	u32 nested_vmx_misc_high;
 	u32 nested_vmx_misc_high;
 	u32 nested_vmx_ept_caps;
 	u32 nested_vmx_ept_caps;
+	u32 nested_vmx_vpid_caps;
 };
 };
 
 
 #define POSTED_INTR_ON  0
 #define POSTED_INTR_ON  0
+#define POSTED_INTR_SN  1
+
 /* Posted-Interrupt Descriptor */
 /* Posted-Interrupt Descriptor */
 struct pi_desc {
 struct pi_desc {
 	u32 pir[8];     /* Posted interrupt requested */
 	u32 pir[8];     /* Posted interrupt requested */
-	u32 control;	/* bit 0 of control is outstanding notification bit */
-	u32 rsvd[7];
+	union {
+		struct {
+				/* bit 256 - Outstanding Notification */
+			u16	on	: 1,
+				/* bit 257 - Suppress Notification */
+				sn	: 1,
+				/* bit 271:258 - Reserved */
+				rsvd_1	: 14;
+				/* bit 279:272 - Notification Vector */
+			u8	nv;
+				/* bit 287:280 - Reserved */
+			u8	rsvd_2;
+				/* bit 319:288 - Notification Destination */
+			u32	ndst;
+		};
+		u64 control;
+	};
+	u32 rsvd[6];
 } __aligned(64);
 } __aligned(64);
 
 
 static bool pi_test_and_set_on(struct pi_desc *pi_desc)
 static bool pi_test_and_set_on(struct pi_desc *pi_desc)
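
The reworked posted-interrupt descriptor overlays named bitfields (ON, SN, NV, NDST) on a 64-bit control word inside a 64-byte, 64-byte-aligned structure. The user-space reproduction below checks the size, alignment and the control-bit correspondence; it assumes the usual little-endian x86-64 bitfield ABI and is an illustration only, not kernel code.

/* User-space reproduction of the posted-interrupt descriptor layout above. */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

struct pi_desc {
	uint32_t pir[8];		/* posted interrupt requested */
	union {
		struct {
			uint16_t	on	: 1,	/* outstanding notification */
					sn	: 1,	/* suppress notification    */
					rsvd_1	: 14;
			uint8_t		nv;		/* notification vector      */
			uint8_t		rsvd_2;
			uint32_t	ndst;		/* notification destination */
		};
		uint64_t control;
	};
	uint32_t rsvd[6];
} __attribute__((aligned(64)));

int main(void)
{
	struct pi_desc pi = { { 0 } };

	static_assert(sizeof(struct pi_desc) == 64, "descriptor must be 64 bytes");
	static_assert(_Alignof(struct pi_desc) == 64, "descriptor must be 64-byte aligned");

	pi.control |= 1ull << 0;	/* POSTED_INTR_ON */
	pi.control |= 1ull << 1;	/* POSTED_INTR_SN */
	printf("on=%u sn=%u nv=0x%x ndst=%u\n", pi.on, pi.sn, pi.nv, pi.ndst);

	pi.nv = 0xf2;			/* e.g. a notification vector        */
	pi.ndst = 3 << 8;		/* xAPIC: destination in bits 15:8   */
	printf("control=%#llx\n", (unsigned long long)pi.control);
	return 0;
}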
@@ -467,6 +491,30 @@ static int pi_test_and_set_pir(int vector, struct pi_desc *pi_desc)
 	return test_and_set_bit(vector, (unsigned long *)pi_desc->pir);
 	return test_and_set_bit(vector, (unsigned long *)pi_desc->pir);
 }
 }
 
 
+static inline void pi_clear_sn(struct pi_desc *pi_desc)
+{
+	return clear_bit(POSTED_INTR_SN,
+			(unsigned long *)&pi_desc->control);
+}
+
+static inline void pi_set_sn(struct pi_desc *pi_desc)
+{
+	return set_bit(POSTED_INTR_SN,
+			(unsigned long *)&pi_desc->control);
+}
+
+static inline int pi_test_on(struct pi_desc *pi_desc)
+{
+	return test_bit(POSTED_INTR_ON,
+			(unsigned long *)&pi_desc->control);
+}
+
+static inline int pi_test_sn(struct pi_desc *pi_desc)
+{
+	return test_bit(POSTED_INTR_SN,
+			(unsigned long *)&pi_desc->control);
+}
+
 struct vcpu_vmx {
 struct vcpu_vmx {
 	struct kvm_vcpu       vcpu;
 	struct kvm_vcpu       vcpu;
 	unsigned long         host_rsp;
 	unsigned long         host_rsp;
@@ -532,8 +580,6 @@ struct vcpu_vmx {
 	s64 vnmi_blocked_time;
 	s64 vnmi_blocked_time;
 	u32 exit_reason;
 	u32 exit_reason;
 
 
-	bool rdtscp_enabled;
-
 	/* Posted interrupt descriptor */
 	/* Posted interrupt descriptor */
 	struct pi_desc pi_desc;
 	struct pi_desc pi_desc;
 
 
@@ -563,6 +609,11 @@ static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
 	return container_of(vcpu, struct vcpu_vmx, vcpu);
 	return container_of(vcpu, struct vcpu_vmx, vcpu);
 }
 }
 
 
+static struct pi_desc *vcpu_to_pi_desc(struct kvm_vcpu *vcpu)
+{
+	return &(to_vmx(vcpu)->pi_desc);
+}
+
 #define VMCS12_OFFSET(x) offsetof(struct vmcs12, x)
 #define VMCS12_OFFSET(x) offsetof(struct vmcs12, x)
 #define FIELD(number, name)	[number] = VMCS12_OFFSET(name)
 #define FIELD(number, name)	[number] = VMCS12_OFFSET(name)
 #define FIELD64(number, name)	[number] = VMCS12_OFFSET(name), \
 #define FIELD64(number, name)	[number] = VMCS12_OFFSET(name), \
@@ -809,7 +860,7 @@ static void kvm_cpu_vmxon(u64 addr);
 static void kvm_cpu_vmxoff(void);
 static void kvm_cpu_vmxoff(void);
 static bool vmx_mpx_supported(void);
 static bool vmx_mpx_supported(void);
 static bool vmx_xsaves_supported(void);
 static bool vmx_xsaves_supported(void);
-static int vmx_vm_has_apicv(struct kvm *kvm);
+static int vmx_cpu_uses_apicv(struct kvm_vcpu *vcpu);
 static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr);
 static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr);
 static void vmx_set_segment(struct kvm_vcpu *vcpu,
 static void vmx_set_segment(struct kvm_vcpu *vcpu,
 			    struct kvm_segment *var, int seg);
 			    struct kvm_segment *var, int seg);
@@ -831,6 +882,13 @@ static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
 static DEFINE_PER_CPU(struct list_head, loaded_vmcss_on_cpu);
 static DEFINE_PER_CPU(struct list_head, loaded_vmcss_on_cpu);
 static DEFINE_PER_CPU(struct desc_ptr, host_gdt);
 static DEFINE_PER_CPU(struct desc_ptr, host_gdt);
 
 
+/*
+ * We maintian a per-CPU linked-list of vCPU, so in wakeup_handler() we
+ * can find which vCPU should be waken up.
+ */
+static DEFINE_PER_CPU(struct list_head, blocked_vcpu_on_cpu);
+static DEFINE_PER_CPU(spinlock_t, blocked_vcpu_on_cpu_lock);
+
 static unsigned long *vmx_io_bitmap_a;
 static unsigned long *vmx_io_bitmap_a;
 static unsigned long *vmx_io_bitmap_b;
 static unsigned long *vmx_io_bitmap_b;
 static unsigned long *vmx_msr_bitmap_legacy;
 static unsigned long *vmx_msr_bitmap_legacy;
@@ -946,9 +1004,9 @@ static inline bool cpu_has_vmx_tpr_shadow(void)
 	return vmcs_config.cpu_based_exec_ctrl & CPU_BASED_TPR_SHADOW;
 	return vmcs_config.cpu_based_exec_ctrl & CPU_BASED_TPR_SHADOW;
 }
 }
 
 
-static inline bool vm_need_tpr_shadow(struct kvm *kvm)
+static inline bool cpu_need_tpr_shadow(struct kvm_vcpu *vcpu)
 {
 {
-	return (cpu_has_vmx_tpr_shadow()) && (irqchip_in_kernel(kvm));
+	return cpu_has_vmx_tpr_shadow() && lapic_in_kernel(vcpu);
 }
 }
 
 
 static inline bool cpu_has_secondary_exec_ctrls(void)
 static inline bool cpu_has_secondary_exec_ctrls(void)
@@ -983,7 +1041,8 @@ static inline bool cpu_has_vmx_virtual_intr_delivery(void)
 
 
 static inline bool cpu_has_vmx_posted_intr(void)
 static inline bool cpu_has_vmx_posted_intr(void)
 {
 {
-	return vmcs_config.pin_based_exec_ctrl & PIN_BASED_POSTED_INTR;
+	return IS_ENABLED(CONFIG_X86_LOCAL_APIC) &&
+		vmcs_config.pin_based_exec_ctrl & PIN_BASED_POSTED_INTR;
 }
 }
 
 
 static inline bool cpu_has_vmx_apicv(void)
 static inline bool cpu_has_vmx_apicv(void)
@@ -1062,9 +1121,9 @@ static inline bool cpu_has_vmx_ple(void)
 		SECONDARY_EXEC_PAUSE_LOOP_EXITING;
 		SECONDARY_EXEC_PAUSE_LOOP_EXITING;
 }
 }
 
 
-static inline bool vm_need_virtualize_apic_accesses(struct kvm *kvm)
+static inline bool cpu_need_virtualize_apic_accesses(struct kvm_vcpu *vcpu)
 {
 {
-	return flexpriority_enabled && irqchip_in_kernel(kvm);
+	return flexpriority_enabled && lapic_in_kernel(vcpu);
 }
 }
 
 
 static inline bool cpu_has_vmx_vpid(void)
 static inline bool cpu_has_vmx_vpid(void)
@@ -1157,6 +1216,11 @@ static inline bool nested_cpu_has_virt_x2apic_mode(struct vmcs12 *vmcs12)
 	return nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE);
 	return nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE);
 }
 }
 
 
+static inline bool nested_cpu_has_vpid(struct vmcs12 *vmcs12)
+{
+	return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_VPID);
+}
+
 static inline bool nested_cpu_has_apic_reg_virt(struct vmcs12 *vmcs12)
 static inline bool nested_cpu_has_apic_reg_virt(struct vmcs12 *vmcs12)
 {
 {
 	return nested_cpu_has2(vmcs12, SECONDARY_EXEC_APIC_REGISTER_VIRT);
 	return nested_cpu_has2(vmcs12, SECONDARY_EXEC_APIC_REGISTER_VIRT);
@@ -1337,13 +1401,13 @@ static void loaded_vmcs_clear(struct loaded_vmcs *loaded_vmcs)
 			 __loaded_vmcs_clear, loaded_vmcs, 1);
 			 __loaded_vmcs_clear, loaded_vmcs, 1);
 }
 }
 
 
-static inline void vpid_sync_vcpu_single(struct vcpu_vmx *vmx)
+static inline void vpid_sync_vcpu_single(int vpid)
 {
 {
-	if (vmx->vpid == 0)
+	if (vpid == 0)
 		return;
 		return;
 
 
 	if (cpu_has_vmx_invvpid_single())
 	if (cpu_has_vmx_invvpid_single())
-		__invvpid(VMX_VPID_EXTENT_SINGLE_CONTEXT, vmx->vpid, 0);
+		__invvpid(VMX_VPID_EXTENT_SINGLE_CONTEXT, vpid, 0);
 }
 }
 
 
 static inline void vpid_sync_vcpu_global(void)
 static inline void vpid_sync_vcpu_global(void)
@@ -1352,10 +1416,10 @@ static inline void vpid_sync_vcpu_global(void)
 		__invvpid(VMX_VPID_EXTENT_ALL_CONTEXT, 0, 0);
 		__invvpid(VMX_VPID_EXTENT_ALL_CONTEXT, 0, 0);
 }
 }
 
 
-static inline void vpid_sync_context(struct vcpu_vmx *vmx)
+static inline void vpid_sync_context(int vpid)
 {
 {
 	if (cpu_has_vmx_invvpid_single())
 	if (cpu_has_vmx_invvpid_single())
-		vpid_sync_vcpu_single(vmx);
+		vpid_sync_vcpu_single(vpid);
 	else
 	else
 		vpid_sync_vcpu_global();
 		vpid_sync_vcpu_global();
 }
 }
@@ -1895,6 +1959,52 @@ static void vmx_load_host_state(struct vcpu_vmx *vmx)
 	preempt_enable();
 	preempt_enable();
 }
 }
 
 
+static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
+{
+	struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
+	struct pi_desc old, new;
+	unsigned int dest;
+
+	if (!kvm_arch_has_assigned_device(vcpu->kvm) ||
+		!irq_remapping_cap(IRQ_POSTING_CAP))
+		return;
+
+	do {
+		old.control = new.control = pi_desc->control;
+
+		/*
+		 * If 'nv' field is POSTED_INTR_WAKEUP_VECTOR, there
+		 * are two possible cases:
+		 * 1. After running 'pre_block', context switch
+		 *    happened. For this case, 'sn' was set in
+		 *    vmx_vcpu_put(), so we need to clear it here.
+		 * 2. After running 'pre_block', we were blocked,
+		 *    and woken up by some other guy. For this case,
+		 *    we don't need to do anything, 'pi_post_block'
+		 *    will do everything for us. However, we cannot
+		 *    check whether it is case #1 or case #2 here
+		 *    (maybe, not needed), so we also clear sn here,
+		 *    I think it is not a big deal.
+		 */
+		if (pi_desc->nv != POSTED_INTR_WAKEUP_VECTOR) {
+			if (vcpu->cpu != cpu) {
+				dest = cpu_physical_id(cpu);
+
+				if (x2apic_enabled())
+					new.ndst = dest;
+				else
+					new.ndst = (dest << 8) & 0xFF00;
+			}
+
+			/* set 'NV' to 'notification vector' */
+			new.nv = POSTED_INTR_VECTOR;
+		}
+
+		/* Allow posting non-urgent interrupts */
+		new.sn = 0;
+	} while (cmpxchg(&pi_desc->control, old.control,
+			new.control) != old.control);
+}
 /*
 /*
  * Switches to specified vcpu, until a matching vcpu_put(), but assumes
  * Switches to specified vcpu, until a matching vcpu_put(), but assumes
  * vcpu mutex is already taken.
  * vcpu mutex is already taken.
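
vmx_vcpu_pi_load() above updates the descriptor's control word with a read-modify-write cmpxchg loop, so the NDST/NV update and the SN clear appear as one atomic change to a concurrent interrupt poster. A user-space sketch of the same pattern, using C11 atomics in place of the kernel's cmpxchg() and a simplified field packing (SN = bit 1, NV = bits 23:16, NDST = bits 63:32):

/* Sketch of the cmpxchg update loop used in vmx_vcpu_pi_load() above. */
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

#define PI_SN		(1ull << 1)
#define PI_NV_SHIFT	16
#define PI_NDST_SHIFT	32

static _Atomic uint64_t pi_control;

static void pi_update_on_load(uint32_t dest, uint8_t notification_vector)
{
	uint64_t old, new;

	old = atomic_load(&pi_control);
	do {
		new = old;
		new &= ~((0xffull << PI_NV_SHIFT) | (0xffffffffull << PI_NDST_SHIFT));
		new |= (uint64_t)notification_vector << PI_NV_SHIFT;
		new |= (uint64_t)dest << PI_NDST_SHIFT;
		new &= ~PI_SN;			/* allow posting again */
		/* on failure 'old' is refreshed and the loop recomputes */
	} while (!atomic_compare_exchange_weak(&pi_control, &old, new));
}

int main(void)
{
	atomic_store(&pi_control, PI_SN);	/* SN was set while preempted */
	pi_update_on_load(3, 0xf2);
	printf("control=%#llx\n", (unsigned long long)atomic_load(&pi_control));
	return 0;
}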
@@ -1945,10 +2055,27 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 		vmcs_writel(HOST_IA32_SYSENTER_ESP, sysenter_esp); /* 22.2.3 */
 		vmcs_writel(HOST_IA32_SYSENTER_ESP, sysenter_esp); /* 22.2.3 */
 		vmx->loaded_vmcs->cpu = cpu;
 		vmx->loaded_vmcs->cpu = cpu;
 	}
 	}
+
+	vmx_vcpu_pi_load(vcpu, cpu);
+}
+
+static void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu)
+{
+	struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
+
+	if (!kvm_arch_has_assigned_device(vcpu->kvm) ||
+		!irq_remapping_cap(IRQ_POSTING_CAP))
+		return;
+
+	/* Set SN when the vCPU is preempted */
+	if (vcpu->preempted)
+		pi_set_sn(pi_desc);
 }
 }
 
 
 static void vmx_vcpu_put(struct kvm_vcpu *vcpu)
 static void vmx_vcpu_put(struct kvm_vcpu *vcpu)
 {
 {
+	vmx_vcpu_pi_put(vcpu);
+
 	__vmx_load_host_state(to_vmx(vcpu));
 	__vmx_load_host_state(to_vmx(vcpu));
 	if (!vmm_exclusive) {
 	if (!vmm_exclusive) {
 		__loaded_vmcs_clear(to_vmx(vcpu)->loaded_vmcs);
 		__loaded_vmcs_clear(to_vmx(vcpu)->loaded_vmcs);
@@ -2207,7 +2334,7 @@ static void setup_msrs(struct vcpu_vmx *vmx)
 		if (index >= 0)
 		if (index >= 0)
 			move_msr_up(vmx, index, save_nmsrs++);
 			move_msr_up(vmx, index, save_nmsrs++);
 		index = __find_msr_index(vmx, MSR_TSC_AUX);
 		index = __find_msr_index(vmx, MSR_TSC_AUX);
-		if (index >= 0 && vmx->rdtscp_enabled)
+		if (index >= 0 && guest_cpuid_has_rdtscp(&vmx->vcpu))
 			move_msr_up(vmx, index, save_nmsrs++);
 			move_msr_up(vmx, index, save_nmsrs++);
 		/*
 		/*
 		 * MSR_STAR is only needed on long mode guests, and only
 		 * MSR_STAR is only needed on long mode guests, and only
@@ -2377,7 +2504,7 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
 	vmx->nested.nested_vmx_pinbased_ctls_high |=
 	vmx->nested.nested_vmx_pinbased_ctls_high |=
 		PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR |
 		PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR |
 		PIN_BASED_VMX_PREEMPTION_TIMER;
 		PIN_BASED_VMX_PREEMPTION_TIMER;
-	if (vmx_vm_has_apicv(vmx->vcpu.kvm))
+	if (vmx_cpu_uses_apicv(&vmx->vcpu))
 		vmx->nested.nested_vmx_pinbased_ctls_high |=
 		vmx->nested.nested_vmx_pinbased_ctls_high |=
 			PIN_BASED_POSTED_INTR;
 			PIN_BASED_POSTED_INTR;
 
 
@@ -2471,10 +2598,12 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
 		SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
 		SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
 		SECONDARY_EXEC_RDTSCP |
 		SECONDARY_EXEC_RDTSCP |
 		SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
 		SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
+		SECONDARY_EXEC_ENABLE_VPID |
 		SECONDARY_EXEC_APIC_REGISTER_VIRT |
 		SECONDARY_EXEC_APIC_REGISTER_VIRT |
 		SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
 		SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
 		SECONDARY_EXEC_WBINVD_EXITING |
 		SECONDARY_EXEC_WBINVD_EXITING |
-		SECONDARY_EXEC_XSAVES;
+		SECONDARY_EXEC_XSAVES |
+		SECONDARY_EXEC_PCOMMIT;
 
 
 	if (enable_ept) {
 	if (enable_ept) {
 		/* nested EPT: emulate EPT also to L1 */
 		/* nested EPT: emulate EPT also to L1 */
@@ -2493,6 +2622,12 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
 	} else
 	} else
 		vmx->nested.nested_vmx_ept_caps = 0;
 		vmx->nested.nested_vmx_ept_caps = 0;
 
 
+	if (enable_vpid)
+		vmx->nested.nested_vmx_vpid_caps = VMX_VPID_INVVPID_BIT |
+				VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT;
+	else
+		vmx->nested.nested_vmx_vpid_caps = 0;
+
 	if (enable_unrestricted_guest)
 	if (enable_unrestricted_guest)
 		vmx->nested.nested_vmx_secondary_ctls_high |=
 		vmx->nested.nested_vmx_secondary_ctls_high |=
 			SECONDARY_EXEC_UNRESTRICTED_GUEST;
 			SECONDARY_EXEC_UNRESTRICTED_GUEST;
@@ -2608,7 +2743,8 @@ static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
 		break;
 		break;
 	case MSR_IA32_VMX_EPT_VPID_CAP:
 	case MSR_IA32_VMX_EPT_VPID_CAP:
 		/* Currently, no nested vpid support */
 		/* Currently, no nested vpid support */
-		*pdata = vmx->nested.nested_vmx_ept_caps;
+		*pdata = vmx->nested.nested_vmx_ept_caps |
+			((u64)vmx->nested.nested_vmx_vpid_caps << 32);
 		break;
 		break;
 	default:
 	default:
 		return 1;
 		return 1;
@@ -2673,7 +2809,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		msr_info->data = vcpu->arch.ia32_xss;
 		msr_info->data = vcpu->arch.ia32_xss;
 		break;
 		break;
 	case MSR_TSC_AUX:
 	case MSR_TSC_AUX:
-		if (!to_vmx(vcpu)->rdtscp_enabled)
+		if (!guest_cpuid_has_rdtscp(vcpu))
 			return 1;
 			return 1;
 		/* Otherwise falls through */
 		/* Otherwise falls through */
 	default:
 	default:
@@ -2779,7 +2915,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 			clear_atomic_switch_msr(vmx, MSR_IA32_XSS);
 			clear_atomic_switch_msr(vmx, MSR_IA32_XSS);
 		break;
 		break;
 	case MSR_TSC_AUX:
 	case MSR_TSC_AUX:
-		if (!vmx->rdtscp_enabled)
+		if (!guest_cpuid_has_rdtscp(vcpu))
 			return 1;
 			return 1;
 		/* Check reserved bit, higher 32 bits should be zero */
 		/* Check reserved bit, higher 32 bits should be zero */
 		if ((data >> 32) != 0)
 		if ((data >> 32) != 0)
@@ -2874,6 +3010,8 @@ static int hardware_enable(void)
 		return -EBUSY;
 		return -EBUSY;
 
 
 	INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu));
 	INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu));
+	INIT_LIST_HEAD(&per_cpu(blocked_vcpu_on_cpu, cpu));
+	spin_lock_init(&per_cpu(blocked_vcpu_on_cpu_lock, cpu));
 
 
 	/*
 	/*
 	 * Now we can enable the vmclear operation in kdump
 	 * Now we can enable the vmclear operation in kdump
@@ -3015,7 +3153,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
 			SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
 			SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
 			SECONDARY_EXEC_SHADOW_VMCS |
 			SECONDARY_EXEC_SHADOW_VMCS |
 			SECONDARY_EXEC_XSAVES |
 			SECONDARY_EXEC_XSAVES |
-			SECONDARY_EXEC_ENABLE_PML;
+			SECONDARY_EXEC_ENABLE_PML |
+			SECONDARY_EXEC_PCOMMIT;
 		if (adjust_vmx_controls(min2, opt2,
 		if (adjust_vmx_controls(min2, opt2,
 					MSR_IA32_VMX_PROCBASED_CTLS2,
 					MSR_IA32_VMX_PROCBASED_CTLS2,
 					&_cpu_based_2nd_exec_control) < 0)
 					&_cpu_based_2nd_exec_control) < 0)
@@ -3441,9 +3580,9 @@ static void exit_lmode(struct kvm_vcpu *vcpu)
 
 
 #endif
 #endif
 
 
-static void vmx_flush_tlb(struct kvm_vcpu *vcpu)
+static inline void __vmx_flush_tlb(struct kvm_vcpu *vcpu, int vpid)
 {
 {
-	vpid_sync_context(to_vmx(vcpu));
+	vpid_sync_context(vpid);
 	if (enable_ept) {
 	if (enable_ept) {
 		if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
 		if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
 			return;
 			return;
@@ -3451,6 +3590,11 @@ static void vmx_flush_tlb(struct kvm_vcpu *vcpu)
 	}
 	}
 }
 }
 
 
+static void vmx_flush_tlb(struct kvm_vcpu *vcpu)
+{
+	__vmx_flush_tlb(vcpu, to_vmx(vcpu)->vpid);
+}
+
 static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu)
 static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu)
 {
 {
 	ulong cr0_guest_owned_bits = vcpu->arch.cr0_guest_owned_bits;
 	ulong cr0_guest_owned_bits = vcpu->arch.cr0_guest_owned_bits;
@@ -3644,20 +3788,21 @@ static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 		if (!is_paging(vcpu)) {
 		if (!is_paging(vcpu)) {
 			hw_cr4 &= ~X86_CR4_PAE;
 			hw_cr4 &= ~X86_CR4_PAE;
 			hw_cr4 |= X86_CR4_PSE;
 			hw_cr4 |= X86_CR4_PSE;
-			/*
-			 * SMEP/SMAP is disabled if CPU is in non-paging mode
-			 * in hardware. However KVM always uses paging mode to
-			 * emulate guest non-paging mode with TDP.
-			 * To emulate this behavior, SMEP/SMAP needs to be
-			 * manually disabled when guest switches to non-paging
-			 * mode.
-			 */
-			hw_cr4 &= ~(X86_CR4_SMEP | X86_CR4_SMAP);
 		} else if (!(cr4 & X86_CR4_PAE)) {
 		} else if (!(cr4 & X86_CR4_PAE)) {
 			hw_cr4 &= ~X86_CR4_PAE;
 			hw_cr4 &= ~X86_CR4_PAE;
 		}
 		}
 	}
 	}
 
 
+	if (!enable_unrestricted_guest && !is_paging(vcpu))
+		/*
+		 * SMEP/SMAP is disabled if CPU is in non-paging mode in
+		 * hardware.  However KVM always uses paging mode without
+		 * unrestricted guest.
+		 * To emulate this behavior, SMEP/SMAP needs to be manually
+		 * disabled when guest switches to non-paging mode.
+		 */
+		hw_cr4 &= ~(X86_CR4_SMEP | X86_CR4_SMAP);
+
 	vmcs_writel(CR4_READ_SHADOW, cr4);
 	vmcs_writel(CR4_READ_SHADOW, cr4);
 	vmcs_writel(GUEST_CR4, hw_cr4);
 	vmcs_writel(GUEST_CR4, hw_cr4);
 	return 0;
 	return 0;
@@ -4146,29 +4291,28 @@ static int alloc_identity_pagetable(struct kvm *kvm)
 	return r;
 	return r;
 }
 }
 
 
-static void allocate_vpid(struct vcpu_vmx *vmx)
+static int allocate_vpid(void)
 {
 {
 	int vpid;
 	int vpid;
 
 
-	vmx->vpid = 0;
 	if (!enable_vpid)
 	if (!enable_vpid)
-		return;
+		return 0;
 	spin_lock(&vmx_vpid_lock);
 	spin_lock(&vmx_vpid_lock);
 	vpid = find_first_zero_bit(vmx_vpid_bitmap, VMX_NR_VPIDS);
 	vpid = find_first_zero_bit(vmx_vpid_bitmap, VMX_NR_VPIDS);
-	if (vpid < VMX_NR_VPIDS) {
-		vmx->vpid = vpid;
+	if (vpid < VMX_NR_VPIDS)
 		__set_bit(vpid, vmx_vpid_bitmap);
 		__set_bit(vpid, vmx_vpid_bitmap);
-	}
+	else
+		vpid = 0;
 	spin_unlock(&vmx_vpid_lock);
 	spin_unlock(&vmx_vpid_lock);
+	return vpid;
 }
 }
 
 
-static void free_vpid(struct vcpu_vmx *vmx)
+static void free_vpid(int vpid)
 {
 {
-	if (!enable_vpid)
+	if (!enable_vpid || vpid == 0)
 		return;
 		return;
 	spin_lock(&vmx_vpid_lock);
 	spin_lock(&vmx_vpid_lock);
-	if (vmx->vpid != 0)
-		__clear_bit(vmx->vpid, vmx_vpid_bitmap);
+	__clear_bit(vpid, vmx_vpid_bitmap);
 	spin_unlock(&vmx_vpid_lock);
 	spin_unlock(&vmx_vpid_lock);
 }
 }
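
allocate_vpid()/free_vpid() now return and take a plain vpid so nested VMX can manage a second VPID per vCPU; the underlying pattern is a lock-protected first-zero-bit allocator over a fixed bitmap, with 0 reserved to mean "no VPID". Below is a user-space sketch of that pattern, with a pthread mutex standing in for vmx_vpid_lock and a small pool size in place of VMX_NR_VPIDS.

/* User-space sketch of the bitmap VPID allocator refactored above. */
#include <pthread.h>
#include <stdint.h>
#include <stdio.h>

#define NR_VPIDS 16	/* small for the demo; the real limit is VMX_NR_VPIDS */

static uint32_t vpid_bitmap[(NR_VPIDS + 31) / 32] = { 1 };	/* bit 0 reserved */
static pthread_mutex_t vpid_lock = PTHREAD_MUTEX_INITIALIZER;

static int allocate_vpid(void)
{
	int vpid = 0;

	pthread_mutex_lock(&vpid_lock);
	for (int i = 1; i < NR_VPIDS; i++) {
		if (!(vpid_bitmap[i / 32] & (1u << (i % 32)))) {
			vpid_bitmap[i / 32] |= 1u << (i % 32);
			vpid = i;
			break;
		}
	}
	pthread_mutex_unlock(&vpid_lock);
	return vpid;		/* 0 means the pool is exhausted */
}

static void free_vpid(int vpid)
{
	if (vpid == 0)
		return;
	pthread_mutex_lock(&vpid_lock);
	vpid_bitmap[vpid / 32] &= ~(1u << (vpid % 32));
	pthread_mutex_unlock(&vpid_lock);
}

int main(void)
{
	int a = allocate_vpid(), b = allocate_vpid();

	printf("a=%d b=%d\n", a, b);		/* 1 and 2 */
	free_vpid(a);
	printf("reused=%d\n", allocate_vpid());	/* 1 again */
	return 0;
}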
 
 
@@ -4323,9 +4467,9 @@ static void vmx_disable_intercept_msr_write_x2apic(u32 msr)
 			msr, MSR_TYPE_W);
 			msr, MSR_TYPE_W);
 }
 }
 
 
-static int vmx_vm_has_apicv(struct kvm *kvm)
+static int vmx_cpu_uses_apicv(struct kvm_vcpu *vcpu)
 {
 {
-	return enable_apicv && irqchip_in_kernel(kvm);
+	return enable_apicv && lapic_in_kernel(vcpu);
 }
 }
 
 
 static int vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu)
 static int vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu)
@@ -4369,6 +4513,22 @@ static inline bool kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu)
 {
 {
 #ifdef CONFIG_SMP
 #ifdef CONFIG_SMP
 	if (vcpu->mode == IN_GUEST_MODE) {
 	if (vcpu->mode == IN_GUEST_MODE) {
+		struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+		/*
+		 * Currently, we don't support urgent interrupt,
+		 * all interrupts are recognized as non-urgent
+		 * interrupt, so we cannot post interrupts when
+		 * 'SN' is set.
+		 *
+		 * If the vcpu is in guest mode, it means it is
+		 * running instead of being scheduled out and
+		 * waiting in the run queue, and that's the only
+		 * case when 'SN' is set currently, warning if
+		 * 'SN' is set.
+		 */
+		WARN_ON_ONCE(pi_test_sn(&vmx->pi_desc));
+
 		apic->send_IPI_mask(get_cpu_mask(vcpu->cpu),
 		apic->send_IPI_mask(get_cpu_mask(vcpu->cpu),
 				POSTED_INTR_VECTOR);
 				POSTED_INTR_VECTOR);
 		return true;
 		return true;
@@ -4505,7 +4665,7 @@ static u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx)
 {
 {
 	u32 pin_based_exec_ctrl = vmcs_config.pin_based_exec_ctrl;
 	u32 pin_based_exec_ctrl = vmcs_config.pin_based_exec_ctrl;
 
 
-	if (!vmx_vm_has_apicv(vmx->vcpu.kvm))
+	if (!vmx_cpu_uses_apicv(&vmx->vcpu))
 		pin_based_exec_ctrl &= ~PIN_BASED_POSTED_INTR;
 		pin_based_exec_ctrl &= ~PIN_BASED_POSTED_INTR;
 	return pin_based_exec_ctrl;
 	return pin_based_exec_ctrl;
 }
 }
@@ -4517,7 +4677,7 @@ static u32 vmx_exec_control(struct vcpu_vmx *vmx)
 	if (vmx->vcpu.arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT)
 	if (vmx->vcpu.arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT)
 		exec_control &= ~CPU_BASED_MOV_DR_EXITING;
 		exec_control &= ~CPU_BASED_MOV_DR_EXITING;
 
 
-	if (!vm_need_tpr_shadow(vmx->vcpu.kvm)) {
+	if (!cpu_need_tpr_shadow(&vmx->vcpu)) {
 		exec_control &= ~CPU_BASED_TPR_SHADOW;
 		exec_control &= ~CPU_BASED_TPR_SHADOW;
 #ifdef CONFIG_X86_64
 #ifdef CONFIG_X86_64
 		exec_control |= CPU_BASED_CR8_STORE_EXITING |
 		exec_control |= CPU_BASED_CR8_STORE_EXITING |
@@ -4534,7 +4694,7 @@ static u32 vmx_exec_control(struct vcpu_vmx *vmx)
 static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx)
 static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx)
 {
 {
 	u32 exec_control = vmcs_config.cpu_based_2nd_exec_ctrl;
 	u32 exec_control = vmcs_config.cpu_based_2nd_exec_ctrl;
-	if (!vm_need_virtualize_apic_accesses(vmx->vcpu.kvm))
+	if (!cpu_need_virtualize_apic_accesses(&vmx->vcpu))
 		exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
 		exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
 	if (vmx->vpid == 0)
 	if (vmx->vpid == 0)
 		exec_control &= ~SECONDARY_EXEC_ENABLE_VPID;
 		exec_control &= ~SECONDARY_EXEC_ENABLE_VPID;
@@ -4548,7 +4708,7 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx)
 		exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST;
 		exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST;
 	if (!ple_gap)
 	if (!ple_gap)
 		exec_control &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING;
 		exec_control &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING;
-	if (!vmx_vm_has_apicv(vmx->vcpu.kvm))
+	if (!vmx_cpu_uses_apicv(&vmx->vcpu))
 		exec_control &= ~(SECONDARY_EXEC_APIC_REGISTER_VIRT |
 		exec_control &= ~(SECONDARY_EXEC_APIC_REGISTER_VIRT |
 				  SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
 				  SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
 	exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
 	exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
@@ -4558,8 +4718,12 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx)
 	   a current VMCS12
 	   a current VMCS12
 	*/
 	*/
 	exec_control &= ~SECONDARY_EXEC_SHADOW_VMCS;
 	exec_control &= ~SECONDARY_EXEC_SHADOW_VMCS;
-	/* PML is enabled/disabled in creating/destorying vcpu */
-	exec_control &= ~SECONDARY_EXEC_ENABLE_PML;
+
+	if (!enable_pml)
+		exec_control &= ~SECONDARY_EXEC_ENABLE_PML;
+
+	/* Currently, we allow the L1 guest to run the pcommit instruction directly. */
+	exec_control &= ~SECONDARY_EXEC_PCOMMIT;
 
 
 	return exec_control;
 	return exec_control;
 }
 }
@@ -4604,12 +4768,11 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
 
 
 	vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, vmx_exec_control(vmx));
 	vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, vmx_exec_control(vmx));
 
 
-	if (cpu_has_secondary_exec_ctrls()) {
+	if (cpu_has_secondary_exec_ctrls())
 		vmcs_write32(SECONDARY_VM_EXEC_CONTROL,
 		vmcs_write32(SECONDARY_VM_EXEC_CONTROL,
 				vmx_secondary_exec_control(vmx));
 				vmx_secondary_exec_control(vmx));
-	}
 
 
-	if (vmx_vm_has_apicv(vmx->vcpu.kvm)) {
+	if (vmx_cpu_uses_apicv(&vmx->vcpu)) {
 		vmcs_write64(EOI_EXIT_BITMAP0, 0);
 		vmcs_write64(EOI_EXIT_BITMAP0, 0);
 		vmcs_write64(EOI_EXIT_BITMAP1, 0);
 		vmcs_write64(EOI_EXIT_BITMAP1, 0);
 		vmcs_write64(EOI_EXIT_BITMAP2, 0);
 		vmcs_write64(EOI_EXIT_BITMAP2, 0);
@@ -4753,7 +4916,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
 
 
 	if (cpu_has_vmx_tpr_shadow() && !init_event) {
 	if (cpu_has_vmx_tpr_shadow() && !init_event) {
 		vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, 0);
 		vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, 0);
-		if (vm_need_tpr_shadow(vcpu->kvm))
+		if (cpu_need_tpr_shadow(vcpu))
 			vmcs_write64(VIRTUAL_APIC_PAGE_ADDR,
 			vmcs_write64(VIRTUAL_APIC_PAGE_ADDR,
 				     __pa(vcpu->arch.apic->regs));
 				     __pa(vcpu->arch.apic->regs));
 		vmcs_write32(TPR_THRESHOLD, 0);
 		vmcs_write32(TPR_THRESHOLD, 0);
@@ -4761,7 +4924,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
 
 
 	kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu);
 	kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu);
 
 
-	if (vmx_vm_has_apicv(vcpu->kvm))
+	if (vmx_cpu_uses_apicv(vcpu))
 		memset(&vmx->pi_desc, 0, sizeof(struct pi_desc));
 		memset(&vmx->pi_desc, 0, sizeof(struct pi_desc));
 
 
 	if (vmx->vpid != 0)
 	if (vmx->vpid != 0)
@@ -4771,12 +4934,11 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
 	vmx_set_cr0(vcpu, cr0); /* enter rmode */
 	vmx_set_cr0(vcpu, cr0); /* enter rmode */
 	vmx->vcpu.arch.cr0 = cr0;
 	vmx->vcpu.arch.cr0 = cr0;
 	vmx_set_cr4(vcpu, 0);
 	vmx_set_cr4(vcpu, 0);
-	if (!init_event)
-		vmx_set_efer(vcpu, 0);
+	vmx_set_efer(vcpu, 0);
 	vmx_fpu_activate(vcpu);
 	vmx_fpu_activate(vcpu);
 	update_exception_bitmap(vcpu);
 	update_exception_bitmap(vcpu);
 
 
-	vpid_sync_context(vmx);
+	vpid_sync_context(vmx->vpid);
 }
 }
 
 
 /*
 /*
@@ -5296,7 +5458,7 @@ static int handle_cr(struct kvm_vcpu *vcpu)
 				u8 cr8 = (u8)val;
 				u8 cr8 = (u8)val;
 				err = kvm_set_cr8(vcpu, cr8);
 				err = kvm_set_cr8(vcpu, cr8);
 				kvm_complete_insn_gp(vcpu, err);
 				kvm_complete_insn_gp(vcpu, err);
-				if (irqchip_in_kernel(vcpu->kvm))
+				if (lapic_in_kernel(vcpu))
 					return 1;
 					return 1;
 				if (cr8_prev <= cr8)
 				if (cr8_prev <= cr8)
 					return 1;
 					return 1;
@@ -5510,17 +5672,6 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu)
 	kvm_make_request(KVM_REQ_EVENT, vcpu);
 	kvm_make_request(KVM_REQ_EVENT, vcpu);
 
 
 	++vcpu->stat.irq_window_exits;
 	++vcpu->stat.irq_window_exits;
-
-	/*
-	 * If the user space waits to inject interrupts, exit as soon as
-	 * possible
-	 */
-	if (!irqchip_in_kernel(vcpu->kvm) &&
-	    vcpu->run->request_interrupt_window &&
-	    !kvm_cpu_has_interrupt(vcpu)) {
-		vcpu->run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
-		return 0;
-	}
 	return 1;
 	return 1;
 }
 }
 
 
@@ -5753,6 +5904,7 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu)
 	gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS);
 	gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS);
 	if (!kvm_io_bus_write(vcpu, KVM_FAST_MMIO_BUS, gpa, 0, NULL)) {
 	if (!kvm_io_bus_write(vcpu, KVM_FAST_MMIO_BUS, gpa, 0, NULL)) {
 		skip_emulated_instruction(vcpu);
 		skip_emulated_instruction(vcpu);
+		trace_kvm_fast_mmio(gpa);
 		return 1;
 		return 1;
 	}
 	}
 
 
@@ -5910,6 +6062,25 @@ static void update_ple_window_actual_max(void)
 			                    ple_window_grow, INT_MIN);
 }
 
+/*
+ * Handler for POSTED_INTR_WAKEUP_VECTOR.
+ */
+static void wakeup_handler(void)
+{
+	struct kvm_vcpu *vcpu;
+	int cpu = smp_processor_id();
+
+	spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, cpu));
+	list_for_each_entry(vcpu, &per_cpu(blocked_vcpu_on_cpu, cpu),
+			blocked_vcpu_list) {
+		struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
+
+		if (pi_test_on(pi_desc) == 1)
+			kvm_vcpu_kick(vcpu);
+	}
+	spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, cpu));
+}
+
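/*
 * Editor's note (not part of the patch): wakeup_handler() runs in
 * response to POSTED_INTR_WAKEUP_VECTOR on the CPU recorded as
 * vcpu->pre_pcpu by vmx_pre_block() later in this patch.  It walks
 * that CPU's blocked_vcpu_on_cpu list and kicks any vCPU whose
 * posted-interrupt descriptor has 'ON' set, i.e. any blocked vCPU for
 * which an interrupt was posted while it was sleeping.
 */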
 static __init int hardware_setup(void)
 {
 	int r = -ENOMEM, i, msr;
@@ -6096,6 +6267,8 @@ static __init int hardware_setup(void)
 		kvm_x86_ops->enable_log_dirty_pt_masked = NULL;
 		kvm_x86_ops->enable_log_dirty_pt_masked = NULL;
 	}
 	}
 
 
+	kvm_set_posted_intr_wakeup_handler(wakeup_handler);
+
 	return alloc_kvm_area();
 	return alloc_kvm_area();
 
 
 out8:
 out8:
@@ -6627,7 +6800,6 @@ static int nested_vmx_check_permission(struct kvm_vcpu *vcpu)
 
 
 static inline void nested_release_vmcs12(struct vcpu_vmx *vmx)
 static inline void nested_release_vmcs12(struct vcpu_vmx *vmx)
 {
 {
-	u32 exec_control;
 	if (vmx->nested.current_vmptr == -1ull)
 	if (vmx->nested.current_vmptr == -1ull)
 		return;
 		return;
 
 
@@ -6640,9 +6812,8 @@ static inline void nested_release_vmcs12(struct vcpu_vmx *vmx)
 		   they were modified */
 		   they were modified */
 		copy_shadow_to_vmcs12(vmx);
 		copy_shadow_to_vmcs12(vmx);
 		vmx->nested.sync_shadow_vmcs = false;
 		vmx->nested.sync_shadow_vmcs = false;
-		exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
-		exec_control &= ~SECONDARY_EXEC_SHADOW_VMCS;
-		vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control);
+		vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL,
+				SECONDARY_EXEC_SHADOW_VMCS);
 		vmcs_write64(VMCS_LINK_POINTER, -1ull);
 		vmcs_write64(VMCS_LINK_POINTER, -1ull);
 	}
 	}
 	vmx->nested.posted_intr_nv = -1;
 	vmx->nested.posted_intr_nv = -1;
@@ -6662,6 +6833,7 @@ static void free_nested(struct vcpu_vmx *vmx)
 		return;
 		return;
 
 
 	vmx->nested.vmxon = false;
 	vmx->nested.vmxon = false;
+	free_vpid(vmx->nested.vpid02);
 	nested_release_vmcs12(vmx);
 	nested_release_vmcs12(vmx);
 	if (enable_shadow_vmcs)
 	if (enable_shadow_vmcs)
 		free_vmcs(vmx->nested.current_shadow_vmcs);
 		free_vmcs(vmx->nested.current_shadow_vmcs);
@@ -7038,7 +7210,6 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu)
 {
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	gpa_t vmptr;
 	gpa_t vmptr;
-	u32 exec_control;
 
 
 	if (!nested_vmx_check_permission(vcpu))
 	if (!nested_vmx_check_permission(vcpu))
 		return 1;
 		return 1;
@@ -7070,9 +7241,8 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu)
 		vmx->nested.current_vmcs12 = new_vmcs12;
 		vmx->nested.current_vmcs12 = new_vmcs12;
 		vmx->nested.current_vmcs12_page = page;
 		vmx->nested.current_vmcs12_page = page;
 		if (enable_shadow_vmcs) {
 		if (enable_shadow_vmcs) {
-			exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
-			exec_control |= SECONDARY_EXEC_SHADOW_VMCS;
-			vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control);
+			vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL,
+				      SECONDARY_EXEC_SHADOW_VMCS);
 			vmcs_write64(VMCS_LINK_POINTER,
 			vmcs_write64(VMCS_LINK_POINTER,
 				     __pa(vmx->nested.current_shadow_vmcs));
 				     __pa(vmx->nested.current_shadow_vmcs));
 			vmx->nested.sync_shadow_vmcs = true;
 			vmx->nested.sync_shadow_vmcs = true;
@@ -7178,7 +7348,63 @@ static int handle_invept(struct kvm_vcpu *vcpu)
 
 
 static int handle_invvpid(struct kvm_vcpu *vcpu)
 static int handle_invvpid(struct kvm_vcpu *vcpu)
 {
 {
-	kvm_queue_exception(vcpu, UD_VECTOR);
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	u32 vmx_instruction_info;
+	unsigned long type, types;
+	gva_t gva;
+	struct x86_exception e;
+	int vpid;
+
+	if (!(vmx->nested.nested_vmx_secondary_ctls_high &
+	      SECONDARY_EXEC_ENABLE_VPID) ||
+			!(vmx->nested.nested_vmx_vpid_caps & VMX_VPID_INVVPID_BIT)) {
+		kvm_queue_exception(vcpu, UD_VECTOR);
+		return 1;
+	}
+
+	if (!nested_vmx_check_permission(vcpu))
+		return 1;
+
+	vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
+	type = kvm_register_readl(vcpu, (vmx_instruction_info >> 28) & 0xf);
+
+	types = (vmx->nested.nested_vmx_vpid_caps >> 8) & 0x7;
+
+	if (!(types & (1UL << type))) {
+		nested_vmx_failValid(vcpu,
+			VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
+		return 1;
+	}
+
+	/* According to the Intel VMX instruction reference, the memory
+	 * operand is read even if it isn't needed (e.g., for type==global).
+	 */
+	if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION),
+			vmx_instruction_info, false, &gva))
+		return 1;
+	if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, &vpid,
+				sizeof(u32), &e)) {
+		kvm_inject_page_fault(vcpu, &e);
+		return 1;
+	}
+
+	switch (type) {
+	case VMX_VPID_EXTENT_ALL_CONTEXT:
+		if (get_vmcs12(vcpu)->virtual_processor_id == 0) {
+			nested_vmx_failValid(vcpu,
+				VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
+			return 1;
+		}
+		__vmx_flush_tlb(vcpu, to_vmx(vcpu)->nested.vpid02);
+		nested_vmx_succeed(vcpu);
+		break;
+	default:
+		/* Trap single context invalidation invvpid calls */
+		BUG_ON(1);
+		break;
+	}
+
+	skip_emulated_instruction(vcpu);
 	return 1;
 	return 1;
 }
 }
 
 
@@ -7207,6 +7433,13 @@ static int handle_pml_full(struct kvm_vcpu *vcpu)
 	return 1;
 	return 1;
 }
 }
 
 
+static int handle_pcommit(struct kvm_vcpu *vcpu)
+{
+	/* We never trap the pcommit instruction for the L1 guest. */
+	WARN_ON(1);
+	return 1;
+}
+
 /*
 /*
  * The exit handlers return 1 if the exit was handled fully and guest execution
  * The exit handlers return 1 if the exit was handled fully and guest execution
  * may resume.  Otherwise they set the kvm_run parameter to indicate what needs
  * may resume.  Otherwise they set the kvm_run parameter to indicate what needs
@@ -7257,6 +7490,7 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
 	[EXIT_REASON_XSAVES]                  = handle_xsaves,
 	[EXIT_REASON_XSAVES]                  = handle_xsaves,
 	[EXIT_REASON_XRSTORS]                 = handle_xrstors,
 	[EXIT_REASON_XRSTORS]                 = handle_xrstors,
 	[EXIT_REASON_PML_FULL]		      = handle_pml_full,
 	[EXIT_REASON_PML_FULL]		      = handle_pml_full,
+	[EXIT_REASON_PCOMMIT]                 = handle_pcommit,
 };
 };
 
 
 static const int kvm_vmx_max_exit_handlers =
 static const int kvm_vmx_max_exit_handlers =
@@ -7558,6 +7792,8 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
 		 * the XSS exit bitmap in vmcs12.
 		 * the XSS exit bitmap in vmcs12.
 		 */
 		 */
 		return nested_cpu_has2(vmcs12, SECONDARY_EXEC_XSAVES);
 		return nested_cpu_has2(vmcs12, SECONDARY_EXEC_XSAVES);
+	case EXIT_REASON_PCOMMIT:
+		return nested_cpu_has2(vmcs12, SECONDARY_EXEC_PCOMMIT);
 	default:
 	default:
 		return true;
 		return true;
 	}
 	}
@@ -7569,10 +7805,9 @@ static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2)
 	*info2 = vmcs_read32(VM_EXIT_INTR_INFO);
 	*info2 = vmcs_read32(VM_EXIT_INTR_INFO);
 }
 }
 
 
-static int vmx_enable_pml(struct vcpu_vmx *vmx)
+static int vmx_create_pml_buffer(struct vcpu_vmx *vmx)
 {
 {
 	struct page *pml_pg;
 	struct page *pml_pg;
-	u32 exec_control;
 
 
 	pml_pg = alloc_page(GFP_KERNEL | __GFP_ZERO);
 	pml_pg = alloc_page(GFP_KERNEL | __GFP_ZERO);
 	if (!pml_pg)
 	if (!pml_pg)
@@ -7583,24 +7818,15 @@ static int vmx_enable_pml(struct vcpu_vmx *vmx)
 	vmcs_write64(PML_ADDRESS, page_to_phys(vmx->pml_pg));
 	vmcs_write64(PML_ADDRESS, page_to_phys(vmx->pml_pg));
 	vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1);
 	vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1);
 
 
-	exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
-	exec_control |= SECONDARY_EXEC_ENABLE_PML;
-	vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control);
-
 	return 0;
 	return 0;
 }
 }
 
 
-static void vmx_disable_pml(struct vcpu_vmx *vmx)
+static void vmx_destroy_pml_buffer(struct vcpu_vmx *vmx)
 {
 {
-	u32 exec_control;
-
-	ASSERT(vmx->pml_pg);
-	__free_page(vmx->pml_pg);
-	vmx->pml_pg = NULL;
-
-	exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
-	exec_control &= ~SECONDARY_EXEC_ENABLE_PML;
-	vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control);
+	if (vmx->pml_pg) {
+		__free_page(vmx->pml_pg);
+		vmx->pml_pg = NULL;
+	}
 }
 }
 
 
 static void vmx_flush_pml_buffer(struct kvm_vcpu *vcpu)
 static void vmx_flush_pml_buffer(struct kvm_vcpu *vcpu)
@@ -7924,10 +8150,10 @@ static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set)
 	 * apicv
 	 * apicv
 	 */
 	 */
 	if (!cpu_has_vmx_virtualize_x2apic_mode() ||
 	if (!cpu_has_vmx_virtualize_x2apic_mode() ||
-				!vmx_vm_has_apicv(vcpu->kvm))
+				!vmx_cpu_uses_apicv(vcpu))
 		return;
 		return;
 
 
-	if (!vm_need_tpr_shadow(vcpu->kvm))
+	if (!cpu_need_tpr_shadow(vcpu))
 		return;
 		return;
 
 
 	sec_exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
 	sec_exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
@@ -8029,9 +8255,10 @@ static void vmx_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr)
 	}
 	}
 }
 }
 
 
-static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
+static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu)
 {
 {
-	if (!vmx_vm_has_apicv(vcpu->kvm))
+	u64 *eoi_exit_bitmap = vcpu->arch.eoi_exit_bitmap;
+	if (!vmx_cpu_uses_apicv(vcpu))
 		return;
 		return;
 
 
 	vmcs_write64(EOI_EXIT_BITMAP0, eoi_exit_bitmap[0]);
 	vmcs_write64(EOI_EXIT_BITMAP0, eoi_exit_bitmap[0]);
@@ -8477,8 +8704,8 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 
 
 	if (enable_pml)
 	if (enable_pml)
-		vmx_disable_pml(vmx);
-	free_vpid(vmx);
+		vmx_destroy_pml_buffer(vmx);
+	free_vpid(vmx->vpid);
 	leave_guest_mode(vcpu);
 	leave_guest_mode(vcpu);
 	vmx_load_vmcs01(vcpu);
 	vmx_load_vmcs01(vcpu);
 	free_nested(vmx);
 	free_nested(vmx);
@@ -8497,7 +8724,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
 	if (!vmx)
 	if (!vmx)
 		return ERR_PTR(-ENOMEM);
 		return ERR_PTR(-ENOMEM);
 
 
-	allocate_vpid(vmx);
+	vmx->vpid = allocate_vpid();
 
 
 	err = kvm_vcpu_init(&vmx->vcpu, kvm, id);
 	err = kvm_vcpu_init(&vmx->vcpu, kvm, id);
 	if (err)
 	if (err)
@@ -8530,7 +8757,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
 	put_cpu();
 	put_cpu();
 	if (err)
 	if (err)
 		goto free_vmcs;
 		goto free_vmcs;
-	if (vm_need_virtualize_apic_accesses(kvm)) {
+	if (cpu_need_virtualize_apic_accesses(&vmx->vcpu)) {
 		err = alloc_apic_access_page(kvm);
 		err = alloc_apic_access_page(kvm);
 		if (err)
 		if (err)
 			goto free_vmcs;
 			goto free_vmcs;
@@ -8545,8 +8772,10 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
 			goto free_vmcs;
 			goto free_vmcs;
 	}
 	}
 
 
-	if (nested)
+	if (nested) {
 		nested_vmx_setup_ctls_msrs(vmx);
 		nested_vmx_setup_ctls_msrs(vmx);
+		vmx->nested.vpid02 = allocate_vpid();
+	}
 
 
 	vmx->nested.posted_intr_nv = -1;
 	vmx->nested.posted_intr_nv = -1;
 	vmx->nested.current_vmptr = -1ull;
 	vmx->nested.current_vmptr = -1ull;
@@ -8559,7 +8788,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
 	 * for the guest, etc.
 	 * for the guest, etc.
 	 */
 	 */
 	if (enable_pml) {
 	if (enable_pml) {
-		err = vmx_enable_pml(vmx);
+		err = vmx_create_pml_buffer(vmx);
 		if (err)
 		if (err)
 			goto free_vmcs;
 			goto free_vmcs;
 	}
 	}
@@ -8567,13 +8796,14 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
 	return &vmx->vcpu;
 	return &vmx->vcpu;
 
 
 free_vmcs:
 free_vmcs:
+	free_vpid(vmx->nested.vpid02);
 	free_loaded_vmcs(vmx->loaded_vmcs);
 	free_loaded_vmcs(vmx->loaded_vmcs);
 free_msrs:
 free_msrs:
 	kfree(vmx->guest_msrs);
 	kfree(vmx->guest_msrs);
 uninit_vcpu:
 uninit_vcpu:
 	kvm_vcpu_uninit(&vmx->vcpu);
 	kvm_vcpu_uninit(&vmx->vcpu);
 free_vcpu:
 free_vcpu:
-	free_vpid(vmx);
+	free_vpid(vmx->vpid);
 	kmem_cache_free(kvm_vcpu_cache, vmx);
 	kmem_cache_free(kvm_vcpu_cache, vmx);
 	return ERR_PTR(err);
 	return ERR_PTR(err);
 }
 }
@@ -8648,49 +8878,67 @@ static int vmx_get_lpage_level(void)
 		return PT_PDPE_LEVEL;
 }
 
+static void vmcs_set_secondary_exec_control(u32 new_ctl)
+{
+	/*
+	 * These bits in the secondary execution controls field
+	 * are dynamic; the others are mostly based on the hypervisor
+	 * architecture and the guest's CPUID.  Do not touch the
+	 * dynamic bits.
+	 */
+	u32 mask =
+		SECONDARY_EXEC_SHADOW_VMCS |
+		SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
+		SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
+
+	u32 cur_ctl = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
+
+	vmcs_write32(SECONDARY_VM_EXEC_CONTROL,
+		     (new_ctl & ~mask) | (cur_ctl & mask));
+}
+
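/*
 * Editor's note -- illustrative sketch, not part of the patch.
 * vmcs_set_secondary_exec_control() overwrites the CPUID-derived bits
 * of the control while leaving the three "dynamic" bits (shadow VMCS,
 * x2APIC mode, APIC-access virtualization) exactly as other code left
 * them.  The bit arithmetic in isolation:
 */
#include <stdint.h>

static uint32_t merge_keeping(uint32_t cur, uint32_t new, uint32_t keep_mask)
{
	/* bits inside keep_mask come from cur, everything else from new */
	return (new & ~keep_mask) | (cur & keep_mask);
}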
 static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
 {
 	struct kvm_cpuid_entry2 *best;
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
-	u32 exec_control;
+	u32 secondary_exec_ctl = vmx_secondary_exec_control(vmx);
 
 
-	vmx->rdtscp_enabled = false;
 	if (vmx_rdtscp_supported()) {
 	if (vmx_rdtscp_supported()) {
-		exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
-		if (exec_control & SECONDARY_EXEC_RDTSCP) {
-			best = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
-			if (best && (best->edx & bit(X86_FEATURE_RDTSCP)))
-				vmx->rdtscp_enabled = true;
-			else {
-				exec_control &= ~SECONDARY_EXEC_RDTSCP;
-				vmcs_write32(SECONDARY_VM_EXEC_CONTROL,
-						exec_control);
-			}
+		bool rdtscp_enabled = guest_cpuid_has_rdtscp(vcpu);
+		if (!rdtscp_enabled)
+			secondary_exec_ctl &= ~SECONDARY_EXEC_RDTSCP;
+
+		if (nested) {
+			if (rdtscp_enabled)
+				vmx->nested.nested_vmx_secondary_ctls_high |=
+					SECONDARY_EXEC_RDTSCP;
+			else
+				vmx->nested.nested_vmx_secondary_ctls_high &=
+					~SECONDARY_EXEC_RDTSCP;
 		}
 		}
-		if (nested && !vmx->rdtscp_enabled)
-			vmx->nested.nested_vmx_secondary_ctls_high &=
-				~SECONDARY_EXEC_RDTSCP;
 	}
 	}
 
 
 	/* Exposing INVPCID only when PCID is exposed */
 	/* Exposing INVPCID only when PCID is exposed */
 	best = kvm_find_cpuid_entry(vcpu, 0x7, 0);
 	best = kvm_find_cpuid_entry(vcpu, 0x7, 0);
 	if (vmx_invpcid_supported() &&
 	if (vmx_invpcid_supported() &&
-	    best && (best->ebx & bit(X86_FEATURE_INVPCID)) &&
-	    guest_cpuid_has_pcid(vcpu)) {
-		exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
-		exec_control |= SECONDARY_EXEC_ENABLE_INVPCID;
-		vmcs_write32(SECONDARY_VM_EXEC_CONTROL,
-			     exec_control);
-	} else {
-		if (cpu_has_secondary_exec_ctrls()) {
-			exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
-			exec_control &= ~SECONDARY_EXEC_ENABLE_INVPCID;
-			vmcs_write32(SECONDARY_VM_EXEC_CONTROL,
-				     exec_control);
-		}
+	    (!best || !(best->ebx & bit(X86_FEATURE_INVPCID)) ||
+	    !guest_cpuid_has_pcid(vcpu))) {
+		secondary_exec_ctl &= ~SECONDARY_EXEC_ENABLE_INVPCID;
+
 		if (best)
 		if (best)
 			best->ebx &= ~bit(X86_FEATURE_INVPCID);
 			best->ebx &= ~bit(X86_FEATURE_INVPCID);
 	}
 	}
+
+	vmcs_set_secondary_exec_control(secondary_exec_ctl);
+
+	if (static_cpu_has(X86_FEATURE_PCOMMIT) && nested) {
+		if (guest_cpuid_has_pcommit(vcpu))
+			vmx->nested.nested_vmx_secondary_ctls_high |=
+				SECONDARY_EXEC_PCOMMIT;
+		else
+			vmx->nested.nested_vmx_secondary_ctls_high &=
+				~SECONDARY_EXEC_PCOMMIT;
+	}
 }
 }
 
 
 static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
 static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
@@ -9298,13 +9546,13 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 
 
 	if (cpu_has_secondary_exec_ctrls()) {
 	if (cpu_has_secondary_exec_ctrls()) {
 		exec_control = vmx_secondary_exec_control(vmx);
 		exec_control = vmx_secondary_exec_control(vmx);
-		if (!vmx->rdtscp_enabled)
-			exec_control &= ~SECONDARY_EXEC_RDTSCP;
+
 		/* Take the following fields only from vmcs12 */
 		/* Take the following fields only from vmcs12 */
 		exec_control &= ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
 		exec_control &= ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
 				  SECONDARY_EXEC_RDTSCP |
 				  SECONDARY_EXEC_RDTSCP |
 				  SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
 				  SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
-				  SECONDARY_EXEC_APIC_REGISTER_VIRT);
+				  SECONDARY_EXEC_APIC_REGISTER_VIRT |
+				  SECONDARY_EXEC_PCOMMIT);
 		if (nested_cpu_has(vmcs12,
 		if (nested_cpu_has(vmcs12,
 				CPU_BASED_ACTIVATE_SECONDARY_CONTROLS))
 				CPU_BASED_ACTIVATE_SECONDARY_CONTROLS))
 			exec_control |= vmcs12->secondary_vm_exec_control;
 			exec_control |= vmcs12->secondary_vm_exec_control;
@@ -9323,7 +9571,7 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 				vmcs_write64(APIC_ACCESS_ADDR,
 				vmcs_write64(APIC_ACCESS_ADDR,
 				  page_to_phys(vmx->nested.apic_access_page));
 				  page_to_phys(vmx->nested.apic_access_page));
 		} else if (!(nested_cpu_has_virt_x2apic_mode(vmcs12)) &&
 		} else if (!(nested_cpu_has_virt_x2apic_mode(vmcs12)) &&
-			    (vm_need_virtualize_apic_accesses(vmx->vcpu.kvm))) {
+			    cpu_need_virtualize_apic_accesses(&vmx->vcpu)) {
 			exec_control |=
 			exec_control |=
 				SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
 				SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
 			kvm_vcpu_reload_apic_access_page(vcpu);
 			kvm_vcpu_reload_apic_access_page(vcpu);
@@ -9433,12 +9681,24 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 
 	if (enable_vpid) {
 		/*
-		 * Trivially support vpid by letting L2s share their parent
-		 * L1's vpid. TODO: move to a more elaborate solution, giving
-		 * each L2 its own vpid and exposing the vpid feature to L1.
+		 * There is no direct mapping between vpid02 and vpid12:
+		 * vpid02 is per-vCPU for L0 and is reused, while vpid12
+		 * changes require a single INVVPID during nested vmentry.
+		 * vpid12 is allocated by L1 for L2, so it does not
+		 * influence the global bitmap (used for vpid01 and vpid02
+		 * allocation) even if a lot of nested vCPUs are spawned.
 		 */
-		vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid);
-		vmx_flush_tlb(vcpu);
+		if (nested_cpu_has_vpid(vmcs12) && vmx->nested.vpid02) {
+			vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->nested.vpid02);
+			if (vmcs12->virtual_processor_id != vmx->nested.last_vpid) {
+				vmx->nested.last_vpid = vmcs12->virtual_processor_id;
+				__vmx_flush_tlb(vcpu, to_vmx(vcpu)->nested.vpid02);
+			}
+		} else {
+			vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid);
+			vmx_flush_tlb(vcpu);
+		}
+
 	}
 
 	if (nested_cpu_has_ept(vmcs12)) {
@@ -10278,6 +10538,201 @@ static void vmx_enable_log_dirty_pt_masked(struct kvm *kvm,
 	kvm_mmu_clear_dirty_pt_masked(kvm, memslot, offset, mask);
 }
 
+/*
+ * This routine does the following for a vCPU that is about to block,
+ * when VT-d PI is enabled:
+ * - Add the vCPU to the wakeup list, so that when an interrupt
+ *   arrives we can find the right vCPU to wake up.
+ * - Update the posted-interrupt descriptor as follows:
+ *      'NDST' <-- vcpu->pre_pcpu
+ *      'NV' <-- POSTED_INTR_WAKEUP_VECTOR
+ * - If 'ON' becomes set during this process, at least one interrupt
+ *   has already been posted for this vCPU and it must not block;
+ *   return 1 in that case, otherwise return 0.
+ */
+static int vmx_pre_block(struct kvm_vcpu *vcpu)
+{
+	unsigned long flags;
+	unsigned int dest;
+	struct pi_desc old, new;
+	struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
+
+	if (!kvm_arch_has_assigned_device(vcpu->kvm) ||
+		!irq_remapping_cap(IRQ_POSTING_CAP))
+		return 0;
+
+	vcpu->pre_pcpu = vcpu->cpu;
+	spin_lock_irqsave(&per_cpu(blocked_vcpu_on_cpu_lock,
+			  vcpu->pre_pcpu), flags);
+	list_add_tail(&vcpu->blocked_vcpu_list,
+		      &per_cpu(blocked_vcpu_on_cpu,
+		      vcpu->pre_pcpu));
+	spin_unlock_irqrestore(&per_cpu(blocked_vcpu_on_cpu_lock,
+			       vcpu->pre_pcpu), flags);
+
+	do {
+		old.control = new.control = pi_desc->control;
+
+		/*
+		 * We should not block the vCPU if
+		 * an interrupt is posted for it.
+		 */
+		if (pi_test_on(pi_desc) == 1) {
+			spin_lock_irqsave(&per_cpu(blocked_vcpu_on_cpu_lock,
+					  vcpu->pre_pcpu), flags);
+			list_del(&vcpu->blocked_vcpu_list);
+			spin_unlock_irqrestore(
+					&per_cpu(blocked_vcpu_on_cpu_lock,
+					vcpu->pre_pcpu), flags);
+			vcpu->pre_pcpu = -1;
+
+			return 1;
+		}
+
+		WARN((pi_desc->sn == 1),
+		     "Warning: SN field of posted-interrupts "
+		     "is set before blocking\n");
+
+		/*
+		 * Since the vCPU can be preempted during this process,
+		 * vcpu->cpu may differ from pre_pcpu.  We therefore set
+		 * pre_pcpu as the destination of the wakeup notification
+		 * event, so that the wakeup handler can find the right
+		 * vCPU to wake up if an interrupt arrives while the vCPU
+		 * is blocked.
+		 */
+		dest = cpu_physical_id(vcpu->pre_pcpu);
+
+		if (x2apic_enabled())
+			new.ndst = dest;
+		else
+			new.ndst = (dest << 8) & 0xFF00;
+
+		/* set 'NV' to 'wakeup vector' */
+		new.nv = POSTED_INTR_WAKEUP_VECTOR;
+	} while (cmpxchg(&pi_desc->control, old.control,
+			new.control) != old.control);
+
+	return 0;
+}
+
+static void vmx_post_block(struct kvm_vcpu *vcpu)
+{
+	struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
+	struct pi_desc old, new;
+	unsigned int dest;
+	unsigned long flags;
+
+	if (!kvm_arch_has_assigned_device(vcpu->kvm) ||
+		!irq_remapping_cap(IRQ_POSTING_CAP))
+		return;
+
+	do {
+		old.control = new.control = pi_desc->control;
+
+		dest = cpu_physical_id(vcpu->cpu);
+
+		if (x2apic_enabled())
+			new.ndst = dest;
+		else
+			new.ndst = (dest << 8) & 0xFF00;
+
+		/* Allow posting non-urgent interrupts */
+		new.sn = 0;
+
+		/* set 'NV' to 'notification vector' */
+		new.nv = POSTED_INTR_VECTOR;
+	} while (cmpxchg(&pi_desc->control, old.control,
+			new.control) != old.control);
+
+	if (vcpu->pre_pcpu != -1) {
+		spin_lock_irqsave(
+			&per_cpu(blocked_vcpu_on_cpu_lock,
+			vcpu->pre_pcpu), flags);
+		list_del(&vcpu->blocked_vcpu_list);
+		spin_unlock_irqrestore(
+			&per_cpu(blocked_vcpu_on_cpu_lock,
+			vcpu->pre_pcpu), flags);
+		vcpu->pre_pcpu = -1;
+	}
+}
+
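/*
 * Editor's note -- illustrative sketch, not part of the patch.
 * vmx_pre_block()/vmx_post_block() rewrite the posted-interrupt
 * descriptor's control word with a cmpxchg() retry loop so that the
 * NV/NDST/SN update is atomic with respect to the hardware.  The same
 * read-modify-write pattern in portable C11 (the field layout below
 * is invented purely for illustration):
 */
#include <stdatomic.h>
#include <stdint.h>

struct pi_ctl {
	_Atomic uint64_t control;
};

static void update_control(struct pi_ctl *pi, uint64_t clear, uint64_t set)
{
	uint64_t old, new;

	old = atomic_load(&pi->control);
	do {
		/* recompute the target value from the latest snapshot */
		new = (old & ~clear) | set;
	} while (!atomic_compare_exchange_weak(&pi->control, &old, new));
}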
+/*
+ * vmx_update_pi_irte - set IRTE for Posted-Interrupts
+ *
+ * @kvm: kvm
+ * @host_irq: host irq of the interrupt
+ * @guest_irq: gsi of the interrupt
+ * @set: set or unset PI
+ * returns 0 on success, < 0 on failure
+ */
+static int vmx_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
+			      uint32_t guest_irq, bool set)
+{
+	struct kvm_kernel_irq_routing_entry *e;
+	struct kvm_irq_routing_table *irq_rt;
+	struct kvm_lapic_irq irq;
+	struct kvm_vcpu *vcpu;
+	struct vcpu_data vcpu_info;
+	int idx, ret = -EINVAL;
+
+	if (!kvm_arch_has_assigned_device(kvm) ||
+		!irq_remapping_cap(IRQ_POSTING_CAP))
+		return 0;
+
+	idx = srcu_read_lock(&kvm->irq_srcu);
+	irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
+	BUG_ON(guest_irq >= irq_rt->nr_rt_entries);
+
+	hlist_for_each_entry(e, &irq_rt->map[guest_irq], link) {
+		if (e->type != KVM_IRQ_ROUTING_MSI)
+			continue;
+		/*
+		 * VT-d PI cannot post multicast/broadcast interrupts to
+		 * a vCPU, so we keep using interrupt remapping for those
+		 * kinds of interrupts.
+		 *
+		 * For lowest-priority interrupts, we only support those
+		 * with a single CPU as the destination, e.g. the user
+		 * configures the interrupt via /proc/irq or uses
+		 * irqbalance to make it single-CPU.
+		 *
+		 * Full lowest-priority interrupt support will be added
+		 * later.
+		 */
+
+		kvm_set_msi_irq(e, &irq);
+		if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu))
+			continue;
+
+		vcpu_info.pi_desc_addr = __pa(vcpu_to_pi_desc(vcpu));
+		vcpu_info.vector = irq.vector;
+
+		trace_kvm_pi_irte_update(vcpu->vcpu_id, e->gsi,
+				vcpu_info.vector, vcpu_info.pi_desc_addr, set);
+
+		if (set)
+			ret = irq_set_vcpu_affinity(host_irq, &vcpu_info);
+		else {
+			/* suppress notification event before unposting */
+			pi_set_sn(vcpu_to_pi_desc(vcpu));
+			ret = irq_set_vcpu_affinity(host_irq, NULL);
+			pi_clear_sn(vcpu_to_pi_desc(vcpu));
+		}
+
+		if (ret < 0) {
+			printk(KERN_INFO "%s: failed to update PI IRTE\n",
+					__func__);
+			goto out;
+		}
+	}
+
+	ret = 0;
+out:
+	srcu_read_unlock(&kvm->irq_srcu, idx);
+	return ret;
+}
+
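/*
 * Editor's note -- illustrative sketch, not part of the patch.
 * vmx_update_pi_irte() is reached through the irqbypass connection
 * between a VFIO MSI producer and a KVM irqfd consumer.  From user
 * space, the VMM only has to tie the eventfd it handed to VFIO to a
 * guest GSI with KVM_IRQFD; the bypass manager then lets KVM program
 * the IRTE in posted format:
 */
#include <linux/kvm.h>
#include <sys/ioctl.h>

static int attach_irqfd(int vm_fd, int event_fd, unsigned int gsi)
{
	struct kvm_irqfd irqfd = {
		.fd  = event_fd,	/* same eventfd used as the VFIO MSI trigger */
		.gsi = gsi,		/* guest interrupt this eventfd feeds */
	};

	return ioctl(vm_fd, KVM_IRQFD, &irqfd);
}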
 static struct kvm_x86_ops vmx_x86_ops = {
 	.cpu_has_kvm_support = cpu_has_kvm_support,
 	.disabled_by_bios = vmx_disabled_by_bios,
@@ -10347,7 +10802,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
 	.update_cr8_intercept = update_cr8_intercept,
 	.update_cr8_intercept = update_cr8_intercept,
 	.set_virtual_x2apic_mode = vmx_set_virtual_x2apic_mode,
 	.set_virtual_x2apic_mode = vmx_set_virtual_x2apic_mode,
 	.set_apic_access_page_addr = vmx_set_apic_access_page_addr,
 	.set_apic_access_page_addr = vmx_set_apic_access_page_addr,
-	.vm_has_apicv = vmx_vm_has_apicv,
+	.cpu_uses_apicv = vmx_cpu_uses_apicv,
 	.load_eoi_exitmap = vmx_load_eoi_exitmap,
 	.load_eoi_exitmap = vmx_load_eoi_exitmap,
 	.hwapic_irr_update = vmx_hwapic_irr_update,
 	.hwapic_irr_update = vmx_hwapic_irr_update,
 	.hwapic_isr_update = vmx_hwapic_isr_update,
 	.hwapic_isr_update = vmx_hwapic_isr_update,
@@ -10394,7 +10849,12 @@ static struct kvm_x86_ops vmx_x86_ops = {
 	.flush_log_dirty = vmx_flush_log_dirty,
 	.flush_log_dirty = vmx_flush_log_dirty,
 	.enable_log_dirty_pt_masked = vmx_enable_log_dirty_pt_masked,
 	.enable_log_dirty_pt_masked = vmx_enable_log_dirty_pt_masked,
 
 
+	.pre_block = vmx_pre_block,
+	.post_block = vmx_post_block,
+
 	.pmu_ops = &intel_pmu_ops,
 	.pmu_ops = &intel_pmu_ops,
+
+	.update_pi_irte = vmx_update_pi_irte,
 };
 };
 
 
 static int __init vmx_init(void)
 static int __init vmx_init(void)

+ 197 - 59
arch/x86/kvm/x86.c

@@ -51,6 +51,8 @@
 #include <linux/pci.h>
 #include <linux/pci.h>
 #include <linux/timekeeper_internal.h>
 #include <linux/timekeeper_internal.h>
 #include <linux/pvclock_gtod.h>
 #include <linux/pvclock_gtod.h>
+#include <linux/kvm_irqfd.h>
+#include <linux/irqbypass.h>
 #include <trace/events/kvm.h>
 #include <trace/events/kvm.h>
 
 
 #define CREATE_TRACE_POINTS
 #define CREATE_TRACE_POINTS
@@ -64,6 +66,7 @@
 #include <asm/fpu/internal.h> /* Ugh! */
 #include <asm/fpu/internal.h> /* Ugh! */
 #include <asm/pvclock.h>
 #include <asm/pvclock.h>
 #include <asm/div64.h>
 #include <asm/div64.h>
+#include <asm/irq_remapping.h>
 
 
 #define MAX_IO_MSRS 256
 #define MAX_IO_MSRS 256
 #define KVM_MAX_MCE_BANKS 32
 #define KVM_MAX_MCE_BANKS 32
@@ -622,7 +625,9 @@ int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
 	if ((cr0 ^ old_cr0) & update_bits)
 	if ((cr0 ^ old_cr0) & update_bits)
 		kvm_mmu_reset_context(vcpu);
 		kvm_mmu_reset_context(vcpu);
 
 
-	if ((cr0 ^ old_cr0) & X86_CR0_CD)
+	if (((cr0 ^ old_cr0) & X86_CR0_CD) &&
+	    kvm_arch_has_noncoherent_dma(vcpu->kvm) &&
+	    !kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED))
 		kvm_zap_gfn_range(vcpu->kvm, 0, ~0ULL);
 		kvm_zap_gfn_range(vcpu->kvm, 0, ~0ULL);
 
 
 	return 0;
 	return 0;
@@ -789,7 +794,7 @@ int kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8)
 {
 {
 	if (cr8 & CR8_RESERVED_BITS)
 	if (cr8 & CR8_RESERVED_BITS)
 		return 1;
 		return 1;
-	if (irqchip_in_kernel(vcpu->kvm))
+	if (lapic_in_kernel(vcpu))
 		kvm_lapic_set_tpr(vcpu, cr8);
 		kvm_lapic_set_tpr(vcpu, cr8);
 	else
 	else
 		vcpu->arch.cr8 = cr8;
 		vcpu->arch.cr8 = cr8;
@@ -799,7 +804,7 @@ EXPORT_SYMBOL_GPL(kvm_set_cr8);
 
 
 unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu)
 unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu)
 {
 {
-	if (irqchip_in_kernel(vcpu->kvm))
+	if (lapic_in_kernel(vcpu))
 		return kvm_lapic_get_cr8(vcpu);
 		return kvm_lapic_get_cr8(vcpu);
 	else
 	else
 		return vcpu->arch.cr8;
 		return vcpu->arch.cr8;
@@ -953,6 +958,9 @@ static u32 emulated_msrs[] = {
 	HV_X64_MSR_TIME_REF_COUNT, HV_X64_MSR_REFERENCE_TSC,
 	HV_X64_MSR_TIME_REF_COUNT, HV_X64_MSR_REFERENCE_TSC,
 	HV_X64_MSR_CRASH_P0, HV_X64_MSR_CRASH_P1, HV_X64_MSR_CRASH_P2,
 	HV_X64_MSR_CRASH_P0, HV_X64_MSR_CRASH_P1, HV_X64_MSR_CRASH_P2,
 	HV_X64_MSR_CRASH_P3, HV_X64_MSR_CRASH_P4, HV_X64_MSR_CRASH_CTL,
 	HV_X64_MSR_CRASH_P3, HV_X64_MSR_CRASH_P4, HV_X64_MSR_CRASH_CTL,
+	HV_X64_MSR_RESET,
+	HV_X64_MSR_VP_INDEX,
+	HV_X64_MSR_VP_RUNTIME,
 	HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
 	HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
 	MSR_KVM_PV_EOI_EN,
 	MSR_KVM_PV_EOI_EN,
 
 
@@ -1898,6 +1906,8 @@ static void accumulate_steal_time(struct kvm_vcpu *vcpu)
 
 
 static void record_steal_time(struct kvm_vcpu *vcpu)
 static void record_steal_time(struct kvm_vcpu *vcpu)
 {
 {
+	accumulate_steal_time(vcpu);
+
 	if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
 	if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
 		return;
 		return;
 
 
@@ -2048,12 +2058,6 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		if (!(data & KVM_MSR_ENABLED))
 		if (!(data & KVM_MSR_ENABLED))
 			break;
 			break;
 
 
-		vcpu->arch.st.last_steal = current->sched_info.run_delay;
-
-		preempt_disable();
-		accumulate_steal_time(vcpu);
-		preempt_enable();
-
 		kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu);
 		kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu);
 
 
 		break;
 		break;
@@ -2449,6 +2453,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 	case KVM_CAP_ENABLE_CAP_VM:
 	case KVM_CAP_ENABLE_CAP_VM:
 	case KVM_CAP_DISABLE_QUIRKS:
 	case KVM_CAP_DISABLE_QUIRKS:
 	case KVM_CAP_SET_BOOT_CPU_ID:
 	case KVM_CAP_SET_BOOT_CPU_ID:
+	case KVM_CAP_SPLIT_IRQCHIP:
 #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
 #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
 	case KVM_CAP_ASSIGN_DEV_IRQ:
 	case KVM_CAP_ASSIGN_DEV_IRQ:
 	case KVM_CAP_PCI_2_3:
 	case KVM_CAP_PCI_2_3:
@@ -2628,7 +2633,6 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 		vcpu->cpu = cpu;
 		vcpu->cpu = cpu;
 	}
 	}
 
 
-	accumulate_steal_time(vcpu);
 	kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu);
 	kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu);
 }
 }
 
 
@@ -2662,12 +2666,24 @@ static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
 {
 	if (irq->irq >= KVM_NR_INTERRUPTS)
 		return -EINVAL;
-	if (irqchip_in_kernel(vcpu->kvm))
+
+	if (!irqchip_in_kernel(vcpu->kvm)) {
+		kvm_queue_interrupt(vcpu, irq->irq, false);
+		kvm_make_request(KVM_REQ_EVENT, vcpu);
+		return 0;
+	}
+
+	/*
+	 * With in-kernel LAPIC, we only use this to inject EXTINT, so
+	 * fail for in-kernel 8259.
+	 */
+	if (pic_in_kernel(vcpu->kvm))
 		return -ENXIO;
 
-	kvm_queue_interrupt(vcpu, irq->irq, false);
-	kvm_make_request(KVM_REQ_EVENT, vcpu);
+	if (vcpu->arch.pending_external_vector != -1)
+		return -EEXIST;
 
+	vcpu->arch.pending_external_vector = irq->irq;
 	return 0;
 }
 
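/*
 * Editor's note -- illustrative sketch, not part of the patch.
 * With the split irqchip, user space still delivers EXTINT through
 * KVM_INTERRUPT; the vector is parked in pending_external_vector
 * until the guest can accept it, and a second call before then now
 * fails with -EEXIST.  Minimal caller:
 */
#include <linux/kvm.h>
#include <sys/ioctl.h>

static int inject_extint(int vcpu_fd, unsigned int vector)
{
	struct kvm_interrupt intr = { .irq = vector };

	return ioctl(vcpu_fd, KVM_INTERRUPT, &intr);
}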
@@ -3176,7 +3192,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 		struct kvm_vapic_addr va;
 		struct kvm_vapic_addr va;
 
 
 		r = -EINVAL;
 		r = -EINVAL;
-		if (!irqchip_in_kernel(vcpu->kvm))
+		if (!lapic_in_kernel(vcpu))
 			goto out;
 			goto out;
 		r = -EFAULT;
 		r = -EFAULT;
 		if (copy_from_user(&va, argp, sizeof va))
 		if (copy_from_user(&va, argp, sizeof va))
@@ -3425,41 +3441,35 @@ static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
 
 
 static int kvm_vm_ioctl_get_pit(struct kvm *kvm, struct kvm_pit_state *ps)
 static int kvm_vm_ioctl_get_pit(struct kvm *kvm, struct kvm_pit_state *ps)
 {
 {
-	int r = 0;
-
 	mutex_lock(&kvm->arch.vpit->pit_state.lock);
 	mutex_lock(&kvm->arch.vpit->pit_state.lock);
 	memcpy(ps, &kvm->arch.vpit->pit_state, sizeof(struct kvm_pit_state));
 	memcpy(ps, &kvm->arch.vpit->pit_state, sizeof(struct kvm_pit_state));
 	mutex_unlock(&kvm->arch.vpit->pit_state.lock);
 	mutex_unlock(&kvm->arch.vpit->pit_state.lock);
-	return r;
+	return 0;
 }
 }
 
 
 static int kvm_vm_ioctl_set_pit(struct kvm *kvm, struct kvm_pit_state *ps)
 static int kvm_vm_ioctl_set_pit(struct kvm *kvm, struct kvm_pit_state *ps)
 {
 {
-	int r = 0;
-
 	mutex_lock(&kvm->arch.vpit->pit_state.lock);
 	mutex_lock(&kvm->arch.vpit->pit_state.lock);
 	memcpy(&kvm->arch.vpit->pit_state, ps, sizeof(struct kvm_pit_state));
 	memcpy(&kvm->arch.vpit->pit_state, ps, sizeof(struct kvm_pit_state));
 	kvm_pit_load_count(kvm, 0, ps->channels[0].count, 0);
 	kvm_pit_load_count(kvm, 0, ps->channels[0].count, 0);
 	mutex_unlock(&kvm->arch.vpit->pit_state.lock);
 	mutex_unlock(&kvm->arch.vpit->pit_state.lock);
-	return r;
+	return 0;
 }
 }
 
 
 static int kvm_vm_ioctl_get_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps)
 static int kvm_vm_ioctl_get_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps)
 {
 {
-	int r = 0;
-
 	mutex_lock(&kvm->arch.vpit->pit_state.lock);
 	mutex_lock(&kvm->arch.vpit->pit_state.lock);
 	memcpy(ps->channels, &kvm->arch.vpit->pit_state.channels,
 	memcpy(ps->channels, &kvm->arch.vpit->pit_state.channels,
 		sizeof(ps->channels));
 		sizeof(ps->channels));
 	ps->flags = kvm->arch.vpit->pit_state.flags;
 	ps->flags = kvm->arch.vpit->pit_state.flags;
 	mutex_unlock(&kvm->arch.vpit->pit_state.lock);
 	mutex_unlock(&kvm->arch.vpit->pit_state.lock);
 	memset(&ps->reserved, 0, sizeof(ps->reserved));
 	memset(&ps->reserved, 0, sizeof(ps->reserved));
-	return r;
+	return 0;
 }
 }
 
 
 static int kvm_vm_ioctl_set_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps)
 static int kvm_vm_ioctl_set_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps)
 {
 {
-	int r = 0, start = 0;
+	int start = 0;
 	u32 prev_legacy, cur_legacy;
 	u32 prev_legacy, cur_legacy;
 	mutex_lock(&kvm->arch.vpit->pit_state.lock);
 	mutex_lock(&kvm->arch.vpit->pit_state.lock);
 	prev_legacy = kvm->arch.vpit->pit_state.flags & KVM_PIT_FLAGS_HPET_LEGACY;
 	prev_legacy = kvm->arch.vpit->pit_state.flags & KVM_PIT_FLAGS_HPET_LEGACY;
@@ -3471,7 +3481,7 @@ static int kvm_vm_ioctl_set_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps)
 	kvm->arch.vpit->pit_state.flags = ps->flags;
 	kvm->arch.vpit->pit_state.flags = ps->flags;
 	kvm_pit_load_count(kvm, 0, kvm->arch.vpit->pit_state.channels[0].count, start);
 	kvm_pit_load_count(kvm, 0, kvm->arch.vpit->pit_state.channels[0].count, start);
 	mutex_unlock(&kvm->arch.vpit->pit_state.lock);
 	mutex_unlock(&kvm->arch.vpit->pit_state.lock);
-	return r;
+	return 0;
 }
 }
 
 
 static int kvm_vm_ioctl_reinject(struct kvm *kvm,
 static int kvm_vm_ioctl_reinject(struct kvm *kvm,
@@ -3556,6 +3566,28 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
 		kvm->arch.disabled_quirks = cap->args[0];
 		r = 0;
 		break;
+	case KVM_CAP_SPLIT_IRQCHIP: {
+		mutex_lock(&kvm->lock);
+		r = -EINVAL;
+		if (cap->args[0] > MAX_NR_RESERVED_IOAPIC_PINS)
+			goto split_irqchip_unlock;
+		r = -EEXIST;
+		if (irqchip_in_kernel(kvm))
+			goto split_irqchip_unlock;
+		if (atomic_read(&kvm->online_vcpus))
+			goto split_irqchip_unlock;
+		r = kvm_setup_empty_irq_routing(kvm);
+		if (r)
+			goto split_irqchip_unlock;
+		/* Pairs with irqchip_in_kernel. */
+		smp_wmb();
+		kvm->arch.irqchip_split = true;
+		kvm->arch.nr_reserved_ioapic_pins = cap->args[0];
+		r = 0;
+split_irqchip_unlock:
+		mutex_unlock(&kvm->lock);
+		break;
+	}
 	default:
 		r = -EINVAL;
 		break;
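/*
 * Editor's note -- illustrative sketch, not part of the patch.
 * A VMM opts in to the split irqchip (LAPIC in the kernel; IOAPIC,
 * PIC and PIT in user space) with KVM_ENABLE_CAP on the VM fd before
 * creating any vCPU, passing the number of IOAPIC routes to reserve
 * in args[0]:
 */
#include <linux/kvm.h>
#include <sys/ioctl.h>

static int enable_split_irqchip(int vm_fd, unsigned int nr_ioapic_pins)
{
	struct kvm_enable_cap cap = {
		.cap  = KVM_CAP_SPLIT_IRQCHIP,
		.args = { nr_ioapic_pins },	/* e.g. 24 for a standard IOAPIC */
	};

	return ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
}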
@@ -3669,7 +3701,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
 		}
 		}
 
 
 		r = -ENXIO;
 		r = -ENXIO;
-		if (!irqchip_in_kernel(kvm))
+		if (!irqchip_in_kernel(kvm) || irqchip_split(kvm))
 			goto get_irqchip_out;
 			goto get_irqchip_out;
 		r = kvm_vm_ioctl_get_irqchip(kvm, chip);
 		r = kvm_vm_ioctl_get_irqchip(kvm, chip);
 		if (r)
 		if (r)
@@ -3693,7 +3725,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
 		}
 		}
 
 
 		r = -ENXIO;
 		r = -ENXIO;
-		if (!irqchip_in_kernel(kvm))
+		if (!irqchip_in_kernel(kvm) || irqchip_split(kvm))
 			goto set_irqchip_out;
 			goto set_irqchip_out;
 		r = kvm_vm_ioctl_set_irqchip(kvm, chip);
 		r = kvm_vm_ioctl_set_irqchip(kvm, chip);
 		if (r)
 		if (r)
@@ -4060,6 +4092,15 @@ static int kvm_read_guest_virt_system(struct x86_emulate_ctxt *ctxt,
 	return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, 0, exception);
 	return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, 0, exception);
 }
 }
 
 
+static int kvm_read_guest_phys_system(struct x86_emulate_ctxt *ctxt,
+		unsigned long addr, void *val, unsigned int bytes)
+{
+	struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
+	int r = kvm_vcpu_read_guest(vcpu, addr, val, bytes);
+
+	return r < 0 ? X86EMUL_IO_NEEDED : X86EMUL_CONTINUE;
+}
+
 int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt,
 int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt,
 				       gva_t addr, void *val,
 				       gva_t addr, void *val,
 				       unsigned int bytes,
 				       unsigned int bytes,
@@ -4795,6 +4836,7 @@ static const struct x86_emulate_ops emulate_ops = {
 	.write_gpr           = emulator_write_gpr,
 	.write_gpr           = emulator_write_gpr,
 	.read_std            = kvm_read_guest_virt_system,
 	.read_std            = kvm_read_guest_virt_system,
 	.write_std           = kvm_write_guest_virt_system,
 	.write_std           = kvm_write_guest_virt_system,
+	.read_phys           = kvm_read_guest_phys_system,
 	.fetch               = kvm_fetch_guest_virt,
 	.fetch               = kvm_fetch_guest_virt,
 	.read_emulated       = emulator_read_emulated,
 	.read_emulated       = emulator_read_emulated,
 	.write_emulated      = emulator_write_emulated,
 	.write_emulated      = emulator_write_emulated,
@@ -5667,7 +5709,7 @@ void kvm_arch_exit(void)
 int kvm_vcpu_halt(struct kvm_vcpu *vcpu)
 int kvm_vcpu_halt(struct kvm_vcpu *vcpu)
 {
 {
 	++vcpu->stat.halt_exits;
 	++vcpu->stat.halt_exits;
-	if (irqchip_in_kernel(vcpu->kvm)) {
+	if (lapic_in_kernel(vcpu)) {
 		vcpu->arch.mp_state = KVM_MP_STATE_HALTED;
 		vcpu->arch.mp_state = KVM_MP_STATE_HALTED;
 		return 1;
 		return 1;
 	} else {
 	} else {
@@ -5774,9 +5816,15 @@ static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt)
  */
  */
 static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu)
 static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu)
 {
 {
-	return (!irqchip_in_kernel(vcpu->kvm) && !kvm_cpu_has_interrupt(vcpu) &&
-		vcpu->run->request_interrupt_window &&
-		kvm_arch_interrupt_allowed(vcpu));
+	if (!vcpu->run->request_interrupt_window || pic_in_kernel(vcpu->kvm))
+		return false;
+
+	if (kvm_cpu_has_interrupt(vcpu))
+		return false;
+
+	return (irqchip_split(vcpu->kvm)
+		? kvm_apic_accept_pic_intr(vcpu)
+		: kvm_arch_interrupt_allowed(vcpu));
 }
 
 static void post_kvm_run_save(struct kvm_vcpu *vcpu)
@@ -5787,13 +5835,17 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu)
 	kvm_run->flags = is_smm(vcpu) ? KVM_RUN_X86_SMM : 0;
 	kvm_run->cr8 = kvm_get_cr8(vcpu);
 	kvm_run->apic_base = kvm_get_apic_base(vcpu);
-	if (irqchip_in_kernel(vcpu->kvm))
-		kvm_run->ready_for_interrupt_injection = 1;
-	else
+	if (!irqchip_in_kernel(vcpu->kvm))
 		kvm_run->ready_for_interrupt_injection =
 			kvm_arch_interrupt_allowed(vcpu) &&
 			!kvm_cpu_has_interrupt(vcpu) &&
 			!kvm_event_needs_reinjection(vcpu);
+	else if (!pic_in_kernel(vcpu->kvm))
+		kvm_run->ready_for_interrupt_injection =
+			kvm_apic_accept_pic_intr(vcpu) &&
+			!kvm_cpu_has_interrupt(vcpu);
+	else
+		kvm_run->ready_for_interrupt_injection = 1;
 }
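/*
 * Editor's note -- illustrative sketch, not part of the patch.
 * With the PIC in user space, the VMM sets request_interrupt_window
 * whenever it has an interrupt queued and injects it once KVM reports
 * ready_for_interrupt_injection (or exits with
 * KVM_EXIT_IRQ_WINDOW_OPEN).  vmm_has_pending_irq() and
 * vmm_next_vector() are invented placeholders for the VMM's own
 * interrupt-controller model:
 */
#include <linux/kvm.h>
#include <stdbool.h>
#include <sys/ioctl.h>

extern bool vmm_has_pending_irq(void);		/* hypothetical helper */
extern unsigned int vmm_next_vector(void);	/* hypothetical helper */

static void run_vcpu_once(int vcpu_fd, struct kvm_run *run)
{
	run->request_interrupt_window = vmm_has_pending_irq();
	ioctl(vcpu_fd, KVM_RUN, 0);

	/* KVM_EXIT_IRQ_WINDOW_OPEN needs no handling beyond this check */
	if (run->ready_for_interrupt_injection && vmm_has_pending_irq()) {
		struct kvm_interrupt intr = { .irq = vmm_next_vector() };

		ioctl(vcpu_fd, KVM_INTERRUPT, &intr);
	}
}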
 
 static void update_cr8_intercept(struct kvm_vcpu *vcpu)
@@ -6144,18 +6196,18 @@ static void process_smi(struct kvm_vcpu *vcpu)
 
 
 static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu)
 static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu)
 {
 {
-	u64 eoi_exit_bitmap[4];
-	u32 tmr[8];
-
 	if (!kvm_apic_hw_enabled(vcpu->arch.apic))
 	if (!kvm_apic_hw_enabled(vcpu->arch.apic))
 		return;
 		return;
 
 
-	memset(eoi_exit_bitmap, 0, 32);
-	memset(tmr, 0, 32);
+	memset(vcpu->arch.eoi_exit_bitmap, 0, 256 / 8);
 
 
-	kvm_ioapic_scan_entry(vcpu, eoi_exit_bitmap, tmr);
-	kvm_x86_ops->load_eoi_exitmap(vcpu, eoi_exit_bitmap);
-	kvm_apic_update_tmr(vcpu, tmr);
+	if (irqchip_split(vcpu->kvm))
+		kvm_scan_ioapic_routes(vcpu, vcpu->arch.eoi_exit_bitmap);
+	else {
+		kvm_x86_ops->sync_pir_to_irr(vcpu);
+		kvm_ioapic_scan_entry(vcpu, vcpu->arch.eoi_exit_bitmap);
+	}
+	kvm_x86_ops->load_eoi_exitmap(vcpu);
 }
 }
 
 
 static void kvm_vcpu_flush_tlb(struct kvm_vcpu *vcpu)
 static void kvm_vcpu_flush_tlb(struct kvm_vcpu *vcpu)
@@ -6168,7 +6220,7 @@ void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu)
 {
 {
 	struct page *page = NULL;
 	struct page *page = NULL;
 
 
-	if (!irqchip_in_kernel(vcpu->kvm))
+	if (!lapic_in_kernel(vcpu))
 		return;
 		return;
 
 
 	if (!kvm_x86_ops->set_apic_access_page_addr)
 	if (!kvm_x86_ops->set_apic_access_page_addr)
@@ -6206,7 +6258,7 @@ void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
 static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 {
 {
 	int r;
 	int r;
-	bool req_int_win = !irqchip_in_kernel(vcpu->kvm) &&
+	bool req_int_win = !lapic_in_kernel(vcpu) &&
 		vcpu->run->request_interrupt_window;
 		vcpu->run->request_interrupt_window;
 	bool req_immediate_exit = false;
 	bool req_immediate_exit = false;
 
 
@@ -6258,6 +6310,17 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 			kvm_pmu_handle_event(vcpu);
 		if (kvm_check_request(KVM_REQ_PMI, vcpu))
 			kvm_pmu_deliver_pmi(vcpu);
+		if (kvm_check_request(KVM_REQ_IOAPIC_EOI_EXIT, vcpu)) {
+			BUG_ON(vcpu->arch.pending_ioapic_eoi > 255);
+			if (test_bit(vcpu->arch.pending_ioapic_eoi,
+				     (void *) vcpu->arch.eoi_exit_bitmap)) {
+				vcpu->run->exit_reason = KVM_EXIT_IOAPIC_EOI;
+				vcpu->run->eoi.vector =
+						vcpu->arch.pending_ioapic_eoi;
+				r = 0;
+				goto out;
+			}
+		}
 		if (kvm_check_request(KVM_REQ_SCAN_IOAPIC, vcpu))
 			vcpu_scan_ioapic(vcpu);
 		if (kvm_check_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu))
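/*
 * Editor's note -- illustrative sketch, not part of the patch.
 * The two exits added above reach user space as KVM_EXIT_IOAPIC_EOI
 * (split irqchip: the EOI must be forwarded to the user-space IOAPIC)
 * and KVM_EXIT_SYSTEM_EVENT with KVM_SYSTEM_EVENT_RESET (Hyper-V
 * reset MSR).  vmm_ioapic_eoi() and vmm_request_reset() are invented
 * placeholders for the VMM's own device models:
 */
#include <linux/kvm.h>

extern void vmm_ioapic_eoi(unsigned int vector);	/* hypothetical */
extern void vmm_request_reset(void);			/* hypothetical */

static void handle_irqchip_exits(struct kvm_run *run)
{
	switch (run->exit_reason) {
	case KVM_EXIT_IOAPIC_EOI:
		vmm_ioapic_eoi(run->eoi.vector);
		break;
	case KVM_EXIT_SYSTEM_EVENT:
		if (run->system_event.type == KVM_SYSTEM_EVENT_RESET)
			vmm_request_reset();
		break;
	}
}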
@@ -6268,6 +6331,26 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 			r = 0;
 			r = 0;
 			goto out;
 			goto out;
 		}
 		}
+		if (kvm_check_request(KVM_REQ_HV_RESET, vcpu)) {
+			vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
+			vcpu->run->system_event.type = KVM_SYSTEM_EVENT_RESET;
+			r = 0;
+			goto out;
+		}
+	}
+
+	/*
+	 * KVM_REQ_EVENT is not set when posted interrupts are set by
+	 * VT-d hardware, so we have to update RVI unconditionally.
+	 */
+	if (kvm_lapic_enabled(vcpu)) {
+		/*
+		 * Update architecture specific hints for APIC
+		 * virtual interrupt delivery.
+		 */
+		if (kvm_x86_ops->hwapic_irr_update)
+			kvm_x86_ops->hwapic_irr_update(vcpu,
+				kvm_lapic_find_highest_irr(vcpu));
 	}
 	}
 
 
 	if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
 	if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
@@ -6286,13 +6369,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 			kvm_x86_ops->enable_irq_window(vcpu);
 			kvm_x86_ops->enable_irq_window(vcpu);
 
 
 		if (kvm_lapic_enabled(vcpu)) {
 		if (kvm_lapic_enabled(vcpu)) {
-			/*
-			 * Update architecture specific hints for APIC
-			 * virtual interrupt delivery.
-			 */
-			if (kvm_x86_ops->hwapic_irr_update)
-				kvm_x86_ops->hwapic_irr_update(vcpu,
-					kvm_lapic_find_highest_irr(vcpu));
 			update_cr8_intercept(vcpu);
 			update_cr8_intercept(vcpu);
 			kvm_lapic_sync_to_vapic(vcpu);
 			kvm_lapic_sync_to_vapic(vcpu);
 		}
 		}
@@ -6428,10 +6504,15 @@ out:
 
 
 static inline int vcpu_block(struct kvm *kvm, struct kvm_vcpu *vcpu)
 static inline int vcpu_block(struct kvm *kvm, struct kvm_vcpu *vcpu)
 {
 {
-	if (!kvm_arch_vcpu_runnable(vcpu)) {
+	if (!kvm_arch_vcpu_runnable(vcpu) &&
+	    (!kvm_x86_ops->pre_block || kvm_x86_ops->pre_block(vcpu) == 0)) {
 		srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
 		srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
 		kvm_vcpu_block(vcpu);
 		kvm_vcpu_block(vcpu);
 		vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
 		vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
+
+		if (kvm_x86_ops->post_block)
+			kvm_x86_ops->post_block(vcpu);
+
 		if (!kvm_check_request(KVM_REQ_UNHALT, vcpu))
 		if (!kvm_check_request(KVM_REQ_UNHALT, vcpu))
 			return 1;
 			return 1;
 	}
 	}
@@ -6468,10 +6549,12 @@ static int vcpu_run(struct kvm_vcpu *vcpu)
 	vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
 	vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
 
 
 	for (;;) {
 	for (;;) {
-		if (kvm_vcpu_running(vcpu))
+		if (kvm_vcpu_running(vcpu)) {
 			r = vcpu_enter_guest(vcpu);
 			r = vcpu_enter_guest(vcpu);
-		else
+		} else {
 			r = vcpu_block(kvm, vcpu);
 			r = vcpu_block(kvm, vcpu);
+		}
+
 		if (r <= 0)
 		if (r <= 0)
 			break;
 			break;
 
 
@@ -6480,8 +6563,8 @@ static int vcpu_run(struct kvm_vcpu *vcpu)
 			kvm_inject_pending_timer_irqs(vcpu);
 			kvm_inject_pending_timer_irqs(vcpu);
 
 
 		if (dm_request_for_irq_injection(vcpu)) {
 		if (dm_request_for_irq_injection(vcpu)) {
-			r = -EINTR;
-			vcpu->run->exit_reason = KVM_EXIT_INTR;
+			r = 0;
+			vcpu->run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
 			++vcpu->stat.request_irq_exits;
 			++vcpu->stat.request_irq_exits;
 			break;
 			break;
 		}
 		}
@@ -6608,7 +6691,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	}
 	}
 
 
 	/* re-sync apic's tpr */
 	/* re-sync apic's tpr */
-	if (!irqchip_in_kernel(vcpu->kvm)) {
+	if (!lapic_in_kernel(vcpu)) {
 		if (kvm_set_cr8(vcpu, kvm_run->cr8) != 0) {
 		if (kvm_set_cr8(vcpu, kvm_run->cr8) != 0) {
 			r = -EINVAL;
 			r = -EINVAL;
 			goto out;
 			goto out;
@@ -7308,7 +7391,7 @@ bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu)
 
 
 bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu)
 bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu)
 {
 {
-	return irqchip_in_kernel(vcpu->kvm) == (vcpu->arch.apic != NULL);
+	return irqchip_in_kernel(vcpu->kvm) == lapic_in_kernel(vcpu);
 }
 }
 
 
 struct static_key kvm_no_apic_vcpu __read_mostly;
 struct static_key kvm_no_apic_vcpu __read_mostly;
@@ -7377,6 +7460,8 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 	kvm_async_pf_hash_reset(vcpu);
 	kvm_async_pf_hash_reset(vcpu);
 	kvm_pmu_init(vcpu);
 	kvm_pmu_init(vcpu);
 
 
+	vcpu->arch.pending_external_vector = -1;
+
 	return 0;
 	return 0;
 
 
 fail_free_mce_banks:
 fail_free_mce_banks:
@@ -7402,7 +7487,7 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
 	kvm_mmu_destroy(vcpu);
 	kvm_mmu_destroy(vcpu);
 	srcu_read_unlock(&vcpu->kvm->srcu, idx);
 	srcu_read_unlock(&vcpu->kvm->srcu, idx);
 	free_page((unsigned long)vcpu->arch.pio_data);
 	free_page((unsigned long)vcpu->arch.pio_data);
-	if (!irqchip_in_kernel(vcpu->kvm))
+	if (!lapic_in_kernel(vcpu))
 		static_key_slow_dec(&kvm_no_apic_vcpu);
 		static_key_slow_dec(&kvm_no_apic_vcpu);
 }
 }
 
 
@@ -8029,7 +8114,59 @@ bool kvm_arch_has_noncoherent_dma(struct kvm *kvm)
 }
 }
 EXPORT_SYMBOL_GPL(kvm_arch_has_noncoherent_dma);
 EXPORT_SYMBOL_GPL(kvm_arch_has_noncoherent_dma);
 
 
+int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons,
+				      struct irq_bypass_producer *prod)
+{
+	struct kvm_kernel_irqfd *irqfd =
+		container_of(cons, struct kvm_kernel_irqfd, consumer);
+
+	if (kvm_x86_ops->update_pi_irte) {
+		irqfd->producer = prod;
+		return kvm_x86_ops->update_pi_irte(irqfd->kvm,
+				prod->irq, irqfd->gsi, 1);
+	}
+
+	return -EINVAL;
+}
+
+void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons,
+				      struct irq_bypass_producer *prod)
+{
+	int ret;
+	struct kvm_kernel_irqfd *irqfd =
+		container_of(cons, struct kvm_kernel_irqfd, consumer);
+
+	if (!kvm_x86_ops->update_pi_irte) {
+		WARN_ON(irqfd->producer != NULL);
+		return;
+	}
+
+	WARN_ON(irqfd->producer != prod);
+	irqfd->producer = NULL;
+
+	/*
+	 * When the producer of a consumer is unregistered, we change back
+	 * to remapped mode, so we can reuse the current implementation
+	 * when the irq is masked/disabled or the consumer side (KVM
+	 * in this case) doesn't want to receive the interrupts.
+	 */
+	ret = kvm_x86_ops->update_pi_irte(irqfd->kvm, prod->irq, irqfd->gsi, 0);
+	if (ret)
+		printk(KERN_INFO "irq bypass consumer (token %p) unregistration"
+		       " fails: %d\n", irqfd->consumer.token, ret);
+}
+
+int kvm_arch_update_irqfd_routing(struct kvm *kvm, unsigned int host_irq,
+				   uint32_t guest_irq, bool set)
+{
+	if (!kvm_x86_ops->update_pi_irte)
+		return -EINVAL;
+
+	return kvm_x86_ops->update_pi_irte(kvm, host_irq, guest_irq, set);
+}
+
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit);
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_fast_mmio);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_msr);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_msr);
@@ -8044,3 +8181,4 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intercepts);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_write_tsc_offset);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_write_tsc_offset);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_ple_window);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_ple_window);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_pml_full);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_pml_full);
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_pi_irte_update);

+ 0 - 5
drivers/hv/hyperv_vmbus.h

@@ -63,9 +63,6 @@ enum hv_cpuid_function {
 /* Define version of the synthetic interrupt controller. */
 /* Define version of the synthetic interrupt controller. */
 #define HV_SYNIC_VERSION		(1)
 #define HV_SYNIC_VERSION		(1)
 
 
-/* Define the expected SynIC version. */
-#define HV_SYNIC_VERSION_1		(0x1)
-
 /* Define synthetic interrupt controller message constants. */
 /* Define synthetic interrupt controller message constants. */
 #define HV_MESSAGE_SIZE			(256)
 #define HV_MESSAGE_SIZE			(256)
 #define HV_MESSAGE_PAYLOAD_BYTE_COUNT	(240)
 #define HV_MESSAGE_PAYLOAD_BYTE_COUNT	(240)
@@ -105,8 +102,6 @@ enum hv_message_type {
 	HVMSG_X64_LEGACY_FP_ERROR		= 0x80010005
 };
 
-/* Define the number of synthetic interrupt sources. */
-#define HV_SYNIC_SINT_COUNT		(16)
 #define HV_SYNIC_STIMER_COUNT		(4)
 
 /* Define invalid partition identifier. */

+ 8 - 4
drivers/iommu/irq_remapping.c

@@ -22,7 +22,7 @@ int irq_remap_broken;
 int disable_sourceid_checking;
 int no_x2apic_optout;
 
-int disable_irq_post = 1;
+int disable_irq_post = 0;
 
 static int disable_irq_remap;
 static struct irq_remap_ops *remap_ops;
@@ -58,14 +58,18 @@ static __init int setup_irqremap(char *str)
 		return -EINVAL;
 
 	while (*str) {
-		if (!strncmp(str, "on", 2))
+		if (!strncmp(str, "on", 2)) {
 			disable_irq_remap = 0;
-		else if (!strncmp(str, "off", 3))
+			disable_irq_post = 0;
+		} else if (!strncmp(str, "off", 3)) {
 			disable_irq_remap = 1;
-		else if (!strncmp(str, "nosid", 5))
+			disable_irq_post = 1;
+		} else if (!strncmp(str, "nosid", 5))
 			disable_sourceid_checking = 1;
 		else if (!strncmp(str, "no_x2apic_optout", 16))
 			no_x2apic_optout = 1;
+		else if (!strncmp(str, "nopost", 6))
+			disable_irq_post = 1;
 
 		str += strcspn(str, ",");
 		while (*str == ',')

+ 1 - 0
drivers/vfio/Kconfig

@@ -33,3 +33,4 @@ menuconfig VFIO
 
 source "drivers/vfio/pci/Kconfig"
 source "drivers/vfio/platform/Kconfig"
+source "virt/lib/Kconfig"

+ 1 - 0
drivers/vfio/pci/Kconfig

@@ -2,6 +2,7 @@ config VFIO_PCI
 	tristate "VFIO support for PCI devices"
 	depends on VFIO && PCI && EVENTFD
 	select VFIO_VIRQFD
+	select IRQ_BYPASS_MANAGER
 	help
 	  Support for the PCI VFIO bus driver.  This is required to make
 	  use of PCI drivers using the VFIO framework.

+ 9 - 0
drivers/vfio/pci/vfio_pci_intrs.c

@@ -319,6 +319,7 @@ static int vfio_msi_set_vector_signal(struct vfio_pci_device *vdev,
 
 	if (vdev->ctx[vector].trigger) {
 		free_irq(irq, vdev->ctx[vector].trigger);
+		irq_bypass_unregister_producer(&vdev->ctx[vector].producer);
 		kfree(vdev->ctx[vector].name);
 		eventfd_ctx_put(vdev->ctx[vector].trigger);
 		vdev->ctx[vector].trigger = NULL;
@@ -360,6 +361,14 @@ static int vfio_msi_set_vector_signal(struct vfio_pci_device *vdev,
 		return ret;
 	}
 
+	vdev->ctx[vector].producer.token = trigger;
+	vdev->ctx[vector].producer.irq = irq;
+	ret = irq_bypass_register_producer(&vdev->ctx[vector].producer);
+	if (unlikely(ret))
+		dev_info(&pdev->dev,
+		"irq bypass producer (token %p) registration fails: %d\n",
+		vdev->ctx[vector].producer.token, ret);
+
 	vdev->ctx[vector].trigger = trigger;
 
 	return 0;

+ 2 - 0
drivers/vfio/pci/vfio_pci_private.h

@@ -13,6 +13,7 @@
 
 #include <linux/mutex.h>
 #include <linux/pci.h>
+#include <linux/irqbypass.h>
 
 #ifndef VFIO_PCI_PRIVATE_H
 #define VFIO_PCI_PRIVATE_H
@@ -29,6 +30,7 @@ struct vfio_pci_irq_ctx {
 	struct virqfd		*mask;
 	char			*name;
 	bool			masked;
+	struct irq_bypass_producer	producer;
 };
 
 struct vfio_pci_device {

+ 3 - 1
include/kvm/arm_arch_timer.h

@@ -51,7 +51,7 @@ struct arch_timer_cpu {
 	bool				armed;
 
 	/* Timer IRQ */
-	const struct kvm_irq_level	*irq;
+	struct kvm_irq_level		irq;
 
 	/* VGIC mapping */
 	struct irq_phys_map		*map;
@@ -71,5 +71,7 @@ u64 kvm_arm_timer_get_reg(struct kvm_vcpu *, u64 regid);
 int kvm_arm_timer_set_reg(struct kvm_vcpu *, u64 regid, u64 value);
 
 bool kvm_timer_should_fire(struct kvm_vcpu *vcpu);
+void kvm_timer_schedule(struct kvm_vcpu *vcpu);
+void kvm_timer_unschedule(struct kvm_vcpu *vcpu);
 
 #endif

+ 3 - 13
include/kvm/arm_vgic.h

@@ -112,7 +112,6 @@ struct vgic_vmcr {
 struct vgic_ops {
 	struct vgic_lr	(*get_lr)(const struct kvm_vcpu *, int);
 	void	(*set_lr)(struct kvm_vcpu *, int, struct vgic_lr);
-	void	(*sync_lr_elrsr)(struct kvm_vcpu *, int, struct vgic_lr);
 	u64	(*get_elrsr)(const struct kvm_vcpu *vcpu);
 	u64	(*get_eisr)(const struct kvm_vcpu *vcpu);
 	void	(*clear_eisr)(struct kvm_vcpu *vcpu);
@@ -159,7 +158,6 @@ struct irq_phys_map {
 	u32			virt_irq;
 	u32			phys_irq;
 	u32			irq;
-	bool			active;
 };
 
 struct irq_phys_map_entry {
@@ -296,22 +294,16 @@ struct vgic_v3_cpu_if {
 };
 
 struct vgic_cpu {
-	/* per IRQ to LR mapping */
-	u8		*vgic_irq_lr_map;
-
 	/* Pending/active/both interrupts on this VCPU */
-	DECLARE_BITMAP(	pending_percpu, VGIC_NR_PRIVATE_IRQS);
-	DECLARE_BITMAP(	active_percpu, VGIC_NR_PRIVATE_IRQS);
-	DECLARE_BITMAP(	pend_act_percpu, VGIC_NR_PRIVATE_IRQS);
+	DECLARE_BITMAP(pending_percpu, VGIC_NR_PRIVATE_IRQS);
+	DECLARE_BITMAP(active_percpu, VGIC_NR_PRIVATE_IRQS);
+	DECLARE_BITMAP(pend_act_percpu, VGIC_NR_PRIVATE_IRQS);
 
 	/* Pending/active/both shared interrupts, dynamically sized */
 	unsigned long	*pending_shared;
 	unsigned long   *active_shared;
 	unsigned long   *pend_act_shared;
 
-	/* Bitmap of used/free list registers */
-	DECLARE_BITMAP(	lr_used, VGIC_V2_MAX_LRS);
-
 	/* Number of list registers on this CPU */
 	int		nr_lr;
 
@@ -354,8 +346,6 @@ int kvm_vgic_vcpu_active_irq(struct kvm_vcpu *vcpu);
 struct irq_phys_map *kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu,
 					   int virt_irq, int irq);
 int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, struct irq_phys_map *map);
-bool kvm_vgic_get_phys_irq_active(struct irq_phys_map *map);
-void kvm_vgic_set_phys_irq_active(struct irq_phys_map *map, bool active);
 
 #define irqchip_in_kernel(k)	(!!((k)->arch.vgic.in_kernel))
 #define vgic_initialized(k)	(!!((k)->arch.vgic.nr_cpus))

+ 1 - 0
include/linux/hyperv.h

@@ -26,6 +26,7 @@
 #define _HYPERV_H
 
 #include <uapi/linux/hyperv.h>
+#include <uapi/asm/hyperv.h>
 
 #include <linux/types.h>
 #include <linux/scatterlist.h>

+ 90 - 0
include/linux/irqbypass.h

@@ -0,0 +1,90 @@
+/*
+ * IRQ offload/bypass manager
+ *
+ * Copyright (C) 2015 Red Hat, Inc.
+ * Copyright (c) 2015 Linaro Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef IRQBYPASS_H
+#define IRQBYPASS_H
+
+#include <linux/list.h>
+
+struct irq_bypass_consumer;
+
+/*
+ * Theory of operation
+ *
+ * The IRQ bypass manager is a simple set of lists and callbacks that allows
+ * IRQ producers (ex. physical interrupt sources) to be matched to IRQ
+ * consumers (ex. virtualization hardware that allows IRQ bypass or offload)
+ * via a shared token (ex. eventfd_ctx).  Producers and consumers register
+ * independently.  When a token match is found, the optional @stop callback
+ * will be called for each participant.  The pair will then be connected via
+ * the @add_* callbacks, and finally the optional @start callback will allow
+ * any final coordination.  When either participant is unregistered, the
+ * process is repeated using the @del_* callbacks in place of the @add_*
+ * callbacks.  Match tokens must be unique per producer/consumer, 1:N pairings
+ * are not supported.
+ */
+
+/**
+ * struct irq_bypass_producer - IRQ bypass producer definition
+ * @node: IRQ bypass manager private list management
+ * @token: opaque token to match between producer and consumer
+ * @irq: Linux IRQ number for the producer device
+ * @add_consumer: Connect the IRQ producer to an IRQ consumer (optional)
+ * @del_consumer: Disconnect the IRQ producer from an IRQ consumer (optional)
+ * @stop: Perform any quiesce operations necessary prior to add/del (optional)
+ * @start: Perform any startup operations necessary after add/del (optional)
+ *
+ * The IRQ bypass producer structure represents an interrupt source for
+ * participation in possible host bypass, for instance an interrupt vector
+ * for a physical device assigned to a VM.
+ */
+struct irq_bypass_producer {
+	struct list_head node;
+	void *token;
+	int irq;
+	int (*add_consumer)(struct irq_bypass_producer *,
+			    struct irq_bypass_consumer *);
+	void (*del_consumer)(struct irq_bypass_producer *,
+			     struct irq_bypass_consumer *);
+	void (*stop)(struct irq_bypass_producer *);
+	void (*start)(struct irq_bypass_producer *);
+};
+
+/**
+ * struct irq_bypass_consumer - IRQ bypass consumer definition
+ * @node: IRQ bypass manager private list management
+ * @token: opaque token to match between producer and consumer
+ * @add_producer: Connect the IRQ consumer to an IRQ producer
+ * @del_producer: Disconnect the IRQ consumer from an IRQ producer
+ * @stop: Perform any quiesce operations necessary prior to add/del (optional)
+ * @start: Perform any startup operations necessary after add/del (optional)
+ *
+ * The IRQ bypass consumer structure represents an interrupt sink for
+ * participation in possible host bypass, for instance a hypervisor may
+ * support offloads to allow bypassing the host entirely or offload
+ * portions of the interrupt handling to the VM.
+ */
+struct irq_bypass_consumer {
+	struct list_head node;
+	void *token;
+	int (*add_producer)(struct irq_bypass_consumer *,
+			    struct irq_bypass_producer *);
+	void (*del_producer)(struct irq_bypass_consumer *,
+			     struct irq_bypass_producer *);
+	void (*stop)(struct irq_bypass_consumer *);
+	void (*start)(struct irq_bypass_consumer *);
+};
+
+int irq_bypass_register_producer(struct irq_bypass_producer *);
+void irq_bypass_unregister_producer(struct irq_bypass_producer *);
+int irq_bypass_register_consumer(struct irq_bypass_consumer *);
+void irq_bypass_unregister_consumer(struct irq_bypass_consumer *);
+
+#endif /* IRQBYPASS_H */

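As an aside, here is a minimal sketch of how a driver and a hypervisor side might pair up through the interface declared above. Only the irq_bypass_* calls and the two structures come from the header; every other name (example_pair, shared_token, device_irq, the consumer's callbacks) is hypothetical and stands in for the VFIO/KVM specifics shown elsewhere in this series.

	/* Sketch only: pair a producer and a consumer via a shared token. */
	#include <linux/irqbypass.h>

	static int example_pair(void *shared_token, int device_irq,
				struct irq_bypass_consumer *cons)
	{
		static struct irq_bypass_producer prod;
		int ret;

		prod.token = shared_token;	/* e.g. the irqfd's eventfd_ctx */
		prod.irq = device_irq;		/* host Linux IRQ of the device vector */

		/* Registration order does not matter; the manager matches on token. */
		ret = irq_bypass_register_producer(&prod);
		if (ret)
			return ret;

		cons->token = shared_token;	/* must equal the producer's token */
		return irq_bypass_register_consumer(cons);
	}

On a token match the manager invokes the optional stop/start callbacks around add_producer/add_consumer, so the two sides never have to know about each other directly.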
+ 40 - 2
include/linux/kvm_host.h

@@ -24,6 +24,7 @@
 #include <linux/err.h>
 #include <linux/irqflags.h>
 #include <linux/context_tracking.h>
+#include <linux/irqbypass.h>
 #include <asm/signal.h>
 
 #include <linux/kvm.h>
@@ -140,6 +141,8 @@ static inline bool is_error_page(struct page *page)
 #define KVM_REQ_APIC_PAGE_RELOAD  25
 #define KVM_REQ_SMI               26
 #define KVM_REQ_HV_CRASH          27
+#define KVM_REQ_IOAPIC_EOI_EXIT   28
+#define KVM_REQ_HV_RESET          29
 
 #define KVM_USERSPACE_IRQ_SOURCE_ID		0
 #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID	1
@@ -231,6 +234,9 @@ struct kvm_vcpu {
 	unsigned long requests;
 	unsigned long guest_debug;
 
+	int pre_pcpu;
+	struct list_head blocked_vcpu_list;
+
 	struct mutex mutex;
 	struct kvm_run *run;
 
@@ -329,6 +335,18 @@ struct kvm_kernel_irq_routing_entry {
 	struct hlist_node link;
 };
 
+#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
+struct kvm_irq_routing_table {
+	int chip[KVM_NR_IRQCHIPS][KVM_IRQCHIP_NUM_PINS];
+	u32 nr_rt_entries;
+	/*
+	 * Array indexed by gsi. Each entry contains list of irq chips
+	 * the gsi is connected to.
+	 */
+	struct hlist_head map[0];
+};
+#endif
+
 #ifndef KVM_PRIVATE_MEM_SLOTS
 #define KVM_PRIVATE_MEM_SLOTS 0
 #endif
@@ -455,10 +473,14 @@ void vcpu_put(struct kvm_vcpu *vcpu);
 
 #ifdef __KVM_HAVE_IOAPIC
 void kvm_vcpu_request_scan_ioapic(struct kvm *kvm);
+void kvm_arch_irq_routing_update(struct kvm *kvm);
 #else
 static inline void kvm_vcpu_request_scan_ioapic(struct kvm *kvm)
 {
 }
+static inline void kvm_arch_irq_routing_update(struct kvm *kvm)
+{
+}
 #endif
 
 #ifdef CONFIG_HAVE_KVM_IRQFD
@@ -625,6 +647,8 @@ int kvm_vcpu_write_guest(struct kvm_vcpu *vcpu, gpa_t gpa, const void *data,
 void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn);
 
 void kvm_vcpu_block(struct kvm_vcpu *vcpu);
+void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu);
+void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu);
 void kvm_vcpu_kick(struct kvm_vcpu *vcpu);
 int kvm_vcpu_yield_to(struct kvm_vcpu *target);
 void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu);
@@ -803,10 +827,13 @@ int kvm_irq_map_chip_pin(struct kvm *kvm, unsigned irqchip, unsigned pin);
 
 int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
 		bool line_status);
-int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level);
 int kvm_set_msi(struct kvm_kernel_irq_routing_entry *irq_entry, struct kvm *kvm,
 		int irq_source_id, int level, bool line_status);
+int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e,
+			       struct kvm *kvm, int irq_source_id,
+			       int level, bool line_status);
 bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin);
+void kvm_notify_acked_gsi(struct kvm *kvm, int gsi);
 void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin);
 void kvm_register_irq_ack_notifier(struct kvm *kvm,
 				   struct kvm_irq_ack_notifier *kian);
@@ -1002,6 +1029,7 @@ static inline int mmu_notifier_retry(struct kvm *kvm, unsigned long mmu_seq)
 #endif
 
 int kvm_setup_default_irq_routing(struct kvm *kvm);
+int kvm_setup_empty_irq_routing(struct kvm *kvm);
 int kvm_set_irq_routing(struct kvm *kvm,
 			const struct kvm_irq_routing_entry *entries,
 			unsigned nr,
@@ -1144,5 +1172,15 @@ static inline void kvm_vcpu_set_dy_eligible(struct kvm_vcpu *vcpu, bool val)
 {
 }
 #endif /* CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT */
-#endif
 
+#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
+int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *,
+			   struct irq_bypass_producer *);
+void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *,
+			   struct irq_bypass_producer *);
+void kvm_arch_irq_bypass_stop(struct irq_bypass_consumer *);
+void kvm_arch_irq_bypass_start(struct irq_bypass_consumer *);
+int kvm_arch_update_irqfd_routing(struct kvm *kvm, unsigned int host_irq,
+				  uint32_t guest_irq, bool set);
+#endif /* CONFIG_HAVE_KVM_IRQ_BYPASS */
+#endif

+ 71 - 0
include/linux/kvm_irqfd.h

@@ -0,0 +1,71 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * irqfd: Allows an fd to be used to inject an interrupt to the guest
+ * Credit goes to Avi Kivity for the original idea.
+ */
+
+#ifndef __LINUX_KVM_IRQFD_H
+#define __LINUX_KVM_IRQFD_H
+
+#include <linux/kvm_host.h>
+#include <linux/poll.h>
+
+/*
+ * Resampling irqfds are a special variety of irqfds used to emulate
+ * level triggered interrupts.  The interrupt is asserted on eventfd
+ * trigger.  On acknowledgment through the irq ack notifier, the
+ * interrupt is de-asserted and userspace is notified through the
+ * resamplefd.  All resamplers on the same gsi are de-asserted
+ * together, so we don't need to track the state of each individual
+ * user.  We can also therefore share the same irq source ID.
+ */
+struct kvm_kernel_irqfd_resampler {
+	struct kvm *kvm;
+	/*
+	 * List of resampling struct _irqfd objects sharing this gsi.
+	 * RCU list modified under kvm->irqfds.resampler_lock
+	 */
+	struct list_head list;
+	struct kvm_irq_ack_notifier notifier;
+	/*
+	 * Entry in list of kvm->irqfd.resampler_list.  Use for sharing
+	 * resamplers among irqfds on the same gsi.
+	 * Accessed and modified under kvm->irqfds.resampler_lock
+	 */
+	struct list_head link;
+};
+
+struct kvm_kernel_irqfd {
+	/* Used for MSI fast-path */
+	struct kvm *kvm;
+	wait_queue_t wait;
+	/* Update side is protected by irqfds.lock */
+	struct kvm_kernel_irq_routing_entry irq_entry;
+	seqcount_t irq_entry_sc;
+	/* Used for level IRQ fast-path */
+	int gsi;
+	struct work_struct inject;
+	/* The resampler used by this irqfd (resampler-only) */
+	struct kvm_kernel_irqfd_resampler *resampler;
+	/* Eventfd notified on resample (resampler-only) */
+	struct eventfd_ctx *resamplefd;
+	/* Entry in list of irqfds for a resampler (resampler-only) */
+	struct list_head resampler_link;
+	/* Used for setup/shutdown */
+	struct eventfd_ctx *eventfd;
+	struct list_head list;
+	poll_table pt;
+	struct work_struct shutdown;
+	struct irq_bypass_consumer consumer;
+	struct irq_bypass_producer *producer;
+};
+
+#endif /* __LINUX_KVM_IRQFD_H */

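For context, a hedged sketch of the userspace side of the resampling irqfd described in the comment above. It assumes a VM file descriptor and a GSI already exist and uses only the long-standing KVM_IRQFD ioctl; error handling is omitted.

	/* Userspace sketch: level-triggered interrupt via a resampling irqfd. */
	#include <sys/eventfd.h>
	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	static int setup_resample_irqfd(int vm_fd, unsigned int gsi)
	{
		struct kvm_irqfd irqfd = {
			.fd         = eventfd(0, 0),	/* asserts the GSI when written */
			.resamplefd = eventfd(0, 0),	/* notified when the guest EOIs */
			.gsi        = gsi,
			.flags      = KVM_IRQFD_FLAG_RESAMPLE,
		};

		return ioctl(vm_fd, KVM_IRQFD, &irqfd);
	}

The new producer/consumer fields at the end of struct kvm_kernel_irqfd are what let such an irqfd participate in the IRQ bypass pairing introduced earlier in this series.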
+ 7 - 0
include/uapi/linux/kvm.h

@@ -183,6 +183,7 @@ struct kvm_s390_skeys {
 #define KVM_EXIT_EPR              23
 #define KVM_EXIT_SYSTEM_EVENT     24
 #define KVM_EXIT_S390_STSI        25
+#define KVM_EXIT_IOAPIC_EOI       26
 
 /* For KVM_EXIT_INTERNAL_ERROR */
 /* Emulate instruction failed. */
@@ -333,6 +334,10 @@ struct kvm_run {
 			__u8 sel1;
 			__u16 sel2;
 		} s390_stsi;
+		/* KVM_EXIT_IOAPIC_EOI */
+		struct {
+			__u8 vector;
+		} eoi;
 		/* Fix the size of the union. */
 		char padding[256];
 	};
@@ -824,6 +829,8 @@ struct kvm_ppc_smmu_info {
 #define KVM_CAP_MULTI_ADDRESS_SPACE 118
 #define KVM_CAP_GUEST_DEBUG_HW_BPS 119
 #define KVM_CAP_GUEST_DEBUG_HW_WPS 120
+#define KVM_CAP_SPLIT_IRQCHIP 121
+#define KVM_CAP_IOEVENTFD_ANY_LENGTH 122
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
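The two new capability numbers and the KVM_EXIT_IOAPIC_EOI exit are the uapi face of the "split irqchip" mentioned in the merge summary. A hedged sketch of how userspace might use them follows; the pin count of 24 mirrors a classic IOAPIC and userspace_ioapic_eoi() is a hypothetical helper, not part of the KVM API.

	/* Userspace sketch: opt into the split irqchip and handle EOI exits. */
	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	extern void userspace_ioapic_eoi(unsigned char vector);	/* hypothetical */

	static int enable_split_irqchip(int vm_fd)
	{
		struct kvm_enable_cap cap = {
			.cap = KVM_CAP_SPLIT_IRQCHIP,
			.args = { 24 },	/* routes reserved for the userspace IOAPIC */
		};

		return ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
	}

	static void handle_exit(struct kvm_run *run)
	{
		switch (run->exit_reason) {
		case KVM_EXIT_IOAPIC_EOI:
			/* Propagate the EOI for this vector to the emulated IOAPIC. */
			userspace_ioapic_eoi(run->eoi.vector);
			break;
		}
	}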

+ 2 - 0
kernel/sched/cputime.c

@@ -444,6 +444,7 @@ void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
 	*ut = p->utime;
 	*st = p->stime;
 }
+EXPORT_SYMBOL_GPL(task_cputime_adjusted);
 
 void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
 {
@@ -652,6 +653,7 @@ void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
 	task_cputime(p, &cputime.utime, &cputime.stime);
 	cputime_adjust(&cputime, &p->prev_cputime, ut, st);
 }
+EXPORT_SYMBOL_GPL(task_cputime_adjusted);
 
 void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
 {

+ 1 - 0
virt/Makefile

@@ -0,0 +1 @@
+obj-y	+= lib/

+ 4 - 1
virt/kvm/Kconfig

@@ -46,4 +46,7 @@ config KVM_GENERIC_DIRTYLOG_READ_PROTECT
 
 config KVM_COMPAT
        def_bool y
-       depends on COMPAT && !S390
+       depends on KVM && COMPAT && !S390
+
+config HAVE_KVM_IRQ_BYPASS
+       bool

+ 117 - 56
virt/kvm/arm/arch_timer.c

@@ -28,6 +28,8 @@
 #include <kvm/arm_vgic.h>
 #include <kvm/arm_arch_timer.h>
 
+#include "trace.h"
+
 static struct timecounter *timecounter;
 static struct workqueue_struct *wqueue;
 static unsigned int host_vtimer_irq;
@@ -59,18 +61,6 @@ static void timer_disarm(struct arch_timer_cpu *timer)
 	}
 }
 
-static void kvm_timer_inject_irq(struct kvm_vcpu *vcpu)
-{
-	int ret;
-	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
-
-	kvm_vgic_set_phys_irq_active(timer->map, true);
-	ret = kvm_vgic_inject_mapped_irq(vcpu->kvm, vcpu->vcpu_id,
-					 timer->map,
-					 timer->irq->level);
-	WARN_ON(ret);
-}
-
 static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id)
 {
 	struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)dev_id;
@@ -111,14 +101,20 @@ static enum hrtimer_restart kvm_timer_expire(struct hrtimer *hrt)
 	return HRTIMER_NORESTART;
 }
 
+static bool kvm_timer_irq_can_fire(struct kvm_vcpu *vcpu)
+{
+	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+
+	return !(timer->cntv_ctl & ARCH_TIMER_CTRL_IT_MASK) &&
+		(timer->cntv_ctl & ARCH_TIMER_CTRL_ENABLE);
+}
+
 bool kvm_timer_should_fire(struct kvm_vcpu *vcpu)
 {
 	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
 	cycle_t cval, now;
 
-	if ((timer->cntv_ctl & ARCH_TIMER_CTRL_IT_MASK) ||
-	    !(timer->cntv_ctl & ARCH_TIMER_CTRL_ENABLE) ||
-	    kvm_vgic_get_phys_irq_active(timer->map))
+	if (!kvm_timer_irq_can_fire(vcpu))
 		return false;
 
 	cval = timer->cntv_cval;
@@ -127,12 +123,94 @@ bool kvm_timer_should_fire(struct kvm_vcpu *vcpu)
 	return cval <= now;
 }
 
+static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level)
+{
+	int ret;
+	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+
+	BUG_ON(!vgic_initialized(vcpu->kvm));
+
+	timer->irq.level = new_level;
+	trace_kvm_timer_update_irq(vcpu->vcpu_id, timer->map->virt_irq,
+				   timer->irq.level);
+	ret = kvm_vgic_inject_mapped_irq(vcpu->kvm, vcpu->vcpu_id,
+					 timer->map,
+					 timer->irq.level);
+	WARN_ON(ret);
+}
+
+/*
+ * Check if there was a change in the timer state (should we raise or lower
+ * the line level to the GIC).
+ */
+static void kvm_timer_update_state(struct kvm_vcpu *vcpu)
+{
+	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+
+	/*
+	 * If userspace modified the timer registers via SET_ONE_REG before
+	 * the vgic was initialized, we mustn't set the timer->irq.level value
+	 * because the guest would never see the interrupt.  Instead wait
+	 * until we call this function from kvm_timer_flush_hwstate.
+	 */
+	if (!vgic_initialized(vcpu->kvm))
+	    return;
+
+	if (kvm_timer_should_fire(vcpu) != timer->irq.level)
+		kvm_timer_update_irq(vcpu, !timer->irq.level);
+}
+
+/*
+ * Schedule the background timer before calling kvm_vcpu_block, so that this
+ * thread is removed from its waitqueue and made runnable when there's a timer
+ * interrupt to handle.
+ */
+void kvm_timer_schedule(struct kvm_vcpu *vcpu)
+{
+	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+	u64 ns;
+	cycle_t cval, now;
+
+	BUG_ON(timer_is_armed(timer));
+
+	/*
+	 * No need to schedule a background timer if the guest timer has
+	 * already expired, because kvm_vcpu_block will return before putting
+	 * the thread to sleep.
+	 */
+	if (kvm_timer_should_fire(vcpu))
+		return;
+
+	/*
+	 * If the timer is not capable of raising interrupts (disabled or
+	 * masked), then there's no more work for us to do.
+	 */
+	if (!kvm_timer_irq_can_fire(vcpu))
+		return;
+
+	/*  The timer has not yet expired, schedule a background timer */
+	cval = timer->cntv_cval;
+	now = kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff;
+
+	ns = cyclecounter_cyc2ns(timecounter->cc,
+				 cval - now,
+				 timecounter->mask,
+				 &timecounter->frac);
+	timer_arm(timer, ns);
+}
+
+void kvm_timer_unschedule(struct kvm_vcpu *vcpu)
+{
+	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+	timer_disarm(timer);
+}
+
 /**
  * kvm_timer_flush_hwstate - prepare to move the virt timer to the cpu
  * @vcpu: The vcpu pointer
  *
- * Disarm any pending soft timers, since the world-switch code will write the
- * virtual timer state back to the physical CPU.
+ * Check if the virtual timer has expired while we were running in the host,
+ * and inject an interrupt if that was the case.
  */
 void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu)
 {
@@ -140,28 +218,20 @@ void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu)
 	bool phys_active;
 	int ret;
 
-	/*
-	 * We're about to run this vcpu again, so there is no need to
-	 * keep the background timer running, as we're about to
-	 * populate the CPU timer again.
-	 */
-	timer_disarm(timer);
+	kvm_timer_update_state(vcpu);
 
 	/*
-	 * If the timer expired while we were not scheduled, now is the time
-	 * to inject it.
+	 * If we enter the guest with the virtual input level to the VGIC
+	 * asserted, then we have already told the VGIC what we need to, and
+	 * we don't need to exit from the guest until the guest deactivates
+	 * the already injected interrupt, so therefore we should set the
+	 * hardware active state to prevent unnecessary exits from the guest.
+	 *
+	 * Conversely, if the virtual input level is deasserted, then always
+	 * clear the hardware active state to ensure that hardware interrupts
+	 * from the timer triggers a guest exit.
 	 */
-	if (kvm_timer_should_fire(vcpu))
-		kvm_timer_inject_irq(vcpu);
-
-	/*
-	 * We keep track of whether the edge-triggered interrupt has been
-	 * signalled to the vgic/guest, and if so, we mask the interrupt and
-	 * the physical distributor to prevent the timer from raising a
-	 * physical interrupt whenever we run a guest, preventing forward
-	 * VCPU progress.
-	 */
-	if (kvm_vgic_get_phys_irq_active(timer->map))
+	if (timer->irq.level)
 		phys_active = true;
 	else
 		phys_active = false;
@@ -176,32 +246,20 @@ void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu)
  * kvm_timer_sync_hwstate - sync timer state from cpu
  * @vcpu: The vcpu pointer
  *
- * Check if the virtual timer was armed and either schedule a corresponding
- * soft timer or inject directly if already expired.
+ * Check if the virtual timer has expired while we were running in the guest,
+ * and inject an interrupt if that was the case.
  */
 void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu)
 {
 	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
-	cycle_t cval, now;
-	u64 ns;
 
 
 	BUG_ON(timer_is_armed(timer));
 
-		/*
-		 * Timer has already expired while we were not
-		 * looking. Inject the interrupt and carry on.
-		 */
-		kvm_timer_inject_irq(vcpu);
-		return;
-	}
-
-	cval = timer->cntv_cval;
-	now = kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff;
-
-	ns = cyclecounter_cyc2ns(timecounter->cc, cval - now, timecounter->mask,
-				 &timecounter->frac);
-	timer_arm(timer, ns);
+	/*
+	 * The guest could have modified the timer registers or the timer
+	 * could have expired, update the timer state.
+	 */
+	kvm_timer_update_state(vcpu);
 }
 
 int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu,
@@ -216,7 +274,7 @@ int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu,
 	 * kvm_vcpu_set_target(). To handle this, we determine
 	 * vcpu timer irq number when the vcpu is reset.
 	 */
-	timer->irq = irq;
+	timer->irq.irq = irq->irq;
 
 	/*
 	 * The bits in CNTV_CTL are architecturally reset to UNKNOWN for ARMv8
@@ -225,6 +283,7 @@ int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu,
 	 * the ARMv7 architecture.
 	 */
 	timer->cntv_ctl = 0;
+	kvm_timer_update_state(vcpu);
 
 	/*
 	 * Tell the VGIC that the virtual interrupt is tied to a
@@ -269,6 +328,8 @@ int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value)
 	default:
 		return -1;
 	}
+
+	kvm_timer_update_state(vcpu);
 	return 0;
 }
 
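The new kvm_timer_schedule()/kvm_timer_unschedule() pair is meant to bracket kvm_vcpu_block(), so a blocked VCPU is still woken when its virtual timer fires. A sketch of the intended call sites follows; the actual wiring lives in the arch/arm/kvm/arm.c changes listed in the merge summary and is not reproduced here.

	/* Sketch of the intended hook usage around kvm_vcpu_block(). */
	void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu)
	{
		/* Arm a background hrtimer so a timer expiry wakes the blocked VCPU. */
		kvm_timer_schedule(vcpu);
	}

	void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu)
	{
		/* The world-switch code takes over again; drop the soft timer. */
		kvm_timer_unschedule(vcpu);
	}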

+ 63 - 0
virt/kvm/arm/trace.h

@@ -0,0 +1,63 @@
+#if !defined(_TRACE_KVM_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_KVM_H
+
+#include <linux/tracepoint.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM kvm
+
+/*
+ * Tracepoints for vgic
+ */
+TRACE_EVENT(vgic_update_irq_pending,
+	TP_PROTO(unsigned long vcpu_id, __u32 irq, bool level),
+	TP_ARGS(vcpu_id, irq, level),
+
+	TP_STRUCT__entry(
+		__field(	unsigned long,	vcpu_id	)
+		__field(	__u32,		irq	)
+		__field(	bool,		level	)
+	),
+
+	TP_fast_assign(
+		__entry->vcpu_id	= vcpu_id;
+		__entry->irq		= irq;
+		__entry->level		= level;
+	),
+
+	TP_printk("VCPU: %ld, IRQ %d, level: %d",
+		  __entry->vcpu_id, __entry->irq, __entry->level)
+);
+
+/*
+ * Tracepoints for arch_timer
+ */
+TRACE_EVENT(kvm_timer_update_irq,
+	TP_PROTO(unsigned long vcpu_id, __u32 irq, int level),
+	TP_ARGS(vcpu_id, irq, level),
+
+	TP_STRUCT__entry(
+		__field(	unsigned long,	vcpu_id	)
+		__field(	__u32,		irq	)
+		__field(	int,		level	)
+	),
+
+	TP_fast_assign(
+		__entry->vcpu_id	= vcpu_id;
+		__entry->irq		= irq;
+		__entry->level		= level;
+	),
+
+	TP_printk("VCPU: %ld, IRQ %d, level %d",
+		  __entry->vcpu_id, __entry->irq, __entry->level)
+);
+
+#endif /* _TRACE_KVM_H */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH ../../../virt/kvm/arm
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE trace
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>

+ 1 - 5
virt/kvm/arm/vgic-v2.c

@@ -79,11 +79,7 @@ static void vgic_v2_set_lr(struct kvm_vcpu *vcpu, int lr,
 		lr_val |= (lr_desc.source << GICH_LR_PHYSID_CPUID_SHIFT);
 
 	vcpu->arch.vgic_cpu.vgic_v2.vgic_lr[lr] = lr_val;
-}
 
-static void vgic_v2_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr,
-				  struct vgic_lr lr_desc)
-{
 	if (!(lr_desc.state & LR_STATE_MASK))
 		vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr |= (1ULL << lr);
 	else
@@ -158,6 +154,7 @@ static void vgic_v2_enable(struct kvm_vcpu *vcpu)
 	 * anyway.
 	 */
 	vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = 0;
+	vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr = ~0;
 
 	/* Get the show on the road... */
 	vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr = GICH_HCR_EN;
@@ -166,7 +163,6 @@ static void vgic_v2_enable(struct kvm_vcpu *vcpu)
 static const struct vgic_ops vgic_v2_ops = {
 	.get_lr			= vgic_v2_get_lr,
 	.set_lr			= vgic_v2_set_lr,
-	.sync_lr_elrsr		= vgic_v2_sync_lr_elrsr,
 	.get_elrsr		= vgic_v2_get_elrsr,
 	.get_eisr		= vgic_v2_get_eisr,
 	.clear_eisr		= vgic_v2_clear_eisr,

+ 1 - 5
virt/kvm/arm/vgic-v3.c

@@ -112,11 +112,7 @@ static void vgic_v3_set_lr(struct kvm_vcpu *vcpu, int lr,
 	}
 
 	vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[LR_INDEX(lr)] = lr_val;
-}
 
-static void vgic_v3_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr,
-				  struct vgic_lr lr_desc)
-{
 	if (!(lr_desc.state & LR_STATE_MASK))
 		vcpu->arch.vgic_cpu.vgic_v3.vgic_elrsr |= (1U << lr);
 	else
@@ -193,6 +189,7 @@ static void vgic_v3_enable(struct kvm_vcpu *vcpu)
 	 * anyway.
 	 */
 	vgic_v3->vgic_vmcr = 0;
+	vgic_v3->vgic_elrsr = ~0;
 
 	/*
 	 * If we are emulating a GICv3, we do it in an non-GICv2-compatible
@@ -211,7 +208,6 @@ static void vgic_v3_enable(struct kvm_vcpu *vcpu)
 static const struct vgic_ops vgic_v3_ops = {
 	.get_lr			= vgic_v3_get_lr,
 	.set_lr			= vgic_v3_set_lr,
-	.sync_lr_elrsr		= vgic_v3_sync_lr_elrsr,
 	.get_elrsr		= vgic_v3_get_elrsr,
 	.get_eisr		= vgic_v3_get_eisr,
 	.clear_eisr		= vgic_v3_clear_eisr,

+ 119 - 189
virt/kvm/arm/vgic.c

@@ -34,6 +34,9 @@
 #include <asm/kvm.h>
 #include <kvm/iodev.h>
 
+#define CREATE_TRACE_POINTS
+#include "trace.h"
+
 /*
  * How the whole thing works (courtesy of Christoffer Dall):
  *
@@ -102,11 +105,13 @@
 #include "vgic.h"
 
 static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu);
-static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu);
+static void vgic_retire_lr(int lr_nr, struct kvm_vcpu *vcpu);
 static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr);
 static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr, struct vgic_lr lr_desc);
+static u64 vgic_get_elrsr(struct kvm_vcpu *vcpu);
 static struct irq_phys_map *vgic_irq_map_search(struct kvm_vcpu *vcpu,
 						int virt_irq);
+static int compute_pending_for_cpu(struct kvm_vcpu *vcpu);
 
 static const struct vgic_ops *vgic_ops;
 static const struct vgic_params *vgic;
@@ -357,6 +362,11 @@ static void vgic_dist_irq_clear_soft_pend(struct kvm_vcpu *vcpu, int irq)
 	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
 
 	vgic_bitmap_set_irq_val(&dist->irq_soft_pend, vcpu->vcpu_id, irq, 0);
+	if (!vgic_dist_irq_get_level(vcpu, irq)) {
+		vgic_dist_irq_clear_pending(vcpu, irq);
+		if (!compute_pending_for_cpu(vcpu))
+			clear_bit(vcpu->vcpu_id, dist->irq_pending_on_cpu);
+	}
 }
 
 static int vgic_dist_irq_is_pending(struct kvm_vcpu *vcpu, int irq)
@@ -531,34 +541,6 @@ bool vgic_handle_set_pending_reg(struct kvm *kvm,
 	return false;
 }
 
-/*
- * If a mapped interrupt's state has been modified by the guest such that it
- * is no longer active or pending, without it have gone through the sync path,
- * then the map->active field must be cleared so the interrupt can be taken
- * again.
- */
-static void vgic_handle_clear_mapped_irq(struct kvm_vcpu *vcpu)
-{
-	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
-	struct list_head *root;
-	struct irq_phys_map_entry *entry;
-	struct irq_phys_map *map;
-
-	rcu_read_lock();
-
-	/* Check for PPIs */
-	root = &vgic_cpu->irq_phys_map_list;
-	list_for_each_entry_rcu(entry, root, entry) {
-		map = &entry->map;
-
-		if (!vgic_dist_irq_is_pending(vcpu, map->virt_irq) &&
-		    !vgic_irq_is_active(vcpu, map->virt_irq))
-			map->active = false;
-	}
-
-	rcu_read_unlock();
-}
-
 bool vgic_handle_clear_pending_reg(struct kvm *kvm,
 				   struct kvm_exit_mmio *mmio,
 				   phys_addr_t offset, int vcpu_id)
@@ -589,7 +571,6 @@ bool vgic_handle_clear_pending_reg(struct kvm *kvm,
 					  vcpu_id, offset);
 		vgic_reg_access(mmio, reg, offset, mode);
 
-		vgic_handle_clear_mapped_irq(kvm_get_vcpu(kvm, vcpu_id));
 		vgic_update_state(kvm);
 		return true;
 	}
@@ -627,7 +608,6 @@ bool vgic_handle_clear_active_reg(struct kvm *kvm,
 			ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT);
 
 	if (mmio->is_write) {
-		vgic_handle_clear_mapped_irq(kvm_get_vcpu(kvm, vcpu_id));
 		vgic_update_state(kvm);
 		return true;
 	}
@@ -684,10 +664,9 @@ bool vgic_handle_cfg_reg(u32 *reg, struct kvm_exit_mmio *mmio,
 	vgic_reg_access(mmio, &val, offset,
 			ACCESS_READ_VALUE | ACCESS_WRITE_VALUE);
 	if (mmio->is_write) {
-		if (offset < 8) {
-			*reg = ~0U; /* Force PPIs/SGIs to 1 */
+		/* Ignore writes to read-only SGI and PPI bits */
+		if (offset < 8)
 			return false;
-		}
 
 		val = vgic_cfg_compress(val);
 		if (offset & 4) {
@@ -713,9 +692,11 @@ bool vgic_handle_cfg_reg(u32 *reg, struct kvm_exit_mmio *mmio,
 void vgic_unqueue_irqs(struct kvm_vcpu *vcpu)
 {
 	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
+	u64 elrsr = vgic_get_elrsr(vcpu);
+	unsigned long *elrsr_ptr = u64_to_bitmask(&elrsr);
 	int i;
 
-	for_each_set_bit(i, vgic_cpu->lr_used, vgic_cpu->nr_lr) {
+	for_each_clear_bit(i, elrsr_ptr, vgic_cpu->nr_lr) {
 		struct vgic_lr lr = vgic_get_lr(vcpu, i);
 
 		/*
@@ -736,30 +717,14 @@ void vgic_unqueue_irqs(struct kvm_vcpu *vcpu)
 		 * interrupt then move the active state to the
 		 * distributor tracking bit.
 		 */
-		if (lr.state & LR_STATE_ACTIVE) {
+		if (lr.state & LR_STATE_ACTIVE)
 			vgic_irq_set_active(vcpu, lr.irq);
-			lr.state &= ~LR_STATE_ACTIVE;
-		}
 
 		/*
 		 * Reestablish the pending state on the distributor and the
-		 * CPU interface.  It may have already been pending, but that
-		 * is fine, then we are only setting a few bits that were
-		 * already set.
+		 * CPU interface and mark the LR as free for other use.
 		 */
-		if (lr.state & LR_STATE_PENDING) {
-			vgic_dist_irq_set_pending(vcpu, lr.irq);
-			lr.state &= ~LR_STATE_PENDING;
-		}
-
-		vgic_set_lr(vcpu, i, lr);
-
-		/*
-		 * Mark the LR as free for other use.
-		 */
-		BUG_ON(lr.state & LR_STATE_MASK);
-		vgic_retire_lr(i, lr.irq, vcpu);
-		vgic_irq_clear_queued(vcpu, lr.irq);
+		vgic_retire_lr(i, vcpu);
 
 		/* Finally update the VGIC state. */
 		vgic_update_state(vcpu->kvm);
@@ -1067,12 +1032,6 @@ static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr,
 	vgic_ops->set_lr(vcpu, lr, vlr);
 }
 
-static void vgic_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr,
-			       struct vgic_lr vlr)
-{
-	vgic_ops->sync_lr_elrsr(vcpu, lr, vlr);
-}
-
 static inline u64 vgic_get_elrsr(struct kvm_vcpu *vcpu)
 {
 	return vgic_ops->get_elrsr(vcpu);
@@ -1118,25 +1077,23 @@ static inline void vgic_enable(struct kvm_vcpu *vcpu)
 	vgic_ops->enable(vcpu);
 }
 
-static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu)
+static void vgic_retire_lr(int lr_nr, struct kvm_vcpu *vcpu)
 {
-	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
 	struct vgic_lr vlr = vgic_get_lr(vcpu, lr_nr);
 
+	vgic_irq_clear_queued(vcpu, vlr.irq);
+
 	/*
 	/*
 	 * We must transfer the pending state back to the distributor before
 	 * retiring the LR, otherwise we may loose edge-triggered interrupts.
 	 */
 	if (vlr.state & LR_STATE_PENDING) {
+		vgic_dist_irq_set_pending(vcpu, vlr.irq);
 		vlr.hwirq = 0;
 		vlr.hwirq = 0;
 	}
 	}
 
 	vlr.state = 0;
 	vgic_set_lr(vcpu, lr_nr, vlr);
-	vgic_cpu->vgic_irq_lr_map[irq] = LR_EMPTY;
-	vgic_sync_lr_elrsr(vcpu, lr_nr, vlr);
 }
 }
 
 /*
  */
  */
 static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu)
 {
+	u64 elrsr = vgic_get_elrsr(vcpu);
+	unsigned long *elrsr_ptr = u64_to_bitmask(&elrsr);
 	int lr;
 	int lr;
 
+	for_each_clear_bit(lr, elrsr_ptr, vgic->nr_lr) {
 		struct vgic_lr vlr = vgic_get_lr(vcpu, lr);
 		struct vgic_lr vlr = vgic_get_lr(vcpu, lr);
 
-			vgic_retire_lr(lr, vlr.irq, vcpu);
-			if (vgic_irq_is_queued(vcpu, vlr.irq))
-				vgic_irq_clear_queued(vcpu, vlr.irq);
-		}
+		if (!vgic_irq_is_enabled(vcpu, vlr.irq))
+			vgic_retire_lr(lr, vcpu);
 	}
 }
 
@@ -1200,7 +1155,6 @@ static void vgic_queue_irq_to_lr(struct kvm_vcpu *vcpu, int irq,
 	}
 
 	vgic_set_lr(vcpu, lr_nr, vlr);
-	vgic_sync_lr_elrsr(vcpu, lr_nr, vlr);
 }
 
 /*
@@ -1210,8 +1164,9 @@ static void vgic_queue_irq_to_lr(struct kvm_vcpu *vcpu, int irq,
  */
 bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq)
 {
-	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
 	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+	u64 elrsr = vgic_get_elrsr(vcpu);
+	unsigned long *elrsr_ptr = u64_to_bitmask(&elrsr);
 	struct vgic_lr vlr;
 	int lr;
 
@@ -1222,28 +1177,22 @@ bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq)
 
 	kvm_debug("Queue IRQ%d\n", irq);
 
-	lr = vgic_cpu->vgic_irq_lr_map[irq];
-
 	/* Do we have an active interrupt for the same CPUID? */
-	if (lr != LR_EMPTY) {
+	for_each_clear_bit(lr, elrsr_ptr, vgic->nr_lr) {
 		vlr = vgic_get_lr(vcpu, lr);
-		if (vlr.source == sgi_source_id) {
+		if (vlr.irq == irq && vlr.source == sgi_source_id) {
 			kvm_debug("LR%d piggyback for IRQ%d\n", lr, vlr.irq);
-			BUG_ON(!test_bit(lr, vgic_cpu->lr_used));
 			vgic_queue_irq_to_lr(vcpu, irq, lr, vlr);
 			return true;
 		}
 	}
 
 	/* Try to use another LR for this interrupt */
-	lr = find_first_zero_bit((unsigned long *)vgic_cpu->lr_used,
-			       vgic->nr_lr);
+	lr = find_first_bit(elrsr_ptr, vgic->nr_lr);
 	if (lr >= vgic->nr_lr)
 		return false;
 
 	kvm_debug("LR%d allocated for IRQ%d %x\n", lr, irq, sgi_source_id);
-	vgic_cpu->vgic_irq_lr_map[irq] = lr;
-	set_bit(lr, vgic_cpu->lr_used);
 
 	vlr.irq = irq;
 	vlr.source = sgi_source_id;
@@ -1338,12 +1287,60 @@ epilog:
 	}
 }
 
+static int process_queued_irq(struct kvm_vcpu *vcpu,
+				   int lr, struct vgic_lr vlr)
+{
+	int pending = 0;
+
+	/*
+	 * If the IRQ was EOIed (called from vgic_process_maintenance) or it
+	 * went from active to non-active (called from vgic_sync_hwirq) it was
+	 * also ACKed and we therefore assume we can clear the soft pending
+	 * state (should it have been set) for this interrupt.
+	 *
+	 * Note: if the IRQ soft pending state was set after the IRQ was
+	 * acked, it actually shouldn't be cleared, but we have no way of
+	 * knowing that unless we start trapping ACKs when the soft-pending
+	 * state is set.
+	 */
+	vgic_dist_irq_clear_soft_pend(vcpu, vlr.irq);
+
+	/*
+	 * Tell the gic to start sampling this interrupt again.
+	 */
+	vgic_irq_clear_queued(vcpu, vlr.irq);
+
+	/* Any additional pending interrupt? */
+	if (vgic_irq_is_edge(vcpu, vlr.irq)) {
+		BUG_ON(!(vlr.state & LR_HW));
+		pending = vgic_dist_irq_is_pending(vcpu, vlr.irq);
+	} else {
+		if (vgic_dist_irq_get_level(vcpu, vlr.irq)) {
+			vgic_cpu_irq_set(vcpu, vlr.irq);
+			pending = 1;
+		} else {
+			vgic_dist_irq_clear_pending(vcpu, vlr.irq);
+			vgic_cpu_irq_clear(vcpu, vlr.irq);
+		}
+	}
+
+	/*
+	 * Despite being EOIed, the LR may not have
+	 * been marked as empty.
+	 */
+	vlr.state = 0;
+	vlr.hwirq = 0;
+	vgic_set_lr(vcpu, lr, vlr);
+
+	return pending;
+}
+
 static bool vgic_process_maintenance(struct kvm_vcpu *vcpu)
 {
 	u32 status = vgic_get_interrupt_status(vcpu);
 	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
-	bool level_pending = false;
 	struct kvm *kvm = vcpu->kvm;
+	int level_pending = 0;
 
 	kvm_debug("STATUS = %08x\n", status);
 
@@ -1358,54 +1355,22 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu)
 
 		for_each_set_bit(lr, eisr_ptr, vgic->nr_lr) {
 			struct vgic_lr vlr = vgic_get_lr(vcpu, lr);
-			WARN_ON(vgic_irq_is_edge(vcpu, vlr.irq));
 
-			spin_lock(&dist->lock);
-			vgic_irq_clear_queued(vcpu, vlr.irq);
+			WARN_ON(vgic_irq_is_edge(vcpu, vlr.irq));
 			WARN_ON(vlr.state & LR_STATE_MASK);
-			vlr.state = 0;
-			vgic_set_lr(vcpu, lr, vlr);
 
-			/*
-			 * If the IRQ was EOIed it was also ACKed and we we
-			 * therefore assume we can clear the soft pending
-			 * state (should it had been set) for this interrupt.
-			 *
-			 * Note: if the IRQ soft pending state was set after
-			 * the IRQ was acked, it actually shouldn't be
-			 * cleared, but we have no way of knowing that unless
-			 * we start trapping ACKs when the soft-pending state
-			 * is set.
-			 */
-			vgic_dist_irq_clear_soft_pend(vcpu, vlr.irq);
 
 			/*
 			 * kvm_notify_acked_irq calls kvm_set_irq()
-			 * to reset the IRQ level. Need to release the
-			 * lock for kvm_set_irq to grab it.
+			 * to reset the IRQ level, which grabs the dist->lock
+			 * so we call this before taking the dist->lock.
 			 */
-			spin_unlock(&dist->lock);
-
 			kvm_notify_acked_irq(kvm, 0,
 					     vlr.irq - VGIC_NR_PRIVATE_IRQS);
-			spin_lock(&dist->lock);
-
-			/* Any additional pending interrupt? */
-			if (vgic_dist_irq_get_level(vcpu, vlr.irq)) {
-				vgic_cpu_irq_set(vcpu, vlr.irq);
-				level_pending = true;
-			} else {
-				vgic_dist_irq_clear_pending(vcpu, vlr.irq);
-				vgic_cpu_irq_clear(vcpu, vlr.irq);
-			}
 
+			spin_lock(&dist->lock);
+			level_pending |= process_queued_irq(vcpu, lr, vlr);
 			spin_unlock(&dist->lock);
-
-			/*
-			 * Despite being EOIed, the LR may not have
-			 * been marked as empty.
-			 */
-			vgic_sync_lr_elrsr(vcpu, lr, vlr);
 		}
 	}
 
@@ -1426,35 +1391,40 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu)
 /*
  * Save the physical active state, and reset it to inactive.
  *
- * Return 1 if HW interrupt went from active to inactive, and 0 otherwise.
+ * Return true if there's a pending forwarded interrupt to queue.
  */
-static int vgic_sync_hwirq(struct kvm_vcpu *vcpu, struct vgic_lr vlr)
+static bool vgic_sync_hwirq(struct kvm_vcpu *vcpu, int lr, struct vgic_lr vlr)
 {
+	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
 	struct irq_phys_map *map;
+	bool phys_active;
+	bool level_pending;
 	int ret;
 	int ret;
 
 	if (!(vlr.state & LR_HW))
+		return false;
 
 
 	map = vgic_irq_map_search(vcpu, vlr.irq);
 	BUG_ON(!map);
 
 	ret = irq_get_irqchip_state(map->irq,
 				    IRQCHIP_STATE_ACTIVE,
+				    &phys_active);
 
 
 	WARN_ON(ret);
 
+	if (phys_active)
 		return 0;
 		return 0;
 
+	spin_lock(&dist->lock);
+	level_pending = process_queued_irq(vcpu, lr, vlr);
+	spin_unlock(&dist->lock);
+	return level_pending;
 }
 
 /* Sync back the VGIC state after a guest run */
 static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
 {
-	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
 	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
 	u64 elrsr;
 	unsigned long *elrsr_ptr;
@@ -1462,40 +1432,18 @@ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
 	bool level_pending;
 
 	level_pending = vgic_process_maintenance(vcpu);
-	elrsr = vgic_get_elrsr(vcpu);
-	elrsr_ptr = u64_to_bitmask(&elrsr);
 
 
 	/* Deal with HW interrupts, and clear mappings for empty LRs */
 	for (lr = 0; lr < vgic->nr_lr; lr++) {
-
-		if (!test_bit(lr, vgic_cpu->lr_used))
-			continue;
-
-		vlr = vgic_get_lr(vcpu, lr);
-		if (vgic_sync_hwirq(vcpu, vlr)) {
-			/*
-			 * So this is a HW interrupt that the guest
-			 * EOI-ed. Clean the LR state and allow the
-			 * interrupt to be sampled again.
-			 */
-			vlr.state = 0;
-			vlr.hwirq = 0;
-			vgic_set_lr(vcpu, lr, vlr);
-			vgic_irq_clear_queued(vcpu, vlr.irq);
-			set_bit(lr, elrsr_ptr);
-		}
-
-		if (!test_bit(lr, elrsr_ptr))
-			continue;
-
-		clear_bit(lr, vgic_cpu->lr_used);
+		struct vgic_lr vlr = vgic_get_lr(vcpu, lr);
 
+		level_pending |= vgic_sync_hwirq(vcpu, lr, vlr);
 		BUG_ON(vlr.irq >= dist->nr_irqs);
-		vgic_cpu->vgic_irq_lr_map[vlr.irq] = LR_EMPTY;
 	}
 
 	/* Check if we still have something up our sleeve... */
+	elrsr = vgic_get_elrsr(vcpu);
+	elrsr_ptr = u64_to_bitmask(&elrsr);
 	pending = find_first_zero_bit(elrsr_ptr, vgic->nr_lr);
 	if (level_pending || pending < vgic->nr_lr)
 		set_bit(vcpu->vcpu_id, dist->irq_pending_on_cpu);
@@ -1585,6 +1533,8 @@ static int vgic_update_irq_pending(struct kvm *kvm, int cpuid,
 	int enabled;
 	bool ret = true, can_inject = true;
 
+	trace_vgic_update_irq_pending(cpuid, irq_num, level);
+
 	if (irq_num >= min(kvm->arch.vgic.nr_irqs, 1020))
 		return -EINVAL;
 
@@ -1863,30 +1813,6 @@ static void vgic_free_phys_irq_map_rcu(struct rcu_head *rcu)
 	kfree(entry);
 }
 
-/**
- * kvm_vgic_get_phys_irq_active - Return the active state of a mapped IRQ
- *
- * Return the logical active state of a mapped interrupt. This doesn't
- * necessarily reflects the current HW state.
- */
-bool kvm_vgic_get_phys_irq_active(struct irq_phys_map *map)
-{
-	BUG_ON(!map);
-	return map->active;
-}
-
-/**
- * kvm_vgic_set_phys_irq_active - Set the active state of a mapped IRQ
- *
- * Set the logical active state of a mapped interrupt. This doesn't
- * immediately affects the HW state.
- */
-void kvm_vgic_set_phys_irq_active(struct irq_phys_map *map, bool active)
-{
-	BUG_ON(!map);
-	map->active = active;
-}
-
 /**
  * kvm_vgic_unmap_phys_irq - Remove a virtual to physical IRQ mapping
  * @vcpu: The VCPU pointer
@@ -1942,12 +1868,10 @@ void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu)
 	kfree(vgic_cpu->pending_shared);
 	kfree(vgic_cpu->active_shared);
 	kfree(vgic_cpu->pend_act_shared);
-	kfree(vgic_cpu->vgic_irq_lr_map);
 	vgic_destroy_irq_phys_map(vcpu->kvm, &vgic_cpu->irq_phys_map_list);
 	vgic_cpu->pending_shared = NULL;
 	vgic_cpu->active_shared = NULL;
 	vgic_cpu->pend_act_shared = NULL;
-	vgic_cpu->vgic_irq_lr_map = NULL;
 }
 
 static int vgic_vcpu_init_maps(struct kvm_vcpu *vcpu, int nr_irqs)
@@ -1958,18 +1882,14 @@ static int vgic_vcpu_init_maps(struct kvm_vcpu *vcpu, int nr_irqs)
 	vgic_cpu->pending_shared = kzalloc(sz, GFP_KERNEL);
 	vgic_cpu->active_shared = kzalloc(sz, GFP_KERNEL);
 	vgic_cpu->pend_act_shared = kzalloc(sz, GFP_KERNEL);
-	vgic_cpu->vgic_irq_lr_map = kmalloc(nr_irqs, GFP_KERNEL);
 
 	if (!vgic_cpu->pending_shared
 		|| !vgic_cpu->active_shared
-		|| !vgic_cpu->pend_act_shared
-		|| !vgic_cpu->vgic_irq_lr_map) {
+		|| !vgic_cpu->pend_act_shared) {
 		kvm_vgic_vcpu_destroy(vcpu);
 		return -ENOMEM;
 	}
 
-	memset(vgic_cpu->vgic_irq_lr_map, LR_EMPTY, nr_irqs);
-
 	/*
 	 * Store the number of LRs per vcpu, so we don't have to go
 	 * all the way to the distributor structure to find out. Only
@@ -2111,14 +2031,24 @@ int vgic_init(struct kvm *kvm)
 			break;
 		}
 
-		for (i = 0; i < dist->nr_irqs; i++) {
-			if (i < VGIC_NR_PPIS)
+		/*
+		 * Enable and configure all SGIs to be edge-triggered and
+		 * configure all PPIs as level-triggered.
+		 */
+		for (i = 0; i < VGIC_NR_PRIVATE_IRQS; i++) {
+			if (i < VGIC_NR_SGIS) {
+				/* SGIs */
 				vgic_bitmap_set_irq_val(&dist->irq_enabled,
 							vcpu->vcpu_id, i, 1);
-			if (i < VGIC_NR_PRIVATE_IRQS)
 				vgic_bitmap_set_irq_val(&dist->irq_cfg,
 							vcpu->vcpu_id, i,
 							VGIC_CFG_EDGE);
+			} else if (i < VGIC_NR_PRIVATE_IRQS) {
+				/* PPIs */
+				vgic_bitmap_set_irq_val(&dist->irq_cfg,
+							vcpu->vcpu_id, i,
+							VGIC_CFG_LEVEL);
+			}
 		}
 
 		vgic_enable(vcpu);
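
Editor's note: the trace_vgic_update_irq_pending() call added in this hunk relies on a tracepoint declared in the ARM KVM trace header, which is not shown in this diff. As a hedged sketch only, a TRACE_EVENT declaration of that shape typically looks like the following (field names, types and the format string are illustrative assumptions, and the usual TRACE_INCLUDE_PATH/TRACE_INCLUDE_FILE boilerplate is omitted):

#undef TRACE_SYSTEM
#define TRACE_SYSTEM kvm

#if !defined(_TRACE_VGIC_SKETCH_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_VGIC_SKETCH_H

#include <linux/tracepoint.h>

TRACE_EVENT(vgic_update_irq_pending,
	/* assumed prototype: vcpu id, interrupt number, requested level */
	TP_PROTO(unsigned long vcpu_id, __u32 irq, bool level),
	TP_ARGS(vcpu_id, irq, level),

	TP_STRUCT__entry(
		__field(unsigned long,	vcpu_id)
		__field(__u32,		irq)
		__field(bool,		level)
	),

	TP_fast_assign(
		__entry->vcpu_id = vcpu_id;
		__entry->irq     = irq;
		__entry->level   = level;
	),

	TP_printk("VCPU: %lu, IRQ %u, level: %d",
		  __entry->vcpu_id, __entry->irq, __entry->level)
);

#endif /* _TRACE_VGIC_SKETCH_H */

/* this include must stay outside the multi-read guard */
#include <trace/define_trace.h>

Once compiled in, an event declared this way shows up under the kvm group in tracefs and can be enabled like any other tracepoint.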

+ 4 - 0
virt/kvm/async_pf.c

@@ -94,6 +94,10 @@ static void async_pf_execute(struct work_struct *work)
 
 
 	trace_kvm_async_pf_completed(addr, gva);
 
+	/*
+	 * This memory barrier pairs with prepare_to_wait's set_current_state()
+	 */
+	smp_mb();
 	if (waitqueue_active(&vcpu->wq))
 		wake_up_interruptible(&vcpu->wq);
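
Editor's note on the barrier just added: smp_mb() orders the stores that publish the completed page-fault work against the waitqueue_active() load, pairing with the full barrier implied by set_current_state() inside prepare_to_wait() on the vCPU side; without it the load could be reordered and a wakeup lost. A minimal user-space analogue of that store-then-check protocol, with C11 seq_cst atomics standing in for the kernel primitives (names are illustrative, not kernel code):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_bool waiter_queued;	/* analogue of "task is on vcpu->wq" */
static atomic_bool work_done;		/* analogue of the completed async #PF */

static void waiter_side(void)		/* analogue of kvm_vcpu_block() */
{
	/* publish "I am waiting" before checking the condition */
	atomic_store(&waiter_queued, true);
	if (!atomic_load(&work_done))
		puts("waiter: no work yet, would schedule() and sleep");
}

static void waker_side(void)		/* analogue of async_pf_execute() */
{
	/* publish the completion before checking for a waiter */
	atomic_store(&work_done, true);
	if (atomic_load(&waiter_queued))
		puts("waker: waiter present, would wake_up_interruptible()");
}

int main(void)
{
	waiter_side();
	waker_side();
	return 0;
}

Because both sides store before they load, at least one of them is guaranteed to observe the other, which is exactly the property the smp_mb()/set_current_state() pairing provides in the kernel.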
 
 

+ 97 - 93
virt/kvm/eventfd.c

@@ -23,6 +23,7 @@
 
 
 #include <linux/kvm_host.h>
 #include <linux/kvm.h>
+#include <linux/kvm_irqfd.h>
 #include <linux/workqueue.h>
 #include <linux/syscalls.h>
 #include <linux/wait.h>
@@ -34,73 +35,20 @@
 #include <linux/srcu.h>
 #include <linux/slab.h>
 #include <linux/seqlock.h>
+#include <linux/irqbypass.h>
 #include <trace/events/kvm.h>
 
 #include <kvm/iodev.h>
 
 #ifdef CONFIG_HAVE_KVM_IRQFD
-/*
- * --------------------------------------------------------------------
- * irqfd: Allows an fd to be used to inject an interrupt to the guest
- *
- * Credit goes to Avi Kivity for the original idea.
- * --------------------------------------------------------------------
- */
-
-/*
- * Resampling irqfds are a special variety of irqfds used to emulate
- * level triggered interrupts.  The interrupt is asserted on eventfd
- * trigger.  On acknowledgement through the irq ack notifier, the
- * interrupt is de-asserted and userspace is notified through the
- * resamplefd.  All resamplers on the same gsi are de-asserted
- * together, so we don't need to track the state of each individual
- * user.  We can also therefore share the same irq source ID.
- */
-struct _irqfd_resampler {
-	struct kvm *kvm;
-	/*
-	 * List of resampling struct _irqfd objects sharing this gsi.
-	 * RCU list modified under kvm->irqfds.resampler_lock
-	 */
-	struct list_head list;
-	struct kvm_irq_ack_notifier notifier;
-	/*
-	 * Entry in list of kvm->irqfd.resampler_list.  Use for sharing
-	 * resamplers among irqfds on the same gsi.
-	 * Accessed and modified under kvm->irqfds.resampler_lock
-	 */
-	struct list_head link;
-};
-
-struct _irqfd {
-	/* Used for MSI fast-path */
-	struct kvm *kvm;
-	wait_queue_t wait;
-	/* Update side is protected by irqfds.lock */
-	struct kvm_kernel_irq_routing_entry irq_entry;
-	seqcount_t irq_entry_sc;
-	/* Used for level IRQ fast-path */
-	int gsi;
-	struct work_struct inject;
-	/* The resampler used by this irqfd (resampler-only) */
-	struct _irqfd_resampler *resampler;
-	/* Eventfd notified on resample (resampler-only) */
-	struct eventfd_ctx *resamplefd;
-	/* Entry in list of irqfds for a resampler (resampler-only) */
-	struct list_head resampler_link;
-	/* Used for setup/shutdown */
-	struct eventfd_ctx *eventfd;
-	struct list_head list;
-	poll_table pt;
-	struct work_struct shutdown;
-};
 
 
 static struct workqueue_struct *irqfd_cleanup_wq;
 
 static void
 irqfd_inject(struct work_struct *work)
 {
-	struct _irqfd *irqfd = container_of(work, struct _irqfd, inject);
+	struct kvm_kernel_irqfd *irqfd =
+		container_of(work, struct kvm_kernel_irqfd, inject);
 	struct kvm *kvm = irqfd->kvm;
 
 	if (!irqfd->resampler) {
@@ -121,12 +69,13 @@ irqfd_inject(struct work_struct *work)
 static void
 irqfd_resampler_ack(struct kvm_irq_ack_notifier *kian)
 {
-	struct _irqfd_resampler *resampler;
+	struct kvm_kernel_irqfd_resampler *resampler;
 	struct kvm *kvm;
-	struct _irqfd *irqfd;
+	struct kvm_kernel_irqfd *irqfd;
 	int idx;
 
-	resampler = container_of(kian, struct _irqfd_resampler, notifier);
+	resampler = container_of(kian,
+			struct kvm_kernel_irqfd_resampler, notifier);
 	kvm = resampler->kvm;
 
 	kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
@@ -141,9 +90,9 @@ irqfd_resampler_ack(struct kvm_irq_ack_notifier *kian)
 }
 
 static void
-irqfd_resampler_shutdown(struct _irqfd *irqfd)
+irqfd_resampler_shutdown(struct kvm_kernel_irqfd *irqfd)
 {
-	struct _irqfd_resampler *resampler = irqfd->resampler;
+	struct kvm_kernel_irqfd_resampler *resampler = irqfd->resampler;
 	struct kvm *kvm = resampler->kvm;
 
 	mutex_lock(&kvm->irqfds.resampler_lock);
@@ -168,7 +117,8 @@ irqfd_resampler_shutdown(struct _irqfd *irqfd)
 static void
 irqfd_shutdown(struct work_struct *work)
 {
-	struct _irqfd *irqfd = container_of(work, struct _irqfd, shutdown);
+	struct kvm_kernel_irqfd *irqfd =
+		container_of(work, struct kvm_kernel_irqfd, shutdown);
 	u64 cnt;
 
 	/*
@@ -191,6 +141,9 @@ irqfd_shutdown(struct work_struct *work)
 	/*
 	 * It is now safe to release the object's resources
 	 */
+#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
+	irq_bypass_unregister_consumer(&irqfd->consumer);
+#endif
 	eventfd_ctx_put(irqfd->eventfd);
 	kfree(irqfd);
 }
@@ -198,7 +151,7 @@ irqfd_shutdown(struct work_struct *work)
 
 
 /* assumes kvm->irqfds.lock is held */
 static bool
-irqfd_is_active(struct _irqfd *irqfd)
+irqfd_is_active(struct kvm_kernel_irqfd *irqfd)
 {
 	return list_empty(&irqfd->list) ? false : true;
 }
@@ -209,7 +162,7 @@ irqfd_is_active(struct _irqfd *irqfd)
  * assumes kvm->irqfds.lock is held
  */
 static void
-irqfd_deactivate(struct _irqfd *irqfd)
+irqfd_deactivate(struct kvm_kernel_irqfd *irqfd)
 {
 	BUG_ON(!irqfd_is_active(irqfd));
 
@@ -218,13 +171,23 @@ irqfd_deactivate(struct _irqfd *irqfd)
 	queue_work(irqfd_cleanup_wq, &irqfd->shutdown);
 }
 
+int __attribute__((weak)) kvm_arch_set_irq_inatomic(
+				struct kvm_kernel_irq_routing_entry *irq,
+				struct kvm *kvm, int irq_source_id,
+				int level,
+				bool line_status)
+{
+	return -EWOULDBLOCK;
+}
+
 /*
  * Called with wqh->lock held and interrupts disabled
  */
 static int
 irqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key)
 {
-	struct _irqfd *irqfd = container_of(wait, struct _irqfd, wait);
+	struct kvm_kernel_irqfd *irqfd =
+		container_of(wait, struct kvm_kernel_irqfd, wait);
 	unsigned long flags = (unsigned long)key;
 	struct kvm_kernel_irq_routing_entry irq;
 	struct kvm *kvm = irqfd->kvm;
@@ -238,10 +201,9 @@ irqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key)
 			irq = irqfd->irq_entry;
 		} while (read_seqcount_retry(&irqfd->irq_entry_sc, seq));
 		/* An event has been signaled, inject an interrupt */
-		if (irq.type == KVM_IRQ_ROUTING_MSI)
-			kvm_set_msi(&irq, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1,
-					false);
-		else
+		if (kvm_arch_set_irq_inatomic(&irq, kvm,
+					      KVM_USERSPACE_IRQ_SOURCE_ID, 1,
+					      false) == -EWOULDBLOCK)
 			schedule_work(&irqfd->inject);
 		srcu_read_unlock(&kvm->irq_srcu, idx);
 	}
@@ -274,37 +236,54 @@ static void
 irqfd_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh,
 			poll_table *pt)
 {
-	struct _irqfd *irqfd = container_of(pt, struct _irqfd, pt);
+	struct kvm_kernel_irqfd *irqfd =
+		container_of(pt, struct kvm_kernel_irqfd, pt);
 	add_wait_queue(wqh, &irqfd->wait);
 }
 
 /* Must be called under irqfds.lock */
-static void irqfd_update(struct kvm *kvm, struct _irqfd *irqfd)
+static void irqfd_update(struct kvm *kvm, struct kvm_kernel_irqfd *irqfd)
 {
 	struct kvm_kernel_irq_routing_entry *e;
 	struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS];
-	int i, n_entries;
+	int n_entries;
 
 	n_entries = kvm_irq_map_gsi(kvm, entries, irqfd->gsi);
 
 	write_seqcount_begin(&irqfd->irq_entry_sc);
 
-	irqfd->irq_entry.type = 0;
-
 	e = entries;
-	for (i = 0; i < n_entries; ++i, ++e) {
-		/* Only fast-path MSI. */
-		if (e->type == KVM_IRQ_ROUTING_MSI)
-			irqfd->irq_entry = *e;
-	}
+	if (n_entries == 1)
+		irqfd->irq_entry = *e;
+	else
+		irqfd->irq_entry.type = 0;
 
 
 	write_seqcount_end(&irqfd->irq_entry_sc);
 }
 
+#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
+void __attribute__((weak)) kvm_arch_irq_bypass_stop(
+				struct irq_bypass_consumer *cons)
+{
+}
+
+void __attribute__((weak)) kvm_arch_irq_bypass_start(
+				struct irq_bypass_consumer *cons)
+{
+}
+
+int  __attribute__((weak)) kvm_arch_update_irqfd_routing(
+				struct kvm *kvm, unsigned int host_irq,
+				uint32_t guest_irq, bool set)
+{
+	return 0;
+}
+#endif
+
 static int
 kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
 {
-	struct _irqfd *irqfd, *tmp;
+	struct kvm_kernel_irqfd *irqfd, *tmp;
 	struct fd f;
 	struct eventfd_ctx *eventfd = NULL, *resamplefd = NULL;
 	int ret;
@@ -340,7 +319,7 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
 	irqfd->eventfd = eventfd;
 
 	if (args->flags & KVM_IRQFD_FLAG_RESAMPLE) {
-		struct _irqfd_resampler *resampler;
+		struct kvm_kernel_irqfd_resampler *resampler;
 
 		resamplefd = eventfd_ctx_fdget(args->resamplefd);
 		if (IS_ERR(resamplefd)) {
@@ -428,6 +407,17 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
 	 * we might race against the POLLHUP
 	 */
 	fdput(f);
+#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
+	irqfd->consumer.token = (void *)irqfd->eventfd;
+	irqfd->consumer.add_producer = kvm_arch_irq_bypass_add_producer;
+	irqfd->consumer.del_producer = kvm_arch_irq_bypass_del_producer;
+	irqfd->consumer.stop = kvm_arch_irq_bypass_stop;
+	irqfd->consumer.start = kvm_arch_irq_bypass_start;
+	ret = irq_bypass_register_consumer(&irqfd->consumer);
+	if (ret)
+		pr_info("irq bypass consumer (token %p) registration fails: %d\n",
+				irqfd->consumer.token, ret);
+#endif
 
 
 	return 0;
 
@@ -469,9 +459,18 @@ bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin)
 }
 EXPORT_SYMBOL_GPL(kvm_irq_has_notifier);
 
-void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
+void kvm_notify_acked_gsi(struct kvm *kvm, int gsi)
 {
 	struct kvm_irq_ack_notifier *kian;
+
+	hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
+				 link)
+		if (kian->gsi == gsi)
+			kian->irq_acked(kian);
+}
+
+void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
+{
 	int gsi, idx;
 
 	trace_kvm_ack_irq(irqchip, pin);
@@ -479,10 +478,7 @@ void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
 	idx = srcu_read_lock(&kvm->irq_srcu);
 	gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin);
 	if (gsi != -1)
-		hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
-					 link)
-			if (kian->gsi == gsi)
-				kian->irq_acked(kian);
+		kvm_notify_acked_gsi(kvm, gsi);
 	srcu_read_unlock(&kvm->irq_srcu, idx);
 }
 
@@ -525,7 +521,7 @@ kvm_eventfd_init(struct kvm *kvm)
 static int
 kvm_irqfd_deassign(struct kvm *kvm, struct kvm_irqfd *args)
 {
-	struct _irqfd *irqfd, *tmp;
+	struct kvm_kernel_irqfd *irqfd, *tmp;
 	struct eventfd_ctx *eventfd;
 
 	eventfd = eventfd_ctx_fdget(args->fd);
@@ -581,7 +577,7 @@ kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args)
 void
 kvm_irqfd_release(struct kvm *kvm)
 {
-	struct _irqfd *irqfd, *tmp;
+	struct kvm_kernel_irqfd *irqfd, *tmp;
 
 	spin_lock_irq(&kvm->irqfds.lock);
 
@@ -604,13 +600,23 @@ kvm_irqfd_release(struct kvm *kvm)
  */
 void kvm_irq_routing_update(struct kvm *kvm)
 {
-	struct _irqfd *irqfd;
+	struct kvm_kernel_irqfd *irqfd;
 
 	spin_lock_irq(&kvm->irqfds.lock);
 
-	list_for_each_entry(irqfd, &kvm->irqfds.items, list)
+	list_for_each_entry(irqfd, &kvm->irqfds.items, list) {
 		irqfd_update(kvm, irqfd);
 
+#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
+		if (irqfd->producer) {
+			int ret = kvm_arch_update_irqfd_routing(
+					irqfd->kvm, irqfd->producer->irq,
+					irqfd->gsi, 1);
+			WARN_ON(ret);
+		}
+#endif
+	}
+
 	spin_unlock_irq(&kvm->irqfds.lock);
 }
 
@@ -914,9 +920,7 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
 		return -EINVAL;
 
 	/* ioeventfd with no length can't be combined with DATAMATCH */
-	if (!args->len &&
-	    args->flags & (KVM_IOEVENTFD_FLAG_PIO |
-			   KVM_IOEVENTFD_FLAG_DATAMATCH))
+	if (!args->len && (args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH))
 		return -EINVAL;
 
 	ret = kvm_assign_ioeventfd_idx(kvm, bus_idx, args);
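
Editor's note: kvm_arch_set_irq_inatomic() above is defined as a weak symbol so that an architecture can provide a strong override that handles the injection without sleeping, while the generic stub simply refuses with -EWOULDBLOCK and irqfd_wakeup() falls back to the workqueue. A stand-alone sketch of that pattern (function names are invented for illustration; only the weak attribute and the -EWOULDBLOCK convention mirror the code above):

#include <errno.h>
#include <stdio.h>

/* generic code: weak default; a strong definition elsewhere overrides it */
int __attribute__((weak)) arch_try_fast_inject(int irq)
{
	return -EWOULDBLOCK;	/* "cannot do this atomically, please defer" */
}

static void deferred_inject(int irq)
{
	printf("deferring injection of irq %d to a work item\n", irq);
}

int main(void)
{
	int irq = 5;

	/* mirrors irqfd_wakeup(): try the atomic fast path, else defer */
	if (arch_try_fast_inject(irq) == -EWOULDBLOCK)
		deferred_inject(irq);
	return 0;
}

Linking another object file that defines a non-weak arch_try_fast_inject() silently replaces the stub, which is how the per-architecture implementations slot in.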

+ 5 - 13
virt/kvm/irqchip.c

@@ -31,16 +31,6 @@
 #include <trace/events/kvm.h>
 #include "irq.h"
 
-struct kvm_irq_routing_table {
-	int chip[KVM_NR_IRQCHIPS][KVM_IRQCHIP_NUM_PINS];
-	u32 nr_rt_entries;
-	/*
-	 * Array indexed by gsi. Each entry contains list of irq chips
-	 * the gsi is connected to.
-	 */
-	struct hlist_head map[0];
-};
-
 int kvm_irq_map_gsi(struct kvm *kvm,
 		    struct kvm_kernel_irq_routing_entry *entries, int gsi)
 {
@@ -154,11 +144,11 @@ static int setup_routing_entry(struct kvm_irq_routing_table *rt,
 
 
 	/*
 	 * Do not allow GSI to be mapped to the same irqchip more than once.
-	 * Allow only one to one mapping between GSI and MSI.
+	 * Allow only one to one mapping between GSI and non-irqchip routing.
 	 */
 	hlist_for_each_entry(ei, &rt->map[ue->gsi], link)
-		if (ei->type == KVM_IRQ_ROUTING_MSI ||
-		    ue->type == KVM_IRQ_ROUTING_MSI ||
+		if (ei->type != KVM_IRQ_ROUTING_IRQCHIP ||
+		    ue->type != KVM_IRQ_ROUTING_IRQCHIP ||
 		    ue->u.irqchip.irqchip == ei->irqchip.irqchip)
 			return r;
 
@@ -231,6 +221,8 @@ int kvm_set_irq_routing(struct kvm *kvm,
 	kvm_irq_routing_update(kvm);
 	mutex_unlock(&kvm->irq_lock);
 
+	kvm_arch_irq_routing_update(kvm);
+
 	synchronize_srcu_expedited(&kvm->irq_srcu);
 
 	new = old;
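
Editor's note: the table that setup_routing_entry() validates is handed to the kernel by userspace through the KVM_SET_GSI_ROUTING ioctl, and the relaxed check above still permits only a single non-irqchip entry per GSI. A hedged userspace sketch of installing one MSI route (vm_fd and the helper are assumptions for illustration; note that the ioctl replaces the entire routing table, so a real VMM passes all of its routes in one call):

#include <linux/kvm.h>
#include <stdlib.h>
#include <sys/ioctl.h>

static int route_gsi_to_msi(int vm_fd, __u32 gsi,
			    __u32 addr_lo, __u32 addr_hi, __u32 data)
{
	struct kvm_irq_routing *table;
	int ret;

	/* room for exactly one entry: this GSI maps to one MSI route */
	table = calloc(1, sizeof(*table) + sizeof(table->entries[0]));
	if (!table)
		return -1;

	table->nr = 1;
	table->entries[0].gsi = gsi;
	table->entries[0].type = KVM_IRQ_ROUTING_MSI;
	table->entries[0].u.msi.address_lo = addr_lo;
	table->entries[0].u.msi.address_hi = addr_hi;
	table->entries[0].u.msi.data = data;

	ret = ioctl(vm_fd, KVM_SET_GSI_ROUTING, table);
	free(table);
	return ret;
}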

+ 9 - 2
virt/kvm/kvm_main.c

@@ -230,6 +230,9 @@ int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
 	init_waitqueue_head(&vcpu->wq);
 	kvm_async_pf_vcpu_init(vcpu);
 
+	vcpu->pre_pcpu = -1;
+	INIT_LIST_HEAD(&vcpu->blocked_vcpu_list);
+
 	page = alloc_page(GFP_KERNEL | __GFP_ZERO);
 	if (!page) {
 		r = -ENOMEM;
@@ -2018,6 +2021,8 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
 		} while (single_task_running() && ktime_before(cur, stop));
 	}
 
+	kvm_arch_vcpu_blocking(vcpu);
+
 	for (;;) {
 		prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE);
 
@@ -2031,6 +2036,7 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
 	finish_wait(&vcpu->wq, &wait);
 	cur = ktime_get();
 
+	kvm_arch_vcpu_unblocking(vcpu);
 out:
 	block_ns = ktime_to_ns(cur) - ktime_to_ns(start);
 
@@ -2718,6 +2724,7 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)
 	case KVM_CAP_IRQFD:
 	case KVM_CAP_IRQFD_RESAMPLE:
 #endif
+	case KVM_CAP_IOEVENTFD_ANY_LENGTH:
 	case KVM_CAP_CHECK_EXTENSION_VM:
 		return 1;
 #ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
@@ -3341,7 +3348,7 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
 	if (bus->dev_count - bus->ioeventfd_count > NR_IOBUS_DEVS - 1)
 		return -ENOSPC;
 
-	new_bus = kzalloc(sizeof(*bus) + ((bus->dev_count + 1) *
+	new_bus = kmalloc(sizeof(*bus) + ((bus->dev_count + 1) *
 			  sizeof(struct kvm_io_range)), GFP_KERNEL);
 	if (!new_bus)
 		return -ENOMEM;
@@ -3373,7 +3380,7 @@ int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
 	if (r)
 		return r;
 
-	new_bus = kzalloc(sizeof(*bus) + ((bus->dev_count - 1) *
+	new_bus = kmalloc(sizeof(*bus) + ((bus->dev_count - 1) *
 			  sizeof(struct kvm_io_range)), GFP_KERNEL);
 	if (!new_bus)
 		return -ENOMEM;
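
Editor's note: KVM_CAP_IOEVENTFD_ANY_LENGTH, advertised above, goes together with the eventfd.c change earlier in this diff that accepts a zero-length ioeventfd, meaning "match any access width at this address" as long as DATAMATCH is not requested. A hedged userspace sketch of probing and using it (vm_fd and the MMIO address are assumptions; KVM_CHECK_EXTENSION can also be issued on the /dev/kvm fd):

#include <linux/kvm.h>
#include <string.h>
#include <sys/eventfd.h>
#include <sys/ioctl.h>

static int wire_any_length_ioeventfd(int vm_fd, __u64 mmio_addr)
{
	struct kvm_ioeventfd args;
	int efd;

	if (ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_IOEVENTFD_ANY_LENGTH) <= 0)
		return -1;	/* kernel predates zero-length ioeventfds */

	efd = eventfd(0, 0);
	if (efd < 0)
		return -1;

	memset(&args, 0, sizeof(args));
	args.addr  = mmio_addr;
	args.len   = 0;		/* any length; must not be combined with DATAMATCH */
	args.fd    = efd;
	args.flags = 0;		/* MMIO; no KVM_IOEVENTFD_FLAG_PIO */

	return ioctl(vm_fd, KVM_IOEVENTFD, &args);
}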

+ 2 - 0
virt/lib/Kconfig

@@ -0,0 +1,2 @@
+config IRQ_BYPASS_MANAGER
+	tristate

+ 1 - 0
virt/lib/Makefile

@@ -0,0 +1 @@
+obj-$(CONFIG_IRQ_BYPASS_MANAGER) += irqbypass.o

+ 257 - 0
virt/lib/irqbypass.c

@@ -0,0 +1,257 @@
+/*
+ * IRQ offload/bypass manager
+ *
+ * Copyright (C) 2015 Red Hat, Inc.
+ * Copyright (c) 2015 Linaro Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Various virtualization hardware acceleration techniques allow bypassing or
+ * offloading interrupts received from devices around the host kernel.  Posted
+ * Interrupts on Intel VT-d systems can allow interrupts to be received
+ * directly by a virtual machine.  ARM IRQ Forwarding allows forwarded physical
+ * interrupts to be directly deactivated by the guest.  This manager allows
+ * interrupt producers and consumers to find each other to enable this sort of
+ * bypass.
+ */
+
+#include <linux/irqbypass.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("IRQ bypass manager utility module");
+
+static LIST_HEAD(producers);
+static LIST_HEAD(consumers);
+static DEFINE_MUTEX(lock);
+
+/* @lock must be held when calling connect */
+static int __connect(struct irq_bypass_producer *prod,
+		     struct irq_bypass_consumer *cons)
+{
+	int ret = 0;
+
+	if (prod->stop)
+		prod->stop(prod);
+	if (cons->stop)
+		cons->stop(cons);
+
+	if (prod->add_consumer)
+		ret = prod->add_consumer(prod, cons);
+
+	if (!ret) {
+		ret = cons->add_producer(cons, prod);
+		if (ret && prod->del_consumer)
+			prod->del_consumer(prod, cons);
+	}
+
+	if (cons->start)
+		cons->start(cons);
+	if (prod->start)
+		prod->start(prod);
+
+	return ret;
+}
+
+/* @lock must be held when calling disconnect */
+static void __disconnect(struct irq_bypass_producer *prod,
+			 struct irq_bypass_consumer *cons)
+{
+	if (prod->stop)
+		prod->stop(prod);
+	if (cons->stop)
+		cons->stop(cons);
+
+	cons->del_producer(cons, prod);
+
+	if (prod->del_consumer)
+		prod->del_consumer(prod, cons);
+
+	if (cons->start)
+		cons->start(cons);
+	if (prod->start)
+		prod->start(prod);
+}
+
+/**
+ * irq_bypass_register_producer - register IRQ bypass producer
+ * @producer: pointer to producer structure
+ *
+ * Add the provided IRQ producer to the list of producers and connect
+ * with any matching token found on the IRQ consumers list.
+ */
+int irq_bypass_register_producer(struct irq_bypass_producer *producer)
+{
+	struct irq_bypass_producer *tmp;
+	struct irq_bypass_consumer *consumer;
+
+	might_sleep();
+
+	if (!try_module_get(THIS_MODULE))
+		return -ENODEV;
+
+	mutex_lock(&lock);
+
+	list_for_each_entry(tmp, &producers, node) {
+		if (tmp->token == producer->token) {
+			mutex_unlock(&lock);
+			module_put(THIS_MODULE);
+			return -EBUSY;
+		}
+	}
+
+	list_for_each_entry(consumer, &consumers, node) {
+		if (consumer->token == producer->token) {
+			int ret = __connect(producer, consumer);
+			if (ret) {
+				mutex_unlock(&lock);
+				module_put(THIS_MODULE);
+				return ret;
+			}
+			break;
+		}
+	}
+
+	list_add(&producer->node, &producers);
+
+	mutex_unlock(&lock);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(irq_bypass_register_producer);
+
+/**
+ * irq_bypass_unregister_producer - unregister IRQ bypass producer
+ * @producer: pointer to producer structure
+ *
+ * Remove a previously registered IRQ producer from the list of producers
+ * and disconnect it from any connected IRQ consumer.
+ */
+void irq_bypass_unregister_producer(struct irq_bypass_producer *producer)
+{
+	struct irq_bypass_producer *tmp;
+	struct irq_bypass_consumer *consumer;
+
+	might_sleep();
+
+	if (!try_module_get(THIS_MODULE))
+		return; /* nothing in the list anyway */
+
+	mutex_lock(&lock);
+
+	list_for_each_entry(tmp, &producers, node) {
+		if (tmp->token != producer->token)
+			continue;
+
+		list_for_each_entry(consumer, &consumers, node) {
+			if (consumer->token == producer->token) {
+				__disconnect(producer, consumer);
+				break;
+			}
+		}
+
+		list_del(&producer->node);
+		module_put(THIS_MODULE);
+		break;
+	}
+
+	mutex_unlock(&lock);
+
+	module_put(THIS_MODULE);
+}
+EXPORT_SYMBOL_GPL(irq_bypass_unregister_producer);
+
+/**
+ * irq_bypass_register_consumer - register IRQ bypass consumer
+ * @consumer: pointer to consumer structure
+ *
+ * Add the provided IRQ consumer to the list of consumers and connect
+ * with any matching token found on the IRQ producer list.
+ */
+int irq_bypass_register_consumer(struct irq_bypass_consumer *consumer)
+{
+	struct irq_bypass_consumer *tmp;
+	struct irq_bypass_producer *producer;
+
+	if (!consumer->add_producer || !consumer->del_producer)
+		return -EINVAL;
+
+	might_sleep();
+
+	if (!try_module_get(THIS_MODULE))
+		return -ENODEV;
+
+	mutex_lock(&lock);
+
+	list_for_each_entry(tmp, &consumers, node) {
+		if (tmp->token == consumer->token) {
+			mutex_unlock(&lock);
+			module_put(THIS_MODULE);
+			return -EBUSY;
+		}
+	}
+
+	list_for_each_entry(producer, &producers, node) {
+		if (producer->token == consumer->token) {
+			int ret = __connect(producer, consumer);
+			if (ret) {
+				mutex_unlock(&lock);
+				module_put(THIS_MODULE);
+				return ret;
+			}
+			break;
+		}
+	}
+
+	list_add(&consumer->node, &consumers);
+
+	mutex_unlock(&lock);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(irq_bypass_register_consumer);
+
+/**
+ * irq_bypass_unregister_consumer - unregister IRQ bypass consumer
+ * @consumer: pointer to consumer structure
+ *
+ * Remove a previously registered IRQ consumer from the list of consumers
+ * and disconnect it from any connected IRQ producer.
+ */
+void irq_bypass_unregister_consumer(struct irq_bypass_consumer *consumer)
+{
+	struct irq_bypass_consumer *tmp;
+	struct irq_bypass_producer *producer;
+
+	might_sleep();
+
+	if (!try_module_get(THIS_MODULE))
+		return; /* nothing in the list anyway */
+
+	mutex_lock(&lock);
+
+	list_for_each_entry(tmp, &consumers, node) {
+		if (tmp->token != consumer->token)
+			continue;
+
+		list_for_each_entry(producer, &producers, node) {
+			if (producer->token == consumer->token) {
+				__disconnect(producer, consumer);
+				break;
+			}
+		}
+
+		list_del(&consumer->node);
+		module_put(THIS_MODULE);
+		break;
+	}
+
+	mutex_unlock(&lock);
+
+	module_put(THIS_MODULE);
+}
+EXPORT_SYMBOL_GPL(irq_bypass_unregister_consumer);
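
Editor's note: to see how the two halves meet, here is a hedged sketch of a producer (VFIO-like) and a consumer (the irqfd code earlier in this diff) registering with the manager. Both sides advertise the same token, typically the eventfd they share, and __connect() runs as soon as the second side registers. Callback bodies and all names are illustrative; struct irq_bypass_producer/consumer are declared in include/linux/irqbypass.h, which is not part of this diff.

#include <linux/irqbypass.h>

static int demo_add_producer(struct irq_bypass_consumer *cons,
			     struct irq_bypass_producer *prod)
{
	/* e.g. program a VT-d posted-interrupt entry for prod->irq */
	return 0;
}

static void demo_del_producer(struct irq_bypass_consumer *cons,
			      struct irq_bypass_producer *prod)
{
	/* tear the bypass route back down */
}

static struct irq_bypass_producer demo_producer;
static struct irq_bypass_consumer demo_consumer = {
	/* add_producer/del_producer are mandatory, stop/start are optional */
	.add_producer = demo_add_producer,
	.del_producer = demo_del_producer,
};

static int demo_connect(void *shared_token, int host_irq)
{
	int ret;

	/* both sides use the same token so the manager can pair them */
	demo_producer.token = shared_token;
	demo_producer.irq   = host_irq;
	demo_consumer.token = shared_token;

	ret = irq_bypass_register_producer(&demo_producer);
	if (ret)
		return ret;

	/* __connect() fires inside this call because the tokens match */
	return irq_bypass_register_consumer(&demo_consumer);
}

Unregistering either side with irq_bypass_unregister_producer()/irq_bypass_unregister_consumer() runs __disconnect() and drops the entry from the corresponding list.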