
Merge tag 'kvm-3.10-1' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull kvm updates from Gleb Natapov:
 "Highlights of the updates are:

  general:
   - new emulated device API
   - legacy device assignment is now optional
   - irqfd interface is more generic and can be shared between arches

  x86:
   - VMCS shadow support and other nested VMX improvements
   - APIC virtualization and Posted Interrupt hardware support
   - Optimize mmio spte zapping

  ppc:
    - BookE: in-kernel MPIC emulation with irqfd support
    - Book3S: in-kernel XICS emulation (incomplete)
    - Book3S: HV: migration fixes
    - BookE: more debug support preparation
    - BookE: e6500 support

  ARM:
   - reworking of Hyp idmaps

  s390:
   - ioeventfd for virtio-ccw

  And many other bug fixes, cleanups and improvements"

* tag 'kvm-3.10-1' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (204 commits)
  kvm: Add compat_ioctl for device control API
  KVM: x86: Account for failing enable_irq_window for NMI window request
  KVM: PPC: Book3S: Add API for in-kernel XICS emulation
  kvm/ppc/mpic: fix missing unlock in set_base_addr()
  kvm/ppc: Hold srcu lock when calling kvm_io_bus_read/write
  kvm/ppc/mpic: remove users
  kvm/ppc/mpic: fix mmio region lists when multiple guests used
  kvm/ppc/mpic: remove default routes from documentation
  kvm: KVM_CAP_IOMMU only available with device assignment
  ARM: KVM: iterate over all CPUs for CPU compatibility check
  KVM: ARM: Fix spelling in error message
  ARM: KVM: define KVM_ARM_MAX_VCPUS unconditionally
  KVM: ARM: Fix API documentation for ONE_REG encoding
  ARM: KVM: promote vfp_host pointer to generic host cpu context
  ARM: KVM: add architecture specific hook for capabilities
  ARM: KVM: perform HYP initilization for hotplugged CPUs
  ARM: KVM: switch to a dual-step HYP init code
  ARM: KVM: rework HYP page table freeing
  ARM: KVM: enforce maximum size for identity mapped code
  ARM: KVM: move to a KVM provided HYP idmap
  ...
Linus Torvalds
commit 01227a889e
100 changed files with 7814 additions and 1922 deletions
  Documentation/virtual/kvm/api.txt             |  +140    -6
  Documentation/virtual/kvm/devices/README      |    +1    -0
  Documentation/virtual/kvm/devices/mpic.txt    |   +53    -0
  Documentation/virtual/kvm/devices/xics.txt    |   +66    -0
  arch/arm/include/asm/idmap.h                  |    +0    -1
  arch/arm/include/asm/kvm_host.h               |   +32   -15
  arch/arm/include/asm/kvm_mmu.h                |   +23    -5
  arch/arm/kernel/asm-offsets.c                 |    +1    -1
  arch/arm/kernel/vmlinux.lds.S                 |    +6    -1
  arch/arm/kvm/Kconfig                          |    +3    -3
  arch/arm/kvm/Makefile                         |    +1    -1
  arch/arm/kvm/arch_timer.c                     |    +4    -3
  arch/arm/kvm/arm.c                            |   +75   -54
  arch/arm/kvm/init.S                           |   +59   -19
  arch/arm/kvm/mmu.c                            |  +257  -198
  arch/arm/kvm/perf.c                           |   +68    -0
  arch/arm/mm/idmap.c                           |    +1   -31
  arch/ia64/include/asm/kvm_host.h              |    +1    -0
  arch/ia64/include/uapi/asm/kvm.h              |    +0    -1
  arch/ia64/kvm/Kconfig                         |   +12    -2
  arch/ia64/kvm/Makefile                        |    +3    -3
  arch/ia64/kvm/kvm-ia64.c                      |    +9   -26
  arch/ia64/kvm/lapic.h                         |    +0    -6
  arch/powerpc/include/asm/hvcall.h             |    +3    -0
  arch/powerpc/include/asm/kvm_book3s.h         |    +6    -1
  arch/powerpc/include/asm/kvm_book3s_64.h      |   +13    -0
  arch/powerpc/include/asm/kvm_book3s_asm.h     |    +7    -1
  arch/powerpc/include/asm/kvm_booke.h          |    +2    -0
  arch/powerpc/include/asm/kvm_host.h           |   +40    -1
  arch/powerpc/include/asm/kvm_ppc.h            |  +109    -5
  arch/powerpc/include/asm/reg.h                |    +1    -0
  arch/powerpc/include/uapi/asm/kvm.h           |   +94    -0
  arch/powerpc/kernel/asm-offsets.c             |    +4    -0
  arch/powerpc/kvm/44x.c                        |   +12    -0
  arch/powerpc/kvm/Kconfig                      |   +23    -3
  arch/powerpc/kvm/Makefile                     |   +11    -1
  arch/powerpc/kvm/book3s.c                     |   +33    -3
  arch/powerpc/kvm/book3s_64_mmu_hv.c           |  +102   -18
  arch/powerpc/kvm/book3s_emulate.c             |    +3    -1
  arch/powerpc/kvm/book3s_hv.c                  |   +76   -16
  arch/powerpc/kvm/book3s_hv_rm_mmu.c           |    +0   -11
  arch/powerpc/kvm/book3s_hv_rm_xics.c          |  +406    -0
  arch/powerpc/kvm/book3s_hv_rmhandlers.S       |  +161   -67
  arch/powerpc/kvm/book3s_pr.c                  |    +3    -4
  arch/powerpc/kvm/book3s_pr_papr.c             |   +21    -0
  arch/powerpc/kvm/book3s_rtas.c                |  +274    -0
  arch/powerpc/kvm/book3s_xics.c                | +1270    -0
  arch/powerpc/kvm/book3s_xics.h                |  +130    -0
  arch/powerpc/kvm/booke.c                      |  +110   -48
  arch/powerpc/kvm/booke_interrupts.S           |   +39    -3
  arch/powerpc/kvm/e500.c                       |   +14    -0
  arch/powerpc/kvm/e500.h                       |   +22    -0
  arch/powerpc/kvm/e500_emulate.c               |   +19    -0
  arch/powerpc/kvm/e500_mmu.c                   |  +170   -22
  arch/powerpc/kvm/e500mc.c                     |   +16    -0
  arch/powerpc/kvm/emulate.c                    |    +2    -0
  arch/powerpc/kvm/irq.h                        |   +20    -0
  arch/powerpc/kvm/mpic.c                       | +1853    -0
  arch/powerpc/kvm/powerpc.c                    |  +108   -25
  arch/powerpc/sysdev/xics/icp-native.c         |    +8    -0
  arch/s390/include/uapi/asm/Kbuild             |    +1    -0
  arch/s390/include/uapi/asm/virtio-ccw.h       |   +21    -0
  arch/s390/kvm/Kconfig                         |    +1    -0
  arch/s390/kvm/Makefile                        |    +1    -1
  arch/s390/kvm/diag.c                          |   +26    -0
  arch/s390/kvm/gaccess.h                       |   +73  -356
  arch/s390/kvm/intercept.c                     |    +7   -11
  arch/s390/kvm/interrupt.c                     |   +73  -172
  arch/s390/kvm/kvm-s390.c                      |   +22   -21
  arch/s390/kvm/kvm-s390.h                      |    +6    -6
  arch/s390/kvm/priv.c                          |  +109  -161
  arch/x86/include/asm/entry_arch.h             |    +4    -0
  arch/x86/include/asm/hardirq.h                |    +3    -0
  arch/x86/include/asm/hw_irq.h                 |    +1    -0
  arch/x86/include/asm/irq_vectors.h            |    +5    -0
  arch/x86/include/asm/kvm_host.h               |   +16   -10
  arch/x86/include/asm/vmx.h                    |   +18    -0
  arch/x86/include/uapi/asm/kvm.h               |    +0    -1
  arch/x86/include/uapi/asm/msr-index.h         |    +2    -0
  arch/x86/include/uapi/asm/vmx.h               |    +3    -2
  arch/x86/kernel/entry_64.S                    |    +5    -0
  arch/x86/kernel/irq.c                         |   +22    -0
  arch/x86/kernel/irqinit.c                     |    +4    -0
  arch/x86/kernel/kvmclock.c                    |    +8    -1
  arch/x86/kvm/Kconfig                          |   +12    -2
  arch/x86/kvm/Makefile                         |    +3    -2
  arch/x86/kvm/emulate.c                        |   +23    -8
  arch/x86/kvm/i8254.c                          |    +2    -2
  arch/x86/kvm/lapic.c                          |  +107   -82
  arch/x86/kvm/lapic.h                          |   +17    -5
  arch/x86/kvm/mmu.c                            |   +61   -47
  arch/x86/kvm/mmu.h                            |    +4    -7
  arch/x86/kvm/paging_tmpl.h                    |    +1    -1
  arch/x86/kvm/pmu.c                            |   +11    -3
  arch/x86/kvm/svm.c                            |   +20   -20
  arch/x86/kvm/vmx.c                            |  +834  -243
  arch/x86/kvm/x86.c                            |  +158   -85
  drivers/s390/kvm/kvm_virtio.c                 |    +6    -5
  drivers/s390/kvm/virtio_ccw.c                 |   +12    -8
  include/linux/kvm_host.h                      |  +112   -54

Documentation/virtual/kvm/api.txt | +140 -6

@@ -1486,15 +1486,23 @@ struct kvm_ioeventfd {
 	__u8  pad[36];
 };
 
+For the special case of virtio-ccw devices on s390, the ioevent is matched
+to a subchannel/virtqueue tuple instead.
+
 The following flags are defined:
 
 #define KVM_IOEVENTFD_FLAG_DATAMATCH (1 << kvm_ioeventfd_flag_nr_datamatch)
 #define KVM_IOEVENTFD_FLAG_PIO       (1 << kvm_ioeventfd_flag_nr_pio)
 #define KVM_IOEVENTFD_FLAG_DEASSIGN  (1 << kvm_ioeventfd_flag_nr_deassign)
+#define KVM_IOEVENTFD_FLAG_VIRTIO_CCW_NOTIFY \
+	(1 << kvm_ioeventfd_flag_nr_virtio_ccw_notify)
 
 If datamatch flag is set, the event will be signaled only if the written value
 to the registered address is equal to datamatch in struct kvm_ioeventfd.
 
+For virtio-ccw devices, addr contains the subchannel id and datamatch the
+virtqueue index.
+
 
 4.60 KVM_DIRTY_TLB
 
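As a usage sketch of the virtio-ccw ioeventfd extension above: registering a
notification from userspace could look as follows, assuming the uapi added by
this pull; the subchannel id and virtqueue index are illustrative and error
handling is elided.

/* Hedged sketch: register an ioeventfd for a virtio-ccw device.
 * The subchannel id (0x10000) and virtqueue index (2) are made up;
 * per the text above, addr carries the subchannel id and datamatch
 * the virtqueue index. */
#include <linux/kvm.h>
#include <sys/eventfd.h>
#include <sys/ioctl.h>

static int register_ccw_notify(int vm_fd)
{
	int efd = eventfd(0, 0);
	struct kvm_ioeventfd kick = {
		.addr      = 0x10000,	/* subchannel id (illustrative) */
		.len       = 0,
		.datamatch = 2,		/* virtqueue index */
		.fd        = efd,
		.flags     = KVM_IOEVENTFD_FLAG_VIRTIO_CCW_NOTIFY |
			     KVM_IOEVENTFD_FLAG_DATAMATCH,
	};

	if (ioctl(vm_fd, KVM_IOEVENTFD, &kick) < 0)
		return -1;
	return efd;	/* the kernel signals this fd on guest notify */
}
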
@@ -1780,27 +1788,48 @@ registers, find a list below:
   PPC   | KVM_REG_PPC_VPA_DTL   | 128
   PPC   | KVM_REG_PPC_EPCR	| 32
   PPC   | KVM_REG_PPC_EPR	| 32
+  PPC   | KVM_REG_PPC_TCR	| 32
+  PPC   | KVM_REG_PPC_TSR	| 32
+  PPC   | KVM_REG_PPC_OR_TSR	| 32
+  PPC   | KVM_REG_PPC_CLEAR_TSR	| 32
+  PPC   | KVM_REG_PPC_MAS0	| 32
+  PPC   | KVM_REG_PPC_MAS1	| 32
+  PPC   | KVM_REG_PPC_MAS2	| 64
+  PPC   | KVM_REG_PPC_MAS7_3	| 64
+  PPC   | KVM_REG_PPC_MAS4	| 32
+  PPC   | KVM_REG_PPC_MAS6	| 32
+  PPC   | KVM_REG_PPC_MMUCFG	| 32
+  PPC   | KVM_REG_PPC_TLB0CFG	| 32
+  PPC   | KVM_REG_PPC_TLB1CFG	| 32
+  PPC   | KVM_REG_PPC_TLB2CFG	| 32
+  PPC   | KVM_REG_PPC_TLB3CFG	| 32
+  PPC   | KVM_REG_PPC_TLB0PS	| 32
+  PPC   | KVM_REG_PPC_TLB1PS	| 32
+  PPC   | KVM_REG_PPC_TLB2PS	| 32
+  PPC   | KVM_REG_PPC_TLB3PS	| 32
+  PPC   | KVM_REG_PPC_EPTCFG	| 32
+  PPC   | KVM_REG_PPC_ICP_STATE | 64
 
 ARM registers are mapped using the lower 32 bits.  The upper 16 of that
 is the register group type, or coprocessor number:
 
 ARM core registers have the following id bit patterns:
-  0x4002 0000 0010 <index into the kvm_regs struct:16>
+  0x4020 0000 0010 <index into the kvm_regs struct:16>
 
 ARM 32-bit CP15 registers have the following id bit patterns:
-  0x4002 0000 000F <zero:1> <crn:4> <crm:4> <opc1:4> <opc2:3>
+  0x4020 0000 000F <zero:1> <crn:4> <crm:4> <opc1:4> <opc2:3>
 
 ARM 64-bit CP15 registers have the following id bit patterns:
-  0x4003 0000 000F <zero:1> <zero:4> <crm:4> <opc1:4> <zero:3>
+  0x4030 0000 000F <zero:1> <zero:4> <crm:4> <opc1:4> <zero:3>
 
 ARM CCSIDR registers are demultiplexed by CSSELR value:
-  0x4002 0000 0011 00 <csselr:8>
+  0x4020 0000 0011 00 <csselr:8>
 
 ARM 32-bit VFP control registers have the following id bit patterns:
-  0x4002 0000 0012 1 <regno:12>
+  0x4020 0000 0012 1 <regno:12>
 
 ARM 64-bit FP registers have the following id bit patterns:
-  0x4002 0000 0012 0 <regno:12>
+  0x4030 0000 0012 0 <regno:12>
 
 4.69 KVM_GET_ONE_REG
 
@@ -2161,6 +2190,76 @@ header; first `n_valid' valid entries with contents from the data
 written, then `n_invalid' invalid entries, invalidating any previously
 valid entries found.
 
+4.79 KVM_CREATE_DEVICE
+
+Capability: KVM_CAP_DEVICE_CTRL
+Type: vm ioctl
+Parameters: struct kvm_create_device (in/out)
+Returns: 0 on success, -1 on error
+Errors:
+  ENODEV: The device type is unknown or unsupported
+  EEXIST: Device already created, and this type of device may not
+          be instantiated multiple times
+
+  Other error conditions may be defined by individual device types or
+  have their standard meanings.
+
+Creates an emulated device in the kernel.  The file descriptor returned
+in fd can be used with KVM_SET/GET/HAS_DEVICE_ATTR.
+
+If the KVM_CREATE_DEVICE_TEST flag is set, only test whether the
+device type is supported (not necessarily whether it can be created
+in the current vm).
+
+Individual devices should not define flags.  Attributes should be used
+for specifying any behavior that is not implied by the device type
+number.
+
+struct kvm_create_device {
+	__u32	type;	/* in: KVM_DEV_TYPE_xxx */
+	__u32	fd;	/* out: device handle */
+	__u32	flags;	/* in: KVM_CREATE_DEVICE_xxx */
+};
+
+4.80 KVM_SET_DEVICE_ATTR/KVM_GET_DEVICE_ATTR
+
+Capability: KVM_CAP_DEVICE_CTRL
+Type: device ioctl
+Parameters: struct kvm_device_attr
+Returns: 0 on success, -1 on error
+Errors:
+  ENXIO:  The group or attribute is unknown/unsupported for this device
+  EPERM:  The attribute cannot (currently) be accessed this way
+          (e.g. read-only attribute, or attribute that only makes
+          sense when the device is in a different state)
+
+  Other error conditions may be defined by individual device types.
+
+Gets/sets a specified piece of device configuration and/or state.  The
+semantics are device-specific.  See individual device documentation in
+the "devices" directory.  As with ONE_REG, the size of the data
+transferred is defined by the particular attribute.
+
+struct kvm_device_attr {
+	__u32	flags;		/* no flags currently defined */
+	__u32	group;		/* device-defined */
+	__u64	attr;		/* group-defined */
+	__u64	addr;		/* userspace address of attr data */
+};
+
+4.81 KVM_HAS_DEVICE_ATTR
+
+Capability: KVM_CAP_DEVICE_CTRL
+Type: device ioctl
+Parameters: struct kvm_device_attr
+Returns: 0 on success, -1 on error
+Errors:
+  ENXIO:  The group or attribute is unknown/unsupported for this device
+
+Tests whether a device supports a particular attribute.  A successful
+return indicates the attribute is implemented.  It does not necessarily
+indicate that the attribute can be read or written in the device's
+current state.  "addr" is ignored.
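
Taken together, 4.79-4.81 suggest the following hedged userspace sketch; it
uses the in-kernel MPIC (documented under Documentation/virtual/kvm/devices/
later in this pull) as the example device, and the base address value is
illustrative.

/* Hedged sketch of the device control API described in 4.79-4.81.
 * The device type and attribute chosen here (an FSL MPIC v2.0 and its
 * base-address attribute) are just one possibility. */
#include <linux/kvm.h>
#include <sys/ioctl.h>

static int create_mpic(int vm_fd)
{
	struct kvm_create_device cd = {
		.type  = KVM_DEV_TYPE_FSL_MPIC_20,	/* in */
		.flags = 0,				/* in */
	};
	__u64 base = 0xe0000000;			/* illustrative */
	struct kvm_device_attr attr = {
		.group = KVM_DEV_MPIC_GRP_MISC,
		.attr  = KVM_DEV_MPIC_BASE_ADDR,
		.addr  = (__u64)(unsigned long)&base,
	};

	if (ioctl(vm_fd, KVM_CREATE_DEVICE, &cd) < 0)
		return -1;
	/* cd.fd now holds the device handle (out parameter) */
	if (ioctl(cd.fd, KVM_HAS_DEVICE_ATTR, &attr) == 0)
		ioctl(cd.fd, KVM_SET_DEVICE_ATTR, &attr);
	return cd.fd;
}

Note how the attribute data is passed indirectly through "addr": as with
ONE_REG, the size of the transfer is defined by the attribute itself.
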
 
 4.77 KVM_ARM_VCPU_INIT
 
@@ -2243,6 +2342,25 @@ and distributor interface, the ioctl must be called after calling
 KVM_CREATE_IRQCHIP, but before calling KVM_RUN on any of the VCPUs.  Calling
 this ioctl twice for any of the base addresses will return -EEXIST.
 
+4.82 KVM_PPC_RTAS_DEFINE_TOKEN
+
+Capability: KVM_CAP_PPC_RTAS
+Architectures: ppc
+Type: vm ioctl
+Parameters: struct kvm_rtas_token_args
+Returns: 0 on success, -1 on error
+
+Defines a token value for a RTAS (Run Time Abstraction Services)
+service in order to allow it to be handled in the kernel.  The
+argument struct gives the name of the service, which must be the name
+of a service that has a kernel-side implementation.  If the token
+value is non-zero, it will be associated with that service, and
+subsequent RTAS calls by the guest specifying that token will be
+handled by the kernel.  If the token value is 0, then any token
+associated with the service will be forgotten, and subsequent RTAS
+calls by the guest for that service will be passed to userspace to be
+handled.
+
 
 5. The kvm_run structure
 ------------------------
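
A sketch of the 4.82 call above might look like the following; the argument
layout is mirrored locally here (a fixed-size name buffer plus a 64-bit token,
as this series' powerpc uapi header appears to define it), and the service
name and token value are illustrative.

/* Hedged sketch: define an in-kernel RTAS token (4.82 above).
 * The struct below mirrors kvm_rtas_token_args under stated
 * assumptions; "ibm,set-xive" is one of the kernel-implemented
 * services added by this pull, the token value is arbitrary. */
#include <linux/kvm.h>
#include <string.h>
#include <sys/ioctl.h>

struct rtas_token_args_sketch {		/* assumed layout */
	char name[120];
	__u64 token;			/* 0 undefines the mapping */
};

static int define_rtas_token(int vm_fd)
{
	struct rtas_token_args_sketch args;

	memset(&args, 0, sizeof(args));
	strcpy(args.name, "ibm,set-xive");
	args.token = 1;			/* nonzero: handle in kernel */
	return ioctl(vm_fd, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
}
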
@@ -2646,3 +2764,19 @@ to receive the topmost interrupt vector.
 When disabled (args[0] == 0), behavior is as if this facility is unsupported.
 
 When this capability is enabled, KVM_EXIT_EPR can occur.
+
+6.6 KVM_CAP_IRQ_MPIC
+
+Architectures: ppc
+Parameters: args[0] is the MPIC device fd
+            args[1] is the MPIC CPU number for this vcpu
+
+This capability connects the vcpu to an in-kernel MPIC device.
+
+6.7 KVM_CAP_IRQ_XICS
+
+Architectures: ppc
+Parameters: args[0] is the XICS device fd
+            args[1] is the XICS CPU number (server ID) for this vcpu
+
+This capability connects the vcpu to an in-kernel XICS device.
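
A minimal sketch of wiring a vcpu to the in-kernel XICS via 6.7 follows,
assuming a device fd obtained from KVM_CREATE_DEVICE; the server number is
illustrative (the MPIC case in 6.6 is analogous with KVM_CAP_IRQ_MPIC).

/* Hedged sketch: connect a vcpu to the in-kernel XICS (6.7 above). */
#include <linux/kvm.h>
#include <sys/ioctl.h>

static int connect_vcpu_to_xics(int vcpu_fd, int xics_fd, int server)
{
	struct kvm_enable_cap cap = {
		.cap  = KVM_CAP_IRQ_XICS,
		.args = { xics_fd, server },	/* fd, server ID */
	};

	return ioctl(vcpu_fd, KVM_ENABLE_CAP, &cap);
}
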

Documentation/virtual/kvm/devices/README | +1 -0

@@ -0,0 +1 @@
+This directory contains specific device bindings for KVM_CAP_DEVICE_CTRL.

Documentation/virtual/kvm/devices/mpic.txt | +53 -0

@@ -0,0 +1,53 @@
+MPIC interrupt controller
+=========================
+
+Device types supported:
+  KVM_DEV_TYPE_FSL_MPIC_20     Freescale MPIC v2.0
+  KVM_DEV_TYPE_FSL_MPIC_42     Freescale MPIC v4.2
+
+Only one MPIC instance, of any type, may be instantiated.  The created
+MPIC will act as the system interrupt controller, connecting to each
+vcpu's interrupt inputs.
+
+Groups:
+  KVM_DEV_MPIC_GRP_MISC
+  Attributes:
+    KVM_DEV_MPIC_BASE_ADDR (rw, 64-bit)
+      Base address of the 256 KiB MPIC register space.  Must be
+      naturally aligned.  A value of zero disables the mapping.
+      Reset value is zero.
+
+  KVM_DEV_MPIC_GRP_REGISTER (rw, 32-bit)
+    Access an MPIC register, as if the access were made from the guest.
+    "attr" is the byte offset into the MPIC register space.  Accesses
+    must be 4-byte aligned.
+
+    MSIs may be signaled by using this attribute group to write
+    to the relevant MSIIR.
+
+  KVM_DEV_MPIC_GRP_IRQ_ACTIVE (rw, 32-bit)
+    IRQ input line for each standard openpic source.  0 is inactive and 1
+    is active, regardless of interrupt sense.
+
+    For edge-triggered interrupts:  Writing 1 is considered an activating
+    edge, and writing 0 is ignored.  Reading returns 1 if a previously
+    signaled edge has not been acknowledged, and 0 otherwise.
+
+    "attr" is the IRQ number.  IRQ numbers for standard sources are the
+    byte offset of the relevant IVPR from EIVPR0, divided by 32.
+
+IRQ Routing:
+
+  The MPIC emulation supports IRQ routing. Only a single MPIC device can
+  be instantiated. Once that device has been created, it's available as
+  irqchip id 0.
+
+  This irqchip 0 has 256 interrupt pins, which expose the interrupts in
+  the main array of interrupt sources (a.k.a. "SRC" interrupts).
+
+  The numbering is the same as the MPIC device tree binding -- based on
+  the register offset from the beginning of the sources array, without
+  regard to any subdivisions in chip documentation such as "internal"
+  or "external" interrupts.
+
+  Access to non-SRC interrupts is not implemented through IRQ routing mechanisms.
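
As a hedged sketch of the KVM_DEV_MPIC_GRP_REGISTER group described above,
signaling an MSI by writing the relevant MSIIR could look like this; the
MSIIR byte offset used here is hypothetical and must be taken from the
MPIC's actual register map.

/* Hedged sketch: raise an MSI through the register attribute group.
 * "attr" is the byte offset into the 256 KiB MPIC register space;
 * 0x1c0 stands in for a real MSIIR offset and is illustrative. */
#include <linux/kvm.h>
#include <sys/ioctl.h>

static int mpic_signal_msi(int mpic_fd, __u32 msi_data)
{
	__u32 val = msi_data;
	struct kvm_device_attr attr = {
		.group = KVM_DEV_MPIC_GRP_REGISTER,
		.attr  = 0x1c0,		/* hypothetical MSIIR offset */
		.addr  = (__u64)(unsigned long)&val,
	};

	return ioctl(mpic_fd, KVM_SET_DEVICE_ATTR, &attr);
}
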

Documentation/virtual/kvm/devices/xics.txt | +66 -0

@@ -0,0 +1,66 @@
+XICS interrupt controller
+
+Device type supported: KVM_DEV_TYPE_XICS
+
+Groups:
+  KVM_DEV_XICS_SOURCES
+  Attributes: One per interrupt source, indexed by the source number.
+
+This device emulates the XICS (eXternal Interrupt Controller
+Specification) defined in PAPR.  The XICS has a set of interrupt
+sources, each identified by a 20-bit source number, and a set of
+Interrupt Control Presentation (ICP) entities, also called "servers",
+each associated with a virtual CPU.
+
+The ICP entities are created by enabling the KVM_CAP_IRQ_ARCH
+capability for each vcpu, specifying KVM_CAP_IRQ_XICS in args[0] and
+the interrupt server number (i.e. the vcpu number from the XICS's
+point of view) in args[1] of the kvm_enable_cap struct.  Each ICP has
+64 bits of state which can be read and written using the
+KVM_GET_ONE_REG and KVM_SET_ONE_REG ioctls on the vcpu.  The 64 bit
+state word has the following bitfields, starting at the
+least-significant end of the word:
+
+* Unused, 16 bits
+
+* Pending interrupt priority, 8 bits
+  Zero is the highest priority, 255 means no interrupt is pending.
+
+* Pending IPI (inter-processor interrupt) priority, 8 bits
+  Zero is the highest priority, 255 means no IPI is pending.
+
+* Pending interrupt source number, 24 bits
+  Zero means no interrupt pending, 2 means an IPI is pending
+
+* Current processor priority, 8 bits
+  Zero is the highest priority, meaning no interrupts can be
+  delivered, and 255 is the lowest priority.
+
+Each source has 64 bits of state that can be read and written using
+the KVM_GET_DEVICE_ATTR and KVM_SET_DEVICE_ATTR ioctls, specifying the
+KVM_DEV_XICS_SOURCES attribute group, with the attribute number being
+the interrupt source number.  The 64 bit state word has the following
+bitfields, starting from the least-significant end of the word:
+
+* Destination (server number), 32 bits
+  This specifies where the interrupt should be sent, and is the
+  interrupt server number specified for the destination vcpu.
+
+* Priority, 8 bits
+  This is the priority specified for this interrupt source, where 0 is
+  the highest priority and 255 is the lowest.  An interrupt with a
+  priority of 255 will never be delivered.
+
+* Level sensitive flag, 1 bit
+  This bit is 1 for a level-sensitive interrupt source, or 0 for
+  edge-sensitive (or MSI).
+
+* Masked flag, 1 bit
+  This bit is set to 1 if the interrupt is masked (cannot be delivered
+  regardless of its priority), for example by the ibm,int-off RTAS
+  call, or 0 if it is not masked.
+
+* Pending flag, 1 bit
+  This bit is 1 if the source has a pending interrupt, otherwise 0.
+
+Only one XICS instance may be created per VM.
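
Since the ICP state word layout is fully spelled out above, a small decoder
follows directly from the documented bit offsets; this merely restates the
text, and the example word is chosen arbitrarily.

/* Hedged sketch: decode the 64-bit ICP state word per the layout
 * documented above (LSB first: 16 unused bits, pending priority,
 * pending IPI priority, 24-bit pending source, CPPR at the top). */
#include <stdint.h>
#include <stdio.h>

struct icp_state {
	uint8_t  pending_pri;	/* bits 16-23 */
	uint8_t  pending_ipi;	/* bits 24-31 */
	uint32_t xisr;		/* bits 32-55: pending source number */
	uint8_t  cppr;		/* bits 56-63 */
};

static struct icp_state decode_icp(uint64_t word)
{
	struct icp_state s = {
		.pending_pri = (word >> 16) & 0xff,
		.pending_ipi = (word >> 24) & 0xff,
		.xisr        = (word >> 32) & 0xffffff,
		.cppr        = (word >> 56) & 0xff,
	};
	return s;
}

int main(void)
{
	/* CPPR 255 (lowest), pending source 2, i.e. an IPI pending */
	struct icp_state s = decode_icp(0xff000002ffff0000ULL);
	printf("cppr=%u xisr=%u\n", s.cppr, s.xisr);
	return 0;
}
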

arch/arm/include/asm/idmap.h | +0 -1

@@ -8,7 +8,6 @@
 #define __idmap __section(.idmap.text) noinline notrace
 
 extern pgd_t *idmap_pgd;
-extern pgd_t *hyp_pgd;
 
 void setup_mm_for_reboot(void);
 

arch/arm/include/asm/kvm_host.h | +32 -15

@@ -87,7 +87,7 @@ struct kvm_vcpu_fault_info {
 	u32 hyp_pc;		/* PC when exception was taken from Hyp mode */
 };
 
-typedef struct vfp_hard_struct kvm_kernel_vfp_t;
+typedef struct vfp_hard_struct kvm_cpu_context_t;
 
 struct kvm_vcpu_arch {
 	struct kvm_regs regs;
@@ -105,8 +105,10 @@ struct kvm_vcpu_arch {
 	struct kvm_vcpu_fault_info fault;
 
 	/* Floating point registers (VFP and Advanced SIMD/NEON) */
-	kvm_kernel_vfp_t vfp_guest;
-	kvm_kernel_vfp_t *vfp_host;
+	struct vfp_hard_struct vfp_guest;
+
+	/* Host FP context */
+	kvm_cpu_context_t *host_cpu_context;
 
 	/* VGIC state */
 	struct vgic_cpu vgic_cpu;
@@ -188,23 +190,38 @@ int kvm_arm_coproc_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *);
 int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
 		int exception_index);
 
-static inline void __cpu_init_hyp_mode(unsigned long long pgd_ptr,
+static inline void __cpu_init_hyp_mode(unsigned long long boot_pgd_ptr,
+				       unsigned long long pgd_ptr,
 				       unsigned long hyp_stack_ptr,
 				       unsigned long vector_ptr)
 {
-	unsigned long pgd_low, pgd_high;
-
-	pgd_low = (pgd_ptr & ((1ULL << 32) - 1));
-	pgd_high = (pgd_ptr >> 32ULL);
-
 	/*
-	 * Call initialization code, and switch to the full blown
-	 * HYP code. The init code doesn't need to preserve these registers as
-	 * r1-r3 and r12 are already callee save according to the AAPCS.
-	 * Note that we slightly misuse the prototype by casing the pgd_low to
-	 * a void *.
+	 * Call initialization code, and switch to the full blown HYP
+	 * code. The init code doesn't need to preserve these
+	 * registers as r0-r3 are already callee saved according to
+	 * the AAPCS.
+	 * Note that we slightly misuse the prototype by casing the
+	 * stack pointer to a void *.
+	 *
+	 * We don't have enough registers to perform the full init in
+	 * one go.  Install the boot PGD first, and then install the
+	 * runtime PGD, stack pointer and vectors. The PGDs are always
+	 * passed as the third argument, in order to be passed into
+	 * r2-r3 to the init code (yes, this is compliant with the
+	 * PCS!).
 	 */
-	kvm_call_hyp((void *)pgd_low, pgd_high, hyp_stack_ptr, vector_ptr);
+
+	kvm_call_hyp(NULL, 0, boot_pgd_ptr);
+
+	kvm_call_hyp((void*)hyp_stack_ptr, vector_ptr, pgd_ptr);
 }
 
+static inline int kvm_arch_dev_ioctl_check_extension(long ext)
+{
+	return 0;
+}
+
+int kvm_perf_init(void);
+int kvm_perf_teardown(void);
+
 #endif /* __ARM_KVM_HOST_H__ */

arch/arm/include/asm/kvm_mmu.h | +23 -5

@@ -19,21 +19,33 @@
 #ifndef __ARM_KVM_MMU_H__
 #define __ARM_KVM_MMU_H__
 
-#include <asm/cacheflush.h>
-#include <asm/pgalloc.h>
-#include <asm/idmap.h>
+#include <asm/memory.h>
+#include <asm/page.h>
 
 /*
  * We directly use the kernel VA for the HYP, as we can directly share
  * the mapping (HTTBR "covers" TTBR1).
  */
-#define HYP_PAGE_OFFSET_MASK	(~0UL)
+#define HYP_PAGE_OFFSET_MASK	UL(~0)
 #define HYP_PAGE_OFFSET		PAGE_OFFSET
 #define KERN_TO_HYP(kva)	(kva)
 
+/*
+ * Our virtual mapping for the boot-time MMU-enable code. Must be
+ * shared across all the page-tables. Conveniently, we use the vectors
+ * page, where no kernel data will ever be shared with HYP.
+ */
+#define TRAMPOLINE_VA		UL(CONFIG_VECTORS_BASE)
+
+#ifndef __ASSEMBLY__
+
+#include <asm/cacheflush.h>
+#include <asm/pgalloc.h>
+
 int create_hyp_mappings(void *from, void *to);
 int create_hyp_io_mappings(void *from, void *to, phys_addr_t);
-void free_hyp_pmds(void);
+void free_boot_hyp_pgd(void);
+void free_hyp_pgds(void);
 
 int kvm_alloc_stage2_pgd(struct kvm *kvm);
 void kvm_free_stage2_pgd(struct kvm *kvm);
@@ -45,6 +57,8 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run);
 void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu);
 
 phys_addr_t kvm_mmu_get_httbr(void);
+phys_addr_t kvm_mmu_get_boot_httbr(void);
+phys_addr_t kvm_get_idmap_vector(void);
 int kvm_mmu_init(void);
 void kvm_clear_hyp_idmap(void);
 
@@ -114,4 +128,8 @@ static inline void coherent_icache_guest_page(struct kvm *kvm, gfn_t gfn)
 	}
 }
 
+#define kvm_flush_dcache_to_poc(a,l)	__cpuc_flush_dcache_area((a), (l))
+
+#endif	/* !__ASSEMBLY__ */
+
 #endif /* __ARM_KVM_MMU_H__ */

arch/arm/kernel/asm-offsets.c | +1 -1

@@ -158,7 +158,7 @@ int main(void)
   DEFINE(VCPU_MIDR,		offsetof(struct kvm_vcpu, arch.midr));
   DEFINE(VCPU_CP15,		offsetof(struct kvm_vcpu, arch.cp15));
   DEFINE(VCPU_VFP_GUEST,	offsetof(struct kvm_vcpu, arch.vfp_guest));
-  DEFINE(VCPU_VFP_HOST,		offsetof(struct kvm_vcpu, arch.vfp_host));
+  DEFINE(VCPU_VFP_HOST,		offsetof(struct kvm_vcpu, arch.host_cpu_context));
   DEFINE(VCPU_REGS,		offsetof(struct kvm_vcpu, arch.regs));
   DEFINE(VCPU_USR_REGS,		offsetof(struct kvm_vcpu, arch.regs.usr_regs));
   DEFINE(VCPU_SVC_REGS,		offsetof(struct kvm_vcpu, arch.regs.svc_regs));

arch/arm/kernel/vmlinux.lds.S | +6 -1

@@ -20,7 +20,7 @@
 	VMLINUX_SYMBOL(__idmap_text_start) = .;				\
 	*(.idmap.text)							\
 	VMLINUX_SYMBOL(__idmap_text_end) = .;				\
-	ALIGN_FUNCTION();						\
+	. = ALIGN(32);							\
 	VMLINUX_SYMBOL(__hyp_idmap_text_start) = .;			\
 	*(.hyp.idmap.text)						\
 	VMLINUX_SYMBOL(__hyp_idmap_text_end) = .;
@@ -315,3 +315,8 @@ SECTIONS
  */
 ASSERT((__proc_info_end - __proc_info_begin), "missing CPU support")
 ASSERT((__arch_info_end - __arch_info_begin), "no machine record defined")
+/*
+ * The HYP init code can't be more than a page long.
+ * The above comment applies as well.
+ */
+ASSERT(((__hyp_idmap_text_end - __hyp_idmap_text_start) <= PAGE_SIZE), "HYP init code too big")

arch/arm/kvm/Kconfig | +3 -3

@@ -41,9 +41,9 @@ config KVM_ARM_HOST
 	  Provides host support for ARM processors.
 
 config KVM_ARM_MAX_VCPUS
-	int "Number maximum supported virtual CPUs per VM"
-	depends on KVM_ARM_HOST
-	default 4
+	int "Number maximum supported virtual CPUs per VM" if KVM_ARM_HOST
+	default 4 if KVM_ARM_HOST
+	default 0
 	help
 	  Static number of max supported virtual CPUs per VM.
 

arch/arm/kvm/Makefile | +1 -1

@@ -18,6 +18,6 @@ kvm-arm-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o)
 
 obj-y += kvm-arm.o init.o interrupts.o
 obj-y += arm.o handle_exit.o guest.o mmu.o emulate.o reset.o
-obj-y += coproc.o coproc_a15.o mmio.o psci.o
+obj-y += coproc.o coproc_a15.o mmio.o psci.o perf.o
 obj-$(CONFIG_KVM_ARM_VGIC) += vgic.o
 obj-$(CONFIG_KVM_ARM_TIMER) += arch_timer.o

arch/arm/kvm/arch_timer.c | +4 -3

@@ -22,6 +22,7 @@
 #include <linux/kvm_host.h>
 #include <linux/interrupt.h>
 
+#include <clocksource/arm_arch_timer.h>
 #include <asm/arch_timer.h>
 
 #include <asm/kvm_vgic.h>
@@ -64,7 +65,7 @@ static void kvm_timer_inject_irq(struct kvm_vcpu *vcpu)
 {
 	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
 
-	timer->cntv_ctl |= 1 << 1; /* Mask the interrupt in the guest */
+	timer->cntv_ctl |= ARCH_TIMER_CTRL_IT_MASK;
 	kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id,
 			    vcpu->arch.timer_cpu.irq->irq,
 			    vcpu->arch.timer_cpu.irq->level);
@@ -133,8 +134,8 @@ void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu)
 	cycle_t cval, now;
 	u64 ns;
 
-	/* Check if the timer is enabled and unmasked first */
-	if ((timer->cntv_ctl & 3) != 1)
+	if ((timer->cntv_ctl & ARCH_TIMER_CTRL_IT_MASK) ||
+		!(timer->cntv_ctl & ARCH_TIMER_CTRL_ENABLE))
 		return;
 
 	cval = timer->cntv_cval;

arch/arm/kvm/arm.c | +75 -54

@@ -16,6 +16,7 @@
  * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
  */
 
+#include <linux/cpu.h>
 #include <linux/errno.h>
 #include <linux/err.h>
 #include <linux/kvm_host.h>
@@ -48,7 +49,7 @@ __asm__(".arch_extension	virt");
 #endif
 
 static DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page);
-static kvm_kernel_vfp_t __percpu *kvm_host_vfp_state;
+static kvm_cpu_context_t __percpu *kvm_host_cpu_state;
 static unsigned long hyp_default_vectors;
 
 /* Per-CPU variable containing the currently running vcpu. */
@@ -206,7 +207,7 @@ int kvm_dev_ioctl_check_extension(long ext)
 		r = KVM_MAX_VCPUS;
 		break;
 	default:
-		r = 0;
+		r = kvm_arch_dev_ioctl_check_extension(ext);
 		break;
 	}
 	return r;
@@ -218,27 +219,18 @@ long kvm_arch_dev_ioctl(struct file *filp,
 	return -EINVAL;
 }
 
-int kvm_arch_set_memory_region(struct kvm *kvm,
-			       struct kvm_userspace_memory_region *mem,
-			       struct kvm_memory_slot old,
-			       int user_alloc)
-{
-	return 0;
-}
-
 int kvm_arch_prepare_memory_region(struct kvm *kvm,
 				   struct kvm_memory_slot *memslot,
-				   struct kvm_memory_slot old,
 				   struct kvm_userspace_memory_region *mem,
-				   bool user_alloc)
+				   enum kvm_mr_change change)
 {
 	return 0;
 }
 
 void kvm_arch_commit_memory_region(struct kvm *kvm,
 				   struct kvm_userspace_memory_region *mem,
-				   struct kvm_memory_slot old,
-				   bool user_alloc)
+				   const struct kvm_memory_slot *old,
+				   enum kvm_mr_change change)
 {
 }
 
@@ -326,7 +318,7 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
 	vcpu->cpu = cpu;
-	vcpu->arch.vfp_host = this_cpu_ptr(kvm_host_vfp_state);
+	vcpu->arch.host_cpu_context = this_cpu_ptr(kvm_host_cpu_state);
 
 	/*
 	 * Check whether this vcpu requires the cache to be flushed on
@@ -639,7 +631,8 @@ static int vcpu_interrupt_line(struct kvm_vcpu *vcpu, int number, bool level)
 	return 0;
 }
 
-int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level)
+int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level,
+			  bool line_status)
 {
 	u32 irq = irq_level->irq;
 	unsigned int irq_type, vcpu_idx, irq_num;
@@ -794,30 +787,48 @@ long kvm_arch_vm_ioctl(struct file *filp,
 	}
 }
 
-static void cpu_init_hyp_mode(void *vector)
+static void cpu_init_hyp_mode(void *dummy)
 {
+	unsigned long long boot_pgd_ptr;
 	unsigned long long pgd_ptr;
 	unsigned long hyp_stack_ptr;
 	unsigned long stack_page;
 	unsigned long vector_ptr;
 
 	/* Switch from the HYP stub to our own HYP init vector */
-	__hyp_set_vectors((unsigned long)vector);
+	__hyp_set_vectors(kvm_get_idmap_vector());
 
+	boot_pgd_ptr = (unsigned long long)kvm_mmu_get_boot_httbr();
 	pgd_ptr = (unsigned long long)kvm_mmu_get_httbr();
 	stack_page = __get_cpu_var(kvm_arm_hyp_stack_page);
 	hyp_stack_ptr = stack_page + PAGE_SIZE;
 	vector_ptr = (unsigned long)__kvm_hyp_vector;
 
-	__cpu_init_hyp_mode(pgd_ptr, hyp_stack_ptr, vector_ptr);
+	__cpu_init_hyp_mode(boot_pgd_ptr, pgd_ptr, hyp_stack_ptr, vector_ptr);
+}
+
+static int hyp_init_cpu_notify(struct notifier_block *self,
+			       unsigned long action, void *cpu)
+{
+	switch (action) {
+	case CPU_STARTING:
+	case CPU_STARTING_FROZEN:
+		cpu_init_hyp_mode(NULL);
+		break;
+	}
+
+	return NOTIFY_OK;
 }
 }
 
 
+static struct notifier_block hyp_init_cpu_nb = {
+	.notifier_call = hyp_init_cpu_notify,
+};
+
 /**
  * Inits Hyp-mode on all online CPUs
  */
 static int init_hyp_mode(void)
 {
-	phys_addr_t init_phys_addr;
 	int cpu;
 	int err = 0;
 
@@ -849,24 +860,6 @@ static int init_hyp_mode(void)
 		per_cpu(kvm_arm_hyp_stack_page, cpu) = stack_page;
 	}
 
-	/*
-	 * Execute the init code on each CPU.
-	 *
-	 * Note: The stack is not mapped yet, so don't do anything else than
-	 * initializing the hypervisor mode on each CPU using a local stack
-	 * space for temporary storage.
-	 */
-	init_phys_addr = virt_to_phys(__kvm_hyp_init);
-	for_each_online_cpu(cpu) {
-		smp_call_function_single(cpu, cpu_init_hyp_mode,
-					 (void *)(long)init_phys_addr, 1);
-	}
-
-	/*
-	 * Unmap the identity mapping
-	 */
-	kvm_clear_hyp_idmap();
-
 	/*
 	 * Map the Hyp-code called directly from the host
 	 */
@@ -890,33 +883,38 @@ static int init_hyp_mode(void)
 	}
 
 	/*
-	 * Map the host VFP structures
+	 * Map the host CPU structures
 	 */
-	kvm_host_vfp_state = alloc_percpu(kvm_kernel_vfp_t);
-	if (!kvm_host_vfp_state) {
+	kvm_host_cpu_state = alloc_percpu(kvm_cpu_context_t);
+	if (!kvm_host_cpu_state) {
 		err = -ENOMEM;
-		kvm_err("Cannot allocate host VFP state\n");
+		kvm_err("Cannot allocate host CPU state\n");
 		goto out_free_mappings;
 	}
 
 	for_each_possible_cpu(cpu) {
-		kvm_kernel_vfp_t *vfp;
+		kvm_cpu_context_t *cpu_ctxt;
 
-		vfp = per_cpu_ptr(kvm_host_vfp_state, cpu);
-		err = create_hyp_mappings(vfp, vfp + 1);
+		cpu_ctxt = per_cpu_ptr(kvm_host_cpu_state, cpu);
+		err = create_hyp_mappings(cpu_ctxt, cpu_ctxt + 1);
 
 		if (err) {
-			kvm_err("Cannot map host VFP state: %d\n", err);
-			goto out_free_vfp;
+			kvm_err("Cannot map host CPU state: %d\n", err);
+			goto out_free_context;
 		}
 	}
 
+	/*
+	 * Execute the init code on each CPU.
+	 */
+	on_each_cpu(cpu_init_hyp_mode, NULL, 1);
+
 	/*
 	 * Init HYP view of VGIC
 	 */
 	err = kvm_vgic_hyp_init();
 	if (err)
-		goto out_free_vfp;
+		goto out_free_context;
 
 #ifdef CONFIG_KVM_ARM_VGIC
 		vgic_present = true;
@@ -929,12 +927,19 @@ static int init_hyp_mode(void)
 	if (err)
 		goto out_free_mappings;
 
+#ifndef CONFIG_HOTPLUG_CPU
+	free_boot_hyp_pgd();
+#endif
+
+	kvm_perf_init();
+
 	kvm_info("Hyp mode initialized successfully\n");
+
 	return 0;
-out_free_vfp:
-	free_percpu(kvm_host_vfp_state);
+out_free_context:
+	free_percpu(kvm_host_cpu_state);
 out_free_mappings:
-	free_hyp_pmds();
+	free_hyp_pgds();
 out_free_stack_pages:
 	for_each_possible_cpu(cpu)
 		free_page(per_cpu(kvm_arm_hyp_stack_page, cpu));
@@ -943,27 +948,42 @@ out_err:
 	return err;
 }
 
+static void check_kvm_target_cpu(void *ret)
+{
+	*(int *)ret = kvm_target_cpu();
+}
+
 /**
  * Initialize Hyp-mode and memory mappings on all CPUs.
  */
 int kvm_arch_init(void *opaque)
 {
 	int err;
+	int ret, cpu;
 
 	if (!is_hyp_mode_available()) {
 		kvm_err("HYP mode not available\n");
 		return -ENODEV;
 	}
 
-	if (kvm_target_cpu() < 0) {
-		kvm_err("Target CPU not supported!\n");
-		return -ENODEV;
+	for_each_online_cpu(cpu) {
+		smp_call_function_single(cpu, check_kvm_target_cpu, &ret, 1);
+		if (ret < 0) {
+			kvm_err("Error, CPU %d not supported!\n", cpu);
+			return -ENODEV;
+		}
 	}
 
 	err = init_hyp_mode();
 	if (err)
 		goto out_err;
 
+	err = register_cpu_notifier(&hyp_init_cpu_nb);
+	if (err) {
+		kvm_err("Cannot register HYP init CPU notifier (%d)\n", err);
+		goto out_err;
+	}
+
 	kvm_coproc_table_init();
 	return 0;
 out_err:
@@ -973,6 +993,7 @@ out_err:
 /* NOP: Compiling as a module not supported */
 void kvm_arch_exit(void)
 {
+	kvm_perf_teardown();
 }
 
 static int arm_init(void)

arch/arm/kvm/init.S | +59 -19

@@ -21,13 +21,33 @@
 #include <asm/asm-offsets.h>
 #include <asm/kvm_asm.h>
 #include <asm/kvm_arm.h>
+#include <asm/kvm_mmu.h>
 
 /********************************************************************
  * Hypervisor initialization
  *   - should be called with:
- *       r0,r1 = Hypervisor pgd pointer
- *       r2 = top of Hyp stack (kernel VA)
- *       r3 = pointer to hyp vectors
+ *       r0 = top of Hyp stack (kernel VA)
+ *       r1 = pointer to hyp vectors
+ *       r2,r3 = Hypervisor pgd pointer
+ *
+ * The init scenario is:
+ * - We jump in HYP with four parameters: boot HYP pgd, runtime HYP pgd,
+ *   runtime stack, runtime vectors
+ * - Enable the MMU with the boot pgd
+ * - Jump to a target into the trampoline page (remember, this is the same
+ *   physical page!)
+ * - Now switch to the runtime pgd (same VA, and still the same physical
+ *   page!)
+ * - Invalidate TLBs
+ * - Set stack and vectors
+ * - Profit! (or eret, if you only care about the code).
+ *
+ * As we only have four registers available to pass parameters (and we
+ * need six), we split the init in two phases:
+ * - Phase 1: r0 = 0, r1 = 0, r2,r3 contain the boot PGD.
+ *   Provides the basic HYP init, and enable the MMU.
+ * - Phase 2: r0 = ToS, r1 = vectors, r2,r3 contain the runtime PGD.
+ *   Switches to the runtime PGD, set stack and vectors.
  */
 
 	.text
@@ -47,22 +67,25 @@ __kvm_hyp_init:
 	W(b)	.
 
 __do_hyp_init:
+	cmp	r0, #0			@ We have a SP?
+	bne	phase2			@ Yes, second stage init
+
 	@ Set the HTTBR to point to the hypervisor PGD pointer passed
-	mcrr	p15, 4, r0, r1, c2
+	mcrr	p15, 4, r2, r3, c2
 
 	@ Set the HTCR and VTCR to the same shareability and cacheability
 	@ settings as the non-secure TTBCR and with T0SZ == 0.
 	mrc	p15, 4, r0, c2, c0, 2	@ HTCR
-	ldr	r12, =HTCR_MASK
-	bic	r0, r0, r12
+	ldr	r2, =HTCR_MASK
+	bic	r0, r0, r2
 	mrc	p15, 0, r1, c2, c0, 2	@ TTBCR
 	and	r1, r1, #(HTCR_MASK & ~TTBCR_T0SZ)
 	orr	r0, r0, r1
 	mcr	p15, 4, r0, c2, c0, 2	@ HTCR
 
 	mrc	p15, 4, r1, c2, c1, 2	@ VTCR
-	ldr	r12, =VTCR_MASK
-	bic	r1, r1, r12
+	ldr	r2, =VTCR_MASK
+	bic	r1, r1, r2
 	bic	r0, r0, #(~VTCR_HTCR_SH)	@ clear non-reusable HTCR bits
 	orr	r1, r0, r1
 	orr	r1, r1, #(KVM_VTCR_SL0 | KVM_VTCR_T0SZ | KVM_VTCR_S)
@@ -85,24 +108,41 @@ __do_hyp_init:
 	@  - Memory alignment checks: enabled
 	@  - MMU: enabled (this code must be run from an identity mapping)
 	mrc	p15, 4, r0, c1, c0, 0	@ HSCR
-	ldr	r12, =HSCTLR_MASK
-	bic	r0, r0, r12
+	ldr	r2, =HSCTLR_MASK
+	bic	r0, r0, r2
 	mrc	p15, 0, r1, c1, c0, 0	@ SCTLR
-	ldr	r12, =(HSCTLR_EE | HSCTLR_FI | HSCTLR_I | HSCTLR_C)
-	and	r1, r1, r12
- ARM(	ldr	r12, =(HSCTLR_M | HSCTLR_A)			)
- THUMB(	ldr	r12, =(HSCTLR_M | HSCTLR_A | HSCTLR_TE)		)
-	orr	r1, r1, r12
+	ldr	r2, =(HSCTLR_EE | HSCTLR_FI | HSCTLR_I | HSCTLR_C)
+	and	r1, r1, r2
+ ARM(	ldr	r2, =(HSCTLR_M | HSCTLR_A)			)
+ THUMB(	ldr	r2, =(HSCTLR_M | HSCTLR_A | HSCTLR_TE)		)
+	orr	r1, r1, r2
 	orr	r0, r0, r1
 	isb
 	mcr	p15, 4, r0, c1, c0, 0	@ HSCR
-	isb
 
-	@ Set stack pointer and return to the kernel
-	mov	sp, r2
+	@ End of init phase-1
+	eret
+
+phase2:
+	@ Set stack pointer
+	mov	sp, r0
 
 	@ Set HVBAR to point to the HYP vectors
-	mcr	p15, 4, r3, c12, c0, 0	@ HVBAR
+	mcr	p15, 4, r1, c12, c0, 0	@ HVBAR
+
+	@ Jump to the trampoline page
+	ldr	r0, =TRAMPOLINE_VA
+	adr	r1, target
+	bfi	r0, r1, #0, #PAGE_SHIFT
+	mov	pc, r0
+
+target:	@ We're now in the trampoline code, switch page tables
+	mcrr	p15, 4, r2, r3, c2
+	isb
+
+	@ Invalidate the old TLBs
+	mcr	p15, 4, r0, c8, c7, 0	@ TLBIALLH
+	dsb
 
 	eret
 

arch/arm/kvm/mmu.c | +257 -198

@@ -32,8 +32,15 @@
 
 extern char  __hyp_idmap_text_start[], __hyp_idmap_text_end[];
 
+static pgd_t *boot_hyp_pgd;
+static pgd_t *hyp_pgd;
 static DEFINE_MUTEX(kvm_hyp_pgd_mutex);
 
+static void *init_bounce_page;
+static unsigned long hyp_idmap_start;
+static unsigned long hyp_idmap_end;
+static phys_addr_t hyp_idmap_vector;
+
 static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
 {
 	kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, kvm, ipa);
@@ -71,172 +78,224 @@ static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc)
 	return p;
 }
 
-static void free_ptes(pmd_t *pmd, unsigned long addr)
+static void clear_pud_entry(pud_t *pud)
 {
-	pte_t *pte;
-	unsigned int i;
+	pmd_t *pmd_table = pmd_offset(pud, 0);
+	pud_clear(pud);
+	pmd_free(NULL, pmd_table);
+	put_page(virt_to_page(pud));
+}
 
-	for (i = 0; i < PTRS_PER_PMD; i++, addr += PMD_SIZE) {
-		if (!pmd_none(*pmd) && pmd_table(*pmd)) {
-			pte = pte_offset_kernel(pmd, addr);
-			pte_free_kernel(NULL, pte);
-		}
-		pmd++;
+static void clear_pmd_entry(pmd_t *pmd)
+{
+	pte_t *pte_table = pte_offset_kernel(pmd, 0);
+	pmd_clear(pmd);
+	pte_free_kernel(NULL, pte_table);
+	put_page(virt_to_page(pmd));
+}
+
+static bool pmd_empty(pmd_t *pmd)
+{
+	struct page *pmd_page = virt_to_page(pmd);
+	return page_count(pmd_page) == 1;
+}
+
+static void clear_pte_entry(pte_t *pte)
+{
+	if (pte_present(*pte)) {
+		kvm_set_pte(pte, __pte(0));
+		put_page(virt_to_page(pte));
 	}
 }
 
-static void free_hyp_pgd_entry(unsigned long addr)
+static bool pte_empty(pte_t *pte)
+{
+	struct page *pte_page = virt_to_page(pte);
+	return page_count(pte_page) == 1;
+}
+
+static void unmap_range(pgd_t *pgdp, unsigned long long start, u64 size)
 {
 	pgd_t *pgd;
 	pud_t *pud;
 	pmd_t *pmd;
-	unsigned long hyp_addr = KERN_TO_HYP(addr);
+	pte_t *pte;
+	unsigned long long addr = start, end = start + size;
+	u64 range;
+
+	while (addr < end) {
+		pgd = pgdp + pgd_index(addr);
+		pud = pud_offset(pgd, addr);
+		if (pud_none(*pud)) {
+			addr += PUD_SIZE;
+			continue;
+		}
 
-	pgd = hyp_pgd + pgd_index(hyp_addr);
-	pud = pud_offset(pgd, hyp_addr);
+		pmd = pmd_offset(pud, addr);
+		if (pmd_none(*pmd)) {
+			addr += PMD_SIZE;
+			continue;
+		}
 
-	if (pud_none(*pud))
-		return;
-	BUG_ON(pud_bad(*pud));
+		pte = pte_offset_kernel(pmd, addr);
+		clear_pte_entry(pte);
+		range = PAGE_SIZE;
 
-	pmd = pmd_offset(pud, hyp_addr);
-	free_ptes(pmd, addr);
-	pmd_free(NULL, pmd);
-	pud_clear(pud);
+		/* If we emptied the pte, walk back up the ladder */
+		if (pte_empty(pte)) {
+			clear_pmd_entry(pmd);
+			range = PMD_SIZE;
+			if (pmd_empty(pmd)) {
+				clear_pud_entry(pud);
+				range = PUD_SIZE;
+			}
+		}
+
+		addr += range;
+	}
 }
 
 /**
- * free_hyp_pmds - free a Hyp-mode level-2 tables and child level-3 tables
+ * free_boot_hyp_pgd - free HYP boot page tables
  *
- * Assumes this is a page table used strictly in Hyp-mode and therefore contains
- * either mappings in the kernel memory area (above PAGE_OFFSET), or
- * device mappings in the vmalloc range (from VMALLOC_START to VMALLOC_END).
+ * Free the HYP boot page tables. The bounce page is also freed.
  */
-void free_hyp_pmds(void)
+void free_boot_hyp_pgd(void)
 {
-	unsigned long addr;
-
 	mutex_lock(&kvm_hyp_pgd_mutex);
-	for (addr = PAGE_OFFSET; virt_addr_valid(addr); addr += PGDIR_SIZE)
-		free_hyp_pgd_entry(addr);
-	for (addr = VMALLOC_START; is_vmalloc_addr((void*)addr); addr += PGDIR_SIZE)
-		free_hyp_pgd_entry(addr);
+
+	if (boot_hyp_pgd) {
+		unmap_range(boot_hyp_pgd, hyp_idmap_start, PAGE_SIZE);
+		unmap_range(boot_hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE);
+		kfree(boot_hyp_pgd);
+		boot_hyp_pgd = NULL;
+	}
+
+	if (hyp_pgd)
+		unmap_range(hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE);
+
+	kfree(init_bounce_page);
+	init_bounce_page = NULL;
+
 	mutex_unlock(&kvm_hyp_pgd_mutex);
 }
 
-static void create_hyp_pte_mappings(pmd_t *pmd, unsigned long start,
-				    unsigned long end)
+/**
+ * free_hyp_pgds - free Hyp-mode page tables
+ *
+ * Assumes hyp_pgd is a page table used strictly in Hyp-mode and
+ * therefore contains either mappings in the kernel memory area (above
+ * PAGE_OFFSET), or device mappings in the vmalloc range (from
+ * VMALLOC_START to VMALLOC_END).
+ *
+ * boot_hyp_pgd should only map two pages for the init code.
+ */
+void free_hyp_pgds(void)
 {
-	pte_t *pte;
 	unsigned long addr;
-	struct page *page;
 
-	for (addr = start & PAGE_MASK; addr < end; addr += PAGE_SIZE) {
-		unsigned long hyp_addr = KERN_TO_HYP(addr);
+	free_boot_hyp_pgd();
+
+	mutex_lock(&kvm_hyp_pgd_mutex);
 
-		pte = pte_offset_kernel(pmd, hyp_addr);
-		BUG_ON(!virt_addr_valid(addr));
-		page = virt_to_page(addr);
-		kvm_set_pte(pte, mk_pte(page, PAGE_HYP));
+	if (hyp_pgd) {
+		for (addr = PAGE_OFFSET; virt_addr_valid(addr); addr += PGDIR_SIZE)
+			unmap_range(hyp_pgd, KERN_TO_HYP(addr), PGDIR_SIZE);
+		for (addr = VMALLOC_START; is_vmalloc_addr((void*)addr); addr += PGDIR_SIZE)
+			unmap_range(hyp_pgd, KERN_TO_HYP(addr), PGDIR_SIZE);
+		kfree(hyp_pgd);
+		hyp_pgd = NULL;
 	}
+
+	mutex_unlock(&kvm_hyp_pgd_mutex);
 }
 
-static void create_hyp_io_pte_mappings(pmd_t *pmd, unsigned long start,
-				       unsigned long end,
-				       unsigned long *pfn_base)
+static void create_hyp_pte_mappings(pmd_t *pmd, unsigned long start,
+				    unsigned long end, unsigned long pfn,
+				    pgprot_t prot)
 {
 	pte_t *pte;
 	unsigned long addr;
 
-	for (addr = start & PAGE_MASK; addr < end; addr += PAGE_SIZE) {
-		unsigned long hyp_addr = KERN_TO_HYP(addr);
-
-		pte = pte_offset_kernel(pmd, hyp_addr);
-		BUG_ON(pfn_valid(*pfn_base));
-		kvm_set_pte(pte, pfn_pte(*pfn_base, PAGE_HYP_DEVICE));
-		(*pfn_base)++;
-	}
+	addr = start;
+	do {
+		pte = pte_offset_kernel(pmd, addr);
+		kvm_set_pte(pte, pfn_pte(pfn, prot));
+		get_page(virt_to_page(pte));
+		kvm_flush_dcache_to_poc(pte, sizeof(*pte));
+		pfn++;
+	} while (addr += PAGE_SIZE, addr != end);
 }
 
 static int create_hyp_pmd_mappings(pud_t *pud, unsigned long start,
-				   unsigned long end, unsigned long *pfn_base)
+				   unsigned long end, unsigned long pfn,
+				   pgprot_t prot)
 {
 	pmd_t *pmd;
 	pte_t *pte;
 	unsigned long addr, next;
 
-	for (addr = start; addr < end; addr = next) {
-		unsigned long hyp_addr = KERN_TO_HYP(addr);
-		pmd = pmd_offset(pud, hyp_addr);
+	addr = start;
+	do {
+		pmd = pmd_offset(pud, addr);
 
 		BUG_ON(pmd_sect(*pmd));
 
 		if (pmd_none(*pmd)) {
-			pte = pte_alloc_one_kernel(NULL, hyp_addr);
+			pte = pte_alloc_one_kernel(NULL, addr);
 			if (!pte) {
 				kvm_err("Cannot allocate Hyp pte\n");
 				return -ENOMEM;
 			}
 			pmd_populate_kernel(NULL, pmd, pte);
+			get_page(virt_to_page(pmd));
+			kvm_flush_dcache_to_poc(pmd, sizeof(*pmd));
 		}
 
 		next = pmd_addr_end(addr, end);
 
-		/*
-		 * If pfn_base is NULL, we map kernel pages into HYP with the
-		 * virtual address. Otherwise, this is considered an I/O
-		 * mapping and we map the physical region starting at
-		 * *pfn_base to [start, end[.
-		 */
-		if (!pfn_base)
-			create_hyp_pte_mappings(pmd, addr, next);
-		else
-			create_hyp_io_pte_mappings(pmd, addr, next, pfn_base);
-	}
+		create_hyp_pte_mappings(pmd, addr, next, pfn, prot);
+		pfn += (next - addr) >> PAGE_SHIFT;
+	} while (addr = next, addr != end);
 
 	return 0;
 }
 
-static int __create_hyp_mappings(void *from, void *to, unsigned long *pfn_base)
+static int __create_hyp_mappings(pgd_t *pgdp,
+				 unsigned long start, unsigned long end,
+				 unsigned long pfn, pgprot_t prot)
 {
-	unsigned long start = (unsigned long)from;
-	unsigned long end = (unsigned long)to;
 	pgd_t *pgd;
 	pud_t *pud;
 	pmd_t *pmd;
 	unsigned long addr, next;
 	int err = 0;
 
-	if (start >= end)
-		return -EINVAL;
-	/* Check for a valid kernel memory mapping */
-	if (!pfn_base && (!virt_addr_valid(from) || !virt_addr_valid(to - 1)))
-		return -EINVAL;
-	/* Check for a valid kernel IO mapping */
-	if (pfn_base && (!is_vmalloc_addr(from) || !is_vmalloc_addr(to - 1)))
-		return -EINVAL;
-
 	mutex_lock(&kvm_hyp_pgd_mutex);
-	for (addr = start; addr < end; addr = next) {
-		unsigned long hyp_addr = KERN_TO_HYP(addr);
-		pgd = hyp_pgd + pgd_index(hyp_addr);
-		pud = pud_offset(pgd, hyp_addr);
+	addr = start & PAGE_MASK;
+	end = PAGE_ALIGN(end);
+	do {
+		pgd = pgdp + pgd_index(addr);
+		pud = pud_offset(pgd, addr);
 
 		if (pud_none_or_clear_bad(pud)) {
-			pmd = pmd_alloc_one(NULL, hyp_addr);
+			pmd = pmd_alloc_one(NULL, addr);
 			if (!pmd) {
 				kvm_err("Cannot allocate Hyp pmd\n");
 				err = -ENOMEM;
 				goto out;
 			}
 			pud_populate(NULL, pud, pmd);
+			get_page(virt_to_page(pud));
+			kvm_flush_dcache_to_poc(pud, sizeof(*pud));
 		}
 
 		next = pgd_addr_end(addr, end);
-		err = create_hyp_pmd_mappings(pud, addr, next, pfn_base);
+		err = create_hyp_pmd_mappings(pud, addr, next, pfn, prot);
 		if (err)
 			goto out;
-	}
+		pfn += (next - addr) >> PAGE_SHIFT;
+	} while (addr = next, addr != end);
 out:
 	mutex_unlock(&kvm_hyp_pgd_mutex);
 	return err;
@@ -250,27 +309,41 @@ out:
 * The same virtual address as the kernel virtual address is also used
 * in Hyp-mode mapping (modulo HYP_PAGE_OFFSET) to the same underlying
 * physical pages.
- *
- * Note: Wrapping around zero in the "to" address is not supported.
 */
 int create_hyp_mappings(void *from, void *to)
 {
-	return __create_hyp_mappings(from, to, NULL);
+	unsigned long phys_addr = virt_to_phys(from);
+	unsigned long start = KERN_TO_HYP((unsigned long)from);
+	unsigned long end = KERN_TO_HYP((unsigned long)to);
+
+	/* Check for a valid kernel memory mapping */
+	if (!virt_addr_valid(from) || !virt_addr_valid(to - 1))
+		return -EINVAL;
+
+	return __create_hyp_mappings(hyp_pgd, start, end,
+				     __phys_to_pfn(phys_addr), PAGE_HYP);
 }
 
 /**
 * create_hyp_io_mappings - duplicate a kernel IO mapping into Hyp mode
 * @from:	The kernel start VA of the range
 * @to:		The kernel end VA of the range (exclusive)
- * @addr:	The physical start address which gets mapped
+ * @phys_addr:	The physical start address which gets mapped
 *
 * The resulting HYP VA is the same as the kernel VA, modulo
 * HYP_PAGE_OFFSET.
 */
-int create_hyp_io_mappings(void *from, void *to, phys_addr_t addr)
+int create_hyp_io_mappings(void *from, void *to, phys_addr_t phys_addr)
 {
-	unsigned long pfn = __phys_to_pfn(addr);
-	return __create_hyp_mappings(from, to, &pfn);
+	unsigned long start = KERN_TO_HYP((unsigned long)from);
+	unsigned long end = KERN_TO_HYP((unsigned long)to);
+
+	/* Check for a valid kernel IO mapping */
+	if (!is_vmalloc_addr(from) || !is_vmalloc_addr(to - 1))
+		return -EINVAL;
+
+	return __create_hyp_mappings(hyp_pgd, start, end,
+				     __phys_to_pfn(phys_addr), PAGE_HYP_DEVICE);
 }
 
 /**
@@ -307,42 +380,6 @@ int kvm_alloc_stage2_pgd(struct kvm *kvm)
 	return 0;
 	return 0;
 }
 }
 
 
-static void clear_pud_entry(pud_t *pud)
-{
-	pmd_t *pmd_table = pmd_offset(pud, 0);
-	pud_clear(pud);
-	pmd_free(NULL, pmd_table);
-	put_page(virt_to_page(pud));
-}
-
-static void clear_pmd_entry(pmd_t *pmd)
-{
-	pte_t *pte_table = pte_offset_kernel(pmd, 0);
-	pmd_clear(pmd);
-	pte_free_kernel(NULL, pte_table);
-	put_page(virt_to_page(pmd));
-}
-
-static bool pmd_empty(pmd_t *pmd)
-{
-	struct page *pmd_page = virt_to_page(pmd);
-	return page_count(pmd_page) == 1;
-}
-
-static void clear_pte_entry(pte_t *pte)
-{
-	if (pte_present(*pte)) {
-		kvm_set_pte(pte, __pte(0));
-		put_page(virt_to_page(pte));
-	}
-}
-
-static bool pte_empty(pte_t *pte)
-{
-	struct page *pte_page = virt_to_page(pte);
-	return page_count(pte_page) == 1;
-}
-
 /**
 /**
  * unmap_stage2_range -- Clear stage2 page table entries to unmap a range
  * unmap_stage2_range -- Clear stage2 page table entries to unmap a range
  * @kvm:   The VM pointer
  * @kvm:   The VM pointer
@@ -356,43 +393,7 @@ static bool pte_empty(pte_t *pte)
  */
  */
 static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size)
 static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size)
 {
 {
-	pgd_t *pgd;
-	pud_t *pud;
-	pmd_t *pmd;
-	pte_t *pte;
-	phys_addr_t addr = start, end = start + size;
-	u64 range;
-
-	while (addr < end) {
-		pgd = kvm->arch.pgd + pgd_index(addr);
-		pud = pud_offset(pgd, addr);
-		if (pud_none(*pud)) {
-			addr += PUD_SIZE;
-			continue;
-		}
-
-		pmd = pmd_offset(pud, addr);
-		if (pmd_none(*pmd)) {
-			addr += PMD_SIZE;
-			continue;
-		}
-
-		pte = pte_offset_kernel(pmd, addr);
-		clear_pte_entry(pte);
-		range = PAGE_SIZE;
-
-		/* If we emptied the pte, walk back up the ladder */
-		if (pte_empty(pte)) {
-			clear_pmd_entry(pmd);
-			range = PMD_SIZE;
-			if (pmd_empty(pmd)) {
-				clear_pud_entry(pud);
-				range = PUD_SIZE;
-			}
-		}
-
-		addr += range;
-	}
+	unmap_range(kvm->arch.pgd, start, size);
 }
 }
 
 
 /**
 /**
@@ -728,47 +729,105 @@ void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu)
 
 
 phys_addr_t kvm_mmu_get_httbr(void)
 phys_addr_t kvm_mmu_get_httbr(void)
 {
 {
-	VM_BUG_ON(!virt_addr_valid(hyp_pgd));
 	return virt_to_phys(hyp_pgd);
 	return virt_to_phys(hyp_pgd);
 }
 }
 
 
+phys_addr_t kvm_mmu_get_boot_httbr(void)
+{
+	return virt_to_phys(boot_hyp_pgd);
+}
+
+phys_addr_t kvm_get_idmap_vector(void)
+{
+	return hyp_idmap_vector;
+}
+
 int kvm_mmu_init(void)
 int kvm_mmu_init(void)
 {
 {
-	if (!hyp_pgd) {
+	int err;
+
+	hyp_idmap_start = virt_to_phys(__hyp_idmap_text_start);
+	hyp_idmap_end = virt_to_phys(__hyp_idmap_text_end);
+	hyp_idmap_vector = virt_to_phys(__kvm_hyp_init);
+
+	if ((hyp_idmap_start ^ hyp_idmap_end) & PAGE_MASK) {
+		/*
+		 * Our init code is crossing a page boundary. Allocate
+		 * a bounce page, copy the code over and use that.
+		 */
+		size_t len = __hyp_idmap_text_end - __hyp_idmap_text_start;
+		phys_addr_t phys_base;
+
+		init_bounce_page = kmalloc(PAGE_SIZE, GFP_KERNEL);
+		if (!init_bounce_page) {
+			kvm_err("Couldn't allocate HYP init bounce page\n");
+			err = -ENOMEM;
+			goto out;
+		}
+
+		memcpy(init_bounce_page, __hyp_idmap_text_start, len);
+		/*
+		 * Warning: the code we just copied to the bounce page
+		 * must be flushed to the point of coherency.
+		 * Otherwise, the data may be sitting in L2, and HYP
+		 * mode won't be able to observe it as it runs with
+		 * caches off at that point.
+		 */
+		kvm_flush_dcache_to_poc(init_bounce_page, len);
+
+		phys_base = virt_to_phys(init_bounce_page);
+		hyp_idmap_vector += phys_base - hyp_idmap_start;
+		hyp_idmap_start = phys_base;
+		hyp_idmap_end = phys_base + len;
+
+		kvm_info("Using HYP init bounce page @%lx\n",
+			 (unsigned long)phys_base);
+	}
+
+	hyp_pgd = kzalloc(PTRS_PER_PGD * sizeof(pgd_t), GFP_KERNEL);
+	boot_hyp_pgd = kzalloc(PTRS_PER_PGD * sizeof(pgd_t), GFP_KERNEL);
+	if (!hyp_pgd || !boot_hyp_pgd) {
 		kvm_err("Hyp mode PGD not allocated\n");
 		kvm_err("Hyp mode PGD not allocated\n");
-		return -ENOMEM;
+		err = -ENOMEM;
+		goto out;
 	}
 	}
 
 
-	return 0;
-}
+	/* Create the idmap in the boot page tables */
+	err = 	__create_hyp_mappings(boot_hyp_pgd,
+				      hyp_idmap_start, hyp_idmap_end,
+				      __phys_to_pfn(hyp_idmap_start),
+				      PAGE_HYP);
 
 
-/**
- * kvm_clear_idmap - remove all idmaps from the hyp pgd
- *
- * Free the underlying pmds for all pgds in range and clear the pgds (but
- * don't free them) afterwards.
- */
-void kvm_clear_hyp_idmap(void)
-{
-	unsigned long addr, end;
-	unsigned long next;
-	pgd_t *pgd = hyp_pgd;
-	pud_t *pud;
-	pmd_t *pmd;
+	if (err) {
+		kvm_err("Failed to idmap %lx-%lx\n",
+			hyp_idmap_start, hyp_idmap_end);
+		goto out;
+	}
 
 
-	addr = virt_to_phys(__hyp_idmap_text_start);
-	end = virt_to_phys(__hyp_idmap_text_end);
+	/* Map the very same page at the trampoline VA */
+	err = 	__create_hyp_mappings(boot_hyp_pgd,
+				      TRAMPOLINE_VA, TRAMPOLINE_VA + PAGE_SIZE,
+				      __phys_to_pfn(hyp_idmap_start),
+				      PAGE_HYP);
+	if (err) {
+		kvm_err("Failed to map trampoline @%lx into boot HYP pgd\n",
+			TRAMPOLINE_VA);
+		goto out;
+	}
 
 
-	pgd += pgd_index(addr);
-	do {
-		next = pgd_addr_end(addr, end);
-		if (pgd_none_or_clear_bad(pgd))
-			continue;
-		pud = pud_offset(pgd, addr);
-		pmd = pmd_offset(pud, addr);
+	/* Map the same page again into the runtime page tables */
+	err = 	__create_hyp_mappings(hyp_pgd,
+				      TRAMPOLINE_VA, TRAMPOLINE_VA + PAGE_SIZE,
+				      __phys_to_pfn(hyp_idmap_start),
+				      PAGE_HYP);
+	if (err) {
+		kvm_err("Failed to map trampoline @%lx into runtime HYP pgd\n",
+			TRAMPOLINE_VA);
+		goto out;
+	}
 
 
-		pud_clear(pud);
-		kvm_clean_pmd_entry(pmd);
-		pmd_free(NULL, (pmd_t *)((unsigned long)pmd & PAGE_MASK));
-	} while (pgd++, addr = next, addr < end);
+	return 0;
+out:
+	free_hyp_pgds();
+	return err;
 }
 }

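A note on the bounce-page check in kvm_mmu_init() above: (hyp_idmap_start ^ hyp_idmap_end) & PAGE_MASK is non-zero exactly when the two addresses disagree in some bit above the page offset, i.e. when the HYP init code straddles a page boundary and cannot be covered by a single idmap page. A standalone illustration with made-up addresses (4K pages assumed, not real kernel values):

#include <stdio.h>

#define PAGE_SHIFT	12
#define PAGE_SIZE	(1UL << PAGE_SHIFT)
#define PAGE_MASK	(~(PAGE_SIZE - 1))

int main(void)
{
	/* hypothetical start/end pairs, purely for illustration */
	unsigned long same_start  = 0x8000100UL, same_end  = 0x8000180UL;
	unsigned long cross_start = 0x8000f80UL, cross_end = 0x8001020UL;

	/* prints 0: both addresses share one page, no bounce page needed */
	printf("%lu\n", (same_start ^ same_end) & PAGE_MASK);
	/* prints 4096: the range crosses into the next page */
	printf("%lu\n", (cross_start ^ cross_end) & PAGE_MASK);
	return 0;
}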
+ 68 - 0
arch/arm/kvm/perf.c

@@ -0,0 +1,68 @@
+/*
+ * Based on the x86 implementation.
+ *
+ * Copyright (C) 2012 ARM Ltd.
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/perf_event.h>
+#include <linux/kvm_host.h>
+
+#include <asm/kvm_emulate.h>
+
+static int kvm_is_in_guest(void)
+{
+        return kvm_arm_get_running_vcpu() != NULL;
+}
+
+static int kvm_is_user_mode(void)
+{
+	struct kvm_vcpu *vcpu;
+
+	vcpu = kvm_arm_get_running_vcpu();
+
+	if (vcpu)
+		return !vcpu_mode_priv(vcpu);
+
+	return 0;
+}
+
+static unsigned long kvm_get_guest_ip(void)
+{
+	struct kvm_vcpu *vcpu;
+
+	vcpu = kvm_arm_get_running_vcpu();
+
+	if (vcpu)
+		return *vcpu_pc(vcpu);
+
+	return 0;
+}
+
+static struct perf_guest_info_callbacks kvm_guest_cbs = {
+	.is_in_guest	= kvm_is_in_guest,
+	.is_user_mode	= kvm_is_user_mode,
+	.get_guest_ip	= kvm_get_guest_ip,
+};
+
+int kvm_perf_init(void)
+{
+	return perf_register_guest_info_callbacks(&kvm_guest_cbs);
+}
+
+int kvm_perf_teardown(void)
+{
+	return perf_unregister_guest_info_callbacks(&kvm_guest_cbs);
+}

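For context, the callbacks registered by kvm_perf_init() above are how the perf core attributes PMU samples that land while a vcpu is running. A hedged sketch of the consuming side (illustrative only; the real logic lives in the perf core, around perf_misc_flags() and perf_instruction_pointer()):

/*
 * Sketch of a consumer, assuming a registered
 * struct perf_guest_info_callbacks *cbs. This helper is
 * hypothetical, not code from this series.
 */
static unsigned long sample_ip(struct perf_guest_info_callbacks *cbs,
			       struct pt_regs *regs)
{
	if (cbs && cbs->is_in_guest())
		return cbs->get_guest_ip();	/* guest PC, i.e. *vcpu_pc() */
	return instruction_pointer(regs);	/* ordinary host sample */
}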
+ 1 - 31
arch/arm/mm/idmap.c

@@ -8,7 +8,6 @@
 #include <asm/pgtable.h>
 #include <asm/sections.h>
 #include <asm/system_info.h>
-#include <asm/virt.h>
 
 pgd_t *idmap_pgd;
 
@@ -83,37 +82,10 @@ static void identity_mapping_add(pgd_t *pgd, const char *text_start,
 	} while (pgd++, addr = next, addr != end);
 }
 
-#if defined(CONFIG_ARM_VIRT_EXT) && defined(CONFIG_ARM_LPAE)
-pgd_t *hyp_pgd;
-
-extern char  __hyp_idmap_text_start[], __hyp_idmap_text_end[];
-
-static int __init init_static_idmap_hyp(void)
-{
-	hyp_pgd = kzalloc(PTRS_PER_PGD * sizeof(pgd_t), GFP_KERNEL);
-	if (!hyp_pgd)
-		return -ENOMEM;
-
-	pr_info("Setting up static HYP identity map for 0x%p - 0x%p\n",
-		__hyp_idmap_text_start, __hyp_idmap_text_end);
-	identity_mapping_add(hyp_pgd, __hyp_idmap_text_start,
-			     __hyp_idmap_text_end, PMD_SECT_AP1);
-
-	return 0;
-}
-#else
-static int __init init_static_idmap_hyp(void)
-{
-	return 0;
-}
-#endif
-
 extern char  __idmap_text_start[], __idmap_text_end[];
 
 static int __init init_static_idmap(void)
 {
-	int ret;
-
 	idmap_pgd = pgd_alloc(&init_mm);
 	if (!idmap_pgd)
 		return -ENOMEM;
@@ -123,12 +95,10 @@ static int __init init_static_idmap(void)
 	identity_mapping_add(idmap_pgd, __idmap_text_start,
 			     __idmap_text_end, 0);
 
-	ret = init_static_idmap_hyp();
-
 	/* Flush L1 for the hardware to see this page table content */
 	flush_cache_louis();
 
-	return ret;
+	return 0;
 }
 early_initcall(init_static_idmap);

+ 1 - 0
arch/ia64/include/asm/kvm_host.h

@@ -26,6 +26,7 @@
 #define KVM_USER_MEM_SLOTS 32
 
 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1
+#define KVM_IRQCHIP_NUM_PINS  KVM_IOAPIC_NUM_PINS
 
 /* define exit reasons from vmm to kvm*/
 #define EXIT_REASON_VM_PANIC		0

+ 0 - 1
arch/ia64/include/uapi/asm/kvm.h

@@ -27,7 +27,6 @@
 /* Select x86 specific features in <linux/kvm.h> */
 #define __KVM_HAVE_IOAPIC
 #define __KVM_HAVE_IRQ_LINE
-#define __KVM_HAVE_DEVICE_ASSIGNMENT
 
 /* Architectural interrupt line count. */
 #define KVM_NR_INTERRUPTS 256

+ 12 - 2
arch/ia64/kvm/Kconfig

@@ -21,12 +21,11 @@ config KVM
 	tristate "Kernel-based Virtual Machine (KVM) support"
 	depends on BROKEN
 	depends on HAVE_KVM && MODULES
-	# for device assignment:
-	depends on PCI
 	depends on BROKEN
 	select PREEMPT_NOTIFIERS
 	select ANON_INODES
 	select HAVE_KVM_IRQCHIP
+	select HAVE_KVM_IRQ_ROUTING
 	select KVM_APIC_ARCHITECTURE
 	select KVM_MMIO
 	---help---
@@ -50,6 +49,17 @@ config KVM_INTEL
 	  Provides support for KVM on Itanium 2 processors equipped with the VT
 	  extensions.
 
+config KVM_DEVICE_ASSIGNMENT
+	bool "KVM legacy PCI device assignment support"
+	depends on KVM && PCI && IOMMU_API
+	default y
+	---help---
+	  Provide support for legacy PCI device assignment through KVM.  The
+	  kernel now also supports a full featured userspace device driver
+	  framework through VFIO, which supersedes much of this support.
+
+	  If unsure, say Y.
+
 source drivers/vhost/Kconfig
 
 endif # VIRTUALIZATION

+ 3 - 3
arch/ia64/kvm/Makefile

@@ -49,10 +49,10 @@ ccflags-y := -Ivirt/kvm -Iarch/ia64/kvm/
 asflags-y := -Ivirt/kvm -Iarch/ia64/kvm/
 
 common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \
-		coalesced_mmio.o irq_comm.o assigned-dev.o)
+		coalesced_mmio.o irq_comm.o)
 
-ifeq ($(CONFIG_IOMMU_API),y)
-common-objs += $(addprefix ../../../virt/kvm/, iommu.o)
+ifeq ($(CONFIG_KVM_DEVICE_ASSIGNMENT),y)
+common-objs += $(addprefix ../../../virt/kvm/, assigned-dev.o iommu.o)
 endif
 
 kvm-objs := $(common-objs) kvm-ia64.o kvm_fw.o

+ 9 - 26
arch/ia64/kvm/kvm-ia64.c

@@ -204,9 +204,11 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_COALESCED_MMIO:
 		r = KVM_COALESCED_MMIO_PAGE_OFFSET;
 		break;
+#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
 	case KVM_CAP_IOMMU:
 		r = iommu_present(&pci_bus_type);
 		break;
+#endif
 	default:
 		r = 0;
 	}
@@ -924,13 +926,15 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 	return 0;
 }
 
-int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event)
+int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event,
+		bool line_status)
 {
 	if (!irqchip_in_kernel(kvm))
 		return -ENXIO;
 
 	irq_event->status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
-					irq_event->irq, irq_event->level);
+					irq_event->irq, irq_event->level,
+					line_status);
 	return 0;
 }
 
@@ -942,24 +946,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
 	int r = -ENOTTY;
 
 	switch (ioctl) {
-	case KVM_SET_MEMORY_REGION: {
-		struct kvm_memory_region kvm_mem;
-		struct kvm_userspace_memory_region kvm_userspace_mem;
-
-		r = -EFAULT;
-		if (copy_from_user(&kvm_mem, argp, sizeof kvm_mem))
-			goto out;
-		kvm_userspace_mem.slot = kvm_mem.slot;
-		kvm_userspace_mem.flags = kvm_mem.flags;
-		kvm_userspace_mem.guest_phys_addr =
-					kvm_mem.guest_phys_addr;
-		kvm_userspace_mem.memory_size = kvm_mem.memory_size;
-		r = kvm_vm_ioctl_set_memory_region(kvm,
-					&kvm_userspace_mem, false);
-		if (r)
-			goto out;
-		break;
-		}
 	case KVM_CREATE_IRQCHIP:
 		r = -EFAULT;
 		r = kvm_ioapic_init(kvm);
@@ -1384,9 +1370,7 @@ void kvm_arch_sync_events(struct kvm *kvm)
 void kvm_arch_destroy_vm(struct kvm *kvm)
 {
 	kvm_iommu_unmap_guest(kvm);
-#ifdef  KVM_CAP_DEVICE_ASSIGNMENT
 	kvm_free_all_assigned_devices(kvm);
-#endif
 	kfree(kvm->arch.vioapic);
 	kvm_release_vm_pages(kvm);
 }
@@ -1578,9 +1562,8 @@ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
 
 int kvm_arch_prepare_memory_region(struct kvm *kvm,
 		struct kvm_memory_slot *memslot,
-		struct kvm_memory_slot old,
 		struct kvm_userspace_memory_region *mem,
-		bool user_alloc)
+		enum kvm_mr_change change)
 {
 	unsigned long i;
 	unsigned long pfn;
@@ -1610,8 +1593,8 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
 
 void kvm_arch_commit_memory_region(struct kvm *kvm,
 		struct kvm_userspace_memory_region *mem,
-		struct kvm_memory_slot old,
-		bool user_alloc)
+		const struct kvm_memory_slot *old,
+		enum kvm_mr_change change)
 {
 	return;
 }

+ 0 - 6
arch/ia64/kvm/lapic.h

@@ -27,10 +27,4 @@ int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq);
 #define kvm_apic_present(x) (true)
 #define kvm_lapic_enabled(x) (true)
 
-static inline bool kvm_apic_vid_enabled(void)
-{
-	/* IA64 has no apicv supporting, do nothing here */
-	return false;
-}
-
 #endif

+ 3 - 0
arch/powerpc/include/asm/hvcall.h

@@ -270,6 +270,9 @@
 #define H_SET_MODE		0x31C
 #define MAX_HCALL_OPCODE	H_SET_MODE
 
+/* Platform specific hcalls, used by KVM */
+#define H_RTAS			0xf000
+
 #ifndef __ASSEMBLY__
 
 /**

+ 6 - 1
arch/powerpc/include/asm/kvm_book3s.h

@@ -142,6 +142,8 @@ extern int kvmppc_mmu_hv_init(void);
 extern int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data);
 extern int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data);
 extern void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec);
+extern void kvmppc_book3s_dequeue_irqprio(struct kvm_vcpu *vcpu,
+					  unsigned int vec);
 extern void kvmppc_inject_interrupt(struct kvm_vcpu *vcpu, int vec, u64 flags);
 extern void kvmppc_set_bat(struct kvm_vcpu *vcpu, struct kvmppc_bat *bat,
 			   bool upper, u32 val);
@@ -156,7 +158,8 @@ void kvmppc_clear_ref_hpte(struct kvm *kvm, unsigned long *hptep,
 			unsigned long pte_index);
 extern void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long addr,
 			unsigned long *nb_ret);
-extern void kvmppc_unpin_guest_page(struct kvm *kvm, void *addr);
+extern void kvmppc_unpin_guest_page(struct kvm *kvm, void *addr,
+			unsigned long gpa, bool dirty);
 extern long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
 			long pte_index, unsigned long pteh, unsigned long ptel);
 extern long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
@@ -458,6 +461,8 @@ static inline bool kvmppc_critical_section(struct kvm_vcpu *vcpu)
 #define OSI_SC_MAGIC_R4			0x77810F9B
 
 #define INS_DCBZ			0x7c0007ec
+/* TO = 31 for unconditional trap */
+#define INS_TW				0x7fe00008
 
 /* LPIDs we support with this build -- runtime limit may be lower */
 #define KVMPPC_NR_LPIDS			(LPID_RSVD + 1)

+ 13 - 0
arch/powerpc/include/asm/kvm_book3s_64.h

@@ -268,4 +268,17 @@ static inline int is_vrma_hpte(unsigned long hpte_v)
 		(HPTE_V_1TB_SEG | (VRMA_VSID << (40 - 16)));
 }
 
+#ifdef CONFIG_KVM_BOOK3S_64_HV
+/*
+ * Note modification of an HPTE; set the HPTE modified bit
+ * if anyone is interested.
+ */
+static inline void note_hpte_modification(struct kvm *kvm,
+					  struct revmap_entry *rev)
+{
+	if (atomic_read(&kvm->arch.hpte_mod_interest))
+		rev->guest_rpte |= HPTE_GR_MODIFIED;
+}
+#endif /* CONFIG_KVM_BOOK3S_64_HV */
+
 #endif /* __ASM_KVM_BOOK3S_64_H__ */

+ 7 - 1
arch/powerpc/include/asm/kvm_book3s_asm.h

@@ -20,6 +20,11 @@
 #ifndef __ASM_KVM_BOOK3S_ASM_H__
 #define __ASM_KVM_BOOK3S_ASM_H__
 
+/* XICS ICP register offsets */
+#define XICS_XIRR		4
+#define XICS_MFRR		0xc
+#define XICS_IPI		2	/* interrupt source # for IPIs */
+
 #ifdef __ASSEMBLY__
 
 #ifdef CONFIG_KVM_BOOK3S_HANDLER
@@ -81,10 +86,11 @@ struct kvmppc_host_state {
 #ifdef CONFIG_KVM_BOOK3S_64_HV
 	u8 hwthread_req;
 	u8 hwthread_state;
-
+	u8 host_ipi;
 	struct kvm_vcpu *kvm_vcpu;
 	struct kvmppc_vcore *kvm_vcore;
 	unsigned long xics_phys;
+	u32 saved_xirr;
 	u64 dabr;
 	u64 host_mmcr[3];
 	u32 host_pmc[8];
+ 2 - 0
arch/powerpc/include/asm/kvm_booke.h

@@ -26,6 +26,8 @@
 /* LPIDs we support with this build -- runtime limit may be lower */
 /* LPIDs we support with this build -- runtime limit may be lower */
 #define KVMPPC_NR_LPIDS                        64
 
+#define KVMPPC_INST_EHPRIV	0x7c00021c
+
 static inline void kvmppc_set_gpr(struct kvm_vcpu *vcpu, int num, ulong val)
 {
 	vcpu->arch.gpr[num] = val;
+ 40 - 1
arch/powerpc/include/asm/kvm_host.h

@@ -44,6 +44,10 @@
 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1
 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1
 #endif
 
+/* These values are internal and can be increased later */
+#define KVM_NR_IRQCHIPS          1
+#define KVM_IRQCHIP_NUM_PINS     256
+
 #if !defined(CONFIG_KVM_440)
 #include <linux/mmu_notifier.h>
 
@@ -188,6 +192,10 @@ struct kvmppc_linear_info {
 	int		 type;
 };
 
+/* XICS components, defined in book3s_xics.c */
+struct kvmppc_xics;
+struct kvmppc_icp;
+
 /*
  * The reverse mapping array has one entry for each HPTE,
  * which stores the guest's view of the second word of the HPTE
@@ -255,6 +263,13 @@ struct kvm_arch {
 #endif /* CONFIG_KVM_BOOK3S_64_HV */
 #ifdef CONFIG_PPC_BOOK3S_64
 	struct list_head spapr_tce_tables;
+	struct list_head rtas_tokens;
+#endif
+#ifdef CONFIG_KVM_MPIC
+	struct openpic *mpic;
+#endif
+#ifdef CONFIG_KVM_XICS
+	struct kvmppc_xics *xics;
 #endif
 };
 
@@ -301,11 +316,13 @@ struct kvmppc_vcore {
  * that a guest can register.
  */
 struct kvmppc_vpa {
+	unsigned long gpa;	/* Current guest phys addr */
 	void *pinned_addr;	/* Address in kernel linear mapping */
 	void *pinned_end;	/* End of region */
 	unsigned long next_gpa;	/* Guest phys addr for update */
 	unsigned long len;	/* Number of bytes required */
 	u8 update_pending;	/* 1 => update pinned_addr from next_gpa */
+	bool dirty;		/* true => area has been modified by kernel */
 };
 
 struct kvmppc_pte {
@@ -359,6 +376,11 @@ struct kvmppc_slb {
 #define KVMPPC_BOOKE_MAX_IAC	4
 #define KVMPPC_BOOKE_MAX_DAC	2
 
+/* KVMPPC_EPR_USER takes precedence over KVMPPC_EPR_KERNEL */
+#define KVMPPC_EPR_NONE		0 /* EPR not supported */
+#define KVMPPC_EPR_USER		1 /* exit to userspace to fill EPR */
+#define KVMPPC_EPR_KERNEL	2 /* in-kernel irqchip */
+
 struct kvmppc_booke_debug_reg {
 	u32 dbcr0;
 	u32 dbcr1;
@@ -370,6 +392,12 @@ struct kvmppc_booke_debug_reg {
 	u64 dac[KVMPPC_BOOKE_MAX_DAC];
 };
 
+#define KVMPPC_IRQ_DEFAULT	0
+#define KVMPPC_IRQ_MPIC		1
+#define KVMPPC_IRQ_XICS		2
+
+struct openpic;
+
 struct kvm_vcpu_arch {
 	ulong host_stack;
 	u32 host_pid;
@@ -502,8 +530,11 @@ struct kvm_vcpu_arch {
 	spinlock_t wdt_lock;
 	struct timer_list wdt_timer;
 	u32 tlbcfg[4];
+	u32 tlbps[4];
 	u32 mmucfg;
+	u32 eptcfg;
 	u32 epr;
+	u32 crit_save;
 	struct kvmppc_booke_debug_reg dbg_reg;
 #endif
 	gpa_t paddr_accessed;
@@ -521,7 +552,7 @@ struct kvm_vcpu_arch {
 	u8 sane;
 	u8 cpu_type;
 	u8 hcall_needed;
-	u8 epr_enabled;
+	u8 epr_flags; /* KVMPPC_EPR_xxx */
 	u8 epr_needed;
 
 	u32 cpr0_cfgaddr; /* holds the last set cpr0_cfgaddr */
@@ -548,6 +579,13 @@ struct kvm_vcpu_arch {
 	unsigned long magic_page_pa; /* phys addr to map the magic page to */
 	unsigned long magic_page_ea; /* effect. addr to map the magic page to */
 
+	int irq_type;		/* one of KVM_IRQ_* */
+	int irq_cpu_id;
+	struct openpic *mpic;	/* KVM_IRQ_MPIC */
+#ifdef CONFIG_KVM_XICS
+	struct kvmppc_icp *icp; /* XICS presentation controller */
+#endif
+
 #ifdef CONFIG_KVM_BOOK3S_64_HV
 	struct kvm_vcpu_arch_shared shregs;
 
@@ -588,5 +626,6 @@ struct kvm_vcpu_arch {
 #define KVM_MMIO_REG_FQPR	0x0060
 
 #define __KVM_HAVE_ARCH_WQP
+#define __KVM_HAVE_CREATE_DEVICE
 
 #endif /* __POWERPC_KVM_HOST_H__ */

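The new per-vcpu fields above (irq_type, mpic, icp) record which in-kernel irqchip, if any, a vcpu has been connected to. A hypothetical helper sketches the intended dispatch; this function is not part of the series:

/*
 * Sketch only: KVMPPC_IRQ_DEFAULT means userspace still owns
 * interrupt delivery for this vcpu.
 */
static bool vcpu_has_in_kernel_irqchip(struct kvm_vcpu *vcpu)
{
	switch (vcpu->arch.irq_type) {
	case KVMPPC_IRQ_MPIC:	/* vcpu->arch.mpic is valid */
	case KVMPPC_IRQ_XICS:	/* vcpu->arch.icp is valid */
		return true;
	case KVMPPC_IRQ_DEFAULT:
	default:
		return false;
	}
}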
+ 109 - 5
arch/powerpc/include/asm/kvm_ppc.h

@@ -44,7 +44,7 @@ enum emulation_result {
 	EMULATE_DO_DCR,       /* kvm_run filled with DCR request */
 	EMULATE_FAIL,         /* can't emulate this instruction */
 	EMULATE_AGAIN,        /* something went wrong. go again */
-	EMULATE_DO_PAPR,      /* kvm_run filled with PAPR request */
+	EMULATE_EXIT_USER,    /* emulation requires exit to user-space */
 };
 
 extern int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu);
@@ -104,8 +104,7 @@ extern void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu);
 extern void kvmppc_core_dequeue_dec(struct kvm_vcpu *vcpu);
 extern void kvmppc_core_queue_external(struct kvm_vcpu *vcpu,
                                        struct kvm_interrupt *irq);
-extern void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu,
-                                         struct kvm_interrupt *irq);
+extern void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu);
 extern void kvmppc_core_flush_tlb(struct kvm_vcpu *vcpu);
 
 extern int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
@@ -131,6 +130,7 @@ extern long kvmppc_prepare_vrma(struct kvm *kvm,
 extern void kvmppc_map_vrma(struct kvm_vcpu *vcpu,
 			struct kvm_memory_slot *memslot, unsigned long porder);
 extern int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu);
+
 extern long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
 				struct kvm_create_spapr_tce *args);
 extern long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
@@ -152,7 +152,7 @@ extern int kvmppc_core_prepare_memory_region(struct kvm *kvm,
 				struct kvm_userspace_memory_region *mem);
 extern void kvmppc_core_commit_memory_region(struct kvm *kvm,
 				struct kvm_userspace_memory_region *mem,
-				struct kvm_memory_slot old);
+				const struct kvm_memory_slot *old);
 extern int kvm_vm_ioctl_get_smmu_info(struct kvm *kvm,
 				      struct kvm_ppc_smmu_info *info);
 extern void kvmppc_core_flush_memslot(struct kvm *kvm,
@@ -165,6 +165,18 @@ extern int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu);
 
 extern int kvm_vm_ioctl_get_htab_fd(struct kvm *kvm, struct kvm_get_htab_fd *);
 
+int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq);
+
+extern int kvm_vm_ioctl_rtas_define_token(struct kvm *kvm, void __user *argp);
+extern int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu);
+extern void kvmppc_rtas_tokens_free(struct kvm *kvm);
+extern int kvmppc_xics_set_xive(struct kvm *kvm, u32 irq, u32 server,
+				u32 priority);
+extern int kvmppc_xics_get_xive(struct kvm *kvm, u32 irq, u32 *server,
+				u32 *priority);
+extern int kvmppc_xics_int_on(struct kvm *kvm, u32 irq);
+extern int kvmppc_xics_int_off(struct kvm *kvm, u32 irq);
+
 /*
  * Cuts out inst bits with ordering according to spec.
  * That means the leftmost bit is zero. All given bits are included.
@@ -246,12 +258,29 @@ int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *);
 
 void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid);
 
+struct openpic;
+
 #ifdef CONFIG_KVM_BOOK3S_64_HV
 static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr)
 {
 	paca[cpu].kvm_hstate.xics_phys = addr;
 }
 
+static inline u32 kvmppc_get_xics_latch(void)
+{
+	u32 xirr = get_paca()->kvm_hstate.saved_xirr;
+
+	get_paca()->kvm_hstate.saved_xirr = 0;
+
+	return xirr;
+}
+
+static inline void kvmppc_set_host_ipi(int cpu, u8 host_ipi)
+{
+	paca[cpu].kvm_hstate.host_ipi = host_ipi;
+}
+
+extern void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu);
 extern void kvm_linear_init(void);
 
 #else
@@ -260,6 +289,46 @@ static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr)
 
 static inline void kvm_linear_init(void)
 {}
+
+static inline u32 kvmppc_get_xics_latch(void)
+{
+	return 0;
+}
+
+static inline void kvmppc_set_host_ipi(int cpu, u8 host_ipi)
+{}
+
+static inline void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu)
+{
+	kvm_vcpu_kick(vcpu);
+}
+#endif
+
+#ifdef CONFIG_KVM_XICS
+static inline int kvmppc_xics_enabled(struct kvm_vcpu *vcpu)
+{
+	return vcpu->arch.irq_type == KVMPPC_IRQ_XICS;
+}
+extern void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu);
+extern int kvmppc_xics_create_icp(struct kvm_vcpu *vcpu, unsigned long server);
+extern int kvm_vm_ioctl_xics_irq(struct kvm *kvm, struct kvm_irq_level *args);
+extern int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd);
+extern u64 kvmppc_xics_get_icp(struct kvm_vcpu *vcpu);
+extern int kvmppc_xics_set_icp(struct kvm_vcpu *vcpu, u64 icpval);
+extern int kvmppc_xics_connect_vcpu(struct kvm_device *dev,
+			struct kvm_vcpu *vcpu, u32 cpu);
+#else
+static inline int kvmppc_xics_enabled(struct kvm_vcpu *vcpu)
+	{ return 0; }
+static inline void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu) { }
+static inline int kvmppc_xics_create_icp(struct kvm_vcpu *vcpu,
+					 unsigned long server)
+	{ return -EINVAL; }
+static inline int kvm_vm_ioctl_xics_irq(struct kvm *kvm,
+					struct kvm_irq_level *args)
+	{ return -ENOTTY; }
+static inline int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd)
+	{ return 0; }
 #endif
 
 static inline void kvmppc_set_epr(struct kvm_vcpu *vcpu, u32 epr)
@@ -271,6 +340,32 @@ static inline void kvmppc_set_epr(struct kvm_vcpu *vcpu, u32 epr)
 #endif
 }
 
+#ifdef CONFIG_KVM_MPIC
+
+void kvmppc_mpic_set_epr(struct kvm_vcpu *vcpu);
+int kvmppc_mpic_connect_vcpu(struct kvm_device *dev, struct kvm_vcpu *vcpu,
+			     u32 cpu);
+void kvmppc_mpic_disconnect_vcpu(struct openpic *opp, struct kvm_vcpu *vcpu);
+
+#else
+
+static inline void kvmppc_mpic_set_epr(struct kvm_vcpu *vcpu)
+{
+}
+
+static inline int kvmppc_mpic_connect_vcpu(struct kvm_device *dev,
+		struct kvm_vcpu *vcpu, u32 cpu)
+{
+	return -EINVAL;
+}
+
+static inline void kvmppc_mpic_disconnect_vcpu(struct openpic *opp,
+		struct kvm_vcpu *vcpu)
+{
+}
+
+#endif /* CONFIG_KVM_MPIC */
+
 int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu,
 			      struct kvm_config_tlb *cfg);
 int kvm_vcpu_ioctl_dirty_tlb(struct kvm_vcpu *vcpu,
@@ -283,8 +378,15 @@ void kvmppc_init_lpid(unsigned long nr_lpids);
 
 static inline void kvmppc_mmu_flush_icache(pfn_t pfn)
 {
-	/* Clear i-cache for new pages */
 	struct page *page;
+	/*
+	 * We can only access pages that the kernel maps
+	 * as memory. Bail out for unmapped ones.
+	 */
+	if (!pfn_valid(pfn))
+		return;
+
+	/* Clear i-cache for new pages */
 	page = pfn_to_page(pfn);
 	if (!test_bit(PG_arch_1, &page->flags)) {
 		flush_dcache_icache_page(page);
@@ -324,4 +426,6 @@ static inline ulong kvmppc_get_ea_indexed(struct kvm_vcpu *vcpu, int ra, int rb)
 	return ea;
 }
 
+extern void xics_wake_cpu(int cpu);
+
 #endif /* __POWERPC_KVM_PPC_H__ */

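kvmppc_get_xics_latch() above is deliberately read-and-clear: the real-mode guest-exit path stashes an XIRR value it already pulled from the hardware into kvm_hstate.saved_xirr, and the host side must consume it exactly once. A sketch of the assumed consumption pattern (handle_host_interrupt() is hypothetical, not a function from this series):

/*
 * Sketch: drain the latch after a guest exit. A non-zero value
 * means the exit path already read an XIRR the host must handle.
 */
static void check_latched_xirr(void)
{
	u32 xirr = kvmppc_get_xics_latch();	/* read-and-clear */

	if (xirr)
		handle_host_interrupt(xirr);	/* hypothetical handler */
}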
+ 1 - 0
arch/powerpc/include/asm/reg.h

@@ -300,6 +300,7 @@
 #define     LPCR_PECE1	0x00002000	/* decrementer can cause exit */
 #define     LPCR_PECE2	0x00001000	/* machine check etc can cause exit */
 #define   LPCR_MER	0x00000800	/* Mediated External Exception */
+#define   LPCR_MER_SH	11
 #define   LPCR_LPES    0x0000000c
 #define   LPCR_LPES0   0x00000008      /* LPAR Env selector 0 */
 #define   LPCR_LPES1   0x00000004      /* LPAR Env selector 1 */

+ 94 - 0
arch/powerpc/include/uapi/asm/kvm.h

@@ -25,6 +25,8 @@
 /* Select powerpc specific features in <linux/kvm.h> */
 #define __KVM_HAVE_SPAPR_TCE
 #define __KVM_HAVE_PPC_SMT
+#define __KVM_HAVE_IRQCHIP
+#define __KVM_HAVE_IRQ_LINE
 
 struct kvm_regs {
 	__u64 pc;
@@ -272,8 +274,31 @@ struct kvm_debug_exit_arch {
 
 /* for KVM_SET_GUEST_DEBUG */
 struct kvm_guest_debug_arch {
+	struct {
+		/* H/W breakpoint/watchpoint address */
+		__u64 addr;
+		/*
+		 * Type denotes h/w breakpoint, read watchpoint, write
+		 * watchpoint or watchpoint (both read and write).
+		 */
+#define KVMPPC_DEBUG_NONE		0x0
+#define KVMPPC_DEBUG_BREAKPOINT		(1UL << 1)
+#define KVMPPC_DEBUG_WATCH_WRITE	(1UL << 2)
+#define KVMPPC_DEBUG_WATCH_READ		(1UL << 3)
+		__u32 type;
+		__u32 reserved;
+	} bp[16];
 };
 
+/* Debug related defines */
+/*
+ * kvm_guest_debug->control is a 32 bit field. The lower 16 bits are generic
+ * and upper 16 bits are architecture specific. Architecture specific defines
+ * that ioctl is for setting hardware breakpoint or software breakpoint.
+ */
+#define KVM_GUESTDBG_USE_SW_BP		0x00010000
+#define KVM_GUESTDBG_USE_HW_BP		0x00020000
+
 /* definition of registers in kvm_run */
 struct kvm_sync_regs {
 };
@@ -299,6 +324,12 @@ struct kvm_allocate_rma {
 	__u64 rma_size;
 };
 
+/* for KVM_CAP_PPC_RTAS */
+struct kvm_rtas_token_args {
+	char name[120];
+	__u64 token;	/* Use a token of 0 to undefine a mapping */
+};
+
 struct kvm_book3e_206_tlb_entry {
 	__u32 mas8;
 	__u32 mas1;
@@ -359,6 +390,26 @@ struct kvm_get_htab_header {
 	__u16	n_invalid;
 };
 
+/* Per-vcpu XICS interrupt controller state */
+#define KVM_REG_PPC_ICP_STATE	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x8c)
+
+#define  KVM_REG_PPC_ICP_CPPR_SHIFT	56	/* current proc priority */
+#define  KVM_REG_PPC_ICP_CPPR_MASK	0xff
+#define  KVM_REG_PPC_ICP_XISR_SHIFT	32	/* interrupt status field */
+#define  KVM_REG_PPC_ICP_XISR_MASK	0xffffff
+#define  KVM_REG_PPC_ICP_MFRR_SHIFT	24	/* pending IPI priority */
+#define  KVM_REG_PPC_ICP_MFRR_MASK	0xff
+#define  KVM_REG_PPC_ICP_PPRI_SHIFT	16	/* pending irq priority */
+#define  KVM_REG_PPC_ICP_PPRI_MASK	0xff
+
+/* Device control API: PPC-specific devices */
+#define KVM_DEV_MPIC_GRP_MISC		1
+#define   KVM_DEV_MPIC_BASE_ADDR	0	/* 64-bit */
+
+#define KVM_DEV_MPIC_GRP_REGISTER	2	/* 32-bit */
+#define KVM_DEV_MPIC_GRP_IRQ_ACTIVE	3	/* 32-bit */
+
+/* One-Reg API: PPC-specific registers */
 #define KVM_REG_PPC_HIOR	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x1)
 #define KVM_REG_PPC_IAC1	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x2)
 #define KVM_REG_PPC_IAC2	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x3)
@@ -417,4 +468,47 @@ struct kvm_get_htab_header {
 #define KVM_REG_PPC_EPCR	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x85)
 #define KVM_REG_PPC_EPR		(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x86)
 
+/* Timer Status Register OR/CLEAR interface */
+#define KVM_REG_PPC_OR_TSR	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x87)
+#define KVM_REG_PPC_CLEAR_TSR	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x88)
+#define KVM_REG_PPC_TCR		(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x89)
+#define KVM_REG_PPC_TSR		(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x8a)
+
+/* Debugging: Special instruction for software breakpoint */
+#define KVM_REG_PPC_DEBUG_INST	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x8b)
+
+/* MMU registers */
+#define KVM_REG_PPC_MAS0	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x8c)
+#define KVM_REG_PPC_MAS1	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x8d)
+#define KVM_REG_PPC_MAS2	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x8e)
+#define KVM_REG_PPC_MAS7_3	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x8f)
+#define KVM_REG_PPC_MAS4	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x90)
+#define KVM_REG_PPC_MAS6	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x91)
+#define KVM_REG_PPC_MMUCFG	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x92)
+/*
+ * TLBnCFG fields TLBnCFG_N_ENTRY and TLBnCFG_ASSOC can be changed only using
+ * KVM_CAP_SW_TLB ioctl
+ */
+#define KVM_REG_PPC_TLB0CFG	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x93)
+#define KVM_REG_PPC_TLB1CFG	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x94)
+#define KVM_REG_PPC_TLB2CFG	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x95)
+#define KVM_REG_PPC_TLB3CFG	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x96)
+#define KVM_REG_PPC_TLB0PS	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x97)
+#define KVM_REG_PPC_TLB1PS	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x98)
+#define KVM_REG_PPC_TLB2PS	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x99)
+#define KVM_REG_PPC_TLB3PS	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x9a)
+#define KVM_REG_PPC_EPTCFG	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x9b)
+
+/* PPC64 eXternal Interrupt Controller Specification */
+#define KVM_DEV_XICS_GRP_SOURCES	1	/* 64-bit source attributes */
+
+/* Layout of 64-bit source attribute values */
+#define  KVM_XICS_DESTINATION_SHIFT	0
+#define  KVM_XICS_DESTINATION_MASK	0xffffffffULL
+#define  KVM_XICS_PRIORITY_SHIFT	32
+#define  KVM_XICS_PRIORITY_MASK		0xff
+#define  KVM_XICS_LEVEL_SENSITIVE	(1ULL << 40)
+#define  KVM_XICS_MASKED		(1ULL << 41)
+#define  KVM_XICS_PENDING		(1ULL << 42)
+
 #endif /* __LINUX_KVM_POWERPC_H */

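Given the shift/mask layout above, a KVM_REG_PPC_ICP_STATE value packs the current processor priority (CPPR), the interrupt status field (XISR), the IPI priority (MFRR) and the pending-interrupt priority into a single 64-bit word. Illustrative userspace helpers, assuming the uapi header above is included; these are not part of the series:

#include <stdint.h>

/* Pack an ICP state word from its fields. */
static uint64_t icp_pack(uint8_t cppr, uint32_t xisr,
			 uint8_t mfrr, uint8_t pend_pri)
{
	return ((uint64_t)cppr << KVM_REG_PPC_ICP_CPPR_SHIFT) |
	       ((uint64_t)(xisr & KVM_REG_PPC_ICP_XISR_MASK)
			<< KVM_REG_PPC_ICP_XISR_SHIFT) |
	       ((uint64_t)mfrr << KVM_REG_PPC_ICP_MFRR_SHIFT) |
	       ((uint64_t)pend_pri << KVM_REG_PPC_ICP_PPRI_SHIFT);
}

/* Extract the current processor priority from a state word. */
static uint8_t icp_cppr(uint64_t state)
{
	return (state >> KVM_REG_PPC_ICP_CPPR_SHIFT) &
	       KVM_REG_PPC_ICP_CPPR_MASK;
}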
+ 4 - 0
arch/powerpc/kernel/asm-offsets.c

@@ -480,6 +480,7 @@ int main(void)
 	DEFINE(VCPU_DSISR, offsetof(struct kvm_vcpu, arch.shregs.dsisr));
 	DEFINE(VCPU_DAR, offsetof(struct kvm_vcpu, arch.shregs.dar));
 	DEFINE(VCPU_VPA, offsetof(struct kvm_vcpu, arch.vpa.pinned_addr));
+	DEFINE(VCPU_VPA_DIRTY, offsetof(struct kvm_vcpu, arch.vpa.dirty));
 #endif
 #ifdef CONFIG_PPC_BOOK3S
 	DEFINE(VCPU_VCPUID, offsetof(struct kvm_vcpu, vcpu_id));
@@ -576,6 +577,8 @@ int main(void)
 	HSTATE_FIELD(HSTATE_KVM_VCPU, kvm_vcpu);
 	HSTATE_FIELD(HSTATE_KVM_VCORE, kvm_vcore);
 	HSTATE_FIELD(HSTATE_XICS_PHYS, xics_phys);
+	HSTATE_FIELD(HSTATE_SAVED_XIRR, saved_xirr);
+	HSTATE_FIELD(HSTATE_HOST_IPI, host_ipi);
 	HSTATE_FIELD(HSTATE_MMCR, host_mmcr);
 	HSTATE_FIELD(HSTATE_PMC, host_pmc);
 	HSTATE_FIELD(HSTATE_PURR, host_purr);
@@ -599,6 +602,7 @@ int main(void)
 	DEFINE(VCPU_LAST_INST, offsetof(struct kvm_vcpu, arch.last_inst));
 	DEFINE(VCPU_FAULT_DEAR, offsetof(struct kvm_vcpu, arch.fault_dear));
 	DEFINE(VCPU_FAULT_ESR, offsetof(struct kvm_vcpu, arch.fault_esr));
+	DEFINE(VCPU_CRIT_SAVE, offsetof(struct kvm_vcpu, arch.crit_save));
 #endif /* CONFIG_PPC_BOOK3S */
 #endif /* CONFIG_KVM */

+ 12 - 0
arch/powerpc/kvm/44x.c

@@ -124,6 +124,18 @@ int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
 	return kvmppc_set_sregs_ivor(vcpu, sregs);
 }
 
+int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id,
+			union kvmppc_one_reg *val)
+{
+	return -EINVAL;
+}
+
+int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id,
+		       union kvmppc_one_reg *val)
+{
+	return -EINVAL;
+}
+
 struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
 {
 	struct kvmppc_vcpu_44x *vcpu_44x;

+ 23 - 3
arch/powerpc/kvm/Kconfig

@@ -136,21 +136,41 @@ config KVM_E500V2
 	  If unsure, say N.
 
 config KVM_E500MC
-	bool "KVM support for PowerPC E500MC/E5500 processors"
+	bool "KVM support for PowerPC E500MC/E5500/E6500 processors"
 	depends on PPC_E500MC
 	select KVM
 	select KVM_MMIO
 	select KVM_BOOKE_HV
 	select MMU_NOTIFIER
 	---help---
-	  Support running unmodified E500MC/E5500 (32-bit) guest kernels in
-	  virtual machines on E500MC/E5500 host processors.
+	  Support running unmodified E500MC/E5500/E6500 guest kernels in
+	  virtual machines on E500MC/E5500/E6500 host processors.
 
 	  This module provides access to the hardware capabilities through
 	  a character device node named /dev/kvm.
 
 	  If unsure, say N.
 
+config KVM_MPIC
+	bool "KVM in-kernel MPIC emulation"
+	depends on KVM && E500
+	select HAVE_KVM_IRQCHIP
+	select HAVE_KVM_IRQ_ROUTING
+	select HAVE_KVM_MSI
+	help
+	  Enable support for emulating MPIC devices inside the
+          host kernel, rather than relying on userspace to emulate.
+          Currently, support is limited to certain versions of
+          Freescale's MPIC implementation.
+
+config KVM_XICS
+	bool "KVM in-kernel XICS emulation"
+	depends on KVM_BOOK3S_64 && !KVM_MPIC
+	---help---
+	  Include support for the XICS (eXternal Interrupt Controller
+	  Specification) interrupt controller architecture used on
+	  IBM POWER (pSeries) servers.
+
 source drivers/vhost/Kconfig
 
 endif # VIRTUALIZATION

+ 11 - 1
arch/powerpc/kvm/Makefile

@@ -72,12 +72,18 @@ kvm-book3s_64-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \
 	book3s_hv.o \
 	book3s_hv_interrupts.o \
 	book3s_64_mmu_hv.o
+kvm-book3s_64-builtin-xics-objs-$(CONFIG_KVM_XICS) := \
+	book3s_hv_rm_xics.o
 kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \
 	book3s_hv_rmhandlers.o \
 	book3s_hv_rm_mmu.o \
 	book3s_64_vio_hv.o \
 	book3s_hv_ras.o \
-	book3s_hv_builtin.o
+	book3s_hv_builtin.o \
+	$(kvm-book3s_64-builtin-xics-objs-y)
+
+kvm-book3s_64-objs-$(CONFIG_KVM_XICS) += \
+	book3s_xics.o
 
 kvm-book3s_64-module-objs := \
 	../../../virt/kvm/kvm_main.o \
@@ -86,6 +92,7 @@ kvm-book3s_64-module-objs := \
 	emulate.o \
 	book3s.o \
 	book3s_64_vio.o \
+	book3s_rtas.o \
 	$(kvm-book3s_64-objs-y)
 
 kvm-objs-$(CONFIG_KVM_BOOK3S_64) := $(kvm-book3s_64-module-objs)
@@ -103,6 +110,9 @@ kvm-book3s_32-objs := \
 	book3s_32_mmu.o
 kvm-objs-$(CONFIG_KVM_BOOK3S_32) := $(kvm-book3s_32-objs)
 
+kvm-objs-$(CONFIG_KVM_MPIC) += mpic.o
+kvm-objs-$(CONFIG_HAVE_KVM_IRQ_ROUTING) += $(addprefix ../../../virt/kvm/, irqchip.o)
+
 kvm-objs := $(kvm-objs-m) $(kvm-objs-y)
 
 obj-$(CONFIG_KVM_440) += kvm.o

+ 33 - 3
arch/powerpc/kvm/book3s.c

@@ -104,7 +104,7 @@ static int kvmppc_book3s_vec2irqprio(unsigned int vec)
 	return prio;
 }
 
-static void kvmppc_book3s_dequeue_irqprio(struct kvm_vcpu *vcpu,
+void kvmppc_book3s_dequeue_irqprio(struct kvm_vcpu *vcpu,
 					  unsigned int vec)
 {
 	unsigned long old_pending = vcpu->arch.pending_exceptions;
@@ -160,8 +160,7 @@ void kvmppc_core_queue_external(struct kvm_vcpu *vcpu,
 	kvmppc_book3s_queue_irqprio(vcpu, vec);
 }
 
-void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu,
-                                  struct kvm_interrupt *irq)
+void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu)
 {
 	kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL);
 	kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL_LEVEL);
@@ -530,6 +529,21 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
 			val = get_reg_val(reg->id, vcpu->arch.vscr.u[3]);
 			break;
 #endif /* CONFIG_ALTIVEC */
+		case KVM_REG_PPC_DEBUG_INST: {
+			u32 opcode = INS_TW;
+			r = copy_to_user((u32 __user *)(long)reg->addr,
+					 &opcode, sizeof(u32));
+			break;
+		}
+#ifdef CONFIG_KVM_XICS
+		case KVM_REG_PPC_ICP_STATE:
+			if (!vcpu->arch.icp) {
+				r = -ENXIO;
+				break;
+			}
+			val = get_reg_val(reg->id, kvmppc_xics_get_icp(vcpu));
+			break;
+#endif /* CONFIG_KVM_XICS */
 		default:
 			r = -EINVAL;
 			break;
@@ -592,6 +606,16 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
 			vcpu->arch.vscr.u[3] = set_reg_val(reg->id, val);
 			break;
 #endif /* CONFIG_ALTIVEC */
+#ifdef CONFIG_KVM_XICS
+		case KVM_REG_PPC_ICP_STATE:
+			if (!vcpu->arch.icp) {
+				r = -ENXIO;
+				break;
+			}
+			r = kvmppc_xics_set_icp(vcpu,
+						set_reg_val(reg->id, val));
+			break;
+#endif /* CONFIG_KVM_XICS */
 		default:
 			r = -EINVAL;
 			break;
@@ -607,6 +631,12 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
 	return 0;
 }
 
+int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
+					struct kvm_guest_debug *dbg)
+{
+	return -EINVAL;
+}
+
 void kvmppc_decrementer_func(unsigned long data)
 {
 	struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;

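The KVM_REG_PPC_DEBUG_INST case added to kvm_vcpu_ioctl_get_one_reg() above copies the trap opcode (INS_TW) out through reg->addr, so userspace retrieves the software-breakpoint instruction with a plain KVM_GET_ONE_REG call. A minimal userspace sketch (error handling trimmed):

#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Ask KVM which opcode to plant as a software breakpoint. */
static uint32_t get_sw_bp_insn(int vcpu_fd)
{
	uint32_t insn = 0;
	struct kvm_one_reg reg = {
		.id   = KVM_REG_PPC_DEBUG_INST,
		.addr = (uintptr_t)&insn,
	};

	ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);	/* fills insn with INS_TW */
	return insn;
}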
+ 102 - 18
arch/powerpc/kvm/book3s_64_mmu_hv.c

@@ -893,7 +893,10 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
 			/* Harvest R and C */
 			/* Harvest R and C */
 			rcbits = hptep[1] & (HPTE_R_R | HPTE_R_C);
 			rcbits = hptep[1] & (HPTE_R_R | HPTE_R_C);
 			*rmapp |= rcbits << KVMPPC_RMAP_RC_SHIFT;
 			*rmapp |= rcbits << KVMPPC_RMAP_RC_SHIFT;
-			rev[i].guest_rpte = ptel | rcbits;
+			if (rcbits & ~rev[i].guest_rpte) {
+				rev[i].guest_rpte = ptel | rcbits;
+				note_hpte_modification(kvm, &rev[i]);
+			}
 		}
 		}
 		unlock_rmap(rmapp);
 		unlock_rmap(rmapp);
 		hptep[0] &= ~HPTE_V_HVLOCK;
 		hptep[0] &= ~HPTE_V_HVLOCK;
@@ -976,7 +979,10 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
 		/* Now check and modify the HPTE */
 		/* Now check and modify the HPTE */
 		if ((hptep[0] & HPTE_V_VALID) && (hptep[1] & HPTE_R_R)) {
 		if ((hptep[0] & HPTE_V_VALID) && (hptep[1] & HPTE_R_R)) {
 			kvmppc_clear_ref_hpte(kvm, hptep, i);
 			kvmppc_clear_ref_hpte(kvm, hptep, i);
-			rev[i].guest_rpte |= HPTE_R_R;
+			if (!(rev[i].guest_rpte & HPTE_R_R)) {
+				rev[i].guest_rpte |= HPTE_R_R;
+				note_hpte_modification(kvm, &rev[i]);
+			}
 			ret = 1;
 			ret = 1;
 		}
 		}
 		hptep[0] &= ~HPTE_V_HVLOCK;
 		hptep[0] &= ~HPTE_V_HVLOCK;
@@ -1080,7 +1086,10 @@ static int kvm_test_clear_dirty(struct kvm *kvm, unsigned long *rmapp)
 			hptep[1] &= ~HPTE_R_C;
 			hptep[1] &= ~HPTE_R_C;
 			eieio();
 			eieio();
 			hptep[0] = (hptep[0] & ~HPTE_V_ABSENT) | HPTE_V_VALID;
 			hptep[0] = (hptep[0] & ~HPTE_V_ABSENT) | HPTE_V_VALID;
-			rev[i].guest_rpte |= HPTE_R_C;
+			if (!(rev[i].guest_rpte & HPTE_R_C)) {
+				rev[i].guest_rpte |= HPTE_R_C;
+				note_hpte_modification(kvm, &rev[i]);
+			}
 			ret = 1;
 			ret = 1;
 		}
 		}
 		hptep[0] &= ~HPTE_V_HVLOCK;
 		hptep[0] &= ~HPTE_V_HVLOCK;
@@ -1090,11 +1099,30 @@ static int kvm_test_clear_dirty(struct kvm *kvm, unsigned long *rmapp)
 	return ret;
 	return ret;
 }
 }
 
 
+static void harvest_vpa_dirty(struct kvmppc_vpa *vpa,
+			      struct kvm_memory_slot *memslot,
+			      unsigned long *map)
+{
+	unsigned long gfn;
+
+	if (!vpa->dirty || !vpa->pinned_addr)
+		return;
+	gfn = vpa->gpa >> PAGE_SHIFT;
+	if (gfn < memslot->base_gfn ||
+	    gfn >= memslot->base_gfn + memslot->npages)
+		return;
+
+	vpa->dirty = false;
+	if (map)
+		__set_bit_le(gfn - memslot->base_gfn, map);
+}
+
 long kvmppc_hv_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot,
 long kvmppc_hv_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot,
 			     unsigned long *map)
 			     unsigned long *map)
 {
 {
 	unsigned long i;
 	unsigned long i;
 	unsigned long *rmapp;
 	unsigned long *rmapp;
+	struct kvm_vcpu *vcpu;
 
 
 	preempt_disable();
 	preempt_disable();
 	rmapp = memslot->arch.rmap;
 	rmapp = memslot->arch.rmap;
@@ -1103,6 +1131,15 @@ long kvmppc_hv_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot,
 			__set_bit_le(i, map);
 			__set_bit_le(i, map);
 		++rmapp;
 		++rmapp;
 	}
 	}
+
+	/* Harvest dirty bits from VPA and DTL updates */
+	/* Note: we never modify the SLB shadow buffer areas */
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		spin_lock(&vcpu->arch.vpa_update_lock);
+		harvest_vpa_dirty(&vcpu->arch.vpa, memslot, map);
+		harvest_vpa_dirty(&vcpu->arch.dtl, memslot, map);
+		spin_unlock(&vcpu->arch.vpa_update_lock);
+	}
 	preempt_enable();
 	preempt_enable();
 	return 0;
 	return 0;
 }
 }
@@ -1114,7 +1151,7 @@ void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa,
 	unsigned long gfn = gpa >> PAGE_SHIFT;
 	unsigned long gfn = gpa >> PAGE_SHIFT;
 	struct page *page, *pages[1];
 	struct page *page, *pages[1];
 	int npages;
 	int npages;
-	unsigned long hva, psize, offset;
+	unsigned long hva, offset;
 	unsigned long pa;
 	unsigned long pa;
 	unsigned long *physp;
 	unsigned long *physp;
 	int srcu_idx;
 	int srcu_idx;
@@ -1146,14 +1183,9 @@ void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa,
 	}
 	}
 	srcu_read_unlock(&kvm->srcu, srcu_idx);
 	srcu_read_unlock(&kvm->srcu, srcu_idx);
 
 
-	psize = PAGE_SIZE;
-	if (PageHuge(page)) {
-		page = compound_head(page);
-		psize <<= compound_order(page);
-	}
-	offset = gpa & (psize - 1);
+	offset = gpa & (PAGE_SIZE - 1);
 	if (nb_ret)
 	if (nb_ret)
-		*nb_ret = psize - offset;
+		*nb_ret = PAGE_SIZE - offset;
 	return page_address(page) + offset;
 	return page_address(page) + offset;
 
 
  err:
  err:
@@ -1161,11 +1193,31 @@ void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa,
 	return NULL;
 	return NULL;
 }
 }
 
 
-void kvmppc_unpin_guest_page(struct kvm *kvm, void *va)
+void kvmppc_unpin_guest_page(struct kvm *kvm, void *va, unsigned long gpa,
+			     bool dirty)
 {
 {
 	struct page *page = virt_to_page(va);
 	struct page *page = virt_to_page(va);
+	struct kvm_memory_slot *memslot;
+	unsigned long gfn;
+	unsigned long *rmap;
+	int srcu_idx;
 
 
 	put_page(page);
 	put_page(page);
+
+	if (!dirty || !kvm->arch.using_mmu_notifiers)
+		return;
+
+	/* We need to mark this page dirty in the rmap chain */
+	gfn = gpa >> PAGE_SHIFT;
+	srcu_idx = srcu_read_lock(&kvm->srcu);
+	memslot = gfn_to_memslot(kvm, gfn);
+	if (memslot) {
+		rmap = &memslot->arch.rmap[gfn - memslot->base_gfn];
+		lock_rmap(rmap);
+		*rmap |= KVMPPC_RMAP_CHANGED;
+		unlock_rmap(rmap);
+	}
+	srcu_read_unlock(&kvm->srcu, srcu_idx);
 }
 
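kvmppc_unpin_guest_page() now takes the guest physical address and a dirty flag, so writes made through the pinned mapping reach the dirty log via the rmap CHANGED bit. A minimal caller sketch, assuming only the signatures shown in this hunk:

	/* Sketch: pin a guest page, write it on the guest's behalf, then
	 * unpin with dirty = true so the change is visible in the dirty log. */
	unsigned long nb;
	void *va = kvmppc_pin_guest_page(kvm, gpa, &nb);
	if (va) {
		memset(va, 0, nb);		/* hypervisor-side write */
		kvmppc_unpin_guest_page(kvm, va, gpa, true);
	}
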
 /*
@@ -1193,16 +1245,36 @@ struct kvm_htab_ctx {
 
 #define HPTE_SIZE	(2 * sizeof(unsigned long))
 
+/*
+ * Returns 1 if this HPT entry has been modified or has pending
+ * R/C bit changes.
+ */
+static int hpte_dirty(struct revmap_entry *revp, unsigned long *hptp)
+{
+	unsigned long rcbits_unset;
+
+	if (revp->guest_rpte & HPTE_GR_MODIFIED)
+		return 1;
+
+	/* Also need to consider changes in reference and changed bits */
+	rcbits_unset = ~revp->guest_rpte & (HPTE_R_R | HPTE_R_C);
+	if ((hptp[0] & HPTE_V_VALID) && (hptp[1] & rcbits_unset))
+		return 1;
+
+	return 0;
+}
+
 static long record_hpte(unsigned long flags, unsigned long *hptp,
 			unsigned long *hpte, struct revmap_entry *revp,
 			int want_valid, int first_pass)
 {
 	unsigned long v, r;
+	unsigned long rcbits_unset;
 	int ok = 1;
 	int valid, dirty;
 
 	/* Unmodified entries are uninteresting except on the first pass */
-	dirty = !!(revp->guest_rpte & HPTE_GR_MODIFIED);
+	dirty = hpte_dirty(revp, hptp);
 	if (!first_pass && !dirty)
 		return 0;
 
@@ -1223,16 +1295,28 @@ static long record_hpte(unsigned long flags, unsigned long *hptp,
 		while (!try_lock_hpte(hptp, HPTE_V_HVLOCK))
 			cpu_relax();
 		v = hptp[0];
+
+		/* re-evaluate valid and dirty from synchronized HPTE value */
+		valid = !!(v & HPTE_V_VALID);
+		dirty = !!(revp->guest_rpte & HPTE_GR_MODIFIED);
+
+		/* Harvest R and C into guest view if necessary */
+		rcbits_unset = ~revp->guest_rpte & (HPTE_R_R | HPTE_R_C);
+		if (valid && (rcbits_unset & hptp[1])) {
+			revp->guest_rpte |= (hptp[1] & (HPTE_R_R | HPTE_R_C)) |
+				HPTE_GR_MODIFIED;
+			dirty = 1;
+		}
+
 		if (v & HPTE_V_ABSENT) {
 			v &= ~HPTE_V_ABSENT;
 			v |= HPTE_V_VALID;
+			valid = 1;
 		}
-		/* re-evaluate valid and dirty from synchronized HPTE value */
-		valid = !!(v & HPTE_V_VALID);
 		if ((flags & KVM_GET_HTAB_BOLTED_ONLY) && !(v & HPTE_V_BOLTED))
 			valid = 0;
-		r = revp->guest_rpte | (hptp[1] & (HPTE_R_R | HPTE_R_C));
-		dirty = !!(revp->guest_rpte & HPTE_GR_MODIFIED);
+
+		r = revp->guest_rpte;
 		/* only clear modified if this is the right sort of entry */
 		if (valid == want_valid && dirty) {
 			r &= ~HPTE_GR_MODIFIED;
@@ -1288,7 +1372,7 @@ static ssize_t kvm_htab_read(struct file *file, char __user *buf,
 		/* Skip uninteresting entries, i.e. clean on not-first pass */
 		if (!first_pass) {
 			while (i < kvm->arch.hpt_npte &&
-			       !(revp->guest_rpte & HPTE_GR_MODIFIED)) {
+			       !hpte_dirty(revp, hptp)) {
 				++i;
 				hptp += 2;
 				++revp;
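
record_hpte() and kvm_htab_read() back the HTAB migration stream exposed by the existing KVM_PPC_GET_HTAB_FD ioctl; with hpte_dirty() the later passes now also pick up entries whose R/C bits changed behind the MODIFIED flag. A hedged userspace sketch (vm_fd, buf and send_to_destination() are assumptions, the latter hypothetical):

	/* Sketch: open the HTAB stream and drain it; read() returns packed
	 * HPTE records and 0 once everything is clean. */
	struct kvm_get_htab_fd ghf = { .flags = 0, .start_index = 0 };
	int fd = ioctl(vm_fd, KVM_PPC_GET_HTAB_FD, &ghf);
	ssize_t n;
	while ((n = read(fd, buf, sizeof(buf))) > 0)
		send_to_destination(buf, n);	/* hypothetical sink */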

+ 3 - 1
arch/powerpc/kvm/book3s_emulate.c

@@ -194,7 +194,9 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
 				run->papr_hcall.args[i] = gpr;
 			}
 
-			emulated = EMULATE_DO_PAPR;
+			run->exit_reason = KVM_EXIT_PAPR_HCALL;
+			vcpu->arch.hcall_needed = 1;
+			emulated = EMULATE_EXIT_USER;
 			break;
 		}
 #endif
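
With EMULATE_EXIT_USER the emulation core no longer needs a PAPR-specific return code: the exit reason is set at the emulation site and the hcall surfaces as an ordinary KVM_RUN exit. A userspace sketch (field names from the existing KVM_EXIT_PAPR_HCALL ABI; do_hcall() is hypothetical):

	ioctl(vcpu_fd, KVM_RUN, 0);
	if (run->exit_reason == KVM_EXIT_PAPR_HCALL)
		run->papr_hcall.ret = do_hcall(run->papr_hcall.nr,
					       run->papr_hcall.args);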

+ 76 - 16
arch/powerpc/kvm/book3s_hv.c

@@ -66,6 +66,31 @@
 static void kvmppc_end_cede(struct kvm_vcpu *vcpu);
 static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);
 
+void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu)
+{
+	int me;
+	int cpu = vcpu->cpu;
+	wait_queue_head_t *wqp;
+
+	wqp = kvm_arch_vcpu_wq(vcpu);
+	if (waitqueue_active(wqp)) {
+		wake_up_interruptible(wqp);
+		++vcpu->stat.halt_wakeup;
+	}
+
+	me = get_cpu();
+
+	/* CPU points to the first thread of the core */
+	if (cpu != me && cpu >= 0 && cpu < nr_cpu_ids) {
+		int real_cpu = cpu + vcpu->arch.ptid;
+		if (paca[real_cpu].kvm_hstate.xics_phys)
+			xics_wake_cpu(real_cpu);
+		else if (cpu_online(cpu))
+			smp_send_reschedule(cpu);
+	}
+	put_cpu();
+}
+
 /*
  * We use the vcpu_load/put functions to measure stolen time.
  * Stolen time is counted as time when either the vcpu is able to
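
kvmppc_fast_vcpu_kick(), added above, either wakes a sleeping vcpu through its wait queue or IPIs the physical thread the vcpu is loaded on (via the XICS when that thread is napping inside KVM). A hedged sketch of the pattern its callers use, mirroring the XICS delivery code later in this merge:

	/* Sketch: queue an external interrupt, then kick the target vcpu. */
	set_bit(BOOK3S_IRQPRIO_EXTERNAL_LEVEL, &vcpu->arch.pending_exceptions);
	kvmppc_fast_vcpu_kick(vcpu);	/* wake if sleeping, IPI if running */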
@@ -259,7 +284,7 @@ static unsigned long do_h_register_vpa(struct kvm_vcpu *vcpu,
 			len = ((struct reg_vpa *)va)->length.hword;
 		else
 			len = ((struct reg_vpa *)va)->length.word;
-		kvmppc_unpin_guest_page(kvm, va);
+		kvmppc_unpin_guest_page(kvm, va, vpa, false);
 
 		/* Check length */
 		if (len > nb || len < sizeof(struct reg_vpa))
@@ -359,13 +384,13 @@ static void kvmppc_update_vpa(struct kvm_vcpu *vcpu, struct kvmppc_vpa *vpap)
 		va = NULL;
 		nb = 0;
 		if (gpa)
-			va = kvmppc_pin_guest_page(kvm, vpap->next_gpa, &nb);
+			va = kvmppc_pin_guest_page(kvm, gpa, &nb);
 		spin_lock(&vcpu->arch.vpa_update_lock);
 		if (gpa == vpap->next_gpa)
 			break;
 		/* sigh... unpin that one and try again */
 		if (va)
-			kvmppc_unpin_guest_page(kvm, va);
+			kvmppc_unpin_guest_page(kvm, va, gpa, false);
 	}
 
 	vpap->update_pending = 0;
@@ -375,12 +400,15 @@ static void kvmppc_update_vpa(struct kvm_vcpu *vcpu, struct kvmppc_vpa *vpap)
 		 * has changed the mappings underlying guest memory,
 		 * so unregister the region.
 		 */
-		kvmppc_unpin_guest_page(kvm, va);
+		kvmppc_unpin_guest_page(kvm, va, gpa, false);
 		va = NULL;
 	}
 	if (vpap->pinned_addr)
-		kvmppc_unpin_guest_page(kvm, vpap->pinned_addr);
+		kvmppc_unpin_guest_page(kvm, vpap->pinned_addr, vpap->gpa,
+					vpap->dirty);
+	vpap->gpa = gpa;
 	vpap->pinned_addr = va;
+	vpap->dirty = false;
 	if (va)
 		vpap->pinned_end = va + vpap->len;
 }
@@ -472,6 +500,7 @@ static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu,
 	/* order writing *dt vs. writing vpa->dtl_idx */
 	smp_wmb();
 	vpa->dtl_idx = ++vcpu->arch.dtl_index;
+	vcpu->arch.dtl.dirty = true;
 }
 
 int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
@@ -479,7 +508,7 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
 	unsigned long req = kvmppc_get_gpr(vcpu, 3);
 	unsigned long target, ret = H_SUCCESS;
 	struct kvm_vcpu *tvcpu;
-	int idx;
+	int idx, rc;
 
 	switch (req) {
 	case H_ENTER:
@@ -515,6 +544,28 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
 					kvmppc_get_gpr(vcpu, 5),
 					kvmppc_get_gpr(vcpu, 6));
 		break;
+	case H_RTAS:
+		if (list_empty(&vcpu->kvm->arch.rtas_tokens))
+			return RESUME_HOST;
+
+		rc = kvmppc_rtas_hcall(vcpu);
+
+		if (rc == -ENOENT)
+			return RESUME_HOST;
+		else if (rc == 0)
+			break;
+
+		/* Send the error out to userspace via KVM_RUN */
+		return rc;
+
+	case H_XIRR:
+	case H_CPPR:
+	case H_EOI:
+	case H_IPI:
+		if (kvmppc_xics_enabled(vcpu)) {
+			ret = kvmppc_xics_hcall(vcpu, req);
+			break;
+		} /* fallthrough */
 	default:
 		return RESUME_HOST;
 	}
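
The new H_XIRR/H_CPPR/H_EOI/H_IPI cases route the guest's interrupt-controller hcalls to the in-kernel XICS when it is enabled. For context, a pseries guest issues these through the normal hcall path, e.g. (guest-side sketch using the standard plpar_hcall_norets() wrapper; target_server is an assumption):

	/* Guest-side sketch: raise an IPI on a target server, as the guest's
	 * ICP driver does; handled above by kvmppc_xics_hcall() in-kernel. */
	long rc = plpar_hcall_norets(H_IPI, target_server, IPI_PRIORITY);
	if (rc != H_SUCCESS)
		pr_err("H_IPI failed: %ld\n", rc);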
@@ -913,15 +964,19 @@ out:
 	return ERR_PTR(err);
 }
 
+static void unpin_vpa(struct kvm *kvm, struct kvmppc_vpa *vpa)
+{
+	if (vpa->pinned_addr)
+		kvmppc_unpin_guest_page(kvm, vpa->pinned_addr, vpa->gpa,
+					vpa->dirty);
+}
+
 void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
 {
 	spin_lock(&vcpu->arch.vpa_update_lock);
-	if (vcpu->arch.dtl.pinned_addr)
-		kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.dtl.pinned_addr);
-	if (vcpu->arch.slb_shadow.pinned_addr)
-		kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.slb_shadow.pinned_addr);
-	if (vcpu->arch.vpa.pinned_addr)
-		kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.vpa.pinned_addr);
+	unpin_vpa(vcpu->kvm, &vcpu->arch.dtl);
+	unpin_vpa(vcpu->kvm, &vcpu->arch.slb_shadow);
+	unpin_vpa(vcpu->kvm, &vcpu->arch.vpa);
 	spin_unlock(&vcpu->arch.vpa_update_lock);
 	kvm_vcpu_uninit(vcpu);
 	kmem_cache_free(kvm_vcpu_cache, vcpu);
@@ -955,7 +1010,6 @@ static void kvmppc_end_cede(struct kvm_vcpu *vcpu)
 }
 
 extern int __kvmppc_vcore_entry(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu);
-extern void xics_wake_cpu(int cpu);
 
 static void kvmppc_remove_runnable(struct kvmppc_vcore *vc,
 				   struct kvm_vcpu *vcpu)
@@ -1330,9 +1384,12 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 			break;
 		vc->runner = vcpu;
 		n_ceded = 0;
-		list_for_each_entry(v, &vc->runnable_threads, arch.run_list)
+		list_for_each_entry(v, &vc->runnable_threads, arch.run_list) {
 			if (!v->arch.pending_exceptions)
 				n_ceded += v->arch.ceded;
+			else
+				v->arch.ceded = 0;
+		}
 		if (n_ceded == vc->n_runnable)
 			kvmppc_vcore_blocked(vc);
 		else
@@ -1645,12 +1702,12 @@ int kvmppc_core_prepare_memory_region(struct kvm *kvm,
 
 void kvmppc_core_commit_memory_region(struct kvm *kvm,
 				      struct kvm_userspace_memory_region *mem,
-				      struct kvm_memory_slot old)
+				      const struct kvm_memory_slot *old)
 {
 	unsigned long npages = mem->memory_size >> PAGE_SHIFT;
 	struct kvm_memory_slot *memslot;
 
-	if (npages && old.npages) {
+	if (npages && old->npages) {
 		/*
 		 * If modifying a memslot, reset all the rmap dirty bits.
 		 * If this is a new memslot, we don't need to do anything
@@ -1827,6 +1884,7 @@ int kvmppc_core_init_vm(struct kvm *kvm)
 	cpumask_setall(&kvm->arch.need_tlb_flush);
 
 	INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables);
+	INIT_LIST_HEAD(&kvm->arch.rtas_tokens);
 
 	kvm->arch.rma = NULL;
 
@@ -1872,6 +1930,8 @@ void kvmppc_core_destroy_vm(struct kvm *kvm)
 		kvm->arch.rma = NULL;
 	}
 
+	kvmppc_rtas_tokens_free(kvm);
+
 	kvmppc_free_hpt(kvm);
 	WARN_ON(!list_empty(&kvm->arch.spapr_tce_tables));
 }

+ 0 - 11
arch/powerpc/kvm/book3s_hv_rm_mmu.c

@@ -97,17 +97,6 @@ void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev,
 }
 EXPORT_SYMBOL_GPL(kvmppc_add_revmap_chain);
 
-/*
- * Note modification of an HPTE; set the HPTE modified bit
- * if anyone is interested.
- */
-static inline void note_hpte_modification(struct kvm *kvm,
-					  struct revmap_entry *rev)
-{
-	if (atomic_read(&kvm->arch.hpte_mod_interest))
-		rev->guest_rpte |= HPTE_GR_MODIFIED;
-}
-
 /* Remove this HPTE from the chain for a real page */
 static void remove_revmap_chain(struct kvm *kvm, long pte_index,
 				struct revmap_entry *rev,

+ 406 - 0
arch/powerpc/kvm/book3s_hv_rm_xics.c

@@ -0,0 +1,406 @@
+/*
+ * Copyright 2012 Michael Ellerman, IBM Corporation.
+ * Copyright 2012 Benjamin Herrenschmidt, IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/kvm_host.h>
+#include <linux/err.h>
+
+#include <asm/kvm_book3s.h>
+#include <asm/kvm_ppc.h>
+#include <asm/hvcall.h>
+#include <asm/xics.h>
+#include <asm/debug.h>
+#include <asm/synch.h>
+#include <asm/ppc-opcode.h>
+
+#include "book3s_xics.h"
+
+#define DEBUG_PASSUP
+
+static inline void rm_writeb(unsigned long paddr, u8 val)
+{
+	__asm__ __volatile__("sync; stbcix %0,0,%1"
+		: : "r" (val), "r" (paddr) : "memory");
+}
+
+static void icp_rm_set_vcpu_irq(struct kvm_vcpu *vcpu,
+				struct kvm_vcpu *this_vcpu)
+{
+	struct kvmppc_icp *this_icp = this_vcpu->arch.icp;
+	unsigned long xics_phys;
+	int cpu;
+
+	/* Mark the target VCPU as having an interrupt pending */
+	vcpu->stat.queue_intr++;
+	set_bit(BOOK3S_IRQPRIO_EXTERNAL_LEVEL, &vcpu->arch.pending_exceptions);
+
+	/* Kick self ? Just set MER and return */
+	if (vcpu == this_vcpu) {
+		mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) | LPCR_MER);
+		return;
+	}
+
+	/* Check if the core is loaded, if not, too hard */
+	cpu = vcpu->cpu;
+	if (cpu < 0 || cpu >= nr_cpu_ids) {
+		this_icp->rm_action |= XICS_RM_KICK_VCPU;
+		this_icp->rm_kick_target = vcpu;
+		return;
+	}
+	/* In SMT cpu will always point to thread 0, we adjust it */
+	cpu += vcpu->arch.ptid;
+
+	/* Not too hard, then poke the target */
+	xics_phys = paca[cpu].kvm_hstate.xics_phys;
+	rm_writeb(xics_phys + XICS_MFRR, IPI_PRIORITY);
+}
+
+static void icp_rm_clr_vcpu_irq(struct kvm_vcpu *vcpu)
+{
+	/* Note: Only called on self ! */
+	clear_bit(BOOK3S_IRQPRIO_EXTERNAL_LEVEL,
+		  &vcpu->arch.pending_exceptions);
+	mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) & ~LPCR_MER);
+}
+
+static inline bool icp_rm_try_update(struct kvmppc_icp *icp,
+				     union kvmppc_icp_state old,
+				     union kvmppc_icp_state new)
+{
+	struct kvm_vcpu *this_vcpu = local_paca->kvm_hstate.kvm_vcpu;
+	bool success;
+
+	/* Calculate new output value */
+	new.out_ee = (new.xisr && (new.pending_pri < new.cppr));
+
+	/* Attempt atomic update */
+	success = cmpxchg64(&icp->state.raw, old.raw, new.raw) == old.raw;
+	if (!success)
+		goto bail;
+
+	/*
+	 * Check for output state update
+	 *
+	 * Note that this is racy since another processor could be updating
+	 * the state already. This is why we never clear the interrupt output
+	 * here, we only ever set it. The clear only happens prior to doing
+	 * an update and only by the processor itself. Currently we do it
+	 * in Accept (H_XIRR) and Up_CPPR (H_CPPR).
+	 *
+	 * We also do not try to figure out whether the EE state has changed,
+	 * we unconditionally set it if the new state calls for it. The reason
+	 * for that is that we opportunistically remove the pending interrupt
+	 * flag when raising CPPR, so we need to set it back here if an
+	 * interrupt is still pending.
+	 */
+	if (new.out_ee)
+		icp_rm_set_vcpu_irq(icp->vcpu, this_vcpu);
+
+	/* Expose the state change for debug purposes */
+	this_vcpu->arch.icp->rm_dbgstate = new;
+	this_vcpu->arch.icp->rm_dbgtgt = icp->vcpu;
+
+ bail:
+	return success;
+}
+
+static inline int check_too_hard(struct kvmppc_xics *xics,
+				 struct kvmppc_icp *icp)
+{
+	return (xics->real_mode_dbg || icp->rm_action) ? H_TOO_HARD : H_SUCCESS;
+}
+
+static void icp_rm_down_cppr(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
+			     u8 new_cppr)
+{
+	union kvmppc_icp_state old_state, new_state;
+	bool resend;
+
+	/*
+	 * This handles several related states in one operation:
+	 *
+	 * ICP State: Down_CPPR
+	 *
+	 * Load CPPR with new value and if the XISR is 0
+	 * then check for resends:
+	 *
+	 * ICP State: Resend
+	 *
+	 * If MFRR is more favored than CPPR, check for IPIs
+	 * and notify ICS of a potential resend. This is done
+	 * asynchronously (when used in real mode, we will have
+	 * to exit here).
+	 *
+	 * We do not handle the complete Check_IPI as documented
+	 * here. In the PAPR, this state will be used for both
+	 * Set_MFRR and Down_CPPR. However, we know that we aren't
+	 * changing the MFRR state here so we don't need to handle
+	 * the case of an MFRR causing a reject of a pending irq,
+	 * this will have been handled when the MFRR was set in the
+	 * first place.
+	 *
+	 * Thus we don't have to handle rejects, only resends.
+	 *
+	 * When implementing real mode for HV KVM, resend will lead to
+	 * a H_TOO_HARD return and the whole transaction will be handled
+	 * in virtual mode.
+	 */
+	do {
+		old_state = new_state = ACCESS_ONCE(icp->state);
+
+		/* Down_CPPR */
+		new_state.cppr = new_cppr;
+
+		/*
+		 * Cut down Resend / Check_IPI / IPI
+		 *
+		 * The logic is that we cannot have a pending interrupt
+		 * trumped by an IPI at this point (see above), so we
+		 * know that either the pending interrupt is already an
+		 * IPI (in which case we don't care to override it) or
+		 * it's either more favored than us or non existent
+		 */
+		if (new_state.mfrr < new_cppr &&
+		    new_state.mfrr <= new_state.pending_pri) {
+			new_state.pending_pri = new_state.mfrr;
+			new_state.xisr = XICS_IPI;
+		}
+
+		/* Latch/clear resend bit */
+		resend = new_state.need_resend;
+		new_state.need_resend = 0;
+
+	} while (!icp_rm_try_update(icp, old_state, new_state));
+
+	/*
+	 * Now handle resend checks. Those are asynchronous to the ICP
+	 * state update in HW (ie bus transactions) so we can handle them
+	 * separately here as well.
+	 */
+	if (resend)
+		icp->rm_action |= XICS_RM_CHECK_RESEND;
+}
+
+
+unsigned long kvmppc_rm_h_xirr(struct kvm_vcpu *vcpu)
+{
+	union kvmppc_icp_state old_state, new_state;
+	struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
+	struct kvmppc_icp *icp = vcpu->arch.icp;
+	u32 xirr;
+
+	if (!xics || !xics->real_mode)
+		return H_TOO_HARD;
+
+	/* First clear the interrupt */
+	icp_rm_clr_vcpu_irq(icp->vcpu);
+
+	/*
+	 * ICP State: Accept_Interrupt
+	 *
+	 * Return the pending interrupt (if any) along with the
+	 * current CPPR, then clear the XISR & set CPPR to the
+	 * pending priority
+	 */
+	do {
+		old_state = new_state = ACCESS_ONCE(icp->state);
+
+		xirr = old_state.xisr | (((u32)old_state.cppr) << 24);
+		if (!old_state.xisr)
+			break;
+		new_state.cppr = new_state.pending_pri;
+		new_state.pending_pri = 0xff;
+		new_state.xisr = 0;
+
+	} while (!icp_rm_try_update(icp, old_state, new_state));
+
+	/* Return the result in GPR4 */
+	vcpu->arch.gpr[4] = xirr;
+
+	return check_too_hard(xics, icp);
+}
+
+int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
+		    unsigned long mfrr)
+{
+	union kvmppc_icp_state old_state, new_state;
+	struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
+	struct kvmppc_icp *icp, *this_icp = vcpu->arch.icp;
+	u32 reject;
+	bool resend;
+	bool local;
+
+	if (!xics || !xics->real_mode)
+		return H_TOO_HARD;
+
+	local = this_icp->server_num == server;
+	if (local)
+		icp = this_icp;
+	else
+		icp = kvmppc_xics_find_server(vcpu->kvm, server);
+	if (!icp)
+		return H_PARAMETER;
+
+	/*
+	 * ICP state: Set_MFRR
+	 *
+	 * If the CPPR is more favored than the new MFRR, then
+	 * nothing needs to be done as there can be no XISR to
+	 * reject.
+	 *
+	 * If the CPPR is less favored, then we might be replacing
+	 * an interrupt, and thus need to possibly reject it as in
+	 *
+	 * ICP state: Check_IPI
+	 */
+	do {
+		old_state = new_state = ACCESS_ONCE(icp->state);
+
+		/* Set_MFRR */
+		new_state.mfrr = mfrr;
+
+		/* Check_IPI */
+		reject = 0;
+		resend = false;
+		if (mfrr < new_state.cppr) {
+			/* Reject a pending interrupt if not an IPI */
+			if (mfrr <= new_state.pending_pri)
+				reject = new_state.xisr;
+			new_state.pending_pri = mfrr;
+			new_state.xisr = XICS_IPI;
+		}
+
+		if (mfrr > old_state.mfrr && mfrr > new_state.cppr) {
+			resend = new_state.need_resend;
+			new_state.need_resend = 0;
+		}
+	} while (!icp_rm_try_update(icp, old_state, new_state));
+
+	/* Pass rejects to virtual mode */
+	if (reject && reject != XICS_IPI) {
+		this_icp->rm_action |= XICS_RM_REJECT;
+		this_icp->rm_reject = reject;
+	}
+
+	/* Pass resends to virtual mode */
+	if (resend)
+		this_icp->rm_action |= XICS_RM_CHECK_RESEND;
+
+	return check_too_hard(xics, this_icp);
+}
+
+int kvmppc_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr)
+{
+	union kvmppc_icp_state old_state, new_state;
+	struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
+	struct kvmppc_icp *icp = vcpu->arch.icp;
+	u32 reject;
+
+	if (!xics || !xics->real_mode)
+		return H_TOO_HARD;
+
+	/*
+	 * ICP State: Set_CPPR
+	 *
+	 * We can safely compare the new value with the current
+	 * value outside of the transaction as the CPPR is only
+	 * ever changed by the processor on itself
+	 */
+	if (cppr > icp->state.cppr) {
+		icp_rm_down_cppr(xics, icp, cppr);
+		goto bail;
+	} else if (cppr == icp->state.cppr)
+		return H_SUCCESS;
+
+	/*
+	 * ICP State: Up_CPPR
+	 *
+	 * The processor is raising its priority, this can result
+	 * in a rejection of a pending interrupt:
+	 *
+	 * ICP State: Reject_Current
+	 *
+	 * We can remove EE from the current processor, the update
+	 * transaction will set it again if needed
+	 */
+	icp_rm_clr_vcpu_irq(icp->vcpu);
+
+	do {
+		old_state = new_state = ACCESS_ONCE(icp->state);
+
+		reject = 0;
+		new_state.cppr = cppr;
+
+		if (cppr <= new_state.pending_pri) {
+			reject = new_state.xisr;
+			new_state.xisr = 0;
+			new_state.pending_pri = 0xff;
+		}
+
+	} while (!icp_rm_try_update(icp, old_state, new_state));
+
+	/* Pass rejects to virtual mode */
+	if (reject && reject != XICS_IPI) {
+		icp->rm_action |= XICS_RM_REJECT;
+		icp->rm_reject = reject;
+	}
+ bail:
+	return check_too_hard(xics, icp);
+}
+
+int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
+{
+	struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
+	struct kvmppc_icp *icp = vcpu->arch.icp;
+	struct kvmppc_ics *ics;
+	struct ics_irq_state *state;
+	u32 irq = xirr & 0x00ffffff;
+	u16 src;
+
+	if (!xics || !xics->real_mode)
+		return H_TOO_HARD;
+
+	/*
+	 * ICP State: EOI
+	 *
+	 * Note: If EOI is incorrectly used by SW to lower the CPPR
+	 * value (ie more favored), we do not check for rejection of
+	 * a pending interrupt, this is a SW error and PAPR specifies
+	 * that we don't have to deal with it.
+	 *
+	 * The sending of an EOI to the ICS is handled after the
+	 * CPPR update
+	 *
+	 * ICP State: Down_CPPR which we handle
+	 * in a separate function as it's shared with H_CPPR.
+	 */
+	icp_rm_down_cppr(xics, icp, xirr >> 24);
+
+	/* IPIs have no EOI */
+	if (irq == XICS_IPI)
+		goto bail;
+	/*
+	 * EOI handling: If the interrupt is still asserted, we need to
+	 * resend it. We can take a lockless "peek" at the ICS state here.
+	 *
+	 * "Message" interrupts will never have "asserted" set
+	 */
+	ics = kvmppc_xics_find_ics(xics, irq, &src);
+	if (!ics)
+		goto bail;
+	state = &ics->irq_state[src];
+
+	/* Still asserted, resend it, we make it look like a reject */
+	if (state->asserted) {
+		icp->rm_action |= XICS_RM_REJECT;
+		icp->rm_reject = irq;
+	}
+ bail:
+	return check_too_hard(xics, icp);
+}
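
All of the ICP operations above share one lock-free shape: snapshot the state word, compute the successor state, and retry if the cmpxchg loses a race. In miniature (names taken from this file; the state-machine edits vary per operation):

	union kvmppc_icp_state old_state, new_state;
	do {
		old_state = new_state = ACCESS_ONCE(icp->state);
		new_state.cppr = new_cppr;	/* ...per-operation edits... */
		new_state.out_ee = (new_state.xisr &&
				    (new_state.pending_pri < new_state.cppr));
	} while (cmpxchg64(&icp->state.raw,
			   old_state.raw, new_state.raw) != old_state.raw);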

+ 161 - 67
arch/powerpc/kvm/book3s_hv_rmhandlers.S

@@ -79,10 +79,6 @@ _GLOBAL(kvmppc_hv_entry_trampoline)
  *                                                                            *
  *****************************************************************************/
 
-#define XICS_XIRR		4
-#define XICS_QIRR		0xc
-#define XICS_IPI		2	/* interrupt source # for IPIs */
-
 /*
  * We come in here when wakened from nap mode on a secondary hw thread.
  * Relocation is off and most register values are lost.
@@ -101,50 +97,51 @@ kvm_start_guest:
 	li	r0,1
 	stb	r0,PACA_NAPSTATELOST(r13)
 
-	/* get vcpu pointer, NULL if we have no vcpu to run */
-	ld	r4,HSTATE_KVM_VCPU(r13)
-	cmpdi	cr1,r4,0
+	/* were we napping due to cede? */
+	lbz	r0,HSTATE_NAPPING(r13)
+	cmpwi	r0,0
+	bne	kvm_end_cede
+
+	/*
+	 * We weren't napping due to cede, so this must be a secondary
+	 * thread being woken up to run a guest, or being woken up due
+	 * to a stray IPI.  (Or due to some machine check or hypervisor
+	 * maintenance interrupt while the core is in KVM.)
+	 */
 
 
 	/* Check the wake reason in SRR1 to see why we got here */
 	/* Check the wake reason in SRR1 to see why we got here */
 	mfspr	r3,SPRN_SRR1
 	mfspr	r3,SPRN_SRR1
 	rlwinm	r3,r3,44-31,0x7		/* extract wake reason field */
 	rlwinm	r3,r3,44-31,0x7		/* extract wake reason field */
 	cmpwi	r3,4			/* was it an external interrupt? */
 	cmpwi	r3,4			/* was it an external interrupt? */
-	bne	27f
-
-	/*
-	 * External interrupt - for now assume it is an IPI, since we
-	 * should never get any other interrupts sent to offline threads.
-	 * Only do this for secondary threads.
-	 */
-	beq	cr1,25f
-	lwz	r3,VCPU_PTID(r4)
-	cmpwi	r3,0
-	beq	27f
-25:	ld	r5,HSTATE_XICS_PHYS(r13)
-	li	r0,0xff
-	li	r6,XICS_QIRR
-	li	r7,XICS_XIRR
+	bne	27f			/* if not */
+	ld	r5,HSTATE_XICS_PHYS(r13)
+	li	r7,XICS_XIRR		/* if it was an external interrupt, */
 	lwzcix	r8,r5,r7		/* get and ack the interrupt */
 	lwzcix	r8,r5,r7		/* get and ack the interrupt */
 	sync
 	sync
 	clrldi.	r9,r8,40		/* get interrupt source ID. */
 	clrldi.	r9,r8,40		/* get interrupt source ID. */
-	beq	27f			/* none there? */
-	cmpwi	r9,XICS_IPI
-	bne	26f
+	beq	28f			/* none there? */
+	cmpwi	r9,XICS_IPI		/* was it an IPI? */
+	bne	29f
+	li	r0,0xff
+	li	r6,XICS_MFRR
 	stbcix	r0,r5,r6		/* clear IPI */
 	stbcix	r0,r5,r6		/* clear IPI */
-26:	stwcix	r8,r5,r7		/* EOI the interrupt */
-
-27:	/* XXX should handle hypervisor maintenance interrupts etc. here */
+	stwcix	r8,r5,r7		/* EOI the interrupt */
+	sync				/* order loading of vcpu after that */
 
-	/* reload vcpu pointer after clearing the IPI */
+	/* get vcpu pointer, NULL if we have no vcpu to run */
 	ld	r4,HSTATE_KVM_VCPU(r13)
 	cmpdi	r4,0
 	/* if we have no vcpu to run, go back to sleep */
 	beq	kvm_no_guest
+	b	kvmppc_hv_entry
 
-	/* were we napping due to cede? */
-	lbz	r0,HSTATE_NAPPING(r13)
-	cmpwi	r0,0
-	bne	kvm_end_cede
+27:	/* XXX should handle hypervisor maintenance interrupts etc. here */
+	b	kvm_no_guest
+28:	/* SRR1 said external but ICP said nope?? */
+	b	kvm_no_guest
+29:	/* External non-IPI interrupt to offline secondary thread? help?? */
+	stw	r8,HSTATE_SAVED_XIRR(r13)
+	b	kvm_no_guest
 
 .global kvmppc_hv_entry
 kvmppc_hv_entry:
@@ -260,6 +257,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
 	lwz	r5, LPPACA_YIELDCOUNT(r3)
 	addi	r5, r5, 1
 	stw	r5, LPPACA_YIELDCOUNT(r3)
+	li	r6, 1
+	stb	r6, VCPU_VPA_DIRTY(r4)
 25:
 	/* Load up DAR and DSISR */
 	ld	r5, VCPU_DAR(r4)
@@ -485,20 +484,20 @@ toc_tlbie_lock:
 	mtctr	r6
 	mtxer	r7
 
+	ld	r10, VCPU_PC(r4)
+	ld	r11, VCPU_MSR(r4)
 kvmppc_cede_reentry:		/* r4 = vcpu, r13 = paca */
 	ld	r6, VCPU_SRR0(r4)
 	ld	r7, VCPU_SRR1(r4)
-	ld	r10, VCPU_PC(r4)
-	ld	r11, VCPU_MSR(r4)	/* r11 = vcpu->arch.msr & ~MSR_HV */
 
+	/* r11 = vcpu->arch.msr & ~MSR_HV */
 	rldicl	r11, r11, 63 - MSR_HV_LG, 1
 	rotldi	r11, r11, 1 + MSR_HV_LG
 	ori	r11, r11, MSR_ME
 
 	/* Check if we can deliver an external or decrementer interrupt now */
 	ld	r0,VCPU_PENDING_EXC(r4)
-	li	r8,(1 << BOOK3S_IRQPRIO_EXTERNAL)
-	oris	r8,r8,(1 << BOOK3S_IRQPRIO_EXTERNAL_LEVEL)@h
+	lis	r8,(1 << BOOK3S_IRQPRIO_EXTERNAL_LEVEL)@h
 	and	r0,r0,r8
 	cmpdi	cr1,r0,0
 	andi.	r0,r11,MSR_EE
@@ -526,10 +525,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
 	/* Move SRR0 and SRR1 into the respective regs */
 5:	mtspr	SPRN_SRR0, r6
 	mtspr	SPRN_SRR1, r7
-	li	r0,0
-	stb	r0,VCPU_CEDED(r4)	/* cancel cede */
 
 fast_guest_return:
+	li	r0,0
+	stb	r0,VCPU_CEDED(r4)	/* cancel cede */
 	mtspr	SPRN_HSRR0,r10
 	mtspr	SPRN_HSRR1,r11
 
@@ -676,17 +675,99 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
 	cmpwi	r12,BOOK3S_INTERRUPT_SYSCALL
 	beq	hcall_try_real_mode
 
-	/* Check for mediated interrupts (could be done earlier really ...) */
+	/* Only handle external interrupts here on arch 206 and later */
 BEGIN_FTR_SECTION
-	cmpwi	r12,BOOK3S_INTERRUPT_EXTERNAL
-	bne+	1f
-	andi.	r0,r11,MSR_EE
-	beq	1f
-	mfspr	r5,SPRN_LPCR
-	andi.	r0,r5,LPCR_MER
-	bne	bounce_ext_interrupt
-1:
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
+	b	ext_interrupt_to_host
+END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_206)
+
+	/* External interrupt ? */
+	cmpwi	r12, BOOK3S_INTERRUPT_EXTERNAL
+	bne+	ext_interrupt_to_host
+
+	/* External interrupt, first check for host_ipi. If this is
+	 * set, we know the host wants us out so let's do it now
+	 */
+do_ext_interrupt:
+	lbz	r0, HSTATE_HOST_IPI(r13)
+	cmpwi	r0, 0
+	bne	ext_interrupt_to_host
+
+	/* Now read the interrupt from the ICP */
+	ld	r5, HSTATE_XICS_PHYS(r13)
+	li	r7, XICS_XIRR
+	cmpdi	r5, 0
+	beq-	ext_interrupt_to_host
+	lwzcix	r3, r5, r7
+	rlwinm.	r0, r3, 0, 0xffffff
+	sync
+	beq	3f		/* if nothing pending in the ICP */
+
+	/* We found something in the ICP...
+	 *
+	 * If it's not an IPI, stash it in the PACA and return to
+	 * the host, we don't (yet) handle directing real external
+	 * interrupts directly to the guest
+	 */
+	cmpwi	r0, XICS_IPI
+	bne	ext_stash_for_host
+
+	/* It's an IPI, clear the MFRR and EOI it */
+	li	r0, 0xff
+	li	r6, XICS_MFRR
+	stbcix	r0, r5, r6		/* clear the IPI */
+	stwcix	r3, r5, r7		/* EOI it */
+	sync
+
+	/* We need to re-check host IPI now in case it got set in the
+	 * meantime. If it's clear, we bounce the interrupt to the
+	 * guest
+	 */
+	lbz	r0, HSTATE_HOST_IPI(r13)
+	cmpwi	r0, 0
+	bne-	1f
+
+	/* All right, looks like an IPI for the guest, we need to set MER */
+3:
+	/* Check if any CPU is heading out to the host, if so head out too */
+	ld	r5, HSTATE_KVM_VCORE(r13)
+	lwz	r0, VCORE_ENTRY_EXIT(r5)
+	cmpwi	r0, 0x100
+	bge	ext_interrupt_to_host
+
+	/* See if there is a pending interrupt for the guest */
+	mfspr	r8, SPRN_LPCR
+	ld	r0, VCPU_PENDING_EXC(r9)
+	/* Insert EXTERNAL_LEVEL bit into LPCR at the MER bit position */
+	rldicl.	r0, r0, 64 - BOOK3S_IRQPRIO_EXTERNAL_LEVEL, 63
+	rldimi	r8, r0, LPCR_MER_SH, 63 - LPCR_MER_SH
+	beq	2f
+
+	/* And if the guest EE is set, we can deliver immediately, else
+	 * we return to the guest with MER set
+	 */
+	andi.	r0, r11, MSR_EE
+	beq	2f
+	mtspr	SPRN_SRR0, r10
+	mtspr	SPRN_SRR1, r11
+	li	r10, BOOK3S_INTERRUPT_EXTERNAL
+	li	r11, (MSR_ME << 1) | 1	/* synthesize MSR_SF | MSR_ME */
+	rotldi	r11, r11, 63
+2:	mr	r4, r9
+	mtspr	SPRN_LPCR, r8
+	b	fast_guest_return
+
+	/* We raced with the host, we need to resend that IPI, bummer */
+1:	li	r0, IPI_PRIORITY
+	stbcix	r0, r5, r6		/* set the IPI */
+	sync
+	b	ext_interrupt_to_host
+
+ext_stash_for_host:
+	/* It's not an IPI and it's for the host, stash it in the PACA
+	 * before exit, it will be picked up by the host ICP driver
+	 */
+	stw	r3, HSTATE_SAVED_XIRR(r13)
+ext_interrupt_to_host:
 
 guest_exit_cont:		/* r9 = vcpu, r12 = trap, r13 = paca */
 	/* Save DEC */
@@ -829,7 +910,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
 	beq	44f
 	ld	r8,HSTATE_XICS_PHYS(r6)	/* get thread's XICS reg addr */
 	li	r0,IPI_PRIORITY
-	li	r7,XICS_QIRR
+	li	r7,XICS_MFRR
 	stbcix	r0,r7,r8		/* trigger the IPI */
 44:	srdi.	r3,r3,1
 	addi	r6,r6,PACA_SIZE
@@ -1018,6 +1099,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
 	lwz	r3, LPPACA_YIELDCOUNT(r8)
 	addi	r3, r3, 1
 	stw	r3, LPPACA_YIELDCOUNT(r8)
+	li	r3, 1
+	stb	r3, VCPU_VPA_DIRTY(r9)
 25:
 	/* Save PMU registers if requested */
 	/* r8 and cr0.eq are live here */
@@ -1350,11 +1433,19 @@ hcall_real_table:
 	.long	0		/* 0x58 */
 	.long	0		/* 0x5c */
 	.long	0		/* 0x60 */
-	.long	0		/* 0x64 */
-	.long	0		/* 0x68 */
-	.long	0		/* 0x6c */
-	.long	0		/* 0x70 */
-	.long	0		/* 0x74 */
+#ifdef CONFIG_KVM_XICS
+	.long	.kvmppc_rm_h_eoi - hcall_real_table
+	.long	.kvmppc_rm_h_cppr - hcall_real_table
+	.long	.kvmppc_rm_h_ipi - hcall_real_table
+	.long	0		/* 0x70 - H_IPOLL */
+	.long	.kvmppc_rm_h_xirr - hcall_real_table
+#else
+	.long	0		/* 0x64 - H_EOI */
+	.long	0		/* 0x68 - H_CPPR */
+	.long	0		/* 0x6c - H_IPI */
+	.long	0		/* 0x70 - H_IPOLL */
+	.long	0		/* 0x74 - H_XIRR */
+#endif
 	.long	0		/* 0x78 */
 	.long	0		/* 0x7c */
 	.long	0		/* 0x80 */
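
Because hcall numbers are multiples of 4 and each table entry is one 32-bit offset, the dispatcher at hcall_try_real_mode can use the hcall number directly as the byte offset into hcall_real_table. A schematic C rendering of that dispatch (an assumption-laden sketch, not the actual code, which is in assembly):

	/* req is the hcall number from GPR3; entries are offsets relative
	 * to the table start, 0 meaning "no real-mode handler". */
	s32 off = ((s32 *)hcall_real_table)[req / 4];	/* H_XIRR (0x74) -> slot 29 */
	void *handler = off ? (char *)hcall_real_table + off : NULL;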
@@ -1405,15 +1496,6 @@ ignore_hdec:
 	mr	r4,r9
 	b	fast_guest_return
 
-bounce_ext_interrupt:
-	mr	r4,r9
-	mtspr	SPRN_SRR0,r10
-	mtspr	SPRN_SRR1,r11
-	li	r10,BOOK3S_INTERRUPT_EXTERNAL
-	li	r11,(MSR_ME << 1) | 1	/* synthesize MSR_SF | MSR_ME */
-	rotldi	r11,r11,63
-	b	fast_guest_return
-
 _GLOBAL(kvmppc_h_set_dabr)
 	std	r4,VCPU_DABR(r3)
 	/* Work around P7 bug where DABR can get corrupted on mtspr */
@@ -1519,6 +1601,9 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_206)
 	b	.
 
 kvm_end_cede:
+	/* get vcpu pointer */
+	ld	r4, HSTATE_KVM_VCPU(r13)
+
 	/* Woken by external or decrementer interrupt */
 	ld	r1, HSTATE_HOST_R1(r13)
 
@@ -1558,6 +1643,16 @@ kvm_end_cede:
 	li	r0,0
 	stb	r0,HSTATE_NAPPING(r13)
 
+	/* Check the wake reason in SRR1 to see why we got here */
+	mfspr	r3, SPRN_SRR1
+	rlwinm	r3, r3, 44-31, 0x7	/* extract wake reason field */
+	cmpwi	r3, 4			/* was it an external interrupt? */
+	li	r12, BOOK3S_INTERRUPT_EXTERNAL
+	mr	r9, r4
+	ld	r10, VCPU_PC(r9)
+	ld	r11, VCPU_MSR(r9)
+	beq	do_ext_interrupt	/* if so */
+
 	/* see if any other thread is already exiting */
 	lwz	r0,VCORE_ENTRY_EXIT(r5)
 	cmpwi	r0,0x100
@@ -1577,8 +1672,7 @@ kvm_cede_prodded:
 
 	/* we've ceded but we want to give control to the host */
 kvm_cede_exit:
-	li	r3,H_TOO_HARD
-	blr
+	b	hcall_real_fallback
 
 	/* Try to handle a machine check in real mode */
 machine_check_realmode:
@@ -1626,7 +1720,7 @@ secondary_nap:
 	beq	37f
 	sync
 	li	r0, 0xff
-	li	r6, XICS_QIRR
+	li	r6, XICS_MFRR
 	stbcix	r0, r5, r6		/* clear the IPI */
 	stwcix	r3, r5, r7		/* EOI it */
 37:	sync
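
The IPI handshake the assembly performs against the ICP, rendered as C for readability (a sketch only: the real-mode code must use the cache-inhibited lwzcix/stbcix access forms shown above, not ordinary MMIO accessors, and xics is treated here as an ioremapped pointer):

	u32 xirr = in_be32(xics + XICS_XIRR);		/* the load acks */
	if ((xirr & 0xffffff) == XICS_IPI) {
		out_8(xics + XICS_MFRR, 0xff);		/* clear the IPI */
		out_be32(xics + XICS_XIRR, xirr);	/* EOI it */
	}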

+ 3 - 4
arch/powerpc/kvm/book3s_pr.c

@@ -762,9 +762,7 @@ program_interrupt:
 			run->exit_reason = KVM_EXIT_MMIO;
 			r = RESUME_HOST_NV;
 			break;
-		case EMULATE_DO_PAPR:
-			run->exit_reason = KVM_EXIT_PAPR_HCALL;
-			vcpu->arch.hcall_needed = 1;
+		case EMULATE_EXIT_USER:
 			r = RESUME_HOST_NV;
 			break;
 		default:
@@ -1283,7 +1281,7 @@ int kvmppc_core_prepare_memory_region(struct kvm *kvm,
 
 void kvmppc_core_commit_memory_region(struct kvm *kvm,
 				struct kvm_userspace_memory_region *mem,
-				struct kvm_memory_slot old)
+				const struct kvm_memory_slot *old)
 {
 }
 
@@ -1298,6 +1296,7 @@ int kvmppc_core_init_vm(struct kvm *kvm)
 {
 #ifdef CONFIG_PPC64
 	INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables);
+	INIT_LIST_HEAD(&kvm->arch.rtas_tokens);
 #endif
 
 	if (firmware_has_feature(FW_FEATURE_SET_MODE)) {

+ 21 - 0
arch/powerpc/kvm/book3s_pr_papr.c

@@ -227,6 +227,13 @@ static int kvmppc_h_pr_put_tce(struct kvm_vcpu *vcpu)
 	return EMULATE_DONE;
 }
 
+static int kvmppc_h_pr_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd)
+{
+	long rc = kvmppc_xics_hcall(vcpu, cmd);
+	kvmppc_set_gpr(vcpu, 3, rc);
+	return EMULATE_DONE;
+}
+
 int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd)
 {
 	switch (cmd) {
@@ -246,6 +253,20 @@ int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd)
 		clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
 		vcpu->stat.halt_wakeup++;
 		return EMULATE_DONE;
+	case H_XIRR:
+	case H_CPPR:
+	case H_EOI:
+	case H_IPI:
+		if (kvmppc_xics_enabled(vcpu))
+			return kvmppc_h_pr_xics_hcall(vcpu, cmd);
+		break;
+	case H_RTAS:
+		if (list_empty(&vcpu->kvm->arch.rtas_tokens))
+			return RESUME_HOST;
+		if (kvmppc_rtas_hcall(vcpu))
+			break;
+		kvmppc_set_gpr(vcpu, 3, 0);
+		return EMULATE_DONE;
 	}
 
 	return EMULATE_FAIL;
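
kvmppc_xics_enabled() only returns true once userspace has created the in-kernel XICS device and attached the vcpu to it. Per the new devices/xics.txt added by this merge, that looks roughly like the following userspace sketch (vm_fd, vcpu_fd and server_num are assumptions):

	struct kvm_create_device cd = { .type = KVM_DEV_TYPE_XICS };
	ioctl(vm_fd, KVM_CREATE_DEVICE, &cd);	/* fills in cd.fd */

	struct kvm_enable_cap cap = {
		.cap  = KVM_CAP_IRQ_XICS,
		.args = { cd.fd, server_num },	/* device fd, ICP server# */
	};
	ioctl(vcpu_fd, KVM_ENABLE_CAP, &cap);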

+ 274 - 0
arch/powerpc/kvm/book3s_rtas.c

@@ -0,0 +1,274 @@
+/*
+ * Copyright 2012 Michael Ellerman, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/kvm_host.h>
+#include <linux/kvm.h>
+#include <linux/err.h>
+
+#include <asm/uaccess.h>
+#include <asm/kvm_book3s.h>
+#include <asm/kvm_ppc.h>
+#include <asm/hvcall.h>
+#include <asm/rtas.h>
+
+#ifdef CONFIG_KVM_XICS
+static void kvm_rtas_set_xive(struct kvm_vcpu *vcpu, struct rtas_args *args)
+{
+	u32 irq, server, priority;
+	int rc;
+
+	if (args->nargs != 3 || args->nret != 1) {
+		rc = -3;
+		goto out;
+	}
+
+	irq = args->args[0];
+	server = args->args[1];
+	priority = args->args[2];
+
+	rc = kvmppc_xics_set_xive(vcpu->kvm, irq, server, priority);
+	if (rc)
+		rc = -3;
+out:
+	args->rets[0] = rc;
+}
+
+static void kvm_rtas_get_xive(struct kvm_vcpu *vcpu, struct rtas_args *args)
+{
+	u32 irq, server, priority;
+	int rc;
+
+	if (args->nargs != 1 || args->nret != 3) {
+		rc = -3;
+		goto out;
+	}
+
+	irq = args->args[0];
+
+	server = priority = 0;
+	rc = kvmppc_xics_get_xive(vcpu->kvm, irq, &server, &priority);
+	if (rc) {
+		rc = -3;
+		goto out;
+	}
+
+	args->rets[1] = server;
+	args->rets[2] = priority;
+out:
+	args->rets[0] = rc;
+}
+
+static void kvm_rtas_int_off(struct kvm_vcpu *vcpu, struct rtas_args *args)
+{
+	u32 irq;
+	int rc;
+
+	if (args->nargs != 1 || args->nret != 1) {
+		rc = -3;
+		goto out;
+	}
+
+	irq = args->args[0];
+
+	rc = kvmppc_xics_int_off(vcpu->kvm, irq);
+	if (rc)
+		rc = -3;
+out:
+	args->rets[0] = rc;
+}
+
+static void kvm_rtas_int_on(struct kvm_vcpu *vcpu, struct rtas_args *args)
+{
+	u32 irq;
+	int rc;
+
+	if (args->nargs != 1 || args->nret != 1) {
+		rc = -3;
+		goto out;
+	}
+
+	irq = args->args[0];
+
+	rc = kvmppc_xics_int_on(vcpu->kvm, irq);
+	if (rc)
+		rc = -3;
+out:
+	args->rets[0] = rc;
+}
+#endif /* CONFIG_KVM_XICS */
+
+struct rtas_handler {
+	void (*handler)(struct kvm_vcpu *vcpu, struct rtas_args *args);
+	char *name;
+};
+
+static struct rtas_handler rtas_handlers[] = {
+#ifdef CONFIG_KVM_XICS
+	{ .name = "ibm,set-xive", .handler = kvm_rtas_set_xive },
+	{ .name = "ibm,get-xive", .handler = kvm_rtas_get_xive },
+	{ .name = "ibm,int-off",  .handler = kvm_rtas_int_off },
+	{ .name = "ibm,int-on",   .handler = kvm_rtas_int_on },
+#endif
+};
+
+struct rtas_token_definition {
+	struct list_head list;
+	struct rtas_handler *handler;
+	u64 token;
+};
+
+static int rtas_name_matches(char *s1, char *s2)
+{
+	struct kvm_rtas_token_args args;
+	return !strncmp(s1, s2, sizeof(args.name));
+}
+
+static int rtas_token_undefine(struct kvm *kvm, char *name)
+{
+	struct rtas_token_definition *d, *tmp;
+
+	lockdep_assert_held(&kvm->lock);
+
+	list_for_each_entry_safe(d, tmp, &kvm->arch.rtas_tokens, list) {
+		if (rtas_name_matches(d->handler->name, name)) {
+			list_del(&d->list);
+			kfree(d);
+			return 0;
+		}
+	}
+
+	/* It's not an error to undefine an undefined token */
+	return 0;
+}
+
+static int rtas_token_define(struct kvm *kvm, char *name, u64 token)
+{
+	struct rtas_token_definition *d;
+	struct rtas_handler *h = NULL;
+	bool found;
+	int i;
+
+	lockdep_assert_held(&kvm->lock);
+
+	list_for_each_entry(d, &kvm->arch.rtas_tokens, list) {
+		if (d->token == token)
+			return -EEXIST;
+	}
+
+	found = false;
+	for (i = 0; i < ARRAY_SIZE(rtas_handlers); i++) {
+		h = &rtas_handlers[i];
+		if (rtas_name_matches(h->name, name)) {
+			found = true;
+			break;
+		}
+	}
+
+	if (!found)
+		return -ENOENT;
+
+	d = kzalloc(sizeof(*d), GFP_KERNEL);
+	if (!d)
+		return -ENOMEM;
+
+	d->handler = h;
+	d->token = token;
+
+	list_add_tail(&d->list, &kvm->arch.rtas_tokens);
+
+	return 0;
+}
+
+int kvm_vm_ioctl_rtas_define_token(struct kvm *kvm, void __user *argp)
+{
+	struct kvm_rtas_token_args args;
+	int rc;
+
+	if (copy_from_user(&args, argp, sizeof(args)))
+		return -EFAULT;
+
+	mutex_lock(&kvm->lock);
+
+	if (args.token)
+		rc = rtas_token_define(kvm, args.name, args.token);
+	else
+		rc = rtas_token_undefine(kvm, args.name);
+
+	mutex_unlock(&kvm->lock);
+
+	return rc;
+}
+
+int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu)
+{
+	struct rtas_token_definition *d;
+	struct rtas_args args;
+	rtas_arg_t *orig_rets;
+	gpa_t args_phys;
+	int rc;
+
+	/* r4 contains the guest physical address of the RTAS args */
+	args_phys = kvmppc_get_gpr(vcpu, 4);
+
+	rc = kvm_read_guest(vcpu->kvm, args_phys, &args, sizeof(args));
+	if (rc)
+		goto fail;
+
+	/*
+	 * args->rets is a pointer into args->args. Now that we've
+	 * copied args we need to fix it up to point into our copy,
+	 * not the guest args. We also need to save the original
+	 * value so we can restore it on the way out.
+	 */
+	orig_rets = args.rets;
+	args.rets = &args.args[args.nargs];
+
+	mutex_lock(&vcpu->kvm->lock);
+
+	rc = -ENOENT;
+	list_for_each_entry(d, &vcpu->kvm->arch.rtas_tokens, list) {
+		if (d->token == args.token) {
+			d->handler->handler(vcpu, &args);
+			rc = 0;
+			break;
+		}
+	}
+
+	mutex_unlock(&vcpu->kvm->lock);
+
+	if (rc == 0) {
+		args.rets = orig_rets;
+		rc = kvm_write_guest(vcpu->kvm, args_phys, &args, sizeof(args));
+		if (rc)
+			goto fail;
+	}
+
+	return rc;
+
+fail:
+	/*
+	 * We only get here if the guest has called RTAS with a bogus
+	 * args pointer. That means we can't get to the args, and so we
+	 * can't fail the RTAS call. So fail right out to userspace,
+	 * which should kill the guest.
+	 */
+	return rc;
+}
+
+void kvmppc_rtas_tokens_free(struct kvm *kvm)
+{
+	struct rtas_token_definition *d, *tmp;
+
+	lockdep_assert_held(&kvm->lock);
+
+	list_for_each_entry_safe(d, tmp, &kvm->arch.rtas_tokens, list) {
+		list_del(&d->list);
+		kfree(d);
+	}
+}
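
kvm_vm_ioctl_rtas_define_token() above is reached via the new KVM_PPC_RTAS_DEFINE_TOKEN vm ioctl: userspace binds each RTAS service name to the token value it advertises to the guest, and a token of 0 undefines the binding. A hedged userspace sketch (vm_fd and MY_SET_XIVE_TOKEN are hypothetical caller-side names):

	struct kvm_rtas_token_args args = { .token = MY_SET_XIVE_TOKEN };
	strncpy(args.name, "ibm,set-xive", sizeof(args.name));
	if (ioctl(vm_fd, KVM_PPC_RTAS_DEFINE_TOKEN, &args) < 0)
		perror("KVM_PPC_RTAS_DEFINE_TOKEN");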

+ 1270 - 0
arch/powerpc/kvm/book3s_xics.c

@@ -0,0 +1,1270 @@
+/*
+ * Copyright 2012 Michael Ellerman, IBM Corporation.
+ * Copyright 2012 Benjamin Herrenschmidt, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/kvm_host.h>
+#include <linux/err.h>
+#include <linux/gfp.h>
+#include <linux/anon_inodes.h>
+
+#include <asm/uaccess.h>
+#include <asm/kvm_book3s.h>
+#include <asm/kvm_ppc.h>
+#include <asm/hvcall.h>
+#include <asm/xics.h>
+#include <asm/debug.h>
+
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+#include "book3s_xics.h"
+
+#if 1
+#define XICS_DBG(fmt...) do { } while (0)
+#else
+#define XICS_DBG(fmt...) trace_printk(fmt)
+#endif
+
+#define ENABLE_REALMODE	true
+#define DEBUG_REALMODE	false
+
+/*
+ * LOCKING
+ * =======
+ *
+ * Each ICS has a mutex protecting the information about the IRQ
+ * sources and avoiding simultaneous deliveries of the same interrupt.
+ *
+ * ICP operations are done via a single compare & swap transaction
+ * (most ICP state fits in the union kvmppc_icp_state)
+ */
+
+/*
+ * TODO
+ * ====
+ *
+ * - To speed up resends, keep a bitmap of "resend" set bits in the
+ *   ICS
+ *
+ * - Speed up server# -> ICP lookup (array ? hash table ?)
+ *
+ * - Make ICS lockless as well, or at least a per-interrupt lock or hashed
+ *   locks array to improve scalability
+ */
+
+/* -- ICS routines -- */
+
+static void icp_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
+			    u32 new_irq);
+
+static int ics_deliver_irq(struct kvmppc_xics *xics, u32 irq, u32 level,
+			   bool report_status)
+{
+	struct ics_irq_state *state;
+	struct kvmppc_ics *ics;
+	u16 src;
+
+	XICS_DBG("ics deliver %#x (level: %d)\n", irq, level);
+
+	ics = kvmppc_xics_find_ics(xics, irq, &src);
+	if (!ics) {
+		XICS_DBG("ics_deliver_irq: IRQ 0x%06x not found !\n", irq);
+		return -EINVAL;
+	}
+	state = &ics->irq_state[src];
+	if (!state->exists)
+		return -EINVAL;
+
+	if (report_status)
+		return state->asserted;
+
+	/*
+	 * We set state->asserted locklessly. This should be fine as
+	 * we are the only setter, thus concurrent access is undefined
+	 * to begin with.
+	 */
+	if (level == KVM_INTERRUPT_SET_LEVEL)
+		state->asserted = 1;
+	else if (level == KVM_INTERRUPT_UNSET) {
+		state->asserted = 0;
+		return 0;
+	}
+
+	/* Attempt delivery */
+	icp_deliver_irq(xics, NULL, irq);
+
+	return state->asserted;
+}
+
+static void ics_check_resend(struct kvmppc_xics *xics, struct kvmppc_ics *ics,
+			     struct kvmppc_icp *icp)
+{
+	int i;
+
+	mutex_lock(&ics->lock);
+
+	for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) {
+		struct ics_irq_state *state = &ics->irq_state[i];
+
+		if (!state->resend)
+			continue;
+
+		XICS_DBG("resend %#x prio %#x\n", state->number,
+			      state->priority);
+
+		mutex_unlock(&ics->lock);
+		icp_deliver_irq(xics, icp, state->number);
+		mutex_lock(&ics->lock);
+	}
+
+	mutex_unlock(&ics->lock);
+}
+
+static bool write_xive(struct kvmppc_xics *xics, struct kvmppc_ics *ics,
+		       struct ics_irq_state *state,
+		       u32 server, u32 priority, u32 saved_priority)
+{
+	bool deliver;
+
+	mutex_lock(&ics->lock);
+
+	state->server = server;
+	state->priority = priority;
+	state->saved_priority = saved_priority;
+	deliver = false;
+	if ((state->masked_pending || state->resend) && priority != MASKED) {
+		state->masked_pending = 0;
+		deliver = true;
+	}
+
+	mutex_unlock(&ics->lock);
+
+	return deliver;
+}
+
+int kvmppc_xics_set_xive(struct kvm *kvm, u32 irq, u32 server, u32 priority)
+{
+	struct kvmppc_xics *xics = kvm->arch.xics;
+	struct kvmppc_icp *icp;
+	struct kvmppc_ics *ics;
+	struct ics_irq_state *state;
+	u16 src;
+
+	if (!xics)
+		return -ENODEV;
+
+	ics = kvmppc_xics_find_ics(xics, irq, &src);
+	if (!ics)
+		return -EINVAL;
+	state = &ics->irq_state[src];
+
+	icp = kvmppc_xics_find_server(kvm, server);
+	if (!icp)
+		return -EINVAL;
+
+	XICS_DBG("set_xive %#x server %#x prio %#x MP:%d RS:%d\n",
+		 irq, server, priority,
+		 state->masked_pending, state->resend);
+
+	if (write_xive(xics, ics, state, server, priority, priority))
+		icp_deliver_irq(xics, icp, irq);
+
+	return 0;
+}
+
+int kvmppc_xics_get_xive(struct kvm *kvm, u32 irq, u32 *server, u32 *priority)
+{
+	struct kvmppc_xics *xics = kvm->arch.xics;
+	struct kvmppc_ics *ics;
+	struct ics_irq_state *state;
+	u16 src;
+
+	if (!xics)
+		return -ENODEV;
+
+	ics = kvmppc_xics_find_ics(xics, irq, &src);
+	if (!ics)
+		return -EINVAL;
+	state = &ics->irq_state[src];
+
+	mutex_lock(&ics->lock);
+	*server = state->server;
+	*priority = state->priority;
+	mutex_unlock(&ics->lock);
+
+	return 0;
+}
+
+int kvmppc_xics_int_on(struct kvm *kvm, u32 irq)
+{
+	struct kvmppc_xics *xics = kvm->arch.xics;
+	struct kvmppc_icp *icp;
+	struct kvmppc_ics *ics;
+	struct ics_irq_state *state;
+	u16 src;
+
+	if (!xics)
+		return -ENODEV;
+
+	ics = kvmppc_xics_find_ics(xics, irq, &src);
+	if (!ics)
+		return -EINVAL;
+	state = &ics->irq_state[src];
+
+	icp = kvmppc_xics_find_server(kvm, state->server);
+	if (!icp)
+		return -EINVAL;
+
+	if (write_xive(xics, ics, state, state->server, state->saved_priority,
+		       state->saved_priority))
+		icp_deliver_irq(xics, icp, irq);
+
+	return 0;
+}
+
+int kvmppc_xics_int_off(struct kvm *kvm, u32 irq)
+{
+	struct kvmppc_xics *xics = kvm->arch.xics;
+	struct kvmppc_ics *ics;
+	struct ics_irq_state *state;
+	u16 src;
+
+	if (!xics)
+		return -ENODEV;
+
+	ics = kvmppc_xics_find_ics(xics, irq, &src);
+	if (!ics)
+		return -EINVAL;
+	state = &ics->irq_state[src];
+
+	write_xive(xics, ics, state, state->server, MASKED, state->priority);
+
+	return 0;
+}
+
+/* -- ICP routines, including hcalls -- */
+
+static inline bool icp_try_update(struct kvmppc_icp *icp,
+				  union kvmppc_icp_state old,
+				  union kvmppc_icp_state new,
+				  bool change_self)
+{
+	bool success;
+
+	/* Calculate new output value */
+	new.out_ee = (new.xisr && (new.pending_pri < new.cppr));
+
+	/* Attempt atomic update */
+	success = cmpxchg64(&icp->state.raw, old.raw, new.raw) == old.raw;
+	if (!success)
+		goto bail;
+
+	XICS_DBG("UPD [%04x] - C:%02x M:%02x PP: %02x PI:%06x R:%d O:%d\n",
+		 icp->server_num,
+		 old.cppr, old.mfrr, old.pending_pri, old.xisr,
+		 old.need_resend, old.out_ee);
+	XICS_DBG("UPD        - C:%02x M:%02x PP: %02x PI:%06x R:%d O:%d\n",
+		 new.cppr, new.mfrr, new.pending_pri, new.xisr,
+		 new.need_resend, new.out_ee);
+	/*
+	 * Check for output state update
+	 *
+	 * Note that this is racy since another processor could be updating
+	 * the state already. This is why we never clear the interrupt output
+	 * here, we only ever set it. The clear only happens prior to doing
+	 * an update and only by the processor itself. Currently we do it
+	 * in Accept (H_XIRR) and Up_CPPR (H_CPPR).
+	 *
+	 * We also do not try to figure out whether the EE state has changed,
+	 * we unconditionally set it if the new state calls for it. The reason
+	 * for that is that we opportunistically remove the pending interrupt
+	 * flag when raising CPPR, so we need to set it back here if an
+	 * interrupt is still pending.
+	 */
+	if (new.out_ee) {
+		kvmppc_book3s_queue_irqprio(icp->vcpu,
+					    BOOK3S_INTERRUPT_EXTERNAL_LEVEL);
+		if (!change_self)
+			kvmppc_fast_vcpu_kick(icp->vcpu);
+	}
+ bail:
+	return success;
+}
+
+static void icp_check_resend(struct kvmppc_xics *xics,
+			     struct kvmppc_icp *icp)
+{
+	u32 icsid;
+
+	/* Order this load with the test for need_resend in the caller */
+	smp_rmb();
+	for_each_set_bit(icsid, icp->resend_map, xics->max_icsid + 1) {
+		struct kvmppc_ics *ics = xics->ics[icsid];
+
+		if (!test_and_clear_bit(icsid, icp->resend_map))
+			continue;
+		if (!ics)
+			continue;
+		ics_check_resend(xics, ics, icp);
+	}
+}
+
+static bool icp_try_to_deliver(struct kvmppc_icp *icp, u32 irq, u8 priority,
+			       u32 *reject)
+{
+	union kvmppc_icp_state old_state, new_state;
+	bool success;
+
+	XICS_DBG("try deliver %#x(P:%#x) to server %#x\n", irq, priority,
+		 icp->server_num);
+
+	do {
+		old_state = new_state = ACCESS_ONCE(icp->state);
+
+		*reject = 0;
+
+		/* See if we can deliver */
+		success = new_state.cppr > priority &&
+			new_state.mfrr > priority &&
+			new_state.pending_pri > priority;
+
+		/*
+		 * If we can, check for a rejection and perform the
+		 * delivery
+		 */
+		if (success) {
+			*reject = new_state.xisr;
+			new_state.xisr = irq;
+			new_state.pending_pri = priority;
+		} else {
+			/*
+			 * If we failed to deliver we set need_resend
+			 * so a subsequent CPPR state change causes us
+			 * to try a new delivery.
+			 */
+			new_state.need_resend = true;
+		}
+
+	} while (!icp_try_update(icp, old_state, new_state, false));
+
+	return success;
+}
+
+static void icp_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
+			    u32 new_irq)
+{
+	struct ics_irq_state *state;
+	struct kvmppc_ics *ics;
+	u32 reject;
+	u16 src;
+
+	/*
+	 * This is used both for initial delivery of an interrupt and
+	 * for subsequent rejection.
+	 *
+	 * Rejection can be racy vs. resends. We have evaluated the
+	 * rejection in an atomic ICP transaction which is now complete,
+	 * so potentially the ICP can already accept the interrupt again.
+	 *
+	 * So we need to retry the delivery. Essentially the reject path
+	 * boils down to a failed delivery. Always.
+	 *
+	 * Now the interrupt could also have moved to a different target,
+	 * thus we may need to re-do the ICP lookup as well
+	 */
+
+ again:
+	/* Get the ICS state and lock it */
+	ics = kvmppc_xics_find_ics(xics, new_irq, &src);
+	if (!ics) {
+		XICS_DBG("icp_deliver_irq: IRQ 0x%06x not found !\n", new_irq);
+		return;
+	}
+	state = &ics->irq_state[src];
+
+	/* Get a lock on the ICS */
+	mutex_lock(&ics->lock);
+
+	/* Get our server */
+	if (!icp || state->server != icp->server_num) {
+		icp = kvmppc_xics_find_server(xics->kvm, state->server);
+		if (!icp) {
+			pr_warn("icp_deliver_irq: IRQ 0x%06x server 0x%x not found !\n",
+				new_irq, state->server);
+			goto out;
+		}
+	}
+
+	/* Clear the resend bit of that interrupt */
+	state->resend = 0;
+
+	/*
+	 * If masked, bail out
+	 *
+	 * Note: PAPR doesn't mention anything about masked pending
+	 * when doing a resend, only when doing a delivery.
+	 *
+	 * However that would have the effect of losing a masked
+	 * interrupt that was rejected and isn't consistent with
+	 * the whole masked_pending business which is about not
+	 * losing interrupts that occur while masked.
+	 *
+	 * I don't differentiate normal deliveries and resends, this
+	 * implementation will differ from PAPR and not lose such
+	 * interrupts.
+	 */
+	if (state->priority == MASKED) {
+		XICS_DBG("irq %#x masked pending\n", new_irq);
+		state->masked_pending = 1;
+		goto out;
+	}
+
+	/*
+	 * Try the delivery, this will set the need_resend flag
+	 * in the ICP as part of the atomic transaction if the
+	 * delivery is not possible.
+	 *
+	 * Note that if successful, the new delivery might have itself
+	 * rejected an interrupt that was "delivered" before we took the
+	 * icp mutex.
+	 *
+	 * In this case we do the whole sequence all over again for the
+	 * new guy. We cannot assume that the rejected interrupt is less
+	 * favored than the new one, and thus doesn't need to be delivered,
+	 * because by the time we exit icp_try_to_deliver() the target
+	 * processor may well have already consumed & completed it, and thus
+	 * the rejected interrupt might actually be already acceptable.
+	 */
+	if (icp_try_to_deliver(icp, new_irq, state->priority, &reject)) {
+		/*
+		 * Delivery was successful, did we reject somebody else ?
+		 */
+		if (reject && reject != XICS_IPI) {
+			mutex_unlock(&ics->lock);
+			new_irq = reject;
+			goto again;
+		}
+	} else {
+		/*
+		 * We failed to deliver the interrupt, so we need to set the
+		 * resend map bit and mark the ICS state as needing a resend
+		 */
+		set_bit(ics->icsid, icp->resend_map);
+		state->resend = 1;
+
+		/*
+		 * If the need_resend flag got cleared in the ICP some time
+		 * between icp_try_to_deliver() atomic update and now, then
+		 * we know it might have missed the resend_map bit. So we
+		 * retry
+		 */
+		smp_mb();
+		if (!icp->state.need_resend) {
+			mutex_unlock(&ics->lock);
+			goto again;
+		}
+	}
+ out:
+	mutex_unlock(&ics->lock);
+}
+
+static void icp_down_cppr(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
+			  u8 new_cppr)
+{
+	union kvmppc_icp_state old_state, new_state;
+	bool resend;
+
+	/*
+	 * This handles several related states in one operation:
+	 *
+	 * ICP State: Down_CPPR
+	 *
+	 * Load CPPR with new value and if the XISR is 0
+	 * then check for resends:
+	 *
+	 * ICP State: Resend
+	 *
+	 * If MFRR is more favored than CPPR, check for IPIs
+	 * and notify ICS of a potential resend. This is done
+	 * asynchronously (when used in real mode, we will have
+	 * to exit here).
+	 *
+	 * We do not handle the complete Check_IPI as documented
+	 * here. In the PAPR, this state will be used for both
+	 * Set_MFRR and Down_CPPR. However, we know that we aren't
+	 * changing the MFRR state here so we don't need to handle
+	 * the case of an MFRR causing a reject of a pending irq,
+	 * this will have been handled when the MFRR was set in the
+	 * first place.
+	 *
+	 * Thus we don't have to handle rejects, only resends.
+	 *
+	 * When implementing real mode for HV KVM, resend will lead to
+	 * a H_TOO_HARD return and the whole transaction will be handled
+	 * in virtual mode.
+	 */
+	do {
+		old_state = new_state = ACCESS_ONCE(icp->state);
+
+		/* Down_CPPR */
+		new_state.cppr = new_cppr;
+
+		/*
+		 * Cut down Resend / Check_IPI / IPI
+		 *
+		 * The logic is that we cannot have a pending interrupt
+		 * trumped by an IPI at this point (see above), so we
+		 * know that either the pending interrupt is already an
+		 * IPI (in which case we don't care to override it) or
+		 * it's either more favored than us or non-existent
+		 */
+		if (new_state.mfrr < new_cppr &&
+		    new_state.mfrr <= new_state.pending_pri) {
+			WARN_ON(new_state.xisr != XICS_IPI &&
+				new_state.xisr != 0);
+			new_state.pending_pri = new_state.mfrr;
+			new_state.xisr = XICS_IPI;
+		}
+
+		/* Latch/clear resend bit */
+		resend = new_state.need_resend;
+		new_state.need_resend = 0;
+
+	} while (!icp_try_update(icp, old_state, new_state, true));
+
+	/*
+	 * Now handle resend checks. Those are asynchronous to the ICP
+	 * state update in HW (ie bus transactions) so we can handle them
+	 * separately here too
+	 */
+	if (resend)
+		icp_check_resend(xics, icp);
+}
+
+static noinline unsigned long kvmppc_h_xirr(struct kvm_vcpu *vcpu)
+{
+	union kvmppc_icp_state old_state, new_state;
+	struct kvmppc_icp *icp = vcpu->arch.icp;
+	u32 xirr;
+
+	/* First, remove EE from the processor */
+	kvmppc_book3s_dequeue_irqprio(icp->vcpu,
+				      BOOK3S_INTERRUPT_EXTERNAL_LEVEL);
+
+	/*
+	 * ICP State: Accept_Interrupt
+	 *
+	 * Return the pending interrupt (if any) along with the
+	 * current CPPR, then clear the XISR & set CPPR to the
+	 * pending priority
+	 */
+	do {
+		old_state = new_state = ACCESS_ONCE(icp->state);
+
+		xirr = old_state.xisr | (((u32)old_state.cppr) << 24);
+		if (!old_state.xisr)
+			break;
+		new_state.cppr = new_state.pending_pri;
+		new_state.pending_pri = 0xff;
+		new_state.xisr = 0;
+
+	} while (!icp_try_update(icp, old_state, new_state, true));
+
+	XICS_DBG("h_xirr vcpu %d xirr %#x\n", vcpu->vcpu_id, xirr);
+
+	return xirr;
+}
+
+static noinline int kvmppc_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
+				 unsigned long mfrr)
+{
+	union kvmppc_icp_state old_state, new_state;
+	struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
+	struct kvmppc_icp *icp;
+	u32 reject;
+	bool resend;
+	bool local;
+
+	XICS_DBG("h_ipi vcpu %d to server %lu mfrr %#lx\n",
+		 vcpu->vcpu_id, server, mfrr);
+
+	icp = vcpu->arch.icp;
+	local = icp->server_num == server;
+	if (!local) {
+		icp = kvmppc_xics_find_server(vcpu->kvm, server);
+		if (!icp)
+			return H_PARAMETER;
+	}
+
+	/*
+	 * ICP state: Set_MFRR
+	 *
+	 * If the CPPR is more favored than the new MFRR, then
+	 * nothing needs to be rejected as there can be no XISR to
+	 * reject.  If the MFRR is being made less favored then
+	 * there might be a previously-rejected interrupt needing
+	 * to be resent.
+	 *
+	 * If the CPPR is less favored, then we might be replacing
+	 * an interrupt, and thus need to possibly reject it as in
+	 *
+	 * ICP state: Check_IPI
+	 */
+	do {
+		old_state = new_state = ACCESS_ONCE(icp->state);
+
+		/* Set_MFRR */
+		new_state.mfrr = mfrr;
+
+		/* Check_IPI */
+		reject = 0;
+		resend = false;
+		if (mfrr < new_state.cppr) {
+			/* Reject a pending interrupt if not an IPI */
+			if (mfrr <= new_state.pending_pri)
+				reject = new_state.xisr;
+			new_state.pending_pri = mfrr;
+			new_state.xisr = XICS_IPI;
+		}
+
+		if (mfrr > old_state.mfrr && mfrr > new_state.cppr) {
+			resend = new_state.need_resend;
+			new_state.need_resend = 0;
+		}
+	} while (!icp_try_update(icp, old_state, new_state, local));
+
+	/* Handle reject */
+	if (reject && reject != XICS_IPI)
+		icp_deliver_irq(xics, icp, reject);
+
+	/* Handle resend */
+	if (resend)
+		icp_check_resend(xics, icp);
+
+	return H_SUCCESS;
+}
+
+static noinline void kvmppc_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr)
+{
+	union kvmppc_icp_state old_state, new_state;
+	struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
+	struct kvmppc_icp *icp = vcpu->arch.icp;
+	u32 reject;
+
+	XICS_DBG("h_cppr vcpu %d cppr %#lx\n", vcpu->vcpu_id, cppr);
+
+	/*
+	 * ICP State: Set_CPPR
+	 *
+	 * We can safely compare the new value with the current
+	 * value outside of the transaction as the CPPR is only
+	 * ever changed by the processor on itself
+	 */
+	if (cppr > icp->state.cppr) {
+		icp_down_cppr(xics, icp, cppr);
+		return;
+	} else if (cppr == icp->state.cppr)
+		return;
+
+	/*
+	 * ICP State: Up_CPPR
+	 *
+	 * The processor is raising its priority, this can result
+	 * in a rejection of a pending interrupt:
+	 *
+	 * ICP State: Reject_Current
+	 *
+	 * We can remove EE from the current processor, the update
+	 * transaction will set it again if needed
+	 */
+	kvmppc_book3s_dequeue_irqprio(icp->vcpu,
+				      BOOK3S_INTERRUPT_EXTERNAL_LEVEL);
+
+	do {
+		old_state = new_state = ACCESS_ONCE(icp->state);
+
+		reject = 0;
+		new_state.cppr = cppr;
+
+		if (cppr <= new_state.pending_pri) {
+			reject = new_state.xisr;
+			new_state.xisr = 0;
+			new_state.pending_pri = 0xff;
+		}
+
+	} while (!icp_try_update(icp, old_state, new_state, true));
+
+	/*
+	 * Check for rejects. They are handled by doing a new delivery
+	 * attempt (see comments in icp_deliver_irq).
+	 */
+	if (reject && reject != XICS_IPI)
+		icp_deliver_irq(xics, icp, reject);
+}
+
+static noinline int kvmppc_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
+{
+	struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
+	struct kvmppc_icp *icp = vcpu->arch.icp;
+	struct kvmppc_ics *ics;
+	struct ics_irq_state *state;
+	u32 irq = xirr & 0x00ffffff;
+	u16 src;
+
+	XICS_DBG("h_eoi vcpu %d eoi %#lx\n", vcpu->vcpu_id, xirr);
+
+	/*
+	 * ICP State: EOI
+	 *
+	 * Note: If EOI is incorrectly used by SW to lower the CPPR
+	 * value (ie more favored), we do not check for rejection of
+	 * a pending interrupt; this is a SW error and PAPR specifies
+	 * that we don't have to deal with it.
+	 *
+	 * The sending of an EOI to the ICS is handled after the
+	 * CPPR update
+	 *
+	 * ICP State: Down_CPPR which we handle
+	 * in a separate function as it's shared with H_CPPR.
+	 */
+	icp_down_cppr(xics, icp, xirr >> 24);
+
+	/* IPIs have no EOI */
+	if (irq == XICS_IPI)
+		return H_SUCCESS;
+	/*
+	 * EOI handling: If the interrupt is still asserted, we need to
+	 * resend it. We can take a lockless "peek" at the ICS state here.
+	 *
+	 * "Message" interrupts will never have "asserted" set
+	 */
+	ics = kvmppc_xics_find_ics(xics, irq, &src);
+	if (!ics) {
+		XICS_DBG("h_eoi: IRQ 0x%06x not found !\n", irq);
+		return H_PARAMETER;
+	}
+	state = &ics->irq_state[src];
+
+	/* Still asserted, resend it */
+	if (state->asserted)
+		icp_deliver_irq(xics, icp, irq);
+
+	return H_SUCCESS;
+}
+
+static noinline int kvmppc_xics_rm_complete(struct kvm_vcpu *vcpu, u32 hcall)
+{
+	struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
+	struct kvmppc_icp *icp = vcpu->arch.icp;
+
+	XICS_DBG("XICS_RM: H_%x completing, act: %x state: %lx tgt: %p\n",
+		 hcall, icp->rm_action, icp->rm_dbgstate.raw, icp->rm_dbgtgt);
+
+	if (icp->rm_action & XICS_RM_KICK_VCPU)
+		kvmppc_fast_vcpu_kick(icp->rm_kick_target);
+	if (icp->rm_action & XICS_RM_CHECK_RESEND)
+		icp_check_resend(xics, icp);
+	if (icp->rm_action & XICS_RM_REJECT)
+		icp_deliver_irq(xics, icp, icp->rm_reject);
+
+	icp->rm_action = 0;
+
+	return H_SUCCESS;
+}
+
+int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 req)
+{
+	struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
+	unsigned long res;
+	int rc = H_SUCCESS;
+
+	/* Check if we have an ICP */
+	if (!xics || !vcpu->arch.icp)
+		return H_HARDWARE;
+
+	/* Check for real mode returning too hard */
+	if (xics->real_mode)
+		return kvmppc_xics_rm_complete(vcpu, req);
+
+	switch (req) {
+	case H_XIRR:
+		res = kvmppc_h_xirr(vcpu);
+		kvmppc_set_gpr(vcpu, 4, res);
+		break;
+	case H_CPPR:
+		kvmppc_h_cppr(vcpu, kvmppc_get_gpr(vcpu, 4));
+		break;
+	case H_EOI:
+		rc = kvmppc_h_eoi(vcpu, kvmppc_get_gpr(vcpu, 4));
+		break;
+	case H_IPI:
+		rc = kvmppc_h_ipi(vcpu, kvmppc_get_gpr(vcpu, 4),
+				  kvmppc_get_gpr(vcpu, 5));
+		break;
+	}
+
+	return rc;
+}
+
+
+/* -- Initialisation code etc. -- */
+
+static int xics_debug_show(struct seq_file *m, void *private)
+{
+	struct kvmppc_xics *xics = m->private;
+	struct kvm *kvm = xics->kvm;
+	struct kvm_vcpu *vcpu;
+	int icsid, i;
+
+	if (!kvm)
+		return 0;
+
+	seq_printf(m, "=========\nICP state\n=========\n");
+
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		struct kvmppc_icp *icp = vcpu->arch.icp;
+		union kvmppc_icp_state state;
+
+		if (!icp)
+			continue;
+
+		state.raw = ACCESS_ONCE(icp->state.raw);
+		seq_printf(m, "cpu server %#lx XIRR:%#x PPRI:%#x CPPR:%#x MFRR:%#x OUT:%d NR:%d\n",
+			   icp->server_num, state.xisr,
+			   state.pending_pri, state.cppr, state.mfrr,
+			   state.out_ee, state.need_resend);
+	}
+
+	for (icsid = 0; icsid <= KVMPPC_XICS_MAX_ICS_ID; icsid++) {
+		struct kvmppc_ics *ics = xics->ics[icsid];
+
+		if (!ics)
+			continue;
+
+		seq_printf(m, "=========\nICS state for ICS 0x%x\n=========\n",
+			   icsid);
+
+		mutex_lock(&ics->lock);
+
+		for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) {
+			struct ics_irq_state *irq = &ics->irq_state[i];
+
+			seq_printf(m, "irq 0x%06x: server %#x prio %#x save prio %#x asserted %d resend %d masked pending %d\n",
+				   irq->number, irq->server, irq->priority,
+				   irq->saved_priority, irq->asserted,
+				   irq->resend, irq->masked_pending);
+
+		}
+		mutex_unlock(&ics->lock);
+	}
+	return 0;
+}
+
+static int xics_debug_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, xics_debug_show, inode->i_private);
+}
+
+static const struct file_operations xics_debug_fops = {
+	.open = xics_debug_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+static void xics_debugfs_init(struct kvmppc_xics *xics)
+{
+	char *name;
+
+	name = kasprintf(GFP_KERNEL, "kvm-xics-%p", xics);
+	if (!name) {
+		pr_err("%s: no memory for name\n", __func__);
+		return;
+	}
+
+	xics->dentry = debugfs_create_file(name, S_IRUGO, powerpc_debugfs_root,
+					   xics, &xics_debug_fops);
+
+	pr_debug("%s: created %s\n", __func__, name);
+	kfree(name);
+}
+
+static struct kvmppc_ics *kvmppc_xics_create_ics(struct kvm *kvm,
+					struct kvmppc_xics *xics, int irq)
+{
+	struct kvmppc_ics *ics;
+	int i, icsid;
+
+	icsid = irq >> KVMPPC_XICS_ICS_SHIFT;
+
+	mutex_lock(&kvm->lock);
+
+	/* ICS already exists - somebody else got here first */
+	if (xics->ics[icsid])
+		goto out;
+
+	/* Create the ICS */
+	ics = kzalloc(sizeof(struct kvmppc_ics), GFP_KERNEL);
+	if (!ics)
+		goto out;
+
+	mutex_init(&ics->lock);
+	ics->icsid = icsid;
+
+	for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) {
+		ics->irq_state[i].number = (icsid << KVMPPC_XICS_ICS_SHIFT) | i;
+		ics->irq_state[i].priority = MASKED;
+		ics->irq_state[i].saved_priority = MASKED;
+	}
+	smp_wmb();
+	xics->ics[icsid] = ics;
+
+	if (icsid > xics->max_icsid)
+		xics->max_icsid = icsid;
+
+ out:
+	mutex_unlock(&kvm->lock);
+	return xics->ics[icsid];
+}
+
+int kvmppc_xics_create_icp(struct kvm_vcpu *vcpu, unsigned long server_num)
+{
+	struct kvmppc_icp *icp;
+
+	if (!vcpu->kvm->arch.xics)
+		return -ENODEV;
+
+	if (kvmppc_xics_find_server(vcpu->kvm, server_num))
+		return -EEXIST;
+
+	icp = kzalloc(sizeof(struct kvmppc_icp), GFP_KERNEL);
+	if (!icp)
+		return -ENOMEM;
+
+	icp->vcpu = vcpu;
+	icp->server_num = server_num;
+	icp->state.mfrr = MASKED;
+	icp->state.pending_pri = MASKED;
+	vcpu->arch.icp = icp;
+
+	XICS_DBG("created server for vcpu %d\n", vcpu->vcpu_id);
+
+	return 0;
+}
+
+u64 kvmppc_xics_get_icp(struct kvm_vcpu *vcpu)
+{
+	struct kvmppc_icp *icp = vcpu->arch.icp;
+	union kvmppc_icp_state state;
+
+	if (!icp)
+		return 0;
+	state = icp->state;
+	return ((u64)state.cppr << KVM_REG_PPC_ICP_CPPR_SHIFT) |
+		((u64)state.xisr << KVM_REG_PPC_ICP_XISR_SHIFT) |
+		((u64)state.mfrr << KVM_REG_PPC_ICP_MFRR_SHIFT) |
+		((u64)state.pending_pri << KVM_REG_PPC_ICP_PPRI_SHIFT);
+}
+
+int kvmppc_xics_set_icp(struct kvm_vcpu *vcpu, u64 icpval)
+{
+	struct kvmppc_icp *icp = vcpu->arch.icp;
+	struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
+	union kvmppc_icp_state old_state, new_state;
+	struct kvmppc_ics *ics;
+	u8 cppr, mfrr, pending_pri;
+	u32 xisr;
+	u16 src;
+	bool resend;
+
+	if (!icp || !xics)
+		return -ENOENT;
+
+	cppr = icpval >> KVM_REG_PPC_ICP_CPPR_SHIFT;
+	xisr = (icpval >> KVM_REG_PPC_ICP_XISR_SHIFT) &
+		KVM_REG_PPC_ICP_XISR_MASK;
+	mfrr = icpval >> KVM_REG_PPC_ICP_MFRR_SHIFT;
+	pending_pri = icpval >> KVM_REG_PPC_ICP_PPRI_SHIFT;
+
+	/* Require the new state to be internally consistent */
+	if (xisr == 0) {
+		if (pending_pri != 0xff)
+			return -EINVAL;
+	} else if (xisr == XICS_IPI) {
+		if (pending_pri != mfrr || pending_pri >= cppr)
+			return -EINVAL;
+	} else {
+		if (pending_pri >= mfrr || pending_pri >= cppr)
+			return -EINVAL;
+		ics = kvmppc_xics_find_ics(xics, xisr, &src);
+		if (!ics)
+			return -EINVAL;
+	}
+
+	new_state.raw = 0;
+	new_state.cppr = cppr;
+	new_state.xisr = xisr;
+	new_state.mfrr = mfrr;
+	new_state.pending_pri = pending_pri;
+
+	/*
+	 * Deassert the CPU interrupt request.
+	 * icp_try_update will reassert it if necessary.
+	 */
+	kvmppc_book3s_dequeue_irqprio(icp->vcpu,
+				      BOOK3S_INTERRUPT_EXTERNAL_LEVEL);
+
+	/*
+	 * Note that if we displace an interrupt from old_state.xisr,
+	 * we don't mark it as rejected.  We expect userspace to set
+	 * the state of the interrupt sources to be consistent with
+	 * the ICP states (either before or afterwards, which doesn't
+	 * matter).  We do handle resends due to CPPR becoming less
+	 * favoured because that is necessary to end up with a
+	 * consistent state in the situation where userspace restores
+	 * the ICS states before the ICP states.
+	 */
+	do {
+		old_state = ACCESS_ONCE(icp->state);
+
+		if (new_state.mfrr <= old_state.mfrr) {
+			resend = false;
+			new_state.need_resend = old_state.need_resend;
+		} else {
+			resend = old_state.need_resend;
+			new_state.need_resend = 0;
+		}
+	} while (!icp_try_update(icp, old_state, new_state, false));
+
+	if (resend)
+		icp_check_resend(xics, icp);
+
+	return 0;
+}
+
+static int xics_get_source(struct kvmppc_xics *xics, long irq, u64 addr)
+{
+	int ret;
+	struct kvmppc_ics *ics;
+	struct ics_irq_state *irqp;
+	u64 __user *ubufp = (u64 __user *) addr;
+	u16 idx;
+	u64 val, prio;
+
+	ics = kvmppc_xics_find_ics(xics, irq, &idx);
+	if (!ics)
+		return -ENOENT;
+
+	irqp = &ics->irq_state[idx];
+	mutex_lock(&ics->lock);
+	ret = -ENOENT;
+	if (irqp->exists) {
+		val = irqp->server;
+		prio = irqp->priority;
+		if (prio == MASKED) {
+			val |= KVM_XICS_MASKED;
+			prio = irqp->saved_priority;
+		}
+		val |= prio << KVM_XICS_PRIORITY_SHIFT;
+		if (irqp->asserted)
+			val |= KVM_XICS_LEVEL_SENSITIVE | KVM_XICS_PENDING;
+		else if (irqp->masked_pending || irqp->resend)
+			val |= KVM_XICS_PENDING;
+		ret = 0;
+	}
+	mutex_unlock(&ics->lock);
+
+	if (!ret && put_user(val, ubufp))
+		ret = -EFAULT;
+
+	return ret;
+}
+
+static int xics_set_source(struct kvmppc_xics *xics, long irq, u64 addr)
+{
+	struct kvmppc_ics *ics;
+	struct ics_irq_state *irqp;
+	u64 __user *ubufp = (u64 __user *) addr;
+	u16 idx;
+	u64 val;
+	u8 prio;
+	u32 server;
+
+	if (irq < KVMPPC_XICS_FIRST_IRQ || irq >= KVMPPC_XICS_NR_IRQS)
+		return -ENOENT;
+
+	ics = kvmppc_xics_find_ics(xics, irq, &idx);
+	if (!ics) {
+		ics = kvmppc_xics_create_ics(xics->kvm, xics, irq);
+		if (!ics)
+			return -ENOMEM;
+	}
+	irqp = &ics->irq_state[idx];
+	if (get_user(val, ubufp))
+		return -EFAULT;
+
+	server = val & KVM_XICS_DESTINATION_MASK;
+	prio = val >> KVM_XICS_PRIORITY_SHIFT;
+	if (prio != MASKED &&
+	    kvmppc_xics_find_server(xics->kvm, server) == NULL)
+		return -EINVAL;
+
+	mutex_lock(&ics->lock);
+	irqp->server = server;
+	irqp->saved_priority = prio;
+	if (val & KVM_XICS_MASKED)
+		prio = MASKED;
+	irqp->priority = prio;
+	irqp->resend = 0;
+	irqp->masked_pending = 0;
+	irqp->asserted = 0;
+	if ((val & KVM_XICS_PENDING) && (val & KVM_XICS_LEVEL_SENSITIVE))
+		irqp->asserted = 1;
+	irqp->exists = 1;
+	mutex_unlock(&ics->lock);
+
+	if (val & KVM_XICS_PENDING)
+		icp_deliver_irq(xics, NULL, irqp->number);
+
+	return 0;
+}
+
+int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
+		bool line_status)
+{
+	struct kvmppc_xics *xics = kvm->arch.xics;
+
+	return ics_deliver_irq(xics, irq, level, line_status);
+}
+
+static int xics_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
+{
+	struct kvmppc_xics *xics = dev->private;
+
+	switch (attr->group) {
+	case KVM_DEV_XICS_GRP_SOURCES:
+		return xics_set_source(xics, attr->attr, attr->addr);
+	}
+	return -ENXIO;
+}
+
+static int xics_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
+{
+	struct kvmppc_xics *xics = dev->private;
+
+	switch (attr->group) {
+	case KVM_DEV_XICS_GRP_SOURCES:
+		return xics_get_source(xics, attr->attr, attr->addr);
+	}
+	return -ENXIO;
+}
+
+static int xics_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
+{
+	switch (attr->group) {
+	case KVM_DEV_XICS_GRP_SOURCES:
+		if (attr->attr >= KVMPPC_XICS_FIRST_IRQ &&
+		    attr->attr < KVMPPC_XICS_NR_IRQS)
+			return 0;
+		break;
+	}
+	return -ENXIO;
+}
+
+static void kvmppc_xics_free(struct kvm_device *dev)
+{
+	struct kvmppc_xics *xics = dev->private;
+	int i;
+	struct kvm *kvm = xics->kvm;
+
+	debugfs_remove(xics->dentry);
+
+	if (kvm)
+		kvm->arch.xics = NULL;
+
+	for (i = 0; i <= xics->max_icsid; i++)
+		kfree(xics->ics[i]);
+	kfree(xics);
+	kfree(dev);
+}
+
+static int kvmppc_xics_create(struct kvm_device *dev, u32 type)
+{
+	struct kvmppc_xics *xics;
+	struct kvm *kvm = dev->kvm;
+	int ret = 0;
+
+	xics = kzalloc(sizeof(*xics), GFP_KERNEL);
+	if (!xics)
+		return -ENOMEM;
+
+	dev->private = xics;
+	xics->dev = dev;
+	xics->kvm = kvm;
+
+	/* Already there? */
+	mutex_lock(&kvm->lock);
+	if (kvm->arch.xics)
+		ret = -EEXIST;
+	else
+		kvm->arch.xics = xics;
+	mutex_unlock(&kvm->lock);
+
+	if (ret)
+		return ret;
+
+	xics_debugfs_init(xics);
+
+#ifdef CONFIG_KVM_BOOK3S_64_HV
+	if (cpu_has_feature(CPU_FTR_ARCH_206)) {
+		/* Enable real mode support */
+		xics->real_mode = ENABLE_REALMODE;
+		xics->real_mode_dbg = DEBUG_REALMODE;
+	}
+#endif /* CONFIG_KVM_BOOK3S_64_HV */
+
+	return 0;
+}
+
+struct kvm_device_ops kvm_xics_ops = {
+	.name = "kvm-xics",
+	.create = kvmppc_xics_create,
+	.destroy = kvmppc_xics_free,
+	.set_attr = xics_set_attr,
+	.get_attr = xics_get_attr,
+	.has_attr = xics_has_attr,
+};
+
+int kvmppc_xics_connect_vcpu(struct kvm_device *dev, struct kvm_vcpu *vcpu,
+			     u32 xcpu)
+{
+	struct kvmppc_xics *xics = dev->private;
+	int r = -EBUSY;
+
+	if (dev->ops != &kvm_xics_ops)
+		return -EPERM;
+	if (xics->kvm != vcpu->kvm)
+		return -EPERM;
+	if (vcpu->arch.irq_type)
+		return -EBUSY;
+
+	r = kvmppc_xics_create_icp(vcpu, xcpu);
+	if (!r)
+		vcpu->arch.irq_type = KVMPPC_IRQ_XICS;
+
+	return r;
+}
+
+void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu)
+{
+	if (!vcpu->arch.icp)
+		return;
+	kfree(vcpu->arch.icp);
+	vcpu->arch.icp = NULL;
+	vcpu->arch.irq_type = KVMPPC_IRQ_DEFAULT;
+}
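
For orientation, here is a minimal userspace sketch of driving the in-kernel XICS above through the device control API (KVM_CREATE_DEVICE / KVM_SET_DEVICE_ATTR, both introduced alongside this series). The helper name and values are hypothetical and error handling is reduced to a bare minimum:

/* Hypothetical helper, not part of this commit: create the XICS device
 * for a VM and configure one interrupt source.
 */
#include <linux/kvm.h>
#include <sys/ioctl.h>

static int xics_wire_up_source(int vm_fd, __u32 irq, __u32 server, __u8 prio)
{
	struct kvm_create_device cd = { .type = KVM_DEV_TYPE_XICS };
	struct kvm_device_attr attr = { .group = KVM_DEV_XICS_GRP_SOURCES };
	__u64 state;

	if (ioctl(vm_fd, KVM_CREATE_DEVICE, &cd) < 0)
		return -1;		/* on success cd.fd is the device fd */

	state = server | ((__u64)prio << KVM_XICS_PRIORITY_SHIFT);
	attr.attr = irq;		/* source number; low numbers are reserved */
	attr.addr = (__u64)(unsigned long)&state;
	return ioctl(cd.fd, KVM_SET_DEVICE_ATTR, &attr);
}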

+ 130 - 0
arch/powerpc/kvm/book3s_xics.h

@@ -0,0 +1,130 @@
+/*
+ * Copyright 2012 Michael Ellerman, IBM Corporation.
+ * Copyright 2012 Benjamin Herrenschmidt, IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef _KVM_PPC_BOOK3S_XICS_H
+#define _KVM_PPC_BOOK3S_XICS_H
+
+/*
+ * We use a two-level tree to store interrupt source information.
+ * There are up to 1024 ICS nodes, each of which can represent
+ * 1024 sources.
+ */
+#define KVMPPC_XICS_MAX_ICS_ID	1023
+#define KVMPPC_XICS_ICS_SHIFT	10
+#define KVMPPC_XICS_IRQ_PER_ICS	(1 << KVMPPC_XICS_ICS_SHIFT)
+#define KVMPPC_XICS_SRC_MASK	(KVMPPC_XICS_IRQ_PER_ICS - 1)
+
+/*
+ * Interrupt source numbers below this are reserved, for example
+ * 0 is "no interrupt", and 2 is used for IPIs.
+ */
+#define KVMPPC_XICS_FIRST_IRQ	16
+#define KVMPPC_XICS_NR_IRQS	((KVMPPC_XICS_MAX_ICS_ID + 1) * \
+				 KVMPPC_XICS_IRQ_PER_ICS)
+
+/* Priority value to use for disabling an interrupt */
+#define MASKED	0xff
+
+/* State for one irq source */
+struct ics_irq_state {
+	u32 number;
+	u32 server;
+	u8  priority;
+	u8  saved_priority;
+	u8  resend;
+	u8  masked_pending;
+	u8  asserted; /* Only for LSI */
+	u8  exists;
+};
+
+/* Atomic ICP state, updated with a single compare & swap */
+union kvmppc_icp_state {
+	unsigned long raw;
+	struct {
+		u8 out_ee:1;
+		u8 need_resend:1;
+		u8 cppr;
+		u8 mfrr;
+		u8 pending_pri;
+		u32 xisr;
+	};
+};
+
+/* One bit per ICS */
+#define ICP_RESEND_MAP_SIZE	(KVMPPC_XICS_MAX_ICS_ID / BITS_PER_LONG + 1)
+
+struct kvmppc_icp {
+	struct kvm_vcpu *vcpu;
+	unsigned long server_num;
+	union kvmppc_icp_state state;
+	unsigned long resend_map[ICP_RESEND_MAP_SIZE];
+
+	/* Real mode might find something too hard, here's the action
+	 * it might request from virtual mode
+	 */
+#define XICS_RM_KICK_VCPU	0x1
+#define XICS_RM_CHECK_RESEND	0x2
+#define XICS_RM_REJECT		0x4
+	u32 rm_action;
+	struct kvm_vcpu *rm_kick_target;
+	u32  rm_reject;
+
+	/* Debug stuff for real mode */
+	union kvmppc_icp_state rm_dbgstate;
+	struct kvm_vcpu *rm_dbgtgt;
+};
+
+struct kvmppc_ics {
+	struct mutex lock;
+	u16 icsid;
+	struct ics_irq_state irq_state[KVMPPC_XICS_IRQ_PER_ICS];
+};
+
+struct kvmppc_xics {
+	struct kvm *kvm;
+	struct kvm_device *dev;
+	struct dentry *dentry;
+	u32 max_icsid;
+	bool real_mode;
+	bool real_mode_dbg;
+	struct kvmppc_ics *ics[KVMPPC_XICS_MAX_ICS_ID + 1];
+};
+
+static inline struct kvmppc_icp *kvmppc_xics_find_server(struct kvm *kvm,
+							 u32 nr)
+{
+	struct kvm_vcpu *vcpu = NULL;
+	int i;
+
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		if (vcpu->arch.icp && nr == vcpu->arch.icp->server_num)
+			return vcpu->arch.icp;
+	}
+	return NULL;
+}
+
+static inline struct kvmppc_ics *kvmppc_xics_find_ics(struct kvmppc_xics *xics,
+						      u32 irq, u16 *source)
+{
+	u32 icsid = irq >> KVMPPC_XICS_ICS_SHIFT;
+	u16 src = irq & KVMPPC_XICS_SRC_MASK;
+	struct kvmppc_ics *ics;
+
+	if (source)
+		*source = src;
+	if (icsid > KVMPPC_XICS_MAX_ICS_ID)
+		return NULL;
+	ics = xics->ics[icsid];
+	if (!ics)
+		return NULL;
+	return ics;
+}
+
+
+#endif /* _KVM_PPC_BOOK3S_XICS_H */
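
The per-CPU presentation state above deliberately fits in one machine word: every ICP transition in book3s_xics.c is a read of state.raw, a recomputation, and a single compare-and-swap. A condensed sketch of the icp_try_update() pattern used throughout (the real function also re-asserts the guest's external interrupt and kicks the target vcpu when needed; this illustration assumes the definitions above):

#include <linux/atomic.h>

/* Sketch only: commit a new ICP state if nobody raced with us. */
static bool icp_cas(struct kvmppc_icp *icp,
		    union kvmppc_icp_state old,
		    union kvmppc_icp_state new)
{
	return cmpxchg(&icp->state.raw, old.raw, new.raw) == old.raw;
}

A failed compare-and-swap simply restarts the surrounding do/while loop with a freshly sampled state, which is why none of the H_XIRR/H_IPI/H_CPPR/H_EOI paths need a lock on the ICP.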

+ 110 - 48
arch/powerpc/kvm/booke.c

@@ -222,8 +222,7 @@ void kvmppc_core_queue_external(struct kvm_vcpu *vcpu,
 	kvmppc_booke_queue_irqprio(vcpu, prio);
 }
 
-void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu,
-                                  struct kvm_interrupt *irq)
+void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu)
 {
 	clear_bit(BOOKE_IRQPRIO_EXTERNAL, &vcpu->arch.pending_exceptions);
 	clear_bit(BOOKE_IRQPRIO_EXTERNAL_LEVEL, &vcpu->arch.pending_exceptions);
@@ -347,7 +346,7 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
 		keep_irq = true;
 	}
 
-	if ((priority == BOOKE_IRQPRIO_EXTERNAL) && vcpu->arch.epr_enabled)
+	if ((priority == BOOKE_IRQPRIO_EXTERNAL) && vcpu->arch.epr_flags)
 		update_epr = true;
 
 	switch (priority) {
@@ -428,8 +427,14 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
 			set_guest_esr(vcpu, vcpu->arch.queued_esr);
 		if (update_dear == true)
 			set_guest_dear(vcpu, vcpu->arch.queued_dear);
-		if (update_epr == true)
-			kvm_make_request(KVM_REQ_EPR_EXIT, vcpu);
+		if (update_epr == true) {
+			if (vcpu->arch.epr_flags & KVMPPC_EPR_USER)
+				kvm_make_request(KVM_REQ_EPR_EXIT, vcpu);
+			else if (vcpu->arch.epr_flags & KVMPPC_EPR_KERNEL) {
+				BUG_ON(vcpu->arch.irq_type != KVMPPC_IRQ_MPIC);
+				kvmppc_mpic_set_epr(vcpu);
+			}
+		}
 
 		new_msr &= msr_mask;
 #if defined(CONFIG_64BIT)
@@ -746,6 +751,9 @@ static int emulation_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
 		kvmppc_core_queue_program(vcpu, ESR_PIL);
 		return RESUME_HOST;
 
+	case EMULATE_EXIT_USER:
+		return RESUME_HOST;
+
 	default:
 		BUG();
 	}
@@ -1148,6 +1156,18 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	return r;
 }
 
+static void kvmppc_set_tsr(struct kvm_vcpu *vcpu, u32 new_tsr)
+{
+	u32 old_tsr = vcpu->arch.tsr;
+
+	vcpu->arch.tsr = new_tsr;
+
+	if ((old_tsr ^ vcpu->arch.tsr) & (TSR_ENW | TSR_WIS))
+		arm_next_watchdog(vcpu);
+
+	update_timer_ints(vcpu);
+}
+
 /* Initial guest state: 16MB mapping 0 -> 0, PC = 0, MSR = 0, R1 = 16MB */
 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 {
@@ -1287,16 +1307,8 @@ static int set_sregs_base(struct kvm_vcpu *vcpu,
 		kvmppc_emulate_dec(vcpu);
 	}
 
-	if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_TSR) {
-		u32 old_tsr = vcpu->arch.tsr;
-
-		vcpu->arch.tsr = sregs->u.e.tsr;
-
-		if ((old_tsr ^ vcpu->arch.tsr) & (TSR_ENW | TSR_WIS))
-			arm_next_watchdog(vcpu);
-
-		update_timer_ints(vcpu);
-	}
+	if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_TSR)
+		kvmppc_set_tsr(vcpu, sregs->u.e.tsr);
 
 	return 0;
 }
@@ -1409,84 +1421,134 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
 
 int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
 {
-	int r = -EINVAL;
+	int r = 0;
+	union kvmppc_one_reg val;
+	int size;
+	long int i;
+
+	size = one_reg_size(reg->id);
+	if (size > sizeof(val))
+		return -EINVAL;
 
 	switch (reg->id) {
 	case KVM_REG_PPC_IAC1:
 	case KVM_REG_PPC_IAC2:
 	case KVM_REG_PPC_IAC3:
-	case KVM_REG_PPC_IAC4: {
-		int iac = reg->id - KVM_REG_PPC_IAC1;
-		r = copy_to_user((u64 __user *)(long)reg->addr,
-				 &vcpu->arch.dbg_reg.iac[iac], sizeof(u64));
+	case KVM_REG_PPC_IAC4:
+		i = reg->id - KVM_REG_PPC_IAC1;
+		val = get_reg_val(reg->id, vcpu->arch.dbg_reg.iac[i]);
 		break;
-	}
 	case KVM_REG_PPC_DAC1:
-	case KVM_REG_PPC_DAC2: {
-		int dac = reg->id - KVM_REG_PPC_DAC1;
-		r = copy_to_user((u64 __user *)(long)reg->addr,
-				 &vcpu->arch.dbg_reg.dac[dac], sizeof(u64));
+	case KVM_REG_PPC_DAC2:
+		i = reg->id - KVM_REG_PPC_DAC1;
+		val = get_reg_val(reg->id, vcpu->arch.dbg_reg.dac[i]);
 		break;
-	}
 	case KVM_REG_PPC_EPR: {
 		u32 epr = get_guest_epr(vcpu);
-		r = put_user(epr, (u32 __user *)(long)reg->addr);
+		val = get_reg_val(reg->id, epr);
 		break;
 	}
 #if defined(CONFIG_64BIT)
 	case KVM_REG_PPC_EPCR:
-		r = put_user(vcpu->arch.epcr, (u32 __user *)(long)reg->addr);
+		val = get_reg_val(reg->id, vcpu->arch.epcr);
 		break;
 #endif
+	case KVM_REG_PPC_TCR:
+		val = get_reg_val(reg->id, vcpu->arch.tcr);
+		break;
+	case KVM_REG_PPC_TSR:
+		val = get_reg_val(reg->id, vcpu->arch.tsr);
+		break;
+	case KVM_REG_PPC_DEBUG_INST:
+		val = get_reg_val(reg->id, KVMPPC_INST_EHPRIV);
+		break;
 	default:
+		r = kvmppc_get_one_reg(vcpu, reg->id, &val);
 		break;
 	}
+
+	if (r)
+		return r;
+
+	if (copy_to_user((char __user *)(unsigned long)reg->addr, &val, size))
+		r = -EFAULT;
+
 	return r;
 }
 
 int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
 {
-	int r = -EINVAL;
+	int r = 0;
+	union kvmppc_one_reg val;
+	int size;
+	long int i;
+
+	size = one_reg_size(reg->id);
+	if (size > sizeof(val))
+		return -EINVAL;
+
+	if (copy_from_user(&val, (char __user *)(unsigned long)reg->addr, size))
+		return -EFAULT;
 
 	switch (reg->id) {
 	case KVM_REG_PPC_IAC1:
 	case KVM_REG_PPC_IAC2:
 	case KVM_REG_PPC_IAC3:
-	case KVM_REG_PPC_IAC4: {
-		int iac = reg->id - KVM_REG_PPC_IAC1;
-		r = copy_from_user(&vcpu->arch.dbg_reg.iac[iac],
-			     (u64 __user *)(long)reg->addr, sizeof(u64));
+	case KVM_REG_PPC_IAC4:
+		i = reg->id - KVM_REG_PPC_IAC1;
+		vcpu->arch.dbg_reg.iac[i] = set_reg_val(reg->id, val);
 		break;
-	}
 	case KVM_REG_PPC_DAC1:
-	case KVM_REG_PPC_DAC2: {
-		int dac = reg->id - KVM_REG_PPC_DAC1;
-		r = copy_from_user(&vcpu->arch.dbg_reg.dac[dac],
-			     (u64 __user *)(long)reg->addr, sizeof(u64));
+	case KVM_REG_PPC_DAC2:
+		i = reg->id - KVM_REG_PPC_DAC1;
+		vcpu->arch.dbg_reg.dac[i] = set_reg_val(reg->id, val);
 		break;
-	}
 	case KVM_REG_PPC_EPR: {
-		u32 new_epr;
-		r = get_user(new_epr, (u32 __user *)(long)reg->addr);
-		if (!r)
-			kvmppc_set_epr(vcpu, new_epr);
+		u32 new_epr = set_reg_val(reg->id, val);
+		kvmppc_set_epr(vcpu, new_epr);
 		break;
 	}
 #if defined(CONFIG_64BIT)
 	case KVM_REG_PPC_EPCR: {
-		u32 new_epcr;
-		r = get_user(new_epcr, (u32 __user *)(long)reg->addr);
-		if (r == 0)
-			kvmppc_set_epcr(vcpu, new_epcr);
+		u32 new_epcr = set_reg_val(reg->id, val);
+		kvmppc_set_epcr(vcpu, new_epcr);
 		break;
 	}
 #endif
+	case KVM_REG_PPC_OR_TSR: {
+		u32 tsr_bits = set_reg_val(reg->id, val);
+		kvmppc_set_tsr_bits(vcpu, tsr_bits);
+		break;
+	}
+	case KVM_REG_PPC_CLEAR_TSR: {
+		u32 tsr_bits = set_reg_val(reg->id, val);
+		kvmppc_clr_tsr_bits(vcpu, tsr_bits);
+		break;
+	}
+	case KVM_REG_PPC_TSR: {
+		u32 tsr = set_reg_val(reg->id, val);
+		kvmppc_set_tsr(vcpu, tsr);
+		break;
+	}
+	case KVM_REG_PPC_TCR: {
+		u32 tcr = set_reg_val(reg->id, val);
+		kvmppc_set_tcr(vcpu, tcr);
+		break;
+	}
 	default:
+		r = kvmppc_set_one_reg(vcpu, reg->id, &val);
 		break;
 	}
+
 	return r;
 }
 
+int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
+					 struct kvm_guest_debug *dbg)
+{
+	return -EINVAL;
+}
+
 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
 {
 	return -ENOTSUPP;
@@ -1531,7 +1593,7 @@ int kvmppc_core_prepare_memory_region(struct kvm *kvm,
 
 void kvmppc_core_commit_memory_region(struct kvm *kvm,
 				struct kvm_userspace_memory_region *mem,
-				struct kvm_memory_slot old)
+				const struct kvm_memory_slot *old)
 {
 }
 

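The ONE_REG rework above funnels every register through union kvmppc_one_reg plus one size-checked user copy, instead of per-register put_user()/get_user() calls. A hedged userspace sketch of the matching call (register choice illustrative only; the helper name is hypothetical):

#include <linux/kvm.h>
#include <sys/ioctl.h>

/* Sketch: program the guest timer control register via KVM_SET_ONE_REG. */
static int set_tcr(int vcpu_fd, __u32 tcr)
{
	struct kvm_one_reg reg = {
		.id   = KVM_REG_PPC_TCR,
		.addr = (__u64)(unsigned long)&tcr,
	};

	return ioctl(vcpu_fd, KVM_SET_ONE_REG, &reg);
}
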
+ 39 - 3
arch/powerpc/kvm/booke_interrupts.S

@@ -54,8 +54,7 @@
                        (1<<BOOKE_INTERRUPT_DTLB_MISS) | \
                       (1<<BOOKE_INTERRUPT_ALIGNMENT))
 
-.macro KVM_HANDLER ivor_nr scratch srr0
-_GLOBAL(kvmppc_handler_\ivor_nr)
+.macro __KVM_HANDLER ivor_nr scratch srr0
 	/* Get pointer to vcpu and record exit number. */
 	mtspr	\scratch , r4
 	mfspr   r4, SPRN_SPRG_THREAD
@@ -76,6 +75,43 @@ _GLOBAL(kvmppc_handler_\ivor_nr)
 	bctr
 .endm
 
+.macro KVM_HANDLER ivor_nr scratch srr0
+_GLOBAL(kvmppc_handler_\ivor_nr)
+	__KVM_HANDLER \ivor_nr \scratch \srr0
+.endm
+
+.macro KVM_DBG_HANDLER ivor_nr scratch srr0
+_GLOBAL(kvmppc_handler_\ivor_nr)
+	mtspr   \scratch, r4
+	mfspr	r4, SPRN_SPRG_THREAD
+	lwz	r4, THREAD_KVM_VCPU(r4)
+	stw	r3, VCPU_CRIT_SAVE(r4)
+	mfcr	r3
+	mfspr	r4, SPRN_CSRR1
+	andi.	r4, r4, MSR_PR
+	bne	1f
+	/* debug interrupt happened in enter/exit path */
+	mfspr   r4, SPRN_CSRR1
+	rlwinm  r4, r4, 0, ~MSR_DE
+	mtspr   SPRN_CSRR1, r4
+	lis	r4, 0xffff
+	ori	r4, r4, 0xffff
+	mtspr	SPRN_DBSR, r4
+	mfspr	r4, SPRN_SPRG_THREAD
+	lwz	r4, THREAD_KVM_VCPU(r4)
+	mtcr	r3
+	lwz     r3, VCPU_CRIT_SAVE(r4)
+	mfspr   r4, \scratch
+	rfci
+1:	/* debug interrupt happened in guest */
+	mtcr	r3
+	mfspr	r4, SPRN_SPRG_THREAD
+	lwz	r4, THREAD_KVM_VCPU(r4)
+	lwz     r3, VCPU_CRIT_SAVE(r4)
+	mfspr   r4, \scratch
+	__KVM_HANDLER \ivor_nr \scratch \srr0
+.endm
+
 .macro KVM_HANDLER_ADDR ivor_nr
 	.long	kvmppc_handler_\ivor_nr
 .endm
@@ -100,7 +136,7 @@ KVM_HANDLER BOOKE_INTERRUPT_FIT SPRN_SPRG_RSCRATCH0 SPRN_SRR0
 KVM_HANDLER BOOKE_INTERRUPT_WATCHDOG SPRN_SPRG_RSCRATCH_CRIT SPRN_CSRR0
 KVM_HANDLER BOOKE_INTERRUPT_DTLB_MISS SPRN_SPRG_RSCRATCH0 SPRN_SRR0
 KVM_HANDLER BOOKE_INTERRUPT_ITLB_MISS SPRN_SPRG_RSCRATCH0 SPRN_SRR0
-KVM_HANDLER BOOKE_INTERRUPT_DEBUG SPRN_SPRG_RSCRATCH_CRIT SPRN_CSRR0
+KVM_DBG_HANDLER BOOKE_INTERRUPT_DEBUG SPRN_SPRG_RSCRATCH_CRIT SPRN_CSRR0
 KVM_HANDLER BOOKE_INTERRUPT_SPE_UNAVAIL SPRN_SPRG_RSCRATCH0 SPRN_SRR0
 KVM_HANDLER BOOKE_INTERRUPT_SPE_FP_DATA SPRN_SPRG_RSCRATCH0 SPRN_SRR0
 KVM_HANDLER BOOKE_INTERRUPT_SPE_FP_ROUND SPRN_SPRG_RSCRATCH0 SPRN_SRR0

+ 14 - 0
arch/powerpc/kvm/e500.c

@@ -425,6 +425,20 @@ int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
 	return kvmppc_set_sregs_ivor(vcpu, sregs);
 }
 
+int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id,
+			union kvmppc_one_reg *val)
+{
+	int r = kvmppc_get_one_reg_e500_tlb(vcpu, id, val);
+	return r;
+}
+
+int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id,
+		       union kvmppc_one_reg *val)
+{
+	int r = kvmppc_set_one_reg_e500_tlb(vcpu, id, val);
+	return r;
+}
+
 struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
 {
 	struct kvmppc_vcpu_e500 *vcpu_e500;

+ 22 - 0
arch/powerpc/kvm/e500.h

@@ -23,6 +23,10 @@
 #include <asm/mmu-book3e.h>
 #include <asm/tlb.h>
 
+enum vcpu_ftr {
+	VCPU_FTR_MMU_V2
+};
+
 #define E500_PID_NUM   3
 #define E500_TLB_NUM   2
 
@@ -131,6 +135,10 @@ void kvmppc_e500_tlb_uninit(struct kvmppc_vcpu_e500 *vcpu_e500);
 void kvmppc_get_sregs_e500_tlb(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);
 int kvmppc_set_sregs_e500_tlb(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);
 
+int kvmppc_get_one_reg_e500_tlb(struct kvm_vcpu *vcpu, u64 id,
+				union kvmppc_one_reg *val);
+int kvmppc_set_one_reg_e500_tlb(struct kvm_vcpu *vcpu, u64 id,
+			       union kvmppc_one_reg *val);
 
 #ifdef CONFIG_KVM_E500V2
 unsigned int kvmppc_e500_get_sid(struct kvmppc_vcpu_e500 *vcpu_e500,
@@ -295,4 +303,18 @@ static inline unsigned int get_tlbmiss_tid(struct kvm_vcpu *vcpu)
 #define get_tlb_sts(gtlbe)              (MAS1_TS)
 #endif /* !BOOKE_HV */
 
+static inline bool has_feature(const struct kvm_vcpu *vcpu,
+			       enum vcpu_ftr ftr)
+{
+	bool has_ftr;
+	switch (ftr) {
+	case VCPU_FTR_MMU_V2:
+		has_ftr = ((vcpu->arch.mmucfg & MMUCFG_MAVN) == MMUCFG_MAVN_V2);
+		break;
+	default:
+		return false;
+	}
+	return has_ftr;
+}
+
 #endif /* KVM_E500_H */

+ 19 - 0
arch/powerpc/kvm/e500_emulate.c

@@ -284,6 +284,16 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val)
 	case SPRN_TLB1CFG:
 		*spr_val = vcpu->arch.tlbcfg[1];
 		break;
+	case SPRN_TLB0PS:
+		if (!has_feature(vcpu, VCPU_FTR_MMU_V2))
+			return EMULATE_FAIL;
+		*spr_val = vcpu->arch.tlbps[0];
+		break;
+	case SPRN_TLB1PS:
+		if (!has_feature(vcpu, VCPU_FTR_MMU_V2))
+			return EMULATE_FAIL;
+		*spr_val = vcpu->arch.tlbps[1];
+		break;
 	case SPRN_L1CSR0:
 		*spr_val = vcpu_e500->l1csr0;
 		break;
@@ -307,6 +317,15 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val)
 	case SPRN_MMUCFG:
 		*spr_val = vcpu->arch.mmucfg;
 		break;
+	case SPRN_EPTCFG:
+		if (!has_feature(vcpu, VCPU_FTR_MMU_V2))
+			return EMULATE_FAIL;
+		/*
+		 * Legacy Linux guests access EPTCFG register even if the E.PT
+		 * category is disabled in the VM. Give them a chance to live.
+		 */
+		*spr_val = vcpu->arch.eptcfg;
+		break;
 
 	/* extra exceptions */
 	case SPRN_IVOR32:

+ 170 - 22
arch/powerpc/kvm/e500_mmu.c

@@ -596,6 +596,140 @@ int kvmppc_set_sregs_e500_tlb(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
 	return 0;
 }
 
+int kvmppc_get_one_reg_e500_tlb(struct kvm_vcpu *vcpu, u64 id,
+				union kvmppc_one_reg *val)
+{
+	int r = 0;
+	long int i;
+
+	switch (id) {
+	case KVM_REG_PPC_MAS0:
+		*val = get_reg_val(id, vcpu->arch.shared->mas0);
+		break;
+	case KVM_REG_PPC_MAS1:
+		*val = get_reg_val(id, vcpu->arch.shared->mas1);
+		break;
+	case KVM_REG_PPC_MAS2:
+		*val = get_reg_val(id, vcpu->arch.shared->mas2);
+		break;
+	case KVM_REG_PPC_MAS7_3:
+		*val = get_reg_val(id, vcpu->arch.shared->mas7_3);
+		break;
+	case KVM_REG_PPC_MAS4:
+		*val = get_reg_val(id, vcpu->arch.shared->mas4);
+		break;
+	case KVM_REG_PPC_MAS6:
+		*val = get_reg_val(id, vcpu->arch.shared->mas6);
+		break;
+	case KVM_REG_PPC_MMUCFG:
+		*val = get_reg_val(id, vcpu->arch.mmucfg);
+		break;
+	case KVM_REG_PPC_EPTCFG:
+		*val = get_reg_val(id, vcpu->arch.eptcfg);
+		break;
+	case KVM_REG_PPC_TLB0CFG:
+	case KVM_REG_PPC_TLB1CFG:
+	case KVM_REG_PPC_TLB2CFG:
+	case KVM_REG_PPC_TLB3CFG:
+		i = id - KVM_REG_PPC_TLB0CFG;
+		*val = get_reg_val(id, vcpu->arch.tlbcfg[i]);
+		break;
+	case KVM_REG_PPC_TLB0PS:
+	case KVM_REG_PPC_TLB1PS:
+	case KVM_REG_PPC_TLB2PS:
+	case KVM_REG_PPC_TLB3PS:
+		i = id - KVM_REG_PPC_TLB0PS;
+		*val = get_reg_val(id, vcpu->arch.tlbps[i]);
+		break;
+	default:
+		r = -EINVAL;
+		break;
+	}
+
+	return r;
+}
+
+int kvmppc_set_one_reg_e500_tlb(struct kvm_vcpu *vcpu, u64 id,
+			       union kvmppc_one_reg *val)
+{
+	int r = 0;
+	long int i;
+
+	switch (id) {
+	case KVM_REG_PPC_MAS0:
+		vcpu->arch.shared->mas0 = set_reg_val(id, *val);
+		break;
+	case KVM_REG_PPC_MAS1:
+		vcpu->arch.shared->mas1 = set_reg_val(id, *val);
+		break;
+	case KVM_REG_PPC_MAS2:
+		vcpu->arch.shared->mas2 = set_reg_val(id, *val);
+		break;
+	case KVM_REG_PPC_MAS7_3:
+		vcpu->arch.shared->mas7_3 = set_reg_val(id, *val);
+		break;
+	case KVM_REG_PPC_MAS4:
+		vcpu->arch.shared->mas4 = set_reg_val(id, *val);
+		break;
+	case KVM_REG_PPC_MAS6:
+		vcpu->arch.shared->mas6 = set_reg_val(id, *val);
+		break;
+	/* Only allow MMU registers to be set to the config supported by KVM */
+	case KVM_REG_PPC_MMUCFG: {
+		u32 reg = set_reg_val(id, *val);
+		if (reg != vcpu->arch.mmucfg)
+			r = -EINVAL;
+		break;
+	}
+	case KVM_REG_PPC_EPTCFG: {
+		u32 reg = set_reg_val(id, *val);
+		if (reg != vcpu->arch.eptcfg)
+			r = -EINVAL;
+		break;
+	}
+	case KVM_REG_PPC_TLB0CFG:
+	case KVM_REG_PPC_TLB1CFG:
+	case KVM_REG_PPC_TLB2CFG:
+	case KVM_REG_PPC_TLB3CFG: {
+		/* MMU geometry (N_ENTRY/ASSOC) can be set only using SW_TLB */
+		u32 reg = set_reg_val(id, *val);
+		i = id - KVM_REG_PPC_TLB0CFG;
+		if (reg != vcpu->arch.tlbcfg[i])
+			r = -EINVAL;
+		break;
+	}
+	case KVM_REG_PPC_TLB0PS:
+	case KVM_REG_PPC_TLB1PS:
+	case KVM_REG_PPC_TLB2PS:
+	case KVM_REG_PPC_TLB3PS: {
+		u32 reg = set_reg_val(id, *val);
+		i = id - KVM_REG_PPC_TLB0PS;
+		if (reg != vcpu->arch.tlbps[i])
+			r = -EINVAL;
+		break;
+	}
+	default:
+		r = -EINVAL;
+		break;
+	}
+
+	return r;
+}
+
+static int vcpu_mmu_geometry_update(struct kvm_vcpu *vcpu,
+		struct kvm_book3e_206_tlb_params *params)
+{
+	vcpu->arch.tlbcfg[0] &= ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
+	if (params->tlb_sizes[0] <= 2048)
+		vcpu->arch.tlbcfg[0] |= params->tlb_sizes[0];
+	vcpu->arch.tlbcfg[0] |= params->tlb_ways[0] << TLBnCFG_ASSOC_SHIFT;
+
+	vcpu->arch.tlbcfg[1] &= ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
+	vcpu->arch.tlbcfg[1] |= params->tlb_sizes[1];
+	vcpu->arch.tlbcfg[1] |= params->tlb_ways[1] << TLBnCFG_ASSOC_SHIFT;
+	return 0;
+}
+
 int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu,
 			      struct kvm_config_tlb *cfg)
 {
@@ -692,16 +826,8 @@ int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu,
 	vcpu_e500->gtlb_offset[0] = 0;
 	vcpu_e500->gtlb_offset[1] = params.tlb_sizes[0];
 
-	vcpu->arch.mmucfg = mfspr(SPRN_MMUCFG) & ~MMUCFG_LPIDSIZE;
-
-	vcpu->arch.tlbcfg[0] &= ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
-	if (params.tlb_sizes[0] <= 2048)
-		vcpu->arch.tlbcfg[0] |= params.tlb_sizes[0];
-	vcpu->arch.tlbcfg[0] |= params.tlb_ways[0] << TLBnCFG_ASSOC_SHIFT;
-
-	vcpu->arch.tlbcfg[1] &= ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
-	vcpu->arch.tlbcfg[1] |= params.tlb_sizes[1];
-	vcpu->arch.tlbcfg[1] |= params.tlb_ways[1] << TLBnCFG_ASSOC_SHIFT;
+	/* Update vcpu's MMU geometry based on SW_TLB input */
+	vcpu_mmu_geometry_update(vcpu, &params);
 
 	vcpu_e500->shared_tlb_pages = pages;
 	vcpu_e500->num_shared_tlb_pages = num_pages;
@@ -737,6 +863,39 @@ int kvm_vcpu_ioctl_dirty_tlb(struct kvm_vcpu *vcpu,
 	return 0;
 }
 
+/* Vcpu's MMU default configuration */
+static int vcpu_mmu_init(struct kvm_vcpu *vcpu,
+		       struct kvmppc_e500_tlb_params *params)
+{
+	/* Initialize RASIZE, PIDSIZE, NTLBS and MAVN fields with host values*/
+	vcpu->arch.mmucfg = mfspr(SPRN_MMUCFG) & ~MMUCFG_LPIDSIZE;
+
+	/* Initialize TLBnCFG fields with host values and SW_TLB geometry*/
+	vcpu->arch.tlbcfg[0] = mfspr(SPRN_TLB0CFG) &
+			     ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
+	vcpu->arch.tlbcfg[0] |= params[0].entries;
+	vcpu->arch.tlbcfg[0] |= params[0].ways << TLBnCFG_ASSOC_SHIFT;
+
+	vcpu->arch.tlbcfg[1] = mfspr(SPRN_TLB1CFG) &
+			     ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
+	vcpu->arch.tlbcfg[1] |= params[1].entries;
+	vcpu->arch.tlbcfg[1] |= params[1].ways << TLBnCFG_ASSOC_SHIFT;
+
+	if (has_feature(vcpu, VCPU_FTR_MMU_V2)) {
+		vcpu->arch.tlbps[0] = mfspr(SPRN_TLB0PS);
+		vcpu->arch.tlbps[1] = mfspr(SPRN_TLB1PS);
+
+		vcpu->arch.mmucfg &= ~MMUCFG_LRAT;
+
+		/* Guest mmu emulation currently doesn't handle E.PT */
+		vcpu->arch.eptcfg = 0;
+		vcpu->arch.tlbcfg[0] &= ~TLBnCFG_PT;
+		vcpu->arch.tlbcfg[1] &= ~TLBnCFG_IND;
+	}
+
+	return 0;
+}
+
 int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500)
 {
 	struct kvm_vcpu *vcpu = &vcpu_e500->vcpu;
@@ -781,18 +940,7 @@ int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500)
 	if (!vcpu_e500->g2h_tlb1_map)
 		goto err;
 
-	/* Init TLB configuration register */
-	vcpu->arch.tlbcfg[0] = mfspr(SPRN_TLB0CFG) &
-			     ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
-	vcpu->arch.tlbcfg[0] |= vcpu_e500->gtlb_params[0].entries;
-	vcpu->arch.tlbcfg[0] |=
-		vcpu_e500->gtlb_params[0].ways << TLBnCFG_ASSOC_SHIFT;
-
-	vcpu->arch.tlbcfg[1] = mfspr(SPRN_TLB1CFG) &
-			     ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
-	vcpu->arch.tlbcfg[1] |= vcpu_e500->gtlb_params[1].entries;
-	vcpu->arch.tlbcfg[1] |=
-		vcpu_e500->gtlb_params[1].ways << TLBnCFG_ASSOC_SHIFT;
+	vcpu_mmu_init(vcpu, vcpu_e500->gtlb_params);
 
 	kvmppc_recalc_tlb1map_range(vcpu_e500);
 	return 0;

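The geometry handling split out above always packs TLBnCFG the same way: entry count in the low bits, associativity at TLBnCFG_ASSOC_SHIFT. A small illustrative helper (hypothetical, assuming the mmu-book3e.h definitions this file already relies on):

#include <linux/types.h>
#include <asm/mmu-book3e.h>

/* Illustration only: rebuild the geometry bits of a TLBnCFG value. */
static inline u32 pack_tlbncfg_geometry(u32 tlbncfg, u32 entries, u32 ways)
{
	tlbncfg &= ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
	return tlbncfg | entries | (ways << TLBnCFG_ASSOC_SHIFT);
}
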
+ 16 - 0
arch/powerpc/kvm/e500mc.c

@@ -177,6 +177,8 @@ int kvmppc_core_check_processor_compat(void)
 		r = 0;
 	else if (strcmp(cur_cpu_spec->cpu_name, "e5500") == 0)
 		r = 0;
+	else if (strcmp(cur_cpu_spec->cpu_name, "e6500") == 0)
+		r = 0;
 	else
 		r = -ENOTSUPP;
 
@@ -260,6 +262,20 @@ int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
 	return kvmppc_set_sregs_ivor(vcpu, sregs);
 }
 
+int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id,
+			union kvmppc_one_reg *val)
+{
+	int r = kvmppc_get_one_reg_e500_tlb(vcpu, id, val);
+	return r;
+}
+
+int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id,
+		       union kvmppc_one_reg *val)
+{
+	int r = kvmppc_set_one_reg_e500_tlb(vcpu, id, val);
+	return r;
+}
+
 struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
 {
 	struct kvmppc_vcpu_e500 *vcpu_e500;

+ 2 - 0
arch/powerpc/kvm/emulate.c

@@ -38,6 +38,7 @@
 
 #define OP_31_XOP_TRAP      4
 #define OP_31_XOP_LWZX      23
+#define OP_31_XOP_DCBST     54
 #define OP_31_XOP_TRAP_64   68
 #define OP_31_XOP_DCBF      86
 #define OP_31_XOP_LBZX      87
@@ -370,6 +371,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
 			emulated = kvmppc_emulate_mtspr(vcpu, sprn, rs);
 			break;
 
+		case OP_31_XOP_DCBST:
 		case OP_31_XOP_DCBF:
 		case OP_31_XOP_DCBI:
 			/* Do nothing. The guest is performing dcbi because

+ 20 - 0
arch/powerpc/kvm/irq.h

@@ -0,0 +1,20 @@
+#ifndef __IRQ_H
+#define __IRQ_H
+
+#include <linux/kvm_host.h>
+
+static inline int irqchip_in_kernel(struct kvm *kvm)
+{
+	int ret = 0;
+
+#ifdef CONFIG_KVM_MPIC
+	ret = ret || (kvm->arch.mpic != NULL);
+#endif
+#ifdef CONFIG_KVM_XICS
+	ret = ret || (kvm->arch.xics != NULL);
+#endif
+	smp_rmb();
+	return ret;
+}
+
+#endif
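
A hypothetical caller, to show how the helper above is meant to be used: in-kernel delivery paths bail out when no in-kernel irqchip was created, leaving the interrupt controller to userspace (function name and error policy are illustrative only):

#include <linux/kvm_host.h>

/* Hypothetical caller -- not part of this commit. */
static int deliver_if_in_kernel(struct kvm *kvm, u32 irq, int level)
{
	if (!irqchip_in_kernel(kvm))
		return -ENXIO;	/* interrupt controller is modelled in userspace */

	return kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irq, level, false);
}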

+ 1853 - 0
arch/powerpc/kvm/mpic.c

@@ -0,0 +1,1853 @@
+/*
+ * OpenPIC emulation
+ *
+ * Copyright (c) 2004 Jocelyn Mayer
+ *               2011 Alexander Graf
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <linux/slab.h>
+#include <linux/mutex.h>
+#include <linux/kvm_host.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/anon_inodes.h>
+#include <asm/uaccess.h>
+#include <asm/mpic.h>
+#include <asm/kvm_para.h>
+#include <asm/kvm_host.h>
+#include <asm/kvm_ppc.h>
+#include "iodev.h"
+
+#define MAX_CPU     32
+#define MAX_SRC     256
+#define MAX_TMR     4
+#define MAX_IPI     4
+#define MAX_MSI     8
+#define MAX_IRQ     (MAX_SRC + MAX_IPI + MAX_TMR)
+#define VID         0x03	/* MPIC version ID */
+
+/* OpenPIC capability flags */
+#define OPENPIC_FLAG_IDR_CRIT     (1 << 0)
+#define OPENPIC_FLAG_ILR          (2 << 0)
+
+/* OpenPIC address map */
+#define OPENPIC_REG_SIZE             0x40000
+#define OPENPIC_GLB_REG_START        0x0
+#define OPENPIC_GLB_REG_SIZE         0x10F0
+#define OPENPIC_TMR_REG_START        0x10F0
+#define OPENPIC_TMR_REG_SIZE         0x220
+#define OPENPIC_MSI_REG_START        0x1600
+#define OPENPIC_MSI_REG_SIZE         0x200
+#define OPENPIC_SUMMARY_REG_START    0x3800
+#define OPENPIC_SUMMARY_REG_SIZE     0x800
+#define OPENPIC_SRC_REG_START        0x10000
+#define OPENPIC_SRC_REG_SIZE         (MAX_SRC * 0x20)
+#define OPENPIC_CPU_REG_START        0x20000
+#define OPENPIC_CPU_REG_SIZE         (0x100 + ((MAX_CPU - 1) * 0x1000))
+
+struct fsl_mpic_info {
+	int max_ext;
+};
+
+static struct fsl_mpic_info fsl_mpic_20 = {
+	.max_ext = 12,
+};
+
+static struct fsl_mpic_info fsl_mpic_42 = {
+	.max_ext = 12,
+};
+
+#define FRR_NIRQ_SHIFT    16
+#define FRR_NCPU_SHIFT     8
+#define FRR_VID_SHIFT      0
+
+#define VID_REVISION_1_2   2
+#define VID_REVISION_1_3   3
+
+#define VIR_GENERIC      0x00000000	/* Generic Vendor ID */
+
+#define GCR_RESET        0x80000000
+#define GCR_MODE_PASS    0x00000000
+#define GCR_MODE_MIXED   0x20000000
+#define GCR_MODE_PROXY   0x60000000
+
+#define TBCR_CI           0x80000000	/* count inhibit */
+#define TCCR_TOG          0x80000000	/* toggles when decrement to zero */
+
+#define IDR_EP_SHIFT      31
+#define IDR_EP_MASK       (1 << IDR_EP_SHIFT)
+#define IDR_CI0_SHIFT     30
+#define IDR_CI1_SHIFT     29
+#define IDR_P1_SHIFT      1
+#define IDR_P0_SHIFT      0
+
+#define ILR_INTTGT_MASK   0x000000ff
+#define ILR_INTTGT_INT    0x00
+#define ILR_INTTGT_CINT   0x01	/* critical */
+#define ILR_INTTGT_MCP    0x02	/* machine check */
+#define NUM_OUTPUTS       3
+
+#define MSIIR_OFFSET       0x140
+#define MSIIR_SRS_SHIFT    29
+#define MSIIR_SRS_MASK     (0x7 << MSIIR_SRS_SHIFT)
+#define MSIIR_IBS_SHIFT    24
+#define MSIIR_IBS_MASK     (0x1f << MSIIR_IBS_SHIFT)
+
+static int get_current_cpu(void)
+{
+#if defined(CONFIG_KVM) && defined(CONFIG_BOOKE)
+	struct kvm_vcpu *vcpu = current->thread.kvm_vcpu;
+	return vcpu ? vcpu->arch.irq_cpu_id : -1;
+#else
+	/* XXX */
+	return -1;
+#endif
+}
+
+static int openpic_cpu_write_internal(void *opaque, gpa_t addr,
+				      u32 val, int idx);
+static int openpic_cpu_read_internal(void *opaque, gpa_t addr,
+				     u32 *ptr, int idx);
+
+enum irq_type {
+	IRQ_TYPE_NORMAL = 0,
+	IRQ_TYPE_FSLINT,	/* FSL internal interrupt -- level only */
+	IRQ_TYPE_FSLSPECIAL,	/* FSL timer/IPI interrupt, edge, no polarity */
+};
+
+struct irq_queue {
+	/* Round up to the nearest 64 IRQs so that the queue length
+	 * won't change when moving between 32 and 64 bit hosts.
+	 */
+	unsigned long queue[BITS_TO_LONGS((MAX_IRQ + 63) & ~63)];
+	int next;
+	int priority;
+};
+
+struct irq_source {
+	uint32_t ivpr;		/* IRQ vector/priority register */
+	uint32_t idr;		/* IRQ destination register */
+	uint32_t destmask;	/* bitmap of CPU destinations */
+	int last_cpu;
+	int output;		/* IRQ level, e.g. ILR_INTTGT_INT */
+	int pending;		/* TRUE if IRQ is pending */
+	enum irq_type type;
+	bool level:1;		/* level-triggered */
+	bool nomask:1;	/* critical interrupts ignore mask on some FSL MPICs */
+};
+
+#define IVPR_MASK_SHIFT       31
+#define IVPR_MASK_MASK        (1 << IVPR_MASK_SHIFT)
+#define IVPR_ACTIVITY_SHIFT   30
+#define IVPR_ACTIVITY_MASK    (1 << IVPR_ACTIVITY_SHIFT)
+#define IVPR_MODE_SHIFT       29
+#define IVPR_MODE_MASK        (1 << IVPR_MODE_SHIFT)
+#define IVPR_POLARITY_SHIFT   23
+#define IVPR_POLARITY_MASK    (1 << IVPR_POLARITY_SHIFT)
+#define IVPR_SENSE_SHIFT      22
+#define IVPR_SENSE_MASK       (1 << IVPR_SENSE_SHIFT)
+
+#define IVPR_PRIORITY_MASK     (0xF << 16)
+#define IVPR_PRIORITY(_ivprr_) ((int)(((_ivprr_) & IVPR_PRIORITY_MASK) >> 16))
+#define IVPR_VECTOR(opp, _ivprr_) ((_ivprr_) & (opp)->vector_mask)
+
+/* IDR[EP/CI] are only for FSL MPIC prior to v4.0 */
+#define IDR_EP      0x80000000	/* external pin */
+#define IDR_CI      0x40000000	/* critical interrupt */
+
+struct irq_dest {
+	struct kvm_vcpu *vcpu;
+
+	int32_t ctpr;		/* CPU current task priority */
+	struct irq_queue raised;
+	struct irq_queue servicing;
+
+	/* Count of IRQ sources asserting on non-INT outputs */
+	uint32_t outputs_active[NUM_OUTPUTS];
+};
+
+#define MAX_MMIO_REGIONS 10
+
+struct openpic {
+	struct kvm *kvm;
+	struct kvm_device *dev;
+	struct kvm_io_device mmio;
+	const struct mem_reg *mmio_regions[MAX_MMIO_REGIONS];
+	int num_mmio_regions;
+
+	gpa_t reg_base;
+	spinlock_t lock;
+
+	/* Behavior control */
+	struct fsl_mpic_info *fsl;
+	uint32_t model;
+	uint32_t flags;
+	uint32_t nb_irqs;
+	uint32_t vid;
+	uint32_t vir;		/* Vendor identification register */
+	uint32_t vector_mask;
+	uint32_t tfrr_reset;
+	uint32_t ivpr_reset;
+	uint32_t idr_reset;
+	uint32_t brr1;
+	uint32_t mpic_mode_mask;
+
+	/* Global registers */
+	uint32_t frr;		/* Feature reporting register */
+	uint32_t gcr;		/* Global configuration register  */
+	uint32_t pir;		/* Processor initialization register */
+	uint32_t spve;		/* Spurious vector register */
+	uint32_t tfrr;		/* Timer frequency reporting register */
+	/* Source registers */
+	struct irq_source src[MAX_IRQ];
+	/* Local registers per output pin */
+	struct irq_dest dst[MAX_CPU];
+	uint32_t nb_cpus;
+	/* Timer registers */
+	struct {
+		uint32_t tccr;	/* Global timer current count register */
+		uint32_t tbcr;	/* Global timer base count register */
+	} timers[MAX_TMR];
+	/* Shared MSI registers */
+	struct {
+		uint32_t msir;	/* Shared Message Signaled Interrupt Register */
+	} msi[MAX_MSI];
+	uint32_t max_irq;
+	uint32_t irq_ipi0;
+	uint32_t irq_tim0;
+	uint32_t irq_msi;
+};
+
+
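+/*
+ * Assert the given output pin towards one destination CPU.  Only the
+ * normal INT output is wired up so far; it is delivered by queueing a
+ * level-triggered external interrupt on the target vcpu.
+ */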
+static void mpic_irq_raise(struct openpic *opp, struct irq_dest *dst,
+			   int output)
+{
+	struct kvm_interrupt irq = {
+		.irq = KVM_INTERRUPT_SET_LEVEL,
+	};
+
+	if (!dst->vcpu) {
+		pr_debug("%s: destination cpu %d does not exist\n",
+			 __func__, (int)(dst - &opp->dst[0]));
+		return;
+	}
+
+	pr_debug("%s: cpu %d output %d\n", __func__, dst->vcpu->arch.irq_cpu_id,
+		output);
+
+	if (output != ILR_INTTGT_INT)	/* TODO */
+		return;
+
+	kvm_vcpu_ioctl_interrupt(dst->vcpu, &irq);
+}
+
+static void mpic_irq_lower(struct openpic *opp, struct irq_dest *dst,
+			   int output)
+{
+	if (!dst->vcpu) {
+		pr_debug("%s: destination cpu %d does not exist\n",
+			 __func__, (int)(dst - &opp->dst[0]));
+		return;
+	}
+
+	pr_debug("%s: cpu %d output %d\n", __func__, dst->vcpu->arch.irq_cpu_id,
+		output);
+
+	if (output != ILR_INTTGT_INT)	/* TODO */
+		return;
+
+	kvmppc_core_dequeue_external(dst->vcpu);
+}
+
+static inline void IRQ_setbit(struct irq_queue *q, int n_IRQ)
+{
+	set_bit(n_IRQ, q->queue);
+}
+
+static inline void IRQ_resetbit(struct irq_queue *q, int n_IRQ)
+{
+	clear_bit(n_IRQ, q->queue);
+}
+
+static inline int IRQ_testbit(struct irq_queue *q, int n_IRQ)
+{
+	return test_bit(n_IRQ, q->queue);
+}
+
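+/*
+ * Rescan the whole queue bitmap and cache the pending IRQ with the
+ * highest IVPR priority in q->next/q->priority.
+ */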
+static void IRQ_check(struct openpic *opp, struct irq_queue *q)
+{
+	int irq = -1;
+	int next = -1;
+	int priority = -1;
+
+	for (;;) {
+		irq = find_next_bit(q->queue, opp->max_irq, irq + 1);
+		if (irq == opp->max_irq)
+			break;
+
+		pr_debug("IRQ_check: irq %d set ivpr_pr=%d pr=%d\n",
+			irq, IVPR_PRIORITY(opp->src[irq].ivpr), priority);
+
+		if (IVPR_PRIORITY(opp->src[irq].ivpr) > priority) {
+			next = irq;
+			priority = IVPR_PRIORITY(opp->src[irq].ivpr);
+		}
+	}
+
+	q->next = next;
+	q->priority = priority;
+}
+
+static int IRQ_get_next(struct openpic *opp, struct irq_queue *q)
+{
+	/* XXX: optimize */
+	IRQ_check(opp, q);
+
+	return q->next;
+}
+
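+/*
+ * Propagate one source's state change to one destination CPU.
+ * Non-INT outputs (e.g. critical interrupts) are simply refcounted;
+ * the INT output goes through the raised queue and the CTPR and
+ * servicing-priority checks below.
+ */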
+static void IRQ_local_pipe(struct openpic *opp, int n_CPU, int n_IRQ,
+			   bool active, bool was_active)
+{
+	struct irq_dest *dst;
+	struct irq_source *src;
+	int priority;
+
+	dst = &opp->dst[n_CPU];
+	src = &opp->src[n_IRQ];
+
+	pr_debug("%s: IRQ %d active %d was %d\n",
+		__func__, n_IRQ, active, was_active);
+
+	if (src->output != ILR_INTTGT_INT) {
+		pr_debug("%s: output %d irq %d active %d was %d count %d\n",
+			__func__, src->output, n_IRQ, active, was_active,
+			dst->outputs_active[src->output]);
+
+		/* On Freescale MPIC, critical interrupts ignore priority,
+		 * IACK, EOI, etc.  Before MPIC v4.1 they also ignore
+		 * masking.
+		 */
+		if (active) {
+			if (!was_active &&
+			    dst->outputs_active[src->output]++ == 0) {
+				pr_debug("%s: Raise OpenPIC output %d cpu %d irq %d\n",
+					__func__, src->output, n_CPU, n_IRQ);
+				mpic_irq_raise(opp, dst, src->output);
+			}
+		} else {
+			if (was_active &&
+			    --dst->outputs_active[src->output] == 0) {
+				pr_debug("%s: Lower OpenPIC output %d cpu %d irq %d\n",
+					__func__, src->output, n_CPU, n_IRQ);
+				mpic_irq_lower(opp, dst, src->output);
+			}
+		}
+
+		return;
+	}
+
+	priority = IVPR_PRIORITY(src->ivpr);
+
+	/* Even if the interrupt doesn't have enough priority,
+	 * it is still raised, in case ctpr is lowered later.
+	 */
+	if (active)
+		IRQ_setbit(&dst->raised, n_IRQ);
+	else
+		IRQ_resetbit(&dst->raised, n_IRQ);
+
+	IRQ_check(opp, &dst->raised);
+
+	if (active && priority <= dst->ctpr) {
+		pr_debug("%s: IRQ %d priority %d too low for ctpr %d on CPU %d\n",
+			__func__, n_IRQ, priority, dst->ctpr, n_CPU);
+		active = 0;
+	}
+
+	if (active) {
+		if (IRQ_get_next(opp, &dst->servicing) >= 0 &&
+		    priority <= dst->servicing.priority) {
+			pr_debug("%s: IRQ %d is hidden by servicing IRQ %d on CPU %d\n",
+				__func__, n_IRQ, dst->servicing.next, n_CPU);
+		} else {
+			pr_debug("%s: Raise OpenPIC INT output cpu %d irq %d/%d\n",
+				__func__, n_CPU, n_IRQ, dst->raised.next);
+			mpic_irq_raise(opp, dst, ILR_INTTGT_INT);
+		}
+	} else {
+		IRQ_get_next(opp, &dst->servicing);
+		if (dst->raised.priority > dst->ctpr &&
+		    dst->raised.priority > dst->servicing.priority) {
+			pr_debug("%s: IRQ %d inactive, IRQ %d prio %d above %d/%d, CPU %d\n",
+				__func__, n_IRQ, dst->raised.next,
+				dst->raised.priority, dst->ctpr,
+				dst->servicing.priority, n_CPU);
+			/* IRQ line stays asserted */
+		} else {
+			pr_debug("%s: IRQ %d inactive, current prio %d/%d, CPU %d\n",
+				__func__, n_IRQ, dst->ctpr,
+				dst->servicing.priority, n_CPU);
+			mpic_irq_lower(opp, dst, ILR_INTTGT_INT);
+		}
+	}
+}
+
+/* update pic state because registers for n_IRQ have changed value */
+static void openpic_update_irq(struct openpic *opp, int n_IRQ)
+{
+	struct irq_source *src;
+	bool active, was_active;
+	int i;
+
+	src = &opp->src[n_IRQ];
+	active = src->pending;
+
+	if ((src->ivpr & IVPR_MASK_MASK) && !src->nomask) {
+		/* Interrupt source is disabled */
+		pr_debug("%s: IRQ %d is disabled\n", __func__, n_IRQ);
+		active = false;
+	}
+
+	was_active = !!(src->ivpr & IVPR_ACTIVITY_MASK);
+
+	/*
+	 * We don't have a similar check for already-active because
+	 * ctpr may have changed and we need to withdraw the interrupt.
+	 */
+	if (!active && !was_active) {
+		pr_debug("%s: IRQ %d is already inactive\n", __func__, n_IRQ);
+		return;
+	}
+
+	if (active)
+		src->ivpr |= IVPR_ACTIVITY_MASK;
+	else
+		src->ivpr &= ~IVPR_ACTIVITY_MASK;
+
+	if (src->destmask == 0) {
+		/* No target */
+		pr_debug("%s: IRQ %d has no target\n", __func__, n_IRQ);
+		return;
+	}
+
+	if (src->destmask == (1 << src->last_cpu)) {
+		/* Only one CPU is allowed to receive this IRQ */
+		IRQ_local_pipe(opp, src->last_cpu, n_IRQ, active, was_active);
+	} else if (!(src->ivpr & IVPR_MODE_MASK)) {
+		/* Directed delivery mode */
+		for (i = 0; i < opp->nb_cpus; i++) {
+			if (src->destmask & (1 << i)) {
+				IRQ_local_pipe(opp, i, n_IRQ, active,
+					       was_active);
+			}
+		}
+	} else {
+		/* Distributed delivery mode */
+		for (i = src->last_cpu + 1; i != src->last_cpu; i++) {
+			if (i == opp->nb_cpus)
+				i = 0;
+
+			if (src->destmask & (1 << i)) {
+				IRQ_local_pipe(opp, i, n_IRQ, active,
+					       was_active);
+				src->last_cpu = i;
+				break;
+			}
+		}
+	}
+}
+
+static void openpic_set_irq(void *opaque, int n_IRQ, int level)
+{
+	struct openpic *opp = opaque;
+	struct irq_source *src;
+
+	if (n_IRQ >= MAX_IRQ) {
+		WARN_ONCE(1, "%s: IRQ %d out of range\n", __func__, n_IRQ);
+		return;
+	}
+
+	src = &opp->src[n_IRQ];
+	pr_debug("openpic: set irq %d = %d ivpr=0x%08x\n",
+		n_IRQ, level, src->ivpr);
+	if (src->level) {
+		/* level-sensitive irq */
+		src->pending = level;
+		openpic_update_irq(opp, n_IRQ);
+	} else {
+		/* edge-sensitive irq */
+		if (level) {
+			src->pending = 1;
+			openpic_update_irq(opp, n_IRQ);
+		}
+
+		if (src->output != ILR_INTTGT_INT) {
+			/* Edge-triggered interrupts shouldn't be used
+			 * with non-INT delivery, but just in case,
+			 * try to make it do something sane rather than
+			 * cause an interrupt storm.  This is close to
+			 * what you'd probably see happen in real hardware.
+			 */
+			src->pending = 0;
+			openpic_update_irq(opp, n_IRQ);
+		}
+	}
+}
+
+static void openpic_reset(struct openpic *opp)
+{
+	int i;
+
+	opp->gcr = GCR_RESET;
+	/* Initialise controller registers */
+	opp->frr = ((opp->nb_irqs - 1) << FRR_NIRQ_SHIFT) |
+	    (opp->vid << FRR_VID_SHIFT);
+
+	opp->pir = 0;
+	opp->spve = -1 & opp->vector_mask;
+	opp->tfrr = opp->tfrr_reset;
+	/* Initialise IRQ sources */
+	for (i = 0; i < opp->max_irq; i++) {
+		opp->src[i].ivpr = opp->ivpr_reset;
+		opp->src[i].idr = opp->idr_reset;
+
+		switch (opp->src[i].type) {
+		case IRQ_TYPE_NORMAL:
+			opp->src[i].level =
+			    !!(opp->ivpr_reset & IVPR_SENSE_MASK);
+			break;
+
+		case IRQ_TYPE_FSLINT:
+			opp->src[i].ivpr |= IVPR_POLARITY_MASK;
+			break;
+
+		case IRQ_TYPE_FSLSPECIAL:
+			break;
+		}
+	}
+	/* Initialise IRQ destinations */
+	for (i = 0; i < MAX_CPU; i++) {
+		opp->dst[i].ctpr = 15;
+		memset(&opp->dst[i].raised, 0, sizeof(struct irq_queue));
+		opp->dst[i].raised.next = -1;
+		memset(&opp->dst[i].servicing, 0, sizeof(struct irq_queue));
+		opp->dst[i].servicing.next = -1;
+	}
+	/* Initialise timers */
+	for (i = 0; i < MAX_TMR; i++) {
+		opp->timers[i].tccr = 0;
+		opp->timers[i].tbcr = TBCR_CI;
+	}
+	/* Go out of RESET state */
+	opp->gcr = 0;
+}
+
+static inline uint32_t read_IRQreg_idr(struct openpic *opp, int n_IRQ)
+{
+	return opp->src[n_IRQ].idr;
+}
+
+static inline uint32_t read_IRQreg_ilr(struct openpic *opp, int n_IRQ)
+{
+	if (opp->flags & OPENPIC_FLAG_ILR)
+		return opp->src[n_IRQ].output;
+
+	return 0xffffffff;
+}
+
+static inline uint32_t read_IRQreg_ivpr(struct openpic *opp, int n_IRQ)
+{
+	return opp->src[n_IRQ].ivpr;
+}
+
+static inline void write_IRQreg_idr(struct openpic *opp, int n_IRQ,
+				    uint32_t val)
+{
+	struct irq_source *src = &opp->src[n_IRQ];
+	uint32_t normal_mask = (1UL << opp->nb_cpus) - 1;
+	uint32_t crit_mask = 0;
+	uint32_t mask = normal_mask;
+	int crit_shift = IDR_EP_SHIFT - opp->nb_cpus;
+	int i;
+
+	if (opp->flags & OPENPIC_FLAG_IDR_CRIT) {
+		crit_mask = mask << crit_shift;
+		mask |= crit_mask | IDR_EP;
+	}
+
+	src->idr = val & mask;
+	pr_debug("Set IDR %d to 0x%08x\n", n_IRQ, src->idr);
+
+	if (opp->flags & OPENPIC_FLAG_IDR_CRIT) {
+		if (src->idr & crit_mask) {
+			if (src->idr & normal_mask) {
+				pr_debug("%s: IRQ configured for multiple output types, using critical\n",
+					__func__);
+			}
+
+			src->output = ILR_INTTGT_CINT;
+			src->nomask = true;
+			src->destmask = 0;
+
+			for (i = 0; i < opp->nb_cpus; i++) {
+				int n_ci = IDR_CI0_SHIFT - i;
+
+				if (src->idr & (1UL << n_ci))
+					src->destmask |= 1UL << i;
+			}
+		} else {
+			src->output = ILR_INTTGT_INT;
+			src->nomask = false;
+			src->destmask = src->idr & normal_mask;
+		}
+	} else {
+		src->destmask = src->idr;
+	}
+}
+
+static inline void write_IRQreg_ilr(struct openpic *opp, int n_IRQ,
+				    uint32_t val)
+{
+	if (opp->flags & OPENPIC_FLAG_ILR) {
+		struct irq_source *src = &opp->src[n_IRQ];
+
+		src->output = val & ILR_INTTGT_MASK;
+		pr_debug("Set ILR %d to 0x%08x, output %d\n", n_IRQ, val,
+			src->output);
+
+		/* TODO: on MPIC v4.0 only, set nomask for non-INT */
+	}
+}
+
+static inline void write_IRQreg_ivpr(struct openpic *opp, int n_IRQ,
+				     uint32_t val)
+{
+	uint32_t mask;
+
+	/* NOTE when implementing newer FSL MPIC models: starting with v4.0,
+	 * the polarity bit is read-only on internal interrupts.
+	 */
+	mask = IVPR_MASK_MASK | IVPR_PRIORITY_MASK | IVPR_SENSE_MASK |
+	    IVPR_POLARITY_MASK | opp->vector_mask;
+
+	/* ACTIVITY bit is read-only */
+	opp->src[n_IRQ].ivpr =
+	    (opp->src[n_IRQ].ivpr & IVPR_ACTIVITY_MASK) | (val & mask);
+
+	/* For FSL internal interrupts, the sense bit is reserved and zero,
+	 * and the interrupt is always level-triggered.  Timers and IPIs
+	 * have no sense or polarity bits, and are edge-triggered.
+	 */
+	switch (opp->src[n_IRQ].type) {
+	case IRQ_TYPE_NORMAL:
+		opp->src[n_IRQ].level =
+		    !!(opp->src[n_IRQ].ivpr & IVPR_SENSE_MASK);
+		break;
+
+	case IRQ_TYPE_FSLINT:
+		opp->src[n_IRQ].ivpr &= ~IVPR_SENSE_MASK;
+		break;
+
+	case IRQ_TYPE_FSLSPECIAL:
+		opp->src[n_IRQ].ivpr &= ~(IVPR_POLARITY_MASK | IVPR_SENSE_MASK);
+		break;
+	}
+
+	openpic_update_irq(opp, n_IRQ);
+	pr_debug("Set IVPR %d to 0x%08x -> 0x%08x\n", n_IRQ, val,
+		opp->src[n_IRQ].ivpr);
+}
+
+static void openpic_gcr_write(struct openpic *opp, uint64_t val)
+{
+	if (val & GCR_RESET) {
+		openpic_reset(opp);
+		return;
+	}
+
+	opp->gcr &= ~opp->mpic_mode_mask;
+	opp->gcr |= val & opp->mpic_mode_mask;
+}
+
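+/*
+ * Global register space.  Offsets 0x40..0xB0 alias the per-CPU register
+ * window of whichever CPU performs the access (same in openpic_gbl_read()).
+ */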
+static int openpic_gbl_write(void *opaque, gpa_t addr, u32 val)
+{
+	struct openpic *opp = opaque;
+	int err = 0;
+
+	pr_debug("%s: addr %#llx <= %08x\n", __func__, addr, val);
+	if (addr & 0xF)
+		return 0;
+
+	switch (addr) {
+	case 0x00:	/* Block Revision Register1 (BRR1) is Readonly */
+		break;
+	case 0x40:
+	case 0x50:
+	case 0x60:
+	case 0x70:
+	case 0x80:
+	case 0x90:
+	case 0xA0:
+	case 0xB0:
+		err = openpic_cpu_write_internal(opp, addr, val,
+						 get_current_cpu());
+		break;
+	case 0x1000:		/* FRR */
+		break;
+	case 0x1020:		/* GCR */
+		openpic_gcr_write(opp, val);
+		break;
+	case 0x1080:		/* VIR */
+		break;
+	case 0x1090:		/* PIR */
+		/*
+		 * This register is used to reset a CPU core --
+		 * let userspace handle it.
+		 */
+		err = -ENXIO;
+		break;
+	case 0x10A0:		/* IPI_IVPR */
+	case 0x10B0:
+	case 0x10C0:
+	case 0x10D0: {
+		int idx;
+		idx = (addr - 0x10A0) >> 4;
+		write_IRQreg_ivpr(opp, opp->irq_ipi0 + idx, val);
+		break;
+	}
+	case 0x10E0:		/* SPVE */
+		opp->spve = val & opp->vector_mask;
+		break;
+	default:
+		break;
+	}
+
+	return err;
+}
+
+static int openpic_gbl_read(void *opaque, gpa_t addr, u32 *ptr)
+{
+	struct openpic *opp = opaque;
+	u32 retval;
+	int err = 0;
+
+	pr_debug("%s: addr %#llx\n", __func__, addr);
+	retval = 0xFFFFFFFF;
+	if (addr & 0xF)
+		goto out;
+
+	switch (addr) {
+	case 0x1000:		/* FRR */
+		retval = opp->frr;
+		retval |= (opp->nb_cpus - 1) << FRR_NCPU_SHIFT;
+		break;
+	case 0x1020:		/* GCR */
+		retval = opp->gcr;
+		break;
+	case 0x1080:		/* VIR */
+		retval = opp->vir;
+		break;
+	case 0x1090:		/* PIR */
+		retval = 0x00000000;
+		break;
+	case 0x00:		/* Block Revision Register1 (BRR1) */
+		retval = opp->brr1;
+		break;
+	case 0x40:
+	case 0x50:
+	case 0x60:
+	case 0x70:
+	case 0x80:
+	case 0x90:
+	case 0xA0:
+	case 0xB0:
+		err = openpic_cpu_read_internal(opp, addr,
+			&retval, get_current_cpu());
+		break;
+	case 0x10A0:		/* IPI_IVPR */
+	case 0x10B0:
+	case 0x10C0:
+	case 0x10D0:
+		{
+			int idx;
+			idx = (addr - 0x10A0) >> 4;
+			retval = read_IRQreg_ivpr(opp, opp->irq_ipi0 + idx);
+		}
+		break;
+	case 0x10E0:		/* SPVE */
+		retval = opp->spve;
+		break;
+	default:
+		break;
+	}
+
+out:
+	pr_debug("%s: => 0x%08x\n", __func__, retval);
+	*ptr = retval;
+	return err;
+}
+
+static int openpic_tmr_write(void *opaque, gpa_t addr, u32 val)
+{
+	struct openpic *opp = opaque;
+	int idx;
+
+	addr += 0x10f0;
+
+	pr_debug("%s: addr %#llx <= %08x\n", __func__, addr, val);
+	if (addr & 0xF)
+		return 0;
+
+	if (addr == 0x10f0) {
+		/* TFRR */
+		opp->tfrr = val;
+		return 0;
+	}
+
+	idx = (addr >> 6) & 0x3;
+	addr = addr & 0x30;
+
+	switch (addr) {
+	case 0x00:		/* TCCR */
+		break;
+	case 0x10:		/* TBCR */
+		if ((opp->timers[idx].tccr & TCCR_TOG) != 0 &&
+		    (val & TBCR_CI) == 0 &&
+		    (opp->timers[idx].tbcr & TBCR_CI) != 0)
+			opp->timers[idx].tccr &= ~TCCR_TOG;
+
+		opp->timers[idx].tbcr = val;
+		break;
+	case 0x20:		/* TVPR */
+		write_IRQreg_ivpr(opp, opp->irq_tim0 + idx, val);
+		break;
+	case 0x30:		/* TDR */
+		write_IRQreg_idr(opp, opp->irq_tim0 + idx, val);
+		break;
+	}
+
+	return 0;
+}
+
+static int openpic_tmr_read(void *opaque, gpa_t addr, u32 *ptr)
+{
+	struct openpic *opp = opaque;
+	uint32_t retval = -1;
+	int idx;
+
+	pr_debug("%s: addr %#llx\n", __func__, addr);
+	if (addr & 0xF)
+		goto out;
+
+	idx = (addr >> 6) & 0x3;
+	if (addr == 0x0) {
+		/* TFRR */
+		retval = opp->tfrr;
+		goto out;
+	}
+
+	switch (addr & 0x30) {
+	case 0x00:		/* TCCR */
+		retval = opp->timers[idx].tccr;
+		break;
+	case 0x10:		/* TBCR */
+		retval = opp->timers[idx].tbcr;
+		break;
+	case 0x20:		/* TVPR */
+		retval = read_IRQreg_ivpr(opp, opp->irq_tim0 + idx);
+		break;
+	case 0x30:		/* TDR */
+		retval = read_IRQreg_idr(opp, opp->irq_tim0 + idx);
+		break;
+	}
+
+out:
+	pr_debug("%s: => 0x%08x\n", __func__, retval);
+	*ptr = retval;
+	return 0;
+}
+
+static int openpic_src_write(void *opaque, gpa_t addr, u32 val)
+{
+	struct openpic *opp = opaque;
+	int idx;
+
+	pr_debug("%s: addr %#llx <= %08x\n", __func__, addr, val);
+
+	addr = addr & 0xffff;
+	idx = addr >> 5;
+
+	switch (addr & 0x1f) {
+	case 0x00:
+		write_IRQreg_ivpr(opp, idx, val);
+		break;
+	case 0x10:
+		write_IRQreg_idr(opp, idx, val);
+		break;
+	case 0x18:
+		write_IRQreg_ilr(opp, idx, val);
+		break;
+	}
+
+	return 0;
+}
+
+static int openpic_src_read(void *opaque, gpa_t addr, u32 *ptr)
+{
+	struct openpic *opp = opaque;
+	uint32_t retval;
+	int idx;
+
+	pr_debug("%s: addr %#llx\n", __func__, addr);
+	retval = 0xFFFFFFFF;
+
+	addr = addr & 0xffff;
+	idx = addr >> 5;
+
+	switch (addr & 0x1f) {
+	case 0x00:
+		retval = read_IRQreg_ivpr(opp, idx);
+		break;
+	case 0x10:
+		retval = read_IRQreg_idr(opp, idx);
+		break;
+	case 0x18:
+		retval = read_IRQreg_ilr(opp, idx);
+		break;
+	}
+
+	pr_debug("%s: => 0x%08x\n", __func__, retval);
+	*ptr = retval;
+	return 0;
+}
+
+static int openpic_msi_write(void *opaque, gpa_t addr, u32 val)
+{
+	struct openpic *opp = opaque;
+	int idx = opp->irq_msi;
+	int srs, ibs;
+
+	pr_debug("%s: addr %#llx <= 0x%08x\n", __func__, addr, val);
+	if (addr & 0xF)
+		return 0;
+
+	switch (addr) {
+	case MSIIR_OFFSET:
+		srs = val >> MSIIR_SRS_SHIFT;
+		idx += srs;
+		ibs = (val & MSIIR_IBS_MASK) >> MSIIR_IBS_SHIFT;
+		opp->msi[srs].msir |= 1 << ibs;
+		openpic_set_irq(opp, idx, 1);
+		break;
+	default:
+		/* most registers are read-only, thus ignored */
+		break;
+	}
+
+	return 0;
+}
+
+static int openpic_msi_read(void *opaque, gpa_t addr, u32 *ptr)
+{
+	struct openpic *opp = opaque;
+	uint32_t r = 0;
+	int i, srs;
+
+	pr_debug("%s: addr %#llx\n", __func__, addr);
+	if (addr & 0xF)
+		return -ENXIO;
+
+	srs = addr >> 4;
+
+	switch (addr) {
+	case 0x00:
+	case 0x10:
+	case 0x20:
+	case 0x30:
+	case 0x40:
+	case 0x50:
+	case 0x60:
+	case 0x70:		/* MSIRs */
+		r = opp->msi[srs].msir;
+		/* Clear on read */
+		opp->msi[srs].msir = 0;
+		openpic_set_irq(opp, opp->irq_msi + srs, 0);
+		break;
+	case 0x120:		/* MSISR */
+		for (i = 0; i < MAX_MSI; i++)
+			r |= (opp->msi[i].msir ? 1 : 0) << i;
+		break;
+	}
+
+	pr_debug("%s: => 0x%08x\n", __func__, r);
+	*ptr = r;
+	return 0;
+}
+
+static int openpic_summary_read(void *opaque, gpa_t addr, u32 *ptr)
+{
+	uint32_t r = 0;
+
+	pr_debug("%s: addr %#llx\n", __func__, addr);
+
+	/* TODO: EISR/EIMR */
+
+	*ptr = r;
+	return 0;
+}
+
+static int openpic_summary_write(void *opaque, gpa_t addr, u32 val)
+{
+	pr_debug("%s: addr %#llx <= 0x%08x\n", __func__, addr, val);
+
+	/* TODO: EISR/EIMR */
+	return 0;
+}
+
+static int openpic_cpu_write_internal(void *opaque, gpa_t addr,
+				      u32 val, int idx)
+{
+	struct openpic *opp = opaque;
+	struct irq_source *src;
+	struct irq_dest *dst;
+	int s_IRQ, n_IRQ;
+
+	pr_debug("%s: cpu %d addr %#llx <= 0x%08x\n", __func__, idx,
+		addr, val);
+
+	if (idx < 0)
+		return 0;
+
+	if (addr & 0xF)
+		return 0;
+
+	dst = &opp->dst[idx];
+	addr &= 0xFF0;
+	switch (addr) {
+	case 0x40:		/* IPIDR */
+	case 0x50:
+	case 0x60:
+	case 0x70:
+		idx = (addr - 0x40) >> 4;
+		/* we still use IDE as a mask of which CPUs to deliver the IPI to */
+		opp->src[opp->irq_ipi0 + idx].destmask |= val;
+		openpic_set_irq(opp, opp->irq_ipi0 + idx, 1);
+		openpic_set_irq(opp, opp->irq_ipi0 + idx, 0);
+		break;
+	case 0x80:		/* CTPR */
+		dst->ctpr = val & 0x0000000F;
+
+		pr_debug("%s: set CPU %d ctpr to %d, raised %d servicing %d\n",
+			__func__, idx, dst->ctpr, dst->raised.priority,
+			dst->servicing.priority);
+
+		if (dst->raised.priority <= dst->ctpr) {
+			pr_debug("%s: Lower OpenPIC INT output cpu %d due to ctpr\n",
+				__func__, idx);
+			mpic_irq_lower(opp, dst, ILR_INTTGT_INT);
+		} else if (dst->raised.priority > dst->servicing.priority) {
+			pr_debug("%s: Raise OpenPIC INT output cpu %d irq %d\n",
+				__func__, idx, dst->raised.next);
+			mpic_irq_raise(opp, dst, ILR_INTTGT_INT);
+		}
+
+		break;
+	case 0x90:		/* WHOAMI */
+		/* Read-only register */
+		break;
+	case 0xA0:		/* IACK */
+		/* Read-only register */
+		break;
+	case 0xB0: {		/* EOI */
+		int notify_eoi;
+
+		pr_debug("EOI\n");
+		s_IRQ = IRQ_get_next(opp, &dst->servicing);
+
+		if (s_IRQ < 0) {
+			pr_debug("%s: EOI with no interrupt in service\n",
+				__func__);
+			break;
+		}
+
+		IRQ_resetbit(&dst->servicing, s_IRQ);
+		/* Notify listeners that the IRQ is over */
+		notify_eoi = s_IRQ;
+		/* Set up next servicing IRQ */
+		s_IRQ = IRQ_get_next(opp, &dst->servicing);
+		/* Check queued interrupts. */
+		n_IRQ = IRQ_get_next(opp, &dst->raised);
+		src = &opp->src[n_IRQ];
+		if (n_IRQ != -1 &&
+		    (s_IRQ == -1 ||
+		     IVPR_PRIORITY(src->ivpr) > dst->servicing.priority)) {
+			pr_debug("Raise OpenPIC INT output cpu %d irq %d\n",
+				idx, n_IRQ);
+			mpic_irq_raise(opp, dst, ILR_INTTGT_INT);
+		}
+
+		spin_unlock(&opp->lock);
+		kvm_notify_acked_irq(opp->kvm, 0, notify_eoi);
+		spin_lock(&opp->lock);
+
+		break;
+	}
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+static int openpic_cpu_write(void *opaque, gpa_t addr, u32 val)
+{
+	struct openpic *opp = opaque;
+
+	return openpic_cpu_write_internal(opp, addr, val,
+					 (addr & 0x1f000) >> 12);
+}
+
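+/*
+ * Interrupt acknowledge: lower the INT output, move the best raised
+ * IRQ to the servicing queue and return its vector, or the spurious
+ * vector if nothing of sufficient priority is pending.
+ */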
+static uint32_t openpic_iack(struct openpic *opp, struct irq_dest *dst,
+			     int cpu)
+{
+	struct irq_source *src;
+	int retval, irq;
+
+	pr_debug("Lower OpenPIC INT output\n");
+	mpic_irq_lower(opp, dst, ILR_INTTGT_INT);
+
+	irq = IRQ_get_next(opp, &dst->raised);
+	pr_debug("IACK: irq=%d\n", irq);
+
+	if (irq == -1)
+		/* No more interrupt pending */
+		return opp->spve;
+
+	src = &opp->src[irq];
+	if (!(src->ivpr & IVPR_ACTIVITY_MASK) ||
+	    !(IVPR_PRIORITY(src->ivpr) > dst->ctpr)) {
+		pr_err("%s: bad raised IRQ %d ctpr %d ivpr 0x%08x\n",
+			__func__, irq, dst->ctpr, src->ivpr);
+		openpic_update_irq(opp, irq);
+		retval = opp->spve;
+	} else {
+		/* IRQ enter servicing state */
+		IRQ_setbit(&dst->servicing, irq);
+		retval = IVPR_VECTOR(opp, src->ivpr);
+	}
+
+	if (!src->level) {
+		/* edge-sensitive IRQ */
+		src->ivpr &= ~IVPR_ACTIVITY_MASK;
+		src->pending = 0;
+		IRQ_resetbit(&dst->raised, irq);
+	}
+
+	if ((irq >= opp->irq_ipi0) && (irq < (opp->irq_ipi0 + MAX_IPI))) {
+		src->destmask &= ~(1 << cpu);
+		if (src->destmask && !src->level) {
+			/* trigger on CPUs that didn't know about it yet */
+			openpic_set_irq(opp, irq, 1);
+			openpic_set_irq(opp, irq, 0);
+			/* if all CPUs knew about it, set active bit again */
+			src->ivpr |= IVPR_ACTIVITY_MASK;
+		}
+	}
+
+	return retval;
+}
+
+void kvmppc_mpic_set_epr(struct kvm_vcpu *vcpu)
+{
+	struct openpic *opp = vcpu->arch.mpic;
+	int cpu = vcpu->arch.irq_cpu_id;
+	unsigned long flags;
+
+	spin_lock_irqsave(&opp->lock, flags);
+
+	if ((opp->gcr & opp->mpic_mode_mask) == GCR_MODE_PROXY)
+		kvmppc_set_epr(vcpu, openpic_iack(opp, &opp->dst[cpu], cpu));
+
+	spin_unlock_irqrestore(&opp->lock, flags);
+}
+
+static int openpic_cpu_read_internal(void *opaque, gpa_t addr,
+				     u32 *ptr, int idx)
+{
+	struct openpic *opp = opaque;
+	struct irq_dest *dst;
+	uint32_t retval;
+
+	pr_debug("%s: cpu %d addr %#llx\n", __func__, idx, addr);
+	retval = 0xFFFFFFFF;
+
+	if (idx < 0)
+		goto out;
+
+	if (addr & 0xF)
+		goto out;
+
+	dst = &opp->dst[idx];
+	addr &= 0xFF0;
+	switch (addr) {
+	case 0x80:		/* CTPR */
+		retval = dst->ctpr;
+		break;
+	case 0x90:		/* WHOAMI */
+		retval = idx;
+		break;
+	case 0xA0:		/* IACK */
+		retval = openpic_iack(opp, dst, idx);
+		break;
+	case 0xB0:		/* EOI */
+		retval = 0;
+		break;
+	default:
+		break;
+	}
+	pr_debug("%s: => 0x%08x\n", __func__, retval);
+
+out:
+	*ptr = retval;
+	return 0;
+}
+
+static int openpic_cpu_read(void *opaque, gpa_t addr, u32 *ptr)
+{
+	struct openpic *opp = opaque;
+
+	return openpic_cpu_read_internal(opp, addr, ptr,
+					 (addr & 0x1f000) >> 12);
+}
+
+struct mem_reg {
+	int (*read)(void *opaque, gpa_t addr, u32 *ptr);
+	int (*write)(void *opaque, gpa_t addr, u32 val);
+	gpa_t start_addr;
+	int size;
+};
+
+static const struct mem_reg openpic_gbl_mmio = {
+	.write = openpic_gbl_write,
+	.read = openpic_gbl_read,
+	.start_addr = OPENPIC_GLB_REG_START,
+	.size = OPENPIC_GLB_REG_SIZE,
+};
+
+static const struct mem_reg openpic_tmr_mmio = {
+	.write = openpic_tmr_write,
+	.read = openpic_tmr_read,
+	.start_addr = OPENPIC_TMR_REG_START,
+	.size = OPENPIC_TMR_REG_SIZE,
+};
+
+static const struct mem_reg openpic_cpu_mmio = {
+	.write = openpic_cpu_write,
+	.read = openpic_cpu_read,
+	.start_addr = OPENPIC_CPU_REG_START,
+	.size = OPENPIC_CPU_REG_SIZE,
+};
+
+static const struct mem_reg openpic_src_mmio = {
+	.write = openpic_src_write,
+	.read = openpic_src_read,
+	.start_addr = OPENPIC_SRC_REG_START,
+	.size = OPENPIC_SRC_REG_SIZE,
+};
+
+static const struct mem_reg openpic_msi_mmio = {
+	.read = openpic_msi_read,
+	.write = openpic_msi_write,
+	.start_addr = OPENPIC_MSI_REG_START,
+	.size = OPENPIC_MSI_REG_SIZE,
+};
+
+static const struct mem_reg openpic_summary_mmio = {
+	.read = openpic_summary_read,
+	.write = openpic_summary_write,
+	.start_addr = OPENPIC_SUMMARY_REG_START,
+	.size = OPENPIC_SUMMARY_REG_SIZE,
+};
+
+static void add_mmio_region(struct openpic *opp, const struct mem_reg *mr)
+{
+	if (opp->num_mmio_regions >= MAX_MMIO_REGIONS) {
+		WARN(1, "kvm mpic: too many mmio regions\n");
+		return;
+	}
+
+	opp->mmio_regions[opp->num_mmio_regions++] = mr;
+}
+
+static void fsl_common_init(struct openpic *opp)
+{
+	int i;
+	int virq = MAX_SRC;
+
+	add_mmio_region(opp, &openpic_msi_mmio);
+	add_mmio_region(opp, &openpic_summary_mmio);
+
+	opp->vid = VID_REVISION_1_2;
+	opp->vir = VIR_GENERIC;
+	opp->vector_mask = 0xFFFF;
+	opp->tfrr_reset = 0;
+	opp->ivpr_reset = IVPR_MASK_MASK;
+	opp->idr_reset = 1 << 0;
+	opp->max_irq = MAX_IRQ;
+
+	opp->irq_ipi0 = virq;
+	virq += MAX_IPI;
+	opp->irq_tim0 = virq;
+	virq += MAX_TMR;
+
+	BUG_ON(virq > MAX_IRQ);
+
+	opp->irq_msi = 224;
+
+	for (i = 0; i < opp->fsl->max_ext; i++)
+		opp->src[i].level = false;
+
+	/* Internal interrupts, including message and MSI */
+	for (i = 16; i < MAX_SRC; i++) {
+		opp->src[i].type = IRQ_TYPE_FSLINT;
+		opp->src[i].level = true;
+	}
+
+	/* timers and IPIs */
+	for (i = MAX_SRC; i < virq; i++) {
+		opp->src[i].type = IRQ_TYPE_FSLSPECIAL;
+		opp->src[i].level = false;
+	}
+}
+
+static int kvm_mpic_read_internal(struct openpic *opp, gpa_t addr, u32 *ptr)
+{
+	int i;
+
+	for (i = 0; i < opp->num_mmio_regions; i++) {
+		const struct mem_reg *mr = opp->mmio_regions[i];
+
+		if (mr->start_addr > addr || addr >= mr->start_addr + mr->size)
+			continue;
+
+		return mr->read(opp, addr - mr->start_addr, ptr);
+	}
+
+	return -ENXIO;
+}
+
+static int kvm_mpic_write_internal(struct openpic *opp, gpa_t addr, u32 val)
+{
+	int i;
+
+	for (i = 0; i < opp->num_mmio_regions; i++) {
+		const struct mem_reg *mr = opp->mmio_regions[i];
+
+		if (mr->start_addr > addr || addr >= mr->start_addr + mr->size)
+			continue;
+
+		return mr->write(opp, addr - mr->start_addr, val);
+	}
+
+	return -ENXIO;
+}
+
+static int kvm_mpic_read(struct kvm_io_device *this, gpa_t addr,
+			 int len, void *ptr)
+{
+	struct openpic *opp = container_of(this, struct openpic, mmio);
+	int ret;
+	union {
+		u32 val;
+		u8 bytes[4];
+	} u;
+
+	if (addr & (len - 1)) {
+		pr_debug("%s: bad alignment %llx/%d\n",
+			 __func__, addr, len);
+		return -EINVAL;
+	}
+
+	spin_lock_irq(&opp->lock);
+	ret = kvm_mpic_read_internal(opp, addr - opp->reg_base, &u.val);
+	spin_unlock_irq(&opp->lock);
+
+	/*
+	 * Technically only 32-bit accesses are allowed, but be nice to
+	 * people dumping registers a byte at a time -- it works in real
+	 * hardware (reads only, not writes).
+	 */
+	if (len == 4) {
+		*(u32 *)ptr = u.val;
+		pr_debug("%s: addr %llx ret %d len 4 val %x\n",
+			 __func__, addr, ret, u.val);
+	} else if (len == 1) {
+		*(u8 *)ptr = u.bytes[addr & 3];
+		pr_debug("%s: addr %llx ret %d len 1 val %x\n",
+			 __func__, addr, ret, u.bytes[addr & 3]);
+	} else {
+		pr_debug("%s: bad length %d\n", __func__, len);
+		return -EINVAL;
+	}
+
+	return ret;
+}
+
+static int kvm_mpic_write(struct kvm_io_device *this, gpa_t addr,
+			  int len, const void *ptr)
+{
+	struct openpic *opp = container_of(this, struct openpic, mmio);
+	int ret;
+
+	if (len != 4) {
+		pr_debug("%s: bad length %d\n", __func__, len);
+		return -EOPNOTSUPP;
+	}
+	if (addr & 3) {
+		pr_debug("%s: bad alignment %llx/%d\n", __func__, addr, len);
+		return -EOPNOTSUPP;
+	}
+
+	spin_lock_irq(&opp->lock);
+	ret = kvm_mpic_write_internal(opp, addr - opp->reg_base,
+				      *(const u32 *)ptr);
+	spin_unlock_irq(&opp->lock);
+
+	pr_debug("%s: addr %llx ret %d val %x\n",
+		 __func__, addr, ret, *(const u32 *)ptr);
+
+	return ret;
+}
+
+static const struct kvm_io_device_ops mpic_mmio_ops = {
+	.read = kvm_mpic_read,
+	.write = kvm_mpic_write,
+};
+
+static void map_mmio(struct openpic *opp)
+{
+	kvm_iodevice_init(&opp->mmio, &mpic_mmio_ops);
+
+	kvm_io_bus_register_dev(opp->kvm, KVM_MMIO_BUS,
+				opp->reg_base, OPENPIC_REG_SIZE,
+				&opp->mmio);
+}
+
+static void unmap_mmio(struct openpic *opp)
+{
+	kvm_io_bus_unregister_dev(opp->kvm, KVM_MMIO_BUS, &opp->mmio);
+}
+
+static int set_base_addr(struct openpic *opp, struct kvm_device_attr *attr)
+{
+	u64 base;
+
+	if (copy_from_user(&base, (u64 __user *)(long)attr->addr, sizeof(u64)))
+		return -EFAULT;
+
+	if (base & 0x3ffff) {
+		pr_debug("kvm mpic %s: KVM_DEV_MPIC_BASE_ADDR %08llx not aligned\n",
+			 __func__, base);
+		return -EINVAL;
+	}
+
+	if (base == opp->reg_base)
+		return 0;
+
+	mutex_lock(&opp->kvm->slots_lock);
+
+	unmap_mmio(opp);
+	opp->reg_base = base;
+
+	pr_debug("kvm mpic %s: KVM_DEV_MPIC_BASE_ADDR %08llx\n",
+		 __func__, base);
+
+	if (base == 0)
+		goto out;
+
+	map_mmio(opp);
+
+out:
+	mutex_unlock(&opp->kvm->slots_lock);
+	return 0;
+}
+
+#define ATTR_SET		0
+#define ATTR_GET		1
+
+static int access_reg(struct openpic *opp, gpa_t addr, u32 *val, int type)
+{
+	int ret;
+
+	if (addr & 3)
+		return -ENXIO;
+
+	spin_lock_irq(&opp->lock);
+
+	if (type == ATTR_SET)
+		ret = kvm_mpic_write_internal(opp, addr, *val);
+	else
+		ret = kvm_mpic_read_internal(opp, addr, val);
+
+	spin_unlock_irq(&opp->lock);
+
+	pr_debug("%s: type %d addr %llx val %x\n", __func__, type, addr, *val);
+
+	return ret;
+}
+
+static int mpic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
+{
+	struct openpic *opp = dev->private;
+	u32 attr32;
+
+	switch (attr->group) {
+	case KVM_DEV_MPIC_GRP_MISC:
+		switch (attr->attr) {
+		case KVM_DEV_MPIC_BASE_ADDR:
+			return set_base_addr(opp, attr);
+		}
+
+		break;
+
+	case KVM_DEV_MPIC_GRP_REGISTER:
+		if (get_user(attr32, (u32 __user *)(long)attr->addr))
+			return -EFAULT;
+
+		return access_reg(opp, attr->attr, &attr32, ATTR_SET);
+
+	case KVM_DEV_MPIC_GRP_IRQ_ACTIVE:
+		if (attr->attr > MAX_SRC)
+			return -EINVAL;
+
+		if (get_user(attr32, (u32 __user *)(long)attr->addr))
+			return -EFAULT;
+
+		if (attr32 != 0 && attr32 != 1)
+			return -EINVAL;
+
+		spin_lock_irq(&opp->lock);
+		openpic_set_irq(opp, attr->attr, attr32);
+		spin_unlock_irq(&opp->lock);
+		return 0;
+	}
+
+	return -ENXIO;
+}
+
+static int mpic_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
+{
+	struct openpic *opp = dev->private;
+	u64 attr64;
+	u32 attr32;
+	int ret;
+
+	switch (attr->group) {
+	case KVM_DEV_MPIC_GRP_MISC:
+		switch (attr->attr) {
+		case KVM_DEV_MPIC_BASE_ADDR:
+			mutex_lock(&opp->kvm->slots_lock);
+			attr64 = opp->reg_base;
+			mutex_unlock(&opp->kvm->slots_lock);
+
+			if (copy_to_user((u64 __user *)(long)attr->addr,
+					 &attr64, sizeof(u64)))
+				return -EFAULT;
+
+			return 0;
+		}
+
+		break;
+
+	case KVM_DEV_MPIC_GRP_REGISTER:
+		ret = access_reg(opp, attr->attr, &attr32, ATTR_GET);
+		if (ret)
+			return ret;
+
+		if (put_user(attr32, (u32 __user *)(long)attr->addr))
+			return -EFAULT;
+
+		return 0;
+
+	case KVM_DEV_MPIC_GRP_IRQ_ACTIVE:
+		if (attr->attr > MAX_SRC)
+			return -EINVAL;
+
+		spin_lock_irq(&opp->lock);
+		attr32 = opp->src[attr->attr].pending;
+		spin_unlock_irq(&opp->lock);
+
+		if (put_user(attr32, (u32 __user *)(long)attr->addr))
+			return -EFAULT;
+
+		return 0;
+	}
+
+	return -ENXIO;
+}
+
+static int mpic_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
+{
+	switch (attr->group) {
+	case KVM_DEV_MPIC_GRP_MISC:
+		switch (attr->attr) {
+		case KVM_DEV_MPIC_BASE_ADDR:
+			return 0;
+		}
+
+		break;
+
+	case KVM_DEV_MPIC_GRP_REGISTER:
+		return 0;
+
+	case KVM_DEV_MPIC_GRP_IRQ_ACTIVE:
+		if (attr->attr > MAX_SRC)
+			break;
+
+		return 0;
+	}
+
+	return -ENXIO;
+}
+
+static void mpic_destroy(struct kvm_device *dev)
+{
+	struct openpic *opp = dev->private;
+
+	dev->kvm->arch.mpic = NULL;
+	kfree(opp);
+}
+
+static int mpic_set_default_irq_routing(struct openpic *opp)
+{
+	struct kvm_irq_routing_entry *routing;
+
+	/* Create a nop default map, so that dereferencing it still works */
+	routing = kzalloc((sizeof(*routing)), GFP_KERNEL);
+	if (!routing)
+		return -ENOMEM;
+
+	kvm_set_irq_routing(opp->kvm, routing, 0, 0);
+
+	kfree(routing);
+	return 0;
+}
+
+static int mpic_create(struct kvm_device *dev, u32 type)
+{
+	struct openpic *opp;
+	int ret;
+
+	/* We only support one MPIC at a time for now */
+	if (dev->kvm->arch.mpic)
+		return -EINVAL;
+
+	opp = kzalloc(sizeof(struct openpic), GFP_KERNEL);
+	if (!opp)
+		return -ENOMEM;
+
+	dev->private = opp;
+	opp->kvm = dev->kvm;
+	opp->dev = dev;
+	opp->model = type;
+	spin_lock_init(&opp->lock);
+
+	add_mmio_region(opp, &openpic_gbl_mmio);
+	add_mmio_region(opp, &openpic_tmr_mmio);
+	add_mmio_region(opp, &openpic_src_mmio);
+	add_mmio_region(opp, &openpic_cpu_mmio);
+
+	switch (opp->model) {
+	case KVM_DEV_TYPE_FSL_MPIC_20:
+		opp->fsl = &fsl_mpic_20;
+		opp->brr1 = 0x00400200;
+		opp->flags |= OPENPIC_FLAG_IDR_CRIT;
+		opp->nb_irqs = 80;
+		opp->mpic_mode_mask = GCR_MODE_MIXED;
+
+		fsl_common_init(opp);
+
+		break;
+
+	case KVM_DEV_TYPE_FSL_MPIC_42:
+		opp->fsl = &fsl_mpic_42;
+		opp->brr1 = 0x00400402;
+		opp->flags |= OPENPIC_FLAG_ILR;
+		opp->nb_irqs = 196;
+		opp->mpic_mode_mask = GCR_MODE_PROXY;
+
+		fsl_common_init(opp);
+
+		break;
+
+	default:
+		ret = -ENODEV;
+		goto err;
+	}
+
+	ret = mpic_set_default_irq_routing(opp);
+	if (ret)
+		goto err;
+
+	openpic_reset(opp);
+
+	smp_wmb();
+	dev->kvm->arch.mpic = opp;
+
+	return 0;
+
+err:
+	kfree(opp);
+	return ret;
+}
+
+struct kvm_device_ops kvm_mpic_ops = {
+	.name = "kvm-mpic",
+	.create = mpic_create,
+	.destroy = mpic_destroy,
+	.set_attr = mpic_set_attr,
+	.get_attr = mpic_get_attr,
+	.has_attr = mpic_has_attr,
+};
+
+int kvmppc_mpic_connect_vcpu(struct kvm_device *dev, struct kvm_vcpu *vcpu,
+			     u32 cpu)
+{
+	struct openpic *opp = dev->private;
+	int ret = 0;
+
+	if (dev->ops != &kvm_mpic_ops)
+		return -EPERM;
+	if (opp->kvm != vcpu->kvm)
+		return -EPERM;
+	if (cpu >= MAX_CPU)	/* cpu is u32, so only the upper bound can fail */
+		return -EPERM;
+
+	spin_lock_irq(&opp->lock);
+
+	if (opp->dst[cpu].vcpu) {
+		ret = -EEXIST;
+		goto out;
+	}
+	if (vcpu->arch.irq_type) {
+		ret = -EBUSY;
+		goto out;
+	}
+
+	opp->dst[cpu].vcpu = vcpu;
+	opp->nb_cpus = max(opp->nb_cpus, cpu + 1);
+
+	vcpu->arch.mpic = opp;
+	vcpu->arch.irq_cpu_id = cpu;
+	vcpu->arch.irq_type = KVMPPC_IRQ_MPIC;
+
+	/* This might need to be changed if GCR gets extended */
+	if (opp->mpic_mode_mask == GCR_MODE_PROXY)
+		vcpu->arch.epr_flags |= KVMPPC_EPR_KERNEL;
+
+out:
+	spin_unlock_irq(&opp->lock);
+	return ret;
+}
+
+/*
+ * This should only happen immediately before the mpic is destroyed,
+ * so we shouldn't need to worry about anything still trying to
+ * access the vcpu pointer.
+ */
+void kvmppc_mpic_disconnect_vcpu(struct openpic *opp, struct kvm_vcpu *vcpu)
+{
+	BUG_ON(!opp->dst[vcpu->arch.irq_cpu_id].vcpu);
+
+	opp->dst[vcpu->arch.irq_cpu_id].vcpu = NULL;
+}
+
+/*
+ * Return value:
+ *  < 0   Interrupt was ignored (masked or not delivered for other reasons)
+ *  = 0   Interrupt was coalesced (previous irq is still pending)
+ *  > 0   Number of CPUs interrupt was delivered to
+ */
+static int mpic_set_irq(struct kvm_kernel_irq_routing_entry *e,
+			struct kvm *kvm, int irq_source_id, int level,
+			bool line_status)
+{
+	u32 irq = e->irqchip.pin;
+	struct openpic *opp = kvm->arch.mpic;
+	unsigned long flags;
+
+	spin_lock_irqsave(&opp->lock, flags);
+	openpic_set_irq(opp, irq, level);
+	spin_unlock_irqrestore(&opp->lock, flags);
+
+	/* None of the code paths we care about check the return value */
+	return 0;
+}
+
+int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
+		struct kvm *kvm, int irq_source_id, int level, bool line_status)
+{
+	struct openpic *opp = kvm->arch.mpic;
+	unsigned long flags;
+
+	spin_lock_irqsave(&opp->lock, flags);
+
+	/*
+	 * XXX We ignore the target address for now, as we only support
+	 *     a single MSI bank.
+	 */
+	openpic_msi_write(kvm->arch.mpic, MSIIR_OFFSET, e->msi.data);
+	spin_unlock_irqrestore(&opp->lock, flags);
+
+	/* None of the code paths we care about check the return value */
+	return 0;
+}
+
+int kvm_set_routing_entry(struct kvm_irq_routing_table *rt,
+			  struct kvm_kernel_irq_routing_entry *e,
+			  const struct kvm_irq_routing_entry *ue)
+{
+	int r = -EINVAL;
+
+	switch (ue->type) {
+	case KVM_IRQ_ROUTING_IRQCHIP:
+		e->set = mpic_set_irq;
+		e->irqchip.irqchip = ue->u.irqchip.irqchip;
+		e->irqchip.pin = ue->u.irqchip.pin;
+		if (e->irqchip.pin >= KVM_IRQCHIP_NUM_PINS)
+			goto out;
+		rt->chip[ue->u.irqchip.irqchip][e->irqchip.pin] = ue->gsi;
+		break;
+	case KVM_IRQ_ROUTING_MSI:
+		e->set = kvm_set_msi;
+		e->msi.address_lo = ue->u.msi.address_lo;
+		e->msi.address_hi = ue->u.msi.address_hi;
+		e->msi.data = ue->u.msi.data;
+		break;
+	default:
+		goto out;
+	}
+
+	r = 0;
+out:
+	return r;
+}

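The new device is driven entirely from userspace through the device control
API added in this series.  As a rough sketch (illustrative only, not part of
the patch; error handling trimmed, and the vm/vcpu file descriptors are
assumed to exist already), the sequence for wiring up an FSL MPIC is:

	/* Illustrative sketch -- not part of this commit. */
	#include <stdint.h>
	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	static int wire_up_mpic(int vm_fd, int vcpu_fd, uint32_t vcpu_index)
	{
		struct kvm_create_device cd = {
			.type = KVM_DEV_TYPE_FSL_MPIC_20,	/* or ..._42 */
		};
		uint64_t base = 0xe0000000;	/* hypothetical; must be 256 KiB aligned */
		struct kvm_device_attr attr = {
			.group	= KVM_DEV_MPIC_GRP_MISC,
			.attr	= KVM_DEV_MPIC_BASE_ADDR,
			.addr	= (uintptr_t)&base,
		};
		struct kvm_enable_cap cap = {
			.cap	= KVM_CAP_IRQ_MPIC,
		};

		if (ioctl(vm_fd, KVM_CREATE_DEVICE, &cd) < 0)	/* fills cd.fd */
			return -1;
		if (ioctl(cd.fd, KVM_SET_DEVICE_ATTR, &attr) < 0)
			return -1;

		cap.args[0] = cd.fd;		/* which MPIC to connect to */
		cap.args[1] = vcpu_index;	/* this vcpu's CPU number on it */
		return ioctl(vcpu_fd, KVM_ENABLE_CAP, &cap);
	}
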
+ 108 - 25
arch/powerpc/kvm/powerpc.c

@@ -25,6 +25,7 @@
 #include <linux/hrtimer.h>
 #include <linux/fs.h>
 #include <linux/slab.h>
+#include <linux/file.h>
 #include <asm/cputable.h>
 #include <asm/uaccess.h>
 #include <asm/kvm_ppc.h>
@@ -32,6 +33,7 @@
 #include <asm/cputhreads.h>
 #include <asm/irqflags.h>
 #include "timing.h"
+#include "irq.h"
 #include "../mm/mmu_decl.h"
 
 #define CREATE_TRACE_POINTS
@@ -317,6 +319,7 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_ENABLE_CAP:
 	case KVM_CAP_ONE_REG:
 	case KVM_CAP_IOEVENTFD:
+	case KVM_CAP_DEVICE_CTRL:
 		r = 1;
 		break;
 #ifndef CONFIG_KVM_BOOK3S_64_HV
@@ -325,6 +328,9 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_PPC_GET_PVINFO:
 #if defined(CONFIG_KVM_E500V2) || defined(CONFIG_KVM_E500MC)
 	case KVM_CAP_SW_TLB:
+#endif
+#ifdef CONFIG_KVM_MPIC
+	case KVM_CAP_IRQ_MPIC:
 #endif
 		r = 1;
 		break;
@@ -335,6 +341,10 @@ int kvm_dev_ioctl_check_extension(long ext)
 #ifdef CONFIG_PPC_BOOK3S_64
 	case KVM_CAP_SPAPR_TCE:
 	case KVM_CAP_PPC_ALLOC_HTAB:
+	case KVM_CAP_PPC_RTAS:
+#ifdef CONFIG_KVM_XICS
+	case KVM_CAP_IRQ_XICS:
+#endif
 		r = 1;
 		break;
 #endif /* CONFIG_PPC_BOOK3S_64 */
@@ -411,18 +421,17 @@ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
 }
 
 int kvm_arch_prepare_memory_region(struct kvm *kvm,
-                                   struct kvm_memory_slot *memslot,
-                                   struct kvm_memory_slot old,
-                                   struct kvm_userspace_memory_region *mem,
-                                   bool user_alloc)
+				   struct kvm_memory_slot *memslot,
+				   struct kvm_userspace_memory_region *mem,
+				   enum kvm_mr_change change)
 {
 	return kvmppc_core_prepare_memory_region(kvm, memslot, mem);
 }
 
 void kvm_arch_commit_memory_region(struct kvm *kvm,
-               struct kvm_userspace_memory_region *mem,
-               struct kvm_memory_slot old,
-               bool user_alloc)
+				   struct kvm_userspace_memory_region *mem,
+				   const struct kvm_memory_slot *old,
+				   enum kvm_mr_change change)
 {
 	kvmppc_core_commit_memory_region(kvm, mem, old);
 }
@@ -460,6 +469,16 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
 	tasklet_kill(&vcpu->arch.tasklet);
 
 	kvmppc_remove_vcpu_debugfs(vcpu);
+
+	switch (vcpu->arch.irq_type) {
+	case KVMPPC_IRQ_MPIC:
+		kvmppc_mpic_disconnect_vcpu(vcpu->arch.mpic, vcpu);
+		break;
+	case KVMPPC_IRQ_XICS:
+		kvmppc_xics_free_icp(vcpu);
+		break;
+	}
+
 	kvmppc_core_vcpu_free(vcpu);
 }
 
@@ -532,12 +551,6 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
 #endif
 }
 
-int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
-                                        struct kvm_guest_debug *dbg)
-{
-	return -EINVAL;
-}
-
 static void kvmppc_complete_dcr_load(struct kvm_vcpu *vcpu,
                                      struct kvm_run *run)
 {
@@ -612,6 +625,8 @@ static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu,
 int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
                        unsigned int rt, unsigned int bytes, int is_bigendian)
 {
+	int idx, ret;
+
 	if (bytes > sizeof(run->mmio.data)) {
 		printk(KERN_ERR "%s: bad MMIO length: %d\n", __func__,
 		       run->mmio.len);
@@ -627,8 +642,14 @@ int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	vcpu->mmio_is_write = 0;
 	vcpu->arch.mmio_sign_extend = 0;
 
-	if (!kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, run->mmio.phys_addr,
-			     bytes, &run->mmio.data)) {
+	idx = srcu_read_lock(&vcpu->kvm->srcu);
+
+	ret = kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, run->mmio.phys_addr,
+			      bytes, &run->mmio.data);
+
+	srcu_read_unlock(&vcpu->kvm->srcu, idx);
+
+	if (!ret) {
 		kvmppc_complete_mmio_load(vcpu, run);
 		vcpu->mmio_needed = 0;
 		return EMULATE_DONE;
@@ -653,6 +674,7 @@ int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
                         u64 val, unsigned int bytes, int is_bigendian)
 {
 	void *data = run->mmio.data;
+	int idx, ret;
 
 	if (bytes > sizeof(run->mmio.data)) {
 		printk(KERN_ERR "%s: bad MMIO length: %d\n", __func__,
@@ -682,9 +704,14 @@ int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		}
 	}
 
-	if (!kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, run->mmio.phys_addr,
-			      bytes, &run->mmio.data)) {
-		kvmppc_complete_mmio_load(vcpu, run);
+	idx = srcu_read_lock(&vcpu->kvm->srcu);
+
+	ret = kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, run->mmio.phys_addr,
+			       bytes, &run->mmio.data);
+
+	srcu_read_unlock(&vcpu->kvm->srcu, idx);
+
+	if (!ret) {
 		vcpu->mmio_needed = 0;
 		return EMULATE_DONE;
 	}
@@ -740,7 +767,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
 int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq)
 {
 	if (irq->irq == KVM_INTERRUPT_UNSET) {
-		kvmppc_core_dequeue_external(vcpu, irq);
+		kvmppc_core_dequeue_external(vcpu);
 		return 0;
 	}
 
@@ -770,7 +797,10 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
 		break;
 	case KVM_CAP_PPC_EPR:
 		r = 0;
-		vcpu->arch.epr_enabled = cap->args[0];
+		if (cap->args[0])
+			vcpu->arch.epr_flags |= KVMPPC_EPR_USER;
+		else
+			vcpu->arch.epr_flags &= ~KVMPPC_EPR_USER;
 		break;
 #ifdef CONFIG_BOOKE
 	case KVM_CAP_PPC_BOOKE_WATCHDOG:
@@ -791,6 +821,44 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
 		break;
 	}
 #endif
+#ifdef CONFIG_KVM_MPIC
+	case KVM_CAP_IRQ_MPIC: {
+		struct file *filp;
+		struct kvm_device *dev;
+
+		r = -EBADF;
+		filp = fget(cap->args[0]);
+		if (!filp)
+			break;
+
+		r = -EPERM;
+		dev = kvm_device_from_filp(filp);
+		if (dev)
+			r = kvmppc_mpic_connect_vcpu(dev, vcpu, cap->args[1]);
+
+		fput(filp);
+		break;
+	}
+#endif
+#ifdef CONFIG_KVM_XICS
+	case KVM_CAP_IRQ_XICS: {
+		struct file *filp;
+		struct kvm_device *dev;
+
+		r = -EBADF;
+		filp = fget(cap->args[0]);
+		if (!filp)
+			break;
+
+		r = -EPERM;
+		dev = kvm_device_from_filp(filp);
+		if (dev)
+			r = kvmppc_xics_connect_vcpu(dev, vcpu, cap->args[1]);
+
+		fput(filp);
+		break;
+	}
+#endif /* CONFIG_KVM_XICS */
 	default:
 		r = -EINVAL;
 		break;
@@ -913,9 +981,22 @@ static int kvm_vm_ioctl_get_pvinfo(struct kvm_ppc_pvinfo *pvinfo)
 	return 0;
 }
 
+int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event,
+			  bool line_status)
+{
+	if (!irqchip_in_kernel(kvm))
+		return -ENXIO;
+
+	irq_event->status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
+					irq_event->irq, irq_event->level,
+					line_status);
+	return 0;
+}
+
 long kvm_arch_vm_ioctl(struct file *filp,
                        unsigned int ioctl, unsigned long arg)
 {
+	struct kvm *kvm __maybe_unused = filp->private_data;
 	void __user *argp = (void __user *)arg;
 	long r;
 
@@ -934,7 +1015,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
 #ifdef CONFIG_PPC_BOOK3S_64
 	case KVM_CREATE_SPAPR_TCE: {
 		struct kvm_create_spapr_tce create_tce;
-		struct kvm *kvm = filp->private_data;
 
 		r = -EFAULT;
 		if (copy_from_user(&create_tce, argp, sizeof(create_tce)))
@@ -946,8 +1026,8 @@ long kvm_arch_vm_ioctl(struct file *filp,
 
 #ifdef CONFIG_KVM_BOOK3S_64_HV
 	case KVM_ALLOCATE_RMA: {
-		struct kvm *kvm = filp->private_data;
 		struct kvm_allocate_rma rma;
+		struct kvm *kvm = filp->private_data;
 
 		r = kvm_vm_ioctl_allocate_rma(kvm, &rma);
 		if (r >= 0 && copy_to_user(argp, &rma, sizeof(rma)))
@@ -956,7 +1036,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
 	}
 
 	case KVM_PPC_ALLOCATE_HTAB: {
-		struct kvm *kvm = filp->private_data;
 		u32 htab_order;
 
 		r = -EFAULT;
@@ -973,7 +1052,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
 	}
 
 	case KVM_PPC_GET_HTAB_FD: {
-		struct kvm *kvm = filp->private_data;
 		struct kvm_get_htab_fd ghf;
 
 		r = -EFAULT;
@@ -986,7 +1064,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
 
 #ifdef CONFIG_PPC_BOOK3S_64
 	case KVM_PPC_GET_SMMU_INFO: {
-		struct kvm *kvm = filp->private_data;
 		struct kvm_ppc_smmu_info info;
 
 		memset(&info, 0, sizeof(info));
@@ -995,6 +1072,12 @@ long kvm_arch_vm_ioctl(struct file *filp,
 			r = -EFAULT;
 		break;
 	}
+	case KVM_PPC_RTAS_DEFINE_TOKEN: {
+		struct kvm *kvm = filp->private_data;
+
+		r = kvm_vm_ioctl_rtas_define_token(kvm, argp);
+		break;
+	}
 #endif /* CONFIG_PPC_BOOK3S_64 */
 	default:
 		r = -ENOTTY;

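With kvm_vm_ioctl_irq_line() in place, the generic KVM_IRQ_LINE ioctl now
works against the VM fd.  A hedged userspace sketch (illustrative only; note
that mpic_set_default_irq_routing() above installs an empty nop map, so real
routes are expected to be installed via KVM_SET_GSI_ROUTING first):

	/* Illustrative sketch -- not part of this commit. */
	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	static int set_irq_pin(int vm_fd, unsigned int gsi, int asserted)
	{
		struct kvm_irq_level event = {
			.irq	= gsi,		/* routed to an MPIC source pin */
			.level	= asserted,
		};

		/* KVM_IRQ_LINE_STATUS additionally reports delivery status */
		return ioctl(vm_fd, KVM_IRQ_LINE, &event);
	}
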
+ 8 - 0
arch/powerpc/sysdev/xics/icp-native.c

@@ -51,6 +51,12 @@ static struct icp_ipl __iomem *icp_native_regs[NR_CPUS];
 static inline unsigned int icp_native_get_xirr(void)
 {
 	int cpu = smp_processor_id();
+	unsigned int xirr;
+
+	/* First, check for an interrupt latched by KVM */
+	xirr = kvmppc_get_xics_latch();
+	if (xirr)
+		return xirr;
 
 	return in_be32(&icp_native_regs[cpu]->xirr.word);
 }
@@ -138,6 +144,7 @@ static unsigned int icp_native_get_irq(void)
 
 static void icp_native_cause_ipi(int cpu, unsigned long data)
 {
+	kvmppc_set_host_ipi(cpu, 1);
 	icp_native_set_qirr(cpu, IPI_PRIORITY);
 }
 
@@ -151,6 +158,7 @@ static irqreturn_t icp_native_ipi_action(int irq, void *dev_id)
 {
 	int cpu = smp_processor_id();
 
+	kvmppc_set_host_ipi(cpu, 0);
 	icp_native_set_qirr(cpu, 0xff);
 
 	return smp_ipi_demux();

+ 1 - 0
arch/s390/include/uapi/asm/Kbuild

@@ -44,5 +44,6 @@ header-y += termios.h
 header-y += types.h
 header-y += ucontext.h
 header-y += unistd.h
+header-y += virtio-ccw.h
 header-y += vtoc.h
 header-y += zcrypt.h

+ 21 - 0
arch/s390/include/uapi/asm/virtio-ccw.h

@@ -0,0 +1,21 @@
+/*
+ * Definitions for virtio-ccw devices.
+ *
+ * Copyright IBM Corp. 2013
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ *  Author(s): Cornelia Huck <cornelia.huck@de.ibm.com>
+ */
+#ifndef __KVM_VIRTIO_CCW_H
+#define __KVM_VIRTIO_CCW_H
+
+/* Alignment of vring buffers. */
+#define KVM_VIRTIO_CCW_RING_ALIGN 4096
+
+/* Subcode for diagnose 500 (virtio hypercall). */
+#define KVM_S390_VIRTIO_CCW_NOTIFY 3
+
+#endif

+ 1 - 0
arch/s390/kvm/Kconfig

@@ -22,6 +22,7 @@ config KVM
 	select PREEMPT_NOTIFIERS
 	select ANON_INODES
 	select HAVE_KVM_CPU_RELAX_INTERCEPT
+	select HAVE_KVM_EVENTFD
 	---help---
 	  Support hosting paravirtualized guest machines using the SIE
 	  virtualization capability on the mainframe. This should work

+ 1 - 1
arch/s390/kvm/Makefile

@@ -6,7 +6,7 @@
 # it under the terms of the GNU General Public License (version 2 only)
 # as published by the Free Software Foundation.
 
-common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o)
+common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o eventfd.o)
 
 ccflags-y := -Ivirt/kvm -Iarch/s390/kvm
 

+ 26 - 0
arch/s390/kvm/diag.c

@@ -13,6 +13,7 @@
 
 #include <linux/kvm.h>
 #include <linux/kvm_host.h>
+#include <asm/virtio-ccw.h>
 #include "kvm-s390.h"
 #include "trace.h"
 #include "trace-s390.h"
@@ -104,6 +105,29 @@ static int __diag_ipl_functions(struct kvm_vcpu *vcpu)
 	return -EREMOTE;
 }
 
+static int __diag_virtio_hypercall(struct kvm_vcpu *vcpu)
+{
+	int ret, idx;
+
+	/* No virtio-ccw notification? Get out quickly. */
+	if (!vcpu->kvm->arch.css_support ||
+	    (vcpu->run->s.regs.gprs[1] != KVM_S390_VIRTIO_CCW_NOTIFY))
+		return -EOPNOTSUPP;
+
+	idx = srcu_read_lock(&vcpu->kvm->srcu);
+	/*
+	 * The layout is as follows:
+	 * - gpr 2 contains the subchannel id (passed as addr)
+	 * - gpr 3 contains the virtqueue index (passed as datamatch)
+	 */
+	ret = kvm_io_bus_write(vcpu->kvm, KVM_VIRTIO_CCW_NOTIFY_BUS,
+				vcpu->run->s.regs.gprs[2],
+				8, &vcpu->run->s.regs.gprs[3]);
+	srcu_read_unlock(&vcpu->kvm->srcu, idx);
+	/* kvm_io_bus_write returns -EOPNOTSUPP if it found no match. */
+	return ret < 0 ? ret : 0;
+}
+
 int kvm_s390_handle_diag(struct kvm_vcpu *vcpu)
 int kvm_s390_handle_diag(struct kvm_vcpu *vcpu)
 {
 	int code = (vcpu->arch.sie_block->ipb & 0xfff0000) >> 16;
 		return __diag_time_slice_end_directed(vcpu);
 		return __diag_time_slice_end_directed(vcpu);
 	case 0x308:
 		return __diag_ipl_functions(vcpu);
+		return __diag_virtio_hypercall(vcpu);
 	default:
 	default:
 		return -EOPNOTSUPP;
 	}

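The register layout documented in __diag_virtio_hypercall() is what a guest
driver loads before issuing DIAGNOSE 0x500.  An illustrative guest-side
sketch (an assumption of how a driver might issue the hypercall, using the
usual s390 inline-asm conventions; no return value is consumed here):

	/* Illustrative guest-side sketch -- not part of this commit. */
	static inline void virtio_ccw_kvm_notify(unsigned long schid,
						 unsigned long vq_index)
	{
		register unsigned long nr  asm("1") = KVM_S390_VIRTIO_CCW_NOTIFY;
		register unsigned long sid asm("2") = schid;	/* bus address */
		register unsigned long idx asm("3") = vq_index;	/* datamatch */

		asm volatile ("diag 2,4,0x500\n"
			      : : "d" (nr), "d" (sid), "d" (idx)
			      : "memory", "cc");
	}
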
+ 73 - 356
arch/s390/kvm/gaccess.h

@@ -18,369 +18,86 @@
 #include <asm/uaccess.h>
 #include "kvm-s390.h"
 
-static inline void __user *__guestaddr_to_user(struct kvm_vcpu *vcpu,
-					       unsigned long guestaddr)
+static inline void __user *__gptr_to_uptr(struct kvm_vcpu *vcpu,
+					  void __user *gptr,
+					  int prefixing)
 {
 	unsigned long prefix  = vcpu->arch.sie_block->prefix;
-
-	if (guestaddr < 2 * PAGE_SIZE)
-		guestaddr += prefix;
-	else if ((guestaddr >= prefix) && (guestaddr < prefix + 2 * PAGE_SIZE))
-		guestaddr -= prefix;
-
-	return (void __user *) gmap_fault(guestaddr, vcpu->arch.gmap);
-}
-
-static inline int get_guest_u64(struct kvm_vcpu *vcpu, unsigned long guestaddr,
-				u64 *result)
-{
-	void __user *uptr = __guestaddr_to_user(vcpu, guestaddr);
-
-	BUG_ON(guestaddr & 7);
-
-	if (IS_ERR((void __force *) uptr))
-		return PTR_ERR((void __force *) uptr);
-
-	return get_user(*result, (unsigned long __user *) uptr);
-}
-
-static inline int get_guest_u32(struct kvm_vcpu *vcpu, unsigned long guestaddr,
-				u32 *result)
-{
-	void __user *uptr = __guestaddr_to_user(vcpu, guestaddr);
-
-	BUG_ON(guestaddr & 3);
-
-	if (IS_ERR((void __force *) uptr))
-		return PTR_ERR((void __force *) uptr);
-
-	return get_user(*result, (u32 __user *) uptr);
-}
-
-static inline int get_guest_u16(struct kvm_vcpu *vcpu, unsigned long guestaddr,
-				u16 *result)
-{
-	void __user *uptr = __guestaddr_to_user(vcpu, guestaddr);
-
-	BUG_ON(guestaddr & 1);
-
-	if (IS_ERR(uptr))
-		return PTR_ERR(uptr);
-
-	return get_user(*result, (u16 __user *) uptr);
-}
-
-static inline int get_guest_u8(struct kvm_vcpu *vcpu, unsigned long guestaddr,
-			       u8 *result)
-{
-	void __user *uptr = __guestaddr_to_user(vcpu, guestaddr);
-
-	if (IS_ERR((void __force *) uptr))
-		return PTR_ERR((void __force *) uptr);
-
-	return get_user(*result, (u8 __user *) uptr);
-}
-
-static inline int put_guest_u64(struct kvm_vcpu *vcpu, unsigned long guestaddr,
-				u64 value)
-{
-	void __user *uptr = __guestaddr_to_user(vcpu, guestaddr);
-
-	BUG_ON(guestaddr & 7);
-
-	if (IS_ERR((void __force *) uptr))
-		return PTR_ERR((void __force *) uptr);
-
-	return put_user(value, (u64 __user *) uptr);
-}
-
-static inline int put_guest_u32(struct kvm_vcpu *vcpu, unsigned long guestaddr,
-				u32 value)
-{
-	void __user *uptr = __guestaddr_to_user(vcpu, guestaddr);
-
-	BUG_ON(guestaddr & 3);
-
-	if (IS_ERR((void __force *) uptr))
-		return PTR_ERR((void __force *) uptr);
-
-	return put_user(value, (u32 __user *) uptr);
-}
-
-static inline int put_guest_u16(struct kvm_vcpu *vcpu, unsigned long guestaddr,
-				u16 value)
-{
-	void __user *uptr = __guestaddr_to_user(vcpu, guestaddr);
-
-	BUG_ON(guestaddr & 1);
-
-	if (IS_ERR((void __force *) uptr))
-		return PTR_ERR((void __force *) uptr);
-
-	return put_user(value, (u16 __user *) uptr);
-}
-
-static inline int put_guest_u8(struct kvm_vcpu *vcpu, unsigned long guestaddr,
-			       u8 value)
-{
-	void __user *uptr = __guestaddr_to_user(vcpu, guestaddr);
-
-	if (IS_ERR((void __force *) uptr))
-		return PTR_ERR((void __force *) uptr);
-
-	return put_user(value, (u8 __user *) uptr);
-}
-
-
-static inline int __copy_to_guest_slow(struct kvm_vcpu *vcpu,
-				       unsigned long guestdest,
-				       void *from, unsigned long n)
-{
-	int rc;
-	unsigned long i;
-	u8 *data = from;
-
-	for (i = 0; i < n; i++) {
-		rc = put_guest_u8(vcpu, guestdest++, *(data++));
-		if (rc < 0)
-			return rc;
+	unsigned long gaddr = (unsigned long) gptr;
+	unsigned long uaddr;
+
+	if (prefixing) {
+		if (gaddr < 2 * PAGE_SIZE)
+			gaddr += prefix;
+		else if ((gaddr >= prefix) && (gaddr < prefix + 2 * PAGE_SIZE))
+			gaddr -= prefix;
 	}
-	return 0;
-}
-
-static inline int __copy_to_guest_fast(struct kvm_vcpu *vcpu,
-				       unsigned long guestdest,
-				       void *from, unsigned long n)
-{
-	int r;
+	uaddr = gmap_fault(gaddr, vcpu->arch.gmap);
+	if (IS_ERR_VALUE(uaddr))
+		uaddr = -EFAULT;
+	return (void __user *)uaddr;
+}
+
+#define get_guest(vcpu, x, gptr)				\
+({								\
+	__typeof__(gptr) __uptr = __gptr_to_uptr(vcpu, gptr, 1);\
+	int __mask = sizeof(__typeof__(*(gptr))) - 1;		\
+	int __ret = PTR_RET((void __force *)__uptr);		\
+								\
+	if (!__ret) {						\
+		BUG_ON((unsigned long)__uptr & __mask);		\
+		__ret = get_user(x, __uptr);			\
+	}							\
+	__ret;							\
+})
+
+#define put_guest(vcpu, x, gptr)				\
+({								\
+	__typeof__(gptr) __uptr = __gptr_to_uptr(vcpu, gptr, 1);\
+	int __mask = sizeof(__typeof__(*(gptr))) - 1;		\
+	int __ret = PTR_RET((void __force *)__uptr);		\
+								\
+	if (!__ret) {						\
+		BUG_ON((unsigned long)__uptr & __mask);		\
+		__ret = put_user(x, __uptr);			\
+	}							\
+	__ret;							\
+})
+
+static inline int __copy_guest(struct kvm_vcpu *vcpu, unsigned long to,
+			       unsigned long from, unsigned long len,
+			       int to_guest, int prefixing)
+{
+	unsigned long _len, rc;
 	void __user *uptr;
-	unsigned long size;
-
-	if (guestdest + n < guestdest)
-		return -EFAULT;
-
-	/* simple case: all within one segment table entry? */
-	if ((guestdest & PMD_MASK) == ((guestdest+n) & PMD_MASK)) {
-		uptr = (void __user *) gmap_fault(guestdest, vcpu->arch.gmap);
-
-		if (IS_ERR((void __force *) uptr))
-			return PTR_ERR((void __force *) uptr);
-
-		r = copy_to_user(uptr, from, n);
-
-		if (r)
-			r = -EFAULT;
-
-		goto out;
-	}
-
-	/* copy first segment */
-	uptr = (void __user *)gmap_fault(guestdest, vcpu->arch.gmap);
-
-	if (IS_ERR((void __force *) uptr))
-		return PTR_ERR((void __force *) uptr);
 
-	size = PMD_SIZE - (guestdest & ~PMD_MASK);
-
-	r = copy_to_user(uptr, from, size);
-
-	if (r) {
-		r = -EFAULT;
-		goto out;
-	}
-	from += size;
-	n -= size;
-	guestdest += size;
-
-	/* copy full segments */
-	while (n >= PMD_SIZE) {
-		uptr = (void __user *)gmap_fault(guestdest, vcpu->arch.gmap);
-
-		if (IS_ERR((void __force *) uptr))
-			return PTR_ERR((void __force *) uptr);
-
-		r = copy_to_user(uptr, from, PMD_SIZE);
-
-		if (r) {
-			r = -EFAULT;
-			goto out;
-		}
-		from += PMD_SIZE;
-		n -= PMD_SIZE;
-		guestdest += PMD_SIZE;
-	}
-
-	/* copy the tail segment */
-	if (n) {
-		uptr = (void __user *)gmap_fault(guestdest, vcpu->arch.gmap);
-
-		if (IS_ERR((void __force *) uptr))
-			return PTR_ERR((void __force *) uptr);
-
-		r = copy_to_user(uptr, from, n);
-
-		if (r)
-			r = -EFAULT;
-	}
-out:
-	return r;
-}
-
-static inline int copy_to_guest_absolute(struct kvm_vcpu *vcpu,
-					 unsigned long guestdest,
-					 void *from, unsigned long n)
-{
-	return __copy_to_guest_fast(vcpu, guestdest, from, n);
-}
-
-static inline int copy_to_guest(struct kvm_vcpu *vcpu, unsigned long guestdest,
-				void *from, unsigned long n)
-{
-	unsigned long prefix  = vcpu->arch.sie_block->prefix;
-
-	if ((guestdest < 2 * PAGE_SIZE) && (guestdest + n > 2 * PAGE_SIZE))
-		goto slowpath;
-
-	if ((guestdest < prefix) && (guestdest + n > prefix))
-		goto slowpath;
-
-	if ((guestdest < prefix + 2 * PAGE_SIZE)
-	    && (guestdest + n > prefix + 2 * PAGE_SIZE))
-		goto slowpath;
-
-	if (guestdest < 2 * PAGE_SIZE)
-		guestdest += prefix;
-	else if ((guestdest >= prefix) && (guestdest < prefix + 2 * PAGE_SIZE))
-		guestdest -= prefix;
-
-	return __copy_to_guest_fast(vcpu, guestdest, from, n);
-slowpath:
-	return __copy_to_guest_slow(vcpu, guestdest, from, n);
-}
-
-static inline int __copy_from_guest_slow(struct kvm_vcpu *vcpu, void *to,
-					 unsigned long guestsrc,
-					 unsigned long n)
-{
-	int rc;
-	unsigned long i;
-	u8 *data = to;
-
-	for (i = 0; i < n; i++) {
-		rc = get_guest_u8(vcpu, guestsrc++, data++);
-		if (rc < 0)
-			return rc;
+	while (len) {
+		uptr = to_guest ? (void __user *)to : (void __user *)from;
+		uptr = __gptr_to_uptr(vcpu, uptr, prefixing);
+		if (IS_ERR((void __force *)uptr))
+			return -EFAULT;
+		_len = PAGE_SIZE - ((unsigned long)uptr & (PAGE_SIZE - 1));
+		_len = min(_len, len);
+		if (to_guest)
+			rc = copy_to_user((void __user *) uptr, (void *)from, _len);
+		else
+			rc = copy_from_user((void *)to, (void __user *)uptr, _len);
+		if (rc)
+			return -EFAULT;
+		len -= _len;
+		from += _len;
+		to += _len;
 	}
 	return 0;
 }
 
-static inline int __copy_from_guest_fast(struct kvm_vcpu *vcpu, void *to,
-					 unsigned long guestsrc,
-					 unsigned long n)
-{
-	int r;
-	void __user *uptr;
-	unsigned long size;
-
-	if (guestsrc + n < guestsrc)
-		return -EFAULT;
-
-	/* simple case: all within one segment table entry? */
-	if ((guestsrc & PMD_MASK) == ((guestsrc+n) & PMD_MASK)) {
-		uptr = (void __user *) gmap_fault(guestsrc, vcpu->arch.gmap);
-
-		if (IS_ERR((void __force *) uptr))
-			return PTR_ERR((void __force *) uptr);
-
-		r = copy_from_user(to, uptr, n);
-
-		if (r)
-			r = -EFAULT;
-
-		goto out;
-	}
-
-	/* copy first segment */
-	uptr = (void __user *)gmap_fault(guestsrc, vcpu->arch.gmap);
-
-	if (IS_ERR((void __force *) uptr))
-		return PTR_ERR((void __force *) uptr);
-
-	size = PMD_SIZE - (guestsrc & ~PMD_MASK);
-
-	r = copy_from_user(to, uptr, size);
-
-	if (r) {
-		r = -EFAULT;
-		goto out;
-	}
-	to += size;
-	n -= size;
-	guestsrc += size;
-
-	/* copy full segments */
-	while (n >= PMD_SIZE) {
-		uptr = (void __user *)gmap_fault(guestsrc, vcpu->arch.gmap);
-
-		if (IS_ERR((void __force *) uptr))
-			return PTR_ERR((void __force *) uptr);
-
-		r = copy_from_user(to, uptr, PMD_SIZE);
-
-		if (r) {
-			r = -EFAULT;
-			goto out;
-		}
-		to += PMD_SIZE;
-		n -= PMD_SIZE;
-		guestsrc += PMD_SIZE;
-	}
-
-	/* copy the tail segment */
-	if (n) {
-		uptr = (void __user *)gmap_fault(guestsrc, vcpu->arch.gmap);
-
-		if (IS_ERR((void __force *) uptr))
-			return PTR_ERR((void __force *) uptr);
-
-		r = copy_from_user(to, uptr, n);
-
-		if (r)
-			r = -EFAULT;
-	}
-out:
-	return r;
-}
-
-static inline int copy_from_guest_absolute(struct kvm_vcpu *vcpu, void *to,
-					   unsigned long guestsrc,
-					   unsigned long n)
-{
-	return __copy_from_guest_fast(vcpu, to, guestsrc, n);
-}
-
-static inline int copy_from_guest(struct kvm_vcpu *vcpu, void *to,
-				  unsigned long guestsrc, unsigned long n)
-{
-	unsigned long prefix  = vcpu->arch.sie_block->prefix;
-
-	if ((guestsrc < 2 * PAGE_SIZE) && (guestsrc + n > 2 * PAGE_SIZE))
-		goto slowpath;
+#define copy_to_guest(vcpu, to, from, size) \
+	__copy_guest(vcpu, to, (unsigned long)from, size, 1, 1)
+#define copy_from_guest(vcpu, to, from, size) \
+	__copy_guest(vcpu, (unsigned long)to, from, size, 0, 1)
+#define copy_to_guest_absolute(vcpu, to, from, size) \
+	__copy_guest(vcpu, to, (unsigned long)from, size, 1, 0)
+#define copy_from_guest_absolute(vcpu, to, from, size) \
+	__copy_guest(vcpu, (unsigned long)to, from, size, 0, 0)
 
-	if ((guestsrc < prefix) && (guestsrc + n > prefix))
-		goto slowpath;
-
-	if ((guestsrc < prefix + 2 * PAGE_SIZE)
-	    && (guestsrc + n > prefix + 2 * PAGE_SIZE))
-		goto slowpath;
-
-	if (guestsrc < 2 * PAGE_SIZE)
-		guestsrc += prefix;
-	else if ((guestsrc >= prefix) && (guestsrc < prefix + 2 * PAGE_SIZE))
-		guestsrc -= prefix;
-
-	return __copy_from_guest_fast(vcpu, to, guestsrc, n);
-slowpath:
-	return __copy_from_guest_slow(vcpu, to, guestsrc, n);
-}
-#endif
+#endif /* __KVM_S390_GACCESS_H */

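The new gaccess.h interface infers the access width from the pointer type instead of encoding it in the helper name. A minimal caller sketch (the wrapper function below is hypothetical; the pattern matches the converted call sites in intercept.c further down):

	/* Hypothetical handler fragment: read a guest word through the new
	 * macro and turn a fault into a program interrupt for the guest. */
	static int read_guest_word(struct kvm_vcpu *vcpu, unsigned long gaddr)
	{
		u32 val;

		if (get_guest(vcpu, val, (u32 __user *) gaddr))
			return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
		return 0;
	}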
+ 7 - 11
arch/s390/kvm/intercept.c

@@ -43,12 +43,10 @@ static int handle_lctlg(struct kvm_vcpu *vcpu)
 	trace_kvm_s390_handle_lctl(vcpu, 1, reg1, reg3, useraddr);
 
 	do {
-		rc = get_guest_u64(vcpu, useraddr,
-				   &vcpu->arch.sie_block->gcr[reg]);
-		if (rc == -EFAULT) {
-			kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
-			break;
-		}
+		rc = get_guest(vcpu, vcpu->arch.sie_block->gcr[reg],
+			       (u64 __user *) useraddr);
+		if (rc)
+			return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
 		useraddr += 8;
 		if (reg == reg3)
 			break;
@@ -78,11 +76,9 @@ static int handle_lctl(struct kvm_vcpu *vcpu)
 
 	reg = reg1;
 	do {
-		rc = get_guest_u32(vcpu, useraddr, &val);
-		if (rc == -EFAULT) {
-			kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
-			break;
-		}
+		rc = get_guest(vcpu, val, (u32 __user *) useraddr);
+		if (rc)
+			return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
 		vcpu->arch.sie_block->gcr[reg] &= 0xffffffff00000000ul;
 		vcpu->arch.sie_block->gcr[reg] |= val;
 		useraddr += 4;

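The loops above walk control registers reg1 through reg3 in LCTL/LCTLG order. A standalone sketch of that walk (the modulo increment is an assumption; the hunks only show the loop head and the exit test):

	#include <stdint.h>

	/* Load control registers reg1..reg3, wrapping from 15 back to 0,
	 * one value per register; *src stands in for get_guest(). */
	static void walk_cregs(int reg1, int reg3, uint64_t gcr[16],
			       const uint64_t *src)
	{
		int reg = reg1;

		do {
			gcr[reg] = *src++;
			if (reg == reg3)
				break;
			reg = (reg + 1) % 16;
		} while (1);
	}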
+ 73 - 172
arch/s390/kvm/interrupt.c

@@ -180,7 +180,7 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
 				   struct kvm_s390_interrupt_info *inti)
 {
 	const unsigned short table[] = { 2, 4, 4, 6 };
-	int rc, exception = 0;
+	int rc = 0;
 
 	switch (inti->type) {
 	case KVM_S390_INT_EMERGENCY:
@@ -188,74 +188,41 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
 		vcpu->stat.deliver_emergency_signal++;
 		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
 						 inti->emerg.code, 0);
-		rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x1201);
-		if (rc == -EFAULT)
-			exception = 1;
-
-		rc = put_guest_u16(vcpu, __LC_EXT_CPU_ADDR, inti->emerg.code);
-		if (rc == -EFAULT)
-			exception = 1;
-
-		rc = copy_to_guest(vcpu, __LC_EXT_OLD_PSW,
-			 &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-		if (rc == -EFAULT)
-			exception = 1;
-
-		rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
-			__LC_EXT_NEW_PSW, sizeof(psw_t));
-		if (rc == -EFAULT)
-			exception = 1;
+		rc  = put_guest(vcpu, 0x1201, (u16 __user *)__LC_EXT_INT_CODE);
+		rc |= put_guest(vcpu, inti->emerg.code,
+				(u16 __user *)__LC_EXT_CPU_ADDR);
+		rc |= copy_to_guest(vcpu, __LC_EXT_OLD_PSW,
+				    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+		rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
+				      __LC_EXT_NEW_PSW, sizeof(psw_t));
 		break;
-
 	case KVM_S390_INT_EXTERNAL_CALL:
 		VCPU_EVENT(vcpu, 4, "%s", "interrupt: sigp ext call");
 		vcpu->stat.deliver_external_call++;
 		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
 						 inti->extcall.code, 0);
-		rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x1202);
-		if (rc == -EFAULT)
-			exception = 1;
-
-		rc = put_guest_u16(vcpu, __LC_EXT_CPU_ADDR, inti->extcall.code);
-		if (rc == -EFAULT)
-			exception = 1;
-
-		rc = copy_to_guest(vcpu, __LC_EXT_OLD_PSW,
-			 &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-		if (rc == -EFAULT)
-			exception = 1;
-
-		rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
-			__LC_EXT_NEW_PSW, sizeof(psw_t));
-		if (rc == -EFAULT)
-			exception = 1;
+		rc  = put_guest(vcpu, 0x1202, (u16 __user *)__LC_EXT_INT_CODE);
+		rc |= put_guest(vcpu, inti->extcall.code,
+				(u16 __user *)__LC_EXT_CPU_ADDR);
+		rc |= copy_to_guest(vcpu, __LC_EXT_OLD_PSW,
+				    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+		rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
+				      __LC_EXT_NEW_PSW, sizeof(psw_t));
 		break;
-
 	case KVM_S390_INT_SERVICE:
 		VCPU_EVENT(vcpu, 4, "interrupt: sclp parm:%x",
 			   inti->ext.ext_params);
 		vcpu->stat.deliver_service_signal++;
 		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
 						 inti->ext.ext_params, 0);
-		rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x2401);
-		if (rc == -EFAULT)
-			exception = 1;
-
-		rc = copy_to_guest(vcpu, __LC_EXT_OLD_PSW,
-			 &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-		if (rc == -EFAULT)
-			exception = 1;
-
-		rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
-			__LC_EXT_NEW_PSW, sizeof(psw_t));
-		if (rc == -EFAULT)
-			exception = 1;
-
-		rc = put_guest_u32(vcpu, __LC_EXT_PARAMS, inti->ext.ext_params);
-		if (rc == -EFAULT)
-			exception = 1;
+		rc  = put_guest(vcpu, 0x2401, (u16 __user *)__LC_EXT_INT_CODE);
+		rc |= copy_to_guest(vcpu, __LC_EXT_OLD_PSW,
+				    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+		rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
+				      __LC_EXT_NEW_PSW, sizeof(psw_t));
+		rc |= put_guest(vcpu, inti->ext.ext_params,
+				(u32 __user *)__LC_EXT_PARAMS);
 		break;
-
 	case KVM_S390_INT_VIRTIO:
 		VCPU_EVENT(vcpu, 4, "interrupt: virtio parm:%x,parm64:%llx",
 			   inti->ext.ext_params, inti->ext.ext_params2);
@@ -263,34 +230,17 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
 		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
 						 inti->ext.ext_params,
 						 inti->ext.ext_params2);
-		rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x2603);
-		if (rc == -EFAULT)
-			exception = 1;
-
-		rc = put_guest_u16(vcpu, __LC_EXT_CPU_ADDR, 0x0d00);
-		if (rc == -EFAULT)
-			exception = 1;
-
-		rc = copy_to_guest(vcpu, __LC_EXT_OLD_PSW,
-			 &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-		if (rc == -EFAULT)
-			exception = 1;
-
-		rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
-			__LC_EXT_NEW_PSW, sizeof(psw_t));
-		if (rc == -EFAULT)
-			exception = 1;
-
-		rc = put_guest_u32(vcpu, __LC_EXT_PARAMS, inti->ext.ext_params);
-		if (rc == -EFAULT)
-			exception = 1;
-
-		rc = put_guest_u64(vcpu, __LC_EXT_PARAMS2,
-				   inti->ext.ext_params2);
-		if (rc == -EFAULT)
-			exception = 1;
+		rc  = put_guest(vcpu, 0x2603, (u16 __user *)__LC_EXT_INT_CODE);
+		rc |= put_guest(vcpu, 0x0d00, (u16 __user *)__LC_EXT_CPU_ADDR);
+		rc |= copy_to_guest(vcpu, __LC_EXT_OLD_PSW,
+				    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+		rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
+				      __LC_EXT_NEW_PSW, sizeof(psw_t));
+		rc |= put_guest(vcpu, inti->ext.ext_params,
+				(u32 __user *)__LC_EXT_PARAMS);
+		rc |= put_guest(vcpu, inti->ext.ext_params2,
+				(u64 __user *)__LC_EXT_PARAMS2);
 		break;
-
 	case KVM_S390_SIGP_STOP:
 		VCPU_EVENT(vcpu, 4, "%s", "interrupt: cpu stop");
 		vcpu->stat.deliver_stop_signal++;
@@ -313,18 +263,14 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
 		vcpu->stat.deliver_restart_signal++;
 		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
 						 0, 0);
-		rc = copy_to_guest(vcpu, offsetof(struct _lowcore,
-		  restart_old_psw), &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-		if (rc == -EFAULT)
-			exception = 1;
-
-		rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
-			offsetof(struct _lowcore, restart_psw), sizeof(psw_t));
-		if (rc == -EFAULT)
-			exception = 1;
+		rc  = copy_to_guest(vcpu,
+				    offsetof(struct _lowcore, restart_old_psw),
+				    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+		rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
+				      offsetof(struct _lowcore, restart_psw),
+				      sizeof(psw_t));
 		atomic_clear_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
 		break;
-
 	case KVM_S390_PROGRAM_INT:
 		VCPU_EVENT(vcpu, 4, "interrupt: pgm check code:%x, ilc:%x",
 			   inti->pgm.code,
@@ -332,24 +278,13 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
 		vcpu->stat.deliver_program_int++;
 		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
 						 inti->pgm.code, 0);
-		rc = put_guest_u16(vcpu, __LC_PGM_INT_CODE, inti->pgm.code);
-		if (rc == -EFAULT)
-			exception = 1;
-
-		rc = put_guest_u16(vcpu, __LC_PGM_ILC,
-			table[vcpu->arch.sie_block->ipa >> 14]);
-		if (rc == -EFAULT)
-			exception = 1;
-
-		rc = copy_to_guest(vcpu, __LC_PGM_OLD_PSW,
-			 &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-		if (rc == -EFAULT)
-			exception = 1;
-
-		rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
-			__LC_PGM_NEW_PSW, sizeof(psw_t));
-		if (rc == -EFAULT)
-			exception = 1;
+		rc  = put_guest(vcpu, inti->pgm.code, (u16 __user *)__LC_PGM_INT_CODE);
+		rc |= put_guest(vcpu, table[vcpu->arch.sie_block->ipa >> 14],
+				(u16 __user *)__LC_PGM_ILC);
+		rc |= copy_to_guest(vcpu, __LC_PGM_OLD_PSW,
+				    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+		rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
+				      __LC_PGM_NEW_PSW, sizeof(psw_t));
 		break;
 
 	case KVM_S390_MCHK:
@@ -358,24 +293,13 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
 		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
 						 inti->mchk.cr14,
 						 inti->mchk.mcic);
-		rc = kvm_s390_vcpu_store_status(vcpu,
-						KVM_S390_STORE_STATUS_PREFIXED);
-		if (rc == -EFAULT)
-			exception = 1;
-
-		rc = put_guest_u64(vcpu, __LC_MCCK_CODE, inti->mchk.mcic);
-		if (rc == -EFAULT)
-			exception = 1;
-
-		rc = copy_to_guest(vcpu, __LC_MCK_OLD_PSW,
-				   &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-		if (rc == -EFAULT)
-			exception = 1;
-
-		rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
-				     __LC_MCK_NEW_PSW, sizeof(psw_t));
-		if (rc == -EFAULT)
-			exception = 1;
+		rc  = kvm_s390_vcpu_store_status(vcpu,
+						 KVM_S390_STORE_STATUS_PREFIXED);
+		rc |= put_guest(vcpu, inti->mchk.mcic, (u64 __user *) __LC_MCCK_CODE);
+		rc |= copy_to_guest(vcpu, __LC_MCK_OLD_PSW,
+				    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+		rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
+				      __LC_MCK_NEW_PSW, sizeof(psw_t));
 		break;
 
 	case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
@@ -388,67 +312,44 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
 		vcpu->stat.deliver_io_int++;
 		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
 						 param0, param1);
-		rc = put_guest_u16(vcpu, __LC_SUBCHANNEL_ID,
-				   inti->io.subchannel_id);
-		if (rc == -EFAULT)
-			exception = 1;
-
-		rc = put_guest_u16(vcpu, __LC_SUBCHANNEL_NR,
-				   inti->io.subchannel_nr);
-		if (rc == -EFAULT)
-			exception = 1;
-
-		rc = put_guest_u32(vcpu, __LC_IO_INT_PARM,
-				   inti->io.io_int_parm);
-		if (rc == -EFAULT)
-			exception = 1;
-
-		rc = put_guest_u32(vcpu, __LC_IO_INT_WORD,
-				   inti->io.io_int_word);
-		if (rc == -EFAULT)
-			exception = 1;
-
-		rc = copy_to_guest(vcpu, __LC_IO_OLD_PSW,
-				   &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-		if (rc == -EFAULT)
-			exception = 1;
-
-		rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
-				     __LC_IO_NEW_PSW, sizeof(psw_t));
-		if (rc == -EFAULT)
-			exception = 1;
+		rc  = put_guest(vcpu, inti->io.subchannel_id,
+				(u16 __user *) __LC_SUBCHANNEL_ID);
+		rc |= put_guest(vcpu, inti->io.subchannel_nr,
+				(u16 __user *) __LC_SUBCHANNEL_NR);
+		rc |= put_guest(vcpu, inti->io.io_int_parm,
+				(u32 __user *) __LC_IO_INT_PARM);
+		rc |= put_guest(vcpu, inti->io.io_int_word,
+				(u32 __user *) __LC_IO_INT_WORD);
+		rc |= copy_to_guest(vcpu, __LC_IO_OLD_PSW,
+				    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+		rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
+				      __LC_IO_NEW_PSW, sizeof(psw_t));
 		break;
 	}
 	default:
 		BUG();
 	}
-	if (exception) {
+	if (rc) {
 		printk("kvm: The guest lowcore is not mapped during interrupt "
-			"delivery, killing userspace\n");
+		       "delivery, killing userspace\n");
 		do_exit(SIGKILL);
 	}
 }
 
 static int __try_deliver_ckc_interrupt(struct kvm_vcpu *vcpu)
 {
-	int rc, exception = 0;
+	int rc;
 
 	if (psw_extint_disabled(vcpu))
 		return 0;
 	if (!(vcpu->arch.sie_block->gcr[0] & 0x800ul))
 		return 0;
-	rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x1004);
-	if (rc == -EFAULT)
-		exception = 1;
-	rc = copy_to_guest(vcpu, __LC_EXT_OLD_PSW,
-		 &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-	if (rc == -EFAULT)
-		exception = 1;
-	rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
-		__LC_EXT_NEW_PSW, sizeof(psw_t));
-	if (rc == -EFAULT)
-		exception = 1;
-	if (exception) {
+	rc  = put_guest(vcpu, 0x1004, (u16 __user *)__LC_EXT_INT_CODE);
+	rc |= copy_to_guest(vcpu, __LC_EXT_OLD_PSW,
+			    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+	rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
+			      __LC_EXT_NEW_PSW, sizeof(psw_t));
	if (rc) {
 		printk("kvm: The guest lowcore is not mapped during interrupt "
 			"delivery, killing userspace\n");
 		do_exit(SIGKILL);

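Throughout this file the rewrite replaces per-call -EFAULT checks with OR-accumulation of return codes and a single test at the end. A minimal standalone illustration of the pattern (helper names hypothetical):

	#include <stdio.h>

	/* Each step returns 0 on success or a negative errno. OR-ing the
	 * results keeps the final value nonzero if any step failed; the
	 * individual error code is lost, which is acceptable here because
	 * the only response is to kill userspace. */
	static int step(int fail) { return fail ? -14 /* -EFAULT */ : 0; }

	int main(void)
	{
		int rc;

		rc  = step(0);
		rc |= step(1);	/* simulated fault */
		rc |= step(0);
		if (rc)
			printf("delivery failed (rc=%d)\n", rc);
		return 0;
	}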
+ 22 - 21
arch/s390/kvm/kvm-s390.c

@@ -142,12 +142,16 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_ONE_REG:
 	case KVM_CAP_ENABLE_CAP:
 	case KVM_CAP_S390_CSS_SUPPORT:
+	case KVM_CAP_IOEVENTFD:
 		r = 1;
 		break;
 	case KVM_CAP_NR_VCPUS:
 	case KVM_CAP_MAX_VCPUS:
 		r = KVM_MAX_VCPUS;
 		break;
+	case KVM_CAP_NR_MEMSLOTS:
+		r = KVM_USER_MEM_SLOTS;
+		break;
 	case KVM_CAP_S390_COW:
 		r = MACHINE_HAS_ESOP;
 		break;
@@ -632,8 +636,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
 		} else {
 			VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
 			trace_kvm_s390_sie_fault(vcpu);
-			kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
-			rc = 0;
+			rc = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
 		}
 	}
 	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
@@ -974,22 +977,13 @@ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
 /* Section: memory related */
 int kvm_arch_prepare_memory_region(struct kvm *kvm,
 				   struct kvm_memory_slot *memslot,
-				   struct kvm_memory_slot old,
 				   struct kvm_userspace_memory_region *mem,
-				   bool user_alloc)
+				   enum kvm_mr_change change)
 {
-	/* A few sanity checks. We can have exactly one memory slot which has
-	   to start at guest virtual zero and which has to be located at a
-	   page boundary in userland and which has to end at a page boundary.
-	   The memory in userland is ok to be fragmented into various different
-	   vmas. It is okay to mmap() and munmap() stuff in this slot after
-	   doing this call at any time */
-
-	if (mem->slot)
-		return -EINVAL;
-
-	if (mem->guest_phys_addr)
-		return -EINVAL;
+	/* A few sanity checks. We can have memory slots which have to be
+	   located/ended at a segment boundary (1MB). The memory in userland is
+	   ok to be fragmented into various different vmas. It is okay to mmap()
+	   and munmap() stuff in this slot after doing this call at any time */
 
 	if (mem->userspace_addr & 0xffffful)
 		return -EINVAL;
@@ -997,19 +991,26 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
 	if (mem->memory_size & 0xffffful)
 		return -EINVAL;
 
-	if (!user_alloc)
-		return -EINVAL;
-
 	return 0;
 }
 
 void kvm_arch_commit_memory_region(struct kvm *kvm,
 				struct kvm_userspace_memory_region *mem,
-				struct kvm_memory_slot old,
-				bool user_alloc)
+				const struct kvm_memory_slot *old,
+				enum kvm_mr_change change)
 {
 	int rc;
 
+	/* If the basics of the memslot do not change, we do not want
+	 * to update the gmap. Every update causes several unnecessary
+	 * segment translation exceptions. This is usually handled just
+	 * fine by the normal fault handler + gmap, but it will also
+	 * cause faults on the prefix page of running guest CPUs.
+	 */
+	if (old->userspace_addr == mem->userspace_addr &&
+	    old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
+	    old->npages * PAGE_SIZE == mem->memory_size)
+		return;
 
 	rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
 		mem->guest_phys_addr, mem->memory_size);

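The relaxed prepare hook keeps only the 1 MB segment alignment checks: masking with 0xffffful isolates the low 20 bits, so any in-segment offset rejects the memslot. A worked illustration:

	#include <stdio.h>

	int main(void)
	{
		unsigned long aligned   = 0x40000000UL;	/* 1 GB, segment-aligned */
		unsigned long unaligned = 0x40001000UL;	/* 4 KB into a segment */

		/* nonzero low 20 bits -> -EINVAL in the hook above */
		printf("%#lx -> %s\n", aligned,
		       (aligned & 0xffffful) ? "rejected" : "ok");
		printf("%#lx -> %s\n", unaligned,
		       (unaligned & 0xffffful) ? "rejected" : "ok");
		return 0;
	}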
+ 6 - 6
arch/s390/kvm/kvm-s390.h

@@ -110,12 +110,12 @@ enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer);
 void kvm_s390_tasklet(unsigned long parm);
 void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu);
 void kvm_s390_deliver_pending_machine_checks(struct kvm_vcpu *vcpu);
-int kvm_s390_inject_vm(struct kvm *kvm,
-		struct kvm_s390_interrupt *s390int);
-int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
-		struct kvm_s390_interrupt *s390int);
-int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code);
-int kvm_s390_inject_sigp_stop(struct kvm_vcpu *vcpu, int action);
+int __must_check kvm_s390_inject_vm(struct kvm *kvm,
+				    struct kvm_s390_interrupt *s390int);
+int __must_check kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
+				      struct kvm_s390_interrupt *s390int);
+int __must_check kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code);
+int __must_check kvm_s390_inject_sigp_stop(struct kvm_vcpu *vcpu, int action);
 struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm,
 						    u64 cr6, u64 schid);
 

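Annotating the injection helpers with __must_check means ignored failures now draw compiler warnings, which is what forces the callers above to propagate rc instead of dropping it. A minimal sketch of the mechanism (in the kernel, __must_check expands to the GCC attribute used here):

	#define __must_check __attribute__((warn_unused_result))

	static __must_check int inject(void) { return -1; }

	int main(void)
	{
		inject();			/* -Wunused-result warning */
		return inject() ? 1 : 0;	/* ok: result is consumed */
	}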
+ 109 - 161
arch/s390/kvm/priv.c

@@ -14,6 +14,8 @@
 #include <linux/kvm.h>
 #include <linux/gfp.h>
 #include <linux/errno.h>
+#include <linux/compat.h>
+#include <asm/asm-offsets.h>
 #include <asm/current.h>
 #include <asm/debug.h>
 #include <asm/ebcdic.h>
@@ -35,31 +37,24 @@ static int handle_set_prefix(struct kvm_vcpu *vcpu)
 	operand2 = kvm_s390_get_base_disp_s(vcpu);
 
 	/* must be word boundary */
-	if (operand2 & 3) {
-		kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
-		goto out;
-	}
+	if (operand2 & 3)
+		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 
 	/* get the value */
-	if (get_guest_u32(vcpu, operand2, &address)) {
-		kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
-		goto out;
-	}
+	if (get_guest(vcpu, address, (u32 __user *) operand2))
+		return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
 
 	address = address & 0x7fffe000u;
 
 	/* make sure that the new value is valid memory */
 	if (copy_from_guest_absolute(vcpu, &tmp, address, 1) ||
-	   (copy_from_guest_absolute(vcpu, &tmp, address + PAGE_SIZE, 1))) {
-		kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
-		goto out;
-	}
+	   (copy_from_guest_absolute(vcpu, &tmp, address + PAGE_SIZE, 1)))
+		return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
 
 	kvm_s390_set_prefix(vcpu, address);
 
 	VCPU_EVENT(vcpu, 5, "setting prefix to %x", address);
 	trace_kvm_s390_handle_prefix(vcpu, 1, address);
-out:
 	return 0;
 }
 
@@ -73,49 +68,37 @@ static int handle_store_prefix(struct kvm_vcpu *vcpu)
 	operand2 = kvm_s390_get_base_disp_s(vcpu);
 
 	/* must be word boundary */
-	if (operand2 & 3) {
-		kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
-		goto out;
-	}
+	if (operand2 & 3)
+		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 
 	address = vcpu->arch.sie_block->prefix;
 	address = address & 0x7fffe000u;
 
 	/* get the value */
-	if (put_guest_u32(vcpu, operand2, address)) {
-		kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
-		goto out;
-	}
+	if (put_guest(vcpu, address, (u32 __user *)operand2))
+		return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
 
 	VCPU_EVENT(vcpu, 5, "storing prefix to %x", address);
 	trace_kvm_s390_handle_prefix(vcpu, 0, address);
-out:
 	return 0;
 }
 
 static int handle_store_cpu_address(struct kvm_vcpu *vcpu)
 {
 	u64 useraddr;
-	int rc;
 
 	vcpu->stat.instruction_stap++;
 
 	useraddr = kvm_s390_get_base_disp_s(vcpu);
 
-	if (useraddr & 1) {
-		kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
-		goto out;
-	}
+	if (useraddr & 1)
+		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 
-	rc = put_guest_u16(vcpu, useraddr, vcpu->vcpu_id);
-	if (rc == -EFAULT) {
-		kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
-		goto out;
-	}
+	if (put_guest(vcpu, vcpu->vcpu_id, (u16 __user *)useraddr))
+		return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
 
 	VCPU_EVENT(vcpu, 5, "storing cpu address to %llx", useraddr);
 	trace_kvm_s390_handle_stap(vcpu, useraddr);
-out:
 	return 0;
 }
 
@@ -129,36 +112,38 @@ static int handle_skey(struct kvm_vcpu *vcpu)
 
 static int handle_tpi(struct kvm_vcpu *vcpu)
 {
-	u64 addr;
 	struct kvm_s390_interrupt_info *inti;
+	u64 addr;
 	int cc;
 
 	addr = kvm_s390_get_base_disp_s(vcpu);
-
+	if (addr & 3)
+		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+	cc = 0;
 	inti = kvm_s390_get_io_int(vcpu->kvm, vcpu->run->s.regs.crs[6], 0);
 	inti = kvm_s390_get_io_int(vcpu->kvm, vcpu->run->s.regs.crs[6], 0);
-	if (inti) {
-		if (addr) {
-			/*
-			 * Store the two-word I/O interruption code into the
-			 * provided area.
-			 */
-			put_guest_u16(vcpu, addr, inti->io.subchannel_id);
-			put_guest_u16(vcpu, addr + 2, inti->io.subchannel_nr);
-			put_guest_u32(vcpu, addr + 4, inti->io.io_int_parm);
-		} else {
-			/*
-			 * Store the three-word I/O interruption code into
-			 * the appropriate lowcore area.
-			 */
-			put_guest_u16(vcpu, 184, inti->io.subchannel_id);
-			put_guest_u16(vcpu, 186, inti->io.subchannel_nr);
-			put_guest_u32(vcpu, 188, inti->io.io_int_parm);
-			put_guest_u32(vcpu, 192, inti->io.io_int_word);
-		}
-		cc = 1;
-	} else
-		cc = 0;
+	if (!inti)
+		goto no_interrupt;
+	cc = 1;
+	if (addr) {
+		/*
+		 * Store the two-word I/O interruption code into the
+		 * provided area.
+		 */
+		put_guest(vcpu, inti->io.subchannel_id, (u16 __user *) addr);
+		put_guest(vcpu, inti->io.subchannel_nr, (u16 __user *) (addr + 2));
+		put_guest(vcpu, inti->io.io_int_parm, (u32 __user *) (addr + 4));
+	} else {
+		/*
+		 * Store the three-word I/O interruption code into
+		 * the appropriate lowcore area.
+		 */
+		put_guest(vcpu, inti->io.subchannel_id, (u16 __user *) __LC_SUBCHANNEL_ID);
+		put_guest(vcpu, inti->io.subchannel_nr, (u16 __user *) __LC_SUBCHANNEL_NR);
+		put_guest(vcpu, inti->io.io_int_parm, (u32 __user *) __LC_IO_INT_PARM);
+		put_guest(vcpu, inti->io.io_int_word, (u32 __user *) __LC_IO_INT_WORD);
+	}
 	kfree(inti);
+no_interrupt:
 	/* Set condition code and we're done. */
 	vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44);
 	vcpu->arch.sie_block->gpsw.mask |= (cc & 3ul) << 44;
@@ -230,13 +215,10 @@ static int handle_stfl(struct kvm_vcpu *vcpu)
 
 	rc = copy_to_guest(vcpu, offsetof(struct _lowcore, stfl_fac_list),
 			   &facility_list, sizeof(facility_list));
-	if (rc == -EFAULT)
-		kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
-	else {
-		VCPU_EVENT(vcpu, 5, "store facility list value %x",
-			   facility_list);
-		trace_kvm_s390_handle_stfl(vcpu, facility_list);
-	}
+	if (rc)
+		return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+	VCPU_EVENT(vcpu, 5, "store facility list value %x", facility_list);
+	trace_kvm_s390_handle_stfl(vcpu, facility_list);
 	return 0;
 }
 
@@ -249,112 +231,80 @@ static void handle_new_psw(struct kvm_vcpu *vcpu)
 
 #define PSW_MASK_ADDR_MODE (PSW_MASK_EA | PSW_MASK_BA)
 #define PSW_MASK_UNASSIGNED 0xb80800fe7fffffffUL
-#define PSW_ADDR_24 0x00000000000fffffUL
+#define PSW_ADDR_24 0x0000000000ffffffUL
 #define PSW_ADDR_31 0x000000007fffffffUL
 
+static int is_valid_psw(psw_t *psw) {
+	if (psw->mask & PSW_MASK_UNASSIGNED)
+		return 0;
+	if ((psw->mask & PSW_MASK_ADDR_MODE) == PSW_MASK_BA) {
+		if (psw->addr & ~PSW_ADDR_31)
+			return 0;
+	}
+	if (!(psw->mask & PSW_MASK_ADDR_MODE) && (psw->addr & ~PSW_ADDR_24))
+		return 0;
+	if ((psw->mask & PSW_MASK_ADDR_MODE) ==  PSW_MASK_EA)
+		return 0;
+	return 1;
+}
+
 int kvm_s390_handle_lpsw(struct kvm_vcpu *vcpu)
 {
-	u64 addr;
+	psw_t *gpsw = &vcpu->arch.sie_block->gpsw;
 	psw_compat_t new_psw;
+	u64 addr;
 
-	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+	if (gpsw->mask & PSW_MASK_PSTATE)
 		return kvm_s390_inject_program_int(vcpu,
 						   PGM_PRIVILEGED_OPERATION);
-
 	addr = kvm_s390_get_base_disp_s(vcpu);
-
-	if (addr & 7) {
-		kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
-		goto out;
-	}
-
-	if (copy_from_guest(vcpu, &new_psw, addr, sizeof(new_psw))) {
-		kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
-		goto out;
-	}
-
-	if (!(new_psw.mask & PSW32_MASK_BASE)) {
-		kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
-		goto out;
-	}
-
-	vcpu->arch.sie_block->gpsw.mask =
-		(new_psw.mask & ~PSW32_MASK_BASE) << 32;
-	vcpu->arch.sie_block->gpsw.addr = new_psw.addr;
-
-	if ((vcpu->arch.sie_block->gpsw.mask & PSW_MASK_UNASSIGNED) ||
-	    (!(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_ADDR_MODE) &&
-	     (vcpu->arch.sie_block->gpsw.addr & ~PSW_ADDR_24)) ||
-	    ((vcpu->arch.sie_block->gpsw.mask & PSW_MASK_ADDR_MODE) ==
-	     PSW_MASK_EA)) {
-		kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
-		goto out;
-	}
-
+	if (addr & 7)
+		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+	if (copy_from_guest(vcpu, &new_psw, addr, sizeof(new_psw)))
+		return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+	if (!(new_psw.mask & PSW32_MASK_BASE))
+		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+	gpsw->mask = (new_psw.mask & ~PSW32_MASK_BASE) << 32;
+	gpsw->mask |= new_psw.addr & PSW32_ADDR_AMODE;
+	gpsw->addr = new_psw.addr & ~PSW32_ADDR_AMODE;
+	if (!is_valid_psw(gpsw))
+		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 	handle_new_psw(vcpu);
 	handle_new_psw(vcpu);
-out:
 	return 0;
 }
 
 static int handle_lpswe(struct kvm_vcpu *vcpu)
 {
-	u64 addr;
 	psw_t new_psw;
+	u64 addr;
 
 	addr = kvm_s390_get_base_disp_s(vcpu);
-	if (addr & 7) {
-		kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
-		goto out;
-	}
-
-	if (copy_from_guest(vcpu, &new_psw, addr, sizeof(new_psw))) {
-		kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
-		goto out;
-	}
-
-	vcpu->arch.sie_block->gpsw.mask = new_psw.mask;
-	vcpu->arch.sie_block->gpsw.addr = new_psw.addr;
-
-	if ((vcpu->arch.sie_block->gpsw.mask & PSW_MASK_UNASSIGNED) ||
-	    (((vcpu->arch.sie_block->gpsw.mask & PSW_MASK_ADDR_MODE) ==
-	      PSW_MASK_BA) &&
-	     (vcpu->arch.sie_block->gpsw.addr & ~PSW_ADDR_31)) ||
-	    (!(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_ADDR_MODE) &&
-	     (vcpu->arch.sie_block->gpsw.addr & ~PSW_ADDR_24)) ||
-	    ((vcpu->arch.sie_block->gpsw.mask & PSW_MASK_ADDR_MODE) ==
-	     PSW_MASK_EA)) {
-		kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
-		goto out;
-	}
-
+	if (addr & 7)
+		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+	if (copy_from_guest(vcpu, &new_psw, addr, sizeof(new_psw)))
+		return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+	vcpu->arch.sie_block->gpsw = new_psw;
+	if (!is_valid_psw(&vcpu->arch.sie_block->gpsw))
+		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 	handle_new_psw(vcpu);
-out:
 	return 0;
 }
 
 static int handle_stidp(struct kvm_vcpu *vcpu)
 {
 	u64 operand2;
-	int rc;
 
 	vcpu->stat.instruction_stidp++;
 
 	operand2 = kvm_s390_get_base_disp_s(vcpu);
 
-	if (operand2 & 7) {
-		kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
-		goto out;
-	}
+	if (operand2 & 7)
+		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 
-	rc = put_guest_u64(vcpu, operand2, vcpu->arch.stidp_data);
-	if (rc == -EFAULT) {
-		kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
-		goto out;
-	}
+	if (put_guest(vcpu, vcpu->arch.stidp_data, (u64 __user *)operand2))
+		return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
 
 	VCPU_EVENT(vcpu, 5, "%s", "store cpu id");
-out:
 	return 0;
 }
 
@@ -394,8 +344,9 @@ static int handle_stsi(struct kvm_vcpu *vcpu)
 	int fc = (vcpu->run->s.regs.gprs[0] & 0xf0000000) >> 28;
 	int sel1 = vcpu->run->s.regs.gprs[0] & 0xff;
 	int sel2 = vcpu->run->s.regs.gprs[1] & 0xffff;
+	unsigned long mem = 0;
 	u64 operand2;
-	unsigned long mem;
+	int rc = 0;
 
 	vcpu->stat.instruction_stsi++;
 	VCPU_EVENT(vcpu, 4, "stsi: fc: %x sel1: %x sel2: %x", fc, sel1, sel2);
@@ -414,37 +365,37 @@ static int handle_stsi(struct kvm_vcpu *vcpu)
 	case 2:
 		mem = get_zeroed_page(GFP_KERNEL);
 		if (!mem)
-			goto out_fail;
+			goto out_no_data;
 		if (stsi((void *) mem, fc, sel1, sel2))
-			goto out_mem;
+			goto out_no_data;
 		break;
 	case 3:
 		if (sel1 != 2 || sel2 != 2)
-			goto out_fail;
+			goto out_no_data;
 		mem = get_zeroed_page(GFP_KERNEL);
 		if (!mem)
-			goto out_fail;
+			goto out_no_data;
 		handle_stsi_3_2_2(vcpu, (void *) mem);
 		break;
 	default:
-		goto out_fail;
+		goto out_no_data;
 	}
 
 	if (copy_to_guest_absolute(vcpu, operand2, (void *) mem, PAGE_SIZE)) {
-		kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
-		goto out_mem;
+		rc = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+		goto out_exception;
 	}
 	trace_kvm_s390_handle_stsi(vcpu, fc, sel1, sel2, operand2);
 	free_page(mem);
 	vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44);
 	vcpu->run->s.regs.gprs[0] = 0;
 	return 0;
-out_mem:
-	free_page(mem);
-out_fail:
+out_no_data:
 	/* condition code 3 */
 	vcpu->arch.sie_block->gpsw.mask |= 3ul << 44;
-	return 0;
+out_exception:
+	free_page(mem);
+	return rc;
 }
 
 static const intercept_handler_t b2_handlers[256] = {
@@ -575,20 +526,13 @@ static int handle_tprot(struct kvm_vcpu *vcpu)
 	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_DAT)
 		return -EOPNOTSUPP;
 
-
-	/* we must resolve the address without holding the mmap semaphore.
-	 * This is ok since the userspace hypervisor is not supposed to change
-	 * the mapping while the guest queries the memory. Otherwise the guest
-	 * might crash or get wrong info anyway. */
-	user_address = (unsigned long) __guestaddr_to_user(vcpu, address1);
-
 	down_read(&current->mm->mmap_sem);
+	user_address = __gmap_translate(address1, vcpu->arch.gmap);
+	if (IS_ERR_VALUE(user_address))
+		goto out_inject;
 	vma = find_vma(current->mm, user_address);
-	if (!vma) {
-		up_read(&current->mm->mmap_sem);
-		return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
-	}
-
+	if (!vma)
+		goto out_inject;
 	vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44);
 	if (!(vma->vm_flags & VM_WRITE) && (vma->vm_flags & VM_READ))
 		vcpu->arch.sie_block->gpsw.mask |= (1ul << 44);
@@ -597,6 +541,10 @@ static int handle_tprot(struct kvm_vcpu *vcpu)
 
 	up_read(&current->mm->mmap_sem);
 	return 0;
+
+out_inject:
+	up_read(&current->mm->mmap_sem);
+	return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
 }
 
 int kvm_s390_handle_e5(struct kvm_vcpu *vcpu)

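One fix buried in the hunks above: PSW_ADDR_24 grows from 0xfffff (only 20 bits) to 0xffffff, the full 24-bit address space, so legal 24-bit addresses between 1 MB and 16 MB no longer trip the ~PSW_ADDR_24 specification check in is_valid_psw(). A worked illustration:

	#include <stdio.h>

	int main(void)
	{
		unsigned long old_mask = 0x00000000000fffffUL;	/* 20 bits */
		unsigned long new_mask = 0x0000000000ffffffUL;	/* 24 bits */
		unsigned long addr = 0x200000UL;	/* 2 MB: valid in 24-bit mode */

		printf("old check faults: %d\n", (addr & ~old_mask) != 0);	/* 1 */
		printf("new check faults: %d\n", (addr & ~new_mask) != 0);	/* 0 */
		return 0;
	}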
+ 4 - 0
arch/x86/include/asm/entry_arch.h

@@ -19,6 +19,10 @@ BUILD_INTERRUPT(reboot_interrupt,REBOOT_VECTOR)
 
 BUILD_INTERRUPT(x86_platform_ipi, X86_PLATFORM_IPI_VECTOR)
 
+#ifdef CONFIG_HAVE_KVM
+BUILD_INTERRUPT(kvm_posted_intr_ipi, POSTED_INTR_VECTOR)
+#endif
+
 /*
  * every pentium local APIC has two 'local interrupts', with a
  * soft-definable vector attached to both interrupts, one of

+ 3 - 0
arch/x86/include/asm/hardirq.h

@@ -11,6 +11,9 @@ typedef struct {
 	unsigned int apic_timer_irqs;	/* arch dependent */
 	unsigned int irq_spurious_count;
 	unsigned int icr_read_retry_count;
+#endif
+#ifdef CONFIG_HAVE_KVM
+	unsigned int kvm_posted_intr_ipis;
 #endif
 	unsigned int x86_platform_ipis;	/* arch dependent */
 	unsigned int apic_perf_irqs;

+ 1 - 0
arch/x86/include/asm/hw_irq.h

@@ -28,6 +28,7 @@
 /* Interrupt handlers registered during init_IRQ */
 extern void apic_timer_interrupt(void);
 extern void x86_platform_ipi(void);
+extern void kvm_posted_intr_ipi(void);
 extern void error_interrupt(void);
 extern void irq_work_interrupt(void);
 

+ 5 - 0
arch/x86/include/asm/irq_vectors.h

@@ -102,6 +102,11 @@
  */
 #define X86_PLATFORM_IPI_VECTOR		0xf7
 
+/* Vector for KVM to deliver posted interrupt IPI */
+#ifdef CONFIG_HAVE_KVM
+#define POSTED_INTR_VECTOR		0xf2
+#endif
+
 /*
  * IRQ work vector:
  */

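POSTED_INTR_VECTOR claims 0xf2 in the platform vector space; together with the BUILD_INTERRUPT stub and the per-CPU counter added above, the receive side plausibly reduces to accounting, since posting into the virtual APIC happens as a side effect of the IPI's arrival. A hedged sketch of such a handler (the body is an assumption; this diff only shows the declarations):

	void smp_kvm_posted_intr_ipi(struct pt_regs *regs)
	{
		struct pt_regs *old_regs = set_irq_regs(regs);

		ack_APIC_irq();
		irq_enter();
		inc_irq_stat(kvm_posted_intr_ipis);	/* counter from hardirq.h above */
		irq_exit();
		set_irq_regs(old_regs);
	}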
+ 16 - 10
arch/x86/include/asm/kvm_host.h

@@ -31,7 +31,7 @@
 #include <asm/msr-index.h>
 #include <asm/asm.h>
 
-#define KVM_MAX_VCPUS 254
+#define KVM_MAX_VCPUS 255
 #define KVM_SOFT_MAX_VCPUS 160
 #define KVM_USER_MEM_SLOTS 125
 /* memory slots that are not exposed to userspace */
@@ -43,6 +43,8 @@
 #define KVM_PIO_PAGE_OFFSET 1
 #define KVM_COALESCED_MMIO_PAGE_OFFSET 2
 
+#define KVM_IRQCHIP_NUM_PINS  KVM_IOAPIC_NUM_PINS
+
 #define CR0_RESERVED_BITS                                               \
 	(~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \
 			  | X86_CR0_ET | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM \
@@ -94,9 +96,6 @@
 
 #define ASYNC_PF_PER_VCPU 64
 
-extern raw_spinlock_t kvm_lock;
-extern struct list_head vm_list;
-
 struct kvm_vcpu;
 struct kvm;
 struct kvm_async_pf;
@@ -230,6 +229,7 @@ struct kvm_mmu_page {
 #endif
 
 	int write_flooding_count;
+	bool mmio_cached;
 };
 
 struct kvm_pio_request {
@@ -345,7 +345,6 @@ struct kvm_vcpu_arch {
 	unsigned long apic_attention;
 	int32_t apic_arb_prio;
 	int mp_state;
-	int sipi_vector;
 	u64 ia32_misc_enable_msr;
 	bool tpr_access_reporting;
 
@@ -643,7 +642,7 @@ struct kvm_x86_ops {
 	/* Create, but do not attach this VCPU */
 	struct kvm_vcpu *(*vcpu_create)(struct kvm *kvm, unsigned id);
 	void (*vcpu_free)(struct kvm_vcpu *vcpu);
-	int (*vcpu_reset)(struct kvm_vcpu *vcpu);
+	void (*vcpu_reset)(struct kvm_vcpu *vcpu);
 
 	void (*prepare_guest_switch)(struct kvm_vcpu *vcpu);
 	void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu);
@@ -696,14 +695,16 @@ struct kvm_x86_ops {
 	int (*nmi_allowed)(struct kvm_vcpu *vcpu);
 	bool (*get_nmi_mask)(struct kvm_vcpu *vcpu);
 	void (*set_nmi_mask)(struct kvm_vcpu *vcpu, bool masked);
-	void (*enable_nmi_window)(struct kvm_vcpu *vcpu);
-	void (*enable_irq_window)(struct kvm_vcpu *vcpu);
+	int (*enable_nmi_window)(struct kvm_vcpu *vcpu);
+	int (*enable_irq_window)(struct kvm_vcpu *vcpu);
 	void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr);
 	int (*vm_has_apicv)(struct kvm *kvm);
 	void (*hwapic_irr_update)(struct kvm_vcpu *vcpu, int max_irr);
 	void (*hwapic_isr_update)(struct kvm *kvm, int isr);
 	void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
 	void (*set_virtual_x2apic_mode)(struct kvm_vcpu *vcpu, bool set);
+	void (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector);
+	void (*sync_pir_to_irr)(struct kvm_vcpu *vcpu);
 	int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
 	int (*get_tdp_level)(void);
 	u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
@@ -730,6 +731,7 @@ struct kvm_x86_ops {
 	int (*check_intercept)(struct kvm_vcpu *vcpu,
 			       struct x86_instruction_info *info,
 			       enum x86_intercept_stage stage);
+	void (*handle_external_intr)(struct kvm_vcpu *vcpu);
 };
 
 struct kvm_arch_async_pf {
@@ -767,6 +769,7 @@ void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
 				     struct kvm_memory_slot *slot,
 				     gfn_t gfn_offset, unsigned long mask);
 void kvm_mmu_zap_all(struct kvm *kvm);
+void kvm_mmu_zap_mmio_sptes(struct kvm *kvm);
 unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm);
 void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages);
 
@@ -797,6 +800,7 @@ enum emulation_result {
 #define EMULTYPE_TRAP_UD	    (1 << 1)
 #define EMULTYPE_SKIP		    (1 << 2)
 #define EMULTYPE_RETRY		    (1 << 3)
+#define EMULTYPE_NO_REEXECUTE	    (1 << 4)
 int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2,
 			    int emulation_type, void *insn, int insn_len);
 
@@ -807,6 +811,7 @@ static inline int emulate_instruction(struct kvm_vcpu *vcpu,
 }
 
 void kvm_enable_efer_bits(u64);
+bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer);
 int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *data);
 int kvm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr);
 
@@ -819,6 +824,7 @@ int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu);
 
 void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg);
 int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg);
+void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, unsigned int vector);
 
 int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index,
 		    int reason, bool has_error_code, u32 error_code);
@@ -973,7 +979,6 @@ enum {
  * Trap the fault and ignore the instruction if that happens.
  */
 asmlinkage void kvm_spurious_fault(void);
-extern bool kvm_rebooting;
 
 #define ____kvm_handle_fault_on_reboot(insn, cleanup_insn)	\
 	"666: " insn "\n\t" \
@@ -1002,6 +1007,7 @@ int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v);
 int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu);
 int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu);
 int kvm_cpu_get_interrupt(struct kvm_vcpu *v);
+void kvm_vcpu_reset(struct kvm_vcpu *vcpu);
 
 void kvm_define_shared_msr(unsigned index, u32 msr);
 void kvm_set_shared_msr(unsigned index, u64 val, u64 mask);
@@ -1027,7 +1033,7 @@ void kvm_pmu_reset(struct kvm_vcpu *vcpu);
 void kvm_pmu_cpuid_update(struct kvm_vcpu *vcpu);
 bool kvm_pmu_msr(struct kvm_vcpu *vcpu, u32 msr);
 int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *data);
-int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data);
+int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info);
 int kvm_pmu_read_pmc(struct kvm_vcpu *vcpu, unsigned pmc, u64 *data);
 void kvm_handle_pmu_event(struct kvm_vcpu *vcpu);
 void kvm_deliver_pmi(struct kvm_vcpu *vcpu);

+ 18 - 0
arch/x86/include/asm/vmx.h

@@ -65,11 +65,16 @@
 #define SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY    0x00000200
 #define SECONDARY_EXEC_PAUSE_LOOP_EXITING	0x00000400
 #define SECONDARY_EXEC_ENABLE_INVPCID		0x00001000
+#define SECONDARY_EXEC_SHADOW_VMCS              0x00004000
 
 
 #define PIN_BASED_EXT_INTR_MASK                 0x00000001
 #define PIN_BASED_NMI_EXITING                   0x00000008
 #define PIN_BASED_VIRTUAL_NMIS                  0x00000020
+#define PIN_BASED_VMX_PREEMPTION_TIMER          0x00000040
+#define PIN_BASED_POSTED_INTR                   0x00000080
+
+#define PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR	0x00000016
 
 #define VM_EXIT_SAVE_DEBUG_CONTROLS             0x00000002
 #define VM_EXIT_HOST_ADDR_SPACE_SIZE            0x00000200
@@ -81,6 +86,8 @@
 #define VM_EXIT_LOAD_IA32_EFER                  0x00200000
 #define VM_EXIT_LOAD_IA32_EFER                  0x00200000
 #define VM_EXIT_SAVE_VMX_PREEMPTION_TIMER       0x00400000
 #define VM_EXIT_SAVE_VMX_PREEMPTION_TIMER       0x00400000
 
 
+#define VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR	0x00036dff
+
 #define VM_ENTRY_LOAD_DEBUG_CONTROLS            0x00000002
 #define VM_ENTRY_LOAD_DEBUG_CONTROLS            0x00000002
 #define VM_ENTRY_IA32E_MODE                     0x00000200
 #define VM_ENTRY_IA32E_MODE                     0x00000200
 #define VM_ENTRY_SMM                            0x00000400
 #define VM_ENTRY_SMM                            0x00000400
@@ -89,9 +96,15 @@
 #define VM_ENTRY_LOAD_IA32_PAT			0x00004000
 #define VM_ENTRY_LOAD_IA32_PAT			0x00004000
 #define VM_ENTRY_LOAD_IA32_EFER                 0x00008000
 #define VM_ENTRY_LOAD_IA32_EFER                 0x00008000
 
 
+#define VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR	0x000011ff
+
+#define VMX_MISC_PREEMPTION_TIMER_RATE_MASK	0x0000001f
+#define VMX_MISC_SAVE_EFER_LMA			0x00000020
+
 /* VMCS Encodings */
 /* VMCS Encodings */
 enum vmcs_field {
 enum vmcs_field {
 	VIRTUAL_PROCESSOR_ID            = 0x00000000,
 	VIRTUAL_PROCESSOR_ID            = 0x00000000,
+	POSTED_INTR_NV                  = 0x00000002,
 	GUEST_ES_SELECTOR               = 0x00000800,
 	GUEST_ES_SELECTOR               = 0x00000800,
 	GUEST_CS_SELECTOR               = 0x00000802,
 	GUEST_CS_SELECTOR               = 0x00000802,
 	GUEST_SS_SELECTOR               = 0x00000804,
 	GUEST_SS_SELECTOR               = 0x00000804,
@@ -126,6 +139,8 @@ enum vmcs_field {
 	VIRTUAL_APIC_PAGE_ADDR_HIGH     = 0x00002013,
 	VIRTUAL_APIC_PAGE_ADDR_HIGH     = 0x00002013,
 	APIC_ACCESS_ADDR		= 0x00002014,
 	APIC_ACCESS_ADDR		= 0x00002014,
 	APIC_ACCESS_ADDR_HIGH		= 0x00002015,
 	APIC_ACCESS_ADDR_HIGH		= 0x00002015,
+	POSTED_INTR_DESC_ADDR           = 0x00002016,
+	POSTED_INTR_DESC_ADDR_HIGH      = 0x00002017,
 	EPT_POINTER                     = 0x0000201a,
 	EPT_POINTER                     = 0x0000201a,
 	EPT_POINTER_HIGH                = 0x0000201b,
 	EPT_POINTER_HIGH                = 0x0000201b,
 	EOI_EXIT_BITMAP0                = 0x0000201c,
 	EOI_EXIT_BITMAP0                = 0x0000201c,
@@ -136,6 +151,8 @@ enum vmcs_field {
 	EOI_EXIT_BITMAP2_HIGH           = 0x00002021,
 	EOI_EXIT_BITMAP2_HIGH           = 0x00002021,
 	EOI_EXIT_BITMAP3                = 0x00002022,
 	EOI_EXIT_BITMAP3                = 0x00002022,
 	EOI_EXIT_BITMAP3_HIGH           = 0x00002023,
 	EOI_EXIT_BITMAP3_HIGH           = 0x00002023,
+	VMREAD_BITMAP                   = 0x00002026,
+	VMWRITE_BITMAP                  = 0x00002028,
 	GUEST_PHYSICAL_ADDRESS          = 0x00002400,
 	GUEST_PHYSICAL_ADDRESS          = 0x00002400,
 	GUEST_PHYSICAL_ADDRESS_HIGH     = 0x00002401,
 	GUEST_PHYSICAL_ADDRESS_HIGH     = 0x00002401,
 	VMCS_LINK_POINTER               = 0x00002800,
 	VMCS_LINK_POINTER               = 0x00002800,
@@ -209,6 +226,7 @@ enum vmcs_field {
 	GUEST_INTERRUPTIBILITY_INFO     = 0x00004824,
 	GUEST_INTERRUPTIBILITY_INFO     = 0x00004824,
 	GUEST_ACTIVITY_STATE            = 0X00004826,
 	GUEST_ACTIVITY_STATE            = 0X00004826,
 	GUEST_SYSENTER_CS               = 0x0000482A,
 	GUEST_SYSENTER_CS               = 0x0000482A,
+	VMX_PREEMPTION_TIMER_VALUE      = 0x0000482E,
 	HOST_IA32_SYSENTER_CS           = 0x00004c00,
 	HOST_IA32_SYSENTER_CS           = 0x00004c00,
 	CR0_GUEST_HOST_MASK             = 0x00006000,
 	CR0_GUEST_HOST_MASK             = 0x00006000,
 	CR4_GUEST_HOST_MASK             = 0x00006002,
 	CR4_GUEST_HOST_MASK             = 0x00006002,
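
Note on the new pin-based controls: they are usable only where the VMX capability MSRs advertise them, and the *_ALWAYSON_WITHOUT_TRUE_MSR constants supply the default1 control bits for CPUs that lack the IA32_VMX_TRUE_* capability MSRs. A minimal probe sketch, assuming <asm/msr.h> and the existing MSR_IA32_VMX_PINBASED_CTLS definition (the helper name is hypothetical):

	/* A pin-based control such as PIN_BASED_POSTED_INTR may be enabled only
	 * if the allowed-1 half (high 32 bits) of the capability MSR sets it. */
	static bool pin_based_ctrl_supported(u32 ctrl)
	{
		u32 vmx_msr_low, vmx_msr_high;

		rdmsr(MSR_IA32_VMX_PINBASED_CTLS, vmx_msr_low, vmx_msr_high);
		return vmx_msr_high & ctrl;
	}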

+ 0 - 1
arch/x86/include/uapi/asm/kvm.h

@@ -29,7 +29,6 @@
 #define __KVM_HAVE_PIT
 #define __KVM_HAVE_IOAPIC
 #define __KVM_HAVE_IRQ_LINE
-#define __KVM_HAVE_DEVICE_ASSIGNMENT
 #define __KVM_HAVE_MSI
 #define __KVM_HAVE_USER_NMI
 #define __KVM_HAVE_GUEST_DEBUG

+ 2 - 0
arch/x86/include/uapi/asm/msr-index.h

@@ -528,6 +528,8 @@
 #define VMX_BASIC_MEM_TYPE_WB	6LLU
 #define VMX_BASIC_INOUT		0x0040000000000000LLU

+/* MSR_IA32_VMX_MISC bits */
+#define MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS (1ULL << 29)
 /* AMD-V MSRs */

 #define MSR_VM_CR                       0xc0010114

+ 3 - 2
arch/x86/include/uapi/asm/vmx.h

@@ -65,6 +65,7 @@
 #define EXIT_REASON_EOI_INDUCED         45
 #define EXIT_REASON_EPT_VIOLATION       48
 #define EXIT_REASON_EPT_MISCONFIG       49
+#define EXIT_REASON_PREEMPTION_TIMER    52
 #define EXIT_REASON_WBINVD              54
 #define EXIT_REASON_XSETBV              55
 #define EXIT_REASON_APIC_WRITE          56
@@ -110,7 +111,7 @@
 	{ EXIT_REASON_EOI_INDUCED,           "EOI_INDUCED" }, \
 	{ EXIT_REASON_INVALID_STATE,         "INVALID_STATE" }, \
 	{ EXIT_REASON_INVD,                  "INVD" }, \
-	{ EXIT_REASON_INVPCID,               "INVPCID" }
-
+	{ EXIT_REASON_INVPCID,               "INVPCID" }, \
+	{ EXIT_REASON_PREEMPTION_TIMER,      "PREEMPTION_TIMER" }

 #endif /* _UAPIVMX_H */

+ 5 - 0
arch/x86/kernel/entry_64.S

@@ -1166,6 +1166,11 @@ apicinterrupt LOCAL_TIMER_VECTOR \
 apicinterrupt X86_PLATFORM_IPI_VECTOR \
 	x86_platform_ipi smp_x86_platform_ipi

+#ifdef CONFIG_HAVE_KVM
+apicinterrupt POSTED_INTR_VECTOR \
+	kvm_posted_intr_ipi smp_kvm_posted_intr_ipi
+#endif
+
 apicinterrupt THRESHOLD_APIC_VECTOR \
 	threshold_interrupt smp_threshold_interrupt
 apicinterrupt THERMAL_APIC_VECTOR \

+ 22 - 0
arch/x86/kernel/irq.c

@@ -224,6 +224,28 @@ void smp_x86_platform_ipi(struct pt_regs *regs)
 	set_irq_regs(old_regs);
 }

+#ifdef CONFIG_HAVE_KVM
+/*
+ * Handler for POSTED_INTERRUPT_VECTOR.
+ */
+void smp_kvm_posted_intr_ipi(struct pt_regs *regs)
+{
+	struct pt_regs *old_regs = set_irq_regs(regs);
+
+	ack_APIC_irq();
+
+	irq_enter();
+
+	exit_idle();
+
+	inc_irq_stat(kvm_posted_intr_ipis);
+
+	irq_exit();
+
+	set_irq_regs(old_regs);
+}
+#endif
+
 EXPORT_SYMBOL_GPL(vector_used_by_percpu_irq);

 #ifdef CONFIG_HOTPLUG_CPU
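
The handler above intentionally dispatches nothing: when the target CPU is running a guest, the posted interrupt is delivered by hardware straight from the descriptor, and this vector only exists so a CPU sitting in host context acknowledges the IPI and counts it. A simplified sketch of the send side, based on the vmx_deliver_posted_interrupt logic this series adds (the pi_desc helpers appear in the vmx.c hunks below; the full version also falls back to kvm_vcpu_kick() when the vcpu is not in guest mode):

	static void deliver_posted_interrupt(struct vcpu_vmx *vmx, int vector)
	{
		if (pi_test_and_set_pir(vector, &vmx->pi_desc))
			return;		/* vector already pending in the PIR */
		if (!pi_test_and_set_on(&vmx->pi_desc))
			apic->send_IPI_mask(get_cpu_mask(vmx->vcpu.cpu),
					    POSTED_INTR_VECTOR);
	}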

+ 4 - 0
arch/x86/kernel/irqinit.c

@@ -172,6 +172,10 @@ static void __init apic_intr_init(void)

 	/* IPI for X86 platform specific use */
 	alloc_intr_gate(X86_PLATFORM_IPI_VECTOR, x86_platform_ipi);
+#ifdef CONFIG_HAVE_KVM
+	/* IPI for KVM to deliver posted interrupt */
+	alloc_intr_gate(POSTED_INTR_VECTOR, kvm_posted_intr_ipi);
+#endif

 	/* IPI vectors for APIC spurious and error interrupts */
 	alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);

+ 8 - 1
arch/x86/kernel/kvmclock.c

@@ -160,8 +160,12 @@ int kvm_register_clock(char *txt)
 {
 	int cpu = smp_processor_id();
 	int low, high, ret;
-	struct pvclock_vcpu_time_info *src = &hv_clock[cpu].pvti;
+	struct pvclock_vcpu_time_info *src;
+
+	if (!hv_clock)
+		return 0;

+	src = &hv_clock[cpu].pvti;
 	low = (int)slow_virt_to_phys(src) | 1;
 	high = ((u64)slow_virt_to_phys(src) >> 32);
 	ret = native_write_msr_safe(msr_kvm_system_time, low, high);
@@ -276,6 +280,9 @@ int __init kvm_setup_vsyscall_timeinfo(void)
 	struct pvclock_vcpu_time_info *vcpu_time;
 	unsigned int size;

+	if (!hv_clock)
+		return 0;
+
 	size = PAGE_ALIGN(sizeof(struct pvclock_vsyscall_time_info)*NR_CPUS);

 	preempt_disable();

+ 12 - 2
arch/x86/kvm/Kconfig

@@ -21,14 +21,13 @@ config KVM
 	tristate "Kernel-based Virtual Machine (KVM) support"
 	depends on HAVE_KVM
 	depends on HIGH_RES_TIMERS
-	# for device assignment:
-	depends on PCI
 	# for TASKSTATS/TASK_DELAY_ACCT:
 	depends on NET
 	select PREEMPT_NOTIFIERS
 	select MMU_NOTIFIER
 	select ANON_INODES
 	select HAVE_KVM_IRQCHIP
+	select HAVE_KVM_IRQ_ROUTING
 	select HAVE_KVM_EVENTFD
 	select KVM_APIC_ARCHITECTURE
 	select KVM_ASYNC_PF
@@ -82,6 +81,17 @@ config KVM_MMU_AUDIT
 	 This option adds a R/W kVM module parameter 'mmu_audit', which allows
 	 audit  KVM MMU at runtime.

+config KVM_DEVICE_ASSIGNMENT
+	bool "KVM legacy PCI device assignment support"
+	depends on KVM && PCI && IOMMU_API
+	default y
+	---help---
+	  Provide support for legacy PCI device assignment through KVM.  The
+	  kernel now also supports a full featured userspace device driver
+	  framework through VFIO, which supersedes much of this support.
+
+	  If unsure, say Y.
+
 # OK, it's a little counter-intuitive to do this, but it puts it neatly under
 # the virtualization menu.
 source drivers/vhost/Kconfig

+ 3 - 2
arch/x86/kvm/Makefile

@@ -7,8 +7,9 @@ CFLAGS_vmx.o := -I.

 kvm-y			+= $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \
 				coalesced_mmio.o irq_comm.o eventfd.o \
-				assigned-dev.o)
-kvm-$(CONFIG_IOMMU_API)	+= $(addprefix ../../../virt/kvm/, iommu.o)
+				irqchip.o)
+kvm-$(CONFIG_KVM_DEVICE_ASSIGNMENT)	+= $(addprefix ../../../virt/kvm/, \
+				assigned-dev.o iommu.o)
 kvm-$(CONFIG_KVM_ASYNC_PF)	+= $(addprefix ../../../virt/kvm/, async_pf.o)

 kvm-y			+= x86.o mmu.o emulate.o i8259.o irq.o lapic.o \

+ 23 - 8
arch/x86/kvm/emulate.c

@@ -132,8 +132,9 @@
 #define Priv        (1<<27) /* instruction generates #GP if current CPL != 0 */
 #define No64	    (1<<28)
 #define PageTable   (1 << 29)   /* instruction used to write page table */
+#define NotImpl     (1 << 30)   /* instruction is not implemented */
 /* Source 2 operand type */
-#define Src2Shift   (30)
+#define Src2Shift   (31)
 #define Src2None    (OpNone << Src2Shift)
 #define Src2CL      (OpCL << Src2Shift)
 #define Src2ImmByte (OpImmByte << Src2Shift)
@@ -1578,12 +1579,21 @@ static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt,

 	memset(&seg_desc, 0, sizeof seg_desc);

-	if ((seg <= VCPU_SREG_GS && ctxt->mode == X86EMUL_MODE_VM86)
-	    || ctxt->mode == X86EMUL_MODE_REAL) {
-		/* set real mode segment descriptor */
+	if (ctxt->mode == X86EMUL_MODE_REAL) {
+		/* set real mode segment descriptor (keep limit etc. for
+		 * unreal mode) */
 		ctxt->ops->get_segment(ctxt, &dummy, &seg_desc, NULL, seg);
 		set_desc_base(&seg_desc, selector << 4);
 		goto load;
+	} else if (seg <= VCPU_SREG_GS && ctxt->mode == X86EMUL_MODE_VM86) {
+		/* VM86 needs a clean new segment descriptor */
+		set_desc_base(&seg_desc, selector << 4);
+		set_desc_limit(&seg_desc, 0xffff);
+		seg_desc.type = 3;
+		seg_desc.p = 1;
+		seg_desc.s = 1;
+		seg_desc.dpl = 3;
+		goto load;
 	}

 	rpl = selector & 3;
@@ -3615,7 +3625,7 @@ static int check_perm_out(struct x86_emulate_ctxt *ctxt)
 #define DI(_y, _i) { .flags = (_y), .intercept = x86_intercept_##_i }
 #define DIP(_y, _i, _p) { .flags = (_y), .intercept = x86_intercept_##_i, \
 		      .check_perm = (_p) }
-#define N    D(0)
+#define N    D(NotImpl)
 #define EXT(_f, _e) { .flags = ((_f) | RMExt), .u.group = (_e) }
 #define G(_f, _g) { .flags = ((_f) | Group | ModRM), .u.group = (_g) }
 #define GD(_f, _g) { .flags = ((_f) | GroupDual | ModRM), .u.gdual = (_g) }
@@ -3713,7 +3723,7 @@ static const struct opcode group5[] = {
 	I(SrcMemFAddr | ImplicitOps | Stack,	em_call_far),
 	I(SrcMem | Stack,			em_grp45),
 	I(SrcMemFAddr | ImplicitOps,		em_grp45),
-	I(SrcMem | Stack,			em_grp45), N,
+	I(SrcMem | Stack,			em_grp45), D(Undefined),
 };

 static const struct opcode group6[] = {
@@ -4162,6 +4172,10 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op,
 		break;
 	case OpMem8:
 		ctxt->memop.bytes = 1;
+		if (ctxt->memop.type == OP_REG) {
+			ctxt->memop.addr.reg = decode_register(ctxt, ctxt->modrm_rm, 1);
+			fetch_register_operand(&ctxt->memop);
+		}
 		goto mem_common;
 	case OpMem16:
 		ctxt->memop.bytes = 2;
@@ -4373,7 +4387,7 @@ done_prefixes:
 	ctxt->intercept = opcode.intercept;

 	/* Unrecognised? */
-	if (ctxt->d == 0 || (ctxt->d & Undefined))
+	if (ctxt->d == 0 || (ctxt->d & NotImpl))
 		return EMULATION_FAILED;

 	if (!(ctxt->d & VendorSpecific) && ctxt->only_vendor_specific_insn)
@@ -4511,7 +4525,8 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)

 	ctxt->mem_read.pos = 0;

-	if (ctxt->mode == X86EMUL_MODE_PROT64 && (ctxt->d & No64)) {
+	if ((ctxt->mode == X86EMUL_MODE_PROT64 && (ctxt->d & No64)) ||
+			(ctxt->d & Undefined)) {
 		rc = emulate_ud(ctxt);
 		goto done;
 	}
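
The two flags separate distinct failure modes: NotImpl marks opcodes the emulator simply lacks, so x86_emulate_insn() returns EMULATION_FAILED and the caller can re-enter the guest or punt to userspace, while Undefined is reserved for encodings the CPU itself does not define, which must raise #UD inside the guest. A hypothetical table fragment, using the macros from the hunks above:

	static const struct opcode example_group[] = {
		I(SrcMem | Stack, em_grp45),	/* emulated normally          */
		N,				/* NotImpl: emulation fails   */
		D(Undefined),			/* Undefined: guest takes #UD */
	};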

+ 2 - 2
arch/x86/kvm/i8254.c

@@ -290,8 +290,8 @@ static void pit_do_work(struct kthread_work *work)
 	}
 	spin_unlock(&ps->inject_lock);
 	if (inject) {
-		kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1);
-		kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 0);
+		kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1, false);
+		kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 0, false);

 		/*
 		 * Provides NMI watchdog support via Virtual Wire mode.

+ 107 - 82
arch/x86/kvm/lapic.c

@@ -94,6 +94,14 @@ static inline int apic_test_vector(int vec, void *bitmap)
 	return test_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
 }

+bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector)
+{
+	struct kvm_lapic *apic = vcpu->arch.apic;
+
+	return apic_test_vector(vector, apic->regs + APIC_ISR) ||
+		apic_test_vector(vector, apic->regs + APIC_IRR);
+}
+
 static inline void apic_set_vector(int vec, void *bitmap)
 {
 	set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
@@ -145,53 +153,6 @@ static inline int kvm_apic_id(struct kvm_lapic *apic)
 	return (kvm_apic_get_reg(apic, APIC_ID) >> 24) & 0xff;
 }

-void kvm_calculate_eoi_exitmap(struct kvm_vcpu *vcpu,
-				struct kvm_lapic_irq *irq,
-				u64 *eoi_exit_bitmap)
-{
-	struct kvm_lapic **dst;
-	struct kvm_apic_map *map;
-	unsigned long bitmap = 1;
-	int i;
-
-	rcu_read_lock();
-	map = rcu_dereference(vcpu->kvm->arch.apic_map);
-
-	if (unlikely(!map)) {
-		__set_bit(irq->vector, (unsigned long *)eoi_exit_bitmap);
-		goto out;
-	}
-
-	if (irq->dest_mode == 0) { /* physical mode */
-		if (irq->delivery_mode == APIC_DM_LOWEST ||
-				irq->dest_id == 0xff) {
-			__set_bit(irq->vector,
-				  (unsigned long *)eoi_exit_bitmap);
-			goto out;
-		}
-		dst = &map->phys_map[irq->dest_id & 0xff];
-	} else {
-		u32 mda = irq->dest_id << (32 - map->ldr_bits);
-
-		dst = map->logical_map[apic_cluster_id(map, mda)];
-
-		bitmap = apic_logical_id(map, mda);
-	}
-
-	for_each_set_bit(i, &bitmap, 16) {
-		if (!dst[i])
-			continue;
-		if (dst[i]->vcpu == vcpu) {
-			__set_bit(irq->vector,
-				  (unsigned long *)eoi_exit_bitmap);
-			break;
-		}
-	}
-
-out:
-	rcu_read_unlock();
-}
-
 static void recalculate_apic_map(struct kvm *kvm)
 {
 	struct kvm_apic_map *new, *old = NULL;
@@ -256,7 +217,7 @@ out:
 	if (old)
 		kfree_rcu(old, rcu);

-	kvm_ioapic_make_eoibitmap_request(kvm);
+	kvm_vcpu_request_scan_ioapic(kvm);
 }

 static inline void kvm_apic_set_id(struct kvm_lapic *apic, u8 id)
@@ -357,6 +318,19 @@ static u8 count_vectors(void *bitmap)
 	return count;
 }

+void kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir)
+{
+	u32 i, pir_val;
+	struct kvm_lapic *apic = vcpu->arch.apic;
+
+	for (i = 0; i <= 7; i++) {
+		pir_val = xchg(&pir[i], 0);
+		if (pir_val)
+			*((u32 *)(apic->regs + APIC_IRR + i * 0x10)) |= pir_val;
+	}
+}
+EXPORT_SYMBOL_GPL(kvm_apic_update_irr);
+
 static inline int apic_test_and_set_irr(int vec, struct kvm_lapic *apic)
 {
 	apic->irr_pending = true;
@@ -379,6 +353,7 @@ static inline int apic_find_highest_irr(struct kvm_lapic *apic)
 	if (!apic->irr_pending)
 		return -1;

+	kvm_x86_ops->sync_pir_to_irr(apic->vcpu);
 	result = apic_search_irr(apic);
 	ASSERT(result == -1 || result >= 16);

@@ -431,14 +406,16 @@ int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu)
 }

 static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
-			     int vector, int level, int trig_mode);
+			     int vector, int level, int trig_mode,
+			     unsigned long *dest_map);

-int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq)
+int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq,
+		unsigned long *dest_map)
 {
 	struct kvm_lapic *apic = vcpu->arch.apic;

 	return __apic_accept_irq(apic, irq->delivery_mode, irq->vector,
-			irq->level, irq->trig_mode);
+			irq->level, irq->trig_mode, dest_map);
 }

 static int pv_eoi_put_user(struct kvm_vcpu *vcpu, u8 val)
@@ -505,6 +482,15 @@ static inline int apic_find_highest_isr(struct kvm_lapic *apic)
 	return result;
 }

+void kvm_apic_update_tmr(struct kvm_vcpu *vcpu, u32 *tmr)
+{
+	struct kvm_lapic *apic = vcpu->arch.apic;
+	int i;
+
+	for (i = 0; i < 8; i++)
+		apic_set_reg(apic, APIC_TMR + 0x10 * i, tmr[i]);
+}
+
 static void apic_update_ppr(struct kvm_lapic *apic)
 {
 	u32 tpr, isrv, ppr, old_ppr;
@@ -611,7 +597,7 @@ int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
 }

 bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
-		struct kvm_lapic_irq *irq, int *r)
+		struct kvm_lapic_irq *irq, int *r, unsigned long *dest_map)
 {
 	struct kvm_apic_map *map;
 	unsigned long bitmap = 1;
@@ -622,7 +608,7 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
 	*r = -1;

 	if (irq->shorthand == APIC_DEST_SELF) {
-		*r = kvm_apic_set_irq(src->vcpu, irq);
+		*r = kvm_apic_set_irq(src->vcpu, irq, dest_map);
 		return true;
 	}

@@ -667,7 +653,7 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
 			continue;
 		if (*r < 0)
 			*r = 0;
-		*r += kvm_apic_set_irq(dst[i]->vcpu, irq);
+		*r += kvm_apic_set_irq(dst[i]->vcpu, irq, dest_map);
 	}

 	ret = true;
@@ -681,7 +667,8 @@ out:
  * Return 1 if successfully added and 0 if discarded.
  */
 static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
-			     int vector, int level, int trig_mode)
+			     int vector, int level, int trig_mode,
+			     unsigned long *dest_map)
 {
 	int result = 0;
 	struct kvm_vcpu *vcpu = apic->vcpu;
@@ -694,24 +681,28 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
 		if (unlikely(!apic_enabled(apic)))
 			break;

-		if (trig_mode) {
-			apic_debug("level trig mode for vector %d", vector);
-			apic_set_vector(vector, apic->regs + APIC_TMR);
-		} else
-			apic_clear_vector(vector, apic->regs + APIC_TMR);
+		if (dest_map)
+			__set_bit(vcpu->vcpu_id, dest_map);

-		result = !apic_test_and_set_irr(vector, apic);
-		trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode,
-					  trig_mode, vector, !result);
-		if (!result) {
-			if (trig_mode)
-				apic_debug("level trig mode repeatedly for "
-						"vector %d", vector);
-			break;
-		}
+		if (kvm_x86_ops->deliver_posted_interrupt) {
+			result = 1;
+			kvm_x86_ops->deliver_posted_interrupt(vcpu, vector);
+		} else {
+			result = !apic_test_and_set_irr(vector, apic);

-		kvm_make_request(KVM_REQ_EVENT, vcpu);
-		kvm_vcpu_kick(vcpu);
+			if (!result) {
+				if (trig_mode)
+					apic_debug("level trig mode repeatedly "
+						"for vector %d", vector);
+				goto out;
+			}
+
+			kvm_make_request(KVM_REQ_EVENT, vcpu);
+			kvm_vcpu_kick(vcpu);
+		}
+out:
+		trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode,
+				trig_mode, vector, !result);
 		break;

 	case APIC_DM_REMRD:
@@ -731,7 +722,11 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
 	case APIC_DM_INIT:
 		if (!trig_mode || level) {
 			result = 1;
-			vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED;
+			/* assumes that there are only KVM_APIC_INIT/SIPI */
+			apic->pending_events = (1UL << KVM_APIC_INIT);
+			/* make sure pending_events is visible before sending
+			 * the request */
+			smp_wmb();
 			kvm_make_request(KVM_REQ_EVENT, vcpu);
 			kvm_vcpu_kick(vcpu);
 		} else {
@@ -743,13 +738,13 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
 	case APIC_DM_STARTUP:
 		apic_debug("SIPI to vcpu %d vector 0x%02x\n",
 			   vcpu->vcpu_id, vector);
-		if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
-			result = 1;
-			vcpu->arch.sipi_vector = vector;
-			vcpu->arch.mp_state = KVM_MP_STATE_SIPI_RECEIVED;
-			kvm_make_request(KVM_REQ_EVENT, vcpu);
-			kvm_vcpu_kick(vcpu);
-		}
+		result = 1;
+		apic->sipi_vector = vector;
+		/* make sure sipi_vector is visible for the receiver */
+		smp_wmb();
+		set_bit(KVM_APIC_SIPI, &apic->pending_events);
+		kvm_make_request(KVM_REQ_EVENT, vcpu);
+		kvm_vcpu_kick(vcpu);
 		break;

 	case APIC_DM_EXTINT:
@@ -782,7 +777,7 @@ static void kvm_ioapic_send_eoi(struct kvm_lapic *apic, int vector)
 			trigger_mode = IOAPIC_LEVEL_TRIG;
 		else
 			trigger_mode = IOAPIC_EDGE_TRIG;
-		kvm_ioapic_update_eoi(apic->vcpu->kvm, vector, trigger_mode);
+		kvm_ioapic_update_eoi(apic->vcpu, vector, trigger_mode);
 	}
 }

@@ -848,7 +843,7 @@ static void apic_send_ipi(struct kvm_lapic *apic)
 		   irq.trig_mode, irq.level, irq.dest_mode, irq.delivery_mode,
 		   irq.vector);

-	kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq);
+	kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq, NULL);
 }

 static u32 apic_get_tmcct(struct kvm_lapic *apic)
@@ -1484,7 +1479,8 @@ int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type)
 		vector = reg & APIC_VECTOR_MASK;
 		mode = reg & APIC_MODE_MASK;
 		trig_mode = reg & APIC_LVT_LEVEL_TRIGGER;
-		return __apic_accept_irq(apic, mode, vector, 1, trig_mode);
+		return __apic_accept_irq(apic, mode, vector, 1, trig_mode,
+					NULL);
 	}
 	return 0;
 }
@@ -1654,6 +1650,7 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu,
 	apic->highest_isr_cache = -1;
 	kvm_x86_ops->hwapic_isr_update(vcpu->kvm, apic_find_highest_isr(apic));
 	kvm_make_request(KVM_REQ_EVENT, vcpu);
+	kvm_rtc_eoi_tracking_restore_one(vcpu);
 }

 void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
@@ -1860,6 +1857,34 @@ int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data)
 					 addr, sizeof(u8));
 }

+void kvm_apic_accept_events(struct kvm_vcpu *vcpu)
+{
+	struct kvm_lapic *apic = vcpu->arch.apic;
+	unsigned int sipi_vector;
+
+	if (!kvm_vcpu_has_lapic(vcpu))
+		return;
+
+	if (test_and_clear_bit(KVM_APIC_INIT, &apic->pending_events)) {
+		kvm_lapic_reset(vcpu);
+		kvm_vcpu_reset(vcpu);
+		if (kvm_vcpu_is_bsp(apic->vcpu))
+			vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
+		else
+			vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED;
+	}
+	if (test_and_clear_bit(KVM_APIC_SIPI, &apic->pending_events) &&
+	    vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
+		/* evaluate pending_events before reading the vector */
+		smp_rmb();
+		sipi_vector = apic->sipi_vector;
+		pr_debug("vcpu %d received sipi with vector # %x\n",
+			 vcpu->vcpu_id, sipi_vector);
+		kvm_vcpu_deliver_sipi_vector(vcpu, sipi_vector);
+		vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
+	}
+}
+
 void kvm_lapic_init(void)
 {
 	/* do not patch jump label more than once per second */
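
The pending_events handshake depends on ordering: the sender stores sipi_vector before setting KVM_APIC_SIPI (smp_wmb), and the receiver clears the bit before loading the vector (smp_rmb). A self-contained model of the same protocol, assuming C11 atomics in place of the kernel primitives:

	#include <stdatomic.h>

	static unsigned int sipi_vector;
	static atomic_ulong pending_events;
	#define SIPI_BIT (1UL << 1)	/* models KVM_APIC_SIPI */

	void send_sipi(unsigned int vector)
	{
		sipi_vector = vector;	/* published by the release below */
		atomic_fetch_or_explicit(&pending_events, SIPI_BIT,
					 memory_order_release);
	}

	int accept_sipi(unsigned int *vector)
	{
		if (!(atomic_fetch_and_explicit(&pending_events, ~SIPI_BIT,
						memory_order_acquire) & SIPI_BIT))
			return 0;
		*vector = sipi_vector;	/* acquire pairs with the release */
		return 1;
	}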

+ 17 - 5
arch/x86/kvm/lapic.h

@@ -5,6 +5,9 @@

 #include <linux/kvm_host.h>

+#define KVM_APIC_INIT		0
+#define KVM_APIC_SIPI		1
+
 struct kvm_timer {
 	struct hrtimer timer;
 	s64 period; 				/* unit: ns */
@@ -32,6 +35,8 @@ struct kvm_lapic {
 	void *regs;
 	gpa_t vapic_addr;
 	struct page *vapic_page;
+	unsigned long pending_events;
+	unsigned int sipi_vector;
 };
 int kvm_create_lapic(struct kvm_vcpu *vcpu);
 void kvm_free_lapic(struct kvm_vcpu *vcpu);
@@ -39,6 +44,7 @@ void kvm_free_lapic(struct kvm_vcpu *vcpu);
 int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu);
 int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu);
 int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu);
+void kvm_apic_accept_events(struct kvm_vcpu *vcpu);
 void kvm_lapic_reset(struct kvm_vcpu *vcpu);
 u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu);
 void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8);
@@ -47,13 +53,16 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value);
 u64 kvm_lapic_get_base(struct kvm_vcpu *vcpu);
 void kvm_apic_set_version(struct kvm_vcpu *vcpu);

+void kvm_apic_update_tmr(struct kvm_vcpu *vcpu, u32 *tmr);
+void kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir);
 int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest);
 int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda);
-int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq);
+int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq,
+		unsigned long *dest_map);
 int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type);

 bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
-		struct kvm_lapic_irq *irq, int *r);
+		struct kvm_lapic_irq *irq, int *r, unsigned long *dest_map);

 u64 kvm_get_apic_base(struct kvm_vcpu *vcpu);
 void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data);
@@ -154,8 +163,11 @@ static inline u16 apic_logical_id(struct kvm_apic_map *map, u32 ldr)
 	return ldr & map->lid_mask;
 }

-void kvm_calculate_eoi_exitmap(struct kvm_vcpu *vcpu,
-				struct kvm_lapic_irq *irq,
-				u64 *eoi_bitmap);
+static inline bool kvm_apic_has_events(struct kvm_vcpu *vcpu)
+{
+	return vcpu->arch.apic->pending_events;
+}
+
+bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector);

 #endif

+ 61 - 47
arch/x86/kvm/mmu.c

@@ -199,8 +199,11 @@ EXPORT_SYMBOL_GPL(kvm_mmu_set_mmio_spte_mask);

 static void mark_mmio_spte(u64 *sptep, u64 gfn, unsigned access)
 {
+	struct kvm_mmu_page *sp =  page_header(__pa(sptep));
+
 	access &= ACC_WRITE_MASK | ACC_USER_MASK;

+	sp->mmio_cached = true;
 	trace_mark_mmio_spte(sptep, gfn, access);
 	mmu_spte_set(sptep, shadow_mmio_mask | access | gfn << PAGE_SHIFT);
 }
@@ -1502,6 +1505,7 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu,
 					       u64 *parent_pte, int direct)
 {
 	struct kvm_mmu_page *sp;
+
 	sp = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_header_cache);
 	sp->spt = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache);
 	if (!direct)
@@ -1644,16 +1648,14 @@ static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp,
 static void kvm_mmu_commit_zap_page(struct kvm *kvm,
 				    struct list_head *invalid_list);

-#define for_each_gfn_sp(kvm, sp, gfn)					\
-  hlist_for_each_entry(sp,						\
-   &(kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(gfn)], hash_link)	\
-	if ((sp)->gfn != (gfn)) {} else
+#define for_each_gfn_sp(_kvm, _sp, _gfn)				\
+	hlist_for_each_entry(_sp,					\
+	  &(_kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(_gfn)], hash_link) \
+		if ((_sp)->gfn != (_gfn)) {} else

-#define for_each_gfn_indirect_valid_sp(kvm, sp, gfn)			\
-  hlist_for_each_entry(sp,						\
-   &(kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(gfn)], hash_link)	\
-		if ((sp)->gfn != (gfn) || (sp)->role.direct ||		\
-			(sp)->role.invalid) {} else
+#define for_each_gfn_indirect_valid_sp(_kvm, _sp, _gfn)			\
+	for_each_gfn_sp(_kvm, _sp, _gfn)				\
+		if ((_sp)->role.direct || (_sp)->role.invalid) {} else

 /* @sp->gfn should be write-protected at the call site */
 static int __kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
@@ -2089,7 +2091,7 @@ static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp,
 static void kvm_mmu_commit_zap_page(struct kvm *kvm,
 				    struct list_head *invalid_list)
 {
-	struct kvm_mmu_page *sp;
+	struct kvm_mmu_page *sp, *nsp;

 	if (list_empty(invalid_list))
 		return;
@@ -2106,11 +2108,25 @@ static void kvm_mmu_commit_zap_page(struct kvm *kvm,
 	 */
 	kvm_flush_remote_tlbs(kvm);

-	do {
-		sp = list_first_entry(invalid_list, struct kvm_mmu_page, link);
+	list_for_each_entry_safe(sp, nsp, invalid_list, link) {
 		WARN_ON(!sp->role.invalid || sp->root_count);
 		kvm_mmu_free_page(sp);
-	} while (!list_empty(invalid_list));
+	}
+}
+
+static bool prepare_zap_oldest_mmu_page(struct kvm *kvm,
+					struct list_head *invalid_list)
+{
+	struct kvm_mmu_page *sp;
+
+	if (list_empty(&kvm->arch.active_mmu_pages))
+		return false;
+
+	sp = list_entry(kvm->arch.active_mmu_pages.prev,
+			struct kvm_mmu_page, link);
+	kvm_mmu_prepare_zap_page(kvm, sp, invalid_list);
+
+	return true;
 }

 /*
@@ -2120,23 +2136,15 @@ static void kvm_mmu_commit_zap_page(struct kvm *kvm,
 void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int goal_nr_mmu_pages)
 {
 	LIST_HEAD(invalid_list);
-	/*
-	 * If we set the number of mmu pages to be smaller be than the
-	 * number of actived pages , we must to free some mmu pages before we
-	 * change the value
-	 */

 	spin_lock(&kvm->mmu_lock);

 	if (kvm->arch.n_used_mmu_pages > goal_nr_mmu_pages) {
-		while (kvm->arch.n_used_mmu_pages > goal_nr_mmu_pages &&
-			!list_empty(&kvm->arch.active_mmu_pages)) {
-			struct kvm_mmu_page *page;
+		/* Need to free some mmu pages to achieve the goal. */
+		while (kvm->arch.n_used_mmu_pages > goal_nr_mmu_pages)
+			if (!prepare_zap_oldest_mmu_page(kvm, &invalid_list))
+				break;

-			page = container_of(kvm->arch.active_mmu_pages.prev,
-					    struct kvm_mmu_page, link);
-			kvm_mmu_prepare_zap_page(kvm, page, &invalid_list);
-		}
 		kvm_mmu_commit_zap_page(kvm, &invalid_list);
 		goal_nr_mmu_pages = kvm->arch.n_used_mmu_pages;
 	}
@@ -2794,6 +2802,7 @@ exit:

 static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn,
 			 gva_t gva, pfn_t *pfn, bool write, bool *writable);
+static void make_mmu_pages_available(struct kvm_vcpu *vcpu);

 static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code,
 			 gfn_t gfn, bool prefault)
@@ -2835,7 +2844,7 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code,
 	spin_lock(&vcpu->kvm->mmu_lock);
 	if (mmu_notifier_retry(vcpu->kvm, mmu_seq))
 		goto out_unlock;
-	kvm_mmu_free_some_pages(vcpu);
+	make_mmu_pages_available(vcpu);
 	if (likely(!force_pt_level))
 		transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level);
 	r = __direct_map(vcpu, v, write, map_writable, level, gfn, pfn,
@@ -2913,7 +2922,7 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)

 	if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) {
 		spin_lock(&vcpu->kvm->mmu_lock);
-		kvm_mmu_free_some_pages(vcpu);
+		make_mmu_pages_available(vcpu);
 		sp = kvm_mmu_get_page(vcpu, 0, 0, PT64_ROOT_LEVEL,
 				      1, ACC_ALL, NULL);
 		++sp->root_count;
@@ -2925,7 +2934,7 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)

 			ASSERT(!VALID_PAGE(root));
 			spin_lock(&vcpu->kvm->mmu_lock);
-			kvm_mmu_free_some_pages(vcpu);
+			make_mmu_pages_available(vcpu);
 			sp = kvm_mmu_get_page(vcpu, i << (30 - PAGE_SHIFT),
 					      i << 30,
 					      PT32_ROOT_LEVEL, 1, ACC_ALL,
@@ -2964,7 +2973,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
 		ASSERT(!VALID_PAGE(root));

 		spin_lock(&vcpu->kvm->mmu_lock);
-		kvm_mmu_free_some_pages(vcpu);
+		make_mmu_pages_available(vcpu);
 		sp = kvm_mmu_get_page(vcpu, root_gfn, 0, PT64_ROOT_LEVEL,
 				      0, ACC_ALL, NULL);
 		root = __pa(sp->spt);
@@ -2998,7 +3007,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
 				return 1;
 		}
 		spin_lock(&vcpu->kvm->mmu_lock);
-		kvm_mmu_free_some_pages(vcpu);
+		make_mmu_pages_available(vcpu);
 		sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30,
 				      PT32_ROOT_LEVEL, 0,
 				      ACC_ALL, NULL);
@@ -3304,7 +3313,7 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code,
 	spin_lock(&vcpu->kvm->mmu_lock);
 	if (mmu_notifier_retry(vcpu->kvm, mmu_seq))
 		goto out_unlock;
-	kvm_mmu_free_some_pages(vcpu);
+	make_mmu_pages_available(vcpu);
 	if (likely(!force_pt_level))
 		transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level);
 	r = __direct_map(vcpu, gpa, write, map_writable,
@@ -4006,17 +4015,17 @@ int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva)
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_unprotect_page_virt);

-void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu)
+static void make_mmu_pages_available(struct kvm_vcpu *vcpu)
 {
 	LIST_HEAD(invalid_list);

-	while (kvm_mmu_available_pages(vcpu->kvm) < KVM_REFILL_PAGES &&
-	       !list_empty(&vcpu->kvm->arch.active_mmu_pages)) {
-		struct kvm_mmu_page *sp;
+	if (likely(kvm_mmu_available_pages(vcpu->kvm) >= KVM_MIN_FREE_MMU_PAGES))
+		return;
+
+	while (kvm_mmu_available_pages(vcpu->kvm) < KVM_REFILL_PAGES) {
+		if (!prepare_zap_oldest_mmu_page(vcpu->kvm, &invalid_list))
+			break;

-		sp = container_of(vcpu->kvm->arch.active_mmu_pages.prev,
-				  struct kvm_mmu_page, link);
-		kvm_mmu_prepare_zap_page(vcpu->kvm, sp, &invalid_list);
 		++vcpu->kvm->stat.mmu_recycled;
 	}
 	kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list);
@@ -4185,17 +4194,22 @@ restart:
 	spin_unlock(&kvm->mmu_lock);
 }

-static void kvm_mmu_remove_some_alloc_mmu_pages(struct kvm *kvm,
-						struct list_head *invalid_list)
+void kvm_mmu_zap_mmio_sptes(struct kvm *kvm)
 {
-	struct kvm_mmu_page *page;
+	struct kvm_mmu_page *sp, *node;
+	LIST_HEAD(invalid_list);

-	if (list_empty(&kvm->arch.active_mmu_pages))
-		return;
+	spin_lock(&kvm->mmu_lock);
+restart:
+	list_for_each_entry_safe(sp, node, &kvm->arch.active_mmu_pages, link) {
+		if (!sp->mmio_cached)
+			continue;
+		if (kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list))
+			goto restart;
+	}

-	page = container_of(kvm->arch.active_mmu_pages.prev,
-			    struct kvm_mmu_page, link);
-	kvm_mmu_prepare_zap_page(kvm, page, invalid_list);
+	kvm_mmu_commit_zap_page(kvm, &invalid_list);
+	spin_unlock(&kvm->mmu_lock);
 }

 static int mmu_shrink(struct shrinker *shrink, struct shrink_control *sc)
@@ -4232,7 +4246,7 @@ static int mmu_shrink(struct shrinker *shrink, struct shrink_control *sc)
 		idx = srcu_read_lock(&kvm->srcu);
 		spin_lock(&kvm->mmu_lock);

-		kvm_mmu_remove_some_alloc_mmu_pages(kvm, &invalid_list);
+		prepare_zap_oldest_mmu_page(kvm, &invalid_list);
 		kvm_mmu_commit_zap_page(kvm, &invalid_list);

 		spin_unlock(&kvm->mmu_lock);

+ 4 - 7
arch/x86/kvm/mmu.h

@@ -57,14 +57,11 @@ int kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context);

 static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm)
 {
-	return kvm->arch.n_max_mmu_pages -
-		kvm->arch.n_used_mmu_pages;
-}
+	if (kvm->arch.n_max_mmu_pages > kvm->arch.n_used_mmu_pages)
+		return kvm->arch.n_max_mmu_pages -
+			kvm->arch.n_used_mmu_pages;

-static inline void kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu)
-{
-	if (unlikely(kvm_mmu_available_pages(vcpu->kvm)< KVM_MIN_FREE_MMU_PAGES))
-		__kvm_mmu_free_some_pages(vcpu);
+	return 0;
 }

 static inline int kvm_mmu_reload(struct kvm_vcpu *vcpu)
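
The comparison added to kvm_mmu_available_pages() guards a real underflow: n_used_mmu_pages can transiently exceed n_max_mmu_pages, and the old unsigned subtraction then wrapped to a huge page budget. A toy userspace demonstration (plain C, not kernel code):

	#include <stdio.h>

	int main(void)
	{
		unsigned int n_max = 10, n_used = 12;

		printf("old: %u\n", n_max - n_used);	/* wraps to 4294967294 */
		printf("new: %u\n", n_max > n_used ? n_max - n_used : 0);
		return 0;
	}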

+ 1 - 1
arch/x86/kvm/paging_tmpl.h

@@ -627,7 +627,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
 		goto out_unlock;

 	kvm_mmu_audit(vcpu, AUDIT_PRE_PAGE_FAULT);
-	kvm_mmu_free_some_pages(vcpu);
+	make_mmu_pages_available(vcpu);
 	if (!force_pt_level)
 		transparent_hugepage_adjust(vcpu, &walker.gfn, &pfn, &level);
 	r = FNAME(fetch)(vcpu, addr, &walker, write_fault,

+ 11 - 3
arch/x86/kvm/pmu.c

@@ -360,10 +360,12 @@ int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data)
 	return 1;
 }

-int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data)
+int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 {
 	struct kvm_pmu *pmu = &vcpu->arch.pmu;
 	struct kvm_pmc *pmc;
+	u32 index = msr_info->index;
+	u64 data = msr_info->data;

 	switch (index) {
 	case MSR_CORE_PERF_FIXED_CTR_CTRL:
@@ -375,6 +377,10 @@ int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data)
 		}
 		break;
 	case MSR_CORE_PERF_GLOBAL_STATUS:
+		if (msr_info->host_initiated) {
+			pmu->global_status = data;
+			return 0;
+		}
 		break; /* RO MSR */
 	case MSR_CORE_PERF_GLOBAL_CTRL:
 		if (pmu->global_ctrl == data)
@@ -386,7 +392,8 @@ int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data)
 		break;
 	case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
 		if (!(data & (pmu->global_ctrl_mask & ~(3ull<<62)))) {
-			pmu->global_status &= ~data;
+			if (!msr_info->host_initiated)
+				pmu->global_status &= ~data;
 			pmu->global_ovf_ctrl = data;
 			return 0;
 		}
@@ -394,7 +401,8 @@ int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data)
 	default:
 		if ((pmc = get_gp_pmc(pmu, index, MSR_IA32_PERFCTR0)) ||
 				(pmc = get_fixed_pmc(pmu, index))) {
-			data = (s64)(s32)data;
+			if (!msr_info->host_initiated)
+				data = (s64)(s32)data;
 			pmc->counter += data - read_pmc(pmc);
 			return 0;
 		} else if ((pmc = get_gp_pmc(pmu, index, MSR_P6_EVNTSEL0))) {
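
With host_initiated writes accepted, userspace can restore otherwise read-only or write-to-clear PMU state during migration. A hedged userspace sketch (the helper, fd handling, and struct layout trick are assumptions; KVM_SET_MSRS is the real ioctl, and its writes are the ones treated as host-initiated):

	#include <string.h>
	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	#define MSR_CORE_PERF_GLOBAL_STATUS 0x38e

	static struct {
		struct kvm_msrs hdr;
		struct kvm_msr_entry entry;	/* backs hdr.entries[0] */
	} msrs;

	static int restore_global_status(int vcpu_fd, __u64 saved)
	{
		memset(&msrs, 0, sizeof(msrs));
		msrs.hdr.nmsrs = 1;
		msrs.entry.index = MSR_CORE_PERF_GLOBAL_STATUS;
		msrs.entry.data = saved;
		return ioctl(vcpu_fd, KVM_SET_MSRS, &msrs.hdr);	/* returns count set */
	}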

+ 20 - 20
arch/x86/kvm/svm.c

@@ -1131,17 +1131,11 @@ static void init_vmcb(struct vcpu_svm *svm)
 	init_seg(&save->gs);

 	save->cs.selector = 0xf000;
+	save->cs.base = 0xffff0000;
 	/* Executable/Readable Code Segment */
 	save->cs.attrib = SVM_SELECTOR_READ_MASK | SVM_SELECTOR_P_MASK |
 		SVM_SELECTOR_S_MASK | SVM_SELECTOR_CODE_MASK;
 	save->cs.limit = 0xffff;
-	/*
-	 * cs.base should really be 0xffff0000, but vmx can't handle that, so
-	 * be consistent with it.
-	 *
-	 * Replace when we have real mode working for vmx.
-	 */
-	save->cs.base = 0xf0000;

 	save->gdtr.limit = 0xffff;
 	save->idtr.limit = 0xffff;
@@ -1191,7 +1185,7 @@ static void init_vmcb(struct vcpu_svm *svm)
 	enable_gif(svm);
 }

-static int svm_vcpu_reset(struct kvm_vcpu *vcpu)
+static void svm_vcpu_reset(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
 	u32 dummy;
@@ -1199,16 +1193,8 @@ static int svm_vcpu_reset(struct kvm_vcpu *vcpu)

 	init_vmcb(svm);

-	if (!kvm_vcpu_is_bsp(vcpu)) {
-		kvm_rip_write(vcpu, 0);
-		svm->vmcb->save.cs.base = svm->vcpu.arch.sipi_vector << 12;
-		svm->vmcb->save.cs.selector = svm->vcpu.arch.sipi_vector << 8;
-	}
-
 	kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy);
 	kvm_register_write(vcpu, VCPU_REGS_RDX, eax);
-
-	return 0;
 }

 static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
@@ -3487,7 +3473,7 @@ static int handle_exit(struct kvm_vcpu *vcpu)
 	    exit_code != SVM_EXIT_EXCP_BASE + PF_VECTOR &&
 	    exit_code != SVM_EXIT_NPF && exit_code != SVM_EXIT_TASK_SWITCH &&
 	    exit_code != SVM_EXIT_INTR && exit_code != SVM_EXIT_NMI)
-		printk(KERN_ERR "%s: unexpected exit_ini_info 0x%x "
+		printk(KERN_ERR "%s: unexpected exit_int_info 0x%x "
 		       "exit_code 0x%x\n",
 		       __func__, svm->vmcb->control.exit_int_info,
 		       exit_code);
@@ -3591,6 +3577,11 @@ static void svm_hwapic_isr_update(struct kvm *kvm, int isr)
 	return;
 }

+static void svm_sync_pir_to_irr(struct kvm_vcpu *vcpu)
+{
+	return;
+}
+
 static int svm_nmi_allowed(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
@@ -3641,7 +3632,7 @@ static int svm_interrupt_allowed(struct kvm_vcpu *vcpu)
 	return ret;
 }

-static void enable_irq_window(struct kvm_vcpu *vcpu)
+static int enable_irq_window(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);

@@ -3655,15 +3646,16 @@ static void enable_irq_window(struct kvm_vcpu *vcpu)
 		svm_set_vintr(svm);
 		svm_inject_irq(svm, 0x0);
 	}
+	return 0;
 }

-static void enable_nmi_window(struct kvm_vcpu *vcpu)
+static int enable_nmi_window(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);

 	if ((svm->vcpu.arch.hflags & (HF_NMI_MASK | HF_IRET_MASK))
 	    == HF_NMI_MASK)
-		return; /* IRET will cause a vm exit */
+		return 0; /* IRET will cause a vm exit */

 	/*
 	 * Something prevents NMI from been injected. Single step over possible
@@ -3672,6 +3664,7 @@ static void enable_nmi_window(struct kvm_vcpu *vcpu)
 	svm->nmi_singlestep = true;
 	svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF);
 	update_db_bp_intercept(vcpu);
+	return 0;
 }

 static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr)
@@ -4247,6 +4240,11 @@ out:
 	return ret;
 }

+static void svm_handle_external_intr(struct kvm_vcpu *vcpu)
+{
+	local_irq_enable();
+}
+
 static struct kvm_x86_ops svm_x86_ops = {
 	.cpu_has_kvm_support = has_svm,
 	.disabled_by_bios = is_disabled,
@@ -4314,6 +4312,7 @@ static struct kvm_x86_ops svm_x86_ops = {
 	.vm_has_apicv = svm_vm_has_apicv,
 	.load_eoi_exitmap = svm_load_eoi_exitmap,
 	.hwapic_isr_update = svm_hwapic_isr_update,
+	.sync_pir_to_irr = svm_sync_pir_to_irr,

 	.set_tss_addr = svm_set_tss_addr,
 	.get_tdp_level = get_npt_level,
@@ -4342,6 +4341,7 @@ static struct kvm_x86_ops svm_x86_ops = {
 	.set_tdp_cr3 = set_tdp_cr3,

 	.check_intercept = svm_check_intercept,
+	.handle_external_intr = svm_handle_external_intr,
 };

 static int __init svm_init(void)
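
The void-to-int change lets the generic x86 code observe when an interrupt or NMI window cannot be armed. A sketch of the consuming side, assuming it mirrors the vcpu_enter_guest() adjustment in this series (simplified, names as in x86.c):

	req_immediate_exit = kvm_x86_ops->enable_irq_window(vcpu) != 0;
	...
	if (req_immediate_exit)
		smp_send_reschedule(vcpu->cpu);	/* force an exit and retry */

SVM can always open a window, so both of its implementations return 0; a non-zero return requests an immediate exit instead of silently dropping the event.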

+ 834 - 243
arch/x86/kvm/vmx.c

@@ -84,8 +84,11 @@ module_param(vmm_exclusive, bool, S_IRUGO);
 static bool __read_mostly fasteoi = 1;
 module_param(fasteoi, bool, S_IRUGO);
 
-static bool __read_mostly enable_apicv_reg_vid;
+static bool __read_mostly enable_apicv = 1;
+module_param(enable_apicv, bool, S_IRUGO);
 
+static bool __read_mostly enable_shadow_vmcs = 1;
+module_param_named(enable_shadow_vmcs, enable_shadow_vmcs, bool, S_IRUGO);
 /*
  * If nested=1, nested virtualization is supported, i.e., guests may use
  * VMX and be a hypervisor for its own guests. If nested=0, guests may not
@@ -298,7 +301,8 @@ struct __packed vmcs12 {
 	u32 guest_activity_state;
 	u32 guest_sysenter_cs;
 	u32 host_ia32_sysenter_cs;
-	u32 padding32[8]; /* room for future expansion */
+	u32 vmx_preemption_timer_value;
+	u32 padding32[7]; /* room for future expansion */
 	u16 virtual_processor_id;
 	u16 guest_es_selector;
 	u16 guest_cs_selector;
@@ -351,6 +355,12 @@ struct nested_vmx {
 	/* The host-usable pointer to the above */
 	struct page *current_vmcs12_page;
 	struct vmcs12 *current_vmcs12;
+	struct vmcs *current_shadow_vmcs;
+	/*
+	 * Indicates if the shadow vmcs must be updated with the
+	 * data hold by vmcs12
+	 */
+	bool sync_shadow_vmcs;
 
 	/* vmcs02_list cache of VMCSs recently used to run L2 guests */
 	struct list_head vmcs02_pool;
@@ -365,6 +375,31 @@ struct nested_vmx {
 	struct page *apic_access_page;
 };
 
+#define POSTED_INTR_ON  0
+/* Posted-Interrupt Descriptor */
+struct pi_desc {
+	u32 pir[8];     /* Posted interrupt requested */
+	u32 control;	/* bit 0 of control is outstanding notification bit */
+	u32 rsvd[7];
+} __aligned(64);
+
+static bool pi_test_and_set_on(struct pi_desc *pi_desc)
+{
+	return test_and_set_bit(POSTED_INTR_ON,
+			(unsigned long *)&pi_desc->control);
+}
+
+static bool pi_test_and_clear_on(struct pi_desc *pi_desc)
+{
+	return test_and_clear_bit(POSTED_INTR_ON,
+			(unsigned long *)&pi_desc->control);
+}
+
+static int pi_test_and_set_pir(int vector, struct pi_desc *pi_desc)
+{
+	return test_and_set_bit(vector, (unsigned long *)pi_desc->pir);
+}
+
 struct vcpu_vmx {
 	struct kvm_vcpu       vcpu;
 	unsigned long         host_rsp;
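
The pi_desc layout above mirrors the hardware posted-interrupt descriptor: a 256-bit PIR (one bit per vector) plus an outstanding-notification bit, both of which the CPU may update concurrently, hence the atomic bitops. A stand-alone sketch of the sender-side protocol built on these helpers (illustrative only; the real delivery path is vmx_deliver_posted_interrupt later in this diff):

	/* Illustrative sender-side use of the pi_desc helpers. */
	static void post_interrupt_sketch(struct pi_desc *pi, int vector)
	{
		/* 1. Mark the vector pending in the PIR. */
		if (pi_test_and_set_pir(vector, pi))
			return;	/* already pending */

		/* 2. Only the first setter of the notification bit needs to
		 *    send the POSTED_INTR_VECTOR IPI to the target CPU. */
		if (!pi_test_and_set_on(pi))
			; /* send notification IPI here */
	}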
@@ -377,6 +412,7 @@ struct vcpu_vmx {
 	struct shared_msr_entry *guest_msrs;
 	int                   nmsrs;
 	int                   save_nmsrs;
+	unsigned long	      host_idt_base;
 #ifdef CONFIG_X86_64
 	u64 		      msr_host_kernel_gs_base;
 	u64 		      msr_guest_kernel_gs_base;
@@ -428,6 +464,9 @@ struct vcpu_vmx {
 
 	bool rdtscp_enabled;
 
+	/* Posted interrupt descriptor */
+	struct pi_desc pi_desc;
+
 	/* Support for a guest hypervisor (nested VMX) */
 	struct nested_vmx nested;
 };
@@ -451,6 +490,64 @@ static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
 #define FIELD64(number, name)	[number] = VMCS12_OFFSET(name), \
 				[number##_HIGH] = VMCS12_OFFSET(name)+4
 
+
+static const unsigned long shadow_read_only_fields[] = {
+	/*
+	 * We do NOT shadow fields that are modified when L0
+	 * traps and emulates any vmx instruction (e.g. VMPTRLD,
+	 * VMXON...) executed by L1.
+	 * For example, VM_INSTRUCTION_ERROR is read
+	 * by L1 if a vmx instruction fails (part of the error path).
+	 * Note the code assumes this logic. If for some reason
+	 * we start shadowing these fields then we need to
+	 * force a shadow sync when L0 emulates vmx instructions
+	 * (e.g. force a sync if VM_INSTRUCTION_ERROR is modified
+	 * by nested_vmx_failValid)
+	 */
+	VM_EXIT_REASON,
+	VM_EXIT_INTR_INFO,
+	VM_EXIT_INSTRUCTION_LEN,
+	IDT_VECTORING_INFO_FIELD,
+	IDT_VECTORING_ERROR_CODE,
+	VM_EXIT_INTR_ERROR_CODE,
+	EXIT_QUALIFICATION,
+	GUEST_LINEAR_ADDRESS,
+	GUEST_PHYSICAL_ADDRESS
+};
+static const int max_shadow_read_only_fields =
+	ARRAY_SIZE(shadow_read_only_fields);
+
+static const unsigned long shadow_read_write_fields[] = {
+	GUEST_RIP,
+	GUEST_RSP,
+	GUEST_CR0,
+	GUEST_CR3,
+	GUEST_CR4,
+	GUEST_INTERRUPTIBILITY_INFO,
+	GUEST_RFLAGS,
+	GUEST_CS_SELECTOR,
+	GUEST_CS_AR_BYTES,
+	GUEST_CS_LIMIT,
+	GUEST_CS_BASE,
+	GUEST_ES_BASE,
+	CR0_GUEST_HOST_MASK,
+	CR0_READ_SHADOW,
+	CR4_READ_SHADOW,
+	TSC_OFFSET,
+	EXCEPTION_BITMAP,
+	CPU_BASED_VM_EXEC_CONTROL,
+	VM_ENTRY_EXCEPTION_ERROR_CODE,
+	VM_ENTRY_INTR_INFO_FIELD,
+	VM_ENTRY_INSTRUCTION_LEN,
+	VM_ENTRY_EXCEPTION_ERROR_CODE,
+	HOST_FS_BASE,
+	HOST_GS_BASE,
+	HOST_FS_SELECTOR,
+	HOST_GS_SELECTOR
+};
+static const int max_shadow_read_write_fields =
+	ARRAY_SIZE(shadow_read_write_fields);
+
 static const unsigned short vmcs_field_to_offset_table[] = {
 	FIELD(VIRTUAL_PROCESSOR_ID, virtual_processor_id),
 	FIELD(GUEST_ES_SELECTOR, guest_es_selector),
@@ -537,6 +634,7 @@ static const unsigned short vmcs_field_to_offset_table[] = {
 	FIELD(GUEST_ACTIVITY_STATE, guest_activity_state),
 	FIELD(GUEST_SYSENTER_CS, guest_sysenter_cs),
 	FIELD(HOST_IA32_SYSENTER_CS, host_ia32_sysenter_cs),
+	FIELD(VMX_PREEMPTION_TIMER_VALUE, vmx_preemption_timer_value),
 	FIELD(CR0_GUEST_HOST_MASK, cr0_guest_host_mask),
 	FIELD(CR4_GUEST_HOST_MASK, cr4_guest_host_mask),
 	FIELD(CR0_READ_SHADOW, cr0_read_shadow),
@@ -624,6 +722,9 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu,
 			    struct kvm_segment *var, int seg);
 static bool guest_state_valid(struct kvm_vcpu *vcpu);
 static u32 vmx_segment_access_rights(struct kvm_segment *var);
+static void vmx_sync_pir_to_irr_dummy(struct kvm_vcpu *vcpu);
+static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx);
+static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx);
 
 static DEFINE_PER_CPU(struct vmcs *, vmxarea);
 static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
@@ -640,6 +741,8 @@ static unsigned long *vmx_msr_bitmap_legacy;
 static unsigned long *vmx_msr_bitmap_longmode;
 static unsigned long *vmx_msr_bitmap_legacy_x2apic;
 static unsigned long *vmx_msr_bitmap_longmode_x2apic;
+static unsigned long *vmx_vmread_bitmap;
+static unsigned long *vmx_vmwrite_bitmap;
 
 static bool cpu_has_load_ia32_efer;
 static bool cpu_has_load_perf_global_ctrl;
@@ -782,6 +885,18 @@ static inline bool cpu_has_vmx_virtual_intr_delivery(void)
 		SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY;
 }
 
+static inline bool cpu_has_vmx_posted_intr(void)
+{
+	return vmcs_config.pin_based_exec_ctrl & PIN_BASED_POSTED_INTR;
+}
+
+static inline bool cpu_has_vmx_apicv(void)
+{
+	return cpu_has_vmx_apic_register_virt() &&
+		cpu_has_vmx_virtual_intr_delivery() &&
+		cpu_has_vmx_posted_intr();
+}
+
 static inline bool cpu_has_vmx_flexpriority(void)
 {
 	return cpu_has_vmx_tpr_shadow() &&
@@ -895,6 +1010,18 @@ static inline bool cpu_has_vmx_wbinvd_exit(void)
 		SECONDARY_EXEC_WBINVD_EXITING;
 }
 
+static inline bool cpu_has_vmx_shadow_vmcs(void)
+{
+	u64 vmx_msr;
+	rdmsrl(MSR_IA32_VMX_MISC, vmx_msr);
+	/* check if the cpu supports writing r/o exit information fields */
+	if (!(vmx_msr & MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS))
+		return false;
+
+	return vmcs_config.cpu_based_2nd_exec_ctrl &
+		SECONDARY_EXEC_SHADOW_VMCS;
+}
+
 static inline bool report_flexpriority(void)
 {
 	return flexpriority_enabled;
@@ -1790,7 +1917,7 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
 	u32 intr_info = nr | INTR_INFO_VALID_MASK;
 
 	if (nr == PF_VECTOR && is_guest_mode(vcpu) &&
-		nested_pf_handled(vcpu))
+	    !vmx->nested.nested_run_pending && nested_pf_handled(vcpu))
 		return;
 
 	if (has_error_code) {
@@ -2022,6 +2149,7 @@ static u32 nested_vmx_secondary_ctls_low, nested_vmx_secondary_ctls_high;
 static u32 nested_vmx_pinbased_ctls_low, nested_vmx_pinbased_ctls_high;
 static u32 nested_vmx_exit_ctls_low, nested_vmx_exit_ctls_high;
 static u32 nested_vmx_entry_ctls_low, nested_vmx_entry_ctls_high;
+static u32 nested_vmx_misc_low, nested_vmx_misc_high;
 static __init void nested_vmx_setup_ctls_msrs(void)
 {
 	/*
@@ -2040,30 +2168,40 @@ static __init void nested_vmx_setup_ctls_msrs(void)
 	 */
 
 	/* pin-based controls */
+	rdmsr(MSR_IA32_VMX_PINBASED_CTLS,
+	      nested_vmx_pinbased_ctls_low, nested_vmx_pinbased_ctls_high);
 	/*
 	 * According to the Intel spec, if bit 55 of VMX_BASIC is off (as it is
 	 * in our case), bits 1, 2 and 4 (i.e., 0x16) must be 1 in this MSR.
 	 */
-	nested_vmx_pinbased_ctls_low = 0x16 ;
-	nested_vmx_pinbased_ctls_high = 0x16 |
-		PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING |
-		PIN_BASED_VIRTUAL_NMIS;
+	nested_vmx_pinbased_ctls_low |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
+	nested_vmx_pinbased_ctls_high &= PIN_BASED_EXT_INTR_MASK |
+		PIN_BASED_NMI_EXITING | PIN_BASED_VIRTUAL_NMIS |
+		PIN_BASED_VMX_PREEMPTION_TIMER;
+	nested_vmx_pinbased_ctls_high |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
 
-	/* exit controls */
-	nested_vmx_exit_ctls_low = 0;
+	/*
+	 * Exit controls
+	 * If bit 55 of VMX_BASIC is off, bits 0-8 and 10, 11, 13, 14, 16 and
+	 * 17 must be 1.
+	 */
+	nested_vmx_exit_ctls_low = VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR;
 	/* Note that guest use of VM_EXIT_ACK_INTR_ON_EXIT is not supported. */
 #ifdef CONFIG_X86_64
 	nested_vmx_exit_ctls_high = VM_EXIT_HOST_ADDR_SPACE_SIZE;
 #else
 	nested_vmx_exit_ctls_high = 0;
 #endif
+	nested_vmx_exit_ctls_high |= VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR;
 
 	/* entry controls */
 	rdmsr(MSR_IA32_VMX_ENTRY_CTLS,
 		nested_vmx_entry_ctls_low, nested_vmx_entry_ctls_high);
-	nested_vmx_entry_ctls_low = 0;
+	/* If bit 55 of VMX_BASIC is off, bits 0-8 and 12 must be 1. */
+	nested_vmx_entry_ctls_low = VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR;
 	nested_vmx_entry_ctls_high &=
 		VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_IA32E_MODE;
+	nested_vmx_entry_ctls_high |= VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR;
 
 	/* cpu-based controls */
 	rdmsr(MSR_IA32_VMX_PROCBASED_CTLS,
@@ -2080,6 +2218,7 @@ static __init void nested_vmx_setup_ctls_msrs(void)
 		CPU_BASED_MOV_DR_EXITING | CPU_BASED_UNCOND_IO_EXITING |
 		CPU_BASED_USE_IO_BITMAPS | CPU_BASED_MONITOR_EXITING |
 		CPU_BASED_RDPMC_EXITING | CPU_BASED_RDTSC_EXITING |
+		CPU_BASED_PAUSE_EXITING |
 		CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
 	/*
 	 * We can allow some features even when not supported by the
@@ -2094,7 +2233,14 @@ static __init void nested_vmx_setup_ctls_msrs(void)
 		nested_vmx_secondary_ctls_low, nested_vmx_secondary_ctls_high);
 	nested_vmx_secondary_ctls_low = 0;
 	nested_vmx_secondary_ctls_high &=
-		SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
+		SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
+		SECONDARY_EXEC_WBINVD_EXITING;
+
+	/* miscellaneous data */
+	rdmsr(MSR_IA32_VMX_MISC, nested_vmx_misc_low, nested_vmx_misc_high);
+	nested_vmx_misc_low &= VMX_MISC_PREEMPTION_TIMER_RATE_MASK |
+		VMX_MISC_SAVE_EFER_LMA;
+	nested_vmx_misc_high = 0;
 }
 
 static inline bool vmx_control_verify(u32 control, u32 low, u32 high)
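
The value reported to L1 for MSR_IA32_VMX_MISC is now assembled from the filtered low/high halves via vmx_control_msr. That helper (defined elsewhere in vmx.c, outside this diff) is expected to pack the pair in the usual low-word/high-word MSR layout; a sketch, assuming that convention:

	/* Sketch: pack an allowed-0/allowed-1 pair into one 64-bit MSR value. */
	static inline u64 vmx_control_msr_sketch(u32 low, u32 high)
	{
		return low | ((u64)high << 32);
	}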
@@ -2165,7 +2311,8 @@ static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
 					nested_vmx_entry_ctls_high);
 		break;
 	case MSR_IA32_VMX_MISC:
-		*pdata = 0;
+		*pdata = vmx_control_msr(nested_vmx_misc_low,
+					 nested_vmx_misc_high);
 		break;
 	/*
 	 * These MSRs specify bits which the guest must keep fixed (on or off)
@@ -2529,12 +2676,6 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
 	u32 _vmexit_control = 0;
 	u32 _vmentry_control = 0;
 
-	min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING;
-	opt = PIN_BASED_VIRTUAL_NMIS;
-	if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS,
-				&_pin_based_exec_control) < 0)
-		return -EIO;
-
 	min = CPU_BASED_HLT_EXITING |
 #ifdef CONFIG_X86_64
 	      CPU_BASED_CR8_LOAD_EXITING |
@@ -2573,7 +2714,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
 			SECONDARY_EXEC_RDTSCP |
 			SECONDARY_EXEC_ENABLE_INVPCID |
 			SECONDARY_EXEC_APIC_REGISTER_VIRT |
-			SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY;
+			SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
+			SECONDARY_EXEC_SHADOW_VMCS;
 		if (adjust_vmx_controls(min2, opt2,
 					MSR_IA32_VMX_PROCBASED_CTLS2,
 					&_cpu_based_2nd_exec_control) < 0)
@@ -2605,11 +2747,23 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
 #ifdef CONFIG_X86_64
 	min |= VM_EXIT_HOST_ADDR_SPACE_SIZE;
 #endif
-	opt = VM_EXIT_SAVE_IA32_PAT | VM_EXIT_LOAD_IA32_PAT;
+	opt = VM_EXIT_SAVE_IA32_PAT | VM_EXIT_LOAD_IA32_PAT |
+		VM_EXIT_ACK_INTR_ON_EXIT;
 	if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_EXIT_CTLS,
 				&_vmexit_control) < 0)
 		return -EIO;
 
+	min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING;
+	opt = PIN_BASED_VIRTUAL_NMIS | PIN_BASED_POSTED_INTR;
+	if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS,
+				&_pin_based_exec_control) < 0)
+		return -EIO;
+
+	if (!(_cpu_based_2nd_exec_control &
+		SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY) ||
+		!(_vmexit_control & VM_EXIT_ACK_INTR_ON_EXIT))
+		_pin_based_exec_control &= ~PIN_BASED_POSTED_INTR;
+
 	min = 0;
 	opt = VM_ENTRY_LOAD_IA32_PAT;
 	if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_ENTRY_CTLS,
@@ -2762,6 +2916,8 @@ static __init int hardware_setup(void)
 
 	if (!cpu_has_vmx_vpid())
 		enable_vpid = 0;
+	if (!cpu_has_vmx_shadow_vmcs())
+		enable_shadow_vmcs = 0;
 
 	if (!cpu_has_vmx_ept() ||
 	    !cpu_has_vmx_ept_4levels()) {
@@ -2788,14 +2944,16 @@ static __init int hardware_setup(void)
 	if (!cpu_has_vmx_ple())
 		ple_gap = 0;
 
-	if (!cpu_has_vmx_apic_register_virt() ||
-				!cpu_has_vmx_virtual_intr_delivery())
-		enable_apicv_reg_vid = 0;
+	if (!cpu_has_vmx_apicv())
+		enable_apicv = 0;
 
-	if (enable_apicv_reg_vid)
+	if (enable_apicv)
 		kvm_x86_ops->update_cr8_intercept = NULL;
-	else
+	else {
 		kvm_x86_ops->hwapic_irr_update = NULL;
+		kvm_x86_ops->deliver_posted_interrupt = NULL;
+		kvm_x86_ops->sync_pir_to_irr = vmx_sync_pir_to_irr_dummy;
+	}
 
 	if (nested)
 		nested_vmx_setup_ctls_msrs();
@@ -2876,22 +3034,6 @@ static void enter_pmode(struct kvm_vcpu *vcpu)
 	vmx->cpl = 0;
 }
 
-static gva_t rmode_tss_base(struct kvm *kvm)
-{
-	if (!kvm->arch.tss_addr) {
-		struct kvm_memslots *slots;
-		struct kvm_memory_slot *slot;
-		gfn_t base_gfn;
-
-		slots = kvm_memslots(kvm);
-		slot = id_to_memslot(slots, 0);
-		base_gfn = slot->base_gfn + slot->npages - 3;
-
-		return base_gfn << PAGE_SHIFT;
-	}
-	return kvm->arch.tss_addr;
-}
-
 static void fix_rmode_seg(int seg, struct kvm_segment *save)
 {
 	const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
@@ -2942,19 +3084,15 @@ static void enter_rmode(struct kvm_vcpu *vcpu)
 
 	/*
 	 * Very old userspace does not call KVM_SET_TSS_ADDR before entering
-	 * vcpu. Call it here with phys address pointing 16M below 4G.
+	 * vcpu. Warn the user that an update is overdue.
 	 */
-	if (!vcpu->kvm->arch.tss_addr) {
+	if (!vcpu->kvm->arch.tss_addr)
 		printk_once(KERN_WARNING "kvm: KVM_SET_TSS_ADDR need to be "
 			     "called before entering vcpu\n");
-		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
-		vmx_set_tss_addr(vcpu->kvm, 0xfeffd000);
-		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
-	}
 
 	vmx_segment_cache_clear(vmx);
 
-	vmcs_writel(GUEST_TR_BASE, rmode_tss_base(vcpu->kvm));
+	vmcs_writel(GUEST_TR_BASE, vcpu->kvm->arch.tss_addr);
 	vmcs_write32(GUEST_TR_LIMIT, RMODE_TSS_SIZE - 1);
 	vmcs_write32(GUEST_TR_AR_BYTES, 0x008b);
 
@@ -3214,7 +3352,9 @@ static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 		 */
 		if (!nested_vmx_allowed(vcpu))
 			return 1;
-	} else if (to_vmx(vcpu)->nested.vmxon)
+	}
+	if (to_vmx(vcpu)->nested.vmxon &&
+	    ((cr4 & VMXON_CR4_ALWAYSON) != VMXON_CR4_ALWAYSON))
 		return 1;
 
 	vcpu->arch.cr4 = cr4;
@@ -3550,7 +3690,7 @@ static bool guest_state_valid(struct kvm_vcpu *vcpu)
 		return true;
 
 	/* real mode guest state checks */
-	if (!is_protmode(vcpu)) {
+	if (!is_protmode(vcpu) || (vmx_get_rflags(vcpu) & X86_EFLAGS_VM)) {
 		if (!rmode_segment_valid(vcpu, VCPU_SREG_CS))
 			return false;
 		if (!rmode_segment_valid(vcpu, VCPU_SREG_SS))
@@ -3599,7 +3739,7 @@ static int init_rmode_tss(struct kvm *kvm)
 	int r, idx, ret = 0;
 
 	idx = srcu_read_lock(&kvm->srcu);
-	fn = rmode_tss_base(kvm) >> PAGE_SHIFT;
+	fn = kvm->arch.tss_addr >> PAGE_SHIFT;
 	r = kvm_clear_guest_page(kvm, fn, 0, PAGE_SIZE);
 	if (r < 0)
 		goto out;
@@ -3692,7 +3832,7 @@ static int alloc_apic_access_page(struct kvm *kvm)
 	kvm_userspace_mem.flags = 0;
 	kvm_userspace_mem.guest_phys_addr = 0xfee00000ULL;
 	kvm_userspace_mem.memory_size = PAGE_SIZE;
-	r = __kvm_set_memory_region(kvm, &kvm_userspace_mem, false);
+	r = __kvm_set_memory_region(kvm, &kvm_userspace_mem);
 	if (r)
 		goto out;
 
@@ -3722,7 +3862,7 @@ static int alloc_identity_pagetable(struct kvm *kvm)
 	kvm_userspace_mem.guest_phys_addr =
 		kvm->arch.ept_identity_map_addr;
 	kvm_userspace_mem.memory_size = PAGE_SIZE;
-	r = __kvm_set_memory_region(kvm, &kvm_userspace_mem, false);
+	r = __kvm_set_memory_region(kvm, &kvm_userspace_mem);
 	if (r)
 		goto out;
 
@@ -3869,13 +4009,59 @@ static void vmx_disable_intercept_msr_write_x2apic(u32 msr)
 			msr, MSR_TYPE_W);
 }
 
+static int vmx_vm_has_apicv(struct kvm *kvm)
+{
+	return enable_apicv && irqchip_in_kernel(kvm);
+}
+
+/*
+ * Send interrupt to vcpu via posted interrupt way.
+ * 1. If target vcpu is running(non-root mode), send posted interrupt
+ * notification to vcpu and hardware will sync PIR to vIRR atomically.
+ * 2. If target vcpu isn't running(root mode), kick it to pick up the
+ * interrupt from PIR in next vmentry.
+ */
+static void vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector)
+{
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	int r;
+
+	if (pi_test_and_set_pir(vector, &vmx->pi_desc))
+		return;
+
+	r = pi_test_and_set_on(&vmx->pi_desc);
+	kvm_make_request(KVM_REQ_EVENT, vcpu);
+#ifdef CONFIG_SMP
+	if (!r && (vcpu->mode == IN_GUEST_MODE))
+		apic->send_IPI_mask(get_cpu_mask(vcpu->cpu),
+				POSTED_INTR_VECTOR);
+	else
+#endif
+		kvm_vcpu_kick(vcpu);
+}
+
+static void vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu)
+{
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+	if (!pi_test_and_clear_on(&vmx->pi_desc))
+		return;
+
+	kvm_apic_update_irr(vcpu, vmx->pi_desc.pir);
+}
+
+static void vmx_sync_pir_to_irr_dummy(struct kvm_vcpu *vcpu)
+{
+	return;
+}
+
 /*
  * Set up the vmcs's constant host-state fields, i.e., host-state fields that
  * will not change in the lifetime of the guest.
  * Note that host-state that does change is set elsewhere. E.g., host-state
  * that is set differently for each CPU is set in vmx_vcpu_load(), not here.
 */
-static void vmx_set_constant_host_state(void)
+static void vmx_set_constant_host_state(struct vcpu_vmx *vmx)
 {
 	u32 low32, high32;
 	unsigned long tmpl;
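
On the receiving side, sync_pir_to_irr runs before VM entry: if the notification bit was found set, the vectors accumulated in the PIR are folded into the virtual-APIC IRR. A sketch of the merge that kvm_apic_update_irr is assumed to perform (the lapic side is not part of this diff; register spacing per the xAPIC layout):

	/* Sketch of the assumed PIR -> vIRR merge done at VM entry time. */
	static void update_irr_sketch(void *apic_regs, u32 *pir)
	{
		int i;

		for (i = 0; i < 8; i++) {	/* 8 * 32 = 256 vectors */
			/* IRR is eight 32-bit registers, 16 bytes apart. */
			u32 *irr = apic_regs + APIC_IRR + i * 0x10;
			*irr |= xchg(&pir[i], 0); /* consume pending bits */
		}
	}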
@@ -3903,6 +4089,7 @@ static void vmx_set_constant_host_state(void)
 
 	native_store_idt(&dt);
 	vmcs_writel(HOST_IDTR_BASE, dt.address);   /* 22.2.4 */
+	vmx->host_idt_base = dt.address;
 
 	vmcs_writel(HOST_RIP, vmx_return); /* 22.2.5 */
 
@@ -3928,6 +4115,15 @@ static void set_cr4_guest_host_mask(struct vcpu_vmx *vmx)
 	vmcs_writel(CR4_GUEST_HOST_MASK, ~vmx->vcpu.arch.cr4_guest_owned_bits);
 }
 
+static u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx)
+{
+	u32 pin_based_exec_ctrl = vmcs_config.pin_based_exec_ctrl;
+
+	if (!vmx_vm_has_apicv(vmx->vcpu.kvm))
+		pin_based_exec_ctrl &= ~PIN_BASED_POSTED_INTR;
+	return pin_based_exec_ctrl;
+}
+
 static u32 vmx_exec_control(struct vcpu_vmx *vmx)
 {
 	u32 exec_control = vmcs_config.cpu_based_exec_ctrl;
@@ -3945,11 +4141,6 @@ static u32 vmx_exec_control(struct vcpu_vmx *vmx)
 	return exec_control;
 }
 
-static int vmx_vm_has_apicv(struct kvm *kvm)
-{
-	return enable_apicv_reg_vid && irqchip_in_kernel(kvm);
-}
-
 static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx)
 {
 	u32 exec_control = vmcs_config.cpu_based_2nd_exec_ctrl;
@@ -3971,6 +4162,12 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx)
 		exec_control &= ~(SECONDARY_EXEC_APIC_REGISTER_VIRT |
 				  SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
 	exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
+	/* SECONDARY_EXEC_SHADOW_VMCS is enabled when L1 executes VMPTRLD
+	   (handle_vmptrld).
+	   We can NOT enable shadow_vmcs here because we don't have yet
+	   a current VMCS12
+	*/
+	exec_control &= ~SECONDARY_EXEC_SHADOW_VMCS;
 	return exec_control;
 }
 
@@ -3999,14 +4196,17 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
 	vmcs_write64(IO_BITMAP_A, __pa(vmx_io_bitmap_a));
 	vmcs_write64(IO_BITMAP_B, __pa(vmx_io_bitmap_b));
 
+	if (enable_shadow_vmcs) {
+		vmcs_write64(VMREAD_BITMAP, __pa(vmx_vmread_bitmap));
+		vmcs_write64(VMWRITE_BITMAP, __pa(vmx_vmwrite_bitmap));
+	}
 	if (cpu_has_vmx_msr_bitmap())
 		vmcs_write64(MSR_BITMAP, __pa(vmx_msr_bitmap_legacy));
 
 	vmcs_write64(VMCS_LINK_POINTER, -1ull); /* 22.3.1.5 */
 
 	/* Control */
-	vmcs_write32(PIN_BASED_VM_EXEC_CONTROL,
-		vmcs_config.pin_based_exec_ctrl);
+	vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, vmx_pin_based_exec_ctrl(vmx));
 
 	vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, vmx_exec_control(vmx));
 
@@ -4015,13 +4215,16 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
 				vmx_secondary_exec_control(vmx));
 	}
 
-	if (enable_apicv_reg_vid) {
+	if (vmx_vm_has_apicv(vmx->vcpu.kvm)) {
 		vmcs_write64(EOI_EXIT_BITMAP0, 0);
 		vmcs_write64(EOI_EXIT_BITMAP1, 0);
 		vmcs_write64(EOI_EXIT_BITMAP2, 0);
 		vmcs_write64(EOI_EXIT_BITMAP3, 0);
 
 		vmcs_write16(GUEST_INTR_STATUS, 0);
+
+		vmcs_write64(POSTED_INTR_NV, POSTED_INTR_VECTOR);
+		vmcs_write64(POSTED_INTR_DESC_ADDR, __pa((&vmx->pi_desc)));
 	}
 
 	if (ple_gap) {
@@ -4035,7 +4238,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
 
 	vmcs_write16(HOST_FS_SELECTOR, 0);            /* 22.2.4 */
 	vmcs_write16(HOST_GS_SELECTOR, 0);            /* 22.2.4 */
-	vmx_set_constant_host_state();
+	vmx_set_constant_host_state(vmx);
 #ifdef CONFIG_X86_64
 	rdmsrl(MSR_FS_BASE, a);
 	vmcs_writel(HOST_FS_BASE, a); /* 22.2.4 */
@@ -4089,11 +4292,10 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
 	return 0;
 }
 
-static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
+static void vmx_vcpu_reset(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	u64 msr;
-	int ret;
 
 	vmx->rmode.vm86_active = 0;
 
@@ -4109,12 +4311,8 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
 	vmx_segment_cache_clear(vmx);
 
 	seg_setup(VCPU_SREG_CS);
-	if (kvm_vcpu_is_bsp(&vmx->vcpu))
-		vmcs_write16(GUEST_CS_SELECTOR, 0xf000);
-	else {
-		vmcs_write16(GUEST_CS_SELECTOR, vmx->vcpu.arch.sipi_vector << 8);
-		vmcs_writel(GUEST_CS_BASE, vmx->vcpu.arch.sipi_vector << 12);
-	}
+	vmcs_write16(GUEST_CS_SELECTOR, 0xf000);
+	vmcs_write32(GUEST_CS_BASE, 0xffff0000);
 
 	seg_setup(VCPU_SREG_DS);
 	seg_setup(VCPU_SREG_ES);
@@ -4137,10 +4335,7 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
 	vmcs_writel(GUEST_SYSENTER_EIP, 0);
 
 	vmcs_writel(GUEST_RFLAGS, 0x02);
-	if (kvm_vcpu_is_bsp(&vmx->vcpu))
-		kvm_rip_write(vcpu, 0xfff0);
-	else
-		kvm_rip_write(vcpu, 0);
+	kvm_rip_write(vcpu, 0xfff0);
 
 	vmcs_writel(GUEST_GDTR_BASE, 0);
 	vmcs_write32(GUEST_GDTR_LIMIT, 0xffff);
@@ -4171,23 +4366,20 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
 		vmcs_write64(APIC_ACCESS_ADDR,
 			     page_to_phys(vmx->vcpu.kvm->arch.apic_access_page));
 
+	if (vmx_vm_has_apicv(vcpu->kvm))
+		memset(&vmx->pi_desc, 0, sizeof(struct pi_desc));
+
 	if (vmx->vpid != 0)
 		vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid);
 
 	vmx->vcpu.arch.cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET;
-	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
 	vmx_set_cr0(&vmx->vcpu, kvm_read_cr0(vcpu)); /* enter rmode */
-	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
 	vmx_set_cr4(&vmx->vcpu, 0);
 	vmx_set_efer(&vmx->vcpu, 0);
 	vmx_fpu_activate(&vmx->vcpu);
 	update_exception_bitmap(&vmx->vcpu);
 
 	vpid_sync_context(vmx);
-
-	ret = 0;
-
-	return ret;
 }
 
 /*
@@ -4200,40 +4392,45 @@ static bool nested_exit_on_intr(struct kvm_vcpu *vcpu)
 		PIN_BASED_EXT_INTR_MASK;
 }
 
-static void enable_irq_window(struct kvm_vcpu *vcpu)
+static bool nested_exit_on_nmi(struct kvm_vcpu *vcpu)
+{
+	return get_vmcs12(vcpu)->pin_based_vm_exec_control &
+		PIN_BASED_NMI_EXITING;
+}
+
+static int enable_irq_window(struct kvm_vcpu *vcpu)
 {
 	u32 cpu_based_vm_exec_control;
-	if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu)) {
+
+	if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu))
 		/*
 		 * We get here if vmx_interrupt_allowed() said we can't
-		 * inject to L1 now because L2 must run. Ask L2 to exit
-		 * right after entry, so we can inject to L1 more promptly.
+		 * inject to L1 now because L2 must run. The caller will have
+		 * to make L2 exit right after entry, so we can inject to L1
+		 * more promptly.
 		 */
-		kvm_make_request(KVM_REQ_IMMEDIATE_EXIT, vcpu);
-		return;
-	}
+		return -EBUSY;
 
 	cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
 	cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING;
 	vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
+	return 0;
 }
 
-static void enable_nmi_window(struct kvm_vcpu *vcpu)
+static int enable_nmi_window(struct kvm_vcpu *vcpu)
 {
 	u32 cpu_based_vm_exec_control;
 
-	if (!cpu_has_virtual_nmis()) {
-		enable_irq_window(vcpu);
-		return;
-	}
+	if (!cpu_has_virtual_nmis())
+		return enable_irq_window(vcpu);
+
+	if (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI)
+		return enable_irq_window(vcpu);
 
-	if (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI) {
-		enable_irq_window(vcpu);
-		return;
-	}
 	cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
 	cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_NMI_PENDING;
 	vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
+	return 0;
 }
 
 static void vmx_inject_irq(struct kvm_vcpu *vcpu)
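
enable_irq_window and enable_nmi_window now report failure instead of requesting the immediate exit themselves, so the generic x86 code can account for a window request that cannot be satisfied while L2 must run. A sketch of the expected caller behaviour (the vcpu_enter_guest logic is assumed, not shown in this diff):

	/* Sketch: the arch-neutral caller turns -EBUSY into an
	 * immediate-exit request after the next VM entry. */
	static void inject_pending_sketch(struct kvm_vcpu *vcpu,
					  bool *req_immediate_exit)
	{
		if (kvm_x86_ops->enable_irq_window(vcpu) != 0)
			*req_immediate_exit = true;
	}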
@@ -4294,16 +4491,6 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
 			INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR);
 }
 
-static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
-{
-	if (!cpu_has_virtual_nmis() && to_vmx(vcpu)->soft_vnmi_blocked)
-		return 0;
-
-	return	!(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
-		  (GUEST_INTR_STATE_MOV_SS | GUEST_INTR_STATE_STI
-		   | GUEST_INTR_STATE_NMI));
-}
-
 static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu)
 {
 	if (!cpu_has_virtual_nmis())
@@ -4333,18 +4520,52 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
 	}
 }
 
+static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
+{
+	if (is_guest_mode(vcpu)) {
+		struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
+
+		if (to_vmx(vcpu)->nested.nested_run_pending)
+			return 0;
+		if (nested_exit_on_nmi(vcpu)) {
+			nested_vmx_vmexit(vcpu);
+			vmcs12->vm_exit_reason = EXIT_REASON_EXCEPTION_NMI;
+			vmcs12->vm_exit_intr_info = NMI_VECTOR |
+				INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK;
+			/*
+			 * The NMI-triggered VM exit counts as injection:
+			 * clear this one and block further NMIs.
+			 */
+			vcpu->arch.nmi_pending = 0;
+			vmx_set_nmi_mask(vcpu, true);
+			return 0;
+		}
+	}
+
+	if (!cpu_has_virtual_nmis() && to_vmx(vcpu)->soft_vnmi_blocked)
+		return 0;
+
+	return	!(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
+		  (GUEST_INTR_STATE_MOV_SS | GUEST_INTR_STATE_STI
+		   | GUEST_INTR_STATE_NMI));
+}
+
 static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu)
 {
-	if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu)) {
+	if (is_guest_mode(vcpu)) {
 		struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
-		if (to_vmx(vcpu)->nested.nested_run_pending ||
-		    (vmcs12->idt_vectoring_info_field &
-		     VECTORING_INFO_VALID_MASK))
+
+		if (to_vmx(vcpu)->nested.nested_run_pending)
 			return 0;
-		nested_vmx_vmexit(vcpu);
-		vmcs12->vm_exit_reason = EXIT_REASON_EXTERNAL_INTERRUPT;
-		vmcs12->vm_exit_intr_info = 0;
-		/* fall through to normal code, but now in L1, not L2 */
+		if (nested_exit_on_intr(vcpu)) {
+			nested_vmx_vmexit(vcpu);
+			vmcs12->vm_exit_reason =
+				EXIT_REASON_EXTERNAL_INTERRUPT;
+			vmcs12->vm_exit_intr_info = 0;
+			/*
+			 * fall through to normal code, but now in L1, not L2
+			 */
+		}
 	}
 
 	return (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
@@ -4362,7 +4583,7 @@ static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr)
 		.flags = 0,
 	};
 
-	ret = kvm_set_memory_region(kvm, &tss_mem, false);
+	ret = kvm_set_memory_region(kvm, &tss_mem);
 	if (ret)
 		return ret;
 	kvm->arch.tss_addr = addr;
@@ -4603,34 +4824,50 @@ vmx_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall)
 /* called to set cr0 as appropriate for a mov-to-cr0 exit. */
 static int handle_set_cr0(struct kvm_vcpu *vcpu, unsigned long val)
 {
-	if (to_vmx(vcpu)->nested.vmxon &&
-	    ((val & VMXON_CR0_ALWAYSON) != VMXON_CR0_ALWAYSON))
-		return 1;
-
 	if (is_guest_mode(vcpu)) {
+		struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
+		unsigned long orig_val = val;
+
 		/*
 		 * We get here when L2 changed cr0 in a way that did not change
 		 * any of L1's shadowed bits (see nested_vmx_exit_handled_cr),
-		 * but did change L0 shadowed bits. This can currently happen
-		 * with the TS bit: L0 may want to leave TS on (for lazy fpu
-		 * loading) while pretending to allow the guest to change it.
+		 * but did change L0 shadowed bits. So we first calculate the
+		 * effective cr0 value that L1 would like to write into the
+		 * hardware. It consists of the L2-owned bits from the new
+		 * value combined with the L1-owned bits from L1's guest_cr0.
 		 */
-		if (kvm_set_cr0(vcpu, (val & vcpu->arch.cr0_guest_owned_bits) |
-			 (vcpu->arch.cr0 & ~vcpu->arch.cr0_guest_owned_bits)))
+		val = (val & ~vmcs12->cr0_guest_host_mask) |
+			(vmcs12->guest_cr0 & vmcs12->cr0_guest_host_mask);
+
+		/* TODO: will have to take unrestricted guest mode into
+		 * account */
+		if ((val & VMXON_CR0_ALWAYSON) != VMXON_CR0_ALWAYSON)
 			return 1;
-		vmcs_writel(CR0_READ_SHADOW, val);
+
+		if (kvm_set_cr0(vcpu, val))
+			return 1;
+		vmcs_writel(CR0_READ_SHADOW, orig_val);
 		return 0;
-	} else
+	} else {
+		if (to_vmx(vcpu)->nested.vmxon &&
+		    ((val & VMXON_CR0_ALWAYSON) != VMXON_CR0_ALWAYSON))
+			return 1;
 		return kvm_set_cr0(vcpu, val);
+	}
 }
 
 static int handle_set_cr4(struct kvm_vcpu *vcpu, unsigned long val)
 {
 	if (is_guest_mode(vcpu)) {
-		if (kvm_set_cr4(vcpu, (val & vcpu->arch.cr4_guest_owned_bits) |
-			 (vcpu->arch.cr4 & ~vcpu->arch.cr4_guest_owned_bits)))
+		struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
+		unsigned long orig_val = val;
+
+		/* analogously to handle_set_cr0 */
+		val = (val & ~vmcs12->cr4_guest_host_mask) |
+			(vmcs12->guest_cr4 & vmcs12->cr4_guest_host_mask);
+		if (kvm_set_cr4(vcpu, val))
 			return 1;
-		vmcs_writel(CR4_READ_SHADOW, val);
+		vmcs_writel(CR4_READ_SHADOW, orig_val);
 		return 0;
 	} else
 		return kvm_set_cr4(vcpu, val);
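
The mask arithmetic is easiest to see with concrete values. Suppose L1 owns only CR0.TS (cr0_guest_host_mask = X86_CR0_TS) and keeps TS set in guest_cr0, while L2 writes a value with TS clear; the merged value keeps L1's TS bit and takes every other bit from L2. A worked sketch:

	/* Worked example of the cr0 merge above (illustrative values). */
	unsigned long mask      = X86_CR0_TS;              /* L1-owned bits  */
	unsigned long guest_cr0 = X86_CR0_PE | X86_CR0_TS; /* L1's view      */
	unsigned long val       = X86_CR0_PE;              /* L2's write     */

	/* L2-owned bits come from val, L1-owned bits from guest_cr0: */
	unsigned long effective = (val & ~mask) | (guest_cr0 & mask);
	/* effective == X86_CR0_PE | X86_CR0_TS: TS survives L2's clear. */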
@@ -5183,7 +5420,7 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
 		if (test_bit(KVM_REQ_EVENT, &vcpu->requests))
 			return 1;
 
-		err = emulate_instruction(vcpu, 0);
+		err = emulate_instruction(vcpu, EMULTYPE_NO_REEXECUTE);
 
 		if (err == EMULATE_DO_MMIO) {
 			ret = 0;
@@ -5259,8 +5496,7 @@ static struct loaded_vmcs *nested_get_current_vmcs02(struct vcpu_vmx *vmx)
 	}
 
 	/* Create a new VMCS */
-	item = (struct vmcs02_list *)
-		kmalloc(sizeof(struct vmcs02_list), GFP_KERNEL);
+	item = kmalloc(sizeof(struct vmcs02_list), GFP_KERNEL);
 	if (!item)
 		return NULL;
 	item->vmcs02.vmcs = alloc_vmcs();
@@ -5309,6 +5545,9 @@ static void nested_free_all_saved_vmcss(struct vcpu_vmx *vmx)
 		free_loaded_vmcs(&vmx->vmcs01);
 }
 
+static void nested_vmx_failValid(struct kvm_vcpu *vcpu,
+				 u32 vm_instruction_error);
+
 /*
  * Emulate the VMXON instruction.
  * Currently, we just remember that VMX is active, and do not save or even
@@ -5321,6 +5560,7 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
 {
 	struct kvm_segment cs;
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	struct vmcs *shadow_vmcs;
 
 	/* The Intel VMX Instruction Reference lists a bunch of bits that
 	 * are prerequisite to running VMXON, most notably cr4.VMXE must be
@@ -5344,6 +5584,21 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
 		kvm_inject_gp(vcpu, 0);
 		return 1;
 	}
+	if (vmx->nested.vmxon) {
+		nested_vmx_failValid(vcpu, VMXERR_VMXON_IN_VMX_ROOT_OPERATION);
+		skip_emulated_instruction(vcpu);
+		return 1;
+	}
+	if (enable_shadow_vmcs) {
+		shadow_vmcs = alloc_vmcs();
+		if (!shadow_vmcs)
+			return -ENOMEM;
+		/* mark vmcs as shadow */
+		shadow_vmcs->revision_id |= (1u << 31);
+		/* init shadow vmcs */
+		vmcs_clear(shadow_vmcs);
+		vmx->nested.current_shadow_vmcs = shadow_vmcs;
+	}
 
 	INIT_LIST_HEAD(&(vmx->nested.vmcs02_pool));
 	vmx->nested.vmcs02_num = 0;
@@ -5384,6 +5639,25 @@ static int nested_vmx_check_permission(struct kvm_vcpu *vcpu)
 	return 1;
 }
 
+static inline void nested_release_vmcs12(struct vcpu_vmx *vmx)
+{
+	u32 exec_control;
+	if (enable_shadow_vmcs) {
+		if (vmx->nested.current_vmcs12 != NULL) {
+			/* copy to memory all shadowed fields in case
+			   they were modified */
+			copy_shadow_to_vmcs12(vmx);
+			vmx->nested.sync_shadow_vmcs = false;
+			exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
+			exec_control &= ~SECONDARY_EXEC_SHADOW_VMCS;
+			vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control);
+			vmcs_write64(VMCS_LINK_POINTER, -1ull);
+		}
+	}
+	kunmap(vmx->nested.current_vmcs12_page);
+	nested_release_page(vmx->nested.current_vmcs12_page);
+}
+
 /*
  * Free whatever needs to be freed from vmx->nested when L1 goes down, or
  * just stops using VMX.
@@ -5394,11 +5668,12 @@ static void free_nested(struct vcpu_vmx *vmx)
 		return;
 	vmx->nested.vmxon = false;
 	if (vmx->nested.current_vmptr != -1ull) {
-		kunmap(vmx->nested.current_vmcs12_page);
-		nested_release_page(vmx->nested.current_vmcs12_page);
+		nested_release_vmcs12(vmx);
 		vmx->nested.current_vmptr = -1ull;
 		vmx->nested.current_vmcs12 = NULL;
 	}
+	if (enable_shadow_vmcs)
+		free_vmcs(vmx->nested.current_shadow_vmcs);
 	/* Unpin physical memory we referred to in current vmcs02 */
 	if (vmx->nested.apic_access_page) {
 		nested_release_page(vmx->nested.apic_access_page);
@@ -5507,6 +5782,10 @@ static void nested_vmx_failValid(struct kvm_vcpu *vcpu,
 			    X86_EFLAGS_SF | X86_EFLAGS_OF))
 			| X86_EFLAGS_ZF);
 	get_vmcs12(vcpu)->vm_instruction_error = vm_instruction_error;
+	/*
+	 * We don't need to force a shadow sync because
+	 * VM_INSTRUCTION_ERROR is not shadowed
+	 */
 }
 
 /* Emulate the VMCLEAR instruction */
@@ -5539,8 +5818,7 @@ static int handle_vmclear(struct kvm_vcpu *vcpu)
 	}
 
 	if (vmptr == vmx->nested.current_vmptr) {
-		kunmap(vmx->nested.current_vmcs12_page);
-		nested_release_page(vmx->nested.current_vmcs12_page);
+		nested_release_vmcs12(vmx);
 		vmx->nested.current_vmptr = -1ull;
 		vmx->nested.current_vmcs12 = NULL;
 	}
@@ -5639,6 +5917,111 @@ static inline bool vmcs12_read_any(struct kvm_vcpu *vcpu,
 	}
 }
 
+
+static inline bool vmcs12_write_any(struct kvm_vcpu *vcpu,
+				    unsigned long field, u64 field_value){
+	short offset = vmcs_field_to_offset(field);
+	char *p = ((char *) get_vmcs12(vcpu)) + offset;
+	if (offset < 0)
+		return false;
+
+	switch (vmcs_field_type(field)) {
+	case VMCS_FIELD_TYPE_U16:
+		*(u16 *)p = field_value;
+		return true;
+	case VMCS_FIELD_TYPE_U32:
+		*(u32 *)p = field_value;
+		return true;
+	case VMCS_FIELD_TYPE_U64:
+		*(u64 *)p = field_value;
+		return true;
+	case VMCS_FIELD_TYPE_NATURAL_WIDTH:
+		*(natural_width *)p = field_value;
+		return true;
+	default:
+		return false; /* can never happen. */
+	}
+
+}
+
+static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx)
+{
+	int i;
+	unsigned long field;
+	u64 field_value;
+	struct vmcs *shadow_vmcs = vmx->nested.current_shadow_vmcs;
+	unsigned long *fields = (unsigned long *)shadow_read_write_fields;
+	int num_fields = max_shadow_read_write_fields;
+
+	vmcs_load(shadow_vmcs);
+
+	for (i = 0; i < num_fields; i++) {
+		field = fields[i];
+		switch (vmcs_field_type(field)) {
+		case VMCS_FIELD_TYPE_U16:
+			field_value = vmcs_read16(field);
+			break;
+		case VMCS_FIELD_TYPE_U32:
+			field_value = vmcs_read32(field);
+			break;
+		case VMCS_FIELD_TYPE_U64:
+			field_value = vmcs_read64(field);
+			break;
+		case VMCS_FIELD_TYPE_NATURAL_WIDTH:
+			field_value = vmcs_readl(field);
+			break;
+		}
+		vmcs12_write_any(&vmx->vcpu, field, field_value);
+	}
+
+	vmcs_clear(shadow_vmcs);
+	vmcs_load(vmx->loaded_vmcs->vmcs);
+}
+
+static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx)
+{
+	unsigned long *fields[] = {
+		(unsigned long *)shadow_read_write_fields,
+		(unsigned long *)shadow_read_only_fields
+	};
+	int num_lists =  ARRAY_SIZE(fields);
+	int max_fields[] = {
+		max_shadow_read_write_fields,
+		max_shadow_read_only_fields
+	};
+	int i, q;
+	unsigned long field;
+	u64 field_value = 0;
+	struct vmcs *shadow_vmcs = vmx->nested.current_shadow_vmcs;
+
+	vmcs_load(shadow_vmcs);
+
+	for (q = 0; q < num_lists; q++) {
+		for (i = 0; i < max_fields[q]; i++) {
+			field = fields[q][i];
+			vmcs12_read_any(&vmx->vcpu, field, &field_value);
+
+			switch (vmcs_field_type(field)) {
+			case VMCS_FIELD_TYPE_U16:
+				vmcs_write16(field, (u16)field_value);
+				break;
+			case VMCS_FIELD_TYPE_U32:
+				vmcs_write32(field, (u32)field_value);
+				break;
+			case VMCS_FIELD_TYPE_U64:
+				vmcs_write64(field, (u64)field_value);
+				break;
+			case VMCS_FIELD_TYPE_NATURAL_WIDTH:
+				vmcs_writel(field, (long)field_value);
+				break;
+			}
+		}
+	}
+
+	vmcs_clear(shadow_vmcs);
+	vmcs_load(vmx->loaded_vmcs->vmcs);
+}
+
 /*
  * VMX instructions which assume a current vmcs12 (i.e., that VMPTRLD was
  * used before) all generate the same failure when it is missing.
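
Both copy loops dispatch on vmcs_field_type, which is derivable from the VMCS field encoding itself. A sketch of that helper, assuming the encoding rule used by the rest of vmx.c (the helper is defined outside this diff): odd field numbers are the 32-bit *_HIGH halves; otherwise bits 13:14 select the width class.

	/* Sketch: width class from a VMCS field encoding. */
	static inline int vmcs_field_type_sketch(unsigned long field)
	{
		if (0x1 & field)	/* the *_HIGH fields are all 32 bit */
			return VMCS_FIELD_TYPE_U32;
		return (field >> 13) & 0x3;
	}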
@@ -5703,8 +6086,6 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu)
 	gva_t gva;
 	unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
 	u32 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
-	char *p;
-	short offset;
 	/* The value to write might be 32 or 64 bits, depending on L1's long
 	 * mode, and eventually we need to write that into a field of several
 	 * possible lengths. The code below first zero-extends the value to 64
@@ -5741,28 +6122,7 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu)
 		return 1;
 	}
 
-	offset = vmcs_field_to_offset(field);
-	if (offset < 0) {
-		nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
-		skip_emulated_instruction(vcpu);
-		return 1;
-	}
-	p = ((char *) get_vmcs12(vcpu)) + offset;
-
-	switch (vmcs_field_type(field)) {
-	case VMCS_FIELD_TYPE_U16:
-		*(u16 *)p = field_value;
-		break;
-	case VMCS_FIELD_TYPE_U32:
-		*(u32 *)p = field_value;
-		break;
-	case VMCS_FIELD_TYPE_U64:
-		*(u64 *)p = field_value;
-		break;
-	case VMCS_FIELD_TYPE_NATURAL_WIDTH:
-		*(natural_width *)p = field_value;
-		break;
-	default:
+	if (!vmcs12_write_any(vcpu, field, field_value)) {
 		nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
 		skip_emulated_instruction(vcpu);
 		return 1;
@@ -5780,6 +6140,7 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu)
 	gva_t gva;
 	gpa_t vmptr;
 	struct x86_exception e;
+	u32 exec_control;
 
 	if (!nested_vmx_check_permission(vcpu))
 		return 1;
@@ -5818,14 +6179,20 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu)
 			skip_emulated_instruction(vcpu);
 			return 1;
 		}
-		if (vmx->nested.current_vmptr != -1ull) {
-			kunmap(vmx->nested.current_vmcs12_page);
-			nested_release_page(vmx->nested.current_vmcs12_page);
-		}
+		if (vmx->nested.current_vmptr != -1ull)
+			nested_release_vmcs12(vmx);
 
 		vmx->nested.current_vmptr = vmptr;
 		vmx->nested.current_vmcs12 = new_vmcs12;
 		vmx->nested.current_vmcs12_page = page;
+		if (enable_shadow_vmcs) {
+			exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
+			exec_control |= SECONDARY_EXEC_SHADOW_VMCS;
+			vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control);
+			vmcs_write64(VMCS_LINK_POINTER,
+				     __pa(vmx->nested.current_shadow_vmcs));
+			vmx->nested.sync_shadow_vmcs = true;
+		}
 	}
 
 	nested_vmx_succeed(vcpu);
@@ -5908,6 +6275,52 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
 static const int kvm_vmx_max_exit_handlers =
 	ARRAY_SIZE(kvm_vmx_exit_handlers);
 
+static bool nested_vmx_exit_handled_io(struct kvm_vcpu *vcpu,
+				       struct vmcs12 *vmcs12)
+{
+	unsigned long exit_qualification;
+	gpa_t bitmap, last_bitmap;
+	unsigned int port;
+	int size;
+	u8 b;
+
+	if (nested_cpu_has(vmcs12, CPU_BASED_UNCOND_IO_EXITING))
+		return 1;
+
+	if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS))
+		return 0;
+
+	exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
+
+	port = exit_qualification >> 16;
+	size = (exit_qualification & 7) + 1;
+
+	last_bitmap = (gpa_t)-1;
+	b = -1;
+
+	while (size > 0) {
+		if (port < 0x8000)
+			bitmap = vmcs12->io_bitmap_a;
+		else if (port < 0x10000)
+			bitmap = vmcs12->io_bitmap_b;
+		else
+			return 1;
+		bitmap += (port & 0x7fff) / 8;
+
+		if (last_bitmap != bitmap)
+			if (kvm_read_guest(vcpu->kvm, bitmap, &b, 1))
+				return 1;
+		if (b & (1 << (port & 7)))
+			return 1;
+
+		port++;
+		size--;
+		last_bitmap = bitmap;
+	}
+
+	return 0;
+}
+
 /*
  * Return 1 if we should exit from L2 to L1 to handle an MSR access access,
  * rather than handle it ourselves in L0. I.e., check whether L1 expressed
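
The bitmap walk above tests one bit per accessed port. For example, an OUT to port 0x3f8 falls in io_bitmap_a (ports 0x0000-0x7fff), at byte 0x3f8 / 8 = 127, bit 0x3f8 % 8 = 0. A minimal sketch of the per-port lookup, mirroring the loop:

	/* Worked single-port lookup (illustrative). */
	static bool port_bit_set_sketch(const u8 *bitmap_a,
					const u8 *bitmap_b, unsigned int port)
	{
		const u8 *bitmap = (port < 0x8000) ? bitmap_a : bitmap_b;

		return bitmap[(port & 0x7fff) / 8] & (1 << (port & 7));
	}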
@@ -5939,7 +6352,8 @@ static bool nested_vmx_exit_handled_msr(struct kvm_vcpu *vcpu,
 	/* Then read the msr_index'th bit from this bitmap: */
 	if (msr_index < 1024*8) {
 		unsigned char b;
-		kvm_read_guest(vcpu->kvm, bitmap + msr_index/8, &b, 1);
+		if (kvm_read_guest(vcpu->kvm, bitmap + msr_index/8, &b, 1))
+			return 1;
 		return 1 & (b >> (msr_index & 7));
 	} else
 		return 1; /* let L1 handle the wrong parameter */
@@ -6033,10 +6447,10 @@ static bool nested_vmx_exit_handled_cr(struct kvm_vcpu *vcpu,
  */
 static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
 {
-	u32 exit_reason = vmcs_read32(VM_EXIT_REASON);
 	u32 intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
+	u32 exit_reason = vmx->exit_reason;
 
 	if (vmx->nested.nested_run_pending)
 		return 0;
@@ -6060,14 +6474,9 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
 	case EXIT_REASON_TRIPLE_FAULT:
 		return 1;
 	case EXIT_REASON_PENDING_INTERRUPT:
+		return nested_cpu_has(vmcs12, CPU_BASED_VIRTUAL_INTR_PENDING);
 	case EXIT_REASON_NMI_WINDOW:
-		/*
-		 * prepare_vmcs02() set the CPU_BASED_VIRTUAL_INTR_PENDING bit
-		 * (aka Interrupt Window Exiting) only when L1 turned it on,
-		 * so if we got a PENDING_INTERRUPT exit, this must be for L1.
-		 * Same for NMI Window Exiting.
-		 */
-		return 1;
+		return nested_cpu_has(vmcs12, CPU_BASED_VIRTUAL_NMI_PENDING);
 	case EXIT_REASON_TASK_SWITCH:
 		return 1;
 	case EXIT_REASON_CPUID:
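
PENDING_INTERRUPT and NMI_WINDOW exits are now reflected to L1 only when L1 itself enabled the corresponding window-exiting control. The nested_cpu_has helper simply tests a bit in vmcs12's primary processor-based controls; a sketch, assuming its usual shape (it is defined elsewhere in vmx.c):

	/* Sketch: does L1's vmcs12 set this primary exec control bit? */
	static inline bool nested_cpu_has_sketch(struct vmcs12 *vmcs12, u32 bit)
	{
		return vmcs12->cpu_based_vm_exec_control & bit;
	}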
@@ -6097,8 +6506,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
 	case EXIT_REASON_DR_ACCESS:
 	case EXIT_REASON_DR_ACCESS:
 		return nested_cpu_has(vmcs12, CPU_BASED_MOV_DR_EXITING);
 		return nested_cpu_has(vmcs12, CPU_BASED_MOV_DR_EXITING);
 	case EXIT_REASON_IO_INSTRUCTION:
 	case EXIT_REASON_IO_INSTRUCTION:
-		/* TODO: support IO bitmaps */
-		return 1;
+		return nested_vmx_exit_handled_io(vcpu, vmcs12);
 	case EXIT_REASON_MSR_READ:
 	case EXIT_REASON_MSR_READ:
 	case EXIT_REASON_MSR_WRITE:
 	case EXIT_REASON_MSR_WRITE:
 		return nested_vmx_exit_handled_msr(vcpu, vmcs12, exit_reason);
 		return nested_vmx_exit_handled_msr(vcpu, vmcs12, exit_reason);
@@ -6122,6 +6530,9 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
 	case EXIT_REASON_EPT_VIOLATION:
 	case EXIT_REASON_EPT_VIOLATION:
 	case EXIT_REASON_EPT_MISCONFIG:
 	case EXIT_REASON_EPT_MISCONFIG:
 		return 0;
 		return 0;
+	case EXIT_REASON_PREEMPTION_TIMER:
+		return vmcs12->pin_based_vm_exec_control &
+			PIN_BASED_VMX_PREEMPTION_TIMER;
 	case EXIT_REASON_WBINVD:
 	case EXIT_REASON_WBINVD:
 		return nested_cpu_has2(vmcs12, SECONDARY_EXEC_WBINVD_EXITING);
 		return nested_cpu_has2(vmcs12, SECONDARY_EXEC_WBINVD_EXITING);
 	case EXIT_REASON_XSETBV:
 	case EXIT_REASON_XSETBV:
@@ -6316,6 +6727,9 @@ static void vmx_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr)
 
 
 static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
 static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
 {
 {
+	if (!vmx_vm_has_apicv(vcpu->kvm))
+		return;
+
 	vmcs_write64(EOI_EXIT_BITMAP0, eoi_exit_bitmap[0]);
 	vmcs_write64(EOI_EXIT_BITMAP0, eoi_exit_bitmap[0]);
 	vmcs_write64(EOI_EXIT_BITMAP1, eoi_exit_bitmap[1]);
 	vmcs_write64(EOI_EXIT_BITMAP1, eoi_exit_bitmap[1]);
 	vmcs_write64(EOI_EXIT_BITMAP2, eoi_exit_bitmap[2]);
 	vmcs_write64(EOI_EXIT_BITMAP2, eoi_exit_bitmap[2]);
@@ -6346,6 +6760,52 @@ static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx)
 	}
 	}
 }
 }
 
 
+static void vmx_handle_external_intr(struct kvm_vcpu *vcpu)
+{
+	u32 exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
+
+	/*
+	 * If external interrupt exists, IF bit is set in rflags/eflags on the
+	 * interrupt stack frame, and interrupt will be enabled on a return
+	 * from interrupt handler.
+	 */
+	if ((exit_intr_info & (INTR_INFO_VALID_MASK | INTR_INFO_INTR_TYPE_MASK))
+			== (INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR)) {
+		unsigned int vector;
+		unsigned long entry;
+		gate_desc *desc;
+		struct vcpu_vmx *vmx = to_vmx(vcpu);
+#ifdef CONFIG_X86_64
+		unsigned long tmp;
+#endif
+
+		vector =  exit_intr_info & INTR_INFO_VECTOR_MASK;
+		desc = (gate_desc *)vmx->host_idt_base + vector;
+		entry = gate_offset(*desc);
+		asm volatile(
+#ifdef CONFIG_X86_64
+			"mov %%" _ASM_SP ", %[sp]\n\t"
+			"and $0xfffffffffffffff0, %%" _ASM_SP "\n\t"
+			"push $%c[ss]\n\t"
+			"push %[sp]\n\t"
+#endif
+			"pushf\n\t"
+			"orl $0x200, (%%" _ASM_SP ")\n\t"
+			__ASM_SIZE(push) " $%c[cs]\n\t"
+			"call *%[entry]\n\t"
+			:
+#ifdef CONFIG_X86_64
+			[sp]"=&r"(tmp)
+#endif
+			:
+			[entry]"r"(entry),
+			[ss]"i"(__KERNEL_DS),
+			[cs]"i"(__KERNEL_CS)
+			);
+	} else
+		local_irq_enable();
+}
+
 static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx)
 {
 	u32 exit_intr_info;
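The vmx_handle_external_intr() implementation added above dispatches the pending host interrupt by hand instead of re-enabling interrupts and letting the CPU take it. A sketch of the stack frame its inline asm fabricates on 64-bit before "call *%[entry]" (descriptive comment only, not runnable code):

	/*
	 *   push $__KERNEL_DS      -- SS
	 *   push saved RSP         -- RSP (16-byte aligned first)
	 *   pushf; orl $0x200      -- RFLAGS with IF (bit 9) set, so the
	 *                             handler's iretq re-enables interrupts
	 *   push $__KERNEL_CS      -- CS
	 *   call *entry            -- pushes RIP; entry = IDT gate offset
	 * The interrupt handler returns through exactly this frame.
	 */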
@@ -6388,7 +6848,7 @@ static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx)
 			ktime_to_ns(ktime_sub(ktime_get(), vmx->entry_time));
 }

-static void __vmx_complete_interrupts(struct vcpu_vmx *vmx,
+static void __vmx_complete_interrupts(struct kvm_vcpu *vcpu,
 				      u32 idt_vectoring_info,
 				      int instr_len_field,
 				      int error_code_field)
@@ -6399,46 +6859,43 @@ static void __vmx_complete_interrupts(struct vcpu_vmx *vmx,

 	idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK;

-	vmx->vcpu.arch.nmi_injected = false;
-	kvm_clear_exception_queue(&vmx->vcpu);
-	kvm_clear_interrupt_queue(&vmx->vcpu);
+	vcpu->arch.nmi_injected = false;
+	kvm_clear_exception_queue(vcpu);
+	kvm_clear_interrupt_queue(vcpu);

 	if (!idtv_info_valid)
 		return;

-	kvm_make_request(KVM_REQ_EVENT, &vmx->vcpu);
+	kvm_make_request(KVM_REQ_EVENT, vcpu);

 	vector = idt_vectoring_info & VECTORING_INFO_VECTOR_MASK;
 	type = idt_vectoring_info & VECTORING_INFO_TYPE_MASK;

 	switch (type) {
 	case INTR_TYPE_NMI_INTR:
-		vmx->vcpu.arch.nmi_injected = true;
+		vcpu->arch.nmi_injected = true;
 		/*
 		 * SDM 3: 27.7.1.2 (September 2008)
 		 * Clear bit "block by NMI" before VM entry if a NMI
 		 * delivery faulted.
 		 */
-		vmx_set_nmi_mask(&vmx->vcpu, false);
+		vmx_set_nmi_mask(vcpu, false);
 		break;
 	case INTR_TYPE_SOFT_EXCEPTION:
-		vmx->vcpu.arch.event_exit_inst_len =
-			vmcs_read32(instr_len_field);
+		vcpu->arch.event_exit_inst_len = vmcs_read32(instr_len_field);
 		/* fall through */
 	case INTR_TYPE_HARD_EXCEPTION:
 		if (idt_vectoring_info & VECTORING_INFO_DELIVER_CODE_MASK) {
 			u32 err = vmcs_read32(error_code_field);
-			kvm_queue_exception_e(&vmx->vcpu, vector, err);
+			kvm_queue_exception_e(vcpu, vector, err);
 		} else
-			kvm_queue_exception(&vmx->vcpu, vector);
+			kvm_queue_exception(vcpu, vector);
 		break;
 	case INTR_TYPE_SOFT_INTR:
-		vmx->vcpu.arch.event_exit_inst_len =
-			vmcs_read32(instr_len_field);
+		vcpu->arch.event_exit_inst_len = vmcs_read32(instr_len_field);
 		/* fall through */
 	case INTR_TYPE_EXT_INTR:
-		kvm_queue_interrupt(&vmx->vcpu, vector,
-			type == INTR_TYPE_SOFT_INTR);
+		kvm_queue_interrupt(vcpu, vector, type == INTR_TYPE_SOFT_INTR);
 		break;
 	default:
 		break;
@@ -6447,18 +6904,14 @@ static void __vmx_complete_interrupts(struct vcpu_vmx *vmx,

 static void vmx_complete_interrupts(struct vcpu_vmx *vmx)
 {
-	if (is_guest_mode(&vmx->vcpu))
-		return;
-	__vmx_complete_interrupts(vmx, vmx->idt_vectoring_info,
+	__vmx_complete_interrupts(&vmx->vcpu, vmx->idt_vectoring_info,
 				  VM_EXIT_INSTRUCTION_LEN,
 				  IDT_VECTORING_ERROR_CODE);
 }

 static void vmx_cancel_injection(struct kvm_vcpu *vcpu)
 {
-	if (is_guest_mode(vcpu))
-		return;
-	__vmx_complete_interrupts(to_vmx(vcpu),
+	__vmx_complete_interrupts(vcpu,
 				  vmcs_read32(VM_ENTRY_INTR_INFO_FIELD),
 				  VM_ENTRY_INSTRUCTION_LEN,
 				  VM_ENTRY_EXCEPTION_ERROR_CODE);
@@ -6489,21 +6942,6 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	unsigned long debugctlmsr;

-	if (is_guest_mode(vcpu) && !vmx->nested.nested_run_pending) {
-		struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
-		if (vmcs12->idt_vectoring_info_field &
-				VECTORING_INFO_VALID_MASK) {
-			vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
-				vmcs12->idt_vectoring_info_field);
-			vmcs_write32(VM_ENTRY_INSTRUCTION_LEN,
-				vmcs12->vm_exit_instruction_len);
-			if (vmcs12->idt_vectoring_info_field &
-					VECTORING_INFO_DELIVER_CODE_MASK)
-				vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE,
-					vmcs12->idt_vectoring_error_code);
-		}
-	}
-
 	/* Record the guest's net vcpu time for enforced NMI injections. */
 	if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked))
 		vmx->entry_time = ktime_get();
@@ -6513,6 +6951,11 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 	if (vmx->emulation_required)
 		return;

+	if (vmx->nested.sync_shadow_vmcs) {
+		copy_vmcs12_to_shadow(vmx);
+		vmx->nested.sync_shadow_vmcs = false;
+	}
+
 	if (test_bit(VCPU_REGS_RSP, (unsigned long *)&vcpu->arch.regs_dirty))
 		vmcs_writel(GUEST_RSP, vcpu->arch.regs[VCPU_REGS_RSP]);
 	if (test_bit(VCPU_REGS_RIP, (unsigned long *)&vcpu->arch.regs_dirty))
@@ -6662,17 +7105,6 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)

 	vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);

-	if (is_guest_mode(vcpu)) {
-		struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
-		vmcs12->idt_vectoring_info_field = vmx->idt_vectoring_info;
-		if (vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK) {
-			vmcs12->idt_vectoring_error_code =
-				vmcs_read32(IDT_VECTORING_ERROR_CODE);
-			vmcs12->vm_exit_instruction_len =
-				vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
-		}
-	}
-
 	vmx->loaded_vmcs->launched = 1;

 	vmx->exit_reason = vmcs_read32(VM_EXIT_REASON);
@@ -6734,10 +7166,11 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
 	put_cpu();
 	if (err)
 		goto free_vmcs;
-	if (vm_need_virtualize_apic_accesses(kvm))
+	if (vm_need_virtualize_apic_accesses(kvm)) {
 		err = alloc_apic_access_page(kvm);
 		if (err)
 			goto free_vmcs;
+	}

 	if (enable_ept) {
 		if (!kvm->arch.ept_identity_map_addr)
@@ -6931,9 +7364,8 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 		vmcs12->vm_entry_instruction_len);
 	vmcs_write32(GUEST_INTERRUPTIBILITY_INFO,
 		vmcs12->guest_interruptibility_info);
-	vmcs_write32(GUEST_ACTIVITY_STATE, vmcs12->guest_activity_state);
 	vmcs_write32(GUEST_SYSENTER_CS, vmcs12->guest_sysenter_cs);
-	vmcs_writel(GUEST_DR7, vmcs12->guest_dr7);
+	kvm_set_dr(vcpu, 7, vmcs12->guest_dr7);
 	vmcs_writel(GUEST_RFLAGS, vmcs12->guest_rflags);
 	vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS,
 		vmcs12->guest_pending_dbg_exceptions);
@@ -6946,6 +7378,10 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 		(vmcs_config.pin_based_exec_ctrl |
 		 vmcs12->pin_based_vm_exec_control));

+	if (vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER)
+		vmcs_write32(VMX_PREEMPTION_TIMER_VALUE,
+			     vmcs12->vmx_preemption_timer_value);
+
 	/*
 	 * Whether page-faults are trapped is determined by a combination of
 	 * 3 settings: PFEC_MASK, PFEC_MATCH and EXCEPTION_BITMAP.PF.
@@ -7016,7 +7452,7 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 	 * Other fields are different per CPU, and will be set later when
 	 * vmx_vcpu_load() is called, and when vmx_save_host_state() is called.
 	 */
-	vmx_set_constant_host_state();
+	vmx_set_constant_host_state(vmx);

 	/*
 	 * HOST_RSP is normally set correctly in vmx_vcpu_run() just before
@@ -7082,7 +7518,7 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)

 	if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER)
 		vcpu->arch.efer = vmcs12->guest_ia32_efer;
-	if (vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE)
+	else if (vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE)
 		vcpu->arch.efer |= (EFER_LMA | EFER_LME);
 	else
 		vcpu->arch.efer &= ~(EFER_LMA | EFER_LME);
@@ -7121,6 +7557,7 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	int cpu;
 	struct loaded_vmcs *vmcs02;
+	bool ia32e;

 	if (!nested_vmx_check_permission(vcpu) ||
 	    !nested_vmx_check_vmcs12(vcpu))
@@ -7129,6 +7566,9 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
 	skip_emulated_instruction(vcpu);
 	vmcs12 = get_vmcs12(vcpu);

+	if (enable_shadow_vmcs)
+		copy_shadow_to_vmcs12(vmx);
+
 	/*
 	 * The nested entry process starts with enforcing various prerequisites
 	 * on vmcs12 as required by the Intel SDM, and act appropriately when
@@ -7146,6 +7586,11 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
 		return 1;
 	}

+	if (vmcs12->guest_activity_state != GUEST_ACTIVITY_ACTIVE) {
+		nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
+		return 1;
+	}
+
 	if ((vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_MSR_BITMAPS) &&
 			!IS_ALIGNED(vmcs12->msr_bitmap, PAGE_SIZE)) {
 		/*TODO: Also verify bits beyond physical address width are 0*/
@@ -7203,6 +7648,45 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
 		return 1;
 	}

+	/*
+	 * If the load IA32_EFER VM-entry control is 1, the following checks
+	 * are performed on the field for the IA32_EFER MSR:
+	 * - Bits reserved in the IA32_EFER MSR must be 0.
+	 * - Bit 10 (corresponding to IA32_EFER.LMA) must equal the value of
+	 *   the IA-32e mode guest VM-exit control. It must also be identical
+	 *   to bit 8 (LME) if bit 31 in the CR0 field (corresponding to
+	 *   CR0.PG) is 1.
+	 */
+	if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER) {
+		ia32e = (vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) != 0;
+		if (!kvm_valid_efer(vcpu, vmcs12->guest_ia32_efer) ||
+		    ia32e != !!(vmcs12->guest_ia32_efer & EFER_LMA) ||
+		    ((vmcs12->guest_cr0 & X86_CR0_PG) &&
+		     ia32e != !!(vmcs12->guest_ia32_efer & EFER_LME))) {
+			nested_vmx_entry_failure(vcpu, vmcs12,
+				EXIT_REASON_INVALID_STATE, ENTRY_FAIL_DEFAULT);
+			return 1;
+		}
+	}
+
+	/*
+	 * If the load IA32_EFER VM-exit control is 1, bits reserved in the
+	 * IA32_EFER MSR must be 0 in the field for that register. In addition,
+	 * the values of the LMA and LME bits in the field must each be that of
+	 * the host address-space size VM-exit control.
+	 */
+	if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_EFER) {
+		ia32e = (vmcs12->vm_exit_controls &
+			 VM_EXIT_HOST_ADDR_SPACE_SIZE) != 0;
+		if (!kvm_valid_efer(vcpu, vmcs12->host_ia32_efer) ||
+		    ia32e != !!(vmcs12->host_ia32_efer & EFER_LMA) ||
+		    ia32e != !!(vmcs12->host_ia32_efer & EFER_LME)) {
+			nested_vmx_entry_failure(vcpu, vmcs12,
+				EXIT_REASON_INVALID_STATE, ENTRY_FAIL_DEFAULT);
+			return 1;
+		}
+	}
+
 	/*
 	 * We're finally done with prerequisite checking, and can start with
 	 * the nested entry.
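The two EFER blocks added above encode the SDM's consistency rules for VM entry. The guest-side rule in isolation, as a compact predicate (a hypothetical helper with simplified types; the bit positions are the architectural EFER/CR0 ones):

	#include <stdbool.h>
	#include <stdint.h>

	#define EFER_LME	(1ull << 8)
	#define EFER_LMA	(1ull << 10)
	#define X86_CR0_PG	(1ull << 31)

	/* IA-32e mode (from the VM-entry controls) must equal EFER.LMA, and
	 * must also equal EFER.LME whenever CR0.PG is set. */
	static bool guest_efer_consistent(bool ia32e, uint64_t efer, uint64_t cr0)
	{
		if (ia32e != !!(efer & EFER_LMA))
			return false;
		if ((cr0 & X86_CR0_PG) && ia32e != !!(efer & EFER_LME))
			return false;
		return true;
	}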
@@ -7223,6 +7707,8 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
 	vcpu->cpu = cpu;
 	put_cpu();

+	vmx_segment_cache_clear(vmx);
+
 	vmcs12->launch_state = 1;

 	prepare_vmcs02(vcpu, vmcs12);
@@ -7273,6 +7759,48 @@ vmcs12_guest_cr4(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 			vcpu->arch.cr4_guest_owned_bits));
 }

+static void vmcs12_save_pending_event(struct kvm_vcpu *vcpu,
+				       struct vmcs12 *vmcs12)
+{
+	u32 idt_vectoring;
+	unsigned int nr;
+
+	if (vcpu->arch.exception.pending) {
+		nr = vcpu->arch.exception.nr;
+		idt_vectoring = nr | VECTORING_INFO_VALID_MASK;
+
+		if (kvm_exception_is_soft(nr)) {
+			vmcs12->vm_exit_instruction_len =
+				vcpu->arch.event_exit_inst_len;
+			idt_vectoring |= INTR_TYPE_SOFT_EXCEPTION;
+		} else
+			idt_vectoring |= INTR_TYPE_HARD_EXCEPTION;
+
+		if (vcpu->arch.exception.has_error_code) {
+			idt_vectoring |= VECTORING_INFO_DELIVER_CODE_MASK;
+			vmcs12->idt_vectoring_error_code =
+				vcpu->arch.exception.error_code;
+		}
+
+		vmcs12->idt_vectoring_info_field = idt_vectoring;
+	} else if (vcpu->arch.nmi_pending) {
+		vmcs12->idt_vectoring_info_field =
+			INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR;
+	} else if (vcpu->arch.interrupt.pending) {
+		nr = vcpu->arch.interrupt.nr;
+		idt_vectoring = nr | VECTORING_INFO_VALID_MASK;
+
+		if (vcpu->arch.interrupt.soft) {
+			idt_vectoring |= INTR_TYPE_SOFT_INTR;
+			vmcs12->vm_entry_instruction_len =
+				vcpu->arch.event_exit_inst_len;
+		} else
+			idt_vectoring |= INTR_TYPE_EXT_INTR;
+
+		vmcs12->idt_vectoring_info_field = idt_vectoring;
+	}
+}
+
 /*
  * prepare_vmcs12 is part of what we need to do when the nested L2 guest exits
  * and we want to prepare to run its L1 parent. L1 keeps a vmcs for L2 (vmcs12),
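vmcs12_save_pending_event() above rebuilds the IDT-vectoring-information word from the queued event. The packing it produces, shown as a tiny helper (field positions follow the VMX exit-information layout; the function name is illustrative):

	#include <stdbool.h>
	#include <stdint.h>

	#define VECTORING_VALID		(1u << 31)
	#define VECTORING_DELIVER_CODE	(1u << 11)

	/* bits 7:0 = vector, bits 10:8 = type, bit 11 = error code, bit 31 = valid */
	static uint32_t make_idt_vectoring(uint8_t vector, uint32_t type, bool has_err)
	{
		uint32_t info = vector | type | VECTORING_VALID;

		if (has_err)
			info |= VECTORING_DELIVER_CODE;
		return info;
	}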
@@ -7284,7 +7812,7 @@ vmcs12_guest_cr4(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
  * exit-information fields only. Other fields are modified by L1 with VMWRITE,
  * which already writes to vmcs12 directly.
  */
-void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
+static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 {
 	/* update guest state fields: */
 	vmcs12->guest_cr0 = vmcs12_guest_cr0(vcpu, vmcs12);
@@ -7332,16 +7860,19 @@ void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 	vmcs12->guest_gdtr_base = vmcs_readl(GUEST_GDTR_BASE);
 	vmcs12->guest_idtr_base = vmcs_readl(GUEST_IDTR_BASE);

-	vmcs12->guest_activity_state = vmcs_read32(GUEST_ACTIVITY_STATE);
 	vmcs12->guest_interruptibility_info =
 		vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
 	vmcs12->guest_pending_dbg_exceptions =
 		vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS);

+	vmcs12->vm_entry_controls =
+		(vmcs12->vm_entry_controls & ~VM_ENTRY_IA32E_MODE) |
+		(vmcs_read32(VM_ENTRY_CONTROLS) & VM_ENTRY_IA32E_MODE);
+
 	/* TODO: These cannot have changed unless we have MSR bitmaps and
 	 * the relevant bit asks not to trap the change */
 	vmcs12->guest_ia32_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL);
-	if (vmcs12->vm_entry_controls & VM_EXIT_SAVE_IA32_PAT)
+	if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_IA32_PAT)
 		vmcs12->guest_ia32_pat = vmcs_read64(GUEST_IA32_PAT);
 	vmcs12->guest_sysenter_cs = vmcs_read32(GUEST_SYSENTER_CS);
 	vmcs12->guest_sysenter_esp = vmcs_readl(GUEST_SYSENTER_ESP);
@@ -7349,21 +7880,38 @@ void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)

 	/* update exit information fields: */

-	vmcs12->vm_exit_reason  = vmcs_read32(VM_EXIT_REASON);
+	vmcs12->vm_exit_reason  = to_vmx(vcpu)->exit_reason;
 	vmcs12->exit_qualification = vmcs_readl(EXIT_QUALIFICATION);

 	vmcs12->vm_exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
-	vmcs12->vm_exit_intr_error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE);
-	vmcs12->idt_vectoring_info_field =
-		vmcs_read32(IDT_VECTORING_INFO_FIELD);
-	vmcs12->idt_vectoring_error_code =
-		vmcs_read32(IDT_VECTORING_ERROR_CODE);
+	if ((vmcs12->vm_exit_intr_info &
+	     (INTR_INFO_VALID_MASK | INTR_INFO_DELIVER_CODE_MASK)) ==
+	    (INTR_INFO_VALID_MASK | INTR_INFO_DELIVER_CODE_MASK))
+		vmcs12->vm_exit_intr_error_code =
+			vmcs_read32(VM_EXIT_INTR_ERROR_CODE);
+	vmcs12->idt_vectoring_info_field = 0;
 	vmcs12->vm_exit_instruction_len = vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
 	vmcs12->vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);

-	/* clear vm-entry fields which are to be cleared on exit */
-	if (!(vmcs12->vm_exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY))
+	if (!(vmcs12->vm_exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY)) {
+		/* vm_entry_intr_info_field is cleared on exit. Emulate this
+		 * instead of reading the real value. */
 		vmcs12->vm_entry_intr_info_field &= ~INTR_INFO_VALID_MASK;
+
+		/*
+		 * Transfer the event that L0 or L1 may wanted to inject into
+		 * L2 to IDT_VECTORING_INFO_FIELD.
+		 */
+		vmcs12_save_pending_event(vcpu, vmcs12);
+	}
+
+	/*
+	 * Drop what we picked up for L2 via vmx_complete_interrupts. It is
+	 * preserved above and would only end up incorrectly in L1.
+	 */
+	vcpu->arch.nmi_injected = false;
+	kvm_clear_exception_queue(vcpu);
+	kvm_clear_interrupt_queue(vcpu);
 }

 /*
@@ -7375,11 +7923,12 @@ void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
  * Failures During or After Loading Guest State").
  * This function should be called when the active VMCS is L1's (vmcs01).
  */
-void load_vmcs12_host_state(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
+static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
+				   struct vmcs12 *vmcs12)
 {
 	if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_EFER)
 		vcpu->arch.efer = vmcs12->host_ia32_efer;
-	if (vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE)
+	else if (vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE)
 		vcpu->arch.efer |= (EFER_LMA | EFER_LME);
 	else
 		vcpu->arch.efer &= ~(EFER_LMA | EFER_LME);
@@ -7387,6 +7936,7 @@ void load_vmcs12_host_state(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)

 	kvm_register_write(vcpu, VCPU_REGS_RSP, vmcs12->host_rsp);
 	kvm_register_write(vcpu, VCPU_REGS_RIP, vmcs12->host_rip);
+	vmx_set_rflags(vcpu, X86_EFLAGS_BIT1);
 	/*
 	 * Note that calling vmx_set_cr0 is important, even if cr0 hasn't
 	 * actually changed, because it depends on the current state of
@@ -7445,6 +7995,9 @@ void load_vmcs12_host_state(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 	if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL)
 		vmcs_write64(GUEST_IA32_PERF_GLOBAL_CTRL,
 			vmcs12->host_ia32_perf_global_ctrl);
+
+	kvm_set_dr(vcpu, 7, 0x400);
+	vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
 }

 /*
@@ -7458,6 +8011,9 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu)
 	int cpu;
 	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);

+	/* trying to cancel vmlaunch/vmresume is a bug */
+	WARN_ON_ONCE(vmx->nested.nested_run_pending);
+
 	leave_guest_mode(vcpu);
 	prepare_vmcs12(vcpu, vmcs12);

@@ -7468,6 +8024,8 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu)
 	vcpu->cpu = cpu;
 	put_cpu();

+	vmx_segment_cache_clear(vmx);
+
 	/* if no vmcs02 cache requested, remove the one we used */
 	if (VMCS02_POOL_SIZE == 0)
 		nested_free_vmcs02(vmx, vmx->nested.current_vmptr);
@@ -7496,6 +8054,8 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu)
 		nested_vmx_failValid(vcpu, vmcs_read32(VM_INSTRUCTION_ERROR));
 	} else
 		nested_vmx_succeed(vcpu);
+	if (enable_shadow_vmcs)
+		vmx->nested.sync_shadow_vmcs = true;
 }

 /*
@@ -7513,6 +8073,8 @@ static void nested_vmx_entry_failure(struct kvm_vcpu *vcpu,
 	vmcs12->vm_exit_reason = reason | VMX_EXIT_REASONS_FAILED_VMENTRY;
 	vmcs12->exit_qualification = qualification;
 	nested_vmx_succeed(vcpu);
+	if (enable_shadow_vmcs)
+		to_vmx(vcpu)->nested.sync_shadow_vmcs = true;
 }

 static int vmx_check_intercept(struct kvm_vcpu *vcpu,
@@ -7590,6 +8152,8 @@ static struct kvm_x86_ops vmx_x86_ops = {
 	.load_eoi_exitmap = vmx_load_eoi_exitmap,
 	.hwapic_irr_update = vmx_hwapic_irr_update,
 	.hwapic_isr_update = vmx_hwapic_isr_update,
+	.sync_pir_to_irr = vmx_sync_pir_to_irr,
+	.deliver_posted_interrupt = vmx_deliver_posted_interrupt,

 	.set_tss_addr = vmx_set_tss_addr,
 	.get_tdp_level = get_ept_level,
@@ -7618,6 +8182,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
 	.set_tdp_cr3 = vmx_set_cr3,

 	.check_intercept = vmx_check_intercept,
+	.handle_external_intr = vmx_handle_external_intr,
 };

 static int __init vmx_init(void)
@@ -7656,6 +8221,24 @@ static int __init vmx_init(void)
 				(unsigned long *)__get_free_page(GFP_KERNEL);
 	if (!vmx_msr_bitmap_longmode_x2apic)
 		goto out4;
+	vmx_vmread_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
+	if (!vmx_vmread_bitmap)
+		goto out5;
+
+	vmx_vmwrite_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
+	if (!vmx_vmwrite_bitmap)
+		goto out6;
+
+	memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE);
+	memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE);
+	/* shadowed read/write fields */
+	for (i = 0; i < max_shadow_read_write_fields; i++) {
+		clear_bit(shadow_read_write_fields[i], vmx_vmwrite_bitmap);
+		clear_bit(shadow_read_write_fields[i], vmx_vmread_bitmap);
+	}
+	/* shadowed read only fields */
+	for (i = 0; i < max_shadow_read_only_fields; i++)
+		clear_bit(shadow_read_only_fields[i], vmx_vmread_bitmap);

 	/*
 	 * Allow direct access to the PC debug port (it is often used for I/O
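In the shadow-VMCS initialization just added, a set bit in the VMREAD/VMWRITE bitmaps makes the corresponding field trap to L0, so the loops clear the bits for every shadowed field and let L1's VMREAD/VMWRITE be satisfied from the shadow VMCS without an exit. The same pattern in miniature (illustrative types; the real code uses the kernel's clear_bit()):

	#include <stdint.h>
	#include <string.h>

	#define BITMAP_SIZE 4096	/* one page, as above */

	static void init_shadow_bitmap(uint8_t *bitmap, const uint16_t *fields, int n)
	{
		memset(bitmap, 0xff, BITMAP_SIZE);	/* default: trap every field */
		for (int i = 0; i < n; i++)		/* pass shadowed fields through */
			bitmap[fields[i] / 8] &= ~(uint8_t)(1u << (fields[i] % 8));
	}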
@@ -7674,7 +8257,7 @@ static int __init vmx_init(void)
 	r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx),
 		     __alignof__(struct vcpu_vmx), THIS_MODULE);
 	if (r)
-		goto out3;
+		goto out7;

 #ifdef CONFIG_KEXEC
 	rcu_assign_pointer(crash_vmclear_loaded_vmcss,
@@ -7692,7 +8275,7 @@ static int __init vmx_init(void)
 	memcpy(vmx_msr_bitmap_longmode_x2apic,
 			vmx_msr_bitmap_longmode, PAGE_SIZE);

-	if (enable_apicv_reg_vid) {
+	if (enable_apicv) {
 		for (msr = 0x800; msr <= 0x8ff; msr++)
 			vmx_disable_intercept_msr_read_x2apic(msr);

@@ -7722,6 +8305,12 @@ static int __init vmx_init(void)

 	return 0;

+out7:
+	free_page((unsigned long)vmx_vmwrite_bitmap);
+out6:
+	free_page((unsigned long)vmx_vmread_bitmap);
+out5:
+	free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
 out4:
 	free_page((unsigned long)vmx_msr_bitmap_longmode);
 out3:
@@ -7743,6 +8332,8 @@ static void __exit vmx_exit(void)
 	free_page((unsigned long)vmx_msr_bitmap_longmode);
 	free_page((unsigned long)vmx_io_bitmap_b);
 	free_page((unsigned long)vmx_io_bitmap_a);
+	free_page((unsigned long)vmx_vmwrite_bitmap);
+	free_page((unsigned long)vmx_vmread_bitmap);

 #ifdef CONFIG_KEXEC
 	rcu_assign_pointer(crash_vmclear_loaded_vmcss, NULL);

+ 158 - 85
arch/x86/kvm/x86.c

@@ -162,8 +162,6 @@ u64 __read_mostly host_xcr0;

 static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt);

-static int kvm_vcpu_reset(struct kvm_vcpu *vcpu);
-
 static inline void kvm_async_pf_hash_reset(struct kvm_vcpu *vcpu)
 {
 	int i;
@@ -263,6 +261,13 @@ void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data)
 }
 EXPORT_SYMBOL_GPL(kvm_set_apic_base);

+asmlinkage void kvm_spurious_fault(void)
+{
+	/* Fault while not rebooting.  We want the trace. */
+	BUG();
+}
+EXPORT_SYMBOL_GPL(kvm_spurious_fault);
+
 #define EXCPT_BENIGN		0
 #define EXCPT_CONTRIBUTORY	1
 #define EXCPT_PF		2
@@ -840,23 +845,17 @@ static const u32 emulated_msrs[] = {
 	MSR_IA32_MCG_CTL,
 };

-static int set_efer(struct kvm_vcpu *vcpu, u64 efer)
+bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer)
 {
-	u64 old_efer = vcpu->arch.efer;
-
 	if (efer & efer_reserved_bits)
-		return 1;
-
-	if (is_paging(vcpu)
-	    && (vcpu->arch.efer & EFER_LME) != (efer & EFER_LME))
-		return 1;
+		return false;

 	if (efer & EFER_FFXSR) {
 		struct kvm_cpuid_entry2 *feat;

 		feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
 		if (!feat || !(feat->edx & bit(X86_FEATURE_FXSR_OPT)))
-			return 1;
+			return false;
 	}

 	if (efer & EFER_SVME) {
@@ -864,9 +863,24 @@ static int set_efer(struct kvm_vcpu *vcpu, u64 efer)

 		feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
 		if (!feat || !(feat->ecx & bit(X86_FEATURE_SVM)))
-			return 1;
+			return false;
 	}

+	return true;
+}
+EXPORT_SYMBOL_GPL(kvm_valid_efer);
+
+static int set_efer(struct kvm_vcpu *vcpu, u64 efer)
+{
+	u64 old_efer = vcpu->arch.efer;
+
+	if (!kvm_valid_efer(vcpu, efer))
+		return 1;
+
+	if (is_paging(vcpu)
+	    && (vcpu->arch.efer & EFER_LME) != (efer & EFER_LME))
+		return 1;
+
 	efer &= ~EFER_LMA;
 	efer |= vcpu->arch.efer & EFER_LMA;

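The refactor above separates pure EFER validity (reserved bits, CPUID-gated FFXSR/SVME) from the WRMSR-only rule that EFER.LME may not change while paging is on, so the nested-VMX entry checks can reuse the former. The resulting call pattern, sketched with a hypothetical lme_changed() helper:

	/* WRMSR path: validity plus the runtime LME rule. */
	if (!kvm_valid_efer(vcpu, efer) ||
	    (is_paging(vcpu) && lme_changed(vcpu, efer)))
		return 1;	/* inject #GP */

	/* Nested-entry path (as in vmx.c above): validity only. */
	if (!kvm_valid_efer(vcpu, vmcs12->guest_ia32_efer))
		/* fail the VM entry */;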
@@ -1079,6 +1093,10 @@ static void kvm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 this_tsc_khz)
 	u32 thresh_lo, thresh_hi;
 	int use_scaling = 0;

+	/* tsc_khz can be zero if TSC calibration fails */
+	if (this_tsc_khz == 0)
+		return;
+
 	/* Compute a scale to convert nanoseconds in TSC cycles */
 	kvm_get_time_scale(this_tsc_khz, NSEC_PER_SEC / 1000,
 			   &vcpu->arch.virtual_tsc_shift,
@@ -1156,20 +1174,23 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr)
 	ns = get_kernel_ns();
 	elapsed = ns - kvm->arch.last_tsc_nsec;

-	/* n.b - signed multiplication and division required */
-	usdiff = data - kvm->arch.last_tsc_write;
+	if (vcpu->arch.virtual_tsc_khz) {
+		/* n.b - signed multiplication and division required */
+		usdiff = data - kvm->arch.last_tsc_write;
 #ifdef CONFIG_X86_64
-	usdiff = (usdiff * 1000) / vcpu->arch.virtual_tsc_khz;
+		usdiff = (usdiff * 1000) / vcpu->arch.virtual_tsc_khz;
 #else
-	/* do_div() only does unsigned */
-	asm("idivl %2; xor %%edx, %%edx"
-	    : "=A"(usdiff)
-	    : "A"(usdiff * 1000), "rm"(vcpu->arch.virtual_tsc_khz));
+		/* do_div() only does unsigned */
+		asm("idivl %2; xor %%edx, %%edx"
+		: "=A"(usdiff)
+		: "A"(usdiff * 1000), "rm"(vcpu->arch.virtual_tsc_khz));
 #endif
-	do_div(elapsed, 1000);
-	usdiff -= elapsed;
-	if (usdiff < 0)
-		usdiff = -usdiff;
+		do_div(elapsed, 1000);
+		usdiff -= elapsed;
+		if (usdiff < 0)
+			usdiff = -usdiff;
+	} else
+		usdiff = USEC_PER_SEC; /* disable TSC match window below */

 	/*
 	 * Special case: TSC write with a small delta (1 second) of virtual
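The arithmetic now guarded above converts the TSC write delta into microseconds at the vCPU's own rate: usdiff = (tsc_delta * 1000) / virtual_tsc_khz. For example, at virtual_tsc_khz = 2600000 (a 2.6 GHz guest TSC), two writes 5,200,000 cycles apart are (5200000 * 1000) / 2600000 = 2000 us apart. With virtual_tsc_khz == 0 (failed calibration) that division would fault, so the new branch instead forces usdiff to a full second, which disables the TSC-match window below.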
@@ -2034,7 +2055,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 	case MSR_P6_EVNTSEL0:
 	case MSR_P6_EVNTSEL1:
 		if (kvm_pmu_msr(vcpu, msr))
-			return kvm_pmu_set_msr(vcpu, msr, data);
+			return kvm_pmu_set_msr(vcpu, msr_info);

 		if (pr || data != 0)
 			vcpu_unimpl(vcpu, "disabled perfctr wrmsr: "
@@ -2080,7 +2101,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr))
 			return xen_hvm_config(vcpu, data);
 		if (kvm_pmu_msr(vcpu, msr))
-			return kvm_pmu_set_msr(vcpu, msr, data);
+			return kvm_pmu_set_msr(vcpu, msr_info);
 		if (!ignore_msrs) {
 			vcpu_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n",
 				    msr, data);
@@ -2479,7 +2500,6 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_USER_NMI:
 	case KVM_CAP_REINJECT_CONTROL:
 	case KVM_CAP_IRQ_INJECT_STATUS:
-	case KVM_CAP_ASSIGN_DEV_IRQ:
 	case KVM_CAP_IRQFD:
 	case KVM_CAP_IOEVENTFD:
 	case KVM_CAP_PIT2:
@@ -2497,10 +2517,12 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_XSAVE:
 	case KVM_CAP_ASYNC_PF:
 	case KVM_CAP_GET_TSC_KHZ:
-	case KVM_CAP_PCI_2_3:
 	case KVM_CAP_KVMCLOCK_CTRL:
 	case KVM_CAP_READONLY_MEM:
-	case KVM_CAP_IRQFD_RESAMPLE:
+#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
+	case KVM_CAP_ASSIGN_DEV_IRQ:
+	case KVM_CAP_PCI_2_3:
+#endif
 		r = 1;
 		break;
 	case KVM_CAP_COALESCED_MMIO:
@@ -2521,9 +2543,11 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_PV_MMU:	/* obsolete */
 		r = 0;
 		break;
+#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
 	case KVM_CAP_IOMMU:
 		r = iommu_present(&pci_bus_type);
 		break;
+#endif
 	case KVM_CAP_MCE:
 		r = KVM_MAX_MCE_BANKS;
 		break;
@@ -2679,6 +2703,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
 static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
 				    struct kvm_lapic_state *s)
 {
+	kvm_x86_ops->sync_pir_to_irr(vcpu);
 	memcpy(s->regs, vcpu->arch.apic->regs, sizeof *s);

 	return 0;
@@ -2696,7 +2721,7 @@ static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu,
 static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
 				    struct kvm_interrupt *irq)
 {
-	if (irq->irq < 0 || irq->irq >= KVM_NR_INTERRUPTS)
+	if (irq->irq >= KVM_NR_INTERRUPTS)
 		return -EINVAL;
 	if (irqchip_in_kernel(vcpu->kvm))
 		return -ENXIO;
@@ -2819,10 +2844,9 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
 	events->nmi.masked = kvm_x86_ops->get_nmi_mask(vcpu);
 	events->nmi.pad = 0;

-	events->sipi_vector = vcpu->arch.sipi_vector;
+	events->sipi_vector = 0; /* never valid when reporting to user space */

 	events->flags = (KVM_VCPUEVENT_VALID_NMI_PENDING
-			 | KVM_VCPUEVENT_VALID_SIPI_VECTOR
 			 | KVM_VCPUEVENT_VALID_SHADOW);
 	memset(&events->reserved, 0, sizeof(events->reserved));
 }
@@ -2853,8 +2877,9 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
 		vcpu->arch.nmi_pending = events->nmi.pending;
 	kvm_x86_ops->set_nmi_mask(vcpu, events->nmi.masked);

-	if (events->flags & KVM_VCPUEVENT_VALID_SIPI_VECTOR)
-		vcpu->arch.sipi_vector = events->sipi_vector;
+	if (events->flags & KVM_VCPUEVENT_VALID_SIPI_VECTOR &&
+	    kvm_vcpu_has_lapic(vcpu))
+		vcpu->arch.apic->sipi_vector = events->sipi_vector;

 	kvm_make_request(KVM_REQ_EVENT, vcpu);

@@ -3478,13 +3503,15 @@ out:
 	return r;
 }

-int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event)
+int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event,
+			bool line_status)
 {
 	if (!irqchip_in_kernel(kvm))
 		return -ENXIO;

 	irq_event->status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
-					irq_event->irq, irq_event->level);
+					irq_event->irq, irq_event->level,
+					line_status);
 	return 0;
 }

@@ -4752,11 +4779,15 @@ static int handle_emulation_failure(struct kvm_vcpu *vcpu)
 }

 static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2,
-				  bool write_fault_to_shadow_pgtable)
+				  bool write_fault_to_shadow_pgtable,
+				  int emulation_type)
 {
 	gpa_t gpa = cr2;
 	pfn_t pfn;

+	if (emulation_type & EMULTYPE_NO_REEXECUTE)
+		return false;
+
 	if (!vcpu->arch.mmu.direct_map) {
 		/*
 		 * Write permission should be allowed since only
@@ -4899,8 +4930,8 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
 		if (r != EMULATION_OK)  {
 			if (emulation_type & EMULTYPE_TRAP_UD)
 				return EMULATE_FAIL;
-			if (reexecute_instruction(vcpu, cr2,
-						  write_fault_to_spt))
+			if (reexecute_instruction(vcpu, cr2, write_fault_to_spt,
+						emulation_type))
 				return EMULATE_DONE;
 			if (emulation_type & EMULTYPE_SKIP)
 				return EMULATE_FAIL;
@@ -4930,7 +4961,8 @@ restart:
 		return EMULATE_DONE;

 	if (r == EMULATION_FAILED) {
-		if (reexecute_instruction(vcpu, cr2, write_fault_to_spt))
+		if (reexecute_instruction(vcpu, cr2, write_fault_to_spt,
+					emulation_type))
 			return EMULATE_DONE;

 		return handle_emulation_failure(vcpu);
@@ -5641,14 +5673,20 @@ static void kvm_gen_update_masterclock(struct kvm *kvm)
 #endif
 }

-static void update_eoi_exitmap(struct kvm_vcpu *vcpu)
+static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu)
 {
 	u64 eoi_exit_bitmap[4];
+	u32 tmr[8];
+
+	if (!kvm_apic_hw_enabled(vcpu->arch.apic))
+		return;

 	memset(eoi_exit_bitmap, 0, 32);
+	memset(tmr, 0, 32);

-	kvm_ioapic_calculate_eoi_exitmap(vcpu, eoi_exit_bitmap);
+	kvm_ioapic_scan_entry(vcpu, eoi_exit_bitmap, tmr);
 	kvm_x86_ops->load_eoi_exitmap(vcpu, eoi_exit_bitmap);
+	kvm_apic_update_tmr(vcpu, tmr);
 }

 static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
@@ -5656,7 +5694,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 	int r;
 	bool req_int_win = !irqchip_in_kernel(vcpu->kvm) &&
 		vcpu->run->request_interrupt_window;
-	bool req_immediate_exit = 0;
+	bool req_immediate_exit = false;

 	if (vcpu->requests) {
 		if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu))
@@ -5698,24 +5736,30 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 			record_steal_time(vcpu);
 		if (kvm_check_request(KVM_REQ_NMI, vcpu))
 			process_nmi(vcpu);
-		req_immediate_exit =
-			kvm_check_request(KVM_REQ_IMMEDIATE_EXIT, vcpu);
 		if (kvm_check_request(KVM_REQ_PMU, vcpu))
 			kvm_handle_pmu_event(vcpu);
 		if (kvm_check_request(KVM_REQ_PMI, vcpu))
 			kvm_deliver_pmi(vcpu);
-		if (kvm_check_request(KVM_REQ_EOIBITMAP, vcpu))
-			update_eoi_exitmap(vcpu);
+		if (kvm_check_request(KVM_REQ_SCAN_IOAPIC, vcpu))
+			vcpu_scan_ioapic(vcpu);
 	}

 	if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
+		kvm_apic_accept_events(vcpu);
+		if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
+			r = 1;
+			goto out;
+		}
+
 		inject_pending_event(vcpu);

 		/* enable NMI/IRQ window open exits if needed */
 		if (vcpu->arch.nmi_pending)
-			kvm_x86_ops->enable_nmi_window(vcpu);
+			req_immediate_exit =
+				kvm_x86_ops->enable_nmi_window(vcpu) != 0;
 		else if (kvm_cpu_has_injectable_intr(vcpu) || req_int_win)
-			kvm_x86_ops->enable_irq_window(vcpu);
+			req_immediate_exit =
+				kvm_x86_ops->enable_irq_window(vcpu) != 0;

 		if (kvm_lapic_enabled(vcpu)) {
 			/*
@@ -5794,7 +5838,9 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)

 	vcpu->mode = OUTSIDE_GUEST_MODE;
 	smp_wmb();
-	local_irq_enable();
+
+	/* Interrupt is enabled by handle_external_intr() */
+	kvm_x86_ops->handle_external_intr(vcpu);

 	++vcpu->stat.exits;

@@ -5843,16 +5889,6 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
 	int r;
 	struct kvm *kvm = vcpu->kvm;

-	if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED)) {
-		pr_debug("vcpu %d received sipi with vector # %x\n",
-			 vcpu->vcpu_id, vcpu->arch.sipi_vector);
-		kvm_lapic_reset(vcpu);
-		r = kvm_vcpu_reset(vcpu);
-		if (r)
-			return r;
-		vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
-	}
-
 	vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
 	r = vapic_enter(vcpu);
 	if (r) {
@@ -5869,8 +5905,8 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
 			srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
 			kvm_vcpu_block(vcpu);
 			vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
-			if (kvm_check_request(KVM_REQ_UNHALT, vcpu))
-			{
+			if (kvm_check_request(KVM_REQ_UNHALT, vcpu)) {
+				kvm_apic_accept_events(vcpu);
 				switch(vcpu->arch.mp_state) {
 				case KVM_MP_STATE_HALTED:
 					vcpu->arch.mp_state =
@@ -5878,7 +5914,8 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
 				case KVM_MP_STATE_RUNNABLE:
 					vcpu->arch.apf.halted = false;
 					break;
-				case KVM_MP_STATE_SIPI_RECEIVED:
+				case KVM_MP_STATE_INIT_RECEIVED:
+					break;
 				default:
 					r = -EINTR;
 					break;
@@ -6013,6 +6050,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)

 	if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) {
 		kvm_vcpu_block(vcpu);
+		kvm_apic_accept_events(vcpu);
 		clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
 		r = -EAGAIN;
 		goto out;
@@ -6169,6 +6207,7 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
 				    struct kvm_mp_state *mp_state)
 {
+	kvm_apic_accept_events(vcpu);
 	mp_state->mp_state = vcpu->arch.mp_state;
 	return 0;
 }
@@ -6176,7 +6215,15 @@ int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
 				    struct kvm_mp_state *mp_state)
 {
-	vcpu->arch.mp_state = mp_state->mp_state;
+	if (!kvm_vcpu_has_lapic(vcpu) &&
+	    mp_state->mp_state != KVM_MP_STATE_RUNNABLE)
+		return -EINVAL;
+
+	if (mp_state->mp_state == KVM_MP_STATE_SIPI_RECEIVED) {
+		vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED;
+		set_bit(KVM_APIC_SIPI, &vcpu->arch.apic->pending_events);
+	} else
+		vcpu->arch.mp_state = mp_state->mp_state;
 	kvm_make_request(KVM_REQ_EVENT, vcpu);
 	return 0;
 }
@@ -6475,9 +6522,8 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 	r = vcpu_load(vcpu);
 	if (r)
 		return r;
-	r = kvm_vcpu_reset(vcpu);
-	if (r == 0)
-		r = kvm_mmu_setup(vcpu);
+	kvm_vcpu_reset(vcpu);
+	r = kvm_mmu_setup(vcpu);
 	vcpu_put(vcpu);

 	return r;
@@ -6514,7 +6560,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
 	kvm_x86_ops->vcpu_free(vcpu);
 }

-static int kvm_vcpu_reset(struct kvm_vcpu *vcpu)
+void kvm_vcpu_reset(struct kvm_vcpu *vcpu)
 {
 	atomic_set(&vcpu->arch.nmi_queued, 0);
 	vcpu->arch.nmi_pending = 0;
@@ -6541,7 +6587,18 @@ static int kvm_vcpu_reset(struct kvm_vcpu *vcpu)
 	vcpu->arch.regs_avail = ~0;
 	vcpu->arch.regs_dirty = ~0;

-	return kvm_x86_ops->vcpu_reset(vcpu);
+	kvm_x86_ops->vcpu_reset(vcpu);
+}
+
+void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, unsigned int vector)
+{
+	struct kvm_segment cs;
+
+	kvm_get_segment(vcpu, &cs, VCPU_SREG_CS);
+	cs.selector = vector << 8;
+	cs.base = vector << 12;
+	kvm_set_segment(vcpu, &cs, VCPU_SREG_CS);
+	kvm_rip_write(vcpu, 0);
 }

 int kvm_arch_hardware_enable(void *garbage)
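kvm_vcpu_deliver_sipi_vector() above implements the real-mode start-up convention: the target vCPU begins executing at segment vector << 8, offset 0, i.e. physical address vector << 12. For a SIPI vector of 0x9a, for instance, that yields CS selector 0x9a00, CS base 0x9a000, and RIP 0, so the AP starts at physical address 0x9a000.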
@@ -6706,8 +6763,10 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 	}
 	vcpu->arch.mcg_cap = KVM_MAX_MCE_BANKS;

-	if (!zalloc_cpumask_var(&vcpu->arch.wbinvd_dirty_mask, GFP_KERNEL))
+	if (!zalloc_cpumask_var(&vcpu->arch.wbinvd_dirty_mask, GFP_KERNEL)) {
+		r = -ENOMEM;
 		goto fail_free_mce_banks;
+	}

 	r = fx_init(vcpu);
 	if (r)
@@ -6811,6 +6870,23 @@ void kvm_arch_sync_events(struct kvm *kvm)

 void kvm_arch_destroy_vm(struct kvm *kvm)
 {
+	if (current->mm == kvm->mm) {
+		/*
+		 * Free memory regions allocated on behalf of userspace,
+		 * unless the memory map has changed due to process exit
+		 * or fd copying.
+		 */
+		struct kvm_userspace_memory_region mem;
+		memset(&mem, 0, sizeof(mem));
+		mem.slot = APIC_ACCESS_PAGE_PRIVATE_MEMSLOT;
+		kvm_set_memory_region(kvm, &mem);
+
+		mem.slot = IDENTITY_PAGETABLE_PRIVATE_MEMSLOT;
+		kvm_set_memory_region(kvm, &mem);
+
+		mem.slot = TSS_PRIVATE_MEMSLOT;
+		kvm_set_memory_region(kvm, &mem);
+	}
 	kvm_iommu_unmap_guest(kvm);
 	kfree(kvm->arch.vpic);
 	kfree(kvm->arch.vioapic);
@@ -6903,24 +6979,21 @@ out_free:

 int kvm_arch_prepare_memory_region(struct kvm *kvm,
 				struct kvm_memory_slot *memslot,
-				struct kvm_memory_slot old,
 				struct kvm_userspace_memory_region *mem,
-				bool user_alloc)
+				enum kvm_mr_change change)
 {
-	int npages = memslot->npages;
-
 	/*
 	 * Only private memory slots need to be mapped here since
 	 * KVM_SET_MEMORY_REGION ioctl is no longer supported.
 	 */
-	if ((memslot->id >= KVM_USER_MEM_SLOTS) && npages && !old.npages) {
+	if ((memslot->id >= KVM_USER_MEM_SLOTS) && (change == KVM_MR_CREATE)) {
 		unsigned long userspace_addr;

 		/*
 		 * MAP_SHARED to prevent internal slot pages from being moved
 		 * by fork()/COW.
 		 */
-		userspace_addr = vm_mmap(NULL, 0, npages * PAGE_SIZE,
+		userspace_addr = vm_mmap(NULL, 0, memslot->npages * PAGE_SIZE,
 					 PROT_READ | PROT_WRITE,
 					 MAP_SHARED | MAP_ANONYMOUS, 0);

@@ -6935,17 +7008,17 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,

 void kvm_arch_commit_memory_region(struct kvm *kvm,
 				struct kvm_userspace_memory_region *mem,
-				struct kvm_memory_slot old,
-				bool user_alloc)
+				const struct kvm_memory_slot *old,
+				enum kvm_mr_change change)
 {

-	int nr_mmu_pages = 0, npages = mem->memory_size >> PAGE_SHIFT;
+	int nr_mmu_pages = 0;

-	if ((mem->slot >= KVM_USER_MEM_SLOTS) && old.npages && !npages) {
+	if ((mem->slot >= KVM_USER_MEM_SLOTS) && (change == KVM_MR_DELETE)) {
 		int ret;

-		ret = vm_munmap(old.userspace_addr,
-				old.npages * PAGE_SIZE);
+		ret = vm_munmap(old->userspace_addr,
+				old->npages * PAGE_SIZE);
 		if (ret < 0)
 			printk(KERN_WARNING
 			       "kvm_vm_ioctl_set_memory_region: "
@@ -6962,14 +7035,14 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
 	 * Existing largepage mappings are destroyed here and new ones will
 	 * not be created until the end of the logging.
 	 */
-	if (npages && (mem->flags & KVM_MEM_LOG_DIRTY_PAGES))
+	if ((change != KVM_MR_DELETE) && (mem->flags & KVM_MEM_LOG_DIRTY_PAGES))
 		kvm_mmu_slot_remove_write_access(kvm, mem->slot);
 	/*
 	 * If memory slot is created, or moved, we need to clear all
 	 * mmio sptes.
 	 */
-	if (npages && old.base_gfn != mem->guest_phys_addr >> PAGE_SHIFT) {
-		kvm_mmu_zap_all(kvm);
+	if ((change == KVM_MR_CREATE) || (change == KVM_MR_MOVE)) {
+		kvm_mmu_zap_mmio_sptes(kvm);
 		kvm_reload_remote_mmus(kvm);
 	}
 }
@@ -6991,7 +7064,7 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
 	return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
 		!vcpu->arch.apf.halted)
 		|| !list_empty_careful(&vcpu->async_pf.done)
-		|| vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED
+		|| kvm_apic_has_events(vcpu)
 		|| atomic_read(&vcpu->arch.nmi_queued) ||
 		(kvm_arch_interrupt_allowed(vcpu) &&
 		 kvm_cpu_has_interrupt(vcpu));

+ 6 - 5
drivers/s390/kvm/kvm_virtio.c

@@ -443,29 +443,30 @@ static int __init test_devices_support(unsigned long addr)
 }
 /*
  * Init function for virtio
- * devices are in a single page above top of "normal" mem
+ * devices are in a single page above top of "normal" + standby mem
  */
 static int __init kvm_devices_init(void)
 {
 	int rc;
+	unsigned long total_memory_size = sclp_get_rzm() * sclp_get_rnmax();

 	if (!MACHINE_IS_KVM)
 		return -ENODEV;

-	if (test_devices_support(real_memory_size) < 0)
+	if (test_devices_support(total_memory_size) < 0)
 		return -ENODEV;

-	rc = vmem_add_mapping(real_memory_size, PAGE_SIZE);
+	rc = vmem_add_mapping(total_memory_size, PAGE_SIZE);
 	if (rc)
 		return rc;

-	kvm_devices = (void *) real_memory_size;
+	kvm_devices = (void *) total_memory_size;

 	kvm_root = root_device_register("kvm_s390");
 	if (IS_ERR(kvm_root)) {
 		rc = PTR_ERR(kvm_root);
 		printk(KERN_ERR "Could not register kvm_s390 root device");
-		vmem_remove_mapping(real_memory_size, PAGE_SIZE);
+		vmem_remove_mapping(total_memory_size, PAGE_SIZE);
 		return rc;
 	}


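The kvm_virtio.c change above sizes the mapping from SCLP data instead of online memory: total_memory_size = sclp_get_rzm() * sclp_get_rnmax(), i.e. storage-increment size times the maximum number of increments, so the virtio device page now sits above standby memory too. With a 256 MiB increment size and rnmax = 16, for example, the device page lands at the 4 GiB mark even if less memory is currently online.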
+ 12 - 8
drivers/s390/kvm/virtio_ccw.c

@@ -31,6 +31,7 @@
 #include <asm/irq.h>
 #include <asm/cio.h>
 #include <asm/ccwdev.h>
+#include <asm/virtio-ccw.h>

 /*
  * virtio related functions
@@ -77,12 +78,9 @@ struct virtio_ccw_vq_info {
 	void *queue;
 	struct vq_info_block *info_block;
 	struct list_head node;
+	long cookie;
 };

-#define KVM_VIRTIO_CCW_RING_ALIGN 4096
-
-#define KVM_S390_VIRTIO_CCW_NOTIFY 3
-
 #define CCW_CMD_SET_VQ 0x13
 #define CCW_CMD_VDEV_RESET 0x33
 #define CCW_CMD_SET_IND 0x43
@@ -135,8 +133,11 @@ static int ccw_io_helper(struct virtio_ccw_device *vcdev,
 	do {
 		spin_lock_irqsave(get_ccwdev_lock(vcdev->cdev), flags);
 		ret = ccw_device_start(vcdev->cdev, ccw, intparm, 0, 0);
-		if (!ret)
+		if (!ret) {
+			if (!vcdev->curr_io)
+				vcdev->err = 0;
 			vcdev->curr_io |= flag;
+		}
 		spin_unlock_irqrestore(get_ccwdev_lock(vcdev->cdev), flags);
 		cpu_relax();
 	} while (ret == -EBUSY);
@@ -145,15 +146,18 @@ static int ccw_io_helper(struct virtio_ccw_device *vcdev,
 }
 }
 
 
 static inline long do_kvm_notify(struct subchannel_id schid,
 static inline long do_kvm_notify(struct subchannel_id schid,
-				 unsigned long queue_index)
+				 unsigned long queue_index,
+				 long cookie)
 {
 {
 	register unsigned long __nr asm("1") = KVM_S390_VIRTIO_CCW_NOTIFY;
 	register unsigned long __nr asm("1") = KVM_S390_VIRTIO_CCW_NOTIFY;
 	register struct subchannel_id __schid asm("2") = schid;
 	register struct subchannel_id __schid asm("2") = schid;
 	register unsigned long __index asm("3") = queue_index;
 	register unsigned long __index asm("3") = queue_index;
 	register long __rc asm("2");
 	register long __rc asm("2");
+	register long __cookie asm("4") = cookie;
 
 
 	asm volatile ("diag 2,4,0x500\n"
 	asm volatile ("diag 2,4,0x500\n"
-		      : "=d" (__rc) : "d" (__nr), "d" (__schid), "d" (__index)
+		      : "=d" (__rc) : "d" (__nr), "d" (__schid), "d" (__index),
+		      "d"(__cookie)
 		      : "memory", "cc");
 		      : "memory", "cc");
 	return __rc;
 	return __rc;
 }
 }
@@ -166,7 +170,7 @@ static void virtio_ccw_kvm_notify(struct virtqueue *vq)
 
 
 	vcdev = to_vc_device(info->vq->vdev);
 	vcdev = to_vc_device(info->vq->vdev);
 	ccw_device_get_schid(vcdev->cdev, &schid);
 	ccw_device_get_schid(vcdev->cdev, &schid);
-	do_kvm_notify(schid, vq->index);
+	info->cookie = do_kvm_notify(schid, vq->index, info->cookie);
 }
 }
 
 
 static int virtio_ccw_read_vq_conf(struct virtio_ccw_device *vcdev,
 static int virtio_ccw_read_vq_conf(struct virtio_ccw_device *vcdev,

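The notify hypercall (diag 0x500, function KVM_S390_VIRTIO_CCW_NOTIFY, whose definition moves into asm/virtio-ccw.h above) now returns a cookie; the guest stores it per virtqueue and hands it back on the next notification so the host can use it as a hint when locating the consumer. A hedged sketch of the host-side dispatch this enables, using the dedicated notify bus added to kvm_host.h below (the handler name and return plumbing are illustrative, not from this diff):

/*
 * Illustrative host-side handler for the guest's diag 0x500: forward
 * the queue index to whatever is registered on the virtio-ccw notify
 * bus.  A real handler would also return the cookie to the guest.
 */
static int handle_virtio_ccw_notify(struct kvm_vcpu *vcpu, gpa_t schid_addr,
				    unsigned long queue_index)
{
	return kvm_io_bus_write(vcpu->kvm, KVM_VIRTIO_CCW_NOTIFY_BUS,
				schid_addr, sizeof(queue_index), &queue_index);
}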
+ 112 - 54
include/linux/kvm_host.h

@@ -117,14 +117,13 @@ static inline bool is_error_page(struct page *page)
 #define KVM_REQ_APF_HALT          12
 #define KVM_REQ_STEAL_UPDATE      13
 #define KVM_REQ_NMI               14
-#define KVM_REQ_IMMEDIATE_EXIT    15
-#define KVM_REQ_PMU               16
-#define KVM_REQ_PMI               17
-#define KVM_REQ_WATCHDOG          18
-#define KVM_REQ_MASTERCLOCK_UPDATE 19
-#define KVM_REQ_MCLOCK_INPROGRESS 20
-#define KVM_REQ_EPR_EXIT          21
-#define KVM_REQ_EOIBITMAP         22
+#define KVM_REQ_PMU               15
+#define KVM_REQ_PMI               16
+#define KVM_REQ_WATCHDOG          17
+#define KVM_REQ_MASTERCLOCK_UPDATE 18
+#define KVM_REQ_MCLOCK_INPROGRESS 19
+#define KVM_REQ_EPR_EXIT          20
+#define KVM_REQ_SCAN_IOAPIC       21
 
 #define KVM_USERSPACE_IRQ_SOURCE_ID		0
 #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID	1
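KVM_REQ_SCAN_IOAPIC takes over the slot vacated by the EOI-bitmap request, and KVM_REQ_IMMEDIATE_EXIT drops out of the shared list entirely. For context, these constants index the vcpu->requests bitmap driven by the helpers declared later in this header; a hedged usage sketch, with the consumer name illustrative:

/* Producer side: set the request bit, then kick the vcpu. */
kvm_make_request(KVM_REQ_SCAN_IOAPIC, vcpu);
kvm_vcpu_kick(vcpu);

/* Consumer side, in the vcpu entry loop: */
if (kvm_check_request(KVM_REQ_SCAN_IOAPIC, vcpu))
	vcpu_scan_ioapic(vcpu);		/* illustrative handler name */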
@@ -133,6 +132,9 @@ struct kvm;
 struct kvm_vcpu;
 extern struct kmem_cache *kvm_vcpu_cache;
 
+extern raw_spinlock_t kvm_lock;
+extern struct list_head vm_list;
+
 struct kvm_io_range {
 	gpa_t addr;
 	int len;
@@ -149,6 +151,7 @@ struct kvm_io_bus {
 enum kvm_bus {
 	KVM_MMIO_BUS,
 	KVM_PIO_BUS,
+	KVM_VIRTIO_CCW_NOTIFY_BUS,
 	KVM_NR_BUSES
 };
 
@@ -252,6 +255,7 @@ struct kvm_vcpu {
 		bool dy_eligible;
 	} spin_loop;
 #endif
+	bool preempted;
 	struct kvm_vcpu_arch arch;
 };
 
@@ -285,7 +289,8 @@ struct kvm_kernel_irq_routing_entry {
 	u32 gsi;
 	u32 type;
 	int (*set)(struct kvm_kernel_irq_routing_entry *e,
-		   struct kvm *kvm, int irq_source_id, int level);
+		   struct kvm *kvm, int irq_source_id, int level,
+		   bool line_status);
 	union {
 		struct {
 			unsigned irqchip;
@@ -296,10 +301,10 @@ struct kvm_kernel_irq_routing_entry {
 	struct hlist_node link;
 };
 
-#ifdef __KVM_HAVE_IOAPIC
+#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
 
 struct kvm_irq_routing_table {
-	int chip[KVM_NR_IRQCHIPS][KVM_IOAPIC_NUM_PINS];
+	int chip[KVM_NR_IRQCHIPS][KVM_IRQCHIP_NUM_PINS];
 	struct kvm_kernel_irq_routing_entry *rt_entries;
 	u32 nr_rt_entries;
 	/*
@@ -385,6 +390,7 @@ struct kvm {
 	long mmu_notifier_count;
 #endif
 	long tlbs_dirty;
+	struct list_head devices;
 };
 
 #define kvm_err(fmt, ...) \
@@ -424,6 +430,19 @@ void kvm_vcpu_uninit(struct kvm_vcpu *vcpu);
 int __must_check vcpu_load(struct kvm_vcpu *vcpu);
 void vcpu_put(struct kvm_vcpu *vcpu);
 
+#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
+int kvm_irqfd_init(void);
+void kvm_irqfd_exit(void);
+#else
+static inline int kvm_irqfd_init(void)
+{
+	return 0;
+}
+
+static inline void kvm_irqfd_exit(void)
+{
+}
+#endif
 int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
 		  struct module *module);
 void kvm_exit(void);
@@ -452,24 +471,39 @@ id_to_memslot(struct kvm_memslots *slots, int id)
 	return slot;
 }
 
+/*
+ * KVM_SET_USER_MEMORY_REGION ioctl allows the following operations:
+ * - create a new memory slot
+ * - delete an existing memory slot
+ * - modify an existing memory slot
+ *   -- move it in the guest physical memory space
+ *   -- just change its flags
+ *
+ * Since flags can be changed by some of these operations, the following
+ * differentiation is the best we can do for __kvm_set_memory_region():
+ */
+enum kvm_mr_change {
+	KVM_MR_CREATE,
+	KVM_MR_DELETE,
+	KVM_MR_MOVE,
+	KVM_MR_FLAGS_ONLY,
+};
+
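Given the comment's four cases, a hedged sketch of how a request might be classified against the current slot contents (field names follow struct kvm_memory_slot; the actual decision logic in __kvm_set_memory_region may differ in detail):

/*
 * Assumption-labelled sketch: classify a KVM_SET_USER_MEMORY_REGION
 * request.  A slot with no pages yet is being created; a request for
 * zero pages deletes; a changed base gfn moves; otherwise only the
 * flags can differ.
 */
static enum kvm_mr_change classify_mr_change(const struct kvm_memory_slot *old,
					     gfn_t new_base_gfn,
					     unsigned long new_npages)
{
	if (!old->npages)
		return KVM_MR_CREATE;
	if (!new_npages)
		return KVM_MR_DELETE;
	if (new_base_gfn != old->base_gfn)
		return KVM_MR_MOVE;
	return KVM_MR_FLAGS_ONLY;
}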
 int kvm_set_memory_region(struct kvm *kvm,
-			  struct kvm_userspace_memory_region *mem,
-			  bool user_alloc);
+			  struct kvm_userspace_memory_region *mem);
 int __kvm_set_memory_region(struct kvm *kvm,
-			    struct kvm_userspace_memory_region *mem,
-			    bool user_alloc);
+			    struct kvm_userspace_memory_region *mem);
 void kvm_arch_free_memslot(struct kvm_memory_slot *free,
 			   struct kvm_memory_slot *dont);
 int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages);
 int kvm_arch_prepare_memory_region(struct kvm *kvm,
 				struct kvm_memory_slot *memslot,
-				struct kvm_memory_slot old,
 				struct kvm_userspace_memory_region *mem,
-				bool user_alloc);
+				enum kvm_mr_change change);
 void kvm_arch_commit_memory_region(struct kvm *kvm,
 				struct kvm_userspace_memory_region *mem,
-				struct kvm_memory_slot old,
-				bool user_alloc);
+				const struct kvm_memory_slot *old,
+				enum kvm_mr_change change);
 bool kvm_largepages_enabled(void);
 void kvm_disable_largepages(void);
 /* flush all memory translations */
@@ -539,7 +573,7 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu);
 void kvm_flush_remote_tlbs(struct kvm *kvm);
 void kvm_reload_remote_mmus(struct kvm *kvm);
 void kvm_make_mclock_inprogress_request(struct kvm *kvm);
-void kvm_make_update_eoibitmap_request(struct kvm *kvm);
+void kvm_make_scan_ioapic_request(struct kvm *kvm);
 
 long kvm_arch_dev_ioctl(struct file *filp,
 			unsigned int ioctl, unsigned long arg);
@@ -555,10 +589,9 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 				struct kvm_dirty_log *log);
 
 int kvm_vm_ioctl_set_memory_region(struct kvm *kvm,
-				   struct
-				   kvm_userspace_memory_region *mem,
-				   bool user_alloc);
-int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level);
+				   struct kvm_userspace_memory_region *mem);
+int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level,
+			bool line_status);
 long kvm_arch_vm_ioctl(struct file *filp,
 		       unsigned int ioctl, unsigned long arg);
 
@@ -632,7 +665,6 @@ static inline wait_queue_head_t *kvm_arch_vcpu_wq(struct kvm_vcpu *vcpu)
 
 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type);
 void kvm_arch_destroy_vm(struct kvm *kvm);
-void kvm_free_all_assigned_devices(struct kvm *kvm);
 void kvm_arch_sync_events(struct kvm *kvm);
 
 int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu);
@@ -684,15 +716,11 @@ void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
 void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin,
 			     bool mask);
 
-#ifdef __KVM_HAVE_IOAPIC
-void kvm_get_intr_delivery_bitmask(struct kvm_ioapic *ioapic,
-				   union kvm_ioapic_redirect_entry *entry,
-				   unsigned long *deliver_bitmask);
-#endif
-int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level);
+int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
+		bool line_status);
 int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level);
 int kvm_set_msi(struct kvm_kernel_irq_routing_entry *irq_entry, struct kvm *kvm,
-		int irq_source_id, int level);
+		int irq_source_id, int level, bool line_status);
 bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin);
 void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin);
 void kvm_register_irq_ack_notifier(struct kvm *kvm,
@@ -705,7 +733,7 @@ void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id);
 /* For vcpu->arch.iommu_flags */
 #define KVM_IOMMU_CACHE_COHERENCY	0x1
 
-#ifdef CONFIG_IOMMU_API
+#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
 int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot);
 void kvm_iommu_unmap_pages(struct kvm *kvm, struct kvm_memory_slot *slot);
 int kvm_iommu_map_guest(struct kvm *kvm);
@@ -714,7 +742,7 @@ int kvm_assign_device(struct kvm *kvm,
 		      struct kvm_assigned_dev_kernel *assigned_dev);
 int kvm_deassign_device(struct kvm *kvm,
 			struct kvm_assigned_dev_kernel *assigned_dev);
-#else /* CONFIG_IOMMU_API */
+#else
 static inline int kvm_iommu_map_pages(struct kvm *kvm,
 				      struct kvm_memory_slot *slot)
 {
@@ -726,28 +754,11 @@ static inline void kvm_iommu_unmap_pages(struct kvm *kvm,
 {
 }
 
-static inline int kvm_iommu_map_guest(struct kvm *kvm)
-{
-	return -ENODEV;
-}
-
 static inline int kvm_iommu_unmap_guest(struct kvm *kvm)
 {
 	return 0;
 }
-
-static inline int kvm_assign_device(struct kvm *kvm,
-		struct kvm_assigned_dev_kernel *assigned_dev)
-{
-	return 0;
-}
-
-static inline int kvm_deassign_device(struct kvm *kvm,
-		struct kvm_assigned_dev_kernel *assigned_dev)
-{
-	return 0;
-}
-#endif /* CONFIG_IOMMU_API */
+#endif
 
 static inline void __guest_enter(void)
 {
@@ -921,7 +932,7 @@ static inline int mmu_notifier_retry(struct kvm *kvm, unsigned long mmu_seq)
 }
 #endif
 
-#ifdef KVM_CAP_IRQ_ROUTING
+#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
 
 #define KVM_MAX_IRQ_ROUTES 1024
 
@@ -930,6 +941,9 @@ int kvm_set_irq_routing(struct kvm *kvm,
 			const struct kvm_irq_routing_entry *entries,
 			unsigned nr,
 			unsigned flags);
+int kvm_set_routing_entry(struct kvm_irq_routing_table *rt,
+			  struct kvm_kernel_irq_routing_entry *e,
+			  const struct kvm_irq_routing_entry *ue);
 void kvm_free_irq_routing(struct kvm *kvm);
 
 int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi);
@@ -998,11 +1012,13 @@ static inline bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu) { return true; }
 
 #endif
 
-#ifdef __KVM_HAVE_DEVICE_ASSIGNMENT
+#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
 
 long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
 				  unsigned long arg);
 
+void kvm_free_all_assigned_devices(struct kvm *kvm);
+
 #else
 
 static inline long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
@@ -1011,6 +1027,8 @@ static inline long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
 	return -ENOTTY;
 }
 
+static inline void kvm_free_all_assigned_devices(struct kvm *kvm) {}
+
 #endif
 
 static inline void kvm_make_request(int req, struct kvm_vcpu *vcpu)
@@ -1028,6 +1046,46 @@ static inline bool kvm_check_request(int req, struct kvm_vcpu *vcpu)
 	}
 }
 
+extern bool kvm_rebooting;
+
+struct kvm_device_ops;
+
+struct kvm_device {
+	struct kvm_device_ops *ops;
+	struct kvm *kvm;
+	void *private;
+	struct list_head vm_node;
+};
+
+/* create, destroy, and name are mandatory */
+struct kvm_device_ops {
+	const char *name;
+	int (*create)(struct kvm_device *dev, u32 type);
+
+	/*
+	 * Destroy is responsible for freeing dev.
+	 *
+	 * Destroy may be called before or after destructors are called
+	 * on emulated I/O regions, depending on whether a reference is
+	 * held by a vcpu or other kvm component that gets destroyed
+	 * after the emulated I/O.
+	 */
+	void (*destroy)(struct kvm_device *dev);
+
+	int (*set_attr)(struct kvm_device *dev, struct kvm_device_attr *attr);
+	int (*get_attr)(struct kvm_device *dev, struct kvm_device_attr *attr);
+	int (*has_attr)(struct kvm_device *dev, struct kvm_device_attr *attr);
+	long (*ioctl)(struct kvm_device *dev, unsigned int ioctl,
+		      unsigned long arg);
+};
+
+void kvm_device_get(struct kvm_device *dev);
+void kvm_device_put(struct kvm_device *dev);
+struct kvm_device *kvm_device_from_filp(struct file *filp);
+
+extern struct kvm_device_ops kvm_mpic_ops;
+extern struct kvm_device_ops kvm_xics_ops;
+
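This ops table is what an in-kernel device backend registers; the MPIC and XICS instances declared just above are the first users. A minimal hedged sketch of a backend filling in only the mandatory fields (the "demo" device and its state struct are hypothetical):

/* Hypothetical device state, for illustration only. */
struct demo_state {
	int level;
};

static int demo_create(struct kvm_device *dev, u32 type)
{
	struct demo_state *s = kzalloc(sizeof(*s), GFP_KERNEL);

	if (!s)
		return -ENOMEM;
	dev->private = s;	/* per-device state */
	return 0;
}

static void demo_destroy(struct kvm_device *dev)
{
	kfree(dev->private);
	kfree(dev);	/* per the comment above, destroy frees dev itself */
}

struct kvm_device_ops kvm_demo_ops = {
	.name = "demo",
	.create = demo_create,
	.destroy = demo_destroy,
};

Userspace instantiates such a device with the KVM_CREATE_DEVICE vm ioctl, then drives the set_attr/get_attr/has_attr hooks through the device-attribute ioctls described under Documentation/virtual/kvm/devices/.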
 #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT
 
 static inline void kvm_vcpu_set_in_spin_loop(struct kvm_vcpu *vcpu, bool val)

Some files were not shown because too many files changed in this diff