
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull KVM update from Paolo Bonzini:
 "Fairly small update, but there are some interesting new features.

  Common:
     Optional support for adding a small amount of polling on each HLT
     instruction executed in the guest (or equivalent for other
     architectures).  This can improve latency up to 50% on some
     scenarios (e.g. O_DSYNC writes or TCP_RR netperf tests).  This
     also has to be enabled manually for now, but the plan is to
     auto-tune this in the future.

  ARM/ARM64:
     The highlights are support for GICv3 emulation and dirty page
     tracking

  s390:
     Several optimizations and bugfixes.  Also a first: a feature
     exposed by KVM (UUID and long guest name in /proc/sysinfo) before
     it is available in IBM's hypervisor! :)

  MIPS:
     Bugfixes.

  x86:
     Support for PML (page modification logging, a new feature in
     Broadwell Xeons that speeds up dirty page tracking), nested
     virtualization improvements (nested APICv---a nice optimization),
     usual round of emulation fixes.

     There is also a new option to reduce latency of the TSC deadline
     timer in the guest; this needs to be tuned manually.

     Some commits are common between this pull and Catalin's; I see you
     have already included his tree.

  Powerpc:
     Nothing yet.

     The KVM/PPC changes will come in through the PPC maintainers,
     because I haven't received them yet and I might end up being
     offline for some part of next week"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (130 commits)
  KVM: ia64: drop kvm.h from installed user headers
  KVM: x86: fix build with !CONFIG_SMP
  KVM: x86: emulate: correct page fault error code for NoWrite instructions
  KVM: Disable compat ioctl for s390
  KVM: s390: add cpu model support
  KVM: s390: use facilities and cpu_id per KVM
  KVM: s390/CPACF: Choose crypto control block format
  s390/kernel: Update /proc/sysinfo file with Extended Name and UUID
  KVM: s390: reenable LPP facility
  KVM: s390: floating irqs: fix user triggerable endless loop
  kvm: add halt_poll_ns module parameter
  kvm: remove KVM_MMIO_SIZE
  KVM: MIPS: Don't leak FPU/DSP to guest
  KVM: MIPS: Disable HTW while in guest
  KVM: nVMX: Enable nested posted interrupt processing
  KVM: nVMX: Enable nested virtual interrupt delivery
  KVM: nVMX: Enable nested apic register virtualization
  KVM: nVMX: Make nested control MSRs per-cpu
  KVM: nVMX: Enable nested virtualize x2apic mode
  KVM: nVMX: Prepare for using hardware MSR bitmap
  ...
Linus Torvalds, 10 years ago
commit b9085bcbf5
88 changed files with 6026 additions and 1626 deletions
  1. Documentation/virtual/kvm/api.txt (+29, -6)
  2. Documentation/virtual/kvm/devices/arm-vgic.txt (+35, -2)
  3. Documentation/virtual/kvm/devices/vm.txt (+59, -0)
  4. arch/arm/include/asm/kvm_asm.h (+1, -0)
  5. arch/arm/include/asm/kvm_emulate.h (+3, -2)
  6. arch/arm/include/asm/kvm_host.h (+6, -0)
  7. arch/arm/include/asm/kvm_mmio.h (+1, -0)
  8. arch/arm/include/asm/kvm_mmu.h (+21, -0)
  9. arch/arm/include/asm/pgtable-3level.h (+1, -0)
  10. arch/arm/include/uapi/asm/kvm.h (+2, -0)
  11. arch/arm/kvm/Kconfig (+2, -0)
  12. arch/arm/kvm/Makefile (+1, -0)
  13. arch/arm/kvm/arm.c (+54, -4)
  14. arch/arm/kvm/handle_exit.c (+5, -3)
  15. arch/arm/kvm/interrupts.S (+11, -0)
  16. arch/arm/kvm/mmu.c (+262, -9)
  17. arch/arm/kvm/psci.c (+5, -12)
  18. arch/arm/kvm/trace.h (+7, -4)
  19. arch/arm64/include/asm/esr.h (+1, -0)
  20. arch/arm64/include/asm/kvm_asm.h (+1, -0)
  21. arch/arm64/include/asm/kvm_emulate.h (+8, -2)
  22. arch/arm64/include/asm/kvm_host.h (+7, -0)
  23. arch/arm64/include/asm/kvm_mmio.h (+1, -0)
  24. arch/arm64/include/asm/kvm_mmu.h (+21, -0)
  25. arch/arm64/include/asm/pgtable-hwdef.h (+1, -0)
  26. arch/arm64/include/uapi/asm/kvm.h (+9, -0)
  27. arch/arm64/kernel/asm-offsets.c (+1, -0)
  28. arch/arm64/kvm/Kconfig (+2, -0)
  29. arch/arm64/kvm/Makefile (+2, -0)
  30. arch/arm64/kvm/handle_exit.c (+11, -2)
  31. arch/arm64/kvm/hyp.S (+22, -0)
  32. arch/arm64/kvm/sys_regs.c (+38, -2)
  33. arch/arm64/kvm/trace.h (+55, -0)
  34. arch/arm64/kvm/vgic-v3-switch.S (+9, -5)
  35. arch/ia64/include/uapi/asm/Kbuild (+0, -1)
  36. arch/mips/include/asm/kvm_host.h (+1, -0)
  37. arch/mips/kvm/locore.S (+1, -1)
  38. arch/mips/kvm/mips.c (+18, -5)
  39. arch/powerpc/include/asm/kvm_host.h (+1, -0)
  40. arch/powerpc/kvm/book3s.c (+1, -0)
  41. arch/powerpc/kvm/booke.c (+1, -0)
  42. arch/powerpc/kvm/powerpc.c (+1, -2)
  43. arch/s390/include/asm/kvm_host.h (+44, -12)
  44. arch/s390/include/asm/sclp.h (+3, -1)
  45. arch/s390/include/asm/sysinfo.h (+7, -3)
  46. arch/s390/include/uapi/asm/kvm.h (+37, -0)
  47. arch/s390/kernel/sysinfo.c (+29, -0)
  48. arch/s390/kvm/gaccess.c (+2, -2)
  49. arch/s390/kvm/intercept.c (+27, -14)
  50. arch/s390/kvm/interrupt.c (+134, -57)
  51. arch/s390/kvm/kvm-s390.c (+536, -60)
  52. arch/s390/kvm/kvm-s390.h (+13, -6)
  53. arch/s390/kvm/priv.c (+9, -4)
  54. arch/s390/kvm/sigp.c (+93, -67)
  55. arch/s390/kvm/trace-s390.h (+8, -6)
  56. arch/x86/include/asm/kvm_emulate.h (+1, -0)
  57. arch/x86/include/asm/kvm_host.h (+52, -7)
  58. arch/x86/include/asm/vmx.h (+4, -0)
  59. arch/x86/include/uapi/asm/msr-index.h (+3, -0)
  60. arch/x86/include/uapi/asm/vmx.h (+6, -0)
  61. arch/x86/kvm/Kconfig (+1, -0)
  62. arch/x86/kvm/emulate.c (+156, -74)
  63. arch/x86/kvm/ioapic.h (+1, -1)
  64. arch/x86/kvm/iommu.c (+3, -1)
  65. arch/x86/kvm/lapic.c (+96, -51)
  66. arch/x86/kvm/lapic.h (+4, -2)
  67. arch/x86/kvm/mmu.c (+287, -64)
  68. arch/x86/kvm/mmu.h (+2, -15)
  69. arch/x86/kvm/svm.c (+2, -2)
  70. arch/x86/kvm/trace.h (+38, -0)
  71. arch/x86/kvm/vmx.c (+953, -133)
  72. arch/x86/kvm/x86.c (+127, -82)
  73. arch/x86/kvm/x86.h (+3, -0)
  74. drivers/irqchip/irq-gic-v3.c (+9, -5)
  75. drivers/s390/char/sclp_early.c (+8, -0)
  76. include/kvm/arm_vgic.h (+38, -5)
  77. include/linux/irqchip/arm-gic-v3.h (+44, -0)
  78. include/linux/kvm_host.h (+12, -5)
  79. include/trace/events/kvm.h (+19, -0)
  80. include/uapi/linux/kvm.h (+9, -0)
  81. virt/kvm/Kconfig (+10, -0)
  82. virt/kvm/arm/vgic-v2-emul.c (+847, -0)
  83. virt/kvm/arm/vgic-v2.c (+4, -0)
  84. virt/kvm/arm/vgic-v3-emul.c (+1036, -0)
  85. virt/kvm/arm/vgic-v3.c (+57, -25)
  86. virt/kvm/arm/vgic.c (+280, -847)
  87. virt/kvm/arm/vgic.h (+123, -0)
  88. virt/kvm/kvm_main.c (+131, -13)

+ 29 - 6
Documentation/virtual/kvm/api.txt

@@ -612,11 +612,14 @@ Type: vm ioctl
 Parameters: none
 Returns: 0 on success, -1 on error
 
-Creates an interrupt controller model in the kernel.  On x86, creates a virtual
-ioapic, a virtual PIC (two PICs, nested), and sets up future vcpus to have a
-local APIC.  IRQ routing for GSIs 0-15 is set to both PIC and IOAPIC; GSI 16-23
-only go to the IOAPIC.  On ARM/arm64, a GIC is
-created. On s390, a dummy irq routing table is created.
+Creates an interrupt controller model in the kernel.
+On x86, creates a virtual ioapic, a virtual PIC (two PICs, nested), and sets up
+future vcpus to have a local APIC.  IRQ routing for GSIs 0-15 is set to both
+PIC and IOAPIC; GSI 16-23 only go to the IOAPIC.
+On ARM/arm64, a GICv2 is created. Any other GIC versions require the usage of
+KVM_CREATE_DEVICE, which also supports creating a GICv2.  Using
+KVM_CREATE_DEVICE is preferred over KVM_CREATE_IRQCHIP for GICv2.
+On s390, a dummy irq routing table is created.
 
 Note that on s390 the KVM_CAP_S390_IRQCHIP vm capability needs to be enabled
 before KVM_CREATE_IRQCHIP can be used.
@@ -2312,7 +2315,7 @@ struct kvm_s390_interrupt {
 
 type can be one of the following:
 
-KVM_S390_SIGP_STOP (vcpu) - sigp restart
+KVM_S390_SIGP_STOP (vcpu) - sigp stop; optional flags in parm
 KVM_S390_PROGRAM_INT (vcpu) - program check; code in parm
 KVM_S390_SIGP_SET_PREFIX (vcpu) - sigp set prefix; prefix address in parm
 KVM_S390_RESTART (vcpu) - restart
@@ -3225,3 +3228,23 @@ userspace from doing that.
 If the hcall number specified is not one that has an in-kernel
 implementation, the KVM_ENABLE_CAP ioctl will fail with an EINVAL
 error.
+
+7.2 KVM_CAP_S390_USER_SIGP
+
+Architectures: s390
+Parameters: none
+
+This capability controls which SIGP orders will be handled completely in user
+space. With this capability enabled, all fast orders will be handled completely
+in the kernel:
+- SENSE
+- SENSE RUNNING
+- EXTERNAL CALL
+- EMERGENCY SIGNAL
+- CONDITIONAL EMERGENCY SIGNAL
+
+All other orders will be handled completely in user space.
+
+Only privileged operation exceptions will be checked for in the kernel (or even
+in the hardware prior to interception). If this capability is not enabled, the
+old way of handling SIGP orders is used (partially in kernel and user space).
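
The rewritten KVM_CREATE_IRQCHIP text above points ARM/arm64 userspace at KVM_CREATE_DEVICE instead of the legacy ioctl. As a rough sketch (not taken from this series; the helper name and error handling are invented, while the ioctl and struct kvm_create_device are the standard KVM device API), creating the in-kernel GICv2 that way looks roughly like:

#include <linux/kvm.h>
#include <sys/ioctl.h>

/* Hypothetical helper: vm_fd comes from an earlier KVM_CREATE_VM. */
static int create_vgic_v2(int vm_fd)
{
	struct kvm_create_device dev = {
		.type = KVM_DEV_TYPE_ARM_VGIC_V2,
	};

	if (ioctl(vm_fd, KVM_CREATE_DEVICE, &dev) < 0)
		return -1;	/* e.g. fall back to KVM_CREATE_IRQCHIP */

	return dev.fd;		/* device fd, used with KVM_SET_DEVICE_ATTR below */
}

On success the kernel fills in dev.fd, and the GIC is then configured through the attribute groups described in the arm-vgic.txt changes that follow.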

+ 35 - 2
Documentation/virtual/kvm/devices/arm-vgic.txt

@@ -3,22 +3,42 @@ ARM Virtual Generic Interrupt Controller (VGIC)
 
 Device types supported:
   KVM_DEV_TYPE_ARM_VGIC_V2     ARM Generic Interrupt Controller v2.0
+  KVM_DEV_TYPE_ARM_VGIC_V3     ARM Generic Interrupt Controller v3.0
 
 Only one VGIC instance may be instantiated through either this API or the
 legacy KVM_CREATE_IRQCHIP api.  The created VGIC will act as the VM interrupt
 controller, requiring emulated user-space devices to inject interrupts to the
 VGIC instead of directly to CPUs.
 
+Creating a guest GICv3 device requires a host GICv3 as well.
+GICv3 implementations with hardware compatibility support allow a guest GICv2
+as well.
+
 Groups:
   KVM_DEV_ARM_VGIC_GRP_ADDR
   Attributes:
     KVM_VGIC_V2_ADDR_TYPE_DIST (rw, 64-bit)
       Base address in the guest physical address space of the GIC distributor
-      register mappings.
+      register mappings. Only valid for KVM_DEV_TYPE_ARM_VGIC_V2.
+      This address needs to be 4K aligned and the region covers 4 KByte.
 
     KVM_VGIC_V2_ADDR_TYPE_CPU (rw, 64-bit)
       Base address in the guest physical address space of the GIC virtual cpu
-      interface register mappings.
+      interface register mappings. Only valid for KVM_DEV_TYPE_ARM_VGIC_V2.
+      This address needs to be 4K aligned and the region covers 4 KByte.
+
+    KVM_VGIC_V3_ADDR_TYPE_DIST (rw, 64-bit)
+      Base address in the guest physical address space of the GICv3 distributor
+      register mappings. Only valid for KVM_DEV_TYPE_ARM_VGIC_V3.
+      This address needs to be 64K aligned and the region covers 64 KByte.
+
+    KVM_VGIC_V3_ADDR_TYPE_REDIST (rw, 64-bit)
+      Base address in the guest physical address space of the GICv3
+      redistributor register mappings. There are two 64K pages for each
+      VCPU and all of the redistributor pages are contiguous.
+      Only valid for KVM_DEV_TYPE_ARM_VGIC_V3.
+      This address needs to be 64K aligned.
+
 
   KVM_DEV_ARM_VGIC_GRP_DIST_REGS
   Attributes:
@@ -36,6 +56,7 @@ Groups:
     the register.
   Limitations:
     - Priorities are not implemented, and registers are RAZ/WI
+    - Currently only implemented for KVM_DEV_TYPE_ARM_VGIC_V2.
   Errors:
     -ENODEV: Getting or setting this register is not yet supported
     -EBUSY: One or more VCPUs are running
@@ -68,6 +89,7 @@ Groups:
 
   Limitations:
     - Priorities are not implemented, and registers are RAZ/WI
+    - Currently only implemented for KVM_DEV_TYPE_ARM_VGIC_V2.
   Errors:
     -ENODEV: Getting or setting this register is not yet supported
     -EBUSY: One or more VCPUs are running
@@ -81,3 +103,14 @@ Groups:
     -EINVAL: Value set is out of the expected range
     -EBUSY: Value has already be set, or GIC has already been initialized
             with default values.
+
+  KVM_DEV_ARM_VGIC_GRP_CTRL
+  Attributes:
+    KVM_DEV_ARM_VGIC_CTRL_INIT
+      request the initialization of the VGIC, no additional parameter in
+      kvm_device_attr.addr.
+  Errors:
+    -ENXIO: VGIC not properly configured as required prior to calling
+     this attribute
+    -ENODEV: no online VCPU
+    -ENOMEM: memory shortage when allocating vgic internal data
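
To make the new GICv3 attributes concrete, here is a hedged userspace sketch (not part of the patch; the helper names and any example addresses are invented, the groups and ioctl are the ones documented above) for placing the regions and then requesting initialization:

#include <linux/kvm.h>
#include <stdint.h>
#include <sys/ioctl.h>

/* vgic_fd is the fd returned by KVM_CREATE_DEVICE for KVM_DEV_TYPE_ARM_VGIC_V3. */
static int vgic_v3_set_addr(int vgic_fd, uint64_t type, uint64_t base)
{
	struct kvm_device_attr attr = {
		.group = KVM_DEV_ARM_VGIC_GRP_ADDR,
		.attr  = type,				/* e.g. KVM_VGIC_V3_ADDR_TYPE_DIST */
		.addr  = (uint64_t)(uintptr_t)&base,	/* points at the 64-bit base address */
	};
	return ioctl(vgic_fd, KVM_SET_DEVICE_ATTR, &attr);
}

static int vgic_v3_init(int vgic_fd)
{
	struct kvm_device_attr attr = {
		.group = KVM_DEV_ARM_VGIC_GRP_CTRL,
		.attr  = KVM_DEV_ARM_VGIC_CTRL_INIT,	/* no payload in .addr */
	};
	return ioctl(vgic_fd, KVM_SET_DEVICE_ATTR, &attr);
}

Per the text above, the distributor base must be 64K aligned and the redistributor region needs two contiguous 64K pages per VCPU; the guest physical addresses themselves are up to the VMM.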

+ 59 - 0
Documentation/virtual/kvm/devices/vm.txt

@@ -24,3 +24,62 @@ Returns: 0
 
 Clear the CMMA status for all guest pages, so any pages the guest marked
 as unused are again used any may not be reclaimed by the host.
+
+1.3. ATTRIBUTE KVM_S390_VM_MEM_LIMIT_SIZE
+Parameters: in attr->addr the address for the new limit of guest memory
+Returns: -EFAULT if the given address is not accessible
+         -EINVAL if the virtual machine is of type UCONTROL
+         -E2BIG if the given guest memory is to big for that machine
+         -EBUSY if a vcpu is already defined
+         -ENOMEM if not enough memory is available for a new shadow guest mapping
+          0 otherwise
+
+Allows userspace to query the actual limit and set a new limit for
+the maximum guest memory size. The limit will be rounded up to
+2048 MB, 4096 GB, 8192 TB respectively, as this limit is governed by
+the number of page table levels.
+
+2. GROUP: KVM_S390_VM_CPU_MODEL
+Architectures: s390
+
+2.1. ATTRIBUTE: KVM_S390_VM_CPU_MACHINE (r/o)
+
+Allows user space to retrieve machine and kvm specific cpu related information:
+
+struct kvm_s390_vm_cpu_machine {
+       __u64 cpuid;           # CPUID of host
+       __u32 ibc;             # IBC level range offered by host
+       __u8  pad[4];
+       __u64 fac_mask[256];   # set of cpu facilities enabled by KVM
+       __u64 fac_list[256];   # set of cpu facilities offered by host
+}
+
+Parameters: address of buffer to store the machine related cpu data
+            of type struct kvm_s390_vm_cpu_machine*
+Returns:    -EFAULT if the given address is not accessible from kernel space
+	    -ENOMEM if not enough memory is available to process the ioctl
+	    0 in case of success
+
+2.2. ATTRIBUTE: KVM_S390_VM_CPU_PROCESSOR (r/w)
+
+Allows user space to retrieve or request to change cpu related information for a vcpu:
+
+struct kvm_s390_vm_cpu_processor {
+       __u64 cpuid;           # CPUID currently (to be) used by this vcpu
+       __u16 ibc;             # IBC level currently (to be) used by this vcpu
+       __u8  pad[6];
+       __u64 fac_list[256];   # set of cpu facilities currently (to be) used
+                              # by this vcpu
+}
+
+KVM does not enforce or limit the cpu model data in any form. Take the information
+retrieved by means of KVM_S390_VM_CPU_MACHINE as hint for reasonable configuration
+setups. Instruction interceptions triggered by additionally set facilitiy bits that
+are not handled by KVM need to by imlemented in the VM driver code.
+
+Parameters: address of buffer to store/set the processor related cpu
+	    data of type struct kvm_s390_vm_cpu_processor*.
+Returns:    -EBUSY in case 1 or more vcpus are already activated (only in write case)
+	    -EFAULT if the given address is not accessible from kernel space
+	    -ENOMEM if not enough memory is available to process the ioctl
+	    0 in case of success
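
As with the VGIC device, these VM attributes are reached through the generic attribute ioctls, only issued on the VM file descriptor. Below is a minimal read-side sketch, assuming the group/attribute constants and struct kvm_s390_vm_cpu_machine from the s390 uapi header touched by this series (the helper itself is hypothetical):

#include <asm/kvm.h>		/* s390: struct kvm_s390_vm_cpu_machine, group/attr constants */
#include <linux/kvm.h>
#include <stdint.h>
#include <sys/ioctl.h>

static int query_cpu_machine(int vm_fd, struct kvm_s390_vm_cpu_machine *mach)
{
	struct kvm_device_attr attr = {
		.group = KVM_S390_VM_CPU_MODEL,
		.attr  = KVM_S390_VM_CPU_MACHINE,
		.addr  = (uint64_t)(uintptr_t)mach,	/* buffer described in 2.1 above */
	};
	return ioctl(vm_fd, KVM_GET_DEVICE_ATTR, &attr);
}

Setting KVM_S390_VM_CPU_PROCESSOR works the same way via KVM_SET_DEVICE_ATTR and, per the -EBUSY case above, has to happen before any VCPU is created.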

+ 1 - 0
arch/arm/include/asm/kvm_asm.h

@@ -96,6 +96,7 @@ extern char __kvm_hyp_code_end[];
 
 
 extern void __kvm_flush_vm_context(void);
 extern void __kvm_flush_vm_context(void);
 extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa);
 extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa);
+extern void __kvm_tlb_flush_vmid(struct kvm *kvm);
 
 
 extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu);
 extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu);
 #endif
 #endif

+ 3 - 2
arch/arm/include/asm/kvm_emulate.h

@@ -23,6 +23,7 @@
 #include <asm/kvm_asm.h>
 #include <asm/kvm_asm.h>
 #include <asm/kvm_mmio.h>
 #include <asm/kvm_mmio.h>
 #include <asm/kvm_arm.h>
 #include <asm/kvm_arm.h>
+#include <asm/cputype.h>
 
 
 unsigned long *vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num);
 unsigned long *vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num);
 unsigned long *vcpu_spsr(struct kvm_vcpu *vcpu);
 unsigned long *vcpu_spsr(struct kvm_vcpu *vcpu);
@@ -177,9 +178,9 @@ static inline u32 kvm_vcpu_hvc_get_imm(struct kvm_vcpu *vcpu)
 	return kvm_vcpu_get_hsr(vcpu) & HSR_HVC_IMM_MASK;
 	return kvm_vcpu_get_hsr(vcpu) & HSR_HVC_IMM_MASK;
 }
 }
 
 
-static inline unsigned long kvm_vcpu_get_mpidr(struct kvm_vcpu *vcpu)
+static inline unsigned long kvm_vcpu_get_mpidr_aff(struct kvm_vcpu *vcpu)
 {
 {
-	return vcpu->arch.cp15[c0_MPIDR];
+	return vcpu->arch.cp15[c0_MPIDR] & MPIDR_HWID_BITMASK;
 }
 }
 
 
 static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu)
 static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu)

+ 6 - 0
arch/arm/include/asm/kvm_host.h

@@ -68,6 +68,7 @@ struct kvm_arch {
 
 
 	/* Interrupt controller */
 	/* Interrupt controller */
 	struct vgic_dist	vgic;
 	struct vgic_dist	vgic;
+	int max_vcpus;
 };
 };
 
 
 #define KVM_NR_MEM_OBJS     40
 #define KVM_NR_MEM_OBJS     40
@@ -144,6 +145,7 @@ struct kvm_vm_stat {
 };
 };
 
 
 struct kvm_vcpu_stat {
 struct kvm_vcpu_stat {
+	u32 halt_successful_poll;
 	u32 halt_wakeup;
 	u32 halt_wakeup;
 };
 };
 
 
@@ -231,6 +233,10 @@ static inline void vgic_arch_setup(const struct vgic_params *vgic)
 int kvm_perf_init(void);
 int kvm_perf_init(void);
 int kvm_perf_teardown(void);
 int kvm_perf_teardown(void);
 
 
+void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot);
+
+struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr);
+
 static inline void kvm_arch_hardware_disable(void) {}
 static inline void kvm_arch_hardware_disable(void) {}
 static inline void kvm_arch_hardware_unsetup(void) {}
 static inline void kvm_arch_hardware_unsetup(void) {}
 static inline void kvm_arch_sync_events(struct kvm *kvm) {}
 static inline void kvm_arch_sync_events(struct kvm *kvm) {}

+ 1 - 0
arch/arm/include/asm/kvm_mmio.h

@@ -37,6 +37,7 @@ struct kvm_exit_mmio {
 	u8		data[8];
 	u8		data[8];
 	u32		len;
 	u32		len;
 	bool		is_write;
 	bool		is_write;
+	void		*private;
 };
 };
 
 
 static inline void kvm_prepare_mmio(struct kvm_run *run,
 static inline void kvm_prepare_mmio(struct kvm_run *run,

+ 21 - 0
arch/arm/include/asm/kvm_mmu.h

@@ -115,6 +115,27 @@ static inline void kvm_set_s2pmd_writable(pmd_t *pmd)
 	pmd_val(*pmd) |= L_PMD_S2_RDWR;
 	pmd_val(*pmd) |= L_PMD_S2_RDWR;
 }
 }
 
 
+static inline void kvm_set_s2pte_readonly(pte_t *pte)
+{
+	pte_val(*pte) = (pte_val(*pte) & ~L_PTE_S2_RDWR) | L_PTE_S2_RDONLY;
+}
+
+static inline bool kvm_s2pte_readonly(pte_t *pte)
+{
+	return (pte_val(*pte) & L_PTE_S2_RDWR) == L_PTE_S2_RDONLY;
+}
+
+static inline void kvm_set_s2pmd_readonly(pmd_t *pmd)
+{
+	pmd_val(*pmd) = (pmd_val(*pmd) & ~L_PMD_S2_RDWR) | L_PMD_S2_RDONLY;
+}
+
+static inline bool kvm_s2pmd_readonly(pmd_t *pmd)
+{
+	return (pmd_val(*pmd) & L_PMD_S2_RDWR) == L_PMD_S2_RDONLY;
+}
+
+
 /* Open coded p*d_addr_end that can deal with 64bit addresses */
 /* Open coded p*d_addr_end that can deal with 64bit addresses */
 #define kvm_pgd_addr_end(addr, end)					\
 #define kvm_pgd_addr_end(addr, end)					\
 ({	u64 __boundary = ((addr) + PGDIR_SIZE) & PGDIR_MASK;		\
 ({	u64 __boundary = ((addr) + PGDIR_SIZE) & PGDIR_MASK;		\

+ 1 - 0
arch/arm/include/asm/pgtable-3level.h

@@ -129,6 +129,7 @@
 #define L_PTE_S2_RDONLY			(_AT(pteval_t, 1) << 6)   /* HAP[1]   */
 #define L_PTE_S2_RDONLY			(_AT(pteval_t, 1) << 6)   /* HAP[1]   */
 #define L_PTE_S2_RDWR			(_AT(pteval_t, 3) << 6)   /* HAP[2:1] */
 #define L_PTE_S2_RDWR			(_AT(pteval_t, 3) << 6)   /* HAP[2:1] */
 
 
+#define L_PMD_S2_RDONLY			(_AT(pmdval_t, 1) << 6)   /* HAP[1]   */
 #define L_PMD_S2_RDWR			(_AT(pmdval_t, 3) << 6)   /* HAP[2:1] */
 #define L_PMD_S2_RDWR			(_AT(pmdval_t, 3) << 6)   /* HAP[2:1] */
 
 
 /*
 /*

+ 2 - 0
arch/arm/include/uapi/asm/kvm.h

@@ -175,6 +175,8 @@ struct kvm_arch_memory_slot {
 #define   KVM_DEV_ARM_VGIC_OFFSET_SHIFT	0
 #define   KVM_DEV_ARM_VGIC_OFFSET_SHIFT	0
 #define   KVM_DEV_ARM_VGIC_OFFSET_MASK	(0xffffffffULL << KVM_DEV_ARM_VGIC_OFFSET_SHIFT)
 #define   KVM_DEV_ARM_VGIC_OFFSET_MASK	(0xffffffffULL << KVM_DEV_ARM_VGIC_OFFSET_SHIFT)
 #define KVM_DEV_ARM_VGIC_GRP_NR_IRQS	3
 #define KVM_DEV_ARM_VGIC_GRP_NR_IRQS	3
+#define KVM_DEV_ARM_VGIC_GRP_CTRL       4
+#define   KVM_DEV_ARM_VGIC_CTRL_INIT    0
 
 
 /* KVM_IRQ_LINE irq field index values */
 /* KVM_IRQ_LINE irq field index values */
 #define KVM_ARM_IRQ_TYPE_SHIFT		24
 #define KVM_ARM_IRQ_TYPE_SHIFT		24

+ 2 - 0
arch/arm/kvm/Kconfig

@@ -21,8 +21,10 @@ config KVM
 	select PREEMPT_NOTIFIERS
 	select PREEMPT_NOTIFIERS
 	select ANON_INODES
 	select ANON_INODES
 	select HAVE_KVM_CPU_RELAX_INTERCEPT
 	select HAVE_KVM_CPU_RELAX_INTERCEPT
+	select HAVE_KVM_ARCH_TLB_FLUSH_ALL
 	select KVM_MMIO
 	select KVM_MMIO
 	select KVM_ARM_HOST
 	select KVM_ARM_HOST
+	select KVM_GENERIC_DIRTYLOG_READ_PROTECT
 	select SRCU
 	select SRCU
 	depends on ARM_VIRT_EXT && ARM_LPAE
 	depends on ARM_VIRT_EXT && ARM_LPAE
 	---help---
 	---help---

+ 1 - 0
arch/arm/kvm/Makefile

@@ -22,4 +22,5 @@ obj-y += arm.o handle_exit.o guest.o mmu.o emulate.o reset.o
 obj-y += coproc.o coproc_a15.o coproc_a7.o mmio.o psci.o perf.o
 obj-y += coproc.o coproc_a15.o coproc_a7.o mmio.o psci.o perf.o
 obj-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic.o
 obj-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic.o
 obj-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v2.o
 obj-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v2.o
+obj-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v2-emul.o
 obj-$(CONFIG_KVM_ARM_TIMER) += $(KVM)/arm/arch_timer.o
 obj-$(CONFIG_KVM_ARM_TIMER) += $(KVM)/arm/arch_timer.o

+ 54 - 4
arch/arm/kvm/arm.c

@@ -132,6 +132,9 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 	/* Mark the initial VMID generation invalid */
 	/* Mark the initial VMID generation invalid */
 	kvm->arch.vmid_gen = 0;
 	kvm->arch.vmid_gen = 0;
 
 
+	/* The maximum number of VCPUs is limited by the host's GIC model */
+	kvm->arch.max_vcpus = kvm_vgic_get_max_vcpus();
+
 	return ret;
 	return ret;
 out_free_stage2_pgd:
 out_free_stage2_pgd:
 	kvm_free_stage2_pgd(kvm);
 	kvm_free_stage2_pgd(kvm);
@@ -218,6 +221,11 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
 		goto out;
 		goto out;
 	}
 	}
 
 
+	if (id >= kvm->arch.max_vcpus) {
+		err = -EINVAL;
+		goto out;
+	}
+
 	vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
 	vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
 	if (!vcpu) {
 	if (!vcpu) {
 		err = -ENOMEM;
 		err = -ENOMEM;
@@ -241,9 +249,8 @@ out:
 	return ERR_PTR(err);
 	return ERR_PTR(err);
 }
 }
 
 
-int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
+void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
 {
 {
-	return 0;
 }
 }
 
 
 void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
 void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
@@ -777,9 +784,39 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 	}
 	}
 }
 }
 
 
+/**
+ * kvm_vm_ioctl_get_dirty_log - get and clear the log of dirty pages in a slot
+ * @kvm: kvm instance
+ * @log: slot id and address to which we copy the log
+ *
+ * Steps 1-4 below provide general overview of dirty page logging. See
+ * kvm_get_dirty_log_protect() function description for additional details.
+ *
+ * We call kvm_get_dirty_log_protect() to handle steps 1-3, upon return we
+ * always flush the TLB (step 4) even if previous step failed  and the dirty
+ * bitmap may be corrupt. Regardless of previous outcome the KVM logging API
+ * does not preclude user space subsequent dirty log read. Flushing TLB ensures
+ * writes will be marked dirty for next log read.
+ *
+ *   1. Take a snapshot of the bit and clear it if needed.
+ *   2. Write protect the corresponding page.
+ *   3. Copy the snapshot to the userspace.
+ *   4. Flush TLB's if needed.
+ */
 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
 {
 {
-	return -EINVAL;
+	bool is_dirty = false;
+	int r;
+
+	mutex_lock(&kvm->slots_lock);
+
+	r = kvm_get_dirty_log_protect(kvm, log, &is_dirty);
+
+	if (is_dirty)
+		kvm_flush_remote_tlbs(kvm);
+
+	mutex_unlock(&kvm->slots_lock);
+	return r;
 }
 }
 
 
 static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm,
 static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm,
@@ -811,7 +848,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
 	switch (ioctl) {
 	switch (ioctl) {
 	case KVM_CREATE_IRQCHIP: {
 	case KVM_CREATE_IRQCHIP: {
 		if (vgic_present)
 		if (vgic_present)
-			return kvm_vgic_create(kvm);
+			return kvm_vgic_create(kvm, KVM_DEV_TYPE_ARM_VGIC_V2);
 		else
 		else
 			return -ENXIO;
 			return -ENXIO;
 	}
 	}
@@ -1035,6 +1072,19 @@ static void check_kvm_target_cpu(void *ret)
 	*(int *)ret = kvm_target_cpu();
 	*(int *)ret = kvm_target_cpu();
 }
 }
 
 
+struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr)
+{
+	struct kvm_vcpu *vcpu;
+	int i;
+
+	mpidr &= MPIDR_HWID_BITMASK;
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		if (mpidr == kvm_vcpu_get_mpidr_aff(vcpu))
+			return vcpu;
+	}
+	return NULL;
+}
+
 /**
 /**
  * Initialize Hyp-mode and memory mappings on all CPUs.
  * Initialize Hyp-mode and memory mappings on all CPUs.
  */
  */
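
The comment block added to kvm_vm_ioctl_get_dirty_log() above spells out the four steps (snapshot, write-protect, copy to userspace, TLB flush). The userspace half of that exchange is the plain KVM_GET_DIRTY_LOG ioctl; the sketch below is illustrative only, with a made-up helper and a memslot assumed to have been registered with KVM_MEM_LOG_DIRTY_PAGES:

#include <linux/kvm.h>
#include <stdint.h>
#include <stdlib.h>
#include <sys/ioctl.h>

static unsigned long *fetch_dirty_bitmap(int vm_fd, uint32_t slot, uint64_t npages)
{
	/* One bit per page, rounded up to whole longs, matching the kernel's bitmap. */
	size_t bits_per_long = 8 * sizeof(unsigned long);
	size_t bytes = ((npages + bits_per_long - 1) / bits_per_long) * sizeof(unsigned long);
	unsigned long *bitmap = calloc(1, bytes);
	struct kvm_dirty_log log = {
		.slot = slot,
		.dirty_bitmap = bitmap,
	};

	if (!bitmap || ioctl(vm_fd, KVM_GET_DIRTY_LOG, &log) < 0) {
		free(bitmap);
		return NULL;
	}
	return bitmap;	/* caller scans the set bits and frees it */
}

Steps 1-3 happen inside the kernel during the ioctl; the TLB flush (step 4) is done by the kernel whenever anything was dirty, so subsequent guest writes fault again and show up in the next read.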

+ 5 - 3
arch/arm/kvm/handle_exit.c

@@ -87,11 +87,13 @@ static int handle_dabt_hyp(struct kvm_vcpu *vcpu, struct kvm_run *run)
  */
  */
 static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run)
 static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run)
 {
 {
-	trace_kvm_wfi(*vcpu_pc(vcpu));
-	if (kvm_vcpu_get_hsr(vcpu) & HSR_WFI_IS_WFE)
+	if (kvm_vcpu_get_hsr(vcpu) & HSR_WFI_IS_WFE) {
+		trace_kvm_wfx(*vcpu_pc(vcpu), true);
 		kvm_vcpu_on_spin(vcpu);
 		kvm_vcpu_on_spin(vcpu);
-	else
+	} else {
+		trace_kvm_wfx(*vcpu_pc(vcpu), false);
 		kvm_vcpu_block(vcpu);
 		kvm_vcpu_block(vcpu);
+	}
 
 
 	kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
 	kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
 
 

+ 11 - 0
arch/arm/kvm/interrupts.S

@@ -66,6 +66,17 @@ ENTRY(__kvm_tlb_flush_vmid_ipa)
 	bx	lr
 	bx	lr
 ENDPROC(__kvm_tlb_flush_vmid_ipa)
 ENDPROC(__kvm_tlb_flush_vmid_ipa)
 
 
+/**
+ * void __kvm_tlb_flush_vmid(struct kvm *kvm) - Flush per-VMID TLBs
+ *
+ * Reuses __kvm_tlb_flush_vmid_ipa() for ARMv7, without passing address
+ * parameter
+ */
+
+ENTRY(__kvm_tlb_flush_vmid)
+	b	__kvm_tlb_flush_vmid_ipa
+ENDPROC(__kvm_tlb_flush_vmid)
+
 /********************************************************************
 /********************************************************************
  * Flush TLBs and instruction caches of all CPUs inside the inner-shareable
  * Flush TLBs and instruction caches of all CPUs inside the inner-shareable
  * domain, for all VMIDs
  * domain, for all VMIDs

+ 262 - 9
arch/arm/kvm/mmu.c

@@ -45,6 +45,26 @@ static phys_addr_t hyp_idmap_vector;
 #define hyp_pgd_order get_order(PTRS_PER_PGD * sizeof(pgd_t))
 #define hyp_pgd_order get_order(PTRS_PER_PGD * sizeof(pgd_t))
 
 
 #define kvm_pmd_huge(_x)	(pmd_huge(_x) || pmd_trans_huge(_x))
 #define kvm_pmd_huge(_x)	(pmd_huge(_x) || pmd_trans_huge(_x))
+#define kvm_pud_huge(_x)	pud_huge(_x)
+
+#define KVM_S2PTE_FLAG_IS_IOMAP		(1UL << 0)
+#define KVM_S2_FLAG_LOGGING_ACTIVE	(1UL << 1)
+
+static bool memslot_is_logging(struct kvm_memory_slot *memslot)
+{
+	return memslot->dirty_bitmap && !(memslot->flags & KVM_MEM_READONLY);
+}
+
+/**
+ * kvm_flush_remote_tlbs() - flush all VM TLB entries for v7/8
+ * @kvm:	pointer to kvm structure.
+ *
+ * Interface to HYP function to flush all VM TLB entries
+ */
+void kvm_flush_remote_tlbs(struct kvm *kvm)
+{
+	kvm_call_hyp(__kvm_tlb_flush_vmid, kvm);
+}
 
 
 static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
 static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
 {
 {
@@ -78,6 +98,25 @@ static void kvm_flush_dcache_pud(pud_t pud)
 	__kvm_flush_dcache_pud(pud);
 	__kvm_flush_dcache_pud(pud);
 }
 }
 
 
+/**
+ * stage2_dissolve_pmd() - clear and flush huge PMD entry
+ * @kvm:	pointer to kvm structure.
+ * @addr:	IPA
+ * @pmd:	pmd pointer for IPA
+ *
+ * Function clears a PMD entry, flushes addr 1st and 2nd stage TLBs. Marks all
+ * pages in the range dirty.
+ */
+static void stage2_dissolve_pmd(struct kvm *kvm, phys_addr_t addr, pmd_t *pmd)
+{
+	if (!kvm_pmd_huge(*pmd))
+		return;
+
+	pmd_clear(pmd);
+	kvm_tlb_flush_vmid_ipa(kvm, addr);
+	put_page(virt_to_page(pmd));
+}
+
 static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache,
 static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache,
 				  int min, int max)
 				  int min, int max)
 {
 {
@@ -819,10 +858,15 @@ static int stage2_set_pmd_huge(struct kvm *kvm, struct kvm_mmu_memory_cache
 }
 }
 
 
 static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
 static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
-			  phys_addr_t addr, const pte_t *new_pte, bool iomap)
+			  phys_addr_t addr, const pte_t *new_pte,
+			  unsigned long flags)
 {
 {
 	pmd_t *pmd;
 	pmd_t *pmd;
 	pte_t *pte, old_pte;
 	pte_t *pte, old_pte;
+	bool iomap = flags & KVM_S2PTE_FLAG_IS_IOMAP;
+	bool logging_active = flags & KVM_S2_FLAG_LOGGING_ACTIVE;
+
+	VM_BUG_ON(logging_active && !cache);
 
 
 	/* Create stage-2 page table mapping - Levels 0 and 1 */
 	/* Create stage-2 page table mapping - Levels 0 and 1 */
 	pmd = stage2_get_pmd(kvm, cache, addr);
 	pmd = stage2_get_pmd(kvm, cache, addr);
@@ -834,6 +878,13 @@ static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
 		return 0;
 		return 0;
 	}
 	}
 
 
+	/*
+	 * While dirty page logging - dissolve huge PMD, then continue on to
+	 * allocate page.
+	 */
+	if (logging_active)
+		stage2_dissolve_pmd(kvm, addr, pmd);
+
 	/* Create stage-2 page mappings - Level 2 */
 	/* Create stage-2 page mappings - Level 2 */
 	if (pmd_none(*pmd)) {
 	if (pmd_none(*pmd)) {
 		if (!cache)
 		if (!cache)
@@ -890,7 +941,8 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
 		if (ret)
 		if (ret)
 			goto out;
 			goto out;
 		spin_lock(&kvm->mmu_lock);
 		spin_lock(&kvm->mmu_lock);
-		ret = stage2_set_pte(kvm, &cache, addr, &pte, true);
+		ret = stage2_set_pte(kvm, &cache, addr, &pte,
+						KVM_S2PTE_FLAG_IS_IOMAP);
 		spin_unlock(&kvm->mmu_lock);
 		spin_unlock(&kvm->mmu_lock);
 		if (ret)
 		if (ret)
 			goto out;
 			goto out;
@@ -957,6 +1009,165 @@ static bool kvm_is_device_pfn(unsigned long pfn)
 	return !pfn_valid(pfn);
 	return !pfn_valid(pfn);
 }
 }
 
 
+/**
+ * stage2_wp_ptes - write protect PMD range
+ * @pmd:	pointer to pmd entry
+ * @addr:	range start address
+ * @end:	range end address
+ */
+static void stage2_wp_ptes(pmd_t *pmd, phys_addr_t addr, phys_addr_t end)
+{
+	pte_t *pte;
+
+	pte = pte_offset_kernel(pmd, addr);
+	do {
+		if (!pte_none(*pte)) {
+			if (!kvm_s2pte_readonly(pte))
+				kvm_set_s2pte_readonly(pte);
+		}
+	} while (pte++, addr += PAGE_SIZE, addr != end);
+}
+
+/**
+ * stage2_wp_pmds - write protect PUD range
+ * @pud:	pointer to pud entry
+ * @addr:	range start address
+ * @end:	range end address
+ */
+static void stage2_wp_pmds(pud_t *pud, phys_addr_t addr, phys_addr_t end)
+{
+	pmd_t *pmd;
+	phys_addr_t next;
+
+	pmd = pmd_offset(pud, addr);
+
+	do {
+		next = kvm_pmd_addr_end(addr, end);
+		if (!pmd_none(*pmd)) {
+			if (kvm_pmd_huge(*pmd)) {
+				if (!kvm_s2pmd_readonly(pmd))
+					kvm_set_s2pmd_readonly(pmd);
+			} else {
+				stage2_wp_ptes(pmd, addr, next);
+			}
+		}
+	} while (pmd++, addr = next, addr != end);
+}
+
+/**
+  * stage2_wp_puds - write protect PGD range
+  * @pgd:	pointer to pgd entry
+  * @addr:	range start address
+  * @end:	range end address
+  *
+  * Process PUD entries, for a huge PUD we cause a panic.
+  */
+static void  stage2_wp_puds(pgd_t *pgd, phys_addr_t addr, phys_addr_t end)
+{
+	pud_t *pud;
+	phys_addr_t next;
+
+	pud = pud_offset(pgd, addr);
+	do {
+		next = kvm_pud_addr_end(addr, end);
+		if (!pud_none(*pud)) {
+			/* TODO:PUD not supported, revisit later if supported */
+			BUG_ON(kvm_pud_huge(*pud));
+			stage2_wp_pmds(pud, addr, next);
+		}
+	} while (pud++, addr = next, addr != end);
+}
+
+/**
+ * stage2_wp_range() - write protect stage2 memory region range
+ * @kvm:	The KVM pointer
+ * @addr:	Start address of range
+ * @end:	End address of range
+ */
+static void stage2_wp_range(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
+{
+	pgd_t *pgd;
+	phys_addr_t next;
+
+	pgd = kvm->arch.pgd + pgd_index(addr);
+	do {
+		/*
+		 * Release kvm_mmu_lock periodically if the memory region is
+		 * large. Otherwise, we may see kernel panics with
+		 * CONFIG_DETECT_HUNG_TASK, CONFIG_LOCKUP_DETECTOR,
+		 * CONFIG_LOCKDEP. Additionally, holding the lock too long
+		 * will also starve other vCPUs.
+		 */
+		if (need_resched() || spin_needbreak(&kvm->mmu_lock))
+			cond_resched_lock(&kvm->mmu_lock);
+
+		next = kvm_pgd_addr_end(addr, end);
+		if (pgd_present(*pgd))
+			stage2_wp_puds(pgd, addr, next);
+	} while (pgd++, addr = next, addr != end);
+}
+
+/**
+ * kvm_mmu_wp_memory_region() - write protect stage 2 entries for memory slot
+ * @kvm:	The KVM pointer
+ * @slot:	The memory slot to write protect
+ *
+ * Called to start logging dirty pages after memory region
+ * KVM_MEM_LOG_DIRTY_PAGES operation is called. After this function returns
+ * all present PMD and PTEs are write protected in the memory region.
+ * Afterwards read of dirty page log can be called.
+ *
+ * Acquires kvm_mmu_lock. Called with kvm->slots_lock mutex acquired,
+ * serializing operations for VM memory regions.
+ */
+void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot)
+{
+	struct kvm_memory_slot *memslot = id_to_memslot(kvm->memslots, slot);
+	phys_addr_t start = memslot->base_gfn << PAGE_SHIFT;
+	phys_addr_t end = (memslot->base_gfn + memslot->npages) << PAGE_SHIFT;
+
+	spin_lock(&kvm->mmu_lock);
+	stage2_wp_range(kvm, start, end);
+	spin_unlock(&kvm->mmu_lock);
+	kvm_flush_remote_tlbs(kvm);
+}
+
+/**
+ * kvm_mmu_write_protect_pt_masked() - write protect dirty pages
+ * @kvm:	The KVM pointer
+ * @slot:	The memory slot associated with mask
+ * @gfn_offset:	The gfn offset in memory slot
+ * @mask:	The mask of dirty pages at offset 'gfn_offset' in this memory
+ *		slot to be write protected
+ *
+ * Walks bits set in mask write protects the associated pte's. Caller must
+ * acquire kvm_mmu_lock.
+ */
+static void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
+		struct kvm_memory_slot *slot,
+		gfn_t gfn_offset, unsigned long mask)
+{
+	phys_addr_t base_gfn = slot->base_gfn + gfn_offset;
+	phys_addr_t start = (base_gfn +  __ffs(mask)) << PAGE_SHIFT;
+	phys_addr_t end = (base_gfn + __fls(mask) + 1) << PAGE_SHIFT;
+
+	stage2_wp_range(kvm, start, end);
+}
+
+/*
+ * kvm_arch_mmu_enable_log_dirty_pt_masked - enable dirty logging for selected
+ * dirty pages.
+ *
+ * It calls kvm_mmu_write_protect_pt_masked to write protect selected pages to
+ * enable dirty logging for them.
+ */
+void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
+		struct kvm_memory_slot *slot,
+		gfn_t gfn_offset, unsigned long mask)
+{
+	kvm_mmu_write_protect_pt_masked(kvm, slot, gfn_offset, mask);
+}
+
 static void coherent_cache_guest_page(struct kvm_vcpu *vcpu, pfn_t pfn,
 static void coherent_cache_guest_page(struct kvm_vcpu *vcpu, pfn_t pfn,
 				      unsigned long size, bool uncached)
 				      unsigned long size, bool uncached)
 {
 {
@@ -977,6 +1188,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 	pfn_t pfn;
 	pfn_t pfn;
 	pgprot_t mem_type = PAGE_S2;
 	pgprot_t mem_type = PAGE_S2;
 	bool fault_ipa_uncached;
 	bool fault_ipa_uncached;
+	bool logging_active = memslot_is_logging(memslot);
+	unsigned long flags = 0;
 
 
 	write_fault = kvm_is_write_fault(vcpu);
 	write_fault = kvm_is_write_fault(vcpu);
 	if (fault_status == FSC_PERM && !write_fault) {
 	if (fault_status == FSC_PERM && !write_fault) {
@@ -993,7 +1206,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 		return -EFAULT;
 		return -EFAULT;
 	}
 	}
 
 
-	if (is_vm_hugetlb_page(vma)) {
+	if (is_vm_hugetlb_page(vma) && !logging_active) {
 		hugetlb = true;
 		hugetlb = true;
 		gfn = (fault_ipa & PMD_MASK) >> PAGE_SHIFT;
 		gfn = (fault_ipa & PMD_MASK) >> PAGE_SHIFT;
 	} else {
 	} else {
@@ -1034,12 +1247,30 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 	if (is_error_pfn(pfn))
 	if (is_error_pfn(pfn))
 		return -EFAULT;
 		return -EFAULT;
 
 
-	if (kvm_is_device_pfn(pfn))
+	if (kvm_is_device_pfn(pfn)) {
 		mem_type = PAGE_S2_DEVICE;
 		mem_type = PAGE_S2_DEVICE;
+		flags |= KVM_S2PTE_FLAG_IS_IOMAP;
+	} else if (logging_active) {
+		/*
+		 * Faults on pages in a memslot with logging enabled
+		 * should not be mapped with huge pages (it introduces churn
+		 * and performance degradation), so force a pte mapping.
+		 */
+		force_pte = true;
+		flags |= KVM_S2_FLAG_LOGGING_ACTIVE;
+
+		/*
+		 * Only actually map the page as writable if this was a write
+		 * fault.
+		 */
+		if (!write_fault)
+			writable = false;
+	}
 
 
 	spin_lock(&kvm->mmu_lock);
 	spin_lock(&kvm->mmu_lock);
 	if (mmu_notifier_retry(kvm, mmu_seq))
 	if (mmu_notifier_retry(kvm, mmu_seq))
 		goto out_unlock;
 		goto out_unlock;
+
 	if (!hugetlb && !force_pte)
 	if (!hugetlb && !force_pte)
 		hugetlb = transparent_hugepage_adjust(&pfn, &fault_ipa);
 		hugetlb = transparent_hugepage_adjust(&pfn, &fault_ipa);
 
 
@@ -1056,16 +1287,16 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 		ret = stage2_set_pmd_huge(kvm, memcache, fault_ipa, &new_pmd);
 		ret = stage2_set_pmd_huge(kvm, memcache, fault_ipa, &new_pmd);
 	} else {
 	} else {
 		pte_t new_pte = pfn_pte(pfn, mem_type);
 		pte_t new_pte = pfn_pte(pfn, mem_type);
+
 		if (writable) {
 		if (writable) {
 			kvm_set_s2pte_writable(&new_pte);
 			kvm_set_s2pte_writable(&new_pte);
 			kvm_set_pfn_dirty(pfn);
 			kvm_set_pfn_dirty(pfn);
+			mark_page_dirty(kvm, gfn);
 		}
 		}
 		coherent_cache_guest_page(vcpu, pfn, PAGE_SIZE, fault_ipa_uncached);
 		coherent_cache_guest_page(vcpu, pfn, PAGE_SIZE, fault_ipa_uncached);
-		ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte,
-			pgprot_val(mem_type) == pgprot_val(PAGE_S2_DEVICE));
+		ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte, flags);
 	}
 	}
 
 
-
 out_unlock:
 out_unlock:
 	spin_unlock(&kvm->mmu_lock);
 	spin_unlock(&kvm->mmu_lock);
 	kvm_release_pfn_clean(pfn);
 	kvm_release_pfn_clean(pfn);
@@ -1215,7 +1446,14 @@ static void kvm_set_spte_handler(struct kvm *kvm, gpa_t gpa, void *data)
 {
 {
 	pte_t *pte = (pte_t *)data;
 	pte_t *pte = (pte_t *)data;
 
 
-	stage2_set_pte(kvm, NULL, gpa, pte, false);
+	/*
+	 * We can always call stage2_set_pte with KVM_S2PTE_FLAG_LOGGING_ACTIVE
+	 * flag clear because MMU notifiers will have unmapped a huge PMD before
+	 * calling ->change_pte() (which in turn calls kvm_set_spte_hva()) and
+	 * therefore stage2_set_pte() never needs to clear out a huge PMD
+	 * through this calling path.
+	 */
+	stage2_set_pte(kvm, NULL, gpa, pte, 0);
 }
 }
 
 
 
 
@@ -1348,6 +1586,13 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
 				   const struct kvm_memory_slot *old,
 				   const struct kvm_memory_slot *old,
 				   enum kvm_mr_change change)
 				   enum kvm_mr_change change)
 {
 {
+	/*
+	 * At this point memslot has been committed and there is an
+	 * allocated dirty_bitmap[], dirty pages will be be tracked while the
+	 * memory slot is write protected.
+	 */
+	if (change != KVM_MR_DELETE && mem->flags & KVM_MEM_LOG_DIRTY_PAGES)
+		kvm_mmu_wp_memory_region(kvm, mem->slot);
 }
 }
 
 
 int kvm_arch_prepare_memory_region(struct kvm *kvm,
 int kvm_arch_prepare_memory_region(struct kvm *kvm,
@@ -1360,7 +1605,8 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
 	bool writable = !(mem->flags & KVM_MEM_READONLY);
 	bool writable = !(mem->flags & KVM_MEM_READONLY);
 	int ret = 0;
 	int ret = 0;
 
 
-	if (change != KVM_MR_CREATE && change != KVM_MR_MOVE)
+	if (change != KVM_MR_CREATE && change != KVM_MR_MOVE &&
+			change != KVM_MR_FLAGS_ONLY)
 		return 0;
 		return 0;
 
 
 	/*
 	/*
@@ -1411,6 +1657,10 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
 			phys_addr_t pa = (vma->vm_pgoff << PAGE_SHIFT) +
 			phys_addr_t pa = (vma->vm_pgoff << PAGE_SHIFT) +
 					 vm_start - vma->vm_start;
 					 vm_start - vma->vm_start;
 
 
+			/* IO region dirty page logging not allowed */
+			if (memslot->flags & KVM_MEM_LOG_DIRTY_PAGES)
+				return -EINVAL;
+
 			ret = kvm_phys_addr_ioremap(kvm, gpa, pa,
 			ret = kvm_phys_addr_ioremap(kvm, gpa, pa,
 						    vm_end - vm_start,
 						    vm_end - vm_start,
 						    writable);
 						    writable);
@@ -1420,6 +1670,9 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
 		hva = vm_end;
 		hva = vm_end;
 	} while (hva < reg_end);
 	} while (hva < reg_end);
 
 
+	if (change == KVM_MR_FLAGS_ONLY)
+		return ret;
+
 	spin_lock(&kvm->mmu_lock);
 	spin_lock(&kvm->mmu_lock);
 	if (ret)
 	if (ret)
 		unmap_stage2_range(kvm, mem->guest_phys_addr, mem->memory_size);
 		unmap_stage2_range(kvm, mem->guest_phys_addr, mem->memory_size);

+ 5 - 12
arch/arm/kvm/psci.c

@@ -22,6 +22,7 @@
 #include <asm/cputype.h>
 #include <asm/cputype.h>
 #include <asm/kvm_emulate.h>
 #include <asm/kvm_emulate.h>
 #include <asm/kvm_psci.h>
 #include <asm/kvm_psci.h>
+#include <asm/kvm_host.h>
 
 
 /*
 /*
  * This is an implementation of the Power State Coordination Interface
  * This is an implementation of the Power State Coordination Interface
@@ -66,25 +67,17 @@ static void kvm_psci_vcpu_off(struct kvm_vcpu *vcpu)
 static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
 static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
 {
 {
 	struct kvm *kvm = source_vcpu->kvm;
 	struct kvm *kvm = source_vcpu->kvm;
-	struct kvm_vcpu *vcpu = NULL, *tmp;
+	struct kvm_vcpu *vcpu = NULL;
 	wait_queue_head_t *wq;
 	wait_queue_head_t *wq;
 	unsigned long cpu_id;
 	unsigned long cpu_id;
 	unsigned long context_id;
 	unsigned long context_id;
-	unsigned long mpidr;
 	phys_addr_t target_pc;
 	phys_addr_t target_pc;
-	int i;
 
 
-	cpu_id = *vcpu_reg(source_vcpu, 1);
+	cpu_id = *vcpu_reg(source_vcpu, 1) & MPIDR_HWID_BITMASK;
 	if (vcpu_mode_is_32bit(source_vcpu))
 	if (vcpu_mode_is_32bit(source_vcpu))
 		cpu_id &= ~((u32) 0);
 		cpu_id &= ~((u32) 0);
 
 
-	kvm_for_each_vcpu(i, tmp, kvm) {
-		mpidr = kvm_vcpu_get_mpidr(tmp);
-		if ((mpidr & MPIDR_HWID_BITMASK) == (cpu_id & MPIDR_HWID_BITMASK)) {
-			vcpu = tmp;
-			break;
-		}
-	}
+	vcpu = kvm_mpidr_to_vcpu(kvm, cpu_id);
 
 
 	/*
 	/*
 	 * Make sure the caller requested a valid CPU and that the CPU is
 	 * Make sure the caller requested a valid CPU and that the CPU is
@@ -155,7 +148,7 @@ static unsigned long kvm_psci_vcpu_affinity_info(struct kvm_vcpu *vcpu)
 	 * then ON else OFF
 	 * then ON else OFF
 	 */
 	 */
 	kvm_for_each_vcpu(i, tmp, kvm) {
 	kvm_for_each_vcpu(i, tmp, kvm) {
-		mpidr = kvm_vcpu_get_mpidr(tmp);
+		mpidr = kvm_vcpu_get_mpidr_aff(tmp);
 		if (((mpidr & target_affinity_mask) == target_affinity) &&
 		if (((mpidr & target_affinity_mask) == target_affinity) &&
 		    !tmp->arch.pause) {
 		    !tmp->arch.pause) {
 			return PSCI_0_2_AFFINITY_LEVEL_ON;
 			return PSCI_0_2_AFFINITY_LEVEL_ON;

+ 7 - 4
arch/arm/kvm/trace.h

@@ -140,19 +140,22 @@ TRACE_EVENT(kvm_emulate_cp15_imp,
 			__entry->CRm, __entry->Op2)
 			__entry->CRm, __entry->Op2)
 );
 );
 
 
-TRACE_EVENT(kvm_wfi,
-	TP_PROTO(unsigned long vcpu_pc),
-	TP_ARGS(vcpu_pc),
+TRACE_EVENT(kvm_wfx,
+	TP_PROTO(unsigned long vcpu_pc, bool is_wfe),
+	TP_ARGS(vcpu_pc, is_wfe),
 
 
 	TP_STRUCT__entry(
 	TP_STRUCT__entry(
 		__field(	unsigned long,	vcpu_pc		)
 		__field(	unsigned long,	vcpu_pc		)
+		__field(		 bool,	is_wfe		)
 	),
 	),
 
 
 	TP_fast_assign(
 	TP_fast_assign(
 		__entry->vcpu_pc		= vcpu_pc;
 		__entry->vcpu_pc		= vcpu_pc;
+		__entry->is_wfe			= is_wfe;
 	),
 	),
 
 
-	TP_printk("guest executed wfi at: 0x%08lx", __entry->vcpu_pc)
+	TP_printk("guest executed wf%c at: 0x%08lx",
+		__entry->is_wfe ? 'e' : 'i', __entry->vcpu_pc)
 );
 );
 
 
 TRACE_EVENT(kvm_unmap_hva,
 TRACE_EVENT(kvm_unmap_hva,

+ 1 - 0
arch/arm64/include/asm/esr.h

@@ -96,6 +96,7 @@
 #define ESR_ELx_COND_SHIFT	(20)
 #define ESR_ELx_COND_SHIFT	(20)
 #define ESR_ELx_COND_MASK	(UL(0xF) << ESR_ELx_COND_SHIFT)
 #define ESR_ELx_COND_MASK	(UL(0xF) << ESR_ELx_COND_SHIFT)
 #define ESR_ELx_WFx_ISS_WFE	(UL(1) << 0)
 #define ESR_ELx_WFx_ISS_WFE	(UL(1) << 0)
+#define ESR_ELx_xVC_IMM_MASK	((1UL << 16) - 1)
 
 
 #ifndef __ASSEMBLY__
 #ifndef __ASSEMBLY__
 #include <asm/types.h>
 #include <asm/types.h>

+ 1 - 0
arch/arm64/include/asm/kvm_asm.h

@@ -126,6 +126,7 @@ extern char __kvm_hyp_vector[];
 
 
 extern void __kvm_flush_vm_context(void);
 extern void __kvm_flush_vm_context(void);
 extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa);
 extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa);
+extern void __kvm_tlb_flush_vmid(struct kvm *kvm);
 
 
 extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu);
 extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu);
 
 

+ 8 - 2
arch/arm64/include/asm/kvm_emulate.h

@@ -29,6 +29,7 @@
 #include <asm/kvm_asm.h>
 #include <asm/kvm_asm.h>
 #include <asm/kvm_mmio.h>
 #include <asm/kvm_mmio.h>
 #include <asm/ptrace.h>
 #include <asm/ptrace.h>
+#include <asm/cputype.h>
 
 
 unsigned long *vcpu_reg32(const struct kvm_vcpu *vcpu, u8 reg_num);
 unsigned long *vcpu_reg32(const struct kvm_vcpu *vcpu, u8 reg_num);
 unsigned long *vcpu_spsr32(const struct kvm_vcpu *vcpu);
 unsigned long *vcpu_spsr32(const struct kvm_vcpu *vcpu);
@@ -140,6 +141,11 @@ static inline phys_addr_t kvm_vcpu_get_fault_ipa(const struct kvm_vcpu *vcpu)
 	return ((phys_addr_t)vcpu->arch.fault.hpfar_el2 & HPFAR_MASK) << 8;
 	return ((phys_addr_t)vcpu->arch.fault.hpfar_el2 & HPFAR_MASK) << 8;
 }
 }
 
 
+static inline u32 kvm_vcpu_hvc_get_imm(const struct kvm_vcpu *vcpu)
+{
+	return kvm_vcpu_get_hsr(vcpu) & ESR_ELx_xVC_IMM_MASK;
+}
+
 static inline bool kvm_vcpu_dabt_isvalid(const struct kvm_vcpu *vcpu)
 static inline bool kvm_vcpu_dabt_isvalid(const struct kvm_vcpu *vcpu)
 {
 {
 	return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_ISV);
 	return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_ISV);
@@ -201,9 +207,9 @@ static inline u8 kvm_vcpu_trap_get_fault_type(const struct kvm_vcpu *vcpu)
 	return kvm_vcpu_get_hsr(vcpu) & ESR_ELx_FSC_TYPE;
 	return kvm_vcpu_get_hsr(vcpu) & ESR_ELx_FSC_TYPE;
 }
 }
 
 
-static inline unsigned long kvm_vcpu_get_mpidr(struct kvm_vcpu *vcpu)
+static inline unsigned long kvm_vcpu_get_mpidr_aff(struct kvm_vcpu *vcpu)
 {
 {
-	return vcpu_sys_reg(vcpu, MPIDR_EL1);
+	return vcpu_sys_reg(vcpu, MPIDR_EL1) & MPIDR_HWID_BITMASK;
 }
 }
 
 
 static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu)
 static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu)

+ 7 - 0
arch/arm64/include/asm/kvm_host.h

@@ -59,6 +59,9 @@ struct kvm_arch {
 	/* VTTBR value associated with above pgd and vmid */
 	/* VTTBR value associated with above pgd and vmid */
 	u64    vttbr;
 	u64    vttbr;
 
 
+	/* The maximum number of vCPUs depends on the used GIC model */
+	int max_vcpus;
+
 	/* Interrupt controller */
 	/* Interrupt controller */
 	struct vgic_dist	vgic;
 	struct vgic_dist	vgic;
 
 
@@ -159,6 +162,7 @@ struct kvm_vm_stat {
 };
 };
 
 
 struct kvm_vcpu_stat {
 struct kvm_vcpu_stat {
+	u32 halt_successful_poll;
 	u32 halt_wakeup;
 	u32 halt_wakeup;
 };
 };
 
 
@@ -196,6 +200,7 @@ struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void);
 
 
 u64 kvm_call_hyp(void *hypfn, ...);
 u64 kvm_call_hyp(void *hypfn, ...);
 void force_vm_exit(const cpumask_t *mask);
 void force_vm_exit(const cpumask_t *mask);
+void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot);
 
 
 int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
 int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
 		int exception_index);
 		int exception_index);
@@ -203,6 +208,8 @@ int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
 int kvm_perf_init(void);
 int kvm_perf_init(void);
 int kvm_perf_teardown(void);
 int kvm_perf_teardown(void);
 
 
+struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr);
+
 static inline void __cpu_init_hyp_mode(phys_addr_t boot_pgd_ptr,
 static inline void __cpu_init_hyp_mode(phys_addr_t boot_pgd_ptr,
 				       phys_addr_t pgd_ptr,
 				       phys_addr_t pgd_ptr,
 				       unsigned long hyp_stack_ptr,
 				       unsigned long hyp_stack_ptr,

+ 1 - 0
arch/arm64/include/asm/kvm_mmio.h

@@ -40,6 +40,7 @@ struct kvm_exit_mmio {
 	u8		data[8];
 	u8		data[8];
 	u32		len;
 	u32		len;
 	bool		is_write;
 	bool		is_write;
+	void		*private;
 };
 };
 
 
 static inline void kvm_prepare_mmio(struct kvm_run *run,
 static inline void kvm_prepare_mmio(struct kvm_run *run,

+ 21 - 0
arch/arm64/include/asm/kvm_mmu.h

@@ -118,6 +118,27 @@ static inline void kvm_set_s2pmd_writable(pmd_t *pmd)
 	pmd_val(*pmd) |= PMD_S2_RDWR;
 	pmd_val(*pmd) |= PMD_S2_RDWR;
 }
 }
 
 
+static inline void kvm_set_s2pte_readonly(pte_t *pte)
+{
+	pte_val(*pte) = (pte_val(*pte) & ~PTE_S2_RDWR) | PTE_S2_RDONLY;
+}
+
+static inline bool kvm_s2pte_readonly(pte_t *pte)
+{
+	return (pte_val(*pte) & PTE_S2_RDWR) == PTE_S2_RDONLY;
+}
+
+static inline void kvm_set_s2pmd_readonly(pmd_t *pmd)
+{
+	pmd_val(*pmd) = (pmd_val(*pmd) & ~PMD_S2_RDWR) | PMD_S2_RDONLY;
+}
+
+static inline bool kvm_s2pmd_readonly(pmd_t *pmd)
+{
+	return (pmd_val(*pmd) & PMD_S2_RDWR) == PMD_S2_RDONLY;
+}
+
+
 #define kvm_pgd_addr_end(addr, end)	pgd_addr_end(addr, end)
 #define kvm_pgd_addr_end(addr, end)	pgd_addr_end(addr, end)
 #define kvm_pud_addr_end(addr, end)	pud_addr_end(addr, end)
 #define kvm_pud_addr_end(addr, end)	pud_addr_end(addr, end)
 #define kvm_pmd_addr_end(addr, end)	pmd_addr_end(addr, end)
 #define kvm_pmd_addr_end(addr, end)	pmd_addr_end(addr, end)

+ 1 - 0
arch/arm64/include/asm/pgtable-hwdef.h

@@ -119,6 +119,7 @@
 #define PTE_S2_RDONLY		(_AT(pteval_t, 1) << 6)   /* HAP[2:1] */
 #define PTE_S2_RDONLY		(_AT(pteval_t, 1) << 6)   /* HAP[2:1] */
 #define PTE_S2_RDWR		(_AT(pteval_t, 3) << 6)   /* HAP[2:1] */
 #define PTE_S2_RDWR		(_AT(pteval_t, 3) << 6)   /* HAP[2:1] */
 
 
+#define PMD_S2_RDONLY		(_AT(pmdval_t, 1) << 6)   /* HAP[2:1] */
 #define PMD_S2_RDWR		(_AT(pmdval_t, 3) << 6)   /* HAP[2:1] */
 #define PMD_S2_RDWR		(_AT(pmdval_t, 3) << 6)   /* HAP[2:1] */
 
 
 /*
 /*

+ 9 - 0
arch/arm64/include/uapi/asm/kvm.h

@@ -78,6 +78,13 @@ struct kvm_regs {
 #define KVM_VGIC_V2_DIST_SIZE		0x1000
 #define KVM_VGIC_V2_DIST_SIZE		0x1000
 #define KVM_VGIC_V2_CPU_SIZE		0x2000
 #define KVM_VGIC_V2_CPU_SIZE		0x2000
 
 
+/* Supported VGICv3 address types  */
+#define KVM_VGIC_V3_ADDR_TYPE_DIST	2
+#define KVM_VGIC_V3_ADDR_TYPE_REDIST	3
+
+#define KVM_VGIC_V3_DIST_SIZE		SZ_64K
+#define KVM_VGIC_V3_REDIST_SIZE		(2 * SZ_64K)
+
 #define KVM_ARM_VCPU_POWER_OFF		0 /* CPU is started in OFF state */
 #define KVM_ARM_VCPU_POWER_OFF		0 /* CPU is started in OFF state */
 #define KVM_ARM_VCPU_EL1_32BIT		1 /* CPU running a 32bit VM */
 #define KVM_ARM_VCPU_EL1_32BIT		1 /* CPU running a 32bit VM */
 #define KVM_ARM_VCPU_PSCI_0_2		2 /* CPU uses PSCI v0.2 */
 #define KVM_ARM_VCPU_PSCI_0_2		2 /* CPU uses PSCI v0.2 */
@@ -161,6 +168,8 @@ struct kvm_arch_memory_slot {
 #define   KVM_DEV_ARM_VGIC_OFFSET_SHIFT	0
 #define   KVM_DEV_ARM_VGIC_OFFSET_SHIFT	0
 #define   KVM_DEV_ARM_VGIC_OFFSET_MASK	(0xffffffffULL << KVM_DEV_ARM_VGIC_OFFSET_SHIFT)
 #define   KVM_DEV_ARM_VGIC_OFFSET_MASK	(0xffffffffULL << KVM_DEV_ARM_VGIC_OFFSET_SHIFT)
 #define KVM_DEV_ARM_VGIC_GRP_NR_IRQS	3
 #define KVM_DEV_ARM_VGIC_GRP_NR_IRQS	3
+#define KVM_DEV_ARM_VGIC_GRP_CTRL	4
+#define   KVM_DEV_ARM_VGIC_CTRL_INIT	0
 
 
 /* KVM_IRQ_LINE irq field index values */
 /* KVM_IRQ_LINE irq field index values */
 #define KVM_ARM_IRQ_TYPE_SHIFT		24
 #define KVM_ARM_IRQ_TYPE_SHIFT		24

+ 1 - 0
arch/arm64/kernel/asm-offsets.c

@@ -140,6 +140,7 @@ int main(void)
   DEFINE(VGIC_V2_CPU_ELRSR,	offsetof(struct vgic_cpu, vgic_v2.vgic_elrsr));
   DEFINE(VGIC_V2_CPU_ELRSR,	offsetof(struct vgic_cpu, vgic_v2.vgic_elrsr));
   DEFINE(VGIC_V2_CPU_APR,	offsetof(struct vgic_cpu, vgic_v2.vgic_apr));
   DEFINE(VGIC_V2_CPU_APR,	offsetof(struct vgic_cpu, vgic_v2.vgic_apr));
   DEFINE(VGIC_V2_CPU_LR,	offsetof(struct vgic_cpu, vgic_v2.vgic_lr));
   DEFINE(VGIC_V2_CPU_LR,	offsetof(struct vgic_cpu, vgic_v2.vgic_lr));
+  DEFINE(VGIC_V3_CPU_SRE,	offsetof(struct vgic_cpu, vgic_v3.vgic_sre));
   DEFINE(VGIC_V3_CPU_HCR,	offsetof(struct vgic_cpu, vgic_v3.vgic_hcr));
   DEFINE(VGIC_V3_CPU_HCR,	offsetof(struct vgic_cpu, vgic_v3.vgic_hcr));
   DEFINE(VGIC_V3_CPU_VMCR,	offsetof(struct vgic_cpu, vgic_v3.vgic_vmcr));
   DEFINE(VGIC_V3_CPU_VMCR,	offsetof(struct vgic_cpu, vgic_v3.vgic_vmcr));
   DEFINE(VGIC_V3_CPU_MISR,	offsetof(struct vgic_cpu, vgic_v3.vgic_misr));
   DEFINE(VGIC_V3_CPU_MISR,	offsetof(struct vgic_cpu, vgic_v3.vgic_misr));

+ 2 - 0
arch/arm64/kvm/Kconfig

@@ -22,10 +22,12 @@ config KVM
 	select PREEMPT_NOTIFIERS
 	select PREEMPT_NOTIFIERS
 	select ANON_INODES
 	select ANON_INODES
 	select HAVE_KVM_CPU_RELAX_INTERCEPT
 	select HAVE_KVM_CPU_RELAX_INTERCEPT
+	select HAVE_KVM_ARCH_TLB_FLUSH_ALL
 	select KVM_MMIO
 	select KVM_MMIO
 	select KVM_ARM_HOST
 	select KVM_ARM_HOST
 	select KVM_ARM_VGIC
 	select KVM_ARM_VGIC
 	select KVM_ARM_TIMER
 	select KVM_ARM_TIMER
+	select KVM_GENERIC_DIRTYLOG_READ_PROTECT
 	select SRCU
 	select SRCU
 	---help---
 	---help---
 	  Support hosting virtualized guest machines.
 	  Support hosting virtualized guest machines.

+ 2 - 0
arch/arm64/kvm/Makefile

@@ -21,7 +21,9 @@ kvm-$(CONFIG_KVM_ARM_HOST) += guest.o reset.o sys_regs.o sys_regs_generic_v8.o
 
 
 kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic.o
 kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic.o
 kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v2.o
 kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v2.o
+kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v2-emul.o
 kvm-$(CONFIG_KVM_ARM_VGIC) += vgic-v2-switch.o
 kvm-$(CONFIG_KVM_ARM_VGIC) += vgic-v2-switch.o
 kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v3.o
 kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v3.o
+kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v3-emul.o
 kvm-$(CONFIG_KVM_ARM_VGIC) += vgic-v3-switch.o
 kvm-$(CONFIG_KVM_ARM_VGIC) += vgic-v3-switch.o
 kvm-$(CONFIG_KVM_ARM_TIMER) += $(KVM)/arm/arch_timer.o
 kvm-$(CONFIG_KVM_ARM_TIMER) += $(KVM)/arm/arch_timer.o

+ 11 - 2
arch/arm64/kvm/handle_exit.c

@@ -28,12 +28,18 @@
 #include <asm/kvm_mmu.h>
 #include <asm/kvm_mmu.h>
 #include <asm/kvm_psci.h>
 #include <asm/kvm_psci.h>
 
 
+#define CREATE_TRACE_POINTS
+#include "trace.h"
+
 typedef int (*exit_handle_fn)(struct kvm_vcpu *, struct kvm_run *);
 typedef int (*exit_handle_fn)(struct kvm_vcpu *, struct kvm_run *);
 
 
 static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run)
 static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run)
 {
 {
 	int ret;
 	int ret;
 
 
+	trace_kvm_hvc_arm64(*vcpu_pc(vcpu), *vcpu_reg(vcpu, 0),
+			    kvm_vcpu_hvc_get_imm(vcpu));
+
 	ret = kvm_psci_call(vcpu);
 	ret = kvm_psci_call(vcpu);
 	if (ret < 0) {
 	if (ret < 0) {
 		kvm_inject_undefined(vcpu);
@@ -63,10 +69,13 @@ static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run)
  */
 static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run)
 {
-	if (kvm_vcpu_get_hsr(vcpu) & ESR_ELx_WFx_ISS_WFE)
+	if (kvm_vcpu_get_hsr(vcpu) & ESR_ELx_WFx_ISS_WFE) {
+		trace_kvm_wfx_arm64(*vcpu_pc(vcpu), true);
 		kvm_vcpu_on_spin(vcpu);
-	else
+	} else {
+		trace_kvm_wfx_arm64(*vcpu_pc(vcpu), false);
 		kvm_vcpu_block(vcpu);
+	}
 
 
 	kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
 
 

+ 22 - 0
arch/arm64/kvm/hyp.S

@@ -1032,6 +1032,28 @@ ENTRY(__kvm_tlb_flush_vmid_ipa)
 	ret
 ENDPROC(__kvm_tlb_flush_vmid_ipa)
 
 
+/**
+ * void __kvm_tlb_flush_vmid(struct kvm *kvm) - Flush per-VMID TLBs
+ * @struct kvm *kvm - pointer to kvm structure
+ *
+ * Invalidates all Stage 1 and 2 TLB entries for current VMID.
+ */
+ENTRY(__kvm_tlb_flush_vmid)
+	dsb     ishst
+
+	kern_hyp_va     x0
+	ldr     x2, [x0, #KVM_VTTBR]
+	msr     vttbr_el2, x2
+	isb
+
+	tlbi    vmalls12e1is
+	dsb     ish
+	isb
+
+	msr     vttbr_el2, xzr
+	ret
+ENDPROC(__kvm_tlb_flush_vmid)
+
 ENTRY(__kvm_flush_vm_context)
 	dsb	ishst
 	tlbi	alle1is

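How the new per-VMID flush is reached from C is not shown in this hunk; the declaration lands in the arm/arm64 kvm_asm.h headers touched by this pull and the host side goes through the usual kvm_call_hyp() trampoline. A minimal sketch, assuming that conventional wiring rather than quoting the actual mmu code:

	/* sketch only: host-side wrapper for the HYP entry point above */
	void kvm_flush_remote_tlbs(struct kvm *kvm)
	{
		/* made arch-specific by HAVE_KVM_ARCH_TLB_FLUSH_ALL (see the Kconfig hunk above) */
		kvm_call_hyp(__kvm_tlb_flush_vmid, kvm);
	}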
+ 38 - 2
arch/arm64/kvm/sys_regs.c

@@ -113,6 +113,27 @@ static bool access_vm_reg(struct kvm_vcpu *vcpu,
 	return true;
 }
 
 
+/*
+ * Trap handler for the GICv3 SGI generation system register.
+ * Forward the request to the VGIC emulation.
+ * The cp15_64 code makes sure this automatically works
+ * for both AArch64 and AArch32 accesses.
+ */
+static bool access_gic_sgi(struct kvm_vcpu *vcpu,
+			   const struct sys_reg_params *p,
+			   const struct sys_reg_desc *r)
+{
+	u64 val;
+
+	if (!p->is_write)
+		return read_from_write_only(vcpu, p);
+
+	val = *vcpu_reg(vcpu, p->Rt);
+	vgic_v3_dispatch_sgi(vcpu, val);
+
+	return true;
+}
+
 static bool trap_raz_wi(struct kvm_vcpu *vcpu,
 			const struct sys_reg_params *p,
 			const struct sys_reg_desc *r)
@@ -200,10 +221,19 @@ static void reset_amair_el1(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
 
 
 static void reset_mpidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
 {
+	u64 mpidr;
+
 	/*
-	 * Simply map the vcpu_id into the Aff0 field of the MPIDR.
+	 * Map the vcpu_id into the first three affinity level fields of
+	 * the MPIDR. We limit the number of VCPUs in level 0 due to a
+	 * limitation to 16 CPUs in that level in the ICC_SGIxR registers
+	 * of the GICv3 to be able to address each CPU directly when
+	 * sending IPIs.
 	 */
-	vcpu_sys_reg(vcpu, MPIDR_EL1) = (1UL << 31) | (vcpu->vcpu_id & 0xff);
+	mpidr = (vcpu->vcpu_id & 0x0f) << MPIDR_LEVEL_SHIFT(0);
+	mpidr |= ((vcpu->vcpu_id >> 4) & 0xff) << MPIDR_LEVEL_SHIFT(1);
+	mpidr |= ((vcpu->vcpu_id >> 12) & 0xff) << MPIDR_LEVEL_SHIFT(2);
+	vcpu_sys_reg(vcpu, MPIDR_EL1) = (1ULL << 31) | mpidr;
 }
 
 
 /* Silly macro to expand the DBG{BCR,BVR,WVR,WCR}n_EL1 registers in one go */
@@ -373,6 +403,9 @@ static const struct sys_reg_desc sys_reg_descs[] = {
 	{ Op0(0b11), Op1(0b000), CRn(0b1100), CRm(0b0000), Op2(0b000),
 	  NULL, reset_val, VBAR_EL1, 0 },
 
 
+	/* ICC_SGI1R_EL1 */
+	{ Op0(0b11), Op1(0b000), CRn(0b1100), CRm(0b1011), Op2(0b101),
+	  access_gic_sgi },
 	/* ICC_SRE_EL1 */
 	{ Op0(0b11), Op1(0b000), CRn(0b1100), CRm(0b1100), Op2(0b101),
 	  trap_raz_wi },
@@ -605,6 +638,8 @@ static const struct sys_reg_desc cp14_64_regs[] = {
  * register).
  */
 static const struct sys_reg_desc cp15_regs[] = {
+	{ Op1( 0), CRn( 0), CRm(12), Op2( 0), access_gic_sgi },
+
 	{ Op1( 0), CRn( 1), CRm( 0), Op2( 0), access_vm_reg, NULL, c1_SCTLR },
 	{ Op1( 0), CRn( 2), CRm( 0), Op2( 0), access_vm_reg, NULL, c2_TTBR0 },
 	{ Op1( 0), CRn( 2), CRm( 0), Op2( 1), access_vm_reg, NULL, c2_TTBR1 },
@@ -652,6 +687,7 @@ static const struct sys_reg_desc cp15_regs[] = {
 
 
 static const struct sys_reg_desc cp15_64_regs[] = {
 	{ Op1( 0), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, c2_TTBR0 },
+	{ Op1( 0), CRn( 0), CRm(12), Op2( 0), access_gic_sgi },
 	{ Op1( 1), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, c2_TTBR1 },
 };
 
 

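To make the reset_mpidr() change above concrete, a worked example (illustration only, assuming the usual 8-bit affinity fields at bits 0, 8 and 16):

	u32 vcpu_id = 21;                          /* example VCPU number */
	u64 mpidr;

	mpidr  = vcpu_id & 0x0f;                   /* Aff0 = 5 (at most 16 CPUs per level 0) */
	mpidr |= ((vcpu_id >> 4) & 0xff) << 8;     /* Aff1 = 1 */
	mpidr |= ((vcpu_id >> 12) & 0xff) << 16;   /* Aff2 = 0 */
	mpidr |= 1ULL << 31;                       /* RES1 bit, as before */
	/* result: 0x80000105 - unique per VCPU and directly addressable via ICC_SGI1R_EL1 */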
+ 55 - 0
arch/arm64/kvm/trace.h

@@ -0,0 +1,55 @@
+#if !defined(_TRACE_ARM64_KVM_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_ARM64_KVM_H
+
+#include <linux/tracepoint.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM kvm
+
+TRACE_EVENT(kvm_wfx_arm64,
+	TP_PROTO(unsigned long vcpu_pc, bool is_wfe),
+	TP_ARGS(vcpu_pc, is_wfe),
+
+	TP_STRUCT__entry(
+		__field(unsigned long,	vcpu_pc)
+		__field(bool,		is_wfe)
+	),
+
+	TP_fast_assign(
+		__entry->vcpu_pc = vcpu_pc;
+		__entry->is_wfe  = is_wfe;
+	),
+
+	TP_printk("guest executed wf%c at: 0x%08lx",
+		  __entry->is_wfe ? 'e' : 'i', __entry->vcpu_pc)
+);
+
+TRACE_EVENT(kvm_hvc_arm64,
+	TP_PROTO(unsigned long vcpu_pc, unsigned long r0, unsigned long imm),
+	TP_ARGS(vcpu_pc, r0, imm),
+
+	TP_STRUCT__entry(
+		__field(unsigned long, vcpu_pc)
+		__field(unsigned long, r0)
+		__field(unsigned long, imm)
+	),
+
+	TP_fast_assign(
+		__entry->vcpu_pc = vcpu_pc;
+		__entry->r0 = r0;
+		__entry->imm = imm;
+	),
+
+	TP_printk("HVC at 0x%08lx (r0: 0x%08lx, imm: 0x%lx)",
+		  __entry->vcpu_pc, __entry->r0, __entry->imm)
+);
+
+#endif /* _TRACE_ARM64_KVM_H */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE trace
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>

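For context, the new header follows the standard ftrace pattern: exactly one compilation unit defines CREATE_TRACE_POINTS before including it (handle_exit.c does this in the hunk above), and callers simply invoke the generated trace_* helpers:

	/* in exactly one .c file, as in handle_exit.c above */
	#define CREATE_TRACE_POINTS
	#include "trace.h"

	/* at the trap site */
	trace_kvm_wfx_arm64(*vcpu_pc(vcpu), true);  /* logs: guest executed wfe at: 0x... */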
+ 9 - 5
arch/arm64/kvm/vgic-v3-switch.S

@@ -148,17 +148,18 @@
  * x0: Register pointing to VCPU struct
  */
 .macro	restore_vgic_v3_state
-	// Disable SRE_EL1 access. Necessary, otherwise
-	// ICH_VMCR_EL2.VFIQEn becomes one, and FIQ happens...
-	msr_s	ICC_SRE_EL1, xzr
-	isb
-
 	// Compute the address of struct vgic_cpu
 	add	x3, x0, #VCPU_VGIC_CPU
 
 
 	// Restore all interesting registers
 	ldr	w4, [x3, #VGIC_V3_CPU_HCR]
 	ldr	w5, [x3, #VGIC_V3_CPU_VMCR]
+	ldr	w25, [x3, #VGIC_V3_CPU_SRE]
+
+	msr_s	ICC_SRE_EL1, x25
+
+	// make sure SRE is valid before writing the other registers
+	isb
 
 
 	msr_s	ICH_HCR_EL2, x4
 	msr_s	ICH_VMCR_EL2, x5
@@ -244,9 +245,12 @@
 	dsb	sy
 
 
 	// Prevent the guest from touching the GIC system registers
+	// if SRE isn't enabled for GICv3 emulation
+	cbnz	x25, 1f
 	mrs_s	x5, ICC_SRE_EL2
 	and	x5, x5, #~ICC_SRE_EL2_ENABLE
 	msr_s	ICC_SRE_EL2, x5
+1:
 .endm
 
 
 ENTRY(__save_vgic_v3_state)

+ 0 - 1
arch/ia64/include/uapi/asm/Kbuild

@@ -18,7 +18,6 @@ header-y += intrinsics.h
 header-y += ioctl.h
 header-y += ioctls.h
 header-y += ipcbuf.h
-header-y += kvm.h
 header-y += kvm_para.h
 header-y += mman.h
 header-y += msgbuf.h

+ 1 - 0
arch/mips/include/asm/kvm_host.h

@@ -120,6 +120,7 @@ struct kvm_vcpu_stat {
 	u32 resvd_inst_exits;
 	u32 break_inst_exits;
 	u32 flush_dcache_exits;
+	u32 halt_successful_poll;
 	u32 halt_wakeup;
 };
 
 

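The halt_successful_poll counter added here (and for the other architectures below) is bumped by the generic halt-polling loop that this pull introduces via the halt_poll_ns module parameter. Roughly, and only as a sketch of the common code rather than a verbatim quote:

	/* sketch of the polling phase at the top of kvm_vcpu_block() */
	if (halt_poll_ns) {
		ktime_t stop = ktime_add_ns(ktime_get(), halt_poll_ns);

		do {
			if (kvm_vcpu_check_block(vcpu) < 0) {
				++vcpu->stat.halt_successful_poll;
				return;          /* woke up without ever sleeping */
			}
		} while (single_task_running() && ktime_before(ktime_get(), stop));
	}
	/* otherwise fall through to the normal wait-queue sleep */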
+ 1 - 1
arch/mips/kvm/locore.S

@@ -434,7 +434,7 @@ __kvm_mips_return_to_guest:
 	/* Setup status register for running guest in UM */
 	.set	at
 	or	v1, v1, (ST0_EXL | KSU_USER | ST0_IE)
-	and	v1, v1, ~ST0_CU0
+	and	v1, v1, ~(ST0_CU0 | ST0_MX)
 	.set	noat
 	mtc0	v1, CP0_STATUS
 	ehb

+ 18 - 5
arch/mips/kvm/mips.c

@@ -15,9 +15,11 @@
 #include <linux/vmalloc.h>
 #include <linux/fs.h>
 #include <linux/bootmem.h>
+#include <asm/fpu.h>
 #include <asm/page.h>
 #include <asm/cacheflush.h>
 #include <asm/mmu_context.h>
+#include <asm/pgtable.h>
 
 
 #include <linux/kvm_host.h>
 
 
@@ -47,6 +49,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 	{ "resvd_inst",	  VCPU_STAT(resvd_inst_exits),	 KVM_STAT_VCPU },
 	{ "break_inst",	  VCPU_STAT(break_inst_exits),	 KVM_STAT_VCPU },
 	{ "flush_dcache", VCPU_STAT(flush_dcache_exits), KVM_STAT_VCPU },
+	{ "halt_successful_poll", VCPU_STAT(halt_successful_poll), KVM_STAT_VCPU },
 	{ "halt_wakeup",  VCPU_STAT(halt_wakeup),	 KVM_STAT_VCPU },
 	{NULL}
 };
@@ -378,6 +381,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
 		vcpu->mmio_needed = 0;
 	}
 
 
+	lose_fpu(1);
+
 	local_irq_disable();
 	/* Check if we have any exceptions/interrupts pending */
 	kvm_mips_deliver_interrupts(vcpu,
@@ -385,8 +390,14 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
 
 
 	kvm_guest_enter();
 
 
+	/* Disable hardware page table walking while in guest */
+	htw_stop();
+
 	r = __kvm_mips_vcpu_run(run, vcpu);
 
 
+	/* Re-enable HTW before enabling interrupts */
+	htw_start();
+
 	kvm_guest_exit();
 	local_irq_enable();
 
 
@@ -832,9 +843,8 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
 	return -ENOIOCTLCMD;
 }
 
 
-int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
+void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
 {
-	return 0;
 }
 
 
 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
@@ -980,9 +990,6 @@ static void kvm_mips_set_c0_status(void)
 {
 	uint32_t status = read_c0_status();
 
 
-	if (cpu_has_fpu)
-		status |= (ST0_CU1);
-
 	if (cpu_has_dsp)
 		status |= (ST0_MX);
 
 
@@ -1002,6 +1009,9 @@ int kvm_mips_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
 	enum emulation_result er = EMULATE_DONE;
 	int ret = RESUME_GUEST;
 
 
+	/* re-enable HTW before enabling interrupts */
+	htw_start();
+
 	/* Set a default exit reason */
 	run->exit_reason = KVM_EXIT_UNKNOWN;
 	run->ready_for_interrupt_injection = 1;
@@ -1136,6 +1146,9 @@ skip_emul:
 		}
 	}
 
 
+	/* Disable HTW before returning to guest or host */
+	htw_stop();
+
 	return ret;
 }
 
 

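Taken together, the mips.c changes above give the run loop the following shape (a condensed sketch of the ordering, not a verbatim quote of the function):

	lose_fpu(1);              /* don't leak host FPU/DSP state into the guest */
	local_irq_disable();
	/* deliver pending guest interrupts, as in the hunk above */
	kvm_guest_enter();

	htw_stop();               /* hardware page-table walker off while in guest */
	r = __kvm_mips_vcpu_run(run, vcpu);
	htw_start();              /* back on before interrupts are re-enabled */

	kvm_guest_exit();
	local_irq_enable();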
+ 1 - 0
arch/powerpc/include/asm/kvm_host.h

@@ -107,6 +107,7 @@ struct kvm_vcpu_stat {
 	u32 emulated_inst_exits;
 	u32 dec_exits;
 	u32 ext_intr_exits;
+	u32 halt_successful_poll;
 	u32 halt_wakeup;
 	u32 dbell_exits;
 	u32 gdbell_exits;

+ 1 - 0
arch/powerpc/kvm/book3s.c

@@ -52,6 +52,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 	{ "dec",         VCPU_STAT(dec_exits) },
 	{ "ext_intr",    VCPU_STAT(ext_intr_exits) },
 	{ "queue_intr",  VCPU_STAT(queue_intr) },
+	{ "halt_successful_poll", VCPU_STAT(halt_successful_poll), },
 	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
 	{ "pf_storage",  VCPU_STAT(pf_storage) },
 	{ "sp_storage",  VCPU_STAT(sp_storage) },

+ 1 - 0
arch/powerpc/kvm/booke.c

@@ -62,6 +62,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 	{ "inst_emu",   VCPU_STAT(emulated_inst_exits) },
 	{ "dec",        VCPU_STAT(dec_exits) },
 	{ "ext_intr",   VCPU_STAT(ext_intr_exits) },
+	{ "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
 	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
 	{ "doorbell", VCPU_STAT(dbell_exits) },
 	{ "guest doorbell", VCPU_STAT(gdbell_exits) },

+ 1 - 2
arch/powerpc/kvm/powerpc.c

@@ -623,9 +623,8 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
 	return vcpu;
 }
 
 
-int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
+void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
 {
-	return 0;
 }
 
 
 void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)

+ 44 - 12
arch/s390/include/asm/kvm_host.h

@@ -35,11 +35,13 @@
 #define KVM_NR_IRQCHIPS 1
 #define KVM_IRQCHIP_NUM_PINS 4096
 
 
-#define SIGP_CTRL_C	0x00800000
+#define SIGP_CTRL_C		0x80
+#define SIGP_CTRL_SCN_MASK	0x3f
 
 
 struct sca_entry {
-	atomic_t ctrl;
-	__u32	reserved;
+	__u8	reserved0;
+	__u8	sigp_ctrl;
+	__u16	reserved[3];
 	__u64	sda;
 	__u64	reserved2[2];
 } __attribute__((packed));
@@ -87,7 +89,8 @@ struct kvm_s390_sie_block {
 	atomic_t cpuflags;		/* 0x0000 */
 	__u32 : 1;			/* 0x0004 */
 	__u32 prefix : 18;
-	__u32 : 13;
+	__u32 : 1;
+	__u32 ibc : 12;
 	__u8	reserved08[4];		/* 0x0008 */
 #define PROG_IN_SIE (1<<0)
 	__u32	prog0c;			/* 0x000c */
@@ -132,7 +135,9 @@ struct kvm_s390_sie_block {
 	__u8	reserved60;		/* 0x0060 */
 	__u8	ecb;			/* 0x0061 */
 	__u8    ecb2;                   /* 0x0062 */
-	__u8    reserved63[1];          /* 0x0063 */
+#define ECB3_AES 0x04
+#define ECB3_DEA 0x08
+	__u8    ecb3;			/* 0x0063 */
 	__u32	scaol;			/* 0x0064 */
 	__u8	reserved68[4];		/* 0x0068 */
 	__u32	todpr;			/* 0x006c */
@@ -159,6 +164,7 @@ struct kvm_s390_sie_block {
 	__u64	tecmc;			/* 0x00e8 */
 	__u8	reservedf0[12];		/* 0x00f0 */
 #define CRYCB_FORMAT1 0x00000001
+#define CRYCB_FORMAT2 0x00000003
 	__u32	crycbd;			/* 0x00fc */
 	__u64	gcr[16];		/* 0x0100 */
 	__u64	gbea;			/* 0x0180 */
@@ -192,6 +198,7 @@ struct kvm_vcpu_stat {
 	u32 exit_stop_request;
 	u32 exit_validity;
 	u32 exit_instruction;
+	u32 halt_successful_poll;
 	u32 halt_wakeup;
 	u32 instruction_lctl;
 	u32 instruction_lctlg;
@@ -378,14 +385,11 @@ struct kvm_s390_interrupt_info {
 		struct kvm_s390_emerg_info emerg;
 		struct kvm_s390_extcall_info extcall;
 		struct kvm_s390_prefix_info prefix;
+		struct kvm_s390_stop_info stop;
 		struct kvm_s390_mchk_info mchk;
 	};
 };
 
 
-/* for local_interrupt.action_flags */
-#define ACTION_STORE_ON_STOP		(1<<0)
-#define ACTION_STOP_ON_STOP		(1<<1)
-
 struct kvm_s390_irq_payload {
 	struct kvm_s390_io_info io;
 	struct kvm_s390_ext_info ext;
@@ -393,6 +397,7 @@ struct kvm_s390_irq_payload {
 	struct kvm_s390_emerg_info emerg;
 	struct kvm_s390_extcall_info extcall;
 	struct kvm_s390_prefix_info prefix;
+	struct kvm_s390_stop_info stop;
 	struct kvm_s390_mchk_info mchk;
 };
 
 
@@ -401,7 +406,6 @@ struct kvm_s390_local_interrupt {
 	struct kvm_s390_float_interrupt *float_int;
 	wait_queue_head_t *wq;
 	atomic_t *cpuflags;
-	unsigned int action_bits;
 	DECLARE_BITMAP(sigp_emerg_pending, KVM_MAX_VCPUS);
 	struct kvm_s390_irq_payload irq;
 	unsigned long pending_irqs;
@@ -470,7 +474,6 @@ struct kvm_vcpu_arch {
 	};
 	struct gmap *gmap;
 	struct kvm_guestdbg_info_arch guestdbg;
-#define KVM_S390_PFAULT_TOKEN_INVALID	(-1UL)
 	unsigned long pfault_token;
 	unsigned long pfault_select;
 	unsigned long pfault_compare;
@@ -504,13 +507,39 @@ struct s390_io_adapter {
 #define MAX_S390_IO_ADAPTERS ((MAX_ISC + 1) * 8)
 #define MAX_S390_ADAPTER_MAPS 256
 
 
+/* maximum size of facilities and facility mask is 2k bytes */
+#define S390_ARCH_FAC_LIST_SIZE_BYTE (1<<11)
+#define S390_ARCH_FAC_LIST_SIZE_U64 \
+	(S390_ARCH_FAC_LIST_SIZE_BYTE / sizeof(u64))
+#define S390_ARCH_FAC_MASK_SIZE_BYTE S390_ARCH_FAC_LIST_SIZE_BYTE
+#define S390_ARCH_FAC_MASK_SIZE_U64 \
+	(S390_ARCH_FAC_MASK_SIZE_BYTE / sizeof(u64))
+
+struct s390_model_fac {
+	/* facilities used in SIE context */
+	__u64 sie[S390_ARCH_FAC_LIST_SIZE_U64];
+	/* subset enabled by kvm */
+	__u64 kvm[S390_ARCH_FAC_LIST_SIZE_U64];
+};
+
+struct kvm_s390_cpu_model {
+	struct s390_model_fac *fac;
+	struct cpuid cpu_id;
+	unsigned short ibc;
+};
+
 struct kvm_s390_crypto {
 	struct kvm_s390_crypto_cb *crycb;
 	__u32 crycbd;
+	__u8 aes_kw;
+	__u8 dea_kw;
 };
 
 
 struct kvm_s390_crypto_cb {
-	__u8    reserved00[128];                /* 0x0000 */
+	__u8    reserved00[72];                 /* 0x0000 */
+	__u8    dea_wrapping_key_mask[24];      /* 0x0048 */
+	__u8    aes_wrapping_key_mask[32];      /* 0x0060 */
+	__u8    reserved80[128];                /* 0x0080 */
 };
 
 
 struct kvm_arch{
@@ -523,12 +552,15 @@ struct kvm_arch{
 	int use_irqchip;
 	int use_cmma;
 	int user_cpu_state_ctrl;
+	int user_sigp;
 	struct s390_io_adapter *adapters[MAX_S390_IO_ADAPTERS];
 	wait_queue_head_t ipte_wq;
 	int ipte_lock_count;
 	struct mutex ipte_mutex;
 	spinlock_t start_stop_lock;
+	struct kvm_s390_cpu_model model;
 	struct kvm_s390_crypto crypto;
+	u64 epoch;
 };
 
 
 #define KVM_HVA_ERR_BAD		(-1UL)

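For orientation, the sizes above work out as S390_ARCH_FAC_LIST_SIZE_BYTE = 1 << 11 = 2048 bytes, and 2048 / sizeof(u64) = 2048 / 8 = 256 entries, which is why the uapi structures added further down (kvm_s390_vm_cpu_processor and kvm_s390_vm_cpu_machine) declare fac_list[256] and fac_mask[256].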
+ 3 - 1
arch/s390/include/asm/sclp.h

@@ -31,7 +31,8 @@ struct sclp_cpu_entry {
 	u8 reserved0[2];
 	u8 : 3;
 	u8 siif : 1;
-	u8 : 4;
+	u8 sigpif : 1;
+	u8 : 3;
 	u8 reserved2[10];
 	u8 type;
 	u8 reserved1;
@@ -69,6 +70,7 @@ int memcpy_hsa(void *dest, unsigned long src, size_t count, int mode);
 unsigned long sclp_get_hsa_size(void);
 void sclp_early_detect(void);
 int sclp_has_siif(void);
+int sclp_has_sigpif(void);
 unsigned int sclp_get_ibc(void);
 
 
 long _sclp_print_early(const char *);

+ 7 - 3
arch/s390/include/asm/sysinfo.h

@@ -15,6 +15,7 @@
 #define __ASM_S390_SYSINFO_H
 
 
 #include <asm/bitsperlong.h>
+#include <linux/uuid.h>
 
 
 struct sysinfo_1_1_1 {
 	unsigned char p:1;
@@ -116,10 +117,13 @@ struct sysinfo_3_2_2 {
 		char name[8];
 		unsigned int caf;
 		char cpi[16];
-		char reserved_1[24];
-
+		char reserved_1[3];
+		char ext_name_encoding;
+		unsigned int reserved_2;
+		uuid_be uuid;
 	} vm[8];
-	char reserved_544[3552];
+	char reserved_3[1504];
+	char ext_names[8][256];
 };
 
 
 extern int topology_max_mnest;

+ 37 - 0
arch/s390/include/uapi/asm/kvm.h

@@ -57,10 +57,44 @@ struct kvm_s390_io_adapter_req {
 
 
 /* kvm attr_group  on vm fd */
 #define KVM_S390_VM_MEM_CTRL		0
+#define KVM_S390_VM_TOD			1
+#define KVM_S390_VM_CRYPTO		2
+#define KVM_S390_VM_CPU_MODEL		3
 
 
 /* kvm attributes for mem_ctrl */
 #define KVM_S390_VM_MEM_ENABLE_CMMA	0
 #define KVM_S390_VM_MEM_CLR_CMMA	1
+#define KVM_S390_VM_MEM_LIMIT_SIZE	2
+
+/* kvm attributes for KVM_S390_VM_TOD */
+#define KVM_S390_VM_TOD_LOW		0
+#define KVM_S390_VM_TOD_HIGH		1
+
+/* kvm attributes for KVM_S390_VM_CPU_MODEL */
+/* processor related attributes are r/w */
+#define KVM_S390_VM_CPU_PROCESSOR	0
+struct kvm_s390_vm_cpu_processor {
+	__u64 cpuid;
+	__u16 ibc;
+	__u8  pad[6];
+	__u64 fac_list[256];
+};
+
+/* machine related attributes are r/o */
+#define KVM_S390_VM_CPU_MACHINE		1
+struct kvm_s390_vm_cpu_machine {
+	__u64 cpuid;
+	__u32 ibc;
+	__u8  pad[4];
+	__u64 fac_mask[256];
+	__u64 fac_list[256];
+};
+
+/* kvm attributes for crypto */
+#define KVM_S390_VM_CRYPTO_ENABLE_AES_KW	0
+#define KVM_S390_VM_CRYPTO_ENABLE_DEA_KW	1
+#define KVM_S390_VM_CRYPTO_DISABLE_AES_KW	2
+#define KVM_S390_VM_CRYPTO_DISABLE_DEA_KW	3
 
 
 /* for KVM_GET_REGS and KVM_SET_REGS */
 struct kvm_regs {
@@ -107,6 +141,9 @@ struct kvm_guest_debug_arch {
 	struct kvm_hw_breakpoint __user *hw_bp;
 };
 
 
+/* for KVM_SYNC_PFAULT and KVM_REG_S390_PFTOKEN */
+#define KVM_S390_PFAULT_TOKEN_INVALID	0xffffffffffffffffULL
+
 #define KVM_SYNC_PREFIX (1UL << 0)
 #define KVM_SYNC_GPRS   (1UL << 1)
 #define KVM_SYNC_ACRS   (1UL << 2)

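A hypothetical userspace sketch of how the new CPU-model group might be queried; the attribute names come from the header above, while the VM-fd ioctl plumbing and the vm_fd variable are assumptions rather than part of this hunk:

	#include <stdio.h>
	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	static void dump_cpu_model(int vm_fd)       /* vm_fd: fd returned by KVM_CREATE_VM */
	{
		struct kvm_s390_vm_cpu_machine machine;
		struct kvm_device_attr attr = {
			.group = KVM_S390_VM_CPU_MODEL,
			.attr  = KVM_S390_VM_CPU_MACHINE,
			.addr  = (__u64)(unsigned long)&machine,
		};

		if (ioctl(vm_fd, KVM_GET_DEVICE_ATTR, &attr) == 0)
			printf("cpuid %016llx ibc %x\n",
			       (unsigned long long)machine.cpuid, machine.ibc);
	}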
+ 29 - 0
arch/s390/kernel/sysinfo.c

@@ -204,6 +204,33 @@ static void stsi_2_2_2(struct seq_file *m, struct sysinfo_2_2_2 *info)
 	}
 }
 
 
+static void print_ext_name(struct seq_file *m, int lvl,
+			   struct sysinfo_3_2_2 *info)
+{
+	if (info->vm[lvl].ext_name_encoding == 0)
+		return;
+	if (info->ext_names[lvl][0] == 0)
+		return;
+	switch (info->vm[lvl].ext_name_encoding) {
+	case 1: /* EBCDIC */
+		EBCASC(info->ext_names[lvl], sizeof(info->ext_names[lvl]));
+		break;
+	case 2:	/* UTF-8 */
+		break;
+	default:
+		return;
+	}
+	seq_printf(m, "VM%02d Extended Name:   %-.256s\n", lvl,
+		   info->ext_names[lvl]);
+}
+
+static void print_uuid(struct seq_file *m, int i, struct sysinfo_3_2_2 *info)
+{
+	if (!memcmp(&info->vm[i].uuid, &NULL_UUID_BE, sizeof(uuid_be)))
+		return;
+	seq_printf(m, "VM%02d UUID:            %pUb\n", i, &info->vm[i].uuid);
+}
+
 static void stsi_3_2_2(struct seq_file *m, struct sysinfo_3_2_2 *info)
 {
 	int i;
@@ -221,6 +248,8 @@ static void stsi_3_2_2(struct seq_file *m, struct sysinfo_3_2_2 *info)
 		seq_printf(m, "VM%02d CPUs Configured: %d\n", i, info->vm[i].cpus_configured);
 		seq_printf(m, "VM%02d CPUs Standby:    %d\n", i, info->vm[i].cpus_standby);
 		seq_printf(m, "VM%02d CPUs Reserved:   %d\n", i, info->vm[i].cpus_reserved);
+		print_ext_name(m, i, info);
+		print_uuid(m, i, info);
 	}
 }
 
 

+ 2 - 2
arch/s390/kvm/gaccess.c

@@ -357,8 +357,8 @@ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva,
 	union asce asce;
 
 
 	ctlreg0.val = vcpu->arch.sie_block->gcr[0];
-	edat1 = ctlreg0.edat && test_vfacility(8);
-	edat2 = edat1 && test_vfacility(78);
+	edat1 = ctlreg0.edat && test_kvm_facility(vcpu->kvm, 8);
+	edat2 = edat1 && test_kvm_facility(vcpu->kvm, 78);
 	asce.val = get_vcpu_asce(vcpu);
 	if (asce.r)
 		goto real_address;

+ 27 - 14
arch/s390/kvm/intercept.c

@@ -68,18 +68,27 @@ static int handle_noop(struct kvm_vcpu *vcpu)
 
 
 static int handle_stop(struct kvm_vcpu *vcpu)
 {
+	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
 	int rc = 0;
-	unsigned int action_bits;
+	uint8_t flags, stop_pending;
 
 
 	vcpu->stat.exit_stop_request++;
-	trace_kvm_s390_stop_request(vcpu->arch.local_int.action_bits);
 
 
-	action_bits = vcpu->arch.local_int.action_bits;
+	/* delay the stop if any non-stop irq is pending */
+	if (kvm_s390_vcpu_has_irq(vcpu, 1))
+		return 0;
+
+	/* avoid races with the injection/SIGP STOP code */
+	spin_lock(&li->lock);
+	flags = li->irq.stop.flags;
+	stop_pending = kvm_s390_is_stop_irq_pending(vcpu);
+	spin_unlock(&li->lock);
 
 
-	if (!(action_bits & ACTION_STOP_ON_STOP))
+	trace_kvm_s390_stop_request(stop_pending, flags);
+	if (!stop_pending)
 		return 0;
 
 
-	if (action_bits & ACTION_STORE_ON_STOP) {
+	if (flags & KVM_S390_STOP_FLAG_STORE_STATUS) {
 		rc = kvm_s390_vcpu_store_status(vcpu,
 						KVM_S390_STORE_STATUS_NOADDR);
 		if (rc)
@@ -279,11 +288,13 @@ static int handle_external_interrupt(struct kvm_vcpu *vcpu)
 		irq.type = KVM_S390_INT_CPU_TIMER;
 		break;
 	case EXT_IRQ_EXTERNAL_CALL:
-		if (kvm_s390_si_ext_call_pending(vcpu))
-			return 0;
 		irq.type = KVM_S390_INT_EXTERNAL_CALL;
 		irq.u.extcall.code = vcpu->arch.sie_block->extcpuaddr;
-		break;
+		rc = kvm_s390_inject_vcpu(vcpu, &irq);
+		/* ignore if another external call is already pending */
+		if (rc == -EBUSY)
+			return 0;
+		return rc;
 	default:
 		return -EOPNOTSUPP;
 	}
@@ -307,17 +318,19 @@ static int handle_mvpg_pei(struct kvm_vcpu *vcpu)
 	kvm_s390_get_regs_rre(vcpu, &reg1, &reg2);
 
 
 	/* Make sure that the source is paged-in */
-	srcaddr = kvm_s390_real_to_abs(vcpu, vcpu->run->s.regs.gprs[reg2]);
-	if (kvm_is_error_gpa(vcpu->kvm, srcaddr))
-		return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+	rc = guest_translate_address(vcpu, vcpu->run->s.regs.gprs[reg2],
+				     &srcaddr, 0);
+	if (rc)
+		return kvm_s390_inject_prog_cond(vcpu, rc);
 	rc = kvm_arch_fault_in_page(vcpu, srcaddr, 0);
 	if (rc != 0)
 		return rc;
 
 
 	/* Make sure that the destination is paged-in */
-	dstaddr = kvm_s390_real_to_abs(vcpu, vcpu->run->s.regs.gprs[reg1]);
-	if (kvm_is_error_gpa(vcpu->kvm, dstaddr))
-		return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+	rc = guest_translate_address(vcpu, vcpu->run->s.regs.gprs[reg1],
+				     &dstaddr, 1);
+	if (rc)
+		return kvm_s390_inject_prog_cond(vcpu, rc);
 	rc = kvm_arch_fault_in_page(vcpu, dstaddr, 1);
 	if (rc != 0)
 		return rc;

+ 134 - 57
arch/s390/kvm/interrupt.c

@@ -19,6 +19,7 @@
 #include <linux/bitmap.h>
 #include <asm/asm-offsets.h>
 #include <asm/uaccess.h>
+#include <asm/sclp.h>
 #include "kvm-s390.h"
 #include "gaccess.h"
 #include "trace-s390.h"
@@ -159,6 +160,12 @@ static unsigned long deliverable_local_irqs(struct kvm_vcpu *vcpu)
 	if (psw_mchk_disabled(vcpu))
 		active_mask &= ~IRQ_PEND_MCHK_MASK;
 
 
+	/*
+	 * STOP irqs will never be actively delivered. They are triggered via
+	 * intercept requests and cleared when the stop intercept is performed.
+	 */
+	__clear_bit(IRQ_PEND_SIGP_STOP, &active_mask);
+
 	return active_mask;
 }
 
 
@@ -186,9 +193,6 @@ static void __reset_intercept_indicators(struct kvm_vcpu *vcpu)
 					       LCTL_CR10 | LCTL_CR11);
 		vcpu->arch.sie_block->ictl |= (ICTL_STCTL | ICTL_PINT);
 	}
-
-	if (vcpu->arch.local_int.action_bits & ACTION_STOP_ON_STOP)
-		atomic_set_mask(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
 }
 
 
 static void __set_cpuflag(struct kvm_vcpu *vcpu, u32 flag)
@@ -216,11 +220,18 @@ static void set_intercept_indicators_mchk(struct kvm_vcpu *vcpu)
 		vcpu->arch.sie_block->lctl |= LCTL_CR14;
 }
 
 
+static void set_intercept_indicators_stop(struct kvm_vcpu *vcpu)
+{
+	if (kvm_s390_is_stop_irq_pending(vcpu))
+		__set_cpuflag(vcpu, CPUSTAT_STOP_INT);
+}
+
 /* Set interception request for non-deliverable local interrupts */
 static void set_intercept_indicators_local(struct kvm_vcpu *vcpu)
 {
 	set_intercept_indicators_ext(vcpu);
 	set_intercept_indicators_mchk(vcpu);
+	set_intercept_indicators_stop(vcpu);
 }
 
 
 static void __set_intercept_indicator(struct kvm_vcpu *vcpu,
@@ -392,18 +403,6 @@ static int __must_check __deliver_restart(struct kvm_vcpu *vcpu)
 	return rc ? -EFAULT : 0;
 }
 
 
-static int __must_check __deliver_stop(struct kvm_vcpu *vcpu)
-{
-	VCPU_EVENT(vcpu, 4, "%s", "interrupt: cpu stop");
-	vcpu->stat.deliver_stop_signal++;
-	trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_SIGP_STOP,
-					 0, 0);
-
-	__set_cpuflag(vcpu, CPUSTAT_STOP_INT);
-	clear_bit(IRQ_PEND_SIGP_STOP, &vcpu->arch.local_int.pending_irqs);
-	return 0;
-}
-
 static int __must_check __deliver_set_prefix(struct kvm_vcpu *vcpu)
 {
 	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
@@ -705,7 +704,6 @@ static const deliver_irq_t deliver_irq_funcs[] = {
 	[IRQ_PEND_EXT_CLOCK_COMP] = __deliver_ckc,
 	[IRQ_PEND_EXT_CPU_TIMER]  = __deliver_cpu_timer,
 	[IRQ_PEND_RESTART]        = __deliver_restart,
-	[IRQ_PEND_SIGP_STOP]      = __deliver_stop,
 	[IRQ_PEND_SET_PREFIX]     = __deliver_set_prefix,
 	[IRQ_PEND_PFAULT_INIT]    = __deliver_pfault_init,
 };
@@ -738,21 +736,20 @@ static int __must_check __deliver_floating_interrupt(struct kvm_vcpu *vcpu,
 	return rc;
 }
 
 
-/* Check whether SIGP interpretation facility has an external call pending */
-int kvm_s390_si_ext_call_pending(struct kvm_vcpu *vcpu)
+/* Check whether an external call is pending (deliverable or not) */
+int kvm_s390_ext_call_pending(struct kvm_vcpu *vcpu)
 {
-	atomic_t *sigp_ctrl = &vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].ctrl;
+	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+	uint8_t sigp_ctrl = vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sigp_ctrl;
 
 
-	if (!psw_extint_disabled(vcpu) &&
-	    (vcpu->arch.sie_block->gcr[0] & 0x2000ul) &&
-	    (atomic_read(sigp_ctrl) & SIGP_CTRL_C) &&
-	    (atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_ECALL_PEND))
-		return 1;
+	if (!sclp_has_sigpif())
+		return test_bit(IRQ_PEND_EXT_EXTERNAL, &li->pending_irqs);
 
 
-	return 0;
+	return (sigp_ctrl & SIGP_CTRL_C) &&
+	       (atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_ECALL_PEND);
 }
 
 
-int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu)
+int kvm_s390_vcpu_has_irq(struct kvm_vcpu *vcpu, int exclude_stop)
 {
 	struct kvm_s390_float_interrupt *fi = vcpu->arch.local_int.float_int;
 	struct kvm_s390_interrupt_info  *inti;
@@ -773,7 +770,13 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu)
 	if (!rc && kvm_cpu_has_pending_timer(vcpu))
 		rc = 1;
 
 
-	if (!rc && kvm_s390_si_ext_call_pending(vcpu))
+	/* external call pending and deliverable */
+	if (!rc && kvm_s390_ext_call_pending(vcpu) &&
+	    !psw_extint_disabled(vcpu) &&
+	    (vcpu->arch.sie_block->gcr[0] & 0x2000ul))
+		rc = 1;
+
+	if (!rc && !exclude_stop && kvm_s390_is_stop_irq_pending(vcpu))
 		rc = 1;
 
 
 	return rc;
@@ -804,14 +807,20 @@ int kvm_s390_handle_wait(struct kvm_vcpu *vcpu)
 		return -EOPNOTSUPP; /* disabled wait */
 	}
 
 
-	__set_cpu_idle(vcpu);
 	if (!ckc_interrupts_enabled(vcpu)) {
 		VCPU_EVENT(vcpu, 3, "%s", "enabled wait w/o timer");
+		__set_cpu_idle(vcpu);
 		goto no_timer;
 	}
 
 
 	now = get_tod_clock_fast() + vcpu->arch.sie_block->epoch;
 	sltime = tod_to_ns(vcpu->arch.sie_block->ckc - now);
+
+	/* underflow */
+	if (vcpu->arch.sie_block->ckc < now)
+		return 0;
+
+	__set_cpu_idle(vcpu);
 	hrtimer_start(&vcpu->arch.ckc_timer, ktime_set (0, sltime) , HRTIMER_MODE_REL);
 	VCPU_EVENT(vcpu, 5, "enabled wait via clock comparator: %llx ns", sltime);
 no_timer:
@@ -820,7 +829,7 @@ no_timer:
 	__unset_cpu_idle(vcpu);
 	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
 
 
-	hrtimer_try_to_cancel(&vcpu->arch.ckc_timer);
+	hrtimer_cancel(&vcpu->arch.ckc_timer);
 	return 0;
 }
 
 
@@ -840,10 +849,20 @@ void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu)
 enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer)
 {
 	struct kvm_vcpu *vcpu;
+	u64 now, sltime;
 
 
 	vcpu = container_of(timer, struct kvm_vcpu, arch.ckc_timer);
-	kvm_s390_vcpu_wakeup(vcpu);
+	now = get_tod_clock_fast() + vcpu->arch.sie_block->epoch;
+	sltime = tod_to_ns(vcpu->arch.sie_block->ckc - now);
 
 
+	/*
+	 * If the monotonic clock runs faster than the tod clock we might be
+	 * woken up too early and have to go back to sleep to avoid deadlocks.
+	 */
+	if (vcpu->arch.sie_block->ckc > now &&
+	    hrtimer_forward_now(timer, ns_to_ktime(sltime)))
+		return HRTIMER_RESTART;
+	kvm_s390_vcpu_wakeup(vcpu);
 	return HRTIMER_NORESTART;
 }
 
 
@@ -859,8 +878,7 @@ void kvm_s390_clear_local_irqs(struct kvm_vcpu *vcpu)
 
 
 	/* clear pending external calls set by sigp interpretation facility */
 	atomic_clear_mask(CPUSTAT_ECALL_PEND, li->cpuflags);
-	atomic_clear_mask(SIGP_CTRL_C,
-			  &vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].ctrl);
+	vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sigp_ctrl = 0;
 }
 
 
 int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
@@ -984,18 +1002,43 @@ static int __inject_pfault_init(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
 	return 0;
 }
 
 
-int __inject_extcall(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
+static int __inject_extcall_sigpif(struct kvm_vcpu *vcpu, uint16_t src_id)
+{
+	unsigned char new_val, old_val;
+	uint8_t *sigp_ctrl = &vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sigp_ctrl;
+
+	new_val = SIGP_CTRL_C | (src_id & SIGP_CTRL_SCN_MASK);
+	old_val = *sigp_ctrl & ~SIGP_CTRL_C;
+	if (cmpxchg(sigp_ctrl, old_val, new_val) != old_val) {
+		/* another external call is pending */
+		return -EBUSY;
+	}
+	atomic_set_mask(CPUSTAT_ECALL_PEND, &vcpu->arch.sie_block->cpuflags);
+	return 0;
+}
+
+static int __inject_extcall(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
 {
 	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
 	struct kvm_s390_extcall_info *extcall = &li->irq.extcall;
+	uint16_t src_id = irq->u.extcall.code;
 
 
 	VCPU_EVENT(vcpu, 3, "inject: external call source-cpu:%u",
-		   irq->u.extcall.code);
+		   src_id);
 	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_EXTERNAL_CALL,
-				   irq->u.extcall.code, 0, 2);
+				   src_id, 0, 2);
+
+	/* sending vcpu invalid */
+	if (src_id >= KVM_MAX_VCPUS ||
+	    kvm_get_vcpu(vcpu->kvm, src_id) == NULL)
+		return -EINVAL;
 
 
+	if (sclp_has_sigpif())
+		return __inject_extcall_sigpif(vcpu, src_id);
+
+	if (!test_and_set_bit(IRQ_PEND_EXT_EXTERNAL, &li->pending_irqs))
+		return -EBUSY;
 	*extcall = irq->u.extcall;
-	set_bit(IRQ_PEND_EXT_EXTERNAL, &li->pending_irqs);
 	atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
 	return 0;
 }
@@ -1006,23 +1049,41 @@ static int __inject_set_prefix(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
 	struct kvm_s390_prefix_info *prefix = &li->irq.prefix;
 
 
 	VCPU_EVENT(vcpu, 3, "inject: set prefix to %x (from user)",
-		   prefix->address);
+		   irq->u.prefix.address);
 	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_SIGP_SET_PREFIX,
-				   prefix->address, 0, 2);
+				   irq->u.prefix.address, 0, 2);
+
+	if (!is_vcpu_stopped(vcpu))
+		return -EBUSY;
 
 
 	*prefix = irq->u.prefix;
 	set_bit(IRQ_PEND_SET_PREFIX, &li->pending_irqs);
 	return 0;
 }
 
 
+#define KVM_S390_STOP_SUPP_FLAGS (KVM_S390_STOP_FLAG_STORE_STATUS)
 static int __inject_sigp_stop(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
 {
 	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+	struct kvm_s390_stop_info *stop = &li->irq.stop;
+	int rc = 0;
 
 
 	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_SIGP_STOP, 0, 0, 2);
 
 
-	li->action_bits |= ACTION_STOP_ON_STOP;
-	set_bit(IRQ_PEND_SIGP_STOP, &li->pending_irqs);
+	if (irq->u.stop.flags & ~KVM_S390_STOP_SUPP_FLAGS)
+		return -EINVAL;
+
+	if (is_vcpu_stopped(vcpu)) {
+		if (irq->u.stop.flags & KVM_S390_STOP_FLAG_STORE_STATUS)
+			rc = kvm_s390_store_status_unloaded(vcpu,
+						KVM_S390_STORE_STATUS_NOADDR);
+		return rc;
+	}
+
+	if (test_and_set_bit(IRQ_PEND_SIGP_STOP, &li->pending_irqs))
+		return -EBUSY;
+	stop->flags = irq->u.stop.flags;
+	__set_cpuflag(vcpu, CPUSTAT_STOP_INT);
 	return 0;
 }
 
 
@@ -1042,14 +1103,13 @@ static int __inject_sigp_emergency(struct kvm_vcpu *vcpu,
 				   struct kvm_s390_irq *irq)
 {
 	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
-	struct kvm_s390_emerg_info *emerg = &li->irq.emerg;
 
 
 	VCPU_EVENT(vcpu, 3, "inject: emergency %u\n",
 		   irq->u.emerg.code);
 	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_EMERGENCY,
-				   emerg->code, 0, 2);
+				   irq->u.emerg.code, 0, 2);
 
 
-	set_bit(emerg->code, li->sigp_emerg_pending);
+	set_bit(irq->u.emerg.code, li->sigp_emerg_pending);
 	set_bit(IRQ_PEND_EXT_EMERGENCY, &li->pending_irqs);
 	atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
 	return 0;
@@ -1061,9 +1121,9 @@ static int __inject_mchk(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
 	struct kvm_s390_mchk_info *mchk = &li->irq.mchk;
 
 
 	VCPU_EVENT(vcpu, 5, "inject: machine check parm64:%llx",
-		   mchk->mcic);
+		   irq->u.mchk.mcic);
 	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_MCHK, 0,
-				   mchk->mcic, 2);
+				   irq->u.mchk.mcic, 2);
 
 
 	/*
 	 * Because repressible machine checks can be indicated along with
@@ -1121,7 +1181,6 @@ struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm,
 
 
 	if ((!schid && !cr6) || (schid && cr6))
 		return NULL;
-	mutex_lock(&kvm->lock);
 	fi = &kvm->arch.float_int;
 	spin_lock(&fi->lock);
 	inti = NULL;
@@ -1149,7 +1208,6 @@ struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm,
 	if (list_empty(&fi->list))
 		atomic_set(&fi->active, 0);
 	spin_unlock(&fi->lock);
-	mutex_unlock(&kvm->lock);
 	return inti;
 }
 
 
@@ -1162,7 +1220,6 @@ static int __inject_vm(struct kvm *kvm, struct kvm_s390_interrupt_info *inti)
 	int sigcpu;
 	int rc = 0;
 
 
-	mutex_lock(&kvm->lock);
 	fi = &kvm->arch.float_int;
 	spin_lock(&fi->lock);
 	if (fi->irq_count >= KVM_S390_MAX_FLOAT_IRQS) {
@@ -1187,6 +1244,8 @@ static int __inject_vm(struct kvm *kvm, struct kvm_s390_interrupt_info *inti)
 		list_add_tail(&inti->list, &iter->list);
 	}
 	atomic_set(&fi->active, 1);
+	if (atomic_read(&kvm->online_vcpus) == 0)
+		goto unlock_fi;
 	sigcpu = find_first_bit(fi->idle_mask, KVM_MAX_VCPUS);
 	if (sigcpu == KVM_MAX_VCPUS) {
 		do {
@@ -1213,7 +1272,6 @@ static int __inject_vm(struct kvm *kvm, struct kvm_s390_interrupt_info *inti)
 	kvm_s390_vcpu_wakeup(kvm_get_vcpu(kvm, sigcpu));
 unlock_fi:
 	spin_unlock(&fi->lock);
-	mutex_unlock(&kvm->lock);
 	return rc;
 }
 
 
@@ -1221,6 +1279,7 @@ int kvm_s390_inject_vm(struct kvm *kvm,
 		       struct kvm_s390_interrupt *s390int)
 {
 	struct kvm_s390_interrupt_info *inti;
+	int rc;
 
 
 	inti = kzalloc(sizeof(*inti), GFP_KERNEL);
 	if (!inti)
@@ -1239,7 +1298,6 @@ int kvm_s390_inject_vm(struct kvm *kvm,
 		inti->ext.ext_params = s390int->parm;
 		break;
 	case KVM_S390_INT_PFAULT_DONE:
-		inti->type = s390int->type;
 		inti->ext.ext_params2 = s390int->parm64;
 		break;
 	case KVM_S390_MCHK:
@@ -1268,7 +1326,10 @@ int kvm_s390_inject_vm(struct kvm *kvm,
 	trace_kvm_s390_inject_vm(s390int->type, s390int->parm, s390int->parm64,
 				 2);
 
 
-	return __inject_vm(kvm, inti);
+	rc = __inject_vm(kvm, inti);
+	if (rc)
+		kfree(inti);
+	return rc;
 }
 
 
 void kvm_s390_reinject_io_int(struct kvm *kvm,
@@ -1290,13 +1351,16 @@ int s390int_to_s390irq(struct kvm_s390_interrupt *s390int,
 	case KVM_S390_SIGP_SET_PREFIX:
 		irq->u.prefix.address = s390int->parm;
 		break;
+	case KVM_S390_SIGP_STOP:
+		irq->u.stop.flags = s390int->parm;
+		break;
 	case KVM_S390_INT_EXTERNAL_CALL:
-		if (irq->u.extcall.code & 0xffff0000)
+		if (s390int->parm & 0xffff0000)
 			return -EINVAL;
 		irq->u.extcall.code = s390int->parm;
 		break;
 	case KVM_S390_INT_EMERGENCY:
-		if (irq->u.emerg.code & 0xffff0000)
+		if (s390int->parm & 0xffff0000)
 			return -EINVAL;
 		irq->u.emerg.code = s390int->parm;
 		break;
@@ -1307,6 +1371,23 @@ int s390int_to_s390irq(struct kvm_s390_interrupt *s390int,
 	return 0;
 }
 
 
+int kvm_s390_is_stop_irq_pending(struct kvm_vcpu *vcpu)
+{
+	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+
+	return test_bit(IRQ_PEND_SIGP_STOP, &li->pending_irqs);
+}
+
+void kvm_s390_clear_stop_irq(struct kvm_vcpu *vcpu)
+{
+	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+
+	spin_lock(&li->lock);
+	li->irq.stop.flags = 0;
+	clear_bit(IRQ_PEND_SIGP_STOP, &li->pending_irqs);
+	spin_unlock(&li->lock);
+}
+
 int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
 {
 	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
@@ -1363,7 +1444,6 @@ void kvm_s390_clear_float_irqs(struct kvm *kvm)
 	struct kvm_s390_float_interrupt *fi;
 	struct kvm_s390_interrupt_info	*n, *inti = NULL;
 
 
-	mutex_lock(&kvm->lock);
 	fi = &kvm->arch.float_int;
 	spin_lock(&fi->lock);
 	list_for_each_entry_safe(inti, n, &fi->list, list) {
@@ -1373,7 +1453,6 @@ void kvm_s390_clear_float_irqs(struct kvm *kvm)
 	fi->irq_count = 0;
 	atomic_set(&fi->active, 0);
 	spin_unlock(&fi->lock);
-	mutex_unlock(&kvm->lock);
 }
 
 
 static inline int copy_irq_to_user(struct kvm_s390_interrupt_info *inti,
@@ -1413,7 +1492,6 @@ static int get_all_floating_irqs(struct kvm *kvm, __u8 *buf, __u64 len)
 	int ret = 0;
 	int n = 0;
 
 
-	mutex_lock(&kvm->lock);
 	fi = &kvm->arch.float_int;
 	spin_lock(&fi->lock);
 
 
@@ -1432,7 +1510,6 @@ static int get_all_floating_irqs(struct kvm *kvm, __u8 *buf, __u64 len)
 	}
 
 
 	spin_unlock(&fi->lock);
-	mutex_unlock(&kvm->lock);
 
 
 	return ret < 0 ? ret : n;
 }

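One way to read the per-VM TOD handling added in the kvm-s390.c hunk below (numbers invented for illustration): if the host TOD reads 1000 while userspace writes a guest TOD of 4000 through KVM_S390_VM_TOD_LOW, the code stores epoch = 4000 - 1000 = 3000 in kvm->arch.epoch and in every VCPU's SIE block; a later read of the same attribute returns host_tod + epoch, so the guest clock keeps ticking at host speed while staying offset by 3000.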
+ 536 - 60
arch/s390/kvm/kvm-s390.c

@@ -22,6 +22,7 @@
 #include <linux/kvm.h>
 #include <linux/kvm_host.h>
 #include <linux/module.h>
+#include <linux/random.h>
 #include <linux/slab.h>
 #include <linux/timer.h>
 #include <asm/asm-offsets.h>
@@ -29,7 +30,6 @@
 #include <asm/pgtable.h>
 #include <asm/nmi.h>
 #include <asm/switch_to.h>
-#include <asm/facility.h>
 #include <asm/sclp.h>
 #include "kvm-s390.h"
 #include "gaccess.h"
@@ -50,6 +50,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 	{ "exit_instruction", VCPU_STAT(exit_instruction) },
 	{ "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
 	{ "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
+	{ "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
 	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
 	{ "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
 	{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
@@ -98,15 +99,20 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 	{ NULL }
 };
 
 
-unsigned long *vfacilities;
-static struct gmap_notifier gmap_notifier;
+/* upper facilities limit for kvm */
+unsigned long kvm_s390_fac_list_mask[] = {
+	0xff82fffbf4fc2000UL,
+	0x005c000000000000UL,
+};
 
 
-/* test availability of vfacility */
-int test_vfacility(unsigned long nr)
+unsigned long kvm_s390_fac_list_mask_size(void)
 {
-	return __test_facility(nr, (void *) vfacilities);
+	BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
+	return ARRAY_SIZE(kvm_s390_fac_list_mask);
 }
 
 
+static struct gmap_notifier gmap_notifier;
+
 /* Section: not file related */
 int kvm_arch_hardware_enable(void)
 {
@@ -166,6 +172,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 	case KVM_CAP_S390_IRQCHIP:
 	case KVM_CAP_VM_ATTRIBUTES:
 	case KVM_CAP_MP_STATE:
+	case KVM_CAP_S390_USER_SIGP:
 		r = 1;
 		break;
 	case KVM_CAP_NR_VCPUS:
@@ -254,6 +261,10 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
 		kvm->arch.use_irqchip = 1;
 		r = 0;
 		break;
+	case KVM_CAP_S390_USER_SIGP:
+		kvm->arch.user_sigp = 1;
+		r = 0;
+		break;
 	default:
 		r = -EINVAL;
 		break;
@@ -261,7 +272,24 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
 	return r;
 }
 
 
-static int kvm_s390_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
+static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+	int ret;
+
+	switch (attr->attr) {
+	case KVM_S390_VM_MEM_LIMIT_SIZE:
+		ret = 0;
+		if (put_user(kvm->arch.gmap->asce_end, (u64 __user *)attr->addr))
+			ret = -EFAULT;
+		break;
+	default:
+		ret = -ENXIO;
+		break;
+	}
+	return ret;
+}
+
+static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
 {
 {
 	int ret;
 	unsigned int idx;
@@ -283,6 +311,36 @@ static int kvm_s390_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
 		mutex_unlock(&kvm->lock);
 		ret = 0;
 		ret = 0;
 		break;
 		break;
+	case KVM_S390_VM_MEM_LIMIT_SIZE: {
+		unsigned long new_limit;
+
+		if (kvm_is_ucontrol(kvm))
+			return -EINVAL;
+
+		if (get_user(new_limit, (u64 __user *)attr->addr))
+			return -EFAULT;
+
+		if (new_limit > kvm->arch.gmap->asce_end)
+			return -E2BIG;
+
+		ret = -EBUSY;
+		mutex_lock(&kvm->lock);
+		if (atomic_read(&kvm->online_vcpus) == 0) {
+			/* gmap_alloc will round the limit up */
+			struct gmap *new = gmap_alloc(current->mm, new_limit);
+
+			if (!new) {
+				ret = -ENOMEM;
+			} else {
+				gmap_free(kvm->arch.gmap);
+				new->private = kvm;
+				kvm->arch.gmap = new;
+				ret = 0;
+			}
+		}
+		mutex_unlock(&kvm->lock);
+		break;
+	}
 	default:
 		ret = -ENXIO;
 		break;
 		break;
@@ -290,13 +348,276 @@ static int kvm_s390_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
 	return ret;
 }
 
 
+static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
+
+static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+	struct kvm_vcpu *vcpu;
+	int i;
+
+	if (!test_kvm_facility(kvm, 76))
+		return -EINVAL;
+
+	mutex_lock(&kvm->lock);
+	switch (attr->attr) {
+	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
+		get_random_bytes(
+			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
+			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
+		kvm->arch.crypto.aes_kw = 1;
+		break;
+	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
+		get_random_bytes(
+			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
+			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
+		kvm->arch.crypto.dea_kw = 1;
+		break;
+	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
+		kvm->arch.crypto.aes_kw = 0;
+		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
+			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
+		break;
+	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
+		kvm->arch.crypto.dea_kw = 0;
+		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
+			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
+		break;
+	default:
+		mutex_unlock(&kvm->lock);
+		return -ENXIO;
+	}
+
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		kvm_s390_vcpu_crypto_setup(vcpu);
+		exit_sie(vcpu);
+	}
+	mutex_unlock(&kvm->lock);
+	return 0;
+}
+
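
Flipping one of the crypto attributes from userspace is even simpler — the attribute itself is the whole message, so addr can stay zero. A hedged sketch (vm_fd as in the earlier sketch); the loop above then kicks every vCPU out of SIE so the freshly generated wrapping keys take effect on the next entry:

struct kvm_device_attr attr = {
	.group = KVM_S390_VM_CRYPTO,
	.attr  = KVM_S390_VM_CRYPTO_ENABLE_AES_KW,
};

ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);	/* -EINVAL unless facility 76 is available */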
+static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+	u8 gtod_high;
+
+	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
+					   sizeof(gtod_high)))
+		return -EFAULT;
+
+	if (gtod_high != 0)
+		return -EINVAL;
+
+	return 0;
+}
+
+static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+	struct kvm_vcpu *cur_vcpu;
+	unsigned int vcpu_idx;
+	u64 host_tod, gtod;
+	int r;
+
+	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
+		return -EFAULT;
+
+	r = store_tod_clock(&host_tod);
+	if (r)
+		return r;
+
+	mutex_lock(&kvm->lock);
+	kvm->arch.epoch = gtod - host_tod;
+	kvm_for_each_vcpu(vcpu_idx, cur_vcpu, kvm) {
+		cur_vcpu->arch.sie_block->epoch = kvm->arch.epoch;
+		exit_sie(cur_vcpu);
+	}
+	mutex_unlock(&kvm->lock);
+	return 0;
+}
+
+static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+	int ret;
+
+	if (attr->flags)
+		return -EINVAL;
+
+	switch (attr->attr) {
+	case KVM_S390_VM_TOD_HIGH:
+		ret = kvm_s390_set_tod_high(kvm, attr);
+		break;
+	case KVM_S390_VM_TOD_LOW:
+		ret = kvm_s390_set_tod_low(kvm, attr);
+		break;
+	default:
+		ret = -ENXIO;
+		break;
+	}
+	return ret;
+}
+
+static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+	u8 gtod_high = 0;
+
+	if (copy_to_user((void __user *)attr->addr, &gtod_high,
+					 sizeof(gtod_high)))
+		return -EFAULT;
+
+	return 0;
+}
+
+static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+	u64 host_tod, gtod;
+	int r;
+
+	r = store_tod_clock(&host_tod);
+	if (r)
+		return r;
+
+	gtod = host_tod + kvm->arch.epoch;
+	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
+		return -EFAULT;
+
+	return 0;
+}
+
+static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+	int ret;
+
+	if (attr->flags)
+		return -EINVAL;
+
+	switch (attr->attr) {
+	case KVM_S390_VM_TOD_HIGH:
+		ret = kvm_s390_get_tod_high(kvm, attr);
+		break;
+	case KVM_S390_VM_TOD_LOW:
+		ret = kvm_s390_get_tod_low(kvm, attr);
+		break;
+	default:
+		ret = -ENXIO;
+		break;
+	}
+	return ret;
+}
+
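
The guest TOD clock is never stored directly; only its offset against the host TOD (the epoch) is kept per VM and propagated into every SIE control block. The arithmetic implied by the two helpers above, written out as a sketch (variable names illustrative):

/* set path (kvm_s390_set_tod_low): */
kvm->arch.epoch = requested_guest_tod - host_tod_at_set_time;

/* get path (kvm_s390_get_tod_low): */
reported_guest_tod = host_tod_now + kvm->arch.epoch;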
+static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+	struct kvm_s390_vm_cpu_processor *proc;
+	int ret = 0;
+
+	mutex_lock(&kvm->lock);
+	if (atomic_read(&kvm->online_vcpus)) {
+		ret = -EBUSY;
+		goto out;
+	}
+	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
+	if (!proc) {
+		ret = -ENOMEM;
+		goto out;
+	}
+	if (!copy_from_user(proc, (void __user *)attr->addr,
+			    sizeof(*proc))) {
+		memcpy(&kvm->arch.model.cpu_id, &proc->cpuid,
+		       sizeof(struct cpuid));
+		kvm->arch.model.ibc = proc->ibc;
+		memcpy(kvm->arch.model.fac->kvm, proc->fac_list,
+		       S390_ARCH_FAC_LIST_SIZE_BYTE);
+	} else
+		ret = -EFAULT;
+	kfree(proc);
+out:
+	mutex_unlock(&kvm->lock);
+	return ret;
+}
+
+static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+	int ret = -ENXIO;
+
+	switch (attr->attr) {
+	case KVM_S390_VM_CPU_PROCESSOR:
+		ret = kvm_s390_set_processor(kvm, attr);
+		break;
+	}
+	return ret;
+}
+
+static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+	struct kvm_s390_vm_cpu_processor *proc;
+	int ret = 0;
+
+	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
+	if (!proc) {
+		ret = -ENOMEM;
+		goto out;
+	}
+	memcpy(&proc->cpuid, &kvm->arch.model.cpu_id, sizeof(struct cpuid));
+	proc->ibc = kvm->arch.model.ibc;
+	memcpy(&proc->fac_list, kvm->arch.model.fac->kvm, S390_ARCH_FAC_LIST_SIZE_BYTE);
+	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
+		ret = -EFAULT;
+	kfree(proc);
+out:
+	return ret;
+}
+
+static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+	struct kvm_s390_vm_cpu_machine *mach;
+	int ret = 0;
+
+	mach = kzalloc(sizeof(*mach), GFP_KERNEL);
+	if (!mach) {
+		ret = -ENOMEM;
+		goto out;
+	}
+	get_cpu_id((struct cpuid *) &mach->cpuid);
+	mach->ibc = sclp_get_ibc();
+	memcpy(&mach->fac_mask, kvm_s390_fac_list_mask,
+	       kvm_s390_fac_list_mask_size() * sizeof(u64));
+	memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
+	       S390_ARCH_FAC_LIST_SIZE_U64);
+	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
+		ret = -EFAULT;
+	kfree(mach);
+out:
+	return ret;
+}
+
+static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+	int ret = -ENXIO;
+
+	switch (attr->attr) {
+	case KVM_S390_VM_CPU_PROCESSOR:
+		ret = kvm_s390_get_processor(kvm, attr);
+		break;
+	case KVM_S390_VM_CPU_MACHINE:
+		ret = kvm_s390_get_machine(kvm, attr);
+		break;
+	}
+	return ret;
+}
+
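
A typical consumer reads the machine properties once and, before creating any vCPU, programs the processor properties the guest should see. A sketch — not part of this patch — assuming the kvm_s390_vm_cpu_machine and kvm_s390_vm_cpu_processor layouts from the s390 uapi headers (the fields used are exactly the ones touched by the functions above):

struct kvm_s390_vm_cpu_machine mach;
struct kvm_s390_vm_cpu_processor proc;
struct kvm_device_attr attr = {
	.group = KVM_S390_VM_CPU_MODEL,
	.attr  = KVM_S390_VM_CPU_MACHINE,
	.addr  = (uint64_t)&mach,
};

ioctl(vm_fd, KVM_GET_DEVICE_ATTR, &attr);	/* fills cpuid, ibc, fac_mask, fac_list */

/* derive proc (cpuid, ibc, fac_list) from mach, then, while no vCPU exists: */
attr.attr = KVM_S390_VM_CPU_PROCESSOR;
attr.addr = (uint64_t)&proc;
ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);	/* -EBUSY once vCPUs are online */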
 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
 {
 {
 	int ret;
 	int ret;
 
 
 	switch (attr->group) {
 	switch (attr->group) {
 	case KVM_S390_VM_MEM_CTRL:
 	case KVM_S390_VM_MEM_CTRL:
-		ret = kvm_s390_mem_control(kvm, attr);
+		ret = kvm_s390_set_mem_control(kvm, attr);
+		break;
+	case KVM_S390_VM_TOD:
+		ret = kvm_s390_set_tod(kvm, attr);
+		break;
+	case KVM_S390_VM_CPU_MODEL:
+		ret = kvm_s390_set_cpu_model(kvm, attr);
+		break;
+	case KVM_S390_VM_CRYPTO:
+		ret = kvm_s390_vm_set_crypto(kvm, attr);
 		break;
 		break;
 	default:
 	default:
 		ret = -ENXIO;
 		ret = -ENXIO;
@@ -308,7 +629,24 @@ static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
 
 
 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
 {
 {
-	return -ENXIO;
+	int ret;
+
+	switch (attr->group) {
+	case KVM_S390_VM_MEM_CTRL:
+		ret = kvm_s390_get_mem_control(kvm, attr);
+		break;
+	case KVM_S390_VM_TOD:
+		ret = kvm_s390_get_tod(kvm, attr);
+		break;
+	case KVM_S390_VM_CPU_MODEL:
+		ret = kvm_s390_get_cpu_model(kvm, attr);
+		break;
+	default:
+		ret = -ENXIO;
+		break;
+	}
+
+	return ret;
 }
 }
 
 
 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
@@ -320,6 +658,42 @@ static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
 		switch (attr->attr) {
 		switch (attr->attr) {
 		case KVM_S390_VM_MEM_ENABLE_CMMA:
 		case KVM_S390_VM_MEM_ENABLE_CMMA:
 		case KVM_S390_VM_MEM_CLR_CMMA:
 		case KVM_S390_VM_MEM_CLR_CMMA:
+		case KVM_S390_VM_MEM_LIMIT_SIZE:
+			ret = 0;
+			break;
+		default:
+			ret = -ENXIO;
+			break;
+		}
+		break;
+	case KVM_S390_VM_TOD:
+		switch (attr->attr) {
+		case KVM_S390_VM_TOD_LOW:
+		case KVM_S390_VM_TOD_HIGH:
+			ret = 0;
+			break;
+		default:
+			ret = -ENXIO;
+			break;
+		}
+		break;
+	case KVM_S390_VM_CPU_MODEL:
+		switch (attr->attr) {
+		case KVM_S390_VM_CPU_PROCESSOR:
+		case KVM_S390_VM_CPU_MACHINE:
+			ret = 0;
+			break;
+		default:
+			ret = -ENXIO;
+			break;
+		}
+		break;
+	case KVM_S390_VM_CRYPTO:
+		switch (attr->attr) {
+		case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
+		case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
+		case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
+		case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
 			ret = 0;
 			ret = 0;
 			break;
 			break;
 		default:
 		default:
@@ -401,9 +775,61 @@ long kvm_arch_vm_ioctl(struct file *filp,
 	return r;
 	return r;
 }
 }
 
 
+static int kvm_s390_query_ap_config(u8 *config)
+{
+	u32 fcn_code = 0x04000000UL;
+	u32 cc;
+
+	asm volatile(
+		"lgr 0,%1\n"
+		"lgr 2,%2\n"
+		".long 0xb2af0000\n"		/* PQAP(QCI) */
+		"ipm %0\n"
+		"srl %0,28\n"
+		: "=r" (cc)
+		: "r" (fcn_code), "r" (config)
+		: "cc", "0", "2", "memory"
+	);
+
+	return cc;
+}
+
+static int kvm_s390_apxa_installed(void)
+{
+	u8 config[128];
+	int cc;
+
+	if (test_facility(2) && test_facility(12)) {
+		cc = kvm_s390_query_ap_config(config);
+
+		if (cc)
+			pr_err("PQAP(QCI) failed with cc=%d\n", cc);
+		else
+			return config[0] & 0x40;
+	}
+
+	return 0;
+}
+
+static void kvm_s390_set_crycb_format(struct kvm *kvm)
+{
+	kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
+
+	if (kvm_s390_apxa_installed())
+		kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
+	else
+		kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
+}
+
+static void kvm_s390_get_cpu_id(struct cpuid *cpu_id)
+{
+	get_cpu_id(cpu_id);
+	cpu_id->version = 0xff;
+}
+
 static int kvm_s390_crypto_init(struct kvm *kvm)
 static int kvm_s390_crypto_init(struct kvm *kvm)
 {
 {
-	if (!test_vfacility(76))
+	if (!test_kvm_facility(kvm, 76))
 		return 0;
 		return 0;
 
 
 	kvm->arch.crypto.crycb = kzalloc(sizeof(*kvm->arch.crypto.crycb),
 	kvm->arch.crypto.crycb = kzalloc(sizeof(*kvm->arch.crypto.crycb),
@@ -411,15 +837,18 @@ static int kvm_s390_crypto_init(struct kvm *kvm)
 	if (!kvm->arch.crypto.crycb)
 	if (!kvm->arch.crypto.crycb)
 		return -ENOMEM;
 		return -ENOMEM;
 
 
-	kvm->arch.crypto.crycbd = (__u32) (unsigned long) kvm->arch.crypto.crycb |
-				  CRYCB_FORMAT1;
+	kvm_s390_set_crycb_format(kvm);
+
+	/* Disable AES/DEA protected key functions by default */
+	kvm->arch.crypto.aes_kw = 0;
+	kvm->arch.crypto.dea_kw = 0;
 
 
 	return 0;
 	return 0;
 }
 }
 
 
 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 {
 {
-	int rc;
+	int i, rc;
 	char debug_name[16];
 	char debug_name[16];
 	static unsigned long sca_offset;
 	static unsigned long sca_offset;
 
 
@@ -454,6 +883,46 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 	if (!kvm->arch.dbf)
 	if (!kvm->arch.dbf)
 		goto out_nodbf;
 		goto out_nodbf;
 
 
+	/*
+	 * The architectural maximum number of facility bits is 16 kbit, which
+	 * takes 2 kbyte to store. We therefore need a full page to hold both
+	 * the active copy (arch.model.fac->sie) and the current facility set
+	 * (arch.model.fac->kvm). The page's address must fit into 31 bits and
+	 * be word aligned.
+	 */
+	kvm->arch.model.fac =
+		(struct s390_model_fac *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
+	if (!kvm->arch.model.fac)
+		goto out_nofac;
+
+	memcpy(kvm->arch.model.fac->kvm, S390_lowcore.stfle_fac_list,
+	       S390_ARCH_FAC_LIST_SIZE_U64);
+
+	/*
+	 * If this KVM host does *not* run in an LPAR, relax the facility bits
+	 * of the kvm facility mask by clearing every facility the host lacks.
+	 * This allows the right CPU model to be determined from the remaining
+	 * facilities. Live guest migration must prohibit migrating a KVM guest
+	 * from an LPAR host to a non-LPAR host.
+	 */
+	if (!MACHINE_IS_LPAR)
+		for (i = 0; i < kvm_s390_fac_list_mask_size(); i++)
+			kvm_s390_fac_list_mask[i] &= kvm->arch.model.fac->kvm[i];
+
+	/*
+	 * Apply the kvm facility mask to limit the kvm supported/tolerated
+	 * facility list.
+	 */
+	for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
+		if (i < kvm_s390_fac_list_mask_size())
+			kvm->arch.model.fac->kvm[i] &= kvm_s390_fac_list_mask[i];
+		else
+			kvm->arch.model.fac->kvm[i] = 0UL;
+	}
+
+	kvm_s390_get_cpu_id(&kvm->arch.model.cpu_id);
+	kvm->arch.model.ibc = sclp_get_ibc() & 0x0fff;
+
 	if (kvm_s390_crypto_init(kvm) < 0)
 	if (kvm_s390_crypto_init(kvm) < 0)
 		goto out_crypto;
 		goto out_crypto;
 
 
@@ -477,6 +946,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 
 
 	kvm->arch.css_support = 0;
 	kvm->arch.css_support = 0;
 	kvm->arch.use_irqchip = 0;
 	kvm->arch.use_irqchip = 0;
+	kvm->arch.epoch = 0;
 
 
 	spin_lock_init(&kvm->arch.start_stop_lock);
 	spin_lock_init(&kvm->arch.start_stop_lock);
 
 
@@ -484,6 +954,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 out_nogmap:
 out_nogmap:
 	kfree(kvm->arch.crypto.crycb);
 	kfree(kvm->arch.crypto.crycb);
 out_crypto:
 out_crypto:
+	free_page((unsigned long)kvm->arch.model.fac);
+out_nofac:
 	debug_unregister(kvm->arch.dbf);
 	debug_unregister(kvm->arch.dbf);
 out_nodbf:
 out_nodbf:
 	free_page((unsigned long)(kvm->arch.sca));
 	free_page((unsigned long)(kvm->arch.sca));
@@ -536,6 +1008,7 @@ static void kvm_free_vcpus(struct kvm *kvm)
 void kvm_arch_destroy_vm(struct kvm *kvm)
 void kvm_arch_destroy_vm(struct kvm *kvm)
 {
 {
 	kvm_free_vcpus(kvm);
 	kvm_free_vcpus(kvm);
+	free_page((unsigned long)kvm->arch.model.fac);
 	free_page((unsigned long)(kvm->arch.sca));
 	free_page((unsigned long)(kvm->arch.sca));
 	debug_unregister(kvm->arch.dbf);
 	debug_unregister(kvm->arch.dbf);
 	kfree(kvm->arch.crypto.crycb);
 	kfree(kvm->arch.crypto.crycb);
@@ -546,25 +1019,30 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
 }
 }
 
 
 /* Section: vcpu related */
 /* Section: vcpu related */
+static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
+{
+	vcpu->arch.gmap = gmap_alloc(current->mm, -1UL);
+	if (!vcpu->arch.gmap)
+		return -ENOMEM;
+	vcpu->arch.gmap->private = vcpu->kvm;
+
+	return 0;
+}
+
 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 {
 {
 	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
 	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
 	kvm_clear_async_pf_completion_queue(vcpu);
 	kvm_clear_async_pf_completion_queue(vcpu);
-	if (kvm_is_ucontrol(vcpu->kvm)) {
-		vcpu->arch.gmap = gmap_alloc(current->mm, -1UL);
-		if (!vcpu->arch.gmap)
-			return -ENOMEM;
-		vcpu->arch.gmap->private = vcpu->kvm;
-		return 0;
-	}
-
-	vcpu->arch.gmap = vcpu->kvm->arch.gmap;
 	vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
 	vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
 				    KVM_SYNC_GPRS |
 				    KVM_SYNC_GPRS |
 				    KVM_SYNC_ACRS |
 				    KVM_SYNC_ACRS |
 				    KVM_SYNC_CRS |
 				    KVM_SYNC_CRS |
 				    KVM_SYNC_ARCH0 |
 				    KVM_SYNC_ARCH0 |
 				    KVM_SYNC_PFAULT;
 				    KVM_SYNC_PFAULT;
+
+	if (kvm_is_ucontrol(vcpu->kvm))
+		return __kvm_ucontrol_vcpu_init(vcpu);
+
 	return 0;
 	return 0;
 }
 }
 
 
@@ -615,16 +1093,27 @@ static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
 	kvm_s390_clear_local_irqs(vcpu);
 	kvm_s390_clear_local_irqs(vcpu);
 }
 }
 
 
-int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
+void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
 {
 {
-	return 0;
+	mutex_lock(&vcpu->kvm->lock);
+	vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
+	mutex_unlock(&vcpu->kvm->lock);
+	if (!kvm_is_ucontrol(vcpu->kvm))
+		vcpu->arch.gmap = vcpu->kvm->arch.gmap;
 }
 }
 
 
 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
 {
 {
-	if (!test_vfacility(76))
+	if (!test_kvm_facility(vcpu->kvm, 76))
 		return;
 		return;
 
 
+	vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
+
+	if (vcpu->kvm->arch.crypto.aes_kw)
+		vcpu->arch.sie_block->ecb3 |= ECB3_AES;
+	if (vcpu->kvm->arch.crypto.dea_kw)
+		vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
+
 	vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
 	vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
 }
 }
 
 
@@ -654,14 +1143,15 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 						    CPUSTAT_STOPPED |
 						    CPUSTAT_STOPPED |
 						    CPUSTAT_GED);
 						    CPUSTAT_GED);
 	vcpu->arch.sie_block->ecb   = 6;
 	vcpu->arch.sie_block->ecb   = 6;
-	if (test_vfacility(50) && test_vfacility(73))
+	if (test_kvm_facility(vcpu->kvm, 50) && test_kvm_facility(vcpu->kvm, 73))
 		vcpu->arch.sie_block->ecb |= 0x10;
 		vcpu->arch.sie_block->ecb |= 0x10;
 
 
 	vcpu->arch.sie_block->ecb2  = 8;
 	vcpu->arch.sie_block->ecb2  = 8;
-	vcpu->arch.sie_block->eca   = 0xD1002000U;
+	vcpu->arch.sie_block->eca   = 0xC1002000U;
 	if (sclp_has_siif())
 	if (sclp_has_siif())
 		vcpu->arch.sie_block->eca |= 1;
 		vcpu->arch.sie_block->eca |= 1;
-	vcpu->arch.sie_block->fac   = (int) (long) vfacilities;
+	if (sclp_has_sigpif())
+		vcpu->arch.sie_block->eca |= 0x10000000U;
 	vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE |
 	vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE |
 				      ICTL_TPROT;
 				      ICTL_TPROT;
 
 
@@ -670,10 +1160,15 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 		if (rc)
 		if (rc)
 			return rc;
 			return rc;
 	}
 	}
-	hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
+	hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 	vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
 	vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
-	get_cpu_id(&vcpu->arch.cpu_id);
-	vcpu->arch.cpu_id.version = 0xff;
+
+	mutex_lock(&vcpu->kvm->lock);
+	vcpu->arch.cpu_id = vcpu->kvm->arch.model.cpu_id;
+	memcpy(vcpu->kvm->arch.model.fac->sie, vcpu->kvm->arch.model.fac->kvm,
+	       S390_ARCH_FAC_LIST_SIZE_BYTE);
+	vcpu->arch.sie_block->ibc = vcpu->kvm->arch.model.ibc;
+	mutex_unlock(&vcpu->kvm->lock);
 
 
 	kvm_s390_vcpu_crypto_setup(vcpu);
 	kvm_s390_vcpu_crypto_setup(vcpu);
 
 
@@ -717,6 +1212,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
 		vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca;
 		vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca;
 		set_bit(63 - id, (unsigned long *) &kvm->arch.sca->mcn);
 		set_bit(63 - id, (unsigned long *) &kvm->arch.sca->mcn);
 	}
 	}
+	vcpu->arch.sie_block->fac = (int) (long) kvm->arch.model.fac->sie;
 
 
 	spin_lock_init(&vcpu->arch.local_int.lock);
 	spin_lock_init(&vcpu->arch.local_int.lock);
 	vcpu->arch.local_int.float_int = &kvm->arch.float_int;
 	vcpu->arch.local_int.float_int = &kvm->arch.float_int;
@@ -741,7 +1237,7 @@ out:
 
 
 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
 {
 {
-	return kvm_cpu_has_interrupt(vcpu);
+	return kvm_s390_vcpu_has_irq(vcpu, 0);
 }
 }
 
 
 void s390_vcpu_block(struct kvm_vcpu *vcpu)
 void s390_vcpu_block(struct kvm_vcpu *vcpu)
@@ -869,6 +1365,8 @@ static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
 	case KVM_REG_S390_PFTOKEN:
 	case KVM_REG_S390_PFTOKEN:
 		r = get_user(vcpu->arch.pfault_token,
 		r = get_user(vcpu->arch.pfault_token,
 			     (u64 __user *)reg->addr);
 			     (u64 __user *)reg->addr);
+		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
+			kvm_clear_async_pf_completion_queue(vcpu);
 		break;
 		break;
 	case KVM_REG_S390_PFCOMPARE:
 	case KVM_REG_S390_PFCOMPARE:
 		r = get_user(vcpu->arch.pfault_compare,
 		r = get_user(vcpu->arch.pfault_compare,
@@ -1176,7 +1674,7 @@ static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
 		return 0;
 		return 0;
 	if (psw_extint_disabled(vcpu))
 	if (psw_extint_disabled(vcpu))
 		return 0;
 		return 0;
-	if (kvm_cpu_has_interrupt(vcpu))
+	if (kvm_s390_vcpu_has_irq(vcpu, 0))
 		return 0;
 		return 0;
 	if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
 	if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
 		return 0;
 		return 0;
@@ -1341,6 +1839,8 @@ static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 		vcpu->arch.pfault_token = kvm_run->s.regs.pft;
 		vcpu->arch.pfault_token = kvm_run->s.regs.pft;
 		vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
 		vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
 		vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
 		vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
+		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
+			kvm_clear_async_pf_completion_queue(vcpu);
 	}
 	}
 	kvm_run->kvm_dirty_regs = 0;
 	kvm_run->kvm_dirty_regs = 0;
 }
 }
@@ -1559,15 +2059,10 @@ void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
 	spin_lock(&vcpu->kvm->arch.start_stop_lock);
 	spin_lock(&vcpu->kvm->arch.start_stop_lock);
 	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
 	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
 
 
-	/* Need to lock access to action_bits to avoid a SIGP race condition */
-	spin_lock(&vcpu->arch.local_int.lock);
-	atomic_set_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
-
 	/* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */
 	/* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */
-	vcpu->arch.local_int.action_bits &=
-				 ~(ACTION_STOP_ON_STOP | ACTION_STORE_ON_STOP);
-	spin_unlock(&vcpu->arch.local_int.lock);
+	kvm_s390_clear_stop_irq(vcpu);
 
 
+	atomic_set_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
 	__disable_ibs_on_vcpu(vcpu);
 	__disable_ibs_on_vcpu(vcpu);
 
 
 	for (i = 0; i < online_vcpus; i++) {
 	for (i = 0; i < online_vcpus; i++) {
@@ -1783,30 +2278,11 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
 
 
 static int __init kvm_s390_init(void)
 static int __init kvm_s390_init(void)
 {
 {
-	int ret;
-	ret = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
-	if (ret)
-		return ret;
-
-	/*
-	 * guests can ask for up to 255+1 double words, we need a full page
-	 * to hold the maximum amount of facilities. On the other hand, we
-	 * only set facilities that are known to work in KVM.
-	 */
-	vfacilities = (unsigned long *) get_zeroed_page(GFP_KERNEL|GFP_DMA);
-	if (!vfacilities) {
-		kvm_exit();
-		return -ENOMEM;
-	}
-	memcpy(vfacilities, S390_lowcore.stfle_fac_list, 16);
-	vfacilities[0] &= 0xff82fffbf47c2000UL;
-	vfacilities[1] &= 0x005c000000000000UL;
-	return 0;
+	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
 }
 }
 
 
 static void __exit kvm_s390_exit(void)
 static void __exit kvm_s390_exit(void)
 {
 {
-	free_page((unsigned long) vfacilities);
 	kvm_exit();
 	kvm_exit();
 }
 }
 
 

+ 13 - 6
arch/s390/kvm/kvm-s390.h

@@ -18,12 +18,10 @@
 #include <linux/hrtimer.h>
 #include <linux/hrtimer.h>
 #include <linux/kvm.h>
 #include <linux/kvm.h>
 #include <linux/kvm_host.h>
 #include <linux/kvm_host.h>
+#include <asm/facility.h>
 
 
 typedef int (*intercept_handler_t)(struct kvm_vcpu *vcpu);
 typedef int (*intercept_handler_t)(struct kvm_vcpu *vcpu);
 
 
-/* declare vfacilities extern */
-extern unsigned long *vfacilities;
-
 /* Transactional Memory Execution related macros */
 /* Transactional Memory Execution related macros */
 #define IS_TE_ENABLED(vcpu)	((vcpu->arch.sie_block->ecb & 0x10))
 #define IS_TE_ENABLED(vcpu)	((vcpu->arch.sie_block->ecb & 0x10))
 #define TDB_FORMAT1		1
 #define TDB_FORMAT1		1
@@ -127,6 +125,12 @@ static inline void kvm_s390_set_psw_cc(struct kvm_vcpu *vcpu, unsigned long cc)
 	vcpu->arch.sie_block->gpsw.mask |= cc << 44;
 	vcpu->arch.sie_block->gpsw.mask |= cc << 44;
 }
 }
 
 
+/* test availability of a facility in a kvm instance */
+static inline int test_kvm_facility(struct kvm *kvm, unsigned long nr)
+{
+	return __test_facility(nr, kvm->arch.model.fac->kvm);
+}
+
 /* are cpu states controlled by user space */
 /* are cpu states controlled by user space */
 static inline int kvm_s390_user_cpu_state_ctrl(struct kvm *kvm)
 static inline int kvm_s390_user_cpu_state_ctrl(struct kvm *kvm)
 {
 {
@@ -183,7 +187,8 @@ int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu);
 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu);
 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu);
 /* is cmma enabled */
 /* is cmma enabled */
 bool kvm_s390_cmma_enabled(struct kvm *kvm);
 bool kvm_s390_cmma_enabled(struct kvm *kvm);
-int test_vfacility(unsigned long nr);
+unsigned long kvm_s390_fac_list_mask_size(void);
+extern unsigned long kvm_s390_fac_list_mask[];
 
 
 /* implemented in diag.c */
 /* implemented in diag.c */
 int kvm_s390_handle_diag(struct kvm_vcpu *vcpu);
 int kvm_s390_handle_diag(struct kvm_vcpu *vcpu);
@@ -228,11 +233,13 @@ int s390int_to_s390irq(struct kvm_s390_interrupt *s390int,
 			struct kvm_s390_irq *s390irq);
 			struct kvm_s390_irq *s390irq);
 
 
 /* implemented in interrupt.c */
 /* implemented in interrupt.c */
-int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu);
+int kvm_s390_vcpu_has_irq(struct kvm_vcpu *vcpu, int exclude_stop);
 int psw_extint_disabled(struct kvm_vcpu *vcpu);
 int psw_extint_disabled(struct kvm_vcpu *vcpu);
 void kvm_s390_destroy_adapters(struct kvm *kvm);
 void kvm_s390_destroy_adapters(struct kvm *kvm);
-int kvm_s390_si_ext_call_pending(struct kvm_vcpu *vcpu);
+int kvm_s390_ext_call_pending(struct kvm_vcpu *vcpu);
 extern struct kvm_device_ops kvm_flic_ops;
 extern struct kvm_device_ops kvm_flic_ops;
+int kvm_s390_is_stop_irq_pending(struct kvm_vcpu *vcpu);
+void kvm_s390_clear_stop_irq(struct kvm_vcpu *vcpu);
 
 
 /* implemented in guestdbg.c */
 /* implemented in guestdbg.c */
 void kvm_s390_backup_guest_per_regs(struct kvm_vcpu *vcpu);
 void kvm_s390_backup_guest_per_regs(struct kvm_vcpu *vcpu);

+ 9 - 4
arch/s390/kvm/priv.c

@@ -337,19 +337,24 @@ static int handle_io_inst(struct kvm_vcpu *vcpu)
 static int handle_stfl(struct kvm_vcpu *vcpu)
 static int handle_stfl(struct kvm_vcpu *vcpu)
 {
 {
 	int rc;
 	int rc;
+	unsigned int fac;
 
 
 	vcpu->stat.instruction_stfl++;
 	vcpu->stat.instruction_stfl++;
 
 
 	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
 	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
 		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
 		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
 
 
+	/*
+	 * Facility bits 0-31 sit in the upper half of the first 64-bit
+	 * facility word; shift them down so the guest sees them as the
+	 * 32-bit STFL representation, still numbered 0-31.
+	 */
+	fac = *vcpu->kvm->arch.model.fac->sie >> 32;
 	rc = write_guest_lc(vcpu, offsetof(struct _lowcore, stfl_fac_list),
 	rc = write_guest_lc(vcpu, offsetof(struct _lowcore, stfl_fac_list),
-			    vfacilities, 4);
+			    &fac, sizeof(fac));
 	if (rc)
 	if (rc)
 		return rc;
 		return rc;
-	VCPU_EVENT(vcpu, 5, "store facility list value %x",
-		   *(unsigned int *) vfacilities);
-	trace_kvm_s390_handle_stfl(vcpu, *(unsigned int *) vfacilities);
+	VCPU_EVENT(vcpu, 5, "store facility list value %x", fac);
+	trace_kvm_s390_handle_stfl(vcpu, fac);
 	return 0;
 	return 0;
 }
 }
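
The shift above is easy to misread: facility bit 0 is the most significant bit of the first 64-bit facility word, so taking the upper half of that word yields exactly the 32-bit list that STFL stores in the lowcore. A one-line illustration, using a value taken from the removed vfacilities mask below:

u64 word0 = 0xff82fffbf47c2000ULL;	/* first facility word */
u32 stfl  = word0 >> 32;		/* == 0xff82fffb, facility bits 0-31 */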
 
 

+ 93 - 67
arch/s390/kvm/sigp.c

@@ -26,15 +26,17 @@ static int __sigp_sense(struct kvm_vcpu *vcpu, struct kvm_vcpu *dst_vcpu,
 	struct kvm_s390_local_interrupt *li;
 	struct kvm_s390_local_interrupt *li;
 	int cpuflags;
 	int cpuflags;
 	int rc;
 	int rc;
+	int ext_call_pending;
 
 
 	li = &dst_vcpu->arch.local_int;
 	li = &dst_vcpu->arch.local_int;
 
 
 	cpuflags = atomic_read(li->cpuflags);
 	cpuflags = atomic_read(li->cpuflags);
-	if (!(cpuflags & (CPUSTAT_ECALL_PEND | CPUSTAT_STOPPED)))
+	ext_call_pending = kvm_s390_ext_call_pending(dst_vcpu);
+	if (!(cpuflags & CPUSTAT_STOPPED) && !ext_call_pending)
 		rc = SIGP_CC_ORDER_CODE_ACCEPTED;
 		rc = SIGP_CC_ORDER_CODE_ACCEPTED;
 	else {
 	else {
 		*reg &= 0xffffffff00000000UL;
 		*reg &= 0xffffffff00000000UL;
-		if (cpuflags & CPUSTAT_ECALL_PEND)
+		if (ext_call_pending)
 			*reg |= SIGP_STATUS_EXT_CALL_PENDING;
 			*reg |= SIGP_STATUS_EXT_CALL_PENDING;
 		if (cpuflags & CPUSTAT_STOPPED)
 		if (cpuflags & CPUSTAT_STOPPED)
 			*reg |= SIGP_STATUS_STOPPED;
 			*reg |= SIGP_STATUS_STOPPED;
@@ -96,7 +98,7 @@ static int __sigp_conditional_emergency(struct kvm_vcpu *vcpu,
 }
 }
 
 
 static int __sigp_external_call(struct kvm_vcpu *vcpu,
 static int __sigp_external_call(struct kvm_vcpu *vcpu,
-				struct kvm_vcpu *dst_vcpu)
+				struct kvm_vcpu *dst_vcpu, u64 *reg)
 {
 {
 	struct kvm_s390_irq irq = {
 	struct kvm_s390_irq irq = {
 		.type = KVM_S390_INT_EXTERNAL_CALL,
 		.type = KVM_S390_INT_EXTERNAL_CALL,
@@ -105,45 +107,31 @@ static int __sigp_external_call(struct kvm_vcpu *vcpu,
 	int rc;
 	int rc;
 
 
 	rc = kvm_s390_inject_vcpu(dst_vcpu, &irq);
 	rc = kvm_s390_inject_vcpu(dst_vcpu, &irq);
-	if (!rc)
+	if (rc == -EBUSY) {
+		*reg &= 0xffffffff00000000UL;
+		*reg |= SIGP_STATUS_EXT_CALL_PENDING;
+		return SIGP_CC_STATUS_STORED;
+	} else if (rc == 0) {
 		VCPU_EVENT(vcpu, 4, "sent sigp ext call to cpu %x",
 		VCPU_EVENT(vcpu, 4, "sent sigp ext call to cpu %x",
 			   dst_vcpu->vcpu_id);
 			   dst_vcpu->vcpu_id);
-
-	return rc ? rc : SIGP_CC_ORDER_CODE_ACCEPTED;
-}
-
-static int __inject_sigp_stop(struct kvm_vcpu *dst_vcpu, int action)
-{
-	struct kvm_s390_local_interrupt *li = &dst_vcpu->arch.local_int;
-	int rc = SIGP_CC_ORDER_CODE_ACCEPTED;
-
-	spin_lock(&li->lock);
-	if (li->action_bits & ACTION_STOP_ON_STOP) {
-		/* another SIGP STOP is pending */
-		rc = SIGP_CC_BUSY;
-		goto out;
 	}
 	}
-	if ((atomic_read(li->cpuflags) & CPUSTAT_STOPPED)) {
-		if ((action & ACTION_STORE_ON_STOP) != 0)
-			rc = -ESHUTDOWN;
-		goto out;
-	}
-	set_bit(IRQ_PEND_SIGP_STOP, &li->pending_irqs);
-	li->action_bits |= action;
-	atomic_set_mask(CPUSTAT_STOP_INT, li->cpuflags);
-	kvm_s390_vcpu_wakeup(dst_vcpu);
-out:
-	spin_unlock(&li->lock);
 
 
-	return rc;
+	return rc ? rc : SIGP_CC_ORDER_CODE_ACCEPTED;
 }
 }
 
 
 static int __sigp_stop(struct kvm_vcpu *vcpu, struct kvm_vcpu *dst_vcpu)
 static int __sigp_stop(struct kvm_vcpu *vcpu, struct kvm_vcpu *dst_vcpu)
 {
 {
+	struct kvm_s390_irq irq = {
+		.type = KVM_S390_SIGP_STOP,
+	};
 	int rc;
 	int rc;
 
 
-	rc = __inject_sigp_stop(dst_vcpu, ACTION_STOP_ON_STOP);
-	VCPU_EVENT(vcpu, 4, "sent sigp stop to cpu %x", dst_vcpu->vcpu_id);
+	rc = kvm_s390_inject_vcpu(dst_vcpu, &irq);
+	if (rc == -EBUSY)
+		rc = SIGP_CC_BUSY;
+	else if (rc == 0)
+		VCPU_EVENT(vcpu, 4, "sent sigp stop to cpu %x",
+			   dst_vcpu->vcpu_id);
 
 
 	return rc;
 	return rc;
 }
 }
@@ -151,20 +139,18 @@ static int __sigp_stop(struct kvm_vcpu *vcpu, struct kvm_vcpu *dst_vcpu)
 static int __sigp_stop_and_store_status(struct kvm_vcpu *vcpu,
 static int __sigp_stop_and_store_status(struct kvm_vcpu *vcpu,
 					struct kvm_vcpu *dst_vcpu, u64 *reg)
 					struct kvm_vcpu *dst_vcpu, u64 *reg)
 {
 {
+	struct kvm_s390_irq irq = {
+		.type = KVM_S390_SIGP_STOP,
+		.u.stop.flags = KVM_S390_STOP_FLAG_STORE_STATUS,
+	};
 	int rc;
 	int rc;
 
 
-	rc = __inject_sigp_stop(dst_vcpu, ACTION_STOP_ON_STOP |
-					      ACTION_STORE_ON_STOP);
-	VCPU_EVENT(vcpu, 4, "sent sigp stop and store status to cpu %x",
-		   dst_vcpu->vcpu_id);
-
-	if (rc == -ESHUTDOWN) {
-		/* If the CPU has already been stopped, we still have
-		 * to save the status when doing stop-and-store. This
-		 * has to be done after unlocking all spinlocks. */
-		rc = kvm_s390_store_status_unloaded(dst_vcpu,
-						KVM_S390_STORE_STATUS_NOADDR);
-	}
+	rc = kvm_s390_inject_vcpu(dst_vcpu, &irq);
+	if (rc == -EBUSY)
+		rc = SIGP_CC_BUSY;
+	else if (rc == 0)
+		VCPU_EVENT(vcpu, 4, "sent sigp stop and store status to cpu %x",
+			   dst_vcpu->vcpu_id);
 
 
 	return rc;
 	return rc;
 }
 }
@@ -197,41 +183,33 @@ static int __sigp_set_arch(struct kvm_vcpu *vcpu, u32 parameter)
 static int __sigp_set_prefix(struct kvm_vcpu *vcpu, struct kvm_vcpu *dst_vcpu,
 static int __sigp_set_prefix(struct kvm_vcpu *vcpu, struct kvm_vcpu *dst_vcpu,
 			     u32 address, u64 *reg)
 			     u32 address, u64 *reg)
 {
 {
-	struct kvm_s390_local_interrupt *li;
+	struct kvm_s390_irq irq = {
+		.type = KVM_S390_SIGP_SET_PREFIX,
+		.u.prefix.address = address & 0x7fffe000u,
+	};
 	int rc;
 	int rc;
 
 
-	li = &dst_vcpu->arch.local_int;
-
 	/*
 	/*
 	 * Make sure the new value is valid memory. We only need to check the
 	 * Make sure the new value is valid memory. We only need to check the
 	 * first page, since address is 8k aligned and memory pieces are always
 	 * first page, since address is 8k aligned and memory pieces are always
 	 * at least 1MB aligned and have at least a size of 1MB.
 	 * at least 1MB aligned and have at least a size of 1MB.
 	 */
 	 */
-	address &= 0x7fffe000u;
-	if (kvm_is_error_gpa(vcpu->kvm, address)) {
+	if (kvm_is_error_gpa(vcpu->kvm, irq.u.prefix.address)) {
 		*reg &= 0xffffffff00000000UL;
 		*reg &= 0xffffffff00000000UL;
 		*reg |= SIGP_STATUS_INVALID_PARAMETER;
 		*reg |= SIGP_STATUS_INVALID_PARAMETER;
 		return SIGP_CC_STATUS_STORED;
 		return SIGP_CC_STATUS_STORED;
 	}
 	}
 
 
-	spin_lock(&li->lock);
-	/* cpu must be in stopped state */
-	if (!(atomic_read(li->cpuflags) & CPUSTAT_STOPPED)) {
+	rc = kvm_s390_inject_vcpu(dst_vcpu, &irq);
+	if (rc == -EBUSY) {
 		*reg &= 0xffffffff00000000UL;
 		*reg &= 0xffffffff00000000UL;
 		*reg |= SIGP_STATUS_INCORRECT_STATE;
 		*reg |= SIGP_STATUS_INCORRECT_STATE;
-		rc = SIGP_CC_STATUS_STORED;
-		goto out_li;
+		return SIGP_CC_STATUS_STORED;
+	} else if (rc == 0) {
+		VCPU_EVENT(vcpu, 4, "set prefix of cpu %02x to %x",
+			   dst_vcpu->vcpu_id, irq.u.prefix.address);
 	}
 	}
 
 
-	li->irq.prefix.address = address;
-	set_bit(IRQ_PEND_SET_PREFIX, &li->pending_irqs);
-	kvm_s390_vcpu_wakeup(dst_vcpu);
-	rc = SIGP_CC_ORDER_CODE_ACCEPTED;
-
-	VCPU_EVENT(vcpu, 4, "set prefix of cpu %02x to %x", dst_vcpu->vcpu_id,
-		   address);
-out_li:
-	spin_unlock(&li->lock);
 	return rc;
 	return rc;
 }
 }
 
 
@@ -242,9 +220,7 @@ static int __sigp_store_status_at_addr(struct kvm_vcpu *vcpu,
 	int flags;
 	int flags;
 	int rc;
 	int rc;
 
 
-	spin_lock(&dst_vcpu->arch.local_int.lock);
 	flags = atomic_read(dst_vcpu->arch.local_int.cpuflags);
 	flags = atomic_read(dst_vcpu->arch.local_int.cpuflags);
-	spin_unlock(&dst_vcpu->arch.local_int.lock);
 	if (!(flags & CPUSTAT_STOPPED)) {
 	if (!(flags & CPUSTAT_STOPPED)) {
 		*reg &= 0xffffffff00000000UL;
 		*reg &= 0xffffffff00000000UL;
 		*reg |= SIGP_STATUS_INCORRECT_STATE;
 		*reg |= SIGP_STATUS_INCORRECT_STATE;
@@ -291,8 +267,9 @@ static int __prepare_sigp_re_start(struct kvm_vcpu *vcpu,
 	/* handle (RE)START in user space */
 	/* handle (RE)START in user space */
 	int rc = -EOPNOTSUPP;
 	int rc = -EOPNOTSUPP;
 
 
+	/* make sure we don't race with STOP irq injection */
 	spin_lock(&li->lock);
 	spin_lock(&li->lock);
-	if (li->action_bits & ACTION_STOP_ON_STOP)
+	if (kvm_s390_is_stop_irq_pending(dst_vcpu))
 		rc = SIGP_CC_BUSY;
 		rc = SIGP_CC_BUSY;
 	spin_unlock(&li->lock);
 	spin_unlock(&li->lock);
 
 
@@ -333,7 +310,7 @@ static int handle_sigp_dst(struct kvm_vcpu *vcpu, u8 order_code,
 		break;
 		break;
 	case SIGP_EXTERNAL_CALL:
 	case SIGP_EXTERNAL_CALL:
 		vcpu->stat.instruction_sigp_external_call++;
 		vcpu->stat.instruction_sigp_external_call++;
-		rc = __sigp_external_call(vcpu, dst_vcpu);
+		rc = __sigp_external_call(vcpu, dst_vcpu, status_reg);
 		break;
 		break;
 	case SIGP_EMERGENCY_SIGNAL:
 	case SIGP_EMERGENCY_SIGNAL:
 		vcpu->stat.instruction_sigp_emergency++;
 		vcpu->stat.instruction_sigp_emergency++;
@@ -394,6 +371,53 @@ static int handle_sigp_dst(struct kvm_vcpu *vcpu, u8 order_code,
 	return rc;
 	return rc;
 }
 }
 
 
+static int handle_sigp_order_in_user_space(struct kvm_vcpu *vcpu, u8 order_code)
+{
+	if (!vcpu->kvm->arch.user_sigp)
+		return 0;
+
+	switch (order_code) {
+	case SIGP_SENSE:
+	case SIGP_EXTERNAL_CALL:
+	case SIGP_EMERGENCY_SIGNAL:
+	case SIGP_COND_EMERGENCY_SIGNAL:
+	case SIGP_SENSE_RUNNING:
+		return 0;
+	/* update counters as we're directly dropping to user space */
+	case SIGP_STOP:
+		vcpu->stat.instruction_sigp_stop++;
+		break;
+	case SIGP_STOP_AND_STORE_STATUS:
+		vcpu->stat.instruction_sigp_stop_store_status++;
+		break;
+	case SIGP_STORE_STATUS_AT_ADDRESS:
+		vcpu->stat.instruction_sigp_store_status++;
+		break;
+	case SIGP_SET_PREFIX:
+		vcpu->stat.instruction_sigp_prefix++;
+		break;
+	case SIGP_START:
+		vcpu->stat.instruction_sigp_start++;
+		break;
+	case SIGP_RESTART:
+		vcpu->stat.instruction_sigp_restart++;
+		break;
+	case SIGP_INITIAL_CPU_RESET:
+		vcpu->stat.instruction_sigp_init_cpu_reset++;
+		break;
+	case SIGP_CPU_RESET:
+		vcpu->stat.instruction_sigp_cpu_reset++;
+		break;
+	default:
+		vcpu->stat.instruction_sigp_unknown++;
+	}
+
+	VCPU_EVENT(vcpu, 4, "sigp order %u: completely handled in user space",
+		   order_code);
+
+	return 1;
+}
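
To use this, userspace opts in with KVM_ENABLE_CAP on the VM and then handles whatever orders handle_sigp_order_in_user_space() lets through — the -EOPNOTSUPP return below is what forwards the intercepted instruction to userspace. A sketch, with the actual exit handling deliberately left abstract:

struct kvm_enable_cap cap = { .cap = KVM_CAP_S390_USER_SIGP };

ioctl(vm_fd, KVM_ENABLE_CAP, &cap);	/* sets kvm->arch.user_sigp = 1 */

/* From now on SIGP STOP, START, RESTART, SET PREFIX, the resets and the
 * store-status orders surface as instruction intercepts in the vCPU run
 * loop instead of being emulated in the kernel. */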
+
 int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu)
 int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu)
 {
 {
 	int r1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
 	int r1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
@@ -408,6 +432,8 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu)
 		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
 		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
 
 
 	order_code = kvm_s390_get_base_disp_rs(vcpu);
 	order_code = kvm_s390_get_base_disp_rs(vcpu);
+	if (handle_sigp_order_in_user_space(vcpu, order_code))
+		return -EOPNOTSUPP;
 
 
 	if (r1 % 2)
 	if (r1 % 2)
 		parameter = vcpu->run->s.regs.gprs[r1];
 		parameter = vcpu->run->s.regs.gprs[r1];

+ 8 - 6
arch/s390/kvm/trace-s390.h

@@ -209,19 +209,21 @@ TRACE_EVENT(kvm_s390_request_resets,
  * Trace point for a vcpu's stop requests.
  * Trace point for a vcpu's stop requests.
  */
  */
 TRACE_EVENT(kvm_s390_stop_request,
 TRACE_EVENT(kvm_s390_stop_request,
-	    TP_PROTO(unsigned int action_bits),
-	    TP_ARGS(action_bits),
+	    TP_PROTO(unsigned char stop_irq, unsigned char flags),
+	    TP_ARGS(stop_irq, flags),
 
 
 	    TP_STRUCT__entry(
 	    TP_STRUCT__entry(
-		    __field(unsigned int, action_bits)
+		    __field(unsigned char, stop_irq)
+		    __field(unsigned char, flags)
 		    ),
 		    ),
 
 
 	    TP_fast_assign(
 	    TP_fast_assign(
-		    __entry->action_bits = action_bits;
+		    __entry->stop_irq = stop_irq;
+		    __entry->flags = flags;
 		    ),
 		    ),
 
 
-	    TP_printk("stop request, action_bits = %08x",
-		      __entry->action_bits)
+	    TP_printk("stop request, stop irq = %u, flags = %08x",
+		      __entry->stop_irq, __entry->flags)
 	);
 	);
 
 
 
 

+ 1 - 0
arch/x86/include/asm/kvm_emulate.h

@@ -208,6 +208,7 @@ struct x86_emulate_ops {
 
 
 	void (*get_cpuid)(struct x86_emulate_ctxt *ctxt,
 	void (*get_cpuid)(struct x86_emulate_ctxt *ctxt,
 			  u32 *eax, u32 *ebx, u32 *ecx, u32 *edx);
 			  u32 *eax, u32 *ebx, u32 *ecx, u32 *edx);
+	void (*set_nmi_mask)(struct x86_emulate_ctxt *ctxt, bool masked);
 };
 };
 
 
 typedef u32 __attribute__((vector_size(16))) sse128_t;
 typedef u32 __attribute__((vector_size(16))) sse128_t;

+ 52 - 7
arch/x86/include/asm/kvm_host.h

@@ -38,8 +38,6 @@
 #define KVM_PRIVATE_MEM_SLOTS 3
 #define KVM_PRIVATE_MEM_SLOTS 3
 #define KVM_MEM_SLOTS_NUM (KVM_USER_MEM_SLOTS + KVM_PRIVATE_MEM_SLOTS)
 #define KVM_MEM_SLOTS_NUM (KVM_USER_MEM_SLOTS + KVM_PRIVATE_MEM_SLOTS)
 
 
-#define KVM_MMIO_SIZE 16
-
 #define KVM_PIO_PAGE_OFFSET 1
 #define KVM_PIO_PAGE_OFFSET 1
 #define KVM_COALESCED_MMIO_PAGE_OFFSET 2
 #define KVM_COALESCED_MMIO_PAGE_OFFSET 2
 
 
@@ -51,7 +49,7 @@
 			  | X86_CR0_NW | X86_CR0_CD | X86_CR0_PG))
 			  | X86_CR0_NW | X86_CR0_CD | X86_CR0_PG))
 
 
 #define CR3_L_MODE_RESERVED_BITS 0xFFFFFF0000000000ULL
 #define CR3_L_MODE_RESERVED_BITS 0xFFFFFF0000000000ULL
-#define CR3_PCID_INVD		 (1UL << 63)
+#define CR3_PCID_INVD		 BIT_64(63)
 #define CR4_RESERVED_BITS                                               \
 #define CR4_RESERVED_BITS                                               \
 	(~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\
 	(~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\
 			  | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE     \
 			  | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE     \
@@ -160,6 +158,18 @@ enum {
 #define DR7_FIXED_1	0x00000400
 #define DR7_FIXED_1	0x00000400
 #define DR7_VOLATILE	0xffff2bff
 #define DR7_VOLATILE	0xffff2bff
 
 
+#define PFERR_PRESENT_BIT 0
+#define PFERR_WRITE_BIT 1
+#define PFERR_USER_BIT 2
+#define PFERR_RSVD_BIT 3
+#define PFERR_FETCH_BIT 4
+
+#define PFERR_PRESENT_MASK (1U << PFERR_PRESENT_BIT)
+#define PFERR_WRITE_MASK (1U << PFERR_WRITE_BIT)
+#define PFERR_USER_MASK (1U << PFERR_USER_BIT)
+#define PFERR_RSVD_MASK (1U << PFERR_RSVD_BIT)
+#define PFERR_FETCH_MASK (1U << PFERR_FETCH_BIT)
+
 /* apic attention bits */
 /* apic attention bits */
 #define KVM_APIC_CHECK_VAPIC	0
 #define KVM_APIC_CHECK_VAPIC	0
 /*
 /*
@@ -615,6 +625,8 @@ struct kvm_arch {
 	#ifdef CONFIG_KVM_MMU_AUDIT
 	#ifdef CONFIG_KVM_MMU_AUDIT
 	int audit_point;
 	int audit_point;
 	#endif
 	#endif
+
+	bool boot_vcpu_runs_old_kvmclock;
 };
 };
 
 
 struct kvm_vm_stat {
 struct kvm_vm_stat {
@@ -643,6 +655,7 @@ struct kvm_vcpu_stat {
 	u32 irq_window_exits;
 	u32 irq_window_exits;
 	u32 nmi_window_exits;
 	u32 nmi_window_exits;
 	u32 halt_exits;
 	u32 halt_exits;
+	u32 halt_successful_poll;
 	u32 halt_wakeup;
 	u32 halt_wakeup;
 	u32 request_irq_exits;
 	u32 request_irq_exits;
 	u32 irq_exits;
 	u32 irq_exits;
@@ -787,6 +800,31 @@ struct kvm_x86_ops {
 	int (*check_nested_events)(struct kvm_vcpu *vcpu, bool external_intr);
 	int (*check_nested_events)(struct kvm_vcpu *vcpu, bool external_intr);
 
 
 	void (*sched_in)(struct kvm_vcpu *kvm, int cpu);
 	void (*sched_in)(struct kvm_vcpu *kvm, int cpu);
+
+	/*
+	 * Arch-specific dirty logging hooks. These hooks are only supposed to
+	 * be valid if the specific arch has a hardware-accelerated dirty
+	 * logging mechanism. Currently this is only PML on VMX.
+	 *
+	 *  - slot_enable_log_dirty:
+	 *	called when enabling log dirty mode for the slot.
+	 *  - slot_disable_log_dirty:
+	 *	called when disabling log dirty mode for the slot.
+	 *	also called when slot is created with log dirty disabled.
+	 *  - flush_log_dirty:
+	 *	called before reporting dirty_bitmap to userspace.
+	 *  - enable_log_dirty_pt_masked:
+	 *	called when reenabling log dirty for the GFNs in the mask after
+	 *	corresponding bits are cleared in slot->dirty_bitmap.
+	 */
+	void (*slot_enable_log_dirty)(struct kvm *kvm,
+				      struct kvm_memory_slot *slot);
+	void (*slot_disable_log_dirty)(struct kvm *kvm,
+				       struct kvm_memory_slot *slot);
+	void (*flush_log_dirty)(struct kvm *kvm);
+	void (*enable_log_dirty_pt_masked)(struct kvm *kvm,
+					   struct kvm_memory_slot *slot,
+					   gfn_t offset, unsigned long mask);
 };
 };
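
A hedged sketch of how the generic x86 side is expected to consume these optional hooks (a NULL pointer simply means the architecture falls back to the existing write-protection based dirty tracking); only the hook pointers themselves come from the structure above, the surrounding call sites are illustrative:

/* before copying slot->dirty_bitmap out to userspace */
if (kvm_x86_ops->flush_log_dirty)
	kvm_x86_ops->flush_log_dirty(kvm);

/* when re-arming dirty logging for GFNs whose bits were just cleared */
if (kvm_x86_ops->enable_log_dirty_pt_masked)
	kvm_x86_ops->enable_log_dirty_pt_masked(kvm, memslot, gfn_offset, mask);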
 
 
 struct kvm_arch_async_pf {
 struct kvm_arch_async_pf {
@@ -819,10 +857,17 @@ void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
 		u64 dirty_mask, u64 nx_mask, u64 x_mask);
 		u64 dirty_mask, u64 nx_mask, u64 x_mask);
 
 
 void kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
 void kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
-void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot);
-void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
-				     struct kvm_memory_slot *slot,
-				     gfn_t gfn_offset, unsigned long mask);
+void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
+				      struct kvm_memory_slot *memslot);
+void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm,
+				   struct kvm_memory_slot *memslot);
+void kvm_mmu_slot_largepage_remove_write_access(struct kvm *kvm,
+					struct kvm_memory_slot *memslot);
+void kvm_mmu_slot_set_dirty(struct kvm *kvm,
+			    struct kvm_memory_slot *memslot);
+void kvm_mmu_clear_dirty_pt_masked(struct kvm *kvm,
+				   struct kvm_memory_slot *slot,
+				   gfn_t gfn_offset, unsigned long mask);
 void kvm_mmu_zap_all(struct kvm *kvm);
 void kvm_mmu_zap_all(struct kvm *kvm);
 void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm);
 void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm);
 unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm);
 unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm);

+ 4 - 0
arch/x86/include/asm/vmx.h

@@ -69,6 +69,7 @@
 #define SECONDARY_EXEC_PAUSE_LOOP_EXITING	0x00000400
 #define SECONDARY_EXEC_PAUSE_LOOP_EXITING	0x00000400
 #define SECONDARY_EXEC_ENABLE_INVPCID		0x00001000
 #define SECONDARY_EXEC_ENABLE_INVPCID		0x00001000
 #define SECONDARY_EXEC_SHADOW_VMCS              0x00004000
 #define SECONDARY_EXEC_SHADOW_VMCS              0x00004000
+#define SECONDARY_EXEC_ENABLE_PML               0x00020000
 #define SECONDARY_EXEC_XSAVES			0x00100000
 #define SECONDARY_EXEC_XSAVES			0x00100000
 
 
 
 
@@ -121,6 +122,7 @@ enum vmcs_field {
 	GUEST_LDTR_SELECTOR             = 0x0000080c,
 	GUEST_LDTR_SELECTOR             = 0x0000080c,
 	GUEST_TR_SELECTOR               = 0x0000080e,
 	GUEST_TR_SELECTOR               = 0x0000080e,
 	GUEST_INTR_STATUS               = 0x00000810,
 	GUEST_INTR_STATUS               = 0x00000810,
+	GUEST_PML_INDEX			= 0x00000812,
 	HOST_ES_SELECTOR                = 0x00000c00,
 	HOST_ES_SELECTOR                = 0x00000c00,
 	HOST_CS_SELECTOR                = 0x00000c02,
 	HOST_CS_SELECTOR                = 0x00000c02,
 	HOST_SS_SELECTOR                = 0x00000c04,
 	HOST_SS_SELECTOR                = 0x00000c04,
@@ -140,6 +142,8 @@ enum vmcs_field {
 	VM_EXIT_MSR_LOAD_ADDR_HIGH      = 0x00002009,
 	VM_EXIT_MSR_LOAD_ADDR_HIGH      = 0x00002009,
 	VM_ENTRY_MSR_LOAD_ADDR          = 0x0000200a,
 	VM_ENTRY_MSR_LOAD_ADDR          = 0x0000200a,
 	VM_ENTRY_MSR_LOAD_ADDR_HIGH     = 0x0000200b,
 	VM_ENTRY_MSR_LOAD_ADDR_HIGH     = 0x0000200b,
+	PML_ADDRESS			= 0x0000200e,
+	PML_ADDRESS_HIGH		= 0x0000200f,
 	TSC_OFFSET                      = 0x00002010,
 	TSC_OFFSET                      = 0x00002010,
 	TSC_OFFSET_HIGH                 = 0x00002011,
 	TSC_OFFSET_HIGH                 = 0x00002011,
 	VIRTUAL_APIC_PAGE_ADDR          = 0x00002012,
 	VIRTUAL_APIC_PAGE_ADDR          = 0x00002012,

+ 3 - 0
arch/x86/include/uapi/asm/msr-index.h

@@ -364,6 +364,9 @@
 #define MSR_IA32_UCODE_WRITE		0x00000079
 #define MSR_IA32_UCODE_WRITE		0x00000079
 #define MSR_IA32_UCODE_REV		0x0000008b
 #define MSR_IA32_UCODE_REV		0x0000008b
 
 
+#define MSR_IA32_SMM_MONITOR_CTL	0x0000009b
+#define MSR_IA32_SMBASE			0x0000009e
+
 #define MSR_IA32_PERF_STATUS		0x00000198
 #define MSR_IA32_PERF_STATUS		0x00000198
 #define MSR_IA32_PERF_CTL		0x00000199
 #define MSR_IA32_PERF_CTL		0x00000199
 #define INTEL_PERF_CTL_MASK		0xffff
 #define INTEL_PERF_CTL_MASK		0xffff

+ 6 - 0
arch/x86/include/uapi/asm/vmx.h

@@ -56,6 +56,7 @@
 #define EXIT_REASON_MSR_READ            31
 #define EXIT_REASON_MSR_READ            31
 #define EXIT_REASON_MSR_WRITE           32
 #define EXIT_REASON_MSR_WRITE           32
 #define EXIT_REASON_INVALID_STATE       33
 #define EXIT_REASON_INVALID_STATE       33
+#define EXIT_REASON_MSR_LOAD_FAIL       34
 #define EXIT_REASON_MWAIT_INSTRUCTION   36
 #define EXIT_REASON_MWAIT_INSTRUCTION   36
 #define EXIT_REASON_MONITOR_INSTRUCTION 39
 #define EXIT_REASON_MONITOR_INSTRUCTION 39
 #define EXIT_REASON_PAUSE_INSTRUCTION   40
 #define EXIT_REASON_PAUSE_INSTRUCTION   40
@@ -72,6 +73,7 @@
 #define EXIT_REASON_XSETBV              55
 #define EXIT_REASON_XSETBV              55
 #define EXIT_REASON_APIC_WRITE          56
 #define EXIT_REASON_APIC_WRITE          56
 #define EXIT_REASON_INVPCID             58
 #define EXIT_REASON_INVPCID             58
+#define EXIT_REASON_PML_FULL            62
 #define EXIT_REASON_XSAVES              63
 #define EXIT_REASON_XSAVES              63
 #define EXIT_REASON_XRSTORS             64
 #define EXIT_REASON_XRSTORS             64
 
 
@@ -116,10 +118,14 @@
 	{ EXIT_REASON_APIC_WRITE,            "APIC_WRITE" }, \
 	{ EXIT_REASON_APIC_WRITE,            "APIC_WRITE" }, \
 	{ EXIT_REASON_EOI_INDUCED,           "EOI_INDUCED" }, \
 	{ EXIT_REASON_EOI_INDUCED,           "EOI_INDUCED" }, \
 	{ EXIT_REASON_INVALID_STATE,         "INVALID_STATE" }, \
 	{ EXIT_REASON_INVALID_STATE,         "INVALID_STATE" }, \
+	{ EXIT_REASON_MSR_LOAD_FAIL,         "MSR_LOAD_FAIL" }, \
 	{ EXIT_REASON_INVD,                  "INVD" }, \
 	{ EXIT_REASON_INVD,                  "INVD" }, \
 	{ EXIT_REASON_INVVPID,               "INVVPID" }, \
 	{ EXIT_REASON_INVVPID,               "INVVPID" }, \
 	{ EXIT_REASON_INVPCID,               "INVPCID" }, \
 	{ EXIT_REASON_INVPCID,               "INVPCID" }, \
 	{ EXIT_REASON_XSAVES,                "XSAVES" }, \
 	{ EXIT_REASON_XSAVES,                "XSAVES" }, \
 	{ EXIT_REASON_XRSTORS,               "XRSTORS" }
 	{ EXIT_REASON_XRSTORS,               "XRSTORS" }
 
 
+#define VMX_ABORT_SAVE_GUEST_MSR_FAIL        1
+#define VMX_ABORT_LOAD_HOST_MSR_FAIL         4
+
 #endif /* _UAPIVMX_H */
 #endif /* _UAPIVMX_H */

+ 1 - 0
arch/x86/kvm/Kconfig

@@ -39,6 +39,7 @@ config KVM
 	select PERF_EVENTS
 	select PERF_EVENTS
 	select HAVE_KVM_MSI
 	select HAVE_KVM_MSI
 	select HAVE_KVM_CPU_RELAX_INTERCEPT
 	select HAVE_KVM_CPU_RELAX_INTERCEPT
+	select KVM_GENERIC_DIRTYLOG_READ_PROTECT
 	select KVM_VFIO
 	select KVM_VFIO
 	select SRCU
 	select SRCU
 	---help---
 	---help---

+ 156 - 74
arch/x86/kvm/emulate.c

@@ -86,6 +86,7 @@
 #define DstAcc      (OpAcc << DstShift)
 #define DstAcc      (OpAcc << DstShift)
 #define DstDI       (OpDI << DstShift)
 #define DstDI       (OpDI << DstShift)
 #define DstMem64    (OpMem64 << DstShift)
 #define DstMem64    (OpMem64 << DstShift)
+#define DstMem16    (OpMem16 << DstShift)
 #define DstImmUByte (OpImmUByte << DstShift)
 #define DstImmUByte (OpImmUByte << DstShift)
 #define DstDX       (OpDX << DstShift)
 #define DstDX       (OpDX << DstShift)
 #define DstAccLo    (OpAccLo << DstShift)
 #define DstAccLo    (OpAccLo << DstShift)
@@ -124,6 +125,7 @@
 #define RMExt       (4<<15)     /* Opcode extension in ModRM r/m if mod == 3 */
 #define RMExt       (4<<15)     /* Opcode extension in ModRM r/m if mod == 3 */
 #define Escape      (5<<15)     /* Escape to coprocessor instruction */
 #define Escape      (5<<15)     /* Escape to coprocessor instruction */
 #define InstrDual   (6<<15)     /* Alternate instruction decoding of mod == 3 */
 #define InstrDual   (6<<15)     /* Alternate instruction decoding of mod == 3 */
+#define ModeDual    (7<<15)     /* Different instruction for 32/64 bit */
 #define Sse         (1<<18)     /* SSE Vector instruction */
 #define Sse         (1<<18)     /* SSE Vector instruction */
 /* Generic ModRM decode. */
 /* Generic ModRM decode. */
 #define ModRM       (1<<19)
 #define ModRM       (1<<19)
@@ -165,10 +167,10 @@
 #define NoMod	    ((u64)1 << 47)  /* Mod field is ignored */
 #define NoMod	    ((u64)1 << 47)  /* Mod field is ignored */
 #define Intercept   ((u64)1 << 48)  /* Has valid intercept field */
 #define Intercept   ((u64)1 << 48)  /* Has valid intercept field */
 #define CheckPerm   ((u64)1 << 49)  /* Has valid check_perm field */
 #define CheckPerm   ((u64)1 << 49)  /* Has valid check_perm field */
-#define NoBigReal   ((u64)1 << 50)  /* No big real mode */
 #define PrivUD      ((u64)1 << 51)  /* #UD instead of #GP on CPL > 0 */
 #define PrivUD      ((u64)1 << 51)  /* #UD instead of #GP on CPL > 0 */
 #define NearBranch  ((u64)1 << 52)  /* Near branches */
 #define NearBranch  ((u64)1 << 52)  /* Near branches */
 #define No16	    ((u64)1 << 53)  /* No 16 bit operand */
 #define No16	    ((u64)1 << 53)  /* No 16 bit operand */
+#define IncSP       ((u64)1 << 54)  /* SP is incremented before ModRM calc */
 
 
 #define DstXacc     (DstAccLo | SrcAccHi | SrcWrite)
 #define DstXacc     (DstAccLo | SrcAccHi | SrcWrite)
 
 
@@ -213,6 +215,7 @@ struct opcode {
 		const struct gprefix *gprefix;
 		const struct gprefix *gprefix;
 		const struct escape *esc;
 		const struct escape *esc;
 		const struct instr_dual *idual;
 		const struct instr_dual *idual;
+		const struct mode_dual *mdual;
 		void (*fastop)(struct fastop *fake);
 		void (*fastop)(struct fastop *fake);
 	} u;
 	} u;
 	int (*check_perm)(struct x86_emulate_ctxt *ctxt);
 	int (*check_perm)(struct x86_emulate_ctxt *ctxt);
@@ -240,6 +243,11 @@ struct instr_dual {
 	struct opcode mod3;
 	struct opcode mod3;
 };
 };
 
 
+struct mode_dual {
+	struct opcode mode32;
+	struct opcode mode64;
+};
+
 /* EFLAGS bit definitions. */
 /* EFLAGS bit definitions. */
 #define EFLG_ID (1<<21)
 #define EFLG_ID (1<<21)
 #define EFLG_VIP (1<<20)
 #define EFLG_VIP (1<<20)
@@ -262,6 +270,13 @@ struct instr_dual {
 #define EFLG_RESERVED_ZEROS_MASK 0xffc0802a
 #define EFLG_RESERVED_ZEROS_MASK 0xffc0802a
 #define EFLG_RESERVED_ONE_MASK 2
 #define EFLG_RESERVED_ONE_MASK 2
 
 
+enum x86_transfer_type {
+	X86_TRANSFER_NONE,
+	X86_TRANSFER_CALL_JMP,
+	X86_TRANSFER_RET,
+	X86_TRANSFER_TASK_SWITCH,
+};
+
 static ulong reg_read(struct x86_emulate_ctxt *ctxt, unsigned nr)
 static ulong reg_read(struct x86_emulate_ctxt *ctxt, unsigned nr)
 {
 {
 	if (!(ctxt->regs_valid & (1 << nr))) {
 	if (!(ctxt->regs_valid & (1 << nr))) {
@@ -669,9 +684,13 @@ static __always_inline int __linearize(struct x86_emulate_ctxt *ctxt,
 		}
 		}
 		if (addr.ea > lim)
 		if (addr.ea > lim)
 			goto bad;
 			goto bad;
-		*max_size = min_t(u64, ~0u, (u64)lim + 1 - addr.ea);
-		if (size > *max_size)
-			goto bad;
+		if (lim == 0xffffffff)
+			*max_size = ~0u;
+		else {
+			*max_size = (u64)lim + 1 - addr.ea;
+			if (size > *max_size)
+				goto bad;
+		}
 		la &= (u32)-1;
 		la &= (u32)-1;
 		break;
 		break;
 	}
 	}
@@ -722,19 +741,26 @@ static int assign_eip_far(struct x86_emulate_ctxt *ctxt, ulong dst,
 			  const struct desc_struct *cs_desc)
 			  const struct desc_struct *cs_desc)
 {
 {
 	enum x86emul_mode mode = ctxt->mode;
 	enum x86emul_mode mode = ctxt->mode;
+	int rc;
 
 
 #ifdef CONFIG_X86_64
 #ifdef CONFIG_X86_64
-	if (ctxt->mode >= X86EMUL_MODE_PROT32 && cs_desc->l) {
-		u64 efer = 0;
+	if (ctxt->mode >= X86EMUL_MODE_PROT16) {
+		if (cs_desc->l) {
+			u64 efer = 0;
 
 
-		ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
-		if (efer & EFER_LMA)
-			mode = X86EMUL_MODE_PROT64;
+			ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
+			if (efer & EFER_LMA)
+				mode = X86EMUL_MODE_PROT64;
+		} else
+			mode = X86EMUL_MODE_PROT32; /* temporary value */
 	}
 	}
 #endif
 #endif
 	if (mode == X86EMUL_MODE_PROT16 || mode == X86EMUL_MODE_PROT32)
 	if (mode == X86EMUL_MODE_PROT16 || mode == X86EMUL_MODE_PROT32)
 		mode = cs_desc->d ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16;
 		mode = cs_desc->d ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16;
-	return assign_eip(ctxt, dst, mode);
+	rc = assign_eip(ctxt, dst, mode);
+	if (rc == X86EMUL_CONTINUE)
+		ctxt->mode = mode;
+	return rc;
 }
 }
 
 
 static inline int jmp_rel(struct x86_emulate_ctxt *ctxt, int rel)
 static inline int jmp_rel(struct x86_emulate_ctxt *ctxt, int rel)
@@ -1057,8 +1083,6 @@ static int em_fnstcw(struct x86_emulate_ctxt *ctxt)
 	asm volatile("fnstcw %0": "+m"(fcw));
 	asm volatile("fnstcw %0": "+m"(fcw));
 	ctxt->ops->put_fpu(ctxt);
 	ctxt->ops->put_fpu(ctxt);
 
 
-	/* force 2 byte destination */
-	ctxt->dst.bytes = 2;
 	ctxt->dst.val = fcw;
 	ctxt->dst.val = fcw;
 
 
 	return X86EMUL_CONTINUE;
 	return X86EMUL_CONTINUE;
@@ -1075,8 +1099,6 @@ static int em_fnstsw(struct x86_emulate_ctxt *ctxt)
 	asm volatile("fnstsw %0": "+m"(fsw));
 	asm volatile("fnstsw %0": "+m"(fsw));
 	ctxt->ops->put_fpu(ctxt);
 	ctxt->ops->put_fpu(ctxt);
 
 
-	/* force 2 byte destination */
-	ctxt->dst.bytes = 2;
 	ctxt->dst.val = fsw;
 	ctxt->dst.val = fsw;
 
 
 	return X86EMUL_CONTINUE;
 	return X86EMUL_CONTINUE;
@@ -1223,6 +1245,10 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt,
 			else {
 			else {
 				modrm_ea += reg_read(ctxt, base_reg);
 				modrm_ea += reg_read(ctxt, base_reg);
 				adjust_modrm_seg(ctxt, base_reg);
 				adjust_modrm_seg(ctxt, base_reg);
+				/* Increment ESP on POP [ESP] */
+				if ((ctxt->d & IncSP) &&
+				    base_reg == VCPU_REGS_RSP)
+					modrm_ea += ctxt->op_bytes;
 			}
 			}
 			if (index_reg != 4)
 			if (index_reg != 4)
 				modrm_ea += reg_read(ctxt, index_reg) << scale;
 				modrm_ea += reg_read(ctxt, index_reg) << scale;
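
The IncSP adjustment models an architectural corner case: when ESP is the base register of a POP destination, the effective address is computed after the stack pointer has already been incremented, while the decoder above still sees the pre-increment value. A worked example, assuming 32-bit operand size:

/*  esp = 0x1000, dword at 0x1000 is 0x1234
 *  pop dword ptr [esp]
 *  -> 0x1234 is read from 0x1000 (the old top of stack),
 *     esp becomes 0x1004,
 *     and the store goes to [esp] = 0x1004, not 0x1000.
 *  Adding op_bytes to modrm_ea reproduces exactly that behaviour.
 */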
@@ -1435,10 +1461,8 @@ static void get_descriptor_table_ptr(struct x86_emulate_ctxt *ctxt,
 		ops->get_gdt(ctxt, dt);
 		ops->get_gdt(ctxt, dt);
 }
 }
 
 
-/* allowed just for 8 bytes segments */
-static int read_segment_descriptor(struct x86_emulate_ctxt *ctxt,
-				   u16 selector, struct desc_struct *desc,
-				   ulong *desc_addr_p)
+static int get_descriptor_ptr(struct x86_emulate_ctxt *ctxt,
+			      u16 selector, ulong *desc_addr_p)
 {
 {
 	struct desc_ptr dt;
 	struct desc_ptr dt;
 	u16 index = selector >> 3;
 	u16 index = selector >> 3;
@@ -1449,8 +1473,34 @@ static int read_segment_descriptor(struct x86_emulate_ctxt *ctxt,
 	if (dt.size < index * 8 + 7)
 	if (dt.size < index * 8 + 7)
 		return emulate_gp(ctxt, selector & 0xfffc);
 		return emulate_gp(ctxt, selector & 0xfffc);
 
 
-	*desc_addr_p = addr = dt.address + index * 8;
-	return ctxt->ops->read_std(ctxt, addr, desc, sizeof *desc,
+	addr = dt.address + index * 8;
+
+#ifdef CONFIG_X86_64
+	if (addr >> 32 != 0) {
+		u64 efer = 0;
+
+		ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
+		if (!(efer & EFER_LMA))
+			addr &= (u32)-1;
+	}
+#endif
+
+	*desc_addr_p = addr;
+	return X86EMUL_CONTINUE;
+}
+
+/* allowed just for 8 bytes segments */
+static int read_segment_descriptor(struct x86_emulate_ctxt *ctxt,
+				   u16 selector, struct desc_struct *desc,
+				   ulong *desc_addr_p)
+{
+	int rc;
+
+	rc = get_descriptor_ptr(ctxt, selector, desc_addr_p);
+	if (rc != X86EMUL_CONTINUE)
+		return rc;
+
+	return ctxt->ops->read_std(ctxt, *desc_addr_p, desc, sizeof(*desc),
 				   &ctxt->exception);
 				   &ctxt->exception);
 }
 }
 
 
@@ -1458,16 +1508,13 @@ static int read_segment_descriptor(struct x86_emulate_ctxt *ctxt,
 static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt,
 static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt,
 				    u16 selector, struct desc_struct *desc)
 				    u16 selector, struct desc_struct *desc)
 {
 {
-	struct desc_ptr dt;
-	u16 index = selector >> 3;
+	int rc;
 	ulong addr;
 	ulong addr;
 
 
-	get_descriptor_table_ptr(ctxt, selector, &dt);
-
-	if (dt.size < index * 8 + 7)
-		return emulate_gp(ctxt, selector & 0xfffc);
+	rc = get_descriptor_ptr(ctxt, selector, &addr);
+	if (rc != X86EMUL_CONTINUE)
+		return rc;
 
 
-	addr = dt.address + index * 8;
 	return ctxt->ops->write_std(ctxt, addr, desc, sizeof *desc,
 	return ctxt->ops->write_std(ctxt, addr, desc, sizeof *desc,
 				    &ctxt->exception);
 				    &ctxt->exception);
 }
 }
@@ -1475,7 +1522,7 @@ static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt,
 /* Does not support long mode */
 /* Does not support long mode */
 static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
 static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
 				     u16 selector, int seg, u8 cpl,
 				     u16 selector, int seg, u8 cpl,
-				     bool in_task_switch,
+				     enum x86_transfer_type transfer,
 				     struct desc_struct *desc)
 				     struct desc_struct *desc)
 {
 {
 	struct desc_struct seg_desc, old_desc;
 	struct desc_struct seg_desc, old_desc;
@@ -1529,11 +1576,15 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
 		return ret;
 		return ret;
 
 
 	err_code = selector & 0xfffc;
 	err_code = selector & 0xfffc;
-	err_vec = in_task_switch ? TS_VECTOR : GP_VECTOR;
+	err_vec = (transfer == X86_TRANSFER_TASK_SWITCH) ? TS_VECTOR :
+							   GP_VECTOR;
 
 
 	/* can't load system descriptor into segment selector */
 	/* can't load system descriptor into segment selector */
-	if (seg <= VCPU_SREG_GS && !seg_desc.s)
+	if (seg <= VCPU_SREG_GS && !seg_desc.s) {
+		if (transfer == X86_TRANSFER_CALL_JMP)
+			return X86EMUL_UNHANDLEABLE;
 		goto exception;
 		goto exception;
+	}
 
 
 	if (!seg_desc.p) {
 	if (!seg_desc.p) {
 		err_vec = (seg == VCPU_SREG_SS) ? SS_VECTOR : NP_VECTOR;
 		err_vec = (seg == VCPU_SREG_SS) ? SS_VECTOR : NP_VECTOR;
@@ -1605,10 +1656,13 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
 
 
 	if (seg_desc.s) {
 	if (seg_desc.s) {
 		/* mark segment as accessed */
 		/* mark segment as accessed */
-		seg_desc.type |= 1;
-		ret = write_segment_descriptor(ctxt, selector, &seg_desc);
-		if (ret != X86EMUL_CONTINUE)
-			return ret;
+		if (!(seg_desc.type & 1)) {
+			seg_desc.type |= 1;
+			ret = write_segment_descriptor(ctxt, selector,
+						       &seg_desc);
+			if (ret != X86EMUL_CONTINUE)
+				return ret;
+		}
 	} else if (ctxt->mode == X86EMUL_MODE_PROT64) {
 	} else if (ctxt->mode == X86EMUL_MODE_PROT64) {
 		ret = ctxt->ops->read_std(ctxt, desc_addr+8, &base3,
 		ret = ctxt->ops->read_std(ctxt, desc_addr+8, &base3,
 				sizeof(base3), &ctxt->exception);
 				sizeof(base3), &ctxt->exception);
@@ -1631,7 +1685,8 @@ static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
 				   u16 selector, int seg)
 				   u16 selector, int seg)
 {
 {
 	u8 cpl = ctxt->ops->cpl(ctxt);
 	u8 cpl = ctxt->ops->cpl(ctxt);
-	return __load_segment_descriptor(ctxt, selector, seg, cpl, false, NULL);
+	return __load_segment_descriptor(ctxt, selector, seg, cpl,
+					 X86_TRANSFER_NONE, NULL);
 }
 }
 
 
 static void write_register_operand(struct operand *op)
 static void write_register_operand(struct operand *op)
@@ -1828,12 +1883,14 @@ static int em_pop_sreg(struct x86_emulate_ctxt *ctxt)
 	unsigned long selector;
 	unsigned long selector;
 	int rc;
 	int rc;
 
 
-	rc = emulate_pop(ctxt, &selector, ctxt->op_bytes);
+	rc = emulate_pop(ctxt, &selector, 2);
 	if (rc != X86EMUL_CONTINUE)
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
 		return rc;
 
 
 	if (ctxt->modrm_reg == VCPU_SREG_SS)
 	if (ctxt->modrm_reg == VCPU_SREG_SS)
 		ctxt->interruptibility = KVM_X86_SHADOW_INT_MOV_SS;
 		ctxt->interruptibility = KVM_X86_SHADOW_INT_MOV_SS;
+	if (ctxt->op_bytes > 2)
+		rsp_increment(ctxt, ctxt->op_bytes - 2);
 
 
 	rc = load_segment_descriptor(ctxt, (u16)selector, seg);
 	rc = load_segment_descriptor(ctxt, (u16)selector, seg);
 	return rc;
 	return rc;
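
The em_pop_sreg() fix above pops only a 16-bit selector yet still advances the stack pointer by the full operand size. A toy model of that stack behaviour, assuming a little-endian host as on x86 (not the emulator's real stack helpers):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* POP Sreg reads only a 16-bit selector, but the stack pointer still
 * advances by the full operand size (2, 4, or 8 bytes). */
static uint16_t pop_selector(const uint8_t *stack, uint64_t *sp, int op_bytes)
{
	uint16_t sel;

	memcpy(&sel, stack + *sp, sizeof(sel));	/* only 2 bytes are consumed */
	*sp += op_bytes;			/* ...but SP moves by op_bytes */
	return sel;
}

int main(void)
{
	uint8_t stack[16] = { 0x23, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
	uint64_t sp = 0;

	/* 32-bit operand size: selector 0x0023, SP advances by 4. */
	printf("sel=%#x sp=%llu\n", pop_selector(stack, &sp, 4),
	       (unsigned long long)sp);
	return 0;
}
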
@@ -2007,6 +2064,7 @@ static int emulate_iret_real(struct x86_emulate_ctxt *ctxt)
 
 
 	ctxt->eflags &= ~EFLG_RESERVED_ZEROS_MASK; /* Clear reserved zeros */
 	ctxt->eflags &= ~EFLG_RESERVED_ZEROS_MASK; /* Clear reserved zeros */
 	ctxt->eflags |= EFLG_RESERVED_ONE_MASK;
 	ctxt->eflags |= EFLG_RESERVED_ONE_MASK;
+	ctxt->ops->set_nmi_mask(ctxt, false);
 
 
 	return rc;
 	return rc;
 }
 }
@@ -2041,7 +2099,8 @@ static int em_jmp_far(struct x86_emulate_ctxt *ctxt)
 
 
 	memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);
 	memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);
 
 
-	rc = __load_segment_descriptor(ctxt, sel, VCPU_SREG_CS, cpl, false,
+	rc = __load_segment_descriptor(ctxt, sel, VCPU_SREG_CS, cpl,
+				       X86_TRANSFER_CALL_JMP,
 				       &new_desc);
 				       &new_desc);
 	if (rc != X86EMUL_CONTINUE)
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
 		return rc;
@@ -2130,7 +2189,8 @@ static int em_ret_far(struct x86_emulate_ctxt *ctxt)
 	/* Outer-privilege level return is not implemented */
 	/* Outer-privilege level return is not implemented */
 	if (ctxt->mode >= X86EMUL_MODE_PROT16 && (cs & 3) > cpl)
 	if (ctxt->mode >= X86EMUL_MODE_PROT16 && (cs & 3) > cpl)
 		return X86EMUL_UNHANDLEABLE;
 		return X86EMUL_UNHANDLEABLE;
-	rc = __load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS, cpl, false,
+	rc = __load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS, cpl,
+				       X86_TRANSFER_RET,
 				       &new_desc);
 				       &new_desc);
 	if (rc != X86EMUL_CONTINUE)
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
 		return rc;
@@ -2163,12 +2223,15 @@ static int em_cmpxchg(struct x86_emulate_ctxt *ctxt)
 	fastop(ctxt, em_cmp);
 	fastop(ctxt, em_cmp);
 
 
 	if (ctxt->eflags & EFLG_ZF) {
 	if (ctxt->eflags & EFLG_ZF) {
-		/* Success: write back to memory. */
+		/* Success: write back to memory; no update of EAX */
+		ctxt->src.type = OP_NONE;
 		ctxt->dst.val = ctxt->src.orig_val;
 		ctxt->dst.val = ctxt->src.orig_val;
 	} else {
 	} else {
 		/* Failure: write the value we saw to EAX. */
 		/* Failure: write the value we saw to EAX. */
-		ctxt->dst.type = OP_REG;
-		ctxt->dst.addr.reg = reg_rmw(ctxt, VCPU_REGS_RAX);
+		ctxt->src.type = OP_REG;
+		ctxt->src.addr.reg = reg_rmw(ctxt, VCPU_REGS_RAX);
+		ctxt->src.val = ctxt->dst.orig_val;
+		/* Create write-cycle to dest by writing the same value */
 		ctxt->dst.val = ctxt->dst.orig_val;
 		ctxt->dst.val = ctxt->dst.orig_val;
 	}
 	}
 	return X86EMUL_CONTINUE;
 	return X86EMUL_CONTINUE;
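
The em_cmpxchg() change above mirrors the architectural CMPXCHG rules and keeps the emulator's write-back path uniform for both outcomes. A plain-C restatement of those rules for the 32-bit case (no emulator types; illustrative only):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* CMPXCHG r/m32, r32: on success only the destination is written and the
 * accumulator is left alone; on failure the accumulator is loaded with the
 * old destination value, and real hardware still drives a write cycle that
 * stores the unchanged value back to the destination. */
static bool emulated_cmpxchg32(uint32_t *dst, uint32_t *eax, uint32_t src)
{
	if (*dst == *eax) {
		*dst = src;		/* success: no EAX update */
		return true;		/* ZF set */
	}
	*eax = *dst;			/* failure: EAX <- old destination */
	*dst = *dst;			/* write-back of the same value */
	return false;			/* ZF clear */
}

int main(void)
{
	uint32_t mem = 5, eax = 5;
	bool zf;

	zf = emulated_cmpxchg32(&mem, &eax, 9);		/* match: mem=9, eax=5 */
	printf("zf=%d mem=%u eax=%u\n", zf, mem, eax);

	eax = 7;					/* mismatch: eax loads 9 */
	zf = emulated_cmpxchg32(&mem, &eax, 1);
	printf("zf=%d mem=%u eax=%u\n", zf, mem, eax);
	return 0;
}
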
@@ -2556,23 +2619,23 @@ static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt,
 	 * it is handled in a context of new task
 	 * it is handled in a context of new task
 	 */
 	 */
 	ret = __load_segment_descriptor(ctxt, tss->ldt, VCPU_SREG_LDTR, cpl,
 	ret = __load_segment_descriptor(ctxt, tss->ldt, VCPU_SREG_LDTR, cpl,
-					true, NULL);
+					X86_TRANSFER_TASK_SWITCH, NULL);
 	if (ret != X86EMUL_CONTINUE)
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
 		return ret;
 	ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl,
 	ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl,
-					true, NULL);
+					X86_TRANSFER_TASK_SWITCH, NULL);
 	if (ret != X86EMUL_CONTINUE)
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
 		return ret;
 	ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl,
 	ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl,
-					true, NULL);
+					X86_TRANSFER_TASK_SWITCH, NULL);
 	if (ret != X86EMUL_CONTINUE)
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
 		return ret;
 	ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl,
 	ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl,
-					true, NULL);
+					X86_TRANSFER_TASK_SWITCH, NULL);
 	if (ret != X86EMUL_CONTINUE)
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
 		return ret;
 	ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl,
 	ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl,
-					true, NULL);
+					X86_TRANSFER_TASK_SWITCH, NULL);
 	if (ret != X86EMUL_CONTINUE)
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
 		return ret;
 
 
@@ -2694,31 +2757,31 @@ static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt,
 	 * it is handled in a context of new task
 	 * it is handled in a context of new task
 	 */
 	 */
 	ret = __load_segment_descriptor(ctxt, tss->ldt_selector, VCPU_SREG_LDTR,
 	ret = __load_segment_descriptor(ctxt, tss->ldt_selector, VCPU_SREG_LDTR,
-					cpl, true, NULL);
+					cpl, X86_TRANSFER_TASK_SWITCH, NULL);
 	if (ret != X86EMUL_CONTINUE)
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
 		return ret;
 	ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl,
 	ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl,
-					true, NULL);
+					X86_TRANSFER_TASK_SWITCH, NULL);
 	if (ret != X86EMUL_CONTINUE)
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
 		return ret;
 	ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl,
 	ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl,
-					true, NULL);
+					X86_TRANSFER_TASK_SWITCH, NULL);
 	if (ret != X86EMUL_CONTINUE)
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
 		return ret;
 	ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl,
 	ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl,
-					true, NULL);
+					X86_TRANSFER_TASK_SWITCH, NULL);
 	if (ret != X86EMUL_CONTINUE)
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
 		return ret;
 	ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl,
 	ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl,
-					true, NULL);
+					X86_TRANSFER_TASK_SWITCH, NULL);
 	if (ret != X86EMUL_CONTINUE)
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
 		return ret;
 	ret = __load_segment_descriptor(ctxt, tss->fs, VCPU_SREG_FS, cpl,
 	ret = __load_segment_descriptor(ctxt, tss->fs, VCPU_SREG_FS, cpl,
-					true, NULL);
+					X86_TRANSFER_TASK_SWITCH, NULL);
 	if (ret != X86EMUL_CONTINUE)
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
 		return ret;
 	ret = __load_segment_descriptor(ctxt, tss->gs, VCPU_SREG_GS, cpl,
 	ret = __load_segment_descriptor(ctxt, tss->gs, VCPU_SREG_GS, cpl,
-					true, NULL);
+					X86_TRANSFER_TASK_SWITCH, NULL);
 	if (ret != X86EMUL_CONTINUE)
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
 		return ret;
 
 
@@ -2739,7 +2802,6 @@ static int task_switch_32(struct x86_emulate_ctxt *ctxt,
 	ret = ops->read_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg,
 	ret = ops->read_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg,
 			    &ctxt->exception);
 			    &ctxt->exception);
 	if (ret != X86EMUL_CONTINUE)
 	if (ret != X86EMUL_CONTINUE)
-		/* FIXME: need to provide precise fault address */
 		return ret;
 		return ret;
 
 
 	save_state_to_tss32(ctxt, &tss_seg);
 	save_state_to_tss32(ctxt, &tss_seg);
@@ -2748,13 +2810,11 @@ static int task_switch_32(struct x86_emulate_ctxt *ctxt,
 	ret = ops->write_std(ctxt, old_tss_base + eip_offset, &tss_seg.eip,
 	ret = ops->write_std(ctxt, old_tss_base + eip_offset, &tss_seg.eip,
 			     ldt_sel_offset - eip_offset, &ctxt->exception);
 			     ldt_sel_offset - eip_offset, &ctxt->exception);
 	if (ret != X86EMUL_CONTINUE)
 	if (ret != X86EMUL_CONTINUE)
-		/* FIXME: need to provide precise fault address */
 		return ret;
 		return ret;
 
 
 	ret = ops->read_std(ctxt, new_tss_base, &tss_seg, sizeof tss_seg,
 	ret = ops->read_std(ctxt, new_tss_base, &tss_seg, sizeof tss_seg,
 			    &ctxt->exception);
 			    &ctxt->exception);
 	if (ret != X86EMUL_CONTINUE)
 	if (ret != X86EMUL_CONTINUE)
-		/* FIXME: need to provide precise fault address */
 		return ret;
 		return ret;
 
 
 	if (old_tss_sel != 0xffff) {
 	if (old_tss_sel != 0xffff) {
@@ -2765,7 +2825,6 @@ static int task_switch_32(struct x86_emulate_ctxt *ctxt,
 				     sizeof tss_seg.prev_task_link,
 				     sizeof tss_seg.prev_task_link,
 				     &ctxt->exception);
 				     &ctxt->exception);
 		if (ret != X86EMUL_CONTINUE)
 		if (ret != X86EMUL_CONTINUE)
-			/* FIXME: need to provide precise fault address */
 			return ret;
 			return ret;
 	}
 	}
 
 
@@ -2999,15 +3058,16 @@ static int em_call_far(struct x86_emulate_ctxt *ctxt)
 	struct desc_struct old_desc, new_desc;
 	struct desc_struct old_desc, new_desc;
 	const struct x86_emulate_ops *ops = ctxt->ops;
 	const struct x86_emulate_ops *ops = ctxt->ops;
 	int cpl = ctxt->ops->cpl(ctxt);
 	int cpl = ctxt->ops->cpl(ctxt);
+	enum x86emul_mode prev_mode = ctxt->mode;
 
 
 	old_eip = ctxt->_eip;
 	old_eip = ctxt->_eip;
 	ops->get_segment(ctxt, &old_cs, &old_desc, NULL, VCPU_SREG_CS);
 	ops->get_segment(ctxt, &old_cs, &old_desc, NULL, VCPU_SREG_CS);
 
 
 	memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);
 	memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);
-	rc = __load_segment_descriptor(ctxt, sel, VCPU_SREG_CS, cpl, false,
-				       &new_desc);
+	rc = __load_segment_descriptor(ctxt, sel, VCPU_SREG_CS, cpl,
+				       X86_TRANSFER_CALL_JMP, &new_desc);
 	if (rc != X86EMUL_CONTINUE)
 	if (rc != X86EMUL_CONTINUE)
-		return X86EMUL_CONTINUE;
+		return rc;
 
 
 	rc = assign_eip_far(ctxt, ctxt->src.val, &new_desc);
 	rc = assign_eip_far(ctxt, ctxt->src.val, &new_desc);
 	if (rc != X86EMUL_CONTINUE)
 	if (rc != X86EMUL_CONTINUE)
@@ -3022,11 +3082,14 @@ static int em_call_far(struct x86_emulate_ctxt *ctxt)
 	rc = em_push(ctxt);
 	rc = em_push(ctxt);
 	/* If we failed, we tainted the memory, but the very least we should
 	/* If we failed, we tainted the memory, but the very least we should
 	   restore cs */
 	   restore cs */
-	if (rc != X86EMUL_CONTINUE)
+	if (rc != X86EMUL_CONTINUE) {
+		pr_warn_once("faulting far call emulation tainted memory\n");
 		goto fail;
 		goto fail;
+	}
 	return rc;
 	return rc;
 fail:
 fail:
 	ops->set_segment(ctxt, old_cs, &old_desc, 0, VCPU_SREG_CS);
 	ops->set_segment(ctxt, old_cs, &old_desc, 0, VCPU_SREG_CS);
+	ctxt->mode = prev_mode;
 	return rc;
 	return rc;
 
 
 }
 }
@@ -3477,6 +3540,12 @@ static int em_clflush(struct x86_emulate_ctxt *ctxt)
 	return X86EMUL_CONTINUE;
 	return X86EMUL_CONTINUE;
 }
 }
 
 
+static int em_movsxd(struct x86_emulate_ctxt *ctxt)
+{
+	ctxt->dst.val = (s32) ctxt->src.val;
+	return X86EMUL_CONTINUE;
+}
+
 static bool valid_cr(int nr)
 static bool valid_cr(int nr)
 {
 {
 	switch (nr) {
 	switch (nr) {
@@ -3676,6 +3745,7 @@ static int check_perm_out(struct x86_emulate_ctxt *ctxt)
 #define G(_f, _g) { .flags = ((_f) | Group | ModRM), .u.group = (_g) }
 #define G(_f, _g) { .flags = ((_f) | Group | ModRM), .u.group = (_g) }
 #define GD(_f, _g) { .flags = ((_f) | GroupDual | ModRM), .u.gdual = (_g) }
 #define GD(_f, _g) { .flags = ((_f) | GroupDual | ModRM), .u.gdual = (_g) }
 #define ID(_f, _i) { .flags = ((_f) | InstrDual | ModRM), .u.idual = (_i) }
 #define ID(_f, _i) { .flags = ((_f) | InstrDual | ModRM), .u.idual = (_i) }
+#define MD(_f, _m) { .flags = ((_f) | ModeDual), .u.mdual = (_m) }
 #define E(_f, _e) { .flags = ((_f) | Escape | ModRM), .u.esc = (_e) }
 #define E(_f, _e) { .flags = ((_f) | Escape | ModRM), .u.esc = (_e) }
 #define I(_f, _e) { .flags = (_f), .u.execute = (_e) }
 #define I(_f, _e) { .flags = (_f), .u.execute = (_e) }
 #define F(_f, _e) { .flags = (_f) | Fastop, .u.fastop = (_e) }
 #define F(_f, _e) { .flags = (_f) | Fastop, .u.fastop = (_e) }
@@ -3738,7 +3808,7 @@ static const struct opcode group1[] = {
 };
 };
 
 
 static const struct opcode group1A[] = {
 static const struct opcode group1A[] = {
-	I(DstMem | SrcNone | Mov | Stack, em_pop), N, N, N, N, N, N, N,
+	I(DstMem | SrcNone | Mov | Stack | IncSP, em_pop), N, N, N, N, N, N, N,
 };
 };
 
 
 static const struct opcode group2[] = {
 static const struct opcode group2[] = {
@@ -3854,7 +3924,7 @@ static const struct gprefix pfx_0f_e7 = {
 };
 };
 
 
 static const struct escape escape_d9 = { {
 static const struct escape escape_d9 = { {
-	N, N, N, N, N, N, N, I(DstMem, em_fnstcw),
+	N, N, N, N, N, N, N, I(DstMem16 | Mov, em_fnstcw),
 }, {
 }, {
 	/* 0xC0 - 0xC7 */
 	/* 0xC0 - 0xC7 */
 	N, N, N, N, N, N, N, N,
 	N, N, N, N, N, N, N, N,
@@ -3896,7 +3966,7 @@ static const struct escape escape_db = { {
 } };
 } };
 
 
 static const struct escape escape_dd = { {
 static const struct escape escape_dd = { {
-	N, N, N, N, N, N, N, I(DstMem, em_fnstsw),
+	N, N, N, N, N, N, N, I(DstMem16 | Mov, em_fnstsw),
 }, {
 }, {
 	/* 0xC0 - 0xC7 */
 	/* 0xC0 - 0xC7 */
 	N, N, N, N, N, N, N, N,
 	N, N, N, N, N, N, N, N,
@@ -3920,6 +3990,10 @@ static const struct instr_dual instr_dual_0f_c3 = {
 	I(DstMem | SrcReg | ModRM | No16 | Mov, em_mov), N
 	I(DstMem | SrcReg | ModRM | No16 | Mov, em_mov), N
 };
 };
 
 
+static const struct mode_dual mode_dual_63 = {
+	N, I(DstReg | SrcMem32 | ModRM | Mov, em_movsxd)
+};
+
 static const struct opcode opcode_table[256] = {
 static const struct opcode opcode_table[256] = {
 	/* 0x00 - 0x07 */
 	/* 0x00 - 0x07 */
 	F6ALU(Lock, em_add),
 	F6ALU(Lock, em_add),
@@ -3954,7 +4028,7 @@ static const struct opcode opcode_table[256] = {
 	/* 0x60 - 0x67 */
 	/* 0x60 - 0x67 */
 	I(ImplicitOps | Stack | No64, em_pusha),
 	I(ImplicitOps | Stack | No64, em_pusha),
 	I(ImplicitOps | Stack | No64, em_popa),
 	I(ImplicitOps | Stack | No64, em_popa),
-	N, D(DstReg | SrcMem32 | ModRM | Mov) /* movsxd (x86/64) */ ,
+	N, MD(ModRM, &mode_dual_63),
 	N, N, N, N,
 	N, N, N, N,
 	/* 0x68 - 0x6F */
 	/* 0x68 - 0x6F */
 	I(SrcImm | Mov | Stack, em_push),
 	I(SrcImm | Mov | Stack, em_push),
@@ -4010,8 +4084,8 @@ static const struct opcode opcode_table[256] = {
 	G(ByteOp, group11), G(0, group11),
 	G(ByteOp, group11), G(0, group11),
 	/* 0xC8 - 0xCF */
 	/* 0xC8 - 0xCF */
 	I(Stack | SrcImmU16 | Src2ImmByte, em_enter), I(Stack, em_leave),
 	I(Stack | SrcImmU16 | Src2ImmByte, em_enter), I(Stack, em_leave),
-	I(ImplicitOps | Stack | SrcImmU16, em_ret_far_imm),
-	I(ImplicitOps | Stack, em_ret_far),
+	I(ImplicitOps | SrcImmU16, em_ret_far_imm),
+	I(ImplicitOps, em_ret_far),
 	D(ImplicitOps), DI(SrcImmByte, intn),
 	D(ImplicitOps), DI(SrcImmByte, intn),
 	D(ImplicitOps | No64), II(ImplicitOps, em_iret, iret),
 	D(ImplicitOps | No64), II(ImplicitOps, em_iret, iret),
 	/* 0xD0 - 0xD7 */
 	/* 0xD0 - 0xD7 */
@@ -4108,7 +4182,7 @@ static const struct opcode twobyte_table[256] = {
 	F(DstMem | SrcReg | Src2CL | ModRM, em_shrd),
 	F(DstMem | SrcReg | Src2CL | ModRM, em_shrd),
 	GD(0, &group15), F(DstReg | SrcMem | ModRM, em_imul),
 	GD(0, &group15), F(DstReg | SrcMem | ModRM, em_imul),
 	/* 0xB0 - 0xB7 */
 	/* 0xB0 - 0xB7 */
-	I2bv(DstMem | SrcReg | ModRM | Lock | PageTable, em_cmpxchg),
+	I2bv(DstMem | SrcReg | ModRM | Lock | PageTable | SrcWrite, em_cmpxchg),
 	I(DstReg | SrcMemFAddr | ModRM | Src2SS, em_lseg),
 	I(DstReg | SrcMemFAddr | ModRM | Src2SS, em_lseg),
 	F(DstMem | SrcReg | ModRM | BitOp | Lock, em_btr),
 	F(DstMem | SrcReg | ModRM | BitOp | Lock, em_btr),
 	I(DstReg | SrcMemFAddr | ModRM | Src2FS, em_lseg),
 	I(DstReg | SrcMemFAddr | ModRM | Src2FS, em_lseg),
@@ -4174,6 +4248,8 @@ static const struct opcode opcode_map_0f_38[256] = {
 #undef I
 #undef I
 #undef GP
 #undef GP
 #undef EXT
 #undef EXT
+#undef MD
+#undef ID
 
 
 #undef D2bv
 #undef D2bv
 #undef D2bvIP
 #undef D2bvIP
@@ -4563,6 +4639,12 @@ done_prefixes:
 			else
 			else
 				opcode = opcode.u.idual->mod012;
 				opcode = opcode.u.idual->mod012;
 			break;
 			break;
+		case ModeDual:
+			if (ctxt->mode == X86EMUL_MODE_PROT64)
+				opcode = opcode.u.mdual->mode64;
+			else
+				opcode = opcode.u.mdual->mode32;
+			break;
 		default:
 		default:
 			return EMULATION_FAILED;
 			return EMULATION_FAILED;
 		}
 		}
@@ -4860,8 +4942,13 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
 		/* optimisation - avoid slow emulated read if Mov */
 		/* optimisation - avoid slow emulated read if Mov */
 		rc = segmented_read(ctxt, ctxt->dst.addr.mem,
 		rc = segmented_read(ctxt, ctxt->dst.addr.mem,
 				   &ctxt->dst.val, ctxt->dst.bytes);
 				   &ctxt->dst.val, ctxt->dst.bytes);
-		if (rc != X86EMUL_CONTINUE)
+		if (rc != X86EMUL_CONTINUE) {
+			if (!(ctxt->d & NoWrite) &&
+			    rc == X86EMUL_PROPAGATE_FAULT &&
+			    ctxt->exception.vector == PF_VECTOR)
+				ctxt->exception.error_code |= PFERR_WRITE_MASK;
 			goto done;
 			goto done;
+		}
 	}
 	}
 	ctxt->dst.orig_val = ctxt->dst.val;
 	ctxt->dst.orig_val = ctxt->dst.val;
 
 
@@ -4899,11 +4986,6 @@ special_insn:
 		goto threebyte_insn;
 		goto threebyte_insn;
 
 
 	switch (ctxt->b) {
 	switch (ctxt->b) {
-	case 0x63:		/* movsxd */
-		if (ctxt->mode != X86EMUL_MODE_PROT64)
-			goto cannot_emulate;
-		ctxt->dst.val = (s32) ctxt->src.val;
-		break;
 	case 0x70 ... 0x7f: /* jcc (short) */
 	case 0x70 ... 0x7f: /* jcc (short) */
 		if (test_cc(ctxt->b, ctxt->eflags))
 		if (test_cc(ctxt->b, ctxt->eflags))
 			rc = jmp_rel(ctxt, ctxt->src.val);
 			rc = jmp_rel(ctxt, ctxt->src.val);
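
The 0x63 special case removed from the big opcode switch above now lives in em_movsxd() together with the ModeDual decode added earlier in this file. The operation itself is only a 32-to-64-bit sign extension; a minimal sketch:

#include <stdint.h>
#include <stdio.h>

/* What em_movsxd() computes: the 32-bit source operand is sign-extended
 * into the 64-bit destination.  Opcode 0x63 decodes to this only in
 * 64-bit mode, which is what the new mode_dual_63 entry expresses. */
static int64_t movsxd(int32_t src)
{
	return (int64_t)src;
}

int main(void)
{
	printf("%lld\n", (long long)movsxd(INT32_MAX));	/*  2147483647 */
	printf("%lld\n", (long long)movsxd(INT32_MIN));	/* -2147483648 */
	return 0;
}
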

+ 1 - 1
arch/x86/kvm/ioapic.h

@@ -98,7 +98,7 @@ static inline struct kvm_ioapic *ioapic_irqchip(struct kvm *kvm)
 }
 }
 
 
 void kvm_rtc_eoi_tracking_restore_one(struct kvm_vcpu *vcpu);
 void kvm_rtc_eoi_tracking_restore_one(struct kvm_vcpu *vcpu);
-int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
+bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
 		int short_hand, unsigned int dest, int dest_mode);
 		int short_hand, unsigned int dest, int dest_mode);
 int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2);
 int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2);
 void kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, int vector,
 void kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, int vector,

+ 3 - 1
arch/x86/kvm/iommu.c

@@ -138,7 +138,7 @@ int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot)
 
 
 		gfn += page_size >> PAGE_SHIFT;
 		gfn += page_size >> PAGE_SHIFT;
 
 
-
+		cond_resched();
 	}
 	}
 
 
 	return 0;
 	return 0;
@@ -306,6 +306,8 @@ static void kvm_iommu_put_pages(struct kvm *kvm,
 		kvm_unpin_pages(kvm, pfn, unmap_pages);
 		kvm_unpin_pages(kvm, pfn, unmap_pages);
 
 
 		gfn += unmap_pages;
 		gfn += unmap_pages;
+
+		cond_resched();
 	}
 	}
 }
 }
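
The iommu map/unmap loops above can walk a very large number of pages, so the change adds a cond_resched() per iteration to keep the CPU shareable. A rough userspace analogue of the same "do a bounded chunk, then yield" pattern, with sched_yield() standing in for cond_resched() (illustrative only):

#include <sched.h>
#include <stddef.h>
#include <stdio.h>

/* Process a long range in chunks, yielding the CPU between chunks so other
 * runnable tasks are not starved -- loosely analogous to calling
 * cond_resched() inside a long kernel loop. */
static void process_range(size_t npages, size_t chunk)
{
	size_t done = 0;

	while (done < npages) {
		size_t n = npages - done < chunk ? npages - done : chunk;

		/* ... map/unmap n pages here ... */
		done += n;
		sched_yield();	/* give the scheduler a chance to run others */
	}
	printf("processed %zu pages\n", done);
}

int main(void)
{
	process_range(1 << 20, 512);
	return 0;
}
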
 
 

+ 96 - 51
arch/x86/kvm/lapic.c

@@ -33,6 +33,7 @@
 #include <asm/page.h>
 #include <asm/page.h>
 #include <asm/current.h>
 #include <asm/current.h>
 #include <asm/apicdef.h>
 #include <asm/apicdef.h>
+#include <asm/delay.h>
 #include <linux/atomic.h>
 #include <linux/atomic.h>
 #include <linux/jump_label.h>
 #include <linux/jump_label.h>
 #include "kvm_cache_regs.h"
 #include "kvm_cache_regs.h"
@@ -327,17 +328,24 @@ static u8 count_vectors(void *bitmap)
 	return count;
 	return count;
 }
 }
 
 
-void kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir)
+void __kvm_apic_update_irr(u32 *pir, void *regs)
 {
 {
 	u32 i, pir_val;
 	u32 i, pir_val;
-	struct kvm_lapic *apic = vcpu->arch.apic;
 
 
 	for (i = 0; i <= 7; i++) {
 	for (i = 0; i <= 7; i++) {
 		pir_val = xchg(&pir[i], 0);
 		pir_val = xchg(&pir[i], 0);
 		if (pir_val)
 		if (pir_val)
-			*((u32 *)(apic->regs + APIC_IRR + i * 0x10)) |= pir_val;
+			*((u32 *)(regs + APIC_IRR + i * 0x10)) |= pir_val;
 	}
 	}
 }
 }
+EXPORT_SYMBOL_GPL(__kvm_apic_update_irr);
+
+void kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir)
+{
+	struct kvm_lapic *apic = vcpu->arch.apic;
+
+	__kvm_apic_update_irr(pir, apic->regs);
+}
 EXPORT_SYMBOL_GPL(kvm_apic_update_irr);
 EXPORT_SYMBOL_GPL(kvm_apic_update_irr);
 
 
 static inline void apic_set_irr(int vec, struct kvm_lapic *apic)
 static inline void apic_set_irr(int vec, struct kvm_lapic *apic)
@@ -405,7 +413,7 @@ static inline void apic_set_isr(int vec, struct kvm_lapic *apic)
 	 * because the processor can modify ISR under the hood.  Instead
 	 * because the processor can modify ISR under the hood.  Instead
 	 * just set SVI.
 	 * just set SVI.
 	 */
 	 */
-	if (unlikely(kvm_apic_vid_enabled(vcpu->kvm)))
+	if (unlikely(kvm_x86_ops->hwapic_isr_update))
 		kvm_x86_ops->hwapic_isr_update(vcpu->kvm, vec);
 		kvm_x86_ops->hwapic_isr_update(vcpu->kvm, vec);
 	else {
 	else {
 		++apic->isr_count;
 		++apic->isr_count;
@@ -453,7 +461,7 @@ static inline void apic_clear_isr(int vec, struct kvm_lapic *apic)
 	 * on the other hand isr_count and highest_isr_cache are unused
 	 * on the other hand isr_count and highest_isr_cache are unused
 	 * and must be left alone.
 	 * and must be left alone.
 	 */
 	 */
-	if (unlikely(kvm_apic_vid_enabled(vcpu->kvm)))
+	if (unlikely(kvm_x86_ops->hwapic_isr_update))
 		kvm_x86_ops->hwapic_isr_update(vcpu->kvm,
 		kvm_x86_ops->hwapic_isr_update(vcpu->kvm,
 					       apic_find_highest_isr(apic));
 					       apic_find_highest_isr(apic));
 	else {
 	else {
@@ -580,55 +588,48 @@ static void apic_set_tpr(struct kvm_lapic *apic, u32 tpr)
 	apic_update_ppr(apic);
 	apic_update_ppr(apic);
 }
 }
 
 
-static int kvm_apic_broadcast(struct kvm_lapic *apic, u32 dest)
+static bool kvm_apic_broadcast(struct kvm_lapic *apic, u32 dest)
 {
 {
 	return dest == (apic_x2apic_mode(apic) ?
 	return dest == (apic_x2apic_mode(apic) ?
 			X2APIC_BROADCAST : APIC_BROADCAST);
 			X2APIC_BROADCAST : APIC_BROADCAST);
 }
 }
 
 
-int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u32 dest)
+static bool kvm_apic_match_physical_addr(struct kvm_lapic *apic, u32 dest)
 {
 {
 	return kvm_apic_id(apic) == dest || kvm_apic_broadcast(apic, dest);
 	return kvm_apic_id(apic) == dest || kvm_apic_broadcast(apic, dest);
 }
 }
 
 
-int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u32 mda)
+static bool kvm_apic_match_logical_addr(struct kvm_lapic *apic, u32 mda)
 {
 {
-	int result = 0;
 	u32 logical_id;
 	u32 logical_id;
 
 
 	if (kvm_apic_broadcast(apic, mda))
 	if (kvm_apic_broadcast(apic, mda))
-		return 1;
+		return true;
 
 
-	if (apic_x2apic_mode(apic)) {
-		logical_id = kvm_apic_get_reg(apic, APIC_LDR);
-		return logical_id & mda;
-	}
+	logical_id = kvm_apic_get_reg(apic, APIC_LDR);
 
 
-	logical_id = GET_APIC_LOGICAL_ID(kvm_apic_get_reg(apic, APIC_LDR));
+	if (apic_x2apic_mode(apic))
+		return ((logical_id >> 16) == (mda >> 16))
+		       && (logical_id & mda & 0xffff) != 0;
+
+	logical_id = GET_APIC_LOGICAL_ID(logical_id);
 
 
 	switch (kvm_apic_get_reg(apic, APIC_DFR)) {
 	switch (kvm_apic_get_reg(apic, APIC_DFR)) {
 	case APIC_DFR_FLAT:
 	case APIC_DFR_FLAT:
-		if (logical_id & mda)
-			result = 1;
-		break;
+		return (logical_id & mda) != 0;
 	case APIC_DFR_CLUSTER:
 	case APIC_DFR_CLUSTER:
-		if (((logical_id >> 4) == (mda >> 0x4))
-		    && (logical_id & mda & 0xf))
-			result = 1;
-		break;
+		return ((logical_id >> 4) == (mda >> 4))
+		       && (logical_id & mda & 0xf) != 0;
 	default:
 	default:
 		apic_debug("Bad DFR vcpu %d: %08x\n",
 		apic_debug("Bad DFR vcpu %d: %08x\n",
 			   apic->vcpu->vcpu_id, kvm_apic_get_reg(apic, APIC_DFR));
 			   apic->vcpu->vcpu_id, kvm_apic_get_reg(apic, APIC_DFR));
-		break;
+		return false;
 	}
 	}
-
-	return result;
 }
 }
 
 
-int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
+bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
 			   int short_hand, unsigned int dest, int dest_mode)
 			   int short_hand, unsigned int dest, int dest_mode)
 {
 {
-	int result = 0;
 	struct kvm_lapic *target = vcpu->arch.apic;
 	struct kvm_lapic *target = vcpu->arch.apic;
 
 
 	apic_debug("target %p, source %p, dest 0x%x, "
 	apic_debug("target %p, source %p, dest 0x%x, "
@@ -638,29 +639,21 @@ int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
 	ASSERT(target);
 	ASSERT(target);
 	switch (short_hand) {
 	switch (short_hand) {
 	case APIC_DEST_NOSHORT:
 	case APIC_DEST_NOSHORT:
-		if (dest_mode == 0)
-			/* Physical mode. */
-			result = kvm_apic_match_physical_addr(target, dest);
+		if (dest_mode == APIC_DEST_PHYSICAL)
+			return kvm_apic_match_physical_addr(target, dest);
 		else
 		else
-			/* Logical mode. */
-			result = kvm_apic_match_logical_addr(target, dest);
-		break;
+			return kvm_apic_match_logical_addr(target, dest);
 	case APIC_DEST_SELF:
 	case APIC_DEST_SELF:
-		result = (target == source);
-		break;
+		return target == source;
 	case APIC_DEST_ALLINC:
 	case APIC_DEST_ALLINC:
-		result = 1;
-		break;
+		return true;
 	case APIC_DEST_ALLBUT:
 	case APIC_DEST_ALLBUT:
-		result = (target != source);
-		break;
+		return target != source;
 	default:
 	default:
 		apic_debug("kvm: apic: Bad dest shorthand value %x\n",
 		apic_debug("kvm: apic: Bad dest shorthand value %x\n",
 			   short_hand);
 			   short_hand);
-		break;
+		return false;
 	}
 	}
-
-	return result;
 }
 }
 
 
 bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
 bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
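
The rewritten kvm_apic_match_logical_addr() above distinguishes x2APIC logical mode from the xAPIC flat and cluster models. A standalone restatement of those matching rules as the new code implements them, with the LDR/MDA values passed as plain arguments (the broadcast check and register accessors are omitted):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* x2APIC: upper 16 bits are the cluster id, lower 16 bits a CPU bitmask. */
static bool match_x2apic_logical(uint32_t ldr, uint32_t mda)
{
	return (ldr >> 16) == (mda >> 16) && (ldr & mda & 0xffff) != 0;
}

/* xAPIC flat model: 8-bit bitmask, any shared bit is a match. */
static bool match_flat(uint8_t logical_id, uint8_t mda)
{
	return (logical_id & mda) != 0;
}

/* xAPIC cluster model: high nibble is the cluster, low nibble a bitmask. */
static bool match_cluster(uint8_t logical_id, uint8_t mda)
{
	return (logical_id >> 4) == (mda >> 4) && (logical_id & mda & 0xf) != 0;
}

int main(void)
{
	printf("%d\n", match_x2apic_logical(0x00030002, 0x00030006)); /* 1 */
	printf("%d\n", match_flat(0x02, 0x06));                       /* 1 */
	printf("%d\n", match_cluster(0x21, 0x12));                    /* 0 */
	return 0;
}
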
@@ -693,7 +686,7 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
 
 
 	ret = true;
 	ret = true;
 
 
-	if (irq->dest_mode == 0) { /* physical mode */
+	if (irq->dest_mode == APIC_DEST_PHYSICAL) {
 		if (irq->dest_id >= ARRAY_SIZE(map->phys_map))
 		if (irq->dest_id >= ARRAY_SIZE(map->phys_map))
 			goto out;
 			goto out;
 
 
@@ -1076,25 +1069,72 @@ static void apic_timer_expired(struct kvm_lapic *apic)
 {
 {
 	struct kvm_vcpu *vcpu = apic->vcpu;
 	struct kvm_vcpu *vcpu = apic->vcpu;
 	wait_queue_head_t *q = &vcpu->wq;
 	wait_queue_head_t *q = &vcpu->wq;
+	struct kvm_timer *ktimer = &apic->lapic_timer;
 
 
-	/*
-	 * Note: KVM_REQ_PENDING_TIMER is implicitly checked in
-	 * vcpu_enter_guest.
-	 */
 	if (atomic_read(&apic->lapic_timer.pending))
 	if (atomic_read(&apic->lapic_timer.pending))
 		return;
 		return;
 
 
 	atomic_inc(&apic->lapic_timer.pending);
 	atomic_inc(&apic->lapic_timer.pending);
-	/* FIXME: this code should not know anything about vcpus */
-	kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu);
+	kvm_set_pending_timer(vcpu);
 
 
 	if (waitqueue_active(q))
 	if (waitqueue_active(q))
 		wake_up_interruptible(q);
 		wake_up_interruptible(q);
+
+	if (apic_lvtt_tscdeadline(apic))
+		ktimer->expired_tscdeadline = ktimer->tscdeadline;
+}
+
+/*
+ * On APICv, this test will cause a busy wait
+ * during a higher-priority task.
+ */
+
+static bool lapic_timer_int_injected(struct kvm_vcpu *vcpu)
+{
+	struct kvm_lapic *apic = vcpu->arch.apic;
+	u32 reg = kvm_apic_get_reg(apic, APIC_LVTT);
+
+	if (kvm_apic_hw_enabled(apic)) {
+		int vec = reg & APIC_VECTOR_MASK;
+		void *bitmap = apic->regs + APIC_ISR;
+
+		if (kvm_x86_ops->deliver_posted_interrupt)
+			bitmap = apic->regs + APIC_IRR;
+
+		if (apic_test_vector(vec, bitmap))
+			return true;
+	}
+	return false;
+}
+
+void wait_lapic_expire(struct kvm_vcpu *vcpu)
+{
+	struct kvm_lapic *apic = vcpu->arch.apic;
+	u64 guest_tsc, tsc_deadline;
+
+	if (!kvm_vcpu_has_lapic(vcpu))
+		return;
+
+	if (apic->lapic_timer.expired_tscdeadline == 0)
+		return;
+
+	if (!lapic_timer_int_injected(vcpu))
+		return;
+
+	tsc_deadline = apic->lapic_timer.expired_tscdeadline;
+	apic->lapic_timer.expired_tscdeadline = 0;
+	guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu, native_read_tsc());
+	trace_kvm_wait_lapic_expire(vcpu->vcpu_id, guest_tsc - tsc_deadline);
+
+	/* __delay is delay_tsc whenever the hardware has TSC, thus always.  */
+	if (guest_tsc < tsc_deadline)
+		__delay(tsc_deadline - guest_tsc);
 }
 }
 
 
 static void start_apic_timer(struct kvm_lapic *apic)
 static void start_apic_timer(struct kvm_lapic *apic)
 {
 {
 	ktime_t now;
 	ktime_t now;
+
 	atomic_set(&apic->lapic_timer.pending, 0);
 	atomic_set(&apic->lapic_timer.pending, 0);
 
 
 	if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic)) {
 	if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic)) {
@@ -1140,6 +1180,7 @@ static void start_apic_timer(struct kvm_lapic *apic)
 		/* lapic timer in tsc deadline mode */
 		/* lapic timer in tsc deadline mode */
 		u64 guest_tsc, tscdeadline = apic->lapic_timer.tscdeadline;
 		u64 guest_tsc, tscdeadline = apic->lapic_timer.tscdeadline;
 		u64 ns = 0;
 		u64 ns = 0;
+		ktime_t expire;
 		struct kvm_vcpu *vcpu = apic->vcpu;
 		struct kvm_vcpu *vcpu = apic->vcpu;
 		unsigned long this_tsc_khz = vcpu->arch.virtual_tsc_khz;
 		unsigned long this_tsc_khz = vcpu->arch.virtual_tsc_khz;
 		unsigned long flags;
 		unsigned long flags;
@@ -1154,8 +1195,10 @@ static void start_apic_timer(struct kvm_lapic *apic)
 		if (likely(tscdeadline > guest_tsc)) {
 		if (likely(tscdeadline > guest_tsc)) {
 			ns = (tscdeadline - guest_tsc) * 1000000ULL;
 			ns = (tscdeadline - guest_tsc) * 1000000ULL;
 			do_div(ns, this_tsc_khz);
 			do_div(ns, this_tsc_khz);
+			expire = ktime_add_ns(now, ns);
+			expire = ktime_sub_ns(expire, lapic_timer_advance_ns);
 			hrtimer_start(&apic->lapic_timer.timer,
 			hrtimer_start(&apic->lapic_timer.timer,
-				ktime_add_ns(now, ns), HRTIMER_MODE_ABS);
+				      expire, HRTIMER_MODE_ABS);
 		} else
 		} else
 			apic_timer_expired(apic);
 			apic_timer_expired(apic);
 
 
@@ -1745,7 +1788,9 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu,
 	if (kvm_x86_ops->hwapic_irr_update)
 	if (kvm_x86_ops->hwapic_irr_update)
 		kvm_x86_ops->hwapic_irr_update(vcpu,
 		kvm_x86_ops->hwapic_irr_update(vcpu,
 				apic_find_highest_irr(apic));
 				apic_find_highest_irr(apic));
-	kvm_x86_ops->hwapic_isr_update(vcpu->kvm, apic_find_highest_isr(apic));
+	if (unlikely(kvm_x86_ops->hwapic_isr_update))
+		kvm_x86_ops->hwapic_isr_update(vcpu->kvm,
+				apic_find_highest_isr(apic));
 	kvm_make_request(KVM_REQ_EVENT, vcpu);
 	kvm_make_request(KVM_REQ_EVENT, vcpu);
 	kvm_rtc_eoi_tracking_restore_one(vcpu);
 	kvm_rtc_eoi_tracking_restore_one(vcpu);
 }
 }
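
The lapic_timer_advance_ns path above programs the hrtimer slightly before the guest's TSC deadline, and wait_lapic_expire() then burns the residue so the interrupt is injected closer to the deadline. A userspace analogue of that "sleep early, spin the remainder" split, with illustrative numbers (clock_nanosleep() and a busy loop stand in for the hrtimer and __delay()):

#include <stdint.h>
#include <stdio.h>
#include <time.h>

static uint64_t now_ns(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return (uint64_t)ts.tv_sec * 1000000000ull + ts.tv_nsec;
}

/* Sleep until (deadline - advance_ns), then busy-wait the rest, mirroring
 * the "hrtimer fires early, the residue is spun off" split. */
static void wait_until(uint64_t deadline, uint64_t advance_ns)
{
	uint64_t early = deadline > advance_ns ? deadline - advance_ns : 0;
	struct timespec ts = {
		.tv_sec = early / 1000000000ull,
		.tv_nsec = early % 1000000000ull,
	};

	clock_nanosleep(CLOCK_MONOTONIC, TIMER_ABSTIME, &ts, NULL);
	while (now_ns() < deadline)
		;	/* burn the last few microseconds for precision */
}

int main(void)
{
	uint64_t deadline = now_ns() + 2000000;	/* 2 ms from now */

	wait_until(deadline, 100000);		/* wake 100 us early */
	printf("late by %llu ns\n", (unsigned long long)(now_ns() - deadline));
	return 0;
}
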

+ 4 - 2
arch/x86/kvm/lapic.h

@@ -14,6 +14,7 @@ struct kvm_timer {
 	u32 timer_mode;
 	u32 timer_mode;
 	u32 timer_mode_mask;
 	u32 timer_mode_mask;
 	u64 tscdeadline;
 	u64 tscdeadline;
+	u64 expired_tscdeadline;
 	atomic_t pending;			/* accumulated triggered timers */
 	atomic_t pending;			/* accumulated triggered timers */
 };
 };
 
 
@@ -56,9 +57,8 @@ u64 kvm_lapic_get_base(struct kvm_vcpu *vcpu);
 void kvm_apic_set_version(struct kvm_vcpu *vcpu);
 void kvm_apic_set_version(struct kvm_vcpu *vcpu);
 
 
 void kvm_apic_update_tmr(struct kvm_vcpu *vcpu, u32 *tmr);
 void kvm_apic_update_tmr(struct kvm_vcpu *vcpu, u32 *tmr);
+void __kvm_apic_update_irr(u32 *pir, void *regs);
 void kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir);
 void kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir);
-int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u32 dest);
-int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u32 mda);
 int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq,
 int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq,
 		unsigned long *dest_map);
 		unsigned long *dest_map);
 int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type);
 int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type);
@@ -170,4 +170,6 @@ static inline bool kvm_apic_has_events(struct kvm_vcpu *vcpu)
 
 
 bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector);
 bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector);
 
 
+void wait_lapic_expire(struct kvm_vcpu *vcpu);
+
 #endif
 #endif

+ 287 - 64
arch/x86/kvm/mmu.c

@@ -63,30 +63,16 @@ enum {
 #undef MMU_DEBUG
 #undef MMU_DEBUG
 
 
 #ifdef MMU_DEBUG
 #ifdef MMU_DEBUG
+static bool dbg = 0;
+module_param(dbg, bool, 0644);
 
 
 #define pgprintk(x...) do { if (dbg) printk(x); } while (0)
 #define pgprintk(x...) do { if (dbg) printk(x); } while (0)
 #define rmap_printk(x...) do { if (dbg) printk(x); } while (0)
 #define rmap_printk(x...) do { if (dbg) printk(x); } while (0)
-
+#define MMU_WARN_ON(x) WARN_ON(x)
 #else
 #else
-
 #define pgprintk(x...) do { } while (0)
 #define pgprintk(x...) do { } while (0)
 #define rmap_printk(x...) do { } while (0)
 #define rmap_printk(x...) do { } while (0)
-
-#endif
-
-#ifdef MMU_DEBUG
-static bool dbg = 0;
-module_param(dbg, bool, 0644);
-#endif
-
-#ifndef MMU_DEBUG
-#define ASSERT(x) do { } while (0)
-#else
-#define ASSERT(x)							\
-	if (!(x)) {							\
-		printk(KERN_WARNING "assertion failed %s:%d: %s\n",	\
-		       __FILE__, __LINE__, #x);				\
-	}
+#define MMU_WARN_ON(x) do { } while (0)
 #endif
 #endif
 
 
 #define PTE_PREFETCH_NUM		8
 #define PTE_PREFETCH_NUM		8
@@ -546,6 +532,11 @@ static bool spte_is_bit_cleared(u64 old_spte, u64 new_spte, u64 bit_mask)
 	return (old_spte & bit_mask) && !(new_spte & bit_mask);
 	return (old_spte & bit_mask) && !(new_spte & bit_mask);
 }
 }
 
 
+static bool spte_is_bit_changed(u64 old_spte, u64 new_spte, u64 bit_mask)
+{
+	return (old_spte & bit_mask) != (new_spte & bit_mask);
+}
+
 /* Rules for using mmu_spte_set:
 /* Rules for using mmu_spte_set:
  * Set the sptep from nonpresent to present.
  * Set the sptep from nonpresent to present.
  * Note: the sptep being assigned *must* be either not present
  * Note: the sptep being assigned *must* be either not present
@@ -596,6 +587,14 @@ static bool mmu_spte_update(u64 *sptep, u64 new_spte)
 	if (!shadow_accessed_mask)
 	if (!shadow_accessed_mask)
 		return ret;
 		return ret;
 
 
+	/*
+	 * Flush TLB when accessed/dirty bits are changed in the page tables,
+	 * to guarantee consistency between TLB and page tables.
+	 */
+	if (spte_is_bit_changed(old_spte, new_spte,
+                                shadow_accessed_mask | shadow_dirty_mask))
+		ret = true;
+
 	if (spte_is_bit_cleared(old_spte, new_spte, shadow_accessed_mask))
 	if (spte_is_bit_cleared(old_spte, new_spte, shadow_accessed_mask))
 		kvm_set_pfn_accessed(spte_to_pfn(old_spte));
 		kvm_set_pfn_accessed(spte_to_pfn(old_spte));
 	if (spte_is_bit_cleared(old_spte, new_spte, shadow_dirty_mask))
 	if (spte_is_bit_cleared(old_spte, new_spte, shadow_dirty_mask))
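
The new spte_is_bit_changed() check above makes mmu_spte_update() report that a TLB flush is needed whenever the accessed or dirty bit differs between the old and new SPTE. A standalone restatement of that predicate; the bit positions below are placeholders, not the real shadow_accessed_mask/shadow_dirty_mask values:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define DEMO_ACCESSED_MASK	(1ull << 5)	/* placeholder bit positions */
#define DEMO_DIRTY_MASK		(1ull << 6)

static bool spte_is_bit_changed(uint64_t old_spte, uint64_t new_spte,
				uint64_t bit_mask)
{
	return (old_spte & bit_mask) != (new_spte & bit_mask);
}

int main(void)
{
	uint64_t old_spte = DEMO_ACCESSED_MASK | DEMO_DIRTY_MASK;
	uint64_t new_spte = DEMO_ACCESSED_MASK;		/* dirty bit cleared */

	/* Any A/D difference means the TLB may hold stale A/D state. */
	printf("flush needed: %d\n",
	       spte_is_bit_changed(old_spte, new_spte,
				   DEMO_ACCESSED_MASK | DEMO_DIRTY_MASK));
	return 0;
}
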
@@ -1216,6 +1215,60 @@ static bool __rmap_write_protect(struct kvm *kvm, unsigned long *rmapp,
 	return flush;
 	return flush;
 }
 }
 
 
+static bool spte_clear_dirty(struct kvm *kvm, u64 *sptep)
+{
+	u64 spte = *sptep;
+
+	rmap_printk("rmap_clear_dirty: spte %p %llx\n", sptep, *sptep);
+
+	spte &= ~shadow_dirty_mask;
+
+	return mmu_spte_update(sptep, spte);
+}
+
+static bool __rmap_clear_dirty(struct kvm *kvm, unsigned long *rmapp)
+{
+	u64 *sptep;
+	struct rmap_iterator iter;
+	bool flush = false;
+
+	for (sptep = rmap_get_first(*rmapp, &iter); sptep;) {
+		BUG_ON(!(*sptep & PT_PRESENT_MASK));
+
+		flush |= spte_clear_dirty(kvm, sptep);
+		sptep = rmap_get_next(&iter);
+	}
+
+	return flush;
+}
+
+static bool spte_set_dirty(struct kvm *kvm, u64 *sptep)
+{
+	u64 spte = *sptep;
+
+	rmap_printk("rmap_set_dirty: spte %p %llx\n", sptep, *sptep);
+
+	spte |= shadow_dirty_mask;
+
+	return mmu_spte_update(sptep, spte);
+}
+
+static bool __rmap_set_dirty(struct kvm *kvm, unsigned long *rmapp)
+{
+	u64 *sptep;
+	struct rmap_iterator iter;
+	bool flush = false;
+
+	for (sptep = rmap_get_first(*rmapp, &iter); sptep;) {
+		BUG_ON(!(*sptep & PT_PRESENT_MASK));
+
+		flush |= spte_set_dirty(kvm, sptep);
+		sptep = rmap_get_next(&iter);
+	}
+
+	return flush;
+}
+
 /**
 /**
  * kvm_mmu_write_protect_pt_masked - write protect selected PT level pages
  * kvm_mmu_write_protect_pt_masked - write protect selected PT level pages
  * @kvm: kvm instance
  * @kvm: kvm instance
@@ -1226,7 +1279,7 @@ static bool __rmap_write_protect(struct kvm *kvm, unsigned long *rmapp,
  * Used when we do not need to care about huge page mappings: e.g. during dirty
  * Used when we do not need to care about huge page mappings: e.g. during dirty
  * logging we do not have any such mappings.
  * logging we do not have any such mappings.
  */
  */
-void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
+static void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
 				     struct kvm_memory_slot *slot,
 				     struct kvm_memory_slot *slot,
 				     gfn_t gfn_offset, unsigned long mask)
 				     gfn_t gfn_offset, unsigned long mask)
 {
 {
@@ -1242,6 +1295,53 @@ void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
 	}
 	}
 }
 }
 
 
+/**
+ * kvm_mmu_clear_dirty_pt_masked - clear MMU D-bit for PT level pages
+ * @kvm: kvm instance
+ * @slot: slot to clear D-bit
+ * @gfn_offset: start of the BITS_PER_LONG pages we care about
+ * @mask: indicates which pages we should clear D-bit
+ *
+ * Used for PML to re-log the dirty GPAs after userspace querying dirty_bitmap.
+ */
+void kvm_mmu_clear_dirty_pt_masked(struct kvm *kvm,
+				     struct kvm_memory_slot *slot,
+				     gfn_t gfn_offset, unsigned long mask)
+{
+	unsigned long *rmapp;
+
+	while (mask) {
+		rmapp = __gfn_to_rmap(slot->base_gfn + gfn_offset + __ffs(mask),
+				      PT_PAGE_TABLE_LEVEL, slot);
+		__rmap_clear_dirty(kvm, rmapp);
+
+		/* clear the first set bit */
+		mask &= mask - 1;
+	}
+}
+EXPORT_SYMBOL_GPL(kvm_mmu_clear_dirty_pt_masked);
+
+/**
+ * kvm_arch_mmu_enable_log_dirty_pt_masked - enable dirty logging for selected
+ * PT level pages.
+ *
+ * It calls kvm_mmu_write_protect_pt_masked to write protect selected pages to
+ * enable dirty logging for them.
+ *
+ * Used when we do not need to care about huge page mappings: e.g. during dirty
+ * logging we do not have any such mappings.
+ */
+void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
+				struct kvm_memory_slot *slot,
+				gfn_t gfn_offset, unsigned long mask)
+{
+	if (kvm_x86_ops->enable_log_dirty_pt_masked)
+		kvm_x86_ops->enable_log_dirty_pt_masked(kvm, slot, gfn_offset,
+				mask);
+	else
+		kvm_mmu_write_protect_pt_masked(kvm, slot, gfn_offset, mask);
+}
+
 static bool rmap_write_protect(struct kvm *kvm, u64 gfn)
 static bool rmap_write_protect(struct kvm *kvm, u64 gfn)
 {
 {
 	struct kvm_memory_slot *slot;
 	struct kvm_memory_slot *slot;
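
kvm_mmu_clear_dirty_pt_masked() above visits each set bit of the dirty mask with __ffs() and "mask &= mask - 1". The same bit-walk as a standalone sketch, using the GCC/Clang __builtin_ctzl() in place of the kernel's __ffs():

#include <stdio.h>

/* Visit each set bit of 'mask' exactly once, lowest first -- the same walk
 * the dirty-logging code uses to find the gfn offsets to act on. */
static void for_each_set_bit_demo(unsigned long base_gfn, unsigned long mask)
{
	while (mask) {
		unsigned long gfn = base_gfn + __builtin_ctzl(mask);

		printf("clear D-bit for gfn %#lx\n", gfn);
		mask &= mask - 1;	/* clear the lowest set bit */
	}
}

int main(void)
{
	for_each_set_bit_demo(0x1000, 0x29);	/* bits 0, 3, 5 */
	return 0;
}
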
@@ -1536,7 +1636,7 @@ static inline void kvm_mod_used_mmu_pages(struct kvm *kvm, int nr)
 
 
 static void kvm_mmu_free_page(struct kvm_mmu_page *sp)
 static void kvm_mmu_free_page(struct kvm_mmu_page *sp)
 {
 {
-	ASSERT(is_empty_shadow_page(sp->spt));
+	MMU_WARN_ON(!is_empty_shadow_page(sp->spt));
 	hlist_del(&sp->hash_link);
 	hlist_del(&sp->hash_link);
 	list_del(&sp->link);
 	list_del(&sp->link);
 	free_page((unsigned long)sp->spt);
 	free_page((unsigned long)sp->spt);
@@ -2501,8 +2601,10 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
 		}
 		}
 	}
 	}
 
 
-	if (pte_access & ACC_WRITE_MASK)
+	if (pte_access & ACC_WRITE_MASK) {
 		mark_page_dirty(vcpu->kvm, gfn);
 		mark_page_dirty(vcpu->kvm, gfn);
+		spte |= shadow_dirty_mask;
+	}
 
 
 set_pte:
 set_pte:
 	if (mmu_spte_update(sptep, spte))
 	if (mmu_spte_update(sptep, spte))
@@ -2818,6 +2920,18 @@ fast_pf_fix_direct_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
 	 */
 	 */
 	gfn = kvm_mmu_page_get_gfn(sp, sptep - sp->spt);
 	gfn = kvm_mmu_page_get_gfn(sp, sptep - sp->spt);
 
 
+	/*
+	 * Theoretically we could also set dirty bit (and flush TLB) here in
+	 * order to eliminate unnecessary PML logging. See comments in
+	 * set_spte. But fast_page_fault is very unlikely to happen with PML
+	 * enabled, so we do not do this. This might result in the same GPA
+	 * to be logged in PML buffer again when the write really happens, and
+	 * eventually to be called by mark_page_dirty twice. But it's also no
+	 * harm. This also avoids the TLB flush needed after setting dirty bit
+	 * so non-PML cases won't be impacted.
+	 *
+	 * Compare with set_spte where instead shadow_dirty_mask is set.
+	 */
 	if (cmpxchg64(sptep, spte, spte | PT_WRITABLE_MASK) == spte)
 	if (cmpxchg64(sptep, spte, spte | PT_WRITABLE_MASK) == spte)
 		mark_page_dirty(vcpu->kvm, gfn);
 		mark_page_dirty(vcpu->kvm, gfn);
 
 
@@ -3041,7 +3155,7 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)
 		for (i = 0; i < 4; ++i) {
 		for (i = 0; i < 4; ++i) {
 			hpa_t root = vcpu->arch.mmu.pae_root[i];
 			hpa_t root = vcpu->arch.mmu.pae_root[i];
 
 
-			ASSERT(!VALID_PAGE(root));
+			MMU_WARN_ON(VALID_PAGE(root));
 			spin_lock(&vcpu->kvm->mmu_lock);
 			spin_lock(&vcpu->kvm->mmu_lock);
 			make_mmu_pages_available(vcpu);
 			make_mmu_pages_available(vcpu);
 			sp = kvm_mmu_get_page(vcpu, i << (30 - PAGE_SHIFT),
 			sp = kvm_mmu_get_page(vcpu, i << (30 - PAGE_SHIFT),
@@ -3079,7 +3193,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
 	if (vcpu->arch.mmu.root_level == PT64_ROOT_LEVEL) {
 	if (vcpu->arch.mmu.root_level == PT64_ROOT_LEVEL) {
 		hpa_t root = vcpu->arch.mmu.root_hpa;
 		hpa_t root = vcpu->arch.mmu.root_hpa;
 
 
-		ASSERT(!VALID_PAGE(root));
+		MMU_WARN_ON(VALID_PAGE(root));
 
 
 		spin_lock(&vcpu->kvm->mmu_lock);
 		spin_lock(&vcpu->kvm->mmu_lock);
 		make_mmu_pages_available(vcpu);
 		make_mmu_pages_available(vcpu);
@@ -3104,7 +3218,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
 	for (i = 0; i < 4; ++i) {
 	for (i = 0; i < 4; ++i) {
 		hpa_t root = vcpu->arch.mmu.pae_root[i];
 		hpa_t root = vcpu->arch.mmu.pae_root[i];
 
 
-		ASSERT(!VALID_PAGE(root));
+		MMU_WARN_ON(VALID_PAGE(root));
 		if (vcpu->arch.mmu.root_level == PT32E_ROOT_LEVEL) {
 		if (vcpu->arch.mmu.root_level == PT32E_ROOT_LEVEL) {
 			pdptr = vcpu->arch.mmu.get_pdptr(vcpu, i);
 			pdptr = vcpu->arch.mmu.get_pdptr(vcpu, i);
 			if (!is_present_gpte(pdptr)) {
 			if (!is_present_gpte(pdptr)) {
@@ -3329,8 +3443,7 @@ static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva,
 	if (r)
 	if (r)
 		return r;
 		return r;
 
 
-	ASSERT(vcpu);
-	ASSERT(VALID_PAGE(vcpu->arch.mmu.root_hpa));
+	MMU_WARN_ON(!VALID_PAGE(vcpu->arch.mmu.root_hpa));
 
 
 	gfn = gva >> PAGE_SHIFT;
 	gfn = gva >> PAGE_SHIFT;
 
 
@@ -3396,8 +3509,7 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code,
 	int write = error_code & PFERR_WRITE_MASK;
 	int write = error_code & PFERR_WRITE_MASK;
 	bool map_writable;
 	bool map_writable;
 
 
-	ASSERT(vcpu);
-	ASSERT(VALID_PAGE(vcpu->arch.mmu.root_hpa));
+	MMU_WARN_ON(!VALID_PAGE(vcpu->arch.mmu.root_hpa));
 
 
 	if (unlikely(error_code & PFERR_RSVD_MASK)) {
 	if (unlikely(error_code & PFERR_RSVD_MASK)) {
 		r = handle_mmio_page_fault(vcpu, gpa, error_code, true);
 		r = handle_mmio_page_fault(vcpu, gpa, error_code, true);
@@ -3718,7 +3830,7 @@ static void paging64_init_context_common(struct kvm_vcpu *vcpu,
 	update_permission_bitmask(vcpu, context, false);
 	update_permission_bitmask(vcpu, context, false);
 	update_last_pte_bitmap(vcpu, context);
 	update_last_pte_bitmap(vcpu, context);
 
 
-	ASSERT(is_pae(vcpu));
+	MMU_WARN_ON(!is_pae(vcpu));
 	context->page_fault = paging64_page_fault;
 	context->page_fault = paging64_page_fault;
 	context->gva_to_gpa = paging64_gva_to_gpa;
 	context->gva_to_gpa = paging64_gva_to_gpa;
 	context->sync_page = paging64_sync_page;
 	context->sync_page = paging64_sync_page;
@@ -3763,7 +3875,7 @@ static void paging32E_init_context(struct kvm_vcpu *vcpu,
 
 
 static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
 static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
 {
 {
-	struct kvm_mmu *context = vcpu->arch.walk_mmu;
+	struct kvm_mmu *context = &vcpu->arch.mmu;
 
 
 	context->base_role.word = 0;
 	context->base_role.word = 0;
 	context->page_fault = tdp_page_fault;
 	context->page_fault = tdp_page_fault;
@@ -3803,11 +3915,12 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
 	update_last_pte_bitmap(vcpu, context);
 	update_last_pte_bitmap(vcpu, context);
 }
 }
 
 
-void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context)
+void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu)
 {
 {
 	bool smep = kvm_read_cr4_bits(vcpu, X86_CR4_SMEP);
 	bool smep = kvm_read_cr4_bits(vcpu, X86_CR4_SMEP);
-	ASSERT(vcpu);
-	ASSERT(!VALID_PAGE(vcpu->arch.mmu.root_hpa));
+	struct kvm_mmu *context = &vcpu->arch.mmu;
+
+	MMU_WARN_ON(VALID_PAGE(context->root_hpa));
 
 
 	if (!is_paging(vcpu))
 	if (!is_paging(vcpu))
 		nonpaging_init_context(vcpu, context);
 		nonpaging_init_context(vcpu, context);
@@ -3818,19 +3931,19 @@ void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context)
 	else
 	else
 		paging32_init_context(vcpu, context);
 		paging32_init_context(vcpu, context);
 
 
-	vcpu->arch.mmu.base_role.nxe = is_nx(vcpu);
-	vcpu->arch.mmu.base_role.cr4_pae = !!is_pae(vcpu);
-	vcpu->arch.mmu.base_role.cr0_wp  = is_write_protection(vcpu);
-	vcpu->arch.mmu.base_role.smep_andnot_wp
+	context->base_role.nxe = is_nx(vcpu);
+	context->base_role.cr4_pae = !!is_pae(vcpu);
+	context->base_role.cr0_wp  = is_write_protection(vcpu);
+	context->base_role.smep_andnot_wp
 		= smep && !is_write_protection(vcpu);
 		= smep && !is_write_protection(vcpu);
 }
 }
 EXPORT_SYMBOL_GPL(kvm_init_shadow_mmu);
 EXPORT_SYMBOL_GPL(kvm_init_shadow_mmu);
 
 
-void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context,
-		bool execonly)
+void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly)
 {
 {
-	ASSERT(vcpu);
-	ASSERT(!VALID_PAGE(vcpu->arch.mmu.root_hpa));
+	struct kvm_mmu *context = &vcpu->arch.mmu;
+
+	MMU_WARN_ON(VALID_PAGE(context->root_hpa));
 
 
 	context->shadow_root_level = kvm_x86_ops->get_tdp_level();
 	context->shadow_root_level = kvm_x86_ops->get_tdp_level();
 
 
@@ -3851,11 +3964,13 @@ EXPORT_SYMBOL_GPL(kvm_init_shadow_ept_mmu);
 
 
 static void init_kvm_softmmu(struct kvm_vcpu *vcpu)
 static void init_kvm_softmmu(struct kvm_vcpu *vcpu)
 {
 {
-	kvm_init_shadow_mmu(vcpu, vcpu->arch.walk_mmu);
-	vcpu->arch.walk_mmu->set_cr3           = kvm_x86_ops->set_cr3;
-	vcpu->arch.walk_mmu->get_cr3           = get_cr3;
-	vcpu->arch.walk_mmu->get_pdptr         = kvm_pdptr_read;
-	vcpu->arch.walk_mmu->inject_page_fault = kvm_inject_page_fault;
+	struct kvm_mmu *context = &vcpu->arch.mmu;
+
+	kvm_init_shadow_mmu(vcpu);
+	context->set_cr3           = kvm_x86_ops->set_cr3;
+	context->get_cr3           = get_cr3;
+	context->get_pdptr         = kvm_pdptr_read;
+	context->inject_page_fault = kvm_inject_page_fault;
 }
 }
 
 
 static void init_kvm_nested_mmu(struct kvm_vcpu *vcpu)
 static void init_kvm_nested_mmu(struct kvm_vcpu *vcpu)
@@ -3900,17 +4015,15 @@ static void init_kvm_nested_mmu(struct kvm_vcpu *vcpu)
 static void init_kvm_mmu(struct kvm_vcpu *vcpu)
 static void init_kvm_mmu(struct kvm_vcpu *vcpu)
 {
 {
 	if (mmu_is_nested(vcpu))
 	if (mmu_is_nested(vcpu))
-		return init_kvm_nested_mmu(vcpu);
+		init_kvm_nested_mmu(vcpu);
 	else if (tdp_enabled)
 	else if (tdp_enabled)
-		return init_kvm_tdp_mmu(vcpu);
+		init_kvm_tdp_mmu(vcpu);
 	else
 	else
-		return init_kvm_softmmu(vcpu);
+		init_kvm_softmmu(vcpu);
 }
 }
 
 
 void kvm_mmu_reset_context(struct kvm_vcpu *vcpu)
 void kvm_mmu_reset_context(struct kvm_vcpu *vcpu)
 {
 {
-	ASSERT(vcpu);
-
 	kvm_mmu_unload(vcpu);
 	kvm_mmu_unload(vcpu);
 	init_kvm_mmu(vcpu);
 	init_kvm_mmu(vcpu);
 }
 }
@@ -4266,8 +4379,6 @@ static int alloc_mmu_pages(struct kvm_vcpu *vcpu)
 	struct page *page;
 	struct page *page;
 	int i;
 	int i;
 
 
-	ASSERT(vcpu);
-
 	/*
 	/*
 	 * When emulating 32-bit mode, cr3 is only 32 bits even on x86_64.
 	 * When emulating 32-bit mode, cr3 is only 32 bits even on x86_64.
 	 * Therefore we need to allocate shadow page tables in the first
 	 * Therefore we need to allocate shadow page tables in the first
@@ -4286,8 +4397,6 @@ static int alloc_mmu_pages(struct kvm_vcpu *vcpu)
 
 
 int kvm_mmu_create(struct kvm_vcpu *vcpu)
 int kvm_mmu_create(struct kvm_vcpu *vcpu)
 {
 {
-	ASSERT(vcpu);
-
 	vcpu->arch.walk_mmu = &vcpu->arch.mmu;
 	vcpu->arch.walk_mmu = &vcpu->arch.mmu;
 	vcpu->arch.mmu.root_hpa = INVALID_PAGE;
 	vcpu->arch.mmu.root_hpa = INVALID_PAGE;
 	vcpu->arch.mmu.translate_gpa = translate_gpa;
 	vcpu->arch.mmu.translate_gpa = translate_gpa;
@@ -4298,19 +4407,18 @@ int kvm_mmu_create(struct kvm_vcpu *vcpu)
 
 
 void kvm_mmu_setup(struct kvm_vcpu *vcpu)
 void kvm_mmu_setup(struct kvm_vcpu *vcpu)
 {
 {
-	ASSERT(vcpu);
-	ASSERT(!VALID_PAGE(vcpu->arch.mmu.root_hpa));
+	MMU_WARN_ON(VALID_PAGE(vcpu->arch.mmu.root_hpa));
 
 
 	init_kvm_mmu(vcpu);
 	init_kvm_mmu(vcpu);
 }
 }
 
 
-void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot)
+void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
+				      struct kvm_memory_slot *memslot)
 {
 {
-	struct kvm_memory_slot *memslot;
 	gfn_t last_gfn;
 	gfn_t last_gfn;
 	int i;
 	int i;
+	bool flush = false;
 
 
-	memslot = id_to_memslot(kvm->memslots, slot);
 	last_gfn = memslot->base_gfn + memslot->npages - 1;
 	last_gfn = memslot->base_gfn + memslot->npages - 1;
 
 
 	spin_lock(&kvm->mmu_lock);
 	spin_lock(&kvm->mmu_lock);
@@ -4325,7 +4433,8 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot)
 
 
 		for (index = 0; index <= last_index; ++index, ++rmapp) {
 		for (index = 0; index <= last_index; ++index, ++rmapp) {
 			if (*rmapp)
 			if (*rmapp)
-				__rmap_write_protect(kvm, rmapp, false);
+				flush |= __rmap_write_protect(kvm, rmapp,
+						false);
 
 
 			if (need_resched() || spin_needbreak(&kvm->mmu_lock))
 			if (need_resched() || spin_needbreak(&kvm->mmu_lock))
 				cond_resched_lock(&kvm->mmu_lock);
 				cond_resched_lock(&kvm->mmu_lock);
@@ -4352,8 +4461,124 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot)
 	 * instead of PT_WRITABLE_MASK, that means it does not depend
 	 * instead of PT_WRITABLE_MASK, that means it does not depend
 	 * on PT_WRITABLE_MASK anymore.
 	 * on PT_WRITABLE_MASK anymore.
 	 */
 	 */
-	kvm_flush_remote_tlbs(kvm);
+	if (flush)
+		kvm_flush_remote_tlbs(kvm);
+}
+
+void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm,
+				   struct kvm_memory_slot *memslot)
+{
+	gfn_t last_gfn;
+	unsigned long *rmapp;
+	unsigned long last_index, index;
+	bool flush = false;
+
+	last_gfn = memslot->base_gfn + memslot->npages - 1;
+
+	spin_lock(&kvm->mmu_lock);
+
+	rmapp = memslot->arch.rmap[PT_PAGE_TABLE_LEVEL - 1];
+	last_index = gfn_to_index(last_gfn, memslot->base_gfn,
+			PT_PAGE_TABLE_LEVEL);
+
+	for (index = 0; index <= last_index; ++index, ++rmapp) {
+		if (*rmapp)
+			flush |= __rmap_clear_dirty(kvm, rmapp);
+
+		if (need_resched() || spin_needbreak(&kvm->mmu_lock))
+			cond_resched_lock(&kvm->mmu_lock);
+	}
+
+	spin_unlock(&kvm->mmu_lock);
+
+	lockdep_assert_held(&kvm->slots_lock);
+
+	/*
+	 * It's also safe to flush TLBs out of mmu lock here as currently this
+	 * function is only used for dirty logging, in which case flushing TLB
+	 * out of mmu lock also guarantees no dirty pages will be lost in
+	 * dirty_bitmap.
+	 */
+	if (flush)
+		kvm_flush_remote_tlbs(kvm);
+}
+EXPORT_SYMBOL_GPL(kvm_mmu_slot_leaf_clear_dirty);
+
+void kvm_mmu_slot_largepage_remove_write_access(struct kvm *kvm,
+					struct kvm_memory_slot *memslot)
+{
+	gfn_t last_gfn;
+	int i;
+	bool flush = false;
+
+	last_gfn = memslot->base_gfn + memslot->npages - 1;
+
+	spin_lock(&kvm->mmu_lock);
+
+	for (i = PT_PAGE_TABLE_LEVEL + 1; /* skip rmap for 4K page */
+	     i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) {
+		unsigned long *rmapp;
+		unsigned long last_index, index;
+
+		rmapp = memslot->arch.rmap[i - PT_PAGE_TABLE_LEVEL];
+		last_index = gfn_to_index(last_gfn, memslot->base_gfn, i);
+
+		for (index = 0; index <= last_index; ++index, ++rmapp) {
+			if (*rmapp)
+				flush |= __rmap_write_protect(kvm, rmapp,
+						false);
+
+			if (need_resched() || spin_needbreak(&kvm->mmu_lock))
+				cond_resched_lock(&kvm->mmu_lock);
+		}
+	}
+	spin_unlock(&kvm->mmu_lock);
+
+	/* see kvm_mmu_slot_remove_write_access */
+	lockdep_assert_held(&kvm->slots_lock);
+
+	if (flush)
+		kvm_flush_remote_tlbs(kvm);
+}
+EXPORT_SYMBOL_GPL(kvm_mmu_slot_largepage_remove_write_access);
+
+void kvm_mmu_slot_set_dirty(struct kvm *kvm,
+			    struct kvm_memory_slot *memslot)
+{
+	gfn_t last_gfn;
+	int i;
+	bool flush = false;
+
+	last_gfn = memslot->base_gfn + memslot->npages - 1;
+
+	spin_lock(&kvm->mmu_lock);
+
+	for (i = PT_PAGE_TABLE_LEVEL;
+	     i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) {
+		unsigned long *rmapp;
+		unsigned long last_index, index;
+
+		rmapp = memslot->arch.rmap[i - PT_PAGE_TABLE_LEVEL];
+		last_index = gfn_to_index(last_gfn, memslot->base_gfn, i);
+
+		for (index = 0; index <= last_index; ++index, ++rmapp) {
+			if (*rmapp)
+				flush |= __rmap_set_dirty(kvm, rmapp);
+
+			if (need_resched() || spin_needbreak(&kvm->mmu_lock))
+				cond_resched_lock(&kvm->mmu_lock);
+		}
+	}
+
+	spin_unlock(&kvm->mmu_lock);
+
+	lockdep_assert_held(&kvm->slots_lock);
+
+	/* see kvm_mmu_slot_leaf_clear_dirty */
+	if (flush)
+		kvm_flush_remote_tlbs(kvm);
 }
 }
+EXPORT_SYMBOL_GPL(kvm_mmu_slot_set_dirty);
 
 
 #define BATCH_ZAP_PAGES	10
 #define BATCH_ZAP_PAGES	10
 static void kvm_zap_obsolete_pages(struct kvm *kvm)
 static void kvm_zap_obsolete_pages(struct kvm *kvm)
@@ -4606,8 +4831,6 @@ EXPORT_SYMBOL_GPL(kvm_mmu_get_spte_hierarchy);
 
 
 void kvm_mmu_destroy(struct kvm_vcpu *vcpu)
 void kvm_mmu_destroy(struct kvm_vcpu *vcpu)
 {
 {
-	ASSERT(vcpu);
-
 	kvm_mmu_unload(vcpu);
 	kvm_mmu_unload(vcpu);
 	free_mmu_pages(vcpu);
 	free_mmu_pages(vcpu);
 	mmu_free_memory_caches(vcpu);
 	mmu_free_memory_caches(vcpu);

+ 2 - 15
arch/x86/kvm/mmu.h

@@ -44,18 +44,6 @@
 #define PT_DIRECTORY_LEVEL 2
 #define PT_PAGE_TABLE_LEVEL 1
 
-#define PFERR_PRESENT_BIT 0
-#define PFERR_WRITE_BIT 1
-#define PFERR_USER_BIT 2
-#define PFERR_RSVD_BIT 3
-#define PFERR_FETCH_BIT 4
-
-#define PFERR_PRESENT_MASK (1U << PFERR_PRESENT_BIT)
-#define PFERR_WRITE_MASK (1U << PFERR_WRITE_BIT)
-#define PFERR_USER_MASK (1U << PFERR_USER_BIT)
-#define PFERR_RSVD_MASK (1U << PFERR_RSVD_BIT)
-#define PFERR_FETCH_MASK (1U << PFERR_FETCH_BIT)
-
 static inline u64 rsvd_bits(int s, int e)
 {
 	return ((1ULL << (e - s + 1)) - 1) << s;
@@ -81,9 +69,8 @@ enum {
 };
 
 int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct);
-void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context);
-void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context,
-		bool execonly);
+void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu);
+void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly);
 void update_permission_bitmask(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
 		bool ept);
 

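This hunk drops the PFERR_* definitions from the header and keeps rsvd_bits(), which simply builds a mask with the inclusive bit range [s, e] set. A minimal standalone demo (the example ranges are arbitrary, not taken from this diff):

#include <stdio.h>
#include <stdint.h>

static uint64_t rsvd_bits(int s, int e)
{
	return ((1ULL << (e - s + 1)) - 1) << s;
}

int main(void)
{
	/* e.g. a reserved-physical-address-bits check might use 52..62 */
	printf("rsvd_bits(52, 62) = 0x%016llx\n",
	       (unsigned long long)rsvd_bits(52, 62));
	printf("rsvd_bits(3, 7)   = 0x%016llx\n",
	       (unsigned long long)rsvd_bits(3, 7));
	return 0;
}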
+ 2 - 2
arch/x86/kvm/svm.c

@@ -2003,8 +2003,8 @@ static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu,
 
 static void nested_svm_init_mmu_context(struct kvm_vcpu *vcpu)
 {
-	kvm_init_shadow_mmu(vcpu, &vcpu->arch.mmu);
-
+	WARN_ON(mmu_is_nested(vcpu));
+	kvm_init_shadow_mmu(vcpu);
 	vcpu->arch.mmu.set_cr3           = nested_svm_set_tdp_cr3;
 	vcpu->arch.mmu.get_cr3           = nested_svm_get_tdp_cr3;
 	vcpu->arch.mmu.get_pdptr         = nested_svm_get_tdp_pdptr;

+ 38 - 0
arch/x86/kvm/trace.h

@@ -848,6 +848,24 @@ TRACE_EVENT(kvm_track_tsc,
 
 #endif /* CONFIG_X86_64 */
 
+/*
+ * Tracepoint for PML full VMEXIT.
+ */
+TRACE_EVENT(kvm_pml_full,
+	TP_PROTO(unsigned int vcpu_id),
+	TP_ARGS(vcpu_id),
+
+	TP_STRUCT__entry(
+		__field(	unsigned int,	vcpu_id			)
+	),
+
+	TP_fast_assign(
+		__entry->vcpu_id		= vcpu_id;
+	),
+
+	TP_printk("vcpu %d: PML full", __entry->vcpu_id)
+);
+
 TRACE_EVENT(kvm_ple_window,
 	TP_PROTO(bool grow, unsigned int vcpu_id, int new, int old),
 	TP_ARGS(grow, vcpu_id, new, old),
@@ -914,6 +932,26 @@ TRACE_EVENT(kvm_pvclock_update,
 		  __entry->flags)
 );
 
+TRACE_EVENT(kvm_wait_lapic_expire,
+	TP_PROTO(unsigned int vcpu_id, s64 delta),
+	TP_ARGS(vcpu_id, delta),
+
+	TP_STRUCT__entry(
+		__field(	unsigned int,	vcpu_id		)
+		__field(	s64,		delta		)
+	),
+
+	TP_fast_assign(
+		__entry->vcpu_id	   = vcpu_id;
+		__entry->delta             = delta;
+	),
+
+	TP_printk("vcpu %u: delta %lld (%s)",
+		  __entry->vcpu_id,
+		  __entry->delta,
+		  __entry->delta < 0 ? "early" : "late")
+);
+
 #endif /* _TRACE_KVM_H */
 
 #undef TRACE_INCLUDE_PATH

+ 953 - 133
arch/x86/kvm/vmx.c

@@ -45,6 +45,7 @@
 #include <asm/perf_event.h>
 #include <asm/debugreg.h>
 #include <asm/kexec.h>
+#include <asm/apic.h>
 
 #include "trace.h"
 
@@ -101,6 +102,9 @@ module_param(nested, bool, S_IRUGO);
 
 static u64 __read_mostly host_xss;
 
+static bool __read_mostly enable_pml = 1;
+module_param_named(pml, enable_pml, bool, S_IRUGO);
+
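Since the knob is registered via module_param_named(pml, ...), PML can presumably be disabled at load time with something like "modprobe kvm_intel pml=0" and inspected under /sys/module/kvm_intel/parameters/pml; the exact module name and sysfs path are assumptions here, not something this hunk spells out.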
 #define KVM_GUEST_CR0_MASK (X86_CR0_NW | X86_CR0_CD)
 #define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST (X86_CR0_WP | X86_CR0_NE)
 #define KVM_VM_CR0_ALWAYS_ON						\
@@ -215,7 +219,12 @@ struct __packed vmcs12 {
 	u64 tsc_offset;
 	u64 tsc_offset;
 	u64 virtual_apic_page_addr;
 	u64 virtual_apic_page_addr;
 	u64 apic_access_addr;
 	u64 apic_access_addr;
+	u64 posted_intr_desc_addr;
 	u64 ept_pointer;
 	u64 ept_pointer;
+	u64 eoi_exit_bitmap0;
+	u64 eoi_exit_bitmap1;
+	u64 eoi_exit_bitmap2;
+	u64 eoi_exit_bitmap3;
 	u64 xss_exit_bitmap;
 	u64 xss_exit_bitmap;
 	u64 guest_physical_address;
 	u64 guest_physical_address;
 	u64 vmcs_link_pointer;
 	u64 vmcs_link_pointer;
@@ -330,6 +339,7 @@ struct __packed vmcs12 {
 	u32 vmx_preemption_timer_value;
 	u32 vmx_preemption_timer_value;
 	u32 padding32[7]; /* room for future expansion */
 	u32 padding32[7]; /* room for future expansion */
 	u16 virtual_processor_id;
 	u16 virtual_processor_id;
+	u16 posted_intr_nv;
 	u16 guest_es_selector;
 	u16 guest_es_selector;
 	u16 guest_cs_selector;
 	u16 guest_cs_selector;
 	u16 guest_ss_selector;
 	u16 guest_ss_selector;
@@ -338,6 +348,7 @@ struct __packed vmcs12 {
 	u16 guest_gs_selector;
 	u16 guest_gs_selector;
 	u16 guest_ldtr_selector;
 	u16 guest_ldtr_selector;
 	u16 guest_tr_selector;
 	u16 guest_tr_selector;
+	u16 guest_intr_status;
 	u16 host_es_selector;
 	u16 host_es_selector;
 	u16 host_cs_selector;
 	u16 host_cs_selector;
 	u16 host_ss_selector;
 	u16 host_ss_selector;
@@ -401,6 +412,10 @@ struct nested_vmx {
 	 */
 	 */
 	struct page *apic_access_page;
 	struct page *apic_access_page;
 	struct page *virtual_apic_page;
 	struct page *virtual_apic_page;
+	struct page *pi_desc_page;
+	struct pi_desc *pi_desc;
+	bool pi_pending;
+	u16 posted_intr_nv;
 	u64 msr_ia32_feature_control;
 	u64 msr_ia32_feature_control;
 
 
 	struct hrtimer preemption_timer;
 	struct hrtimer preemption_timer;
@@ -408,6 +423,23 @@ struct nested_vmx {
 
 
 	/* to migrate it to L2 if VM_ENTRY_LOAD_DEBUG_CONTROLS is off */
 	/* to migrate it to L2 if VM_ENTRY_LOAD_DEBUG_CONTROLS is off */
 	u64 vmcs01_debugctl;
 	u64 vmcs01_debugctl;
+
+	u32 nested_vmx_procbased_ctls_low;
+	u32 nested_vmx_procbased_ctls_high;
+	u32 nested_vmx_true_procbased_ctls_low;
+	u32 nested_vmx_secondary_ctls_low;
+	u32 nested_vmx_secondary_ctls_high;
+	u32 nested_vmx_pinbased_ctls_low;
+	u32 nested_vmx_pinbased_ctls_high;
+	u32 nested_vmx_exit_ctls_low;
+	u32 nested_vmx_exit_ctls_high;
+	u32 nested_vmx_true_exit_ctls_low;
+	u32 nested_vmx_entry_ctls_low;
+	u32 nested_vmx_entry_ctls_high;
+	u32 nested_vmx_true_entry_ctls_low;
+	u32 nested_vmx_misc_low;
+	u32 nested_vmx_misc_high;
+	u32 nested_vmx_ept_caps;
 };
 };
 
 
 #define POSTED_INTR_ON  0
 #define POSTED_INTR_ON  0
@@ -511,6 +543,10 @@ struct vcpu_vmx {
 	/* Dynamic PLE window. */
 	/* Dynamic PLE window. */
 	int ple_window;
 	int ple_window;
 	bool ple_window_dirty;
 	bool ple_window_dirty;
+
+	/* Support for PML */
+#define PML_ENTITY_NUM		512
+	struct page *pml_pg;
 };
 };
 
 
 enum segment_cache_field {
 enum segment_cache_field {
@@ -594,6 +630,7 @@ static int max_shadow_read_write_fields =
 
 
 static const unsigned short vmcs_field_to_offset_table[] = {
 static const unsigned short vmcs_field_to_offset_table[] = {
 	FIELD(VIRTUAL_PROCESSOR_ID, virtual_processor_id),
 	FIELD(VIRTUAL_PROCESSOR_ID, virtual_processor_id),
+	FIELD(POSTED_INTR_NV, posted_intr_nv),
 	FIELD(GUEST_ES_SELECTOR, guest_es_selector),
 	FIELD(GUEST_ES_SELECTOR, guest_es_selector),
 	FIELD(GUEST_CS_SELECTOR, guest_cs_selector),
 	FIELD(GUEST_CS_SELECTOR, guest_cs_selector),
 	FIELD(GUEST_SS_SELECTOR, guest_ss_selector),
 	FIELD(GUEST_SS_SELECTOR, guest_ss_selector),
@@ -602,6 +639,7 @@ static const unsigned short vmcs_field_to_offset_table[] = {
 	FIELD(GUEST_GS_SELECTOR, guest_gs_selector),
 	FIELD(GUEST_GS_SELECTOR, guest_gs_selector),
 	FIELD(GUEST_LDTR_SELECTOR, guest_ldtr_selector),
 	FIELD(GUEST_LDTR_SELECTOR, guest_ldtr_selector),
 	FIELD(GUEST_TR_SELECTOR, guest_tr_selector),
 	FIELD(GUEST_TR_SELECTOR, guest_tr_selector),
+	FIELD(GUEST_INTR_STATUS, guest_intr_status),
 	FIELD(HOST_ES_SELECTOR, host_es_selector),
 	FIELD(HOST_ES_SELECTOR, host_es_selector),
 	FIELD(HOST_CS_SELECTOR, host_cs_selector),
 	FIELD(HOST_CS_SELECTOR, host_cs_selector),
 	FIELD(HOST_SS_SELECTOR, host_ss_selector),
 	FIELD(HOST_SS_SELECTOR, host_ss_selector),
@@ -618,7 +656,12 @@ static const unsigned short vmcs_field_to_offset_table[] = {
 	FIELD64(TSC_OFFSET, tsc_offset),
 	FIELD64(TSC_OFFSET, tsc_offset),
 	FIELD64(VIRTUAL_APIC_PAGE_ADDR, virtual_apic_page_addr),
 	FIELD64(VIRTUAL_APIC_PAGE_ADDR, virtual_apic_page_addr),
 	FIELD64(APIC_ACCESS_ADDR, apic_access_addr),
 	FIELD64(APIC_ACCESS_ADDR, apic_access_addr),
+	FIELD64(POSTED_INTR_DESC_ADDR, posted_intr_desc_addr),
 	FIELD64(EPT_POINTER, ept_pointer),
 	FIELD64(EPT_POINTER, ept_pointer),
+	FIELD64(EOI_EXIT_BITMAP0, eoi_exit_bitmap0),
+	FIELD64(EOI_EXIT_BITMAP1, eoi_exit_bitmap1),
+	FIELD64(EOI_EXIT_BITMAP2, eoi_exit_bitmap2),
+	FIELD64(EOI_EXIT_BITMAP3, eoi_exit_bitmap3),
 	FIELD64(XSS_EXIT_BITMAP, xss_exit_bitmap),
 	FIELD64(XSS_EXIT_BITMAP, xss_exit_bitmap),
 	FIELD64(GUEST_PHYSICAL_ADDRESS, guest_physical_address),
 	FIELD64(GUEST_PHYSICAL_ADDRESS, guest_physical_address),
 	FIELD64(VMCS_LINK_POINTER, vmcs_link_pointer),
 	FIELD64(VMCS_LINK_POINTER, vmcs_link_pointer),
@@ -766,6 +809,7 @@ static void kvm_cpu_vmxon(u64 addr);
 static void kvm_cpu_vmxoff(void);
 static void kvm_cpu_vmxoff(void);
 static bool vmx_mpx_supported(void);
 static bool vmx_mpx_supported(void);
 static bool vmx_xsaves_supported(void);
 static bool vmx_xsaves_supported(void);
+static int vmx_vm_has_apicv(struct kvm *kvm);
 static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr);
 static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr);
 static void vmx_set_segment(struct kvm_vcpu *vcpu,
 static void vmx_set_segment(struct kvm_vcpu *vcpu,
 			    struct kvm_segment *var, int seg);
 			    struct kvm_segment *var, int seg);
@@ -793,6 +837,7 @@ static unsigned long *vmx_msr_bitmap_legacy;
 static unsigned long *vmx_msr_bitmap_longmode;
 static unsigned long *vmx_msr_bitmap_longmode;
 static unsigned long *vmx_msr_bitmap_legacy_x2apic;
 static unsigned long *vmx_msr_bitmap_legacy_x2apic;
 static unsigned long *vmx_msr_bitmap_longmode_x2apic;
 static unsigned long *vmx_msr_bitmap_longmode_x2apic;
+static unsigned long *vmx_msr_bitmap_nested;
 static unsigned long *vmx_vmread_bitmap;
 static unsigned long *vmx_vmread_bitmap;
 static unsigned long *vmx_vmwrite_bitmap;
 static unsigned long *vmx_vmwrite_bitmap;
 
 
@@ -959,16 +1004,6 @@ static inline bool cpu_has_vmx_ept_execute_only(void)
 	return vmx_capability.ept & VMX_EPT_EXECUTE_ONLY_BIT;
 	return vmx_capability.ept & VMX_EPT_EXECUTE_ONLY_BIT;
 }
 }
 
 
-static inline bool cpu_has_vmx_eptp_uncacheable(void)
-{
-	return vmx_capability.ept & VMX_EPTP_UC_BIT;
-}
-
-static inline bool cpu_has_vmx_eptp_writeback(void)
-{
-	return vmx_capability.ept & VMX_EPTP_WB_BIT;
-}
-
 static inline bool cpu_has_vmx_ept_2m_page(void)
 static inline bool cpu_has_vmx_ept_2m_page(void)
 {
 {
 	return vmx_capability.ept & VMX_EPT_2MB_PAGE_BIT;
 	return vmx_capability.ept & VMX_EPT_2MB_PAGE_BIT;
@@ -1073,6 +1108,11 @@ static inline bool cpu_has_vmx_shadow_vmcs(void)
 		SECONDARY_EXEC_SHADOW_VMCS;
 		SECONDARY_EXEC_SHADOW_VMCS;
 }
 }
 
 
+static inline bool cpu_has_vmx_pml(void)
+{
+	return vmcs_config.cpu_based_2nd_exec_ctrl & SECONDARY_EXEC_ENABLE_PML;
+}
+
 static inline bool report_flexpriority(void)
 static inline bool report_flexpriority(void)
 {
 {
 	return flexpriority_enabled;
 	return flexpriority_enabled;
@@ -1112,6 +1152,26 @@ static inline bool nested_cpu_has_xsaves(struct vmcs12 *vmcs12)
 		vmx_xsaves_supported();
 		vmx_xsaves_supported();
 }
 }
 
 
+static inline bool nested_cpu_has_virt_x2apic_mode(struct vmcs12 *vmcs12)
+{
+	return nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE);
+}
+
+static inline bool nested_cpu_has_apic_reg_virt(struct vmcs12 *vmcs12)
+{
+	return nested_cpu_has2(vmcs12, SECONDARY_EXEC_APIC_REGISTER_VIRT);
+}
+
+static inline bool nested_cpu_has_vid(struct vmcs12 *vmcs12)
+{
+	return nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
+}
+
+static inline bool nested_cpu_has_posted_intr(struct vmcs12 *vmcs12)
+{
+	return vmcs12->pin_based_vm_exec_control & PIN_BASED_POSTED_INTR;
+}
+
 static inline bool is_exception(u32 intr_info)
 static inline bool is_exception(u32 intr_info)
 {
 {
 	return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK))
 	return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK))
@@ -2284,20 +2344,8 @@ static inline bool nested_vmx_allowed(struct kvm_vcpu *vcpu)
  * if the corresponding bit in the (32-bit) control field *must* be on, and a
  * if the corresponding bit in the (32-bit) control field *must* be on, and a
  * bit in the high half is on if the corresponding bit in the control field
  * bit in the high half is on if the corresponding bit in the control field
  * may be on. See also vmx_control_verify().
  * may be on. See also vmx_control_verify().
- * TODO: allow these variables to be modified (downgraded) by module options
- * or other means.
  */
  */
-static u32 nested_vmx_procbased_ctls_low, nested_vmx_procbased_ctls_high;
-static u32 nested_vmx_true_procbased_ctls_low;
-static u32 nested_vmx_secondary_ctls_low, nested_vmx_secondary_ctls_high;
-static u32 nested_vmx_pinbased_ctls_low, nested_vmx_pinbased_ctls_high;
-static u32 nested_vmx_exit_ctls_low, nested_vmx_exit_ctls_high;
-static u32 nested_vmx_true_exit_ctls_low;
-static u32 nested_vmx_entry_ctls_low, nested_vmx_entry_ctls_high;
-static u32 nested_vmx_true_entry_ctls_low;
-static u32 nested_vmx_misc_low, nested_vmx_misc_high;
-static u32 nested_vmx_ept_caps;
-static __init void nested_vmx_setup_ctls_msrs(void)
+static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
 {
 {
 	/*
 	/*
 	 * Note that as a general rule, the high half of the MSRs (bits in
 	 * Note that as a general rule, the high half of the MSRs (bits in
@@ -2316,57 +2364,74 @@ static __init void nested_vmx_setup_ctls_msrs(void)
 
 
 	/* pin-based controls */
 	/* pin-based controls */
 	rdmsr(MSR_IA32_VMX_PINBASED_CTLS,
 	rdmsr(MSR_IA32_VMX_PINBASED_CTLS,
-	      nested_vmx_pinbased_ctls_low, nested_vmx_pinbased_ctls_high);
-	nested_vmx_pinbased_ctls_low |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
-	nested_vmx_pinbased_ctls_high &= PIN_BASED_EXT_INTR_MASK |
-		PIN_BASED_NMI_EXITING | PIN_BASED_VIRTUAL_NMIS;
-	nested_vmx_pinbased_ctls_high |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR |
+		vmx->nested.nested_vmx_pinbased_ctls_low,
+		vmx->nested.nested_vmx_pinbased_ctls_high);
+	vmx->nested.nested_vmx_pinbased_ctls_low |=
+		PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
+	vmx->nested.nested_vmx_pinbased_ctls_high &=
+		PIN_BASED_EXT_INTR_MASK |
+		PIN_BASED_NMI_EXITING |
+		PIN_BASED_VIRTUAL_NMIS;
+	vmx->nested.nested_vmx_pinbased_ctls_high |=
+		PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR |
 		PIN_BASED_VMX_PREEMPTION_TIMER;
 		PIN_BASED_VMX_PREEMPTION_TIMER;
+	if (vmx_vm_has_apicv(vmx->vcpu.kvm))
+		vmx->nested.nested_vmx_pinbased_ctls_high |=
+			PIN_BASED_POSTED_INTR;
 
 
 	/* exit controls */
 	/* exit controls */
 	rdmsr(MSR_IA32_VMX_EXIT_CTLS,
 	rdmsr(MSR_IA32_VMX_EXIT_CTLS,
-		nested_vmx_exit_ctls_low, nested_vmx_exit_ctls_high);
-	nested_vmx_exit_ctls_low = VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR;
+		vmx->nested.nested_vmx_exit_ctls_low,
+		vmx->nested.nested_vmx_exit_ctls_high);
+	vmx->nested.nested_vmx_exit_ctls_low =
+		VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR;
 
 
-	nested_vmx_exit_ctls_high &=
+	vmx->nested.nested_vmx_exit_ctls_high &=
 #ifdef CONFIG_X86_64
 #ifdef CONFIG_X86_64
 		VM_EXIT_HOST_ADDR_SPACE_SIZE |
 		VM_EXIT_HOST_ADDR_SPACE_SIZE |
 #endif
 #endif
 		VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT;
 		VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT;
-	nested_vmx_exit_ctls_high |= VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR |
+	vmx->nested.nested_vmx_exit_ctls_high |=
+		VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR |
 		VM_EXIT_LOAD_IA32_EFER | VM_EXIT_SAVE_IA32_EFER |
 		VM_EXIT_LOAD_IA32_EFER | VM_EXIT_SAVE_IA32_EFER |
 		VM_EXIT_SAVE_VMX_PREEMPTION_TIMER | VM_EXIT_ACK_INTR_ON_EXIT;
 		VM_EXIT_SAVE_VMX_PREEMPTION_TIMER | VM_EXIT_ACK_INTR_ON_EXIT;
 
 
 	if (vmx_mpx_supported())
 	if (vmx_mpx_supported())
-		nested_vmx_exit_ctls_high |= VM_EXIT_CLEAR_BNDCFGS;
+		vmx->nested.nested_vmx_exit_ctls_high |= VM_EXIT_CLEAR_BNDCFGS;
 
 
 	/* We support free control of debug control saving. */
 	/* We support free control of debug control saving. */
-	nested_vmx_true_exit_ctls_low = nested_vmx_exit_ctls_low &
+	vmx->nested.nested_vmx_true_exit_ctls_low =
+		vmx->nested.nested_vmx_exit_ctls_low &
 		~VM_EXIT_SAVE_DEBUG_CONTROLS;
 		~VM_EXIT_SAVE_DEBUG_CONTROLS;
 
 
 	/* entry controls */
 	/* entry controls */
 	rdmsr(MSR_IA32_VMX_ENTRY_CTLS,
 	rdmsr(MSR_IA32_VMX_ENTRY_CTLS,
-		nested_vmx_entry_ctls_low, nested_vmx_entry_ctls_high);
-	nested_vmx_entry_ctls_low = VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR;
-	nested_vmx_entry_ctls_high &=
+		vmx->nested.nested_vmx_entry_ctls_low,
+		vmx->nested.nested_vmx_entry_ctls_high);
+	vmx->nested.nested_vmx_entry_ctls_low =
+		VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR;
+	vmx->nested.nested_vmx_entry_ctls_high &=
 #ifdef CONFIG_X86_64
 #ifdef CONFIG_X86_64
 		VM_ENTRY_IA32E_MODE |
 		VM_ENTRY_IA32E_MODE |
 #endif
 #endif
 		VM_ENTRY_LOAD_IA32_PAT;
 		VM_ENTRY_LOAD_IA32_PAT;
-	nested_vmx_entry_ctls_high |= (VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR |
-				       VM_ENTRY_LOAD_IA32_EFER);
+	vmx->nested.nested_vmx_entry_ctls_high |=
+		(VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR | VM_ENTRY_LOAD_IA32_EFER);
 	if (vmx_mpx_supported())
 	if (vmx_mpx_supported())
-		nested_vmx_entry_ctls_high |= VM_ENTRY_LOAD_BNDCFGS;
+		vmx->nested.nested_vmx_entry_ctls_high |= VM_ENTRY_LOAD_BNDCFGS;
 
 
 	/* We support free control of debug control loading. */
 	/* We support free control of debug control loading. */
-	nested_vmx_true_entry_ctls_low = nested_vmx_entry_ctls_low &
+	vmx->nested.nested_vmx_true_entry_ctls_low =
+		vmx->nested.nested_vmx_entry_ctls_low &
 		~VM_ENTRY_LOAD_DEBUG_CONTROLS;
 		~VM_ENTRY_LOAD_DEBUG_CONTROLS;
 
 
 	/* cpu-based controls */
 	/* cpu-based controls */
 	rdmsr(MSR_IA32_VMX_PROCBASED_CTLS,
 	rdmsr(MSR_IA32_VMX_PROCBASED_CTLS,
-		nested_vmx_procbased_ctls_low, nested_vmx_procbased_ctls_high);
-	nested_vmx_procbased_ctls_low = CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
-	nested_vmx_procbased_ctls_high &=
+		vmx->nested.nested_vmx_procbased_ctls_low,
+		vmx->nested.nested_vmx_procbased_ctls_high);
+	vmx->nested.nested_vmx_procbased_ctls_low =
+		CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
+	vmx->nested.nested_vmx_procbased_ctls_high &=
 		CPU_BASED_VIRTUAL_INTR_PENDING |
 		CPU_BASED_VIRTUAL_INTR_PENDING |
 		CPU_BASED_VIRTUAL_NMI_PENDING | CPU_BASED_USE_TSC_OFFSETING |
 		CPU_BASED_VIRTUAL_NMI_PENDING | CPU_BASED_USE_TSC_OFFSETING |
 		CPU_BASED_HLT_EXITING | CPU_BASED_INVLPG_EXITING |
 		CPU_BASED_HLT_EXITING | CPU_BASED_INVLPG_EXITING |
@@ -2386,45 +2451,55 @@ static __init void nested_vmx_setup_ctls_msrs(void)
 	 * can use it to avoid exits to L1 - even when L0 runs L2
 	 * can use it to avoid exits to L1 - even when L0 runs L2
 	 * without MSR bitmaps.
 	 * without MSR bitmaps.
 	 */
 	 */
-	nested_vmx_procbased_ctls_high |= CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR |
+	vmx->nested.nested_vmx_procbased_ctls_high |=
+		CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR |
 		CPU_BASED_USE_MSR_BITMAPS;
 		CPU_BASED_USE_MSR_BITMAPS;
 
 
 	/* We support free control of CR3 access interception. */
 	/* We support free control of CR3 access interception. */
-	nested_vmx_true_procbased_ctls_low = nested_vmx_procbased_ctls_low &
+	vmx->nested.nested_vmx_true_procbased_ctls_low =
+		vmx->nested.nested_vmx_procbased_ctls_low &
 		~(CPU_BASED_CR3_LOAD_EXITING | CPU_BASED_CR3_STORE_EXITING);
 		~(CPU_BASED_CR3_LOAD_EXITING | CPU_BASED_CR3_STORE_EXITING);
 
 
 	/* secondary cpu-based controls */
 	/* secondary cpu-based controls */
 	rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2,
 	rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2,
-		nested_vmx_secondary_ctls_low, nested_vmx_secondary_ctls_high);
-	nested_vmx_secondary_ctls_low = 0;
-	nested_vmx_secondary_ctls_high &=
+		vmx->nested.nested_vmx_secondary_ctls_low,
+		vmx->nested.nested_vmx_secondary_ctls_high);
+	vmx->nested.nested_vmx_secondary_ctls_low = 0;
+	vmx->nested.nested_vmx_secondary_ctls_high &=
 		SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
 		SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
+		SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
+		SECONDARY_EXEC_APIC_REGISTER_VIRT |
+		SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
 		SECONDARY_EXEC_WBINVD_EXITING |
 		SECONDARY_EXEC_WBINVD_EXITING |
 		SECONDARY_EXEC_XSAVES;
 		SECONDARY_EXEC_XSAVES;
 
 
 	if (enable_ept) {
 	if (enable_ept) {
 		/* nested EPT: emulate EPT also to L1 */
 		/* nested EPT: emulate EPT also to L1 */
-		nested_vmx_secondary_ctls_high |= SECONDARY_EXEC_ENABLE_EPT |
+		vmx->nested.nested_vmx_secondary_ctls_high |=
+			SECONDARY_EXEC_ENABLE_EPT |
 			SECONDARY_EXEC_UNRESTRICTED_GUEST;
 			SECONDARY_EXEC_UNRESTRICTED_GUEST;
-		nested_vmx_ept_caps = VMX_EPT_PAGE_WALK_4_BIT |
+		vmx->nested.nested_vmx_ept_caps = VMX_EPT_PAGE_WALK_4_BIT |
 			 VMX_EPTP_WB_BIT | VMX_EPT_2MB_PAGE_BIT |
 			 VMX_EPTP_WB_BIT | VMX_EPT_2MB_PAGE_BIT |
 			 VMX_EPT_INVEPT_BIT;
 			 VMX_EPT_INVEPT_BIT;
-		nested_vmx_ept_caps &= vmx_capability.ept;
+		vmx->nested.nested_vmx_ept_caps &= vmx_capability.ept;
 		/*
 		/*
 		 * For nested guests, we don't do anything specific
 		 * For nested guests, we don't do anything specific
 		 * for single context invalidation. Hence, only advertise
 		 * for single context invalidation. Hence, only advertise
 		 * support for global context invalidation.
 		 * support for global context invalidation.
 		 */
 		 */
-		nested_vmx_ept_caps |= VMX_EPT_EXTENT_GLOBAL_BIT;
+		vmx->nested.nested_vmx_ept_caps |= VMX_EPT_EXTENT_GLOBAL_BIT;
 	} else
 	} else
-		nested_vmx_ept_caps = 0;
+		vmx->nested.nested_vmx_ept_caps = 0;
 
 
 	/* miscellaneous data */
 	/* miscellaneous data */
-	rdmsr(MSR_IA32_VMX_MISC, nested_vmx_misc_low, nested_vmx_misc_high);
-	nested_vmx_misc_low &= VMX_MISC_SAVE_EFER_LMA;
-	nested_vmx_misc_low |= VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE |
+	rdmsr(MSR_IA32_VMX_MISC,
+		vmx->nested.nested_vmx_misc_low,
+		vmx->nested.nested_vmx_misc_high);
+	vmx->nested.nested_vmx_misc_low &= VMX_MISC_SAVE_EFER_LMA;
+	vmx->nested.nested_vmx_misc_low |=
+		VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE |
 		VMX_MISC_ACTIVITY_HLT;
 		VMX_MISC_ACTIVITY_HLT;
-	nested_vmx_misc_high = 0;
+	vmx->nested.nested_vmx_misc_high = 0;
 }
 }
 
 
 static inline bool vmx_control_verify(u32 control, u32 low, u32 high)
 static inline bool vmx_control_verify(u32 control, u32 low, u32 high)
@@ -2443,6 +2518,8 @@ static inline u64 vmx_control_msr(u32 low, u32 high)
 /* Returns 0 on success, non-0 otherwise. */
 /* Returns 0 on success, non-0 otherwise. */
 static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
 static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
 {
 {
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+
 	switch (msr_index) {
 	switch (msr_index) {
 	case MSR_IA32_VMX_BASIC:
 	case MSR_IA32_VMX_BASIC:
 		/*
 		/*
@@ -2457,36 +2534,44 @@ static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
 		break;
 		break;
 	case MSR_IA32_VMX_TRUE_PINBASED_CTLS:
 	case MSR_IA32_VMX_TRUE_PINBASED_CTLS:
 	case MSR_IA32_VMX_PINBASED_CTLS:
 	case MSR_IA32_VMX_PINBASED_CTLS:
-		*pdata = vmx_control_msr(nested_vmx_pinbased_ctls_low,
-					nested_vmx_pinbased_ctls_high);
+		*pdata = vmx_control_msr(
+			vmx->nested.nested_vmx_pinbased_ctls_low,
+			vmx->nested.nested_vmx_pinbased_ctls_high);
 		break;
 		break;
 	case MSR_IA32_VMX_TRUE_PROCBASED_CTLS:
 	case MSR_IA32_VMX_TRUE_PROCBASED_CTLS:
-		*pdata = vmx_control_msr(nested_vmx_true_procbased_ctls_low,
-					nested_vmx_procbased_ctls_high);
+		*pdata = vmx_control_msr(
+			vmx->nested.nested_vmx_true_procbased_ctls_low,
+			vmx->nested.nested_vmx_procbased_ctls_high);
 		break;
 		break;
 	case MSR_IA32_VMX_PROCBASED_CTLS:
 	case MSR_IA32_VMX_PROCBASED_CTLS:
-		*pdata = vmx_control_msr(nested_vmx_procbased_ctls_low,
-					nested_vmx_procbased_ctls_high);
+		*pdata = vmx_control_msr(
+			vmx->nested.nested_vmx_procbased_ctls_low,
+			vmx->nested.nested_vmx_procbased_ctls_high);
 		break;
 		break;
 	case MSR_IA32_VMX_TRUE_EXIT_CTLS:
 	case MSR_IA32_VMX_TRUE_EXIT_CTLS:
-		*pdata = vmx_control_msr(nested_vmx_true_exit_ctls_low,
-					nested_vmx_exit_ctls_high);
+		*pdata = vmx_control_msr(
+			vmx->nested.nested_vmx_true_exit_ctls_low,
+			vmx->nested.nested_vmx_exit_ctls_high);
 		break;
 		break;
 	case MSR_IA32_VMX_EXIT_CTLS:
 	case MSR_IA32_VMX_EXIT_CTLS:
-		*pdata = vmx_control_msr(nested_vmx_exit_ctls_low,
-					nested_vmx_exit_ctls_high);
+		*pdata = vmx_control_msr(
+			vmx->nested.nested_vmx_exit_ctls_low,
+			vmx->nested.nested_vmx_exit_ctls_high);
 		break;
 		break;
 	case MSR_IA32_VMX_TRUE_ENTRY_CTLS:
 	case MSR_IA32_VMX_TRUE_ENTRY_CTLS:
-		*pdata = vmx_control_msr(nested_vmx_true_entry_ctls_low,
-					nested_vmx_entry_ctls_high);
+		*pdata = vmx_control_msr(
+			vmx->nested.nested_vmx_true_entry_ctls_low,
+			vmx->nested.nested_vmx_entry_ctls_high);
 		break;
 		break;
 	case MSR_IA32_VMX_ENTRY_CTLS:
 	case MSR_IA32_VMX_ENTRY_CTLS:
-		*pdata = vmx_control_msr(nested_vmx_entry_ctls_low,
-					nested_vmx_entry_ctls_high);
+		*pdata = vmx_control_msr(
+			vmx->nested.nested_vmx_entry_ctls_low,
+			vmx->nested.nested_vmx_entry_ctls_high);
 		break;
 		break;
 	case MSR_IA32_VMX_MISC:
 	case MSR_IA32_VMX_MISC:
-		*pdata = vmx_control_msr(nested_vmx_misc_low,
-					 nested_vmx_misc_high);
+		*pdata = vmx_control_msr(
+			vmx->nested.nested_vmx_misc_low,
+			vmx->nested.nested_vmx_misc_high);
 		break;
 		break;
 	/*
 	/*
 	 * These MSRs specify bits which the guest must keep fixed (on or off)
 	 * These MSRs specify bits which the guest must keep fixed (on or off)
@@ -2511,12 +2596,13 @@ static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
 		*pdata = 0x2e; /* highest index: VMX_PREEMPTION_TIMER_VALUE */
 		*pdata = 0x2e; /* highest index: VMX_PREEMPTION_TIMER_VALUE */
 		break;
 		break;
 	case MSR_IA32_VMX_PROCBASED_CTLS2:
 	case MSR_IA32_VMX_PROCBASED_CTLS2:
-		*pdata = vmx_control_msr(nested_vmx_secondary_ctls_low,
-					nested_vmx_secondary_ctls_high);
+		*pdata = vmx_control_msr(
+			vmx->nested.nested_vmx_secondary_ctls_low,
+			vmx->nested.nested_vmx_secondary_ctls_high);
 		break;
 		break;
 	case MSR_IA32_VMX_EPT_VPID_CAP:
 	case MSR_IA32_VMX_EPT_VPID_CAP:
 		/* Currently, no nested vpid support */
 		/* Currently, no nested vpid support */
-		*pdata = nested_vmx_ept_caps;
+		*pdata = vmx->nested.nested_vmx_ept_caps;
 		break;
 		break;
 	default:
 	default:
 		return 1;
 		return 1;
@@ -2929,7 +3015,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
 			SECONDARY_EXEC_APIC_REGISTER_VIRT |
 			SECONDARY_EXEC_APIC_REGISTER_VIRT |
 			SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
 			SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
 			SECONDARY_EXEC_SHADOW_VMCS |
 			SECONDARY_EXEC_SHADOW_VMCS |
-			SECONDARY_EXEC_XSAVES;
+			SECONDARY_EXEC_XSAVES |
+			SECONDARY_EXEC_ENABLE_PML;
 		if (adjust_vmx_controls(min2, opt2,
 		if (adjust_vmx_controls(min2, opt2,
 					MSR_IA32_VMX_PROCBASED_CTLS2,
 					MSR_IA32_VMX_PROCBASED_CTLS2,
 					&_cpu_based_2nd_exec_control) < 0)
 					&_cpu_based_2nd_exec_control) < 0)
@@ -4159,6 +4246,52 @@ static void __vmx_enable_intercept_for_msr(unsigned long *msr_bitmap,
 	}
 	}
 }
 }
 
 
+/*
+ * If an MSR is allowed by L0, we should check whether it is allowed by L1.
+ * The corresponding bit will be cleared unless both L0 and L1 allow it.
+ */
+static void nested_vmx_disable_intercept_for_msr(unsigned long *msr_bitmap_l1,
+					       unsigned long *msr_bitmap_nested,
+					       u32 msr, int type)
+{
+	int f = sizeof(unsigned long);
+
+	if (!cpu_has_vmx_msr_bitmap()) {
+		WARN_ON(1);
+		return;
+	}
+
+	/*
+	 * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
+	 * have the write-low and read-high bitmap offsets the wrong way round.
+	 * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
+	 */
+	if (msr <= 0x1fff) {
+		if (type & MSR_TYPE_R &&
+		   !test_bit(msr, msr_bitmap_l1 + 0x000 / f))
+			/* read-low */
+			__clear_bit(msr, msr_bitmap_nested + 0x000 / f);
+
+		if (type & MSR_TYPE_W &&
+		   !test_bit(msr, msr_bitmap_l1 + 0x800 / f))
+			/* write-low */
+			__clear_bit(msr, msr_bitmap_nested + 0x800 / f);
+
+	} else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
+		msr &= 0x1fff;
+		if (type & MSR_TYPE_R &&
+		   !test_bit(msr, msr_bitmap_l1 + 0x400 / f))
+			/* read-high */
+			__clear_bit(msr, msr_bitmap_nested + 0x400 / f);
+
+		if (type & MSR_TYPE_W &&
+		   !test_bit(msr, msr_bitmap_l1 + 0xc00 / f))
+			/* write-high */
+			__clear_bit(msr, msr_bitmap_nested + 0xc00 / f);
+
+	}
+}
+
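This helper leans on the VMX MSR-bitmap layout: a 4K page with read-low/read-high/write-low/write-high quarters at offsets 0x000, 0x400, 0x800 and 0xc00, one bit per MSR, where a set bit means the access is intercepted. A hedged standalone model of the merge idea (byte-wise helpers instead of the kernel's bitmap API; every name below is local to the sketch):

#include <stdio.h>
#include <stdint.h>
#include <string.h>

#define BITMAP_BYTES	4096

static int bitmap_offset(uint32_t msr, int write)
{
	if (msr <= 0x1fff)
		return write ? 0x800 : 0x000;
	if (msr >= 0xc0000000 && msr <= 0xc0001fff)
		return write ? 0xc00 : 0x400;
	return -1;			/* not covered by the bitmap: always intercepted */
}

static int intercepted(const uint8_t *bm, uint32_t msr, int write)
{
	int off = bitmap_offset(msr, write);
	uint32_t bit = msr & 0x1fff;

	if (off < 0)
		return 1;
	return (bm[off + bit / 8] >> (bit % 8)) & 1;
}

int main(void)
{
	static uint8_t l0[BITMAP_BYTES], l1[BITMAP_BYTES], merged[BITMAP_BYTES];
	uint32_t msr = 0x808;		/* x2APIC TPR */

	memset(l0, 0xff, sizeof(l0));	/* everything intercepted by default */
	memset(l1, 0xff, sizeof(l1));
	l1[0x800 + msr / 8] &= (uint8_t)~(1 << (msr % 8)); /* L1 allows TPR writes */

	/* merge: an access stays intercepted unless both L0 and L1 clear the bit */
	for (int i = 0; i < BITMAP_BYTES; i++)
		merged[i] = l0[i] | l1[i];

	printf("TPR write intercepted for L2? %d\n", intercepted(merged, msr, 1));
	return 0;
}

Because L0 still intercepts the MSR in this run, the merged bitmap keeps the bit set and the program prints 1; only when both bitmaps clear it would the nested guest access the MSR without an exit.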
 static void vmx_disable_intercept_for_msr(u32 msr, bool longmode_only)
 static void vmx_disable_intercept_for_msr(u32 msr, bool longmode_only)
 {
 {
 	if (!longmode_only)
 	if (!longmode_only)
@@ -4197,6 +4330,64 @@ static int vmx_vm_has_apicv(struct kvm *kvm)
 	return enable_apicv && irqchip_in_kernel(kvm);
 	return enable_apicv && irqchip_in_kernel(kvm);
 }
 }
 
 
+static int vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu)
+{
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	int max_irr;
+	void *vapic_page;
+	u16 status;
+
+	if (vmx->nested.pi_desc &&
+	    vmx->nested.pi_pending) {
+		vmx->nested.pi_pending = false;
+		if (!pi_test_and_clear_on(vmx->nested.pi_desc))
+			return 0;
+
+		max_irr = find_last_bit(
+			(unsigned long *)vmx->nested.pi_desc->pir, 256);
+
+		if (max_irr == 256)
+			return 0;
+
+		vapic_page = kmap(vmx->nested.virtual_apic_page);
+		if (!vapic_page) {
+			WARN_ON(1);
+			return -ENOMEM;
+		}
+		__kvm_apic_update_irr(vmx->nested.pi_desc->pir, vapic_page);
+		kunmap(vmx->nested.virtual_apic_page);
+
+		status = vmcs_read16(GUEST_INTR_STATUS);
+		if ((u8)max_irr > ((u8)status & 0xff)) {
+			status &= ~0xff;
+			status |= (u8)max_irr;
+			vmcs_write16(GUEST_INTR_STATUS, status);
+		}
+	}
+	return 0;
+}
+
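For orientation, the scan in vmx_complete_nested_posted_interrupt() above treats pi_desc->pir as a 256-bit Posted Interrupt Request bitmap, and the highest set bit becomes the new RVI (the low byte of GUEST_INTR_STATUS) if it beats the current one. A plain-C model, where pir_find_last() stands in for find_last_bit() and everything else is local to the sketch:

#include <stdio.h>
#include <stdint.h>

static int pir_find_last(const uint64_t pir[4])
{
	for (int w = 3; w >= 0; w--)
		if (pir[w])
			return w * 64 + 63 - __builtin_clzll(pir[w]);
	return 256;			/* like find_last_bit(): "none" == size */
}

int main(void)
{
	uint64_t pir[4] = { 0 };
	uint16_t status = 0x0025;	/* low byte = current RVI */

	pir[2] |= 1ULL << 10;		/* vector 138 is pending */

	int max_irr = pir_find_last(pir);
	if (max_irr != 256 && (uint8_t)max_irr > (uint8_t)status)
		status = (uint16_t)((status & ~0xff) | (uint8_t)max_irr);

	printf("max_irr=%d, new GUEST_INTR_STATUS=0x%04x\n",
	       max_irr, (unsigned)status);
	return 0;
}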
+static int vmx_deliver_nested_posted_interrupt(struct kvm_vcpu *vcpu,
+						int vector)
+{
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+	if (is_guest_mode(vcpu) &&
+	    vector == vmx->nested.posted_intr_nv) {
+		/* the PIR and ON have been set by L1. */
+		if (vcpu->mode == IN_GUEST_MODE)
+			apic->send_IPI_mask(get_cpu_mask(vcpu->cpu),
+				POSTED_INTR_VECTOR);
+		/*
+		 * If a posted intr is not recognized by hardware,
+		 * we will accomplish it in the next vmentry.
+		 */
+		vmx->nested.pi_pending = true;
+		kvm_make_request(KVM_REQ_EVENT, vcpu);
+		return 0;
+	}
+	return -1;
+}
 /*
 /*
  * Send interrupt to vcpu via posted interrupt way.
  * Send interrupt to vcpu via posted interrupt way.
  * 1. If target vcpu is running(non-root mode), send posted interrupt
  * 1. If target vcpu is running(non-root mode), send posted interrupt
@@ -4209,6 +4400,10 @@ static void vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector)
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	int r;
 	int r;
 
 
+	r = vmx_deliver_nested_posted_interrupt(vcpu, vector);
+	if (!r)
+		return;
+
 	if (pi_test_and_set_pir(vector, &vmx->pi_desc))
 	if (pi_test_and_set_pir(vector, &vmx->pi_desc))
 		return;
 		return;
 
 
@@ -4360,6 +4555,9 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx)
 	   a current VMCS12
 	   a current VMCS12
 	*/
 	*/
 	exec_control &= ~SECONDARY_EXEC_SHADOW_VMCS;
 	exec_control &= ~SECONDARY_EXEC_SHADOW_VMCS;
+	/* PML is enabled/disabled when creating/destroying the vcpu */
+	exec_control &= ~SECONDARY_EXEC_ENABLE_PML;
+
 	return exec_control;
 	return exec_control;
 }
 }
 
 
@@ -4986,11 +5184,12 @@ vmx_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall)
 	hypercall[2] = 0xc1;
 	hypercall[2] = 0xc1;
 }
 }
 
 
-static bool nested_cr0_valid(struct vmcs12 *vmcs12, unsigned long val)
+static bool nested_cr0_valid(struct kvm_vcpu *vcpu, unsigned long val)
 {
 {
 	unsigned long always_on = VMXON_CR0_ALWAYSON;
 	unsigned long always_on = VMXON_CR0_ALWAYSON;
+	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
 
 
-	if (nested_vmx_secondary_ctls_high &
+	if (to_vmx(vcpu)->nested.nested_vmx_secondary_ctls_high &
 		SECONDARY_EXEC_UNRESTRICTED_GUEST &&
 		SECONDARY_EXEC_UNRESTRICTED_GUEST &&
 	    nested_cpu_has2(vmcs12, SECONDARY_EXEC_UNRESTRICTED_GUEST))
 	    nested_cpu_has2(vmcs12, SECONDARY_EXEC_UNRESTRICTED_GUEST))
 		always_on &= ~(X86_CR0_PE | X86_CR0_PG);
 		always_on &= ~(X86_CR0_PE | X86_CR0_PG);
@@ -5015,7 +5214,7 @@ static int handle_set_cr0(struct kvm_vcpu *vcpu, unsigned long val)
 		val = (val & ~vmcs12->cr0_guest_host_mask) |
 		val = (val & ~vmcs12->cr0_guest_host_mask) |
 			(vmcs12->guest_cr0 & vmcs12->cr0_guest_host_mask);
 			(vmcs12->guest_cr0 & vmcs12->cr0_guest_host_mask);
 
 
-		if (!nested_cr0_valid(vmcs12, val))
+		if (!nested_cr0_valid(vcpu, val))
 			return 1;
 			return 1;
 
 
 		if (kvm_set_cr0(vcpu, val))
 		if (kvm_set_cr0(vcpu, val))
@@ -5817,13 +6016,21 @@ static __init int hardware_setup(void)
 				(unsigned long *)__get_free_page(GFP_KERNEL);
 				(unsigned long *)__get_free_page(GFP_KERNEL);
 	if (!vmx_msr_bitmap_longmode_x2apic)
 	if (!vmx_msr_bitmap_longmode_x2apic)
 		goto out4;
 		goto out4;
+
+	if (nested) {
+		vmx_msr_bitmap_nested =
+			(unsigned long *)__get_free_page(GFP_KERNEL);
+		if (!vmx_msr_bitmap_nested)
+			goto out5;
+	}
+
 	vmx_vmread_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
 	vmx_vmread_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
 	if (!vmx_vmread_bitmap)
 	if (!vmx_vmread_bitmap)
-		goto out5;
+		goto out6;
 
 
 	vmx_vmwrite_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
 	vmx_vmwrite_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
 	if (!vmx_vmwrite_bitmap)
 	if (!vmx_vmwrite_bitmap)
-		goto out6;
+		goto out7;
 
 
 	memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE);
 	memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE);
 	memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE);
 	memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE);
@@ -5839,10 +6046,12 @@ static __init int hardware_setup(void)
 
 
 	memset(vmx_msr_bitmap_legacy, 0xff, PAGE_SIZE);
 	memset(vmx_msr_bitmap_legacy, 0xff, PAGE_SIZE);
 	memset(vmx_msr_bitmap_longmode, 0xff, PAGE_SIZE);
 	memset(vmx_msr_bitmap_longmode, 0xff, PAGE_SIZE);
+	if (nested)
+		memset(vmx_msr_bitmap_nested, 0xff, PAGE_SIZE);
 
 
 	if (setup_vmcs_config(&vmcs_config) < 0) {
 	if (setup_vmcs_config(&vmcs_config) < 0) {
 		r = -EIO;
 		r = -EIO;
-		goto out7;
+		goto out8;
 	}
 	}
 
 
 	if (boot_cpu_has(X86_FEATURE_NX))
 	if (boot_cpu_has(X86_FEATURE_NX))
@@ -5868,16 +6077,16 @@ static __init int hardware_setup(void)
 	if (!cpu_has_vmx_unrestricted_guest())
 	if (!cpu_has_vmx_unrestricted_guest())
 		enable_unrestricted_guest = 0;
 		enable_unrestricted_guest = 0;
 
 
-	if (!cpu_has_vmx_flexpriority()) {
+	if (!cpu_has_vmx_flexpriority())
 		flexpriority_enabled = 0;
 		flexpriority_enabled = 0;
 
 
-		/*
-		 * set_apic_access_page_addr() is used to reload apic access
-		 * page upon invalidation.  No need to do anything if the
-		 * processor does not have the APIC_ACCESS_ADDR VMCS field.
-		 */
+	/*
+	 * set_apic_access_page_addr() is used to reload apic access
+	 * page upon invalidation.  No need to do anything if not
+	 * using the APIC_ACCESS_ADDR VMCS field.
+	 */
+	if (!flexpriority_enabled)
 		kvm_x86_ops->set_apic_access_page_addr = NULL;
 		kvm_x86_ops->set_apic_access_page_addr = NULL;
-	}
 
 
 	if (!cpu_has_vmx_tpr_shadow())
 	if (!cpu_has_vmx_tpr_shadow())
 		kvm_x86_ops->update_cr8_intercept = NULL;
 		kvm_x86_ops->update_cr8_intercept = NULL;
@@ -5895,13 +6104,11 @@ static __init int hardware_setup(void)
 		kvm_x86_ops->update_cr8_intercept = NULL;
 		kvm_x86_ops->update_cr8_intercept = NULL;
 	else {
 	else {
 		kvm_x86_ops->hwapic_irr_update = NULL;
 		kvm_x86_ops->hwapic_irr_update = NULL;
+		kvm_x86_ops->hwapic_isr_update = NULL;
 		kvm_x86_ops->deliver_posted_interrupt = NULL;
 		kvm_x86_ops->deliver_posted_interrupt = NULL;
 		kvm_x86_ops->sync_pir_to_irr = vmx_sync_pir_to_irr_dummy;
 		kvm_x86_ops->sync_pir_to_irr = vmx_sync_pir_to_irr_dummy;
 	}
 	}
 
 
-	if (nested)
-		nested_vmx_setup_ctls_msrs();
-
 	vmx_disable_intercept_for_msr(MSR_FS_BASE, false);
 	vmx_disable_intercept_for_msr(MSR_FS_BASE, false);
 	vmx_disable_intercept_for_msr(MSR_GS_BASE, false);
 	vmx_disable_intercept_for_msr(MSR_GS_BASE, false);
 	vmx_disable_intercept_for_msr(MSR_KERNEL_GS_BASE, true);
 	vmx_disable_intercept_for_msr(MSR_KERNEL_GS_BASE, true);
@@ -5945,12 +6152,29 @@ static __init int hardware_setup(void)
 
 
 	update_ple_window_actual_max();
 	update_ple_window_actual_max();
 
 
+	/*
+	 * Only enable PML when hardware supports PML feature, and both EPT
+	 * and EPT A/D bit features are enabled -- PML depends on them to work.
+	 */
+	if (!enable_ept || !enable_ept_ad_bits || !cpu_has_vmx_pml())
+		enable_pml = 0;
+
+	if (!enable_pml) {
+		kvm_x86_ops->slot_enable_log_dirty = NULL;
+		kvm_x86_ops->slot_disable_log_dirty = NULL;
+		kvm_x86_ops->flush_log_dirty = NULL;
+		kvm_x86_ops->enable_log_dirty_pt_masked = NULL;
+	}
+
 	return alloc_kvm_area();
 	return alloc_kvm_area();
 
 
-out7:
+out8:
 	free_page((unsigned long)vmx_vmwrite_bitmap);
 	free_page((unsigned long)vmx_vmwrite_bitmap);
-out6:
+out7:
 	free_page((unsigned long)vmx_vmread_bitmap);
 	free_page((unsigned long)vmx_vmread_bitmap);
+out6:
+	if (nested)
+		free_page((unsigned long)vmx_msr_bitmap_nested);
 out5:
 out5:
 	free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
 	free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
 out4:
 out4:
@@ -5977,6 +6201,8 @@ static __exit void hardware_unsetup(void)
 	free_page((unsigned long)vmx_io_bitmap_a);
 	free_page((unsigned long)vmx_io_bitmap_a);
 	free_page((unsigned long)vmx_vmwrite_bitmap);
 	free_page((unsigned long)vmx_vmwrite_bitmap);
 	free_page((unsigned long)vmx_vmread_bitmap);
 	free_page((unsigned long)vmx_vmread_bitmap);
+	if (nested)
+		free_page((unsigned long)vmx_msr_bitmap_nested);
 
 
 	free_kvm_area();
 	free_kvm_area();
 }
 }
@@ -6143,6 +6369,13 @@ static void nested_vmx_failValid(struct kvm_vcpu *vcpu,
 	 */
 	 */
 }
 }
 
 
+static void nested_vmx_abort(struct kvm_vcpu *vcpu, u32 indicator)
+{
+	/* TODO: not to reset guest simply here. */
+	kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
+	pr_warn("kvm: nested vmx abort, indicator %d\n", indicator);
+}
+
 static enum hrtimer_restart vmx_preemption_timer_fn(struct hrtimer *timer)
 static enum hrtimer_restart vmx_preemption_timer_fn(struct hrtimer *timer)
 {
 {
 	struct vcpu_vmx *vmx =
 	struct vcpu_vmx *vmx =
@@ -6432,6 +6665,7 @@ static inline void nested_release_vmcs12(struct vcpu_vmx *vmx)
 		vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control);
 		vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control);
 		vmcs_write64(VMCS_LINK_POINTER, -1ull);
 		vmcs_write64(VMCS_LINK_POINTER, -1ull);
 	}
 	}
+	vmx->nested.posted_intr_nv = -1;
 	kunmap(vmx->nested.current_vmcs12_page);
 	kunmap(vmx->nested.current_vmcs12_page);
 	nested_release_page(vmx->nested.current_vmcs12_page);
 	nested_release_page(vmx->nested.current_vmcs12_page);
 	vmx->nested.current_vmptr = -1ull;
 	vmx->nested.current_vmptr = -1ull;
@@ -6460,6 +6694,12 @@ static void free_nested(struct vcpu_vmx *vmx)
 		nested_release_page(vmx->nested.virtual_apic_page);
 		nested_release_page(vmx->nested.virtual_apic_page);
 		vmx->nested.virtual_apic_page = NULL;
 		vmx->nested.virtual_apic_page = NULL;
 	}
 	}
+	if (vmx->nested.pi_desc_page) {
+		kunmap(vmx->nested.pi_desc_page);
+		nested_release_page(vmx->nested.pi_desc_page);
+		vmx->nested.pi_desc_page = NULL;
+		vmx->nested.pi_desc = NULL;
+	}
 
 
 	nested_free_all_saved_vmcss(vmx);
 	nested_free_all_saved_vmcss(vmx);
 }
 }
@@ -6893,6 +7133,7 @@ static int handle_vmptrst(struct kvm_vcpu *vcpu)
 /* Emulate the INVEPT instruction */
 /* Emulate the INVEPT instruction */
 static int handle_invept(struct kvm_vcpu *vcpu)
 static int handle_invept(struct kvm_vcpu *vcpu)
 {
 {
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	u32 vmx_instruction_info, types;
 	u32 vmx_instruction_info, types;
 	unsigned long type;
 	unsigned long type;
 	gva_t gva;
 	gva_t gva;
@@ -6901,8 +7142,9 @@ static int handle_invept(struct kvm_vcpu *vcpu)
 		u64 eptp, gpa;
 		u64 eptp, gpa;
 	} operand;
 	} operand;
 
 
-	if (!(nested_vmx_secondary_ctls_high & SECONDARY_EXEC_ENABLE_EPT) ||
-	    !(nested_vmx_ept_caps & VMX_EPT_INVEPT_BIT)) {
+	if (!(vmx->nested.nested_vmx_secondary_ctls_high &
+	      SECONDARY_EXEC_ENABLE_EPT) ||
+	    !(vmx->nested.nested_vmx_ept_caps & VMX_EPT_INVEPT_BIT)) {
 		kvm_queue_exception(vcpu, UD_VECTOR);
 		kvm_queue_exception(vcpu, UD_VECTOR);
 		return 1;
 		return 1;
 	}
 	}
@@ -6918,7 +7160,7 @@ static int handle_invept(struct kvm_vcpu *vcpu)
 	vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
 	vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
 	type = kvm_register_readl(vcpu, (vmx_instruction_info >> 28) & 0xf);
 	type = kvm_register_readl(vcpu, (vmx_instruction_info >> 28) & 0xf);
 
 
-	types = (nested_vmx_ept_caps >> VMX_EPT_EXTENT_SHIFT) & 6;
+	types = (vmx->nested.nested_vmx_ept_caps >> VMX_EPT_EXTENT_SHIFT) & 6;
 
 
 	if (!(types & (1UL << type))) {
 	if (!(types & (1UL << type))) {
 		nested_vmx_failValid(vcpu,
 		nested_vmx_failValid(vcpu,
@@ -6960,6 +7202,31 @@ static int handle_invvpid(struct kvm_vcpu *vcpu)
 	return 1;
 	return 1;
 }
 }
 
 
+static int handle_pml_full(struct kvm_vcpu *vcpu)
+{
+	unsigned long exit_qualification;
+
+	trace_kvm_pml_full(vcpu->vcpu_id);
+
+	exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
+
+	/*
+	 * PML buffer FULL happened while executing iret from NMI,
+	 * "blocked by NMI" bit has to be set before next VM entry.
+	 */
+	if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) &&
+			cpu_has_virtual_nmis() &&
+			(exit_qualification & INTR_INFO_UNBLOCK_NMI))
+		vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
+				GUEST_INTR_STATE_NMI);
+
+	/*
+	 * PML buffer already flushed at beginning of VMEXIT. Nothing to do
+	 * here, and there's no userspace involvement needed for PML.
+	 */
+	return 1;
+}
+
 /*
 /*
  * The exit handlers return 1 if the exit was handled fully and guest execution
  * The exit handlers return 1 if the exit was handled fully and guest execution
  * may resume.  Otherwise they set the kvm_run parameter to indicate what needs
  * may resume.  Otherwise they set the kvm_run parameter to indicate what needs
@@ -7008,6 +7275,7 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
 	[EXIT_REASON_INVVPID]                 = handle_invvpid,
 	[EXIT_REASON_INVVPID]                 = handle_invvpid,
 	[EXIT_REASON_XSAVES]                  = handle_xsaves,
 	[EXIT_REASON_XSAVES]                  = handle_xsaves,
 	[EXIT_REASON_XRSTORS]                 = handle_xrstors,
 	[EXIT_REASON_XRSTORS]                 = handle_xrstors,
+	[EXIT_REASON_PML_FULL]		      = handle_pml_full,
 };
 };
 
 
 static const int kvm_vmx_max_exit_handlers =
 static const int kvm_vmx_max_exit_handlers =
@@ -7275,6 +7543,10 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
 	case EXIT_REASON_APIC_ACCESS:
 	case EXIT_REASON_APIC_ACCESS:
 		return nested_cpu_has2(vmcs12,
 		return nested_cpu_has2(vmcs12,
 			SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES);
 			SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES);
+	case EXIT_REASON_APIC_WRITE:
+	case EXIT_REASON_EOI_INDUCED:
+		/* apic_write and eoi_induced should exit unconditionally. */
+		return 1;
 	case EXIT_REASON_EPT_VIOLATION:
 	case EXIT_REASON_EPT_VIOLATION:
 		/*
 		/*
 		 * L0 always deals with the EPT violation. If nested EPT is
 		 * L0 always deals with the EPT violation. If nested EPT is
@@ -7314,6 +7586,89 @@ static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2)
 	*info2 = vmcs_read32(VM_EXIT_INTR_INFO);
 	*info2 = vmcs_read32(VM_EXIT_INTR_INFO);
 }
 }
 
 
+static int vmx_enable_pml(struct vcpu_vmx *vmx)
+{
+	struct page *pml_pg;
+	u32 exec_control;
+
+	pml_pg = alloc_page(GFP_KERNEL | __GFP_ZERO);
+	if (!pml_pg)
+		return -ENOMEM;
+
+	vmx->pml_pg = pml_pg;
+
+	vmcs_write64(PML_ADDRESS, page_to_phys(vmx->pml_pg));
+	vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1);
+
+	exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
+	exec_control |= SECONDARY_EXEC_ENABLE_PML;
+	vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control);
+
+	return 0;
+}
+
+static void vmx_disable_pml(struct vcpu_vmx *vmx)
+{
+	u32 exec_control;
+
+	ASSERT(vmx->pml_pg);
+	__free_page(vmx->pml_pg);
+	vmx->pml_pg = NULL;
+
+	exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
+	exec_control &= ~SECONDARY_EXEC_ENABLE_PML;
+	vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control);
+}
+
+static void vmx_flush_pml_buffer(struct vcpu_vmx *vmx)
+{
+	struct kvm *kvm = vmx->vcpu.kvm;
+	u64 *pml_buf;
+	u16 pml_idx;
+
+	pml_idx = vmcs_read16(GUEST_PML_INDEX);
+
+	/* Do nothing if PML buffer is empty */
+	if (pml_idx == (PML_ENTITY_NUM - 1))
+		return;
+
+	/* PML index always points to next available PML buffer entity */
+	if (pml_idx >= PML_ENTITY_NUM)
+		pml_idx = 0;
+	else
+		pml_idx++;
+
+	pml_buf = page_address(vmx->pml_pg);
+	for (; pml_idx < PML_ENTITY_NUM; pml_idx++) {
+		u64 gpa;
+
+		gpa = pml_buf[pml_idx];
+		WARN_ON(gpa & (PAGE_SIZE - 1));
+		mark_page_dirty(kvm, gpa >> PAGE_SHIFT);
+	}
+
+	/* reset PML index */
+	vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1);
+}
+
+/*
+ * Flush all vcpus' PML buffer and update logged GPAs to dirty_bitmap.
+ * Called before reporting dirty_bitmap to userspace.
+ */
+static void kvm_flush_pml_buffers(struct kvm *kvm)
+{
+	int i;
+	struct kvm_vcpu *vcpu;
+	/*
+	 * We only need to kick each vcpu out of guest mode: the PML buffer
+	 * is flushed at the beginning of every VMEXIT, so only vcpus that are
+	 * currently running in guest mode can have unflushed GPAs in their
+	 * PML buffer.
+	 */
+	kvm_for_each_vcpu(i, vcpu, kvm)
+		kvm_vcpu_kick(vcpu);
+}
+
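A userspace model of the buffer handling in vmx_flush_pml_buffer() may help: the 512-entry page fills from index 511 downward, GUEST_PML_INDEX always names the next free slot, so a flush walks entries pml_idx+1 through 511 and then rewinds the index. The constants and the printf standing in for mark_page_dirty() are assumptions of the sketch:

#include <stdio.h>
#include <stdint.h>

#define PML_ENTITY_NUM	512
#define PAGE_SHIFT	12

int main(void)
{
	uint64_t pml_buf[PML_ENTITY_NUM] = { 0 };
	uint16_t pml_idx = PML_ENTITY_NUM - 1;	/* empty buffer */

	/* pretend hardware logged three dirty GPAs */
	pml_buf[pml_idx--] = 0x1000;
	pml_buf[pml_idx--] = 0x7f000;
	pml_buf[pml_idx--] = 0x3000;

	if (pml_idx == PML_ENTITY_NUM - 1) {
		puts("PML buffer empty, nothing to flush");
		return 0;
	}

	/* the index points at the next available entry, so start one past it */
	for (uint16_t i = (pml_idx >= PML_ENTITY_NUM) ? 0 : pml_idx + 1;
	     i < PML_ENTITY_NUM; i++)
		printf("mark gfn 0x%llx dirty\n",
		       (unsigned long long)(pml_buf[i] >> PAGE_SHIFT));

	pml_idx = PML_ENTITY_NUM - 1;		/* reset, ready for reuse */
	return 0;
}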
 /*
 /*
  * The guest has exited.  See if we can fix it or if we need userspace
  * The guest has exited.  See if we can fix it or if we need userspace
  * assistance.
  * assistance.
@@ -7324,6 +7679,16 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
 	u32 exit_reason = vmx->exit_reason;
 	u32 exit_reason = vmx->exit_reason;
 	u32 vectoring_info = vmx->idt_vectoring_info;
 	u32 vectoring_info = vmx->idt_vectoring_info;
 
 
+	/*
+	 * Flush the PML buffer of logged GPAs so that dirty_bitmap stays up
+	 * to date. A further benefit: in kvm_vm_ioctl_get_dirty_log, before
+	 * querying dirty_bitmap, we only need to kick all vcpus out of guest
+	 * mode, because once a vcpu is back in root mode its PML buffer has
+	 * already been flushed.
+	 */
+	if (enable_pml)
+		vmx_flush_pml_buffer(vmx);
+
 	/* If guest state is invalid, start emulating */
 	/* If guest state is invalid, start emulating */
 	if (vmx->emulation_required)
 	if (vmx->emulation_required)
 		return handle_invalid_guest_state(vcpu);
 		return handle_invalid_guest_state(vcpu);
@@ -7471,9 +7836,6 @@ static void vmx_hwapic_isr_update(struct kvm *kvm, int isr)
 	u16 status;
 	u16 status;
 	u8 old;
 	u8 old;
 
 
-	if (!vmx_vm_has_apicv(kvm))
-		return;
-
 	if (isr == -1)
 	if (isr == -1)
 		isr = 0;
 		isr = 0;
 
 
@@ -7973,6 +8335,8 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
 {
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 
 
+	if (enable_pml)
+		vmx_disable_pml(vmx);
 	free_vpid(vmx);
 	free_vpid(vmx);
 	leave_guest_mode(vcpu);
 	leave_guest_mode(vcpu);
 	vmx_load_vmcs01(vcpu);
 	vmx_load_vmcs01(vcpu);
@@ -8040,9 +8404,25 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
 			goto free_vmcs;
 			goto free_vmcs;
 	}
 	}
 
 
+	if (nested)
+		nested_vmx_setup_ctls_msrs(vmx);
+
+	vmx->nested.posted_intr_nv = -1;
 	vmx->nested.current_vmptr = -1ull;
 	vmx->nested.current_vmptr = -1ull;
 	vmx->nested.current_vmcs12 = NULL;
 	vmx->nested.current_vmcs12 = NULL;
 
 
+	/*
+	 * If PML is turned on, a failure to enable it simply fails vcpu
+	 * creation. This keeps the PML logic simple (no need to handle cases
+	 * such as PML being enabled on only some of the guest's vcpus).
+	 */
+	if (enable_pml) {
+		err = vmx_enable_pml(vmx);
+		if (err)
+			goto free_vmcs;
+	}
+
 	return &vmx->vcpu;
 	return &vmx->vcpu;
 
 
 free_vmcs:
 free_vmcs:
@@ -8184,9 +8564,10 @@ static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu)
 
 
 static void nested_ept_init_mmu_context(struct kvm_vcpu *vcpu)
 static void nested_ept_init_mmu_context(struct kvm_vcpu *vcpu)
 {
 {
-	kvm_init_shadow_ept_mmu(vcpu, &vcpu->arch.mmu,
-			nested_vmx_ept_caps & VMX_EPT_EXECUTE_ONLY_BIT);
-
+	WARN_ON(mmu_is_nested(vcpu));
+	kvm_init_shadow_ept_mmu(vcpu,
+			to_vmx(vcpu)->nested.nested_vmx_ept_caps &
+			VMX_EPT_EXECUTE_ONLY_BIT);
 	vcpu->arch.mmu.set_cr3           = vmx_set_cr3;
 	vcpu->arch.mmu.set_cr3           = vmx_set_cr3;
 	vcpu->arch.mmu.get_cr3           = nested_ept_get_cr3;
 	vcpu->arch.mmu.get_cr3           = nested_ept_get_cr3;
 	vcpu->arch.mmu.inject_page_fault = nested_ept_inject_page_fault;
 	vcpu->arch.mmu.inject_page_fault = nested_ept_inject_page_fault;
@@ -8199,6 +8580,18 @@ static void nested_ept_uninit_mmu_context(struct kvm_vcpu *vcpu)
 	vcpu->arch.walk_mmu = &vcpu->arch.mmu;
 	vcpu->arch.walk_mmu = &vcpu->arch.mmu;
 }
 }
 
 
+static bool nested_vmx_is_page_fault_vmexit(struct vmcs12 *vmcs12,
+					    u16 error_code)
+{
+	bool inequality, bit;
+
+	bit = (vmcs12->exception_bitmap & (1u << PF_VECTOR)) != 0;
+	inequality =
+		(error_code & vmcs12->page_fault_error_code_mask) !=
+		 vmcs12->page_fault_error_code_match;
+	return inequality ^ bit;
+}
+
 static void vmx_inject_page_fault_nested(struct kvm_vcpu *vcpu,
 static void vmx_inject_page_fault_nested(struct kvm_vcpu *vcpu,
 		struct x86_exception *fault)
 		struct x86_exception *fault)
 {
 {
@@ -8206,8 +8599,7 @@ static void vmx_inject_page_fault_nested(struct kvm_vcpu *vcpu,
 
 
 	WARN_ON(!is_guest_mode(vcpu));
 	WARN_ON(!is_guest_mode(vcpu));
 
 
-	/* TODO: also check PFEC_MATCH/MASK, not just EB.PF. */
-	if (vmcs12->exception_bitmap & (1u << PF_VECTOR))
+	if (nested_vmx_is_page_fault_vmexit(vmcs12, fault->error_code))
 		nested_vmx_vmexit(vcpu, to_vmx(vcpu)->exit_reason,
 		nested_vmx_vmexit(vcpu, to_vmx(vcpu)->exit_reason,
 				  vmcs_read32(VM_EXIT_INTR_INFO),
 				  vmcs_read32(VM_EXIT_INTR_INFO),
 				  vmcs_readl(EXIT_QUALIFICATION));
 				  vmcs_readl(EXIT_QUALIFICATION));
@@ -8261,6 +8653,31 @@ static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu,
 			return false;
 			return false;
 	}
 	}
 
 
+	if (nested_cpu_has_posted_intr(vmcs12)) {
+		if (!IS_ALIGNED(vmcs12->posted_intr_desc_addr, 64))
+			return false;
+
+		if (vmx->nested.pi_desc_page) { /* shouldn't happen */
+			kunmap(vmx->nested.pi_desc_page);
+			nested_release_page(vmx->nested.pi_desc_page);
+		}
+		vmx->nested.pi_desc_page =
+			nested_get_page(vcpu, vmcs12->posted_intr_desc_addr);
+		if (!vmx->nested.pi_desc_page)
+			return false;
+
+		vmx->nested.pi_desc =
+			(struct pi_desc *)kmap(vmx->nested.pi_desc_page);
+		if (!vmx->nested.pi_desc) {
+			nested_release_page_clean(vmx->nested.pi_desc_page);
+			return false;
+		}
+		vmx->nested.pi_desc =
+			(struct pi_desc *)((void *)vmx->nested.pi_desc +
+			(unsigned long)(vmcs12->posted_intr_desc_addr &
+			(PAGE_SIZE - 1)));
+	}
+
 	return true;
 	return true;
 }
 }
 
 
@@ -8286,6 +8703,310 @@ static void vmx_start_preemption_timer(struct kvm_vcpu *vcpu)
 		      ns_to_ktime(preemption_timeout), HRTIMER_MODE_REL);
 		      ns_to_ktime(preemption_timeout), HRTIMER_MODE_REL);
 }
 }
 
 
+static int nested_vmx_check_msr_bitmap_controls(struct kvm_vcpu *vcpu,
+						struct vmcs12 *vmcs12)
+{
+	int maxphyaddr;
+	u64 addr;
+
+	if (!nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS))
+		return 0;
+
+	if (vmcs12_read_any(vcpu, MSR_BITMAP, &addr)) {
+		WARN_ON(1);
+		return -EINVAL;
+	}
+	maxphyaddr = cpuid_maxphyaddr(vcpu);
+
+	if (!PAGE_ALIGNED(vmcs12->msr_bitmap) ||
+	   ((addr + PAGE_SIZE) >> maxphyaddr))
+		return -EINVAL;
+
+	return 0;
+}
+
+/*
+ * Merge L0's and L1's MSR bitmap, return false to indicate that
+ * we do not use the hardware.
+ */
+static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu,
+					       struct vmcs12 *vmcs12)
+{
+	int msr;
+	struct page *page;
+	unsigned long *msr_bitmap;
+
+	if (!nested_cpu_has_virt_x2apic_mode(vmcs12))
+		return false;
+
+	page = nested_get_page(vcpu, vmcs12->msr_bitmap);
+	if (!page) {
+		WARN_ON(1);
+		return false;
+	}
+	msr_bitmap = (unsigned long *)kmap(page);
+	if (!msr_bitmap) {
+		nested_release_page_clean(page);
+		WARN_ON(1);
+		return false;
+	}
+
+	if (nested_cpu_has_virt_x2apic_mode(vmcs12)) {
+		if (nested_cpu_has_apic_reg_virt(vmcs12))
+			for (msr = 0x800; msr <= 0x8ff; msr++)
+				nested_vmx_disable_intercept_for_msr(
+					msr_bitmap,
+					vmx_msr_bitmap_nested,
+					msr, MSR_TYPE_R);
+		/* TPR is allowed */
+		nested_vmx_disable_intercept_for_msr(msr_bitmap,
+				vmx_msr_bitmap_nested,
+				APIC_BASE_MSR + (APIC_TASKPRI >> 4),
+				MSR_TYPE_R | MSR_TYPE_W);
+		if (nested_cpu_has_vid(vmcs12)) {
+			/* EOI and self-IPI are allowed */
+			nested_vmx_disable_intercept_for_msr(
+				msr_bitmap,
+				vmx_msr_bitmap_nested,
+				APIC_BASE_MSR + (APIC_EOI >> 4),
+				MSR_TYPE_W);
+			nested_vmx_disable_intercept_for_msr(
+				msr_bitmap,
+				vmx_msr_bitmap_nested,
+				APIC_BASE_MSR + (APIC_SELF_IPI >> 4),
+				MSR_TYPE_W);
+		}
+	} else {
+		/*
+		 * Enable reading intercept of all the x2apic
+		 * MSRs. We should not rely on vmcs12 to do any
+		 * optimizations here, it may have been modified
+		 * by L1.
+		 */
+		for (msr = 0x800; msr <= 0x8ff; msr++)
+			__vmx_enable_intercept_for_msr(
+				vmx_msr_bitmap_nested,
+				msr,
+				MSR_TYPE_R);
+
+		__vmx_enable_intercept_for_msr(
+				vmx_msr_bitmap_nested,
+				APIC_BASE_MSR + (APIC_TASKPRI >> 4),
+				MSR_TYPE_W);
+		__vmx_enable_intercept_for_msr(
+				vmx_msr_bitmap_nested,
+				APIC_BASE_MSR + (APIC_EOI >> 4),
+				MSR_TYPE_W);
+		__vmx_enable_intercept_for_msr(
+				vmx_msr_bitmap_nested,
+				APIC_BASE_MSR + (APIC_SELF_IPI >> 4),
+				MSR_TYPE_W);
+	}
+	kunmap(page);
+	nested_release_page_clean(page);
+
+	return true;
+}
+
+static int nested_vmx_check_apicv_controls(struct kvm_vcpu *vcpu,
+					   struct vmcs12 *vmcs12)
+{
+	if (!nested_cpu_has_virt_x2apic_mode(vmcs12) &&
+	    !nested_cpu_has_apic_reg_virt(vmcs12) &&
+	    !nested_cpu_has_vid(vmcs12) &&
+	    !nested_cpu_has_posted_intr(vmcs12))
+		return 0;
+
+	/*
+	 * If virtualize x2apic mode is enabled,
+	 * virtualize apic access must be disabled.
+	 */
+	if (nested_cpu_has_virt_x2apic_mode(vmcs12) &&
+	    nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES))
+		return -EINVAL;
+
+	/*
+	 * If virtual interrupt delivery is enabled,
+	 * we must exit on external interrupts.
+	 */
+	if (nested_cpu_has_vid(vmcs12) &&
+	   !nested_exit_on_intr(vcpu))
+		return -EINVAL;
+
+	/*
+	 * bits 15:8 should be zero in posted_intr_nv,
+	 * the descriptor address has been already checked
+	 * in nested_get_vmcs12_pages.
+	 */
+	if (nested_cpu_has_posted_intr(vmcs12) &&
+	   (!nested_cpu_has_vid(vmcs12) ||
+	    !nested_exit_intr_ack_set(vcpu) ||
+	    vmcs12->posted_intr_nv & 0xff00))
+		return -EINVAL;
+
+	/* tpr shadow is needed by all apicv features. */
+	if (!nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW))
+		return -EINVAL;
+
+	return 0;
+}
+
+static int nested_vmx_check_msr_switch(struct kvm_vcpu *vcpu,
+				       unsigned long count_field,
+				       unsigned long addr_field,
+				       int maxphyaddr)
+{
+	u64 count, addr;
+
+	if (vmcs12_read_any(vcpu, count_field, &count) ||
+	    vmcs12_read_any(vcpu, addr_field, &addr)) {
+		WARN_ON(1);
+		return -EINVAL;
+	}
+	if (count == 0)
+		return 0;
+	if (!IS_ALIGNED(addr, 16) || addr >> maxphyaddr ||
+	    (addr + count * sizeof(struct vmx_msr_entry) - 1) >> maxphyaddr) {
+		pr_warn_ratelimited(
+			"nVMX: invalid MSR switch (0x%lx, %d, %llu, 0x%08llx)",
+			addr_field, maxphyaddr, count, addr);
+		return -EINVAL;
+	}
+	return 0;
+}
+
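The check above reduces to: a zero count is always acceptable; otherwise the area must be 16-byte aligned and both its first and its last byte must lie below MAXPHYADDR. A standalone sketch with made-up values (struct vmx_msr_entry is redeclared locally just for its size):

#include <stdio.h>
#include <stdint.h>

struct vmx_msr_entry { uint32_t index; uint32_t reserved; uint64_t value; };

static int msr_area_valid(uint64_t addr, uint64_t count, int maxphyaddr)
{
	if (count == 0)
		return 1;
	if (addr & 0xf)					/* not 16-byte aligned */
		return 0;
	if (addr >> maxphyaddr)				/* starts beyond MAXPHYADDR */
		return 0;
	if ((addr + count * sizeof(struct vmx_msr_entry) - 1) >> maxphyaddr)
		return 0;				/* ends beyond MAXPHYADDR */
	return 1;
}

int main(void)
{
	printf("%d\n", msr_area_valid(0x12340, 8, 40));		/* 1: fine */
	printf("%d\n", msr_area_valid(0x12348, 8, 40));		/* 0: misaligned */
	printf("%d\n", msr_area_valid((1ULL << 40) - 16, 4, 40));	/* 0: runs past the limit */
	return 0;
}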
+static int nested_vmx_check_msr_switch_controls(struct kvm_vcpu *vcpu,
+						struct vmcs12 *vmcs12)
+{
+	int maxphyaddr;
+
+	if (vmcs12->vm_exit_msr_load_count == 0 &&
+	    vmcs12->vm_exit_msr_store_count == 0 &&
+	    vmcs12->vm_entry_msr_load_count == 0)
+		return 0; /* Fast path */
+	maxphyaddr = cpuid_maxphyaddr(vcpu);
+	if (nested_vmx_check_msr_switch(vcpu, VM_EXIT_MSR_LOAD_COUNT,
+					VM_EXIT_MSR_LOAD_ADDR, maxphyaddr) ||
+	    nested_vmx_check_msr_switch(vcpu, VM_EXIT_MSR_STORE_COUNT,
+					VM_EXIT_MSR_STORE_ADDR, maxphyaddr) ||
+	    nested_vmx_check_msr_switch(vcpu, VM_ENTRY_MSR_LOAD_COUNT,
+					VM_ENTRY_MSR_LOAD_ADDR, maxphyaddr))
+		return -EINVAL;
+	return 0;
+}
+
+static int nested_vmx_msr_check_common(struct kvm_vcpu *vcpu,
+				       struct vmx_msr_entry *e)
+{
+	/* x2APIC MSR accesses are not allowed */
+	if (apic_x2apic_mode(vcpu->arch.apic) && e->index >> 8 == 0x8)
+		return -EINVAL;
+	if (e->index == MSR_IA32_UCODE_WRITE || /* SDM Table 35-2 */
+	    e->index == MSR_IA32_UCODE_REV)
+		return -EINVAL;
+	if (e->reserved != 0)
+		return -EINVAL;
+	return 0;
+}
+
+static int nested_vmx_load_msr_check(struct kvm_vcpu *vcpu,
+				     struct vmx_msr_entry *e)
+{
+	if (e->index == MSR_FS_BASE ||
+	    e->index == MSR_GS_BASE ||
+	    e->index == MSR_IA32_SMM_MONITOR_CTL || /* SMM is not supported */
+	    nested_vmx_msr_check_common(vcpu, e))
+		return -EINVAL;
+	return 0;
+}
+
+static int nested_vmx_store_msr_check(struct kvm_vcpu *vcpu,
+				      struct vmx_msr_entry *e)
+{
+	if (e->index == MSR_IA32_SMBASE || /* SMM is not supported */
+	    nested_vmx_msr_check_common(vcpu, e))
+		return -EINVAL;
+	return 0;
+}
+
+/*
+ * Load guest's/host's msr at nested entry/exit.
+ * return 0 for success, entry index for failure.
+ */
+static u32 nested_vmx_load_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count)
+{
+	u32 i;
+	struct vmx_msr_entry e;
+	struct msr_data msr;
+
+	msr.host_initiated = false;
+	for (i = 0; i < count; i++) {
+		if (kvm_read_guest(vcpu->kvm, gpa + i * sizeof(e),
+				   &e, sizeof(e))) {
+			pr_warn_ratelimited(
+				"%s cannot read MSR entry (%u, 0x%08llx)\n",
+				__func__, i, gpa + i * sizeof(e));
+			goto fail;
+		}
+		if (nested_vmx_load_msr_check(vcpu, &e)) {
+			pr_warn_ratelimited(
+				"%s check failed (%u, 0x%x, 0x%x)\n",
+				__func__, i, e.index, e.reserved);
+			goto fail;
+		}
+		msr.index = e.index;
+		msr.data = e.value;
+		if (kvm_set_msr(vcpu, &msr)) {
+			pr_warn_ratelimited(
+				"%s cannot write MSR (%u, 0x%x, 0x%llx)\n",
+				__func__, i, e.index, e.value);
+			goto fail;
+		}
+	}
+	return 0;
+fail:
+	return i + 1;
+}
+
+static int nested_vmx_store_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count)
+{
+	u32 i;
+	struct vmx_msr_entry e;
+
+	for (i = 0; i < count; i++) {
+		if (kvm_read_guest(vcpu->kvm,
+				   gpa + i * sizeof(e),
+				   &e, 2 * sizeof(u32))) {
+			pr_warn_ratelimited(
+				"%s cannot read MSR entry (%u, 0x%08llx)\n",
+				__func__, i, gpa + i * sizeof(e));
+			return -EINVAL;
+		}
+		if (nested_vmx_store_msr_check(vcpu, &e)) {
+			pr_warn_ratelimited(
+				"%s check failed (%u, 0x%x, 0x%x)\n",
+				__func__, i, e.index, e.reserved);
+			return -EINVAL;
+		}
+		if (kvm_get_msr(vcpu, e.index, &e.value)) {
+			pr_warn_ratelimited(
+				"%s cannot read MSR (%u, 0x%x)\n",
+				__func__, i, e.index);
+			return -EINVAL;
+		}
+		if (kvm_write_guest(vcpu->kvm,
+				    gpa + i * sizeof(e) +
+					offsetof(struct vmx_msr_entry, value),
+				    &e.value, sizeof(e.value))) {
+			pr_warn_ratelimited(
+				"%s cannot write MSR (%u, 0x%x, 0x%llx)\n",
+				__func__, i, e.index, e.value);
+			return -EINVAL;
+		}
+	}
+	return 0;
+}
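
Both walkers assume the SDM layout of the MSR-load/store areas: an array of 16-byte entries holding a 32-bit MSR index, 32 reserved bits that must be zero, and the 64-bit value. A hedged sketch of how an L1 hypervisor might populate one VM-entry MSR-load entry (guest_ram and add_msr_load_entry are inventions of the example, not KVM or VMX API):

#include <stdint.h>
#include <string.h>

struct vmx_msr_entry {		/* 16 bytes, layout per the SDM */
	uint32_t index;
	uint32_t reserved;	/* must be zero */
	uint64_t value;
};

static unsigned char guest_ram[4096];	/* toy stand-in for L1-owned memory */

static void add_msr_load_entry(uint64_t area_off, uint32_t slot,
			       uint32_t msr, uint64_t value)
{
	struct vmx_msr_entry e = { .index = msr, .reserved = 0, .value = value };

	memcpy(&guest_ram[area_off + slot * sizeof(e)], &e, sizeof(e));
}

int main(void)
{
	add_msr_load_entry(0, 0, 0x277 /* MSR_IA32_CR_PAT */, 0x0007040600070406ULL);
	return 0;
}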
+
 /*
  * prepare_vmcs02 is called when the L1 guest hypervisor runs its nested
  * L2 guest. L1 has a vmcs for L2 (vmcs12), and this function "merges" it
@@ -8365,8 +9086,23 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 
 
 	exec_control = vmcs12->pin_based_vm_exec_control;
 	exec_control |= vmcs_config.pin_based_exec_ctrl;
-	exec_control &= ~(PIN_BASED_VMX_PREEMPTION_TIMER |
-                          PIN_BASED_POSTED_INTR);
+	exec_control &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
+
+	if (nested_cpu_has_posted_intr(vmcs12)) {
+		/*
+		 * Note that we use L0's vector here and in
+		 * vmx_deliver_nested_posted_interrupt.
+		 */
+		vmx->nested.posted_intr_nv = vmcs12->posted_intr_nv;
+		vmx->nested.pi_pending = false;
+		vmcs_write64(POSTED_INTR_NV, POSTED_INTR_VECTOR);
+		vmcs_write64(POSTED_INTR_DESC_ADDR,
+			page_to_phys(vmx->nested.pi_desc_page) +
+			(unsigned long)(vmcs12->posted_intr_desc_addr &
+			(PAGE_SIZE - 1)));
+	} else
+		exec_control &= ~PIN_BASED_POSTED_INTR;
+
 	vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, exec_control);
 
 	vmx->nested.preemption_timer_expired = false;
@@ -8423,12 +9159,26 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 			else
 				vmcs_write64(APIC_ACCESS_ADDR,
 				  page_to_phys(vmx->nested.apic_access_page));
-		} else if (vm_need_virtualize_apic_accesses(vmx->vcpu.kvm)) {
+		} else if (!(nested_cpu_has_virt_x2apic_mode(vmcs12)) &&
+			    (vm_need_virtualize_apic_accesses(vmx->vcpu.kvm))) {
 			exec_control |=
 				SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
 			kvm_vcpu_reload_apic_access_page(vcpu);
 		}
 
+		if (exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY) {
+			vmcs_write64(EOI_EXIT_BITMAP0,
+				vmcs12->eoi_exit_bitmap0);
+			vmcs_write64(EOI_EXIT_BITMAP1,
+				vmcs12->eoi_exit_bitmap1);
+			vmcs_write64(EOI_EXIT_BITMAP2,
+				vmcs12->eoi_exit_bitmap2);
+			vmcs_write64(EOI_EXIT_BITMAP3,
+				vmcs12->eoi_exit_bitmap3);
+			vmcs_write16(GUEST_INTR_STATUS,
+				vmcs12->guest_intr_status);
+		}
+
 		vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control);
 	}
 
@@ -8462,11 +9212,17 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 		vmcs_write32(TPR_THRESHOLD, vmcs12->tpr_threshold);
 	}
 
+	if (cpu_has_vmx_msr_bitmap() &&
+	    exec_control & CPU_BASED_USE_MSR_BITMAPS &&
+	    nested_vmx_merge_msr_bitmap(vcpu, vmcs12)) {
+		vmcs_write64(MSR_BITMAP, __pa(vmx_msr_bitmap_nested));
+	} else
+		exec_control &= ~CPU_BASED_USE_MSR_BITMAPS;
+
 	/*
-	 * Merging of IO and MSR bitmaps not currently supported.
+	 * Merging of IO bitmap not currently supported.
 	 * Rather, exit every time.
 	 */
-	exec_control &= ~CPU_BASED_USE_MSR_BITMAPS;
 	exec_control &= ~CPU_BASED_USE_IO_BITMAPS;
 	exec_control |= CPU_BASED_UNCOND_IO_EXITING;
 
@@ -8582,6 +9338,7 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
 	int cpu;
 	struct loaded_vmcs *vmcs02;
 	bool ia32e;
+	u32 msr_entry_idx;
 
 
 	if (!nested_vmx_check_permission(vcpu) ||
 	    !nested_vmx_check_vmcs12(vcpu))
@@ -8616,41 +9373,42 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
 		return 1;
 	}
 
-	if ((vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_MSR_BITMAPS) &&
-			!PAGE_ALIGNED(vmcs12->msr_bitmap)) {
+	if (!nested_get_vmcs12_pages(vcpu, vmcs12)) {
 		/*TODO: Also verify bits beyond physical address width are 0*/
 		nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
 		return 1;
 	}
 
-	if (!nested_get_vmcs12_pages(vcpu, vmcs12)) {
-		/*TODO: Also verify bits beyond physical address width are 0*/
+	if (nested_vmx_check_msr_bitmap_controls(vcpu, vmcs12)) {
 		nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
 		return 1;
 	}
 
-	if (vmcs12->vm_entry_msr_load_count > 0 ||
-	    vmcs12->vm_exit_msr_load_count > 0 ||
-	    vmcs12->vm_exit_msr_store_count > 0) {
-		pr_warn_ratelimited("%s: VMCS MSR_{LOAD,STORE} unsupported\n",
-				    __func__);
+	if (nested_vmx_check_apicv_controls(vcpu, vmcs12)) {
+		nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
+		return 1;
+	}
+
+	if (nested_vmx_check_msr_switch_controls(vcpu, vmcs12)) {
 		nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
 		return 1;
 	}
 
 	if (!vmx_control_verify(vmcs12->cpu_based_vm_exec_control,
-				nested_vmx_true_procbased_ctls_low,
-				nested_vmx_procbased_ctls_high) ||
+				vmx->nested.nested_vmx_true_procbased_ctls_low,
+				vmx->nested.nested_vmx_procbased_ctls_high) ||
 	    !vmx_control_verify(vmcs12->secondary_vm_exec_control,
-	      nested_vmx_secondary_ctls_low, nested_vmx_secondary_ctls_high) ||
+				vmx->nested.nested_vmx_secondary_ctls_low,
+				vmx->nested.nested_vmx_secondary_ctls_high) ||
 	    !vmx_control_verify(vmcs12->pin_based_vm_exec_control,
-	      nested_vmx_pinbased_ctls_low, nested_vmx_pinbased_ctls_high) ||
+				vmx->nested.nested_vmx_pinbased_ctls_low,
+				vmx->nested.nested_vmx_pinbased_ctls_high) ||
 	    !vmx_control_verify(vmcs12->vm_exit_controls,
-				nested_vmx_true_exit_ctls_low,
-				nested_vmx_exit_ctls_high) ||
+				vmx->nested.nested_vmx_true_exit_ctls_low,
+				vmx->nested.nested_vmx_exit_ctls_high) ||
 	    !vmx_control_verify(vmcs12->vm_entry_controls,
-				nested_vmx_true_entry_ctls_low,
-				nested_vmx_entry_ctls_high))
+				vmx->nested.nested_vmx_true_entry_ctls_low,
+				vmx->nested.nested_vmx_entry_ctls_high))
 	{
 		nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
 		return 1;
@@ -8663,7 +9421,7 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
 		return 1;
 	}
 
-	if (!nested_cr0_valid(vmcs12, vmcs12->guest_cr0) ||
+	if (!nested_cr0_valid(vcpu, vmcs12->guest_cr0) ||
 	    ((vmcs12->guest_cr4 & VMXON_CR4_ALWAYSON) != VMXON_CR4_ALWAYSON)) {
 		nested_vmx_entry_failure(vcpu, vmcs12,
 			EXIT_REASON_INVALID_STATE, ENTRY_FAIL_DEFAULT);
@@ -8739,10 +9497,21 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
 
 
 	vmx_segment_cache_clear(vmx);
 
-	vmcs12->launch_state = 1;
-
 	prepare_vmcs02(vcpu, vmcs12);
 
+	msr_entry_idx = nested_vmx_load_msr(vcpu,
+					    vmcs12->vm_entry_msr_load_addr,
+					    vmcs12->vm_entry_msr_load_count);
+	if (msr_entry_idx) {
+		leave_guest_mode(vcpu);
+		vmx_load_vmcs01(vcpu);
+		nested_vmx_entry_failure(vcpu, vmcs12,
+				EXIT_REASON_MSR_LOAD_FAIL, msr_entry_idx);
+		return 1;
+	}
+
+	vmcs12->launch_state = 1;
+
 	if (vmcs12->guest_activity_state == GUEST_ACTIVITY_HLT)
 		return kvm_emulate_halt(vcpu);
 
@@ -8869,9 +9638,10 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr)
 		if (vmx->nested.nested_run_pending)
 			return -EBUSY;
 		nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT, 0, 0);
+		return 0;
 	}
 
-	return 0;
+	return vmx_complete_nested_posted_interrupt(vcpu);
 }
 
 static u32 vmx_get_preemption_timer_value(struct kvm_vcpu *vcpu)
@@ -8981,6 +9751,9 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
 		vmcs12->guest_pdptr3 = vmcs_read64(GUEST_PDPTR3);
 	}
 
+	if (nested_cpu_has_vid(vmcs12))
+		vmcs12->guest_intr_status = vmcs_read16(GUEST_INTR_STATUS);
+
 	vmcs12->vm_entry_controls =
 		(vmcs12->vm_entry_controls & ~VM_ENTRY_IA32E_MODE) |
 		(vm_entry_controls_get(to_vmx(vcpu)) & VM_ENTRY_IA32E_MODE);
@@ -9172,6 +9945,13 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
 
 
 	kvm_set_dr(vcpu, 7, 0x400);
 	vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
+
+	if (cpu_has_vmx_msr_bitmap())
+		vmx_set_msr_bitmap(vcpu);
+
+	if (nested_vmx_load_msr(vcpu, vmcs12->vm_exit_msr_load_addr,
+				vmcs12->vm_exit_msr_load_count))
+		nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_MSR_FAIL);
 }
 
 /*
@@ -9193,6 +9973,10 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
 	prepare_vmcs12(vcpu, vmcs12, exit_reason, exit_intr_info,
 		       exit_qualification);
 
+	if (nested_vmx_store_msr(vcpu, vmcs12->vm_exit_msr_store_addr,
+				 vmcs12->vm_exit_msr_store_count))
+		nested_vmx_abort(vcpu, VMX_ABORT_SAVE_GUEST_MSR_FAIL);
+
 	vmx_load_vmcs01(vcpu);
 
 	if ((exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT)
@@ -9235,6 +10019,12 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
 		nested_release_page(vmx->nested.virtual_apic_page);
 		vmx->nested.virtual_apic_page = NULL;
 	}
+	if (vmx->nested.pi_desc_page) {
+		kunmap(vmx->nested.pi_desc_page);
+		nested_release_page(vmx->nested.pi_desc_page);
+		vmx->nested.pi_desc_page = NULL;
+		vmx->nested.pi_desc = NULL;
+	}
 
 
 	/*
 	 * We are now running in L2, mmu_notifier will force to reload the
@@ -9301,6 +10091,31 @@ static void vmx_sched_in(struct kvm_vcpu *vcpu, int cpu)
 		shrink_ple_window(vcpu);
 }
 
+static void vmx_slot_enable_log_dirty(struct kvm *kvm,
+				     struct kvm_memory_slot *slot)
+{
+	kvm_mmu_slot_leaf_clear_dirty(kvm, slot);
+	kvm_mmu_slot_largepage_remove_write_access(kvm, slot);
+}
+
+static void vmx_slot_disable_log_dirty(struct kvm *kvm,
+				       struct kvm_memory_slot *slot)
+{
+	kvm_mmu_slot_set_dirty(kvm, slot);
+}
+
+static void vmx_flush_log_dirty(struct kvm *kvm)
+{
+	kvm_flush_pml_buffers(kvm);
+}
+
+static void vmx_enable_log_dirty_pt_masked(struct kvm *kvm,
+					   struct kvm_memory_slot *memslot,
+					   gfn_t offset, unsigned long mask)
+{
+	kvm_mmu_clear_dirty_pt_masked(kvm, memslot, offset, mask);
+}
+
 static struct kvm_x86_ops vmx_x86_ops = {
 	.cpu_has_kvm_support = cpu_has_kvm_support,
 	.disabled_by_bios = vmx_disabled_by_bios,
@@ -9409,6 +10224,11 @@ static struct kvm_x86_ops vmx_x86_ops = {
 	.check_nested_events = vmx_check_nested_events,
 
 	.sched_in = vmx_sched_in,
+
+	.slot_enable_log_dirty = vmx_slot_enable_log_dirty,
+	.slot_disable_log_dirty = vmx_slot_disable_log_dirty,
+	.flush_log_dirty = vmx_flush_log_dirty,
+	.enable_log_dirty_pt_masked = vmx_enable_log_dirty_pt_masked,
 };
 
 static int __init vmx_init(void)

+ 127 - 82
arch/x86/kvm/x86.c

@@ -108,6 +108,10 @@ EXPORT_SYMBOL_GPL(kvm_max_guest_tsc_khz);
 static u32 tsc_tolerance_ppm = 250;
 module_param(tsc_tolerance_ppm, uint, S_IRUGO | S_IWUSR);
 
+/* lapic timer advance (tscdeadline mode only) in nanoseconds */
+unsigned int lapic_timer_advance_ns = 0;
+module_param(lapic_timer_advance_ns, uint, S_IRUGO | S_IWUSR);
+
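
Judging from the wait_lapic_expire() call added to vcpu_enter_guest() further down, the idea behind this knob is to arm the TSC-deadline hrtimer lapic_timer_advance_ns early and then busy-wait the small remainder right before VM entry, so the guest observes its deadline with less latency. A rough userspace-style illustration of that busy-wait (a sketch only, not the lapic.c implementation):

#include <stdint.h>
#include <x86intrin.h>		/* __rdtsc(), _mm_pause() */

/* Spin until the guest's TSC deadline, after the timer fired a bit early. */
static void wait_until_tsc_deadline(uint64_t deadline_tsc)
{
	while (__rdtsc() < deadline_tsc)
		_mm_pause();
}

int main(void)
{
	wait_until_tsc_deadline(__rdtsc() + 100000);
	return 0;
}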
 static bool backwards_tsc_observed = false;
 
 #define KVM_NR_SHARED_MSRS 16
@@ -141,6 +145,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 	{ "irq_window", VCPU_STAT(irq_window_exits) },
 	{ "irq_window", VCPU_STAT(irq_window_exits) },
 	{ "nmi_window", VCPU_STAT(nmi_window_exits) },
 	{ "nmi_window", VCPU_STAT(nmi_window_exits) },
 	{ "halt_exits", VCPU_STAT(halt_exits) },
 	{ "halt_exits", VCPU_STAT(halt_exits) },
+	{ "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
 	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
 	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
 	{ "hypercalls", VCPU_STAT(hypercalls) },
 	{ "hypercalls", VCPU_STAT(hypercalls) },
 	{ "request_irq", VCPU_STAT(request_irq_exits) },
 	{ "request_irq", VCPU_STAT(request_irq_exits) },
@@ -492,7 +497,7 @@ int kvm_read_guest_page_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
 }
 }
 EXPORT_SYMBOL_GPL(kvm_read_guest_page_mmu);
 EXPORT_SYMBOL_GPL(kvm_read_guest_page_mmu);
 
 
-int kvm_read_nested_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn,
+static int kvm_read_nested_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn,
 			       void *data, int offset, int len, u32 access)
 			       void *data, int offset, int len, u32 access)
 {
 {
 	return kvm_read_guest_page_mmu(vcpu, vcpu->arch.walk_mmu, gfn,
 	return kvm_read_guest_page_mmu(vcpu, vcpu->arch.walk_mmu, gfn,
@@ -643,7 +648,7 @@ static void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu)
 	}
 	}
 }
 }
 
 
-int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
+static int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
 {
 {
 	u64 xcr0 = xcr;
 	u64 xcr0 = xcr;
 	u64 old_xcr0 = vcpu->arch.xcr0;
 	u64 old_xcr0 = vcpu->arch.xcr0;
@@ -1083,6 +1088,15 @@ static void update_pvclock_gtod(struct timekeeper *tk)
 }
 }
 #endif
 #endif
 
 
+void kvm_set_pending_timer(struct kvm_vcpu *vcpu)
+{
+	/*
+	 * Note: KVM_REQ_PENDING_TIMER is implicitly checked in
+	 * vcpu_enter_guest.  This function is only called from
+	 * the physical CPU that is running vcpu.
+	 */
+	kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu);
+}
 
 
 static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock)
 static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock)
 {
 {
@@ -1180,7 +1194,7 @@ static atomic_t kvm_guest_has_master_clock = ATOMIC_INIT(0);
 #endif
 #endif
 
 
 static DEFINE_PER_CPU(unsigned long, cpu_tsc_khz);
 static DEFINE_PER_CPU(unsigned long, cpu_tsc_khz);
-unsigned long max_tsc_khz;
+static unsigned long max_tsc_khz;
 
 
 static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec)
 static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec)
 {
 {
@@ -1234,7 +1248,7 @@ static u64 compute_guest_tsc(struct kvm_vcpu *vcpu, s64 kernel_ns)
 	return tsc;
 	return tsc;
 }
 }
 
 
-void kvm_track_tsc_matching(struct kvm_vcpu *vcpu)
+static void kvm_track_tsc_matching(struct kvm_vcpu *vcpu)
 {
 {
 #ifdef CONFIG_X86_64
 #ifdef CONFIG_X86_64
 	bool vcpus_matched;
 	bool vcpus_matched;
@@ -1529,7 +1543,8 @@ static void pvclock_update_vm_gtod_copy(struct kvm *kvm)
 					&ka->master_cycle_now);
 					&ka->master_cycle_now);
 
 
 	ka->use_master_clock = host_tsc_clocksource && vcpus_matched
 	ka->use_master_clock = host_tsc_clocksource && vcpus_matched
-				&& !backwards_tsc_observed;
+				&& !backwards_tsc_observed
+				&& !ka->boot_vcpu_runs_old_kvmclock;
 
 
 	if (ka->use_master_clock)
 	if (ka->use_master_clock)
 		atomic_set(&kvm_guest_has_master_clock, 1);
 		atomic_set(&kvm_guest_has_master_clock, 1);
@@ -2161,8 +2176,20 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 	case MSR_KVM_SYSTEM_TIME_NEW:
 	case MSR_KVM_SYSTEM_TIME_NEW:
 	case MSR_KVM_SYSTEM_TIME: {
 	case MSR_KVM_SYSTEM_TIME: {
 		u64 gpa_offset;
 		u64 gpa_offset;
+		struct kvm_arch *ka = &vcpu->kvm->arch;
+
 		kvmclock_reset(vcpu);
 		kvmclock_reset(vcpu);
 
 
+		if (vcpu->vcpu_id == 0 && !msr_info->host_initiated) {
+			bool tmp = (msr == MSR_KVM_SYSTEM_TIME);
+
+			if (ka->boot_vcpu_runs_old_kvmclock != tmp)
+				set_bit(KVM_REQ_MASTERCLOCK_UPDATE,
+					&vcpu->requests);
+
+			ka->boot_vcpu_runs_old_kvmclock = tmp;
+		}
+
 		vcpu->arch.time = data;
 		vcpu->arch.time = data;
 		kvm_make_request(KVM_REQ_GLOBAL_CLOCK_UPDATE, vcpu);
 		kvm_make_request(KVM_REQ_GLOBAL_CLOCK_UPDATE, vcpu);
 
 
@@ -2324,6 +2351,7 @@ int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
 {
 {
 	return kvm_x86_ops->get_msr(vcpu, msr_index, pdata);
 	return kvm_x86_ops->get_msr(vcpu, msr_index, pdata);
 }
 }
+EXPORT_SYMBOL_GPL(kvm_get_msr);
 
 
 static int get_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
 static int get_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
 {
 {
@@ -2738,6 +2766,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 	case KVM_CAP_READONLY_MEM:
 	case KVM_CAP_READONLY_MEM:
 	case KVM_CAP_HYPERV_TIME:
 	case KVM_CAP_HYPERV_TIME:
 	case KVM_CAP_IOAPIC_POLARITY_IGNORED:
 	case KVM_CAP_IOAPIC_POLARITY_IGNORED:
+	case KVM_CAP_TSC_DEADLINE_TIMER:
 #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
 #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
 	case KVM_CAP_ASSIGN_DEV_IRQ:
 	case KVM_CAP_ASSIGN_DEV_IRQ:
 	case KVM_CAP_PCI_2_3:
 	case KVM_CAP_PCI_2_3:
@@ -2776,9 +2805,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 	case KVM_CAP_TSC_CONTROL:
 	case KVM_CAP_TSC_CONTROL:
 		r = kvm_has_tsc_control;
 		r = kvm_has_tsc_control;
 		break;
 		break;
-	case KVM_CAP_TSC_DEADLINE_TIMER:
-		r = boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER);
-		break;
 	default:
 	default:
 		r = 0;
 		r = 0;
 		break;
 		break;
@@ -3734,83 +3760,43 @@ static int kvm_vm_ioctl_reinject(struct kvm *kvm,
  * @kvm: kvm instance
  * @log: slot id and address to which we copy the log
  *
- * We need to keep it in mind that VCPU threads can write to the bitmap
- * concurrently.  So, to avoid losing data, we keep the following order for
- * each bit:
+ * Steps 1-4 below provide general overview of dirty page logging. See
+ * kvm_get_dirty_log_protect() function description for additional details.
+ *
+ * We call kvm_get_dirty_log_protect() to handle steps 1-3; upon return we
+ * always flush the TLB (step 4) even if a previous step failed and the dirty
+ * bitmap may be corrupt. Regardless of the previous outcome, the KVM logging
+ * API does not preclude a subsequent dirty log read from user space. Flushing
+ * the TLB ensures writes will be marked dirty for the next log read.
  *
  *   1. Take a snapshot of the bit and clear it if needed.
  *   2. Write protect the corresponding page.
- *   3. Flush TLB's if needed.
- *   4. Copy the snapshot to the userspace.
- *
- * Between 2 and 3, the guest may write to the page using the remaining TLB
- * entry.  This is not a problem because the page will be reported dirty at
- * step 4 using the snapshot taken before and step 3 ensures that successive
- * writes will be logged for the next call.
+ *   3. Copy the snapshot to the userspace.
+ *   4. Flush TLB's if needed.
  */
 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
 {
-	int r;
-	struct kvm_memory_slot *memslot;
-	unsigned long n, i;
-	unsigned long *dirty_bitmap;
-	unsigned long *dirty_bitmap_buffer;
 	bool is_dirty = false;
+	int r;
 
 
 	mutex_lock(&kvm->slots_lock);
 
-	r = -EINVAL;
-	if (log->slot >= KVM_USER_MEM_SLOTS)
-		goto out;
-
-	memslot = id_to_memslot(kvm->memslots, log->slot);
-
-	dirty_bitmap = memslot->dirty_bitmap;
-	r = -ENOENT;
-	if (!dirty_bitmap)
-		goto out;
-
-	n = kvm_dirty_bitmap_bytes(memslot);
-
-	dirty_bitmap_buffer = dirty_bitmap + n / sizeof(long);
-	memset(dirty_bitmap_buffer, 0, n);
-
-	spin_lock(&kvm->mmu_lock);
-
-	for (i = 0; i < n / sizeof(long); i++) {
-		unsigned long mask;
-		gfn_t offset;
-
-		if (!dirty_bitmap[i])
-			continue;
-
-		is_dirty = true;
-
-		mask = xchg(&dirty_bitmap[i], 0);
-		dirty_bitmap_buffer[i] = mask;
-
-		offset = i * BITS_PER_LONG;
-		kvm_mmu_write_protect_pt_masked(kvm, memslot, offset, mask);
-	}
-
-	spin_unlock(&kvm->mmu_lock);
+	/*
+	 * Flush potentially hardware-cached dirty pages to dirty_bitmap.
+	 */
+	if (kvm_x86_ops->flush_log_dirty)
+		kvm_x86_ops->flush_log_dirty(kvm);
 
 
-	/* See the comments in kvm_mmu_slot_remove_write_access(). */
-	lockdep_assert_held(&kvm->slots_lock);
+	r = kvm_get_dirty_log_protect(kvm, log, &is_dirty);
 
 
 	/*
 	 * All the TLBs can be flushed out of mmu lock, see the comments in
 	 * kvm_mmu_slot_remove_write_access().
 	 */
+	lockdep_assert_held(&kvm->slots_lock);
 	if (is_dirty)
 		kvm_flush_remote_tlbs(kvm);
 
-	r = -EFAULT;
-	if (copy_to_user(log->dirty_bitmap, dirty_bitmap_buffer, n))
-		goto out;
-
-	r = 0;
-out:
 	mutex_unlock(&kvm->slots_lock);
 	return r;
 }
 }
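
For reference, the userspace side of this ioctl is unchanged: a memslot is registered with KVM_MEM_LOG_DIRTY_PAGES and the bitmap is then harvested with KVM_GET_DIRTY_LOG. A minimal sketch (vm_fd, the slot geometry and the bitmap sizing are assumptions of the example):

#include <linux/kvm.h>
#include <stdlib.h>
#include <sys/ioctl.h>

/* Fetch and discard the dirty bitmap for one memslot of slot_bytes bytes. */
int get_dirty_log(int vm_fd, unsigned int slot, size_t slot_bytes)
{
	size_t bitmap_bytes = ((slot_bytes / 4096 + 63) / 64) * 8;
	struct kvm_dirty_log log = { .slot = slot };
	int ret;

	log.dirty_bitmap = calloc(1, bitmap_bytes);
	if (!log.dirty_bitmap)
		return -1;
	ret = ioctl(vm_fd, KVM_GET_DIRTY_LOG, &log);
	free(log.dirty_bitmap);
	return ret;
}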
@@ -4516,6 +4502,8 @@ int emulator_read_write(struct x86_emulate_ctxt *ctxt, unsigned long addr,
 		if (rc != X86EMUL_CONTINUE)
 		if (rc != X86EMUL_CONTINUE)
 			return rc;
 			return rc;
 		addr += now;
 		addr += now;
+		if (ctxt->mode != X86EMUL_MODE_PROT64)
+			addr = (u32)addr;
 		val += now;
 		val += now;
 		bytes -= now;
 		bytes -= now;
 	}
 	}
@@ -4984,6 +4972,11 @@ static void emulator_write_gpr(struct x86_emulate_ctxt *ctxt, unsigned reg, ulon
 	kvm_register_write(emul_to_vcpu(ctxt), reg, val);
 	kvm_register_write(emul_to_vcpu(ctxt), reg, val);
 }
 }
 
 
+static void emulator_set_nmi_mask(struct x86_emulate_ctxt *ctxt, bool masked)
+{
+	kvm_x86_ops->set_nmi_mask(emul_to_vcpu(ctxt), masked);
+}
+
 static const struct x86_emulate_ops emulate_ops = {
 static const struct x86_emulate_ops emulate_ops = {
 	.read_gpr            = emulator_read_gpr,
 	.read_gpr            = emulator_read_gpr,
 	.write_gpr           = emulator_write_gpr,
 	.write_gpr           = emulator_write_gpr,
@@ -5019,6 +5012,7 @@ static const struct x86_emulate_ops emulate_ops = {
 	.put_fpu             = emulator_put_fpu,
 	.put_fpu             = emulator_put_fpu,
 	.intercept           = emulator_intercept,
 	.intercept           = emulator_intercept,
 	.get_cpuid           = emulator_get_cpuid,
 	.get_cpuid           = emulator_get_cpuid,
+	.set_nmi_mask        = emulator_set_nmi_mask,
 };
 };
 
 
 static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask)
 static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask)
@@ -6311,6 +6305,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 	}
 
 	trace_kvm_entry(vcpu->vcpu_id);
+	wait_lapic_expire(vcpu);
 	kvm_x86_ops->run(vcpu);
 
 	/*
@@ -7041,15 +7036,13 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 	return r;
 	return r;
 }
 }
 
 
-int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
+void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
 {
 {
-	int r;
 	struct msr_data msr;
 	struct msr_data msr;
 	struct kvm *kvm = vcpu->kvm;
 	struct kvm *kvm = vcpu->kvm;
 
 
-	r = vcpu_load(vcpu);
-	if (r)
-		return r;
+	if (vcpu_load(vcpu))
+		return;
 	msr.data = 0x0;
 	msr.data = 0x0;
 	msr.index = MSR_IA32_TSC;
 	msr.index = MSR_IA32_TSC;
 	msr.host_initiated = true;
 	msr.host_initiated = true;
@@ -7058,8 +7051,6 @@ int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
 
 
 	schedule_delayed_work(&kvm->arch.kvmclock_sync_work,
 	schedule_delayed_work(&kvm->arch.kvmclock_sync_work,
 					KVMCLOCK_SYNC_PERIOD);
 					KVMCLOCK_SYNC_PERIOD);
-
-	return r;
 }
 }
 
 
 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
@@ -7549,12 +7540,62 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
 	return 0;
 	return 0;
 }
 }
 
 
+static void kvm_mmu_slot_apply_flags(struct kvm *kvm,
+				     struct kvm_memory_slot *new)
+{
+	/* Still write protect RO slot */
+	if (new->flags & KVM_MEM_READONLY) {
+		kvm_mmu_slot_remove_write_access(kvm, new);
+		return;
+	}
+
+	/*
+	 * Call kvm_x86_ops dirty logging hooks when they are valid.
+	 *
+	 * kvm_x86_ops->slot_disable_log_dirty is called when:
+	 *
+	 *  - KVM_MR_CREATE with dirty logging is disabled
+	 *  - KVM_MR_FLAGS_ONLY with dirty logging is disabled in new flag
+	 *
+	 * The reason is, in case of PML, we need to set D-bit for any slots
+	 * with dirty logging disabled in order to eliminate unnecessary GPA
+	 * logging in PML buffer (and potential PML buffer full VMEXIT). This
+	 * guarantees leaving PML enabled during guest's lifetime won't have
+	 * any additional overhead from PML when guest is running with dirty
+	 * logging disabled for memory slots.
+	 *
+	 * kvm_x86_ops->slot_enable_log_dirty is called when switching new slot
+	 * to dirty logging mode.
+	 *
+	 * If kvm_x86_ops dirty logging hooks are invalid, use write protect.
+	 *
+	 * In case of write protect:
+	 *
+	 * Write protect all pages for dirty logging.
+	 *
+	 * All the sptes including the large sptes which point to this
+	 * slot are set to readonly. We can not create any new large
+	 * spte on this slot until the end of the logging.
+	 *
+	 * See the comments in fast_page_fault().
+	 */
+	if (new->flags & KVM_MEM_LOG_DIRTY_PAGES) {
+		if (kvm_x86_ops->slot_enable_log_dirty)
+			kvm_x86_ops->slot_enable_log_dirty(kvm, new);
+		else
+			kvm_mmu_slot_remove_write_access(kvm, new);
+	} else {
+		if (kvm_x86_ops->slot_disable_log_dirty)
+			kvm_x86_ops->slot_disable_log_dirty(kvm, new);
+	}
+}
+
 void kvm_arch_commit_memory_region(struct kvm *kvm,
 				struct kvm_userspace_memory_region *mem,
 				const struct kvm_memory_slot *old,
 				enum kvm_mr_change change)
 {
-
+	struct kvm_memory_slot *new;
 	int nr_mmu_pages = 0;
 	int nr_mmu_pages = 0;
 
 
 	if ((mem->slot >= KVM_USER_MEM_SLOTS) && (change == KVM_MR_DELETE)) {
 	if ((mem->slot >= KVM_USER_MEM_SLOTS) && (change == KVM_MR_DELETE)) {
@@ -7573,17 +7614,20 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
 
 
 	if (nr_mmu_pages)
 		kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages);
+
+	/* It's OK to get 'new' slot here as it has already been installed */
+	new = id_to_memslot(kvm->memslots, mem->slot);
+
 	/*
-	 * Write protect all pages for dirty logging.
+	 * Set up write protection and/or dirty logging for the new slot.
 	 *
-	 * All the sptes including the large sptes which point to this
-	 * slot are set to readonly. We can not create any new large
-	 * spte on this slot until the end of the logging.
-	 *
-	 * See the comments in fast_page_fault().
+	 * For KVM_MR_DELETE and KVM_MR_MOVE, the shadow pages of old slot have
+	 * been zapped so no dirty logging stuff is needed for old slot. For
+	 * KVM_MR_FLAGS_ONLY, the old slot is essentially the same one as the
+	 * new and it's also covered when dealing with the new slot.
 	 */
 	 */
-		kvm_mmu_slot_remove_write_access(kvm, mem->slot);
+	if (change != KVM_MR_DELETE)
+		kvm_mmu_slot_apply_flags(kvm, new);
 }
 }
 
 
 void kvm_arch_flush_shadow_all(struct kvm *kvm)
 void kvm_arch_flush_shadow_all(struct kvm *kvm)
@@ -7837,3 +7881,4 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_skinit);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intercepts);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intercepts);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_write_tsc_offset);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_ple_window);

+ 3 - 0
arch/x86/kvm/x86.h

@@ -147,6 +147,7 @@ static inline void kvm_register_writel(struct kvm_vcpu *vcpu,
 
 
 void kvm_before_handle_nmi(struct kvm_vcpu *vcpu);
 void kvm_before_handle_nmi(struct kvm_vcpu *vcpu);
 void kvm_after_handle_nmi(struct kvm_vcpu *vcpu);
 void kvm_after_handle_nmi(struct kvm_vcpu *vcpu);
+void kvm_set_pending_timer(struct kvm_vcpu *vcpu);
 int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip);
 int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip);
 
 
 void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr);
 void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr);
@@ -170,5 +171,7 @@ extern u64 kvm_supported_xcr0(void);
 
 
 extern unsigned int min_timer_period_us;
 extern unsigned int min_timer_period_us;
 
+
 extern struct static_key kvm_no_apic_vcpu;
 extern struct static_key kvm_no_apic_vcpu;
 #endif
+ 9 - 5
drivers/irqchip/irq-gic-v3.c

@@ -481,15 +481,19 @@ out:
 	return tlist;
 	return tlist;
 }
 
+	(MPIDR_AFFINITY_LEVEL(cluster_id, level) \
+		<< ICC_SGI1R_AFFINITY_## level ##_SHIFT)
+
 static void gic_send_sgi(u64 cluster_id, u16 tlist, unsigned int irq)
 static void gic_send_sgi(u64 cluster_id, u16 tlist, unsigned int irq)
 {
 	u64 val;
 
-	       MPIDR_AFFINITY_LEVEL(cluster_id, 2) << 32	|
-	       irq << 24			    		|
-	       MPIDR_AFFINITY_LEVEL(cluster_id, 1) << 16	|
-	       tlist);
+	val = (MPIDR_TO_SGI_AFFINITY(cluster_id, 3)	|
+	       MPIDR_TO_SGI_AFFINITY(cluster_id, 2)	|
+	       irq << ICC_SGI1R_SGI_ID_SHIFT		|
+	       MPIDR_TO_SGI_AFFINITY(cluster_id, 1)	|
+	       tlist << ICC_SGI1R_TARGET_LIST_SHIFT);
 
 
 	pr_debug("CPU%d: ICC_SGI1R_EL1 %llx\n", smp_processor_id(), val);
 	pr_debug("CPU%d: ICC_SGI1R_EL1 %llx\n", smp_processor_id(), val);
 	gic_write_sgi1r(val);

+ 8 - 0
drivers/s390/char/sclp_early.c

@@ -54,6 +54,7 @@ static unsigned long sclp_hsa_size;
 static unsigned int sclp_max_cpu;
 static unsigned int sclp_max_cpu;
 static struct sclp_ipl_info sclp_ipl_info;
 static struct sclp_ipl_info sclp_ipl_info;
 static unsigned char sclp_siif;
 static unsigned char sclp_siif;
+static unsigned char sclp_sigpif;
 static u32 sclp_ibc;
 static u32 sclp_ibc;
 static unsigned int sclp_mtid;
 static unsigned int sclp_mtid;
 static unsigned int sclp_mtid_cp;
 static unsigned int sclp_mtid_cp;
@@ -140,6 +141,7 @@ static void __init sclp_facilities_detect(struct read_info_sccb *sccb)
 		if (boot_cpu_address != cpue->core_id)
 		if (boot_cpu_address != cpue->core_id)
 			continue;
 			continue;
 		sclp_siif = cpue->siif;
 		sclp_siif = cpue->siif;
+		sclp_sigpif = cpue->sigpif;
 		break;
 		break;
 	}
 	}
 
 
@@ -186,6 +188,12 @@ int sclp_has_siif(void)
 }
 }
 EXPORT_SYMBOL(sclp_has_siif);
 EXPORT_SYMBOL(sclp_has_siif);
 
 
+int sclp_has_sigpif(void)
+{
+	return sclp_sigpif;
+}
+EXPORT_SYMBOL(sclp_has_sigpif);
+
 unsigned int sclp_get_ibc(void)
 unsigned int sclp_get_ibc(void)
 {
 {
 	return sclp_ibc;
 	return sclp_ibc;

+ 38 - 5
include/kvm/arm_vgic.h

@@ -33,10 +33,11 @@
 #define VGIC_V2_MAX_LRS		(1 << 6)
 #define VGIC_V3_MAX_LRS		16
 #define VGIC_MAX_IRQS		1024
+#define VGIC_V2_MAX_CPUS	8
 
 
 /* Sanity checks... */
-#if (KVM_MAX_VCPUS > 8)
-#error	Invalid number of CPU interfaces
+#if (KVM_MAX_VCPUS > 255)
+#error Too many KVM VCPUs, the VGIC only supports up to 255 VCPUs for now
 #endif
 #endif
 
 
 #if (VGIC_NR_IRQS_LEGACY & 31)
 #if (VGIC_NR_IRQS_LEGACY & 31)
@@ -132,6 +133,18 @@ struct vgic_params {
 	unsigned int	maint_irq;
 	unsigned int	maint_irq;
 	/* Virtual control interface base address */
 	/* Virtual control interface base address */
 	void __iomem	*vctrl_base;
 	void __iomem	*vctrl_base;
+	int		max_gic_vcpus;
+	/* Only needed for the legacy KVM_CREATE_IRQCHIP */
+	bool		can_emulate_gicv2;
+};
+
+struct vgic_vm_ops {
+	bool	(*handle_mmio)(struct kvm_vcpu *, struct kvm_run *,
+			       struct kvm_exit_mmio *);
+	bool	(*queue_sgi)(struct kvm_vcpu *, int irq);
+	void	(*add_sgi_source)(struct kvm_vcpu *, int irq, int source);
+	int	(*init_model)(struct kvm *);
+	int	(*map_resources)(struct kvm *, const struct vgic_params *);
 };
 };
 
 
 struct vgic_dist {
 struct vgic_dist {
@@ -140,6 +153,9 @@ struct vgic_dist {
 	bool			in_kernel;
 	bool			in_kernel;
 	bool			ready;
 	bool			ready;
 
 
+	/* vGIC model the kernel emulates for the guest (GICv2 or GICv3) */
+	u32			vgic_model;
+
 	int			nr_cpus;
 	int			nr_cpus;
 	int			nr_irqs;
 	int			nr_irqs;
 
 
@@ -148,7 +164,11 @@ struct vgic_dist {
 
 
 	/* Distributor and vcpu interface mapping in the guest */
 	/* Distributor and vcpu interface mapping in the guest */
 	phys_addr_t		vgic_dist_base;
 	phys_addr_t		vgic_dist_base;
-	phys_addr_t		vgic_cpu_base;
+	/* GICv2 and GICv3 use different mapped register blocks */
+	union {
+		phys_addr_t		vgic_cpu_base;
+		phys_addr_t		vgic_redist_base;
+	};
 
 
 	/* Distributor enabled */
 	/* Distributor enabled */
 	u32			enabled;
 	u32			enabled;
@@ -210,8 +230,13 @@ struct vgic_dist {
 	 */
 	 */
 	struct vgic_bitmap	*irq_spi_target;
 	struct vgic_bitmap	*irq_spi_target;
 
 
+	/* Target MPIDR for each IRQ (needed for GICv3 IROUTERn) only */
+	u32			*irq_spi_mpidr;
+
 	/* Bitmap indicating which CPU has something pending */
 	/* Bitmap indicating which CPU has something pending */
 	unsigned long		*irq_pending_on_cpu;
 	unsigned long		*irq_pending_on_cpu;
+
+	struct vgic_vm_ops	vm_ops;
 #endif
 #endif
 };
 };
 
 
@@ -229,6 +254,7 @@ struct vgic_v3_cpu_if {
 #ifdef CONFIG_ARM_GIC_V3
 #ifdef CONFIG_ARM_GIC_V3
 	u32		vgic_hcr;
 	u32		vgic_hcr;
 	u32		vgic_vmcr;
 	u32		vgic_vmcr;
+	u32		vgic_sre;	/* Restored only, change ignored */
 	u32		vgic_misr;	/* Saved only */
 	u32		vgic_misr;	/* Saved only */
 	u32		vgic_eisr;	/* Saved only */
 	u32		vgic_eisr;	/* Saved only */
 	u32		vgic_elrsr;	/* Saved only */
 	u32		vgic_elrsr;	/* Saved only */
@@ -275,13 +301,15 @@ struct kvm_exit_mmio;
 int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write);
 int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write);
 int kvm_vgic_hyp_init(void);
 int kvm_vgic_hyp_init(void);
 int kvm_vgic_map_resources(struct kvm *kvm);
 int kvm_vgic_map_resources(struct kvm *kvm);
-int kvm_vgic_create(struct kvm *kvm);
+int kvm_vgic_get_max_vcpus(void);
+int kvm_vgic_create(struct kvm *kvm, u32 type);
 void kvm_vgic_destroy(struct kvm *kvm);
 void kvm_vgic_destroy(struct kvm *kvm);
 void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu);
 void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu);
 void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu);
 void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu);
 void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu);
 void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu);
 int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num,
 int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num,
 			bool level);
 			bool level);
+void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg);
 int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu);
 int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu);
 bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
 bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
 		      struct kvm_exit_mmio *mmio);
 		      struct kvm_exit_mmio *mmio);
@@ -327,7 +355,7 @@ static inline int kvm_vgic_map_resources(struct kvm *kvm)
 	return 0;
 	return 0;
 }
 }
 
 
-static inline int kvm_vgic_create(struct kvm *kvm)
+static inline int kvm_vgic_create(struct kvm *kvm, u32 type)
 {
 {
 	return 0;
 	return 0;
 }
 }
@@ -379,6 +407,11 @@ static inline bool vgic_ready(struct kvm *kvm)
 {
 {
 	return true;
 	return true;
 }
 }
+
+static inline int kvm_vgic_get_max_vcpus(void)
+{
+	return KVM_MAX_VCPUS;
+}
 #endif
 #endif
 
 
 #endif
 #endif

+ 44 - 0
include/linux/irqchip/arm-gic-v3.h

@@ -33,6 +33,7 @@
 #define GICD_SETSPI_SR			0x0050
 #define GICD_SETSPI_SR			0x0050
 #define GICD_CLRSPI_SR			0x0058
 #define GICD_CLRSPI_SR			0x0058
 #define GICD_SEIR			0x0068
 #define GICD_SEIR			0x0068
+#define GICD_IGROUPR			0x0080
 #define GICD_ISENABLER			0x0100
 #define GICD_ISENABLER			0x0100
 #define GICD_ICENABLER			0x0180
 #define GICD_ICENABLER			0x0180
 #define GICD_ISPENDR			0x0200
 #define GICD_ISPENDR			0x0200
@@ -41,14 +42,37 @@
 #define GICD_ICACTIVER			0x0380
 #define GICD_ICACTIVER			0x0380
 #define GICD_IPRIORITYR			0x0400
 #define GICD_IPRIORITYR			0x0400
 #define GICD_ICFGR			0x0C00
 #define GICD_ICFGR			0x0C00
+#define GICD_IGRPMODR			0x0D00
+#define GICD_NSACR			0x0E00
 #define GICD_IROUTER			0x6000
 #define GICD_IROUTER			0x6000
+#define GICD_IDREGS			0xFFD0
 #define GICD_PIDR2			0xFFE8
 #define GICD_PIDR2			0xFFE8
 
 
+/*
+ * Those registers are actually from GICv2, but the spec demands that they
+ * are implemented as RES0 if ARE is 1 (which we do in KVM's emulated GICv3).
+ */
+#define GICD_ITARGETSR			0x0800
+#define GICD_SGIR			0x0F00
+#define GICD_CPENDSGIR			0x0F10
+#define GICD_SPENDSGIR			0x0F20
+
 #define GICD_CTLR_RWP			(1U << 31)
 #define GICD_CTLR_RWP			(1U << 31)
+#define GICD_CTLR_DS			(1U << 6)
 #define GICD_CTLR_ARE_NS		(1U << 4)
 #define GICD_CTLR_ARE_NS		(1U << 4)
 #define GICD_CTLR_ENABLE_G1A		(1U << 1)
 #define GICD_CTLR_ENABLE_G1A		(1U << 1)
 #define GICD_CTLR_ENABLE_G1		(1U << 0)
 #define GICD_CTLR_ENABLE_G1		(1U << 0)
 
 
+/*
+ * In systems with a single security state (what we emulate in KVM)
+ * the meaning of the interrupt group enable bits is slightly different
+ */
+#define GICD_CTLR_ENABLE_SS_G1		(1U << 1)
+#define GICD_CTLR_ENABLE_SS_G0		(1U << 0)
+
+#define GICD_TYPER_LPIS			(1U << 17)
+#define GICD_TYPER_MBIS			(1U << 16)
+
 #define GICD_TYPER_ID_BITS(typer)	((((typer) >> 19) & 0x1f) + 1)
 #define GICD_TYPER_ID_BITS(typer)	((((typer) >> 19) & 0x1f) + 1)
 #define GICD_TYPER_IRQS(typer)		((((typer) & 0x1f) + 1) * 32)
 #define GICD_TYPER_IRQS(typer)		((((typer) & 0x1f) + 1) * 32)
 #define GICD_TYPER_LPIS			(1U << 17)
 #define GICD_TYPER_LPIS			(1U << 17)
@@ -60,6 +84,8 @@
 #define GIC_PIDR2_ARCH_GICv3		0x30
 #define GIC_PIDR2_ARCH_GICv3		0x30
 #define GIC_PIDR2_ARCH_GICv4		0x40
 #define GIC_PIDR2_ARCH_GICv4		0x40
 
 
+#define GIC_V3_DIST_SIZE		0x10000
+
 /*
 /*
  * Re-Distributor registers, offsets from RD_base
  * Re-Distributor registers, offsets from RD_base
  */
  */
@@ -78,6 +104,7 @@
 #define GICR_SYNCR			0x00C0
 #define GICR_SYNCR			0x00C0
 #define GICR_MOVLPIR			0x0100
 #define GICR_MOVLPIR			0x0100
 #define GICR_MOVALLR			0x0110
 #define GICR_MOVALLR			0x0110
+#define GICR_IDREGS			GICD_IDREGS
 #define GICR_PIDR2			GICD_PIDR2
 #define GICR_PIDR2			GICD_PIDR2
 
 
 #define GICR_CTLR_ENABLE_LPIS		(1UL << 0)
 #define GICR_CTLR_ENABLE_LPIS		(1UL << 0)
@@ -104,6 +131,7 @@
 /*
 /*
  * Re-Distributor registers, offsets from SGI_base
  * Re-Distributor registers, offsets from SGI_base
  */
  */
+#define GICR_IGROUPR0			GICD_IGROUPR
 #define GICR_ISENABLER0			GICD_ISENABLER
 #define GICR_ISENABLER0			GICD_ISENABLER
 #define GICR_ICENABLER0			GICD_ICENABLER
 #define GICR_ICENABLER0			GICD_ICENABLER
 #define GICR_ISPENDR0			GICD_ISPENDR
 #define GICR_ISPENDR0			GICD_ISPENDR
@@ -112,11 +140,15 @@
 #define GICR_ICACTIVER0			GICD_ICACTIVER
 #define GICR_ICACTIVER0			GICD_ICACTIVER
 #define GICR_IPRIORITYR0		GICD_IPRIORITYR
 #define GICR_IPRIORITYR0		GICD_IPRIORITYR
 #define GICR_ICFGR0			GICD_ICFGR
 #define GICR_ICFGR0			GICD_ICFGR
+#define GICR_IGRPMODR0			GICD_IGRPMODR
+#define GICR_NSACR			GICD_NSACR
 
 
 #define GICR_TYPER_PLPIS		(1U << 0)
 #define GICR_TYPER_PLPIS		(1U << 0)
 #define GICR_TYPER_VLPIS		(1U << 1)
 #define GICR_TYPER_VLPIS		(1U << 1)
 #define GICR_TYPER_LAST			(1U << 4)
 #define GICR_TYPER_LAST			(1U << 4)
 
 
+#define GIC_V3_REDIST_SIZE		0x20000
+
 #define LPI_PROP_GROUP1			(1 << 1)
 #define LPI_PROP_GROUP1			(1 << 1)
 #define LPI_PROP_ENABLED		(1 << 0)
 #define LPI_PROP_ENABLED		(1 << 0)
 
 
@@ -248,6 +280,18 @@
 #define ICC_SRE_EL2_SRE			(1 << 0)
 #define ICC_SRE_EL2_SRE			(1 << 0)
 #define ICC_SRE_EL2_ENABLE		(1 << 3)
 #define ICC_SRE_EL2_ENABLE		(1 << 3)
 
 
+#define ICC_SGI1R_TARGET_LIST_SHIFT	0
+#define ICC_SGI1R_TARGET_LIST_MASK	(0xffff << ICC_SGI1R_TARGET_LIST_SHIFT)
+#define ICC_SGI1R_AFFINITY_1_SHIFT	16
+#define ICC_SGI1R_AFFINITY_1_MASK	(0xff << ICC_SGI1R_AFFINITY_1_SHIFT)
+#define ICC_SGI1R_SGI_ID_SHIFT		24
+#define ICC_SGI1R_SGI_ID_MASK		(0xff << ICC_SGI1R_SGI_ID_SHIFT)
+#define ICC_SGI1R_AFFINITY_2_SHIFT	32
+#define ICC_SGI1R_AFFINITY_2_MASK	(0xffULL << ICC_SGI1R_AFFINITY_2_SHIFT)
+#define ICC_SGI1R_IRQ_ROUTING_MODE_BIT	40
+#define ICC_SGI1R_AFFINITY_3_SHIFT	48
+#define ICC_SGI1R_AFFINITY_3_MASK	(0xffULL << ICC_SGI1R_AFFINITY_3_SHIFT)
+
 /*
 /*
  * System register definitions
  * System register definitions
  */
  */

+ 12 - 5
include/linux/kvm_host.h

@@ -33,10 +33,6 @@
 
 
 #include <asm/kvm_host.h>
 #include <asm/kvm_host.h>
 
 
-#ifndef KVM_MMIO_SIZE
-#define KVM_MMIO_SIZE 8
-#endif
-
 /*
 /*
  * The bit 16 ~ bit 31 of kvm_memory_region::flags are internally used
  * The bit 16 ~ bit 31 of kvm_memory_region::flags are internally used
  * in kvm, other bits are visible for userspace which are defined in
  * in kvm, other bits are visible for userspace which are defined in
@@ -600,6 +596,15 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext);
 
 
 int kvm_get_dirty_log(struct kvm *kvm,
 int kvm_get_dirty_log(struct kvm *kvm,
 			struct kvm_dirty_log *log, int *is_dirty);
 			struct kvm_dirty_log *log, int *is_dirty);
+
+int kvm_get_dirty_log_protect(struct kvm *kvm,
+			struct kvm_dirty_log *log, bool *is_dirty);
+
+void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
+					struct kvm_memory_slot *slot,
+					gfn_t gfn_offset,
+					unsigned long mask);
+
 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 				struct kvm_dirty_log *log);
 				struct kvm_dirty_log *log);
 
 
@@ -641,7 +646,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu);
 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu);
 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu);
 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id);
 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id);
 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu);
 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu);
-int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu);
+void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu);
 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu);
 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu);
 
 
 int kvm_arch_hardware_enable(void);
 int kvm_arch_hardware_enable(void);
@@ -1031,6 +1036,8 @@ void kvm_unregister_device_ops(u32 type);
 
 
 extern struct kvm_device_ops kvm_mpic_ops;
 extern struct kvm_device_ops kvm_mpic_ops;
 extern struct kvm_device_ops kvm_xics_ops;
 extern struct kvm_device_ops kvm_xics_ops;
+extern struct kvm_device_ops kvm_arm_vgic_v2_ops;
+extern struct kvm_device_ops kvm_arm_vgic_v3_ops;
 
 
 #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT
 #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT
 
 

+ 19 - 0
include/trace/events/kvm.h

@@ -37,6 +37,25 @@ TRACE_EVENT(kvm_userspace_exit,
 		  __entry->errno < 0 ? -__entry->errno : __entry->reason)
 );
 
+TRACE_EVENT(kvm_vcpu_wakeup,
+	    TP_PROTO(__u64 ns, bool waited),
+	    TP_ARGS(ns, waited),
+
+	TP_STRUCT__entry(
+		__field(	__u64,		ns		)
+		__field(	bool,		waited		)
+	),
+
+	TP_fast_assign(
+		__entry->ns		= ns;
+		__entry->waited		= waited;
+	),
+
+	TP_printk("%s time %lld ns",
+		  __entry->waited ? "wait" : "poll",
+		  __entry->ns)
+);
+
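
Presumably this is emitted from the generic halt-polling path added by the halt_poll_ns patch, so each HLT shows up as either a "poll" or a "wait" sample with its duration. A small sketch of enabling the event from userspace, assuming tracefs is mounted in the usual place:

#include <stdio.h>

int main(void)
{
	/* Path is an assumption; adjust if tracefs is mounted elsewhere. */
	FILE *f = fopen("/sys/kernel/debug/tracing/events/kvm/kvm_vcpu_wakeup/enable", "w");

	if (!f)
		return 1;
	fputs("1\n", f);
	fclose(f);
	return 0;
}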
 #if defined(CONFIG_HAVE_KVM_IRQFD)
 TRACE_EVENT(kvm_set_irq,
 	TP_PROTO(unsigned int gsi, int level, int irq_source_id),

+ 9 - 0
include/uapi/linux/kvm.h

@@ -491,6 +491,11 @@ struct kvm_s390_emerg_info {
 	__u16 code;
 	__u16 code;
 };
 };
 
 
+#define KVM_S390_STOP_FLAG_STORE_STATUS	0x01
+struct kvm_s390_stop_info {
+	__u32 flags;
+};
+
 struct kvm_s390_mchk_info {
 struct kvm_s390_mchk_info {
 	__u64 cr14;
 	__u64 cr14;
 	__u64 mcic;
 	__u64 mcic;
@@ -509,6 +514,7 @@ struct kvm_s390_irq {
 		struct kvm_s390_emerg_info emerg;
 		struct kvm_s390_emerg_info emerg;
 		struct kvm_s390_extcall_info extcall;
 		struct kvm_s390_extcall_info extcall;
 		struct kvm_s390_prefix_info prefix;
 		struct kvm_s390_prefix_info prefix;
+		struct kvm_s390_stop_info stop;
 		struct kvm_s390_mchk_info mchk;
 		struct kvm_s390_mchk_info mchk;
 		char reserved[64];
 		char reserved[64];
 	} u;
 	} u;
@@ -753,6 +759,7 @@ struct kvm_ppc_smmu_info {
 #define KVM_CAP_PPC_FIXUP_HCALL 103
 #define KVM_CAP_PPC_FIXUP_HCALL 103
 #define KVM_CAP_PPC_ENABLE_HCALL 104
 #define KVM_CAP_PPC_ENABLE_HCALL 104
 #define KVM_CAP_CHECK_EXTENSION_VM 105
 #define KVM_CAP_CHECK_EXTENSION_VM 105
+#define KVM_CAP_S390_USER_SIGP 106
 
 
 #ifdef KVM_CAP_IRQ_ROUTING
 #ifdef KVM_CAP_IRQ_ROUTING
 
 
@@ -952,6 +959,8 @@ enum kvm_device_type {
 #define KVM_DEV_TYPE_ARM_VGIC_V2	KVM_DEV_TYPE_ARM_VGIC_V2
 #define KVM_DEV_TYPE_ARM_VGIC_V2	KVM_DEV_TYPE_ARM_VGIC_V2
 	KVM_DEV_TYPE_FLIC,
 	KVM_DEV_TYPE_FLIC,
 #define KVM_DEV_TYPE_FLIC		KVM_DEV_TYPE_FLIC
 #define KVM_DEV_TYPE_FLIC		KVM_DEV_TYPE_FLIC
+	KVM_DEV_TYPE_ARM_VGIC_V3,
+#define KVM_DEV_TYPE_ARM_VGIC_V3	KVM_DEV_TYPE_ARM_VGIC_V3
 	KVM_DEV_TYPE_MAX,
 	KVM_DEV_TYPE_MAX,
 };
 };
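
With KVM_DEV_TYPE_ARM_VGIC_V3 exposed here, userspace can request the new GICv3 emulation through the generic device API. A minimal sketch (vm_fd is assumed to be an existing VM file descriptor; setting the distributor/redistributor base addresses via KVM_SET_DEVICE_ATTR is left out):

#include <linux/kvm.h>
#include <sys/ioctl.h>

/* Returns a device fd for the in-kernel GICv3, or -1 on failure. */
int create_vgic_v3(int vm_fd)
{
	struct kvm_create_device dev = {
		.type	= KVM_DEV_TYPE_ARM_VGIC_V3,
		.flags	= 0,
	};

	if (ioctl(vm_fd, KVM_CREATE_DEVICE, &dev) < 0)
		return -1;
	return dev.fd;
}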
 
 

+ 10 - 0
virt/kvm/Kconfig

@@ -37,3 +37,13 @@ config HAVE_KVM_CPU_RELAX_INTERCEPT
 
 
 config KVM_VFIO
 config KVM_VFIO
        bool
        bool
+
+config HAVE_KVM_ARCH_TLB_FLUSH_ALL
+       bool
+
+config KVM_GENERIC_DIRTYLOG_READ_PROTECT
+       bool
+
+config KVM_COMPAT
+       def_bool y
+       depends on COMPAT && !S390

+ 847 - 0
virt/kvm/arm/vgic-v2-emul.c

@@ -0,0 +1,847 @@
+/*
+ * Contains GICv2 specific emulation code, was in vgic.c before.
+ *
+ * Copyright (C) 2012 ARM Ltd.
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/cpu.h>
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/uaccess.h>
+
+#include <linux/irqchip/arm-gic.h>
+
+#include <asm/kvm_emulate.h>
+#include <asm/kvm_arm.h>
+#include <asm/kvm_mmu.h>
+
+#include "vgic.h"
+
+#define GICC_ARCH_VERSION_V2		0x2
+
+static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg);
+static u8 *vgic_get_sgi_sources(struct vgic_dist *dist, int vcpu_id, int sgi)
+{
+	return dist->irq_sgi_sources + vcpu_id * VGIC_NR_SGIS + sgi;
+}
+
+static bool handle_mmio_misc(struct kvm_vcpu *vcpu,
+			     struct kvm_exit_mmio *mmio, phys_addr_t offset)
+{
+	u32 reg;
+	u32 word_offset = offset & 3;
+
+	switch (offset & ~3) {
+	case 0:			/* GICD_CTLR */
+		reg = vcpu->kvm->arch.vgic.enabled;
+		vgic_reg_access(mmio, &reg, word_offset,
+				ACCESS_READ_VALUE | ACCESS_WRITE_VALUE);
+		if (mmio->is_write) {
+			vcpu->kvm->arch.vgic.enabled = reg & 1;
+			vgic_update_state(vcpu->kvm);
+			return true;
+		}
+		break;
+
+	case 4:			/* GICD_TYPER */
+		reg  = (atomic_read(&vcpu->kvm->online_vcpus) - 1) << 5;
+		reg |= (vcpu->kvm->arch.vgic.nr_irqs >> 5) - 1;
+		vgic_reg_access(mmio, &reg, word_offset,
+				ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED);
+		break;
+
+	case 8:			/* GICD_IIDR */
+		reg = (PRODUCT_ID_KVM << 24) | (IMPLEMENTER_ARM << 0);
+		vgic_reg_access(mmio, &reg, word_offset,
+				ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED);
+		break;
+	}
+
+	return false;
+}
+
+static bool handle_mmio_set_enable_reg(struct kvm_vcpu *vcpu,
+				       struct kvm_exit_mmio *mmio,
+				       phys_addr_t offset)
+{
+	return vgic_handle_enable_reg(vcpu->kvm, mmio, offset,
+				      vcpu->vcpu_id, ACCESS_WRITE_SETBIT);
+}
+
+static bool handle_mmio_clear_enable_reg(struct kvm_vcpu *vcpu,
+					 struct kvm_exit_mmio *mmio,
+					 phys_addr_t offset)
+{
+	return vgic_handle_enable_reg(vcpu->kvm, mmio, offset,
+				      vcpu->vcpu_id, ACCESS_WRITE_CLEARBIT);
+}
+
+static bool handle_mmio_set_pending_reg(struct kvm_vcpu *vcpu,
+					struct kvm_exit_mmio *mmio,
+					phys_addr_t offset)
+{
+	return vgic_handle_set_pending_reg(vcpu->kvm, mmio, offset,
+					   vcpu->vcpu_id);
+}
+
+static bool handle_mmio_clear_pending_reg(struct kvm_vcpu *vcpu,
+					  struct kvm_exit_mmio *mmio,
+					  phys_addr_t offset)
+{
+	return vgic_handle_clear_pending_reg(vcpu->kvm, mmio, offset,
+					     vcpu->vcpu_id);
+}
+
+static bool handle_mmio_priority_reg(struct kvm_vcpu *vcpu,
+				     struct kvm_exit_mmio *mmio,
+				     phys_addr_t offset)
+{
+	u32 *reg = vgic_bytemap_get_reg(&vcpu->kvm->arch.vgic.irq_priority,
+					vcpu->vcpu_id, offset);
+	vgic_reg_access(mmio, reg, offset,
+			ACCESS_READ_VALUE | ACCESS_WRITE_VALUE);
+	return false;
+}
+
+#define GICD_ITARGETSR_SIZE	32
+#define GICD_CPUTARGETS_BITS	8
+#define GICD_IRQS_PER_ITARGETSR	(GICD_ITARGETSR_SIZE / GICD_CPUTARGETS_BITS)
+static u32 vgic_get_target_reg(struct kvm *kvm, int irq)
+{
+	struct vgic_dist *dist = &kvm->arch.vgic;
+	int i;
+	u32 val = 0;
+
+	irq -= VGIC_NR_PRIVATE_IRQS;
+
+	for (i = 0; i < GICD_IRQS_PER_ITARGETSR; i++)
+		val |= 1 << (dist->irq_spi_cpu[irq + i] + i * 8);
+
+	return val;
+}
+
+static void vgic_set_target_reg(struct kvm *kvm, u32 val, int irq)
+{
+	struct vgic_dist *dist = &kvm->arch.vgic;
+	struct kvm_vcpu *vcpu;
+	int i, c;
+	unsigned long *bmap;
+	u32 target;
+
+	irq -= VGIC_NR_PRIVATE_IRQS;
+
+	/*
+	 * Pick the LSB in each byte. This ensures we target exactly
+	 * one vcpu per IRQ. If the byte is null, assume we target
+	 * CPU0.
+	 */
+	for (i = 0; i < GICD_IRQS_PER_ITARGETSR; i++) {
+		int shift = i * GICD_CPUTARGETS_BITS;
+
+		target = ffs((val >> shift) & 0xffU);
+		target = target ? (target - 1) : 0;
+		dist->irq_spi_cpu[irq + i] = target;
+		kvm_for_each_vcpu(c, vcpu, kvm) {
+			bmap = vgic_bitmap_get_shared_map(&dist->irq_spi_target[c]);
+			if (c == target)
+				set_bit(irq + i, bmap);
+			else
+				clear_bit(irq + i, bmap);
+		}
+	}
+}
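
A quick worked example of the rule in the comment above: a GICD_ITARGETSR byte of 0x0c (CPU2 and CPU3 set) routes the interrupt to CPU2, since ffs() picks the least-significant set bit, and an all-zero byte falls back to CPU0. Purely illustrative:

#include <stdio.h>
#include <strings.h>	/* ffs() */

static int target_cpu(unsigned char itargetsr_byte)
{
	int t = ffs(itargetsr_byte);

	return t ? t - 1 : 0;
}

int main(void)
{
	printf("%d %d %d\n", target_cpu(0x0c), target_cpu(0x01), target_cpu(0x00));
	return 0;	/* prints: 2 0 0 */
}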
+
+static bool handle_mmio_target_reg(struct kvm_vcpu *vcpu,
+				   struct kvm_exit_mmio *mmio,
+				   phys_addr_t offset)
+{
+	u32 reg;
+
+	/* We treat the banked interrupts targets as read-only */
+	if (offset < 32) {
+		u32 roreg;
+
+		roreg = 1 << vcpu->vcpu_id;
+		roreg |= roreg << 8;
+		roreg |= roreg << 16;
+
+		vgic_reg_access(mmio, &roreg, offset,
+				ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED);
+		return false;
+	}
+
+	reg = vgic_get_target_reg(vcpu->kvm, offset & ~3U);
+	vgic_reg_access(mmio, &reg, offset,
+			ACCESS_READ_VALUE | ACCESS_WRITE_VALUE);
+	if (mmio->is_write) {
+		vgic_set_target_reg(vcpu->kvm, reg, offset & ~3U);
+		vgic_update_state(vcpu->kvm);
+		return true;
+	}
+
+	return false;
+}
+
+static bool handle_mmio_cfg_reg(struct kvm_vcpu *vcpu,
+				struct kvm_exit_mmio *mmio, phys_addr_t offset)
+{
+	u32 *reg;
+
+	reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_cfg,
+				  vcpu->vcpu_id, offset >> 1);
+
+	return vgic_handle_cfg_reg(reg, mmio, offset);
+}
+
+static bool handle_mmio_sgi_reg(struct kvm_vcpu *vcpu,
+				struct kvm_exit_mmio *mmio, phys_addr_t offset)
+{
+	u32 reg;
+
+	vgic_reg_access(mmio, &reg, offset,
+			ACCESS_READ_RAZ | ACCESS_WRITE_VALUE);
+	if (mmio->is_write) {
+		vgic_dispatch_sgi(vcpu, reg);
+		vgic_update_state(vcpu->kvm);
+		return true;
+	}
+
+	return false;
+}
+
+/* Handle reads of GICD_CPENDSGIRn and GICD_SPENDSGIRn */
+static bool read_set_clear_sgi_pend_reg(struct kvm_vcpu *vcpu,
+					struct kvm_exit_mmio *mmio,
+					phys_addr_t offset)
+{
+	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+	int sgi;
+	int min_sgi = (offset & ~0x3);
+	int max_sgi = min_sgi + 3;
+	int vcpu_id = vcpu->vcpu_id;
+	u32 reg = 0;
+
+	/* Copy source SGIs from distributor side */
+	for (sgi = min_sgi; sgi <= max_sgi; sgi++) {
+		u8 sources = *vgic_get_sgi_sources(dist, vcpu_id, sgi);
+
+		reg |= ((u32)sources) << (8 * (sgi - min_sgi));
+	}
+
+	mmio_data_write(mmio, ~0, reg);
+	return false;
+}
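
(A minimal standalone sketch of the packing done by the read path above: one pending-source byte per SGI, four SGIs per GICD_SPENDSGIRn word. The source values are made up; illustration only, not part of the patch.)

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint8_t sources[4] = { 0x01, 0x00, 0x06, 0x80 };	/* pending sources for SGIs n..n+3 */
	uint32_t reg = 0;
	int i;

	for (i = 0; i < 4; i++)
		reg |= (uint32_t)sources[i] << (8 * i);		/* one byte per SGI */

	printf("GICD_SPENDSGIRn = %#x\n", (unsigned)reg);	/* prints 0x80060001 */
	return 0;
}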
+
+static bool write_set_clear_sgi_pend_reg(struct kvm_vcpu *vcpu,
+					 struct kvm_exit_mmio *mmio,
+					 phys_addr_t offset, bool set)
+{
+	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+	int sgi;
+	int min_sgi = (offset & ~0x3);
+	int max_sgi = min_sgi + 3;
+	int vcpu_id = vcpu->vcpu_id;
+	u32 reg;
+	bool updated = false;
+
+	reg = mmio_data_read(mmio, ~0);
+
+	/* Set or clear pending SGI source bits on the distributor */
+	for (sgi = min_sgi; sgi <= max_sgi; sgi++) {
+		u8 mask = reg >> (8 * (sgi - min_sgi));
+		u8 *src = vgic_get_sgi_sources(dist, vcpu_id, sgi);
+
+		if (set) {
+			if ((*src & mask) != mask)
+				updated = true;
+			*src |= mask;
+		} else {
+			if (*src & mask)
+				updated = true;
+			*src &= ~mask;
+		}
+	}
+
+	if (updated)
+		vgic_update_state(vcpu->kvm);
+
+	return updated;
+}
+
+static bool handle_mmio_sgi_set(struct kvm_vcpu *vcpu,
+				struct kvm_exit_mmio *mmio,
+				phys_addr_t offset)
+{
+	if (!mmio->is_write)
+		return read_set_clear_sgi_pend_reg(vcpu, mmio, offset);
+	else
+		return write_set_clear_sgi_pend_reg(vcpu, mmio, offset, true);
+}
+
+static bool handle_mmio_sgi_clear(struct kvm_vcpu *vcpu,
+				  struct kvm_exit_mmio *mmio,
+				  phys_addr_t offset)
+{
+	if (!mmio->is_write)
+		return read_set_clear_sgi_pend_reg(vcpu, mmio, offset);
+	else
+		return write_set_clear_sgi_pend_reg(vcpu, mmio, offset, false);
+}
+
+static const struct kvm_mmio_range vgic_dist_ranges[] = {
+	{
+		.base		= GIC_DIST_CTRL,
+		.len		= 12,
+		.bits_per_irq	= 0,
+		.handle_mmio	= handle_mmio_misc,
+	},
+	{
+		.base		= GIC_DIST_IGROUP,
+		.len		= VGIC_MAX_IRQS / 8,
+		.bits_per_irq	= 1,
+		.handle_mmio	= handle_mmio_raz_wi,
+	},
+	{
+		.base		= GIC_DIST_ENABLE_SET,
+		.len		= VGIC_MAX_IRQS / 8,
+		.bits_per_irq	= 1,
+		.handle_mmio	= handle_mmio_set_enable_reg,
+	},
+	{
+		.base		= GIC_DIST_ENABLE_CLEAR,
+		.len		= VGIC_MAX_IRQS / 8,
+		.bits_per_irq	= 1,
+		.handle_mmio	= handle_mmio_clear_enable_reg,
+	},
+	{
+		.base		= GIC_DIST_PENDING_SET,
+		.len		= VGIC_MAX_IRQS / 8,
+		.bits_per_irq	= 1,
+		.handle_mmio	= handle_mmio_set_pending_reg,
+	},
+	{
+		.base		= GIC_DIST_PENDING_CLEAR,
+		.len		= VGIC_MAX_IRQS / 8,
+		.bits_per_irq	= 1,
+		.handle_mmio	= handle_mmio_clear_pending_reg,
+	},
+	{
+		.base		= GIC_DIST_ACTIVE_SET,
+		.len		= VGIC_MAX_IRQS / 8,
+		.bits_per_irq	= 1,
+		.handle_mmio	= handle_mmio_raz_wi,
+	},
+	{
+		.base		= GIC_DIST_ACTIVE_CLEAR,
+		.len		= VGIC_MAX_IRQS / 8,
+		.bits_per_irq	= 1,
+		.handle_mmio	= handle_mmio_raz_wi,
+	},
+	{
+		.base		= GIC_DIST_PRI,
+		.len		= VGIC_MAX_IRQS,
+		.bits_per_irq	= 8,
+		.handle_mmio	= handle_mmio_priority_reg,
+	},
+	{
+		.base		= GIC_DIST_TARGET,
+		.len		= VGIC_MAX_IRQS,
+		.bits_per_irq	= 8,
+		.handle_mmio	= handle_mmio_target_reg,
+	},
+	{
+		.base		= GIC_DIST_CONFIG,
+		.len		= VGIC_MAX_IRQS / 4,
+		.bits_per_irq	= 2,
+		.handle_mmio	= handle_mmio_cfg_reg,
+	},
+	{
+		.base		= GIC_DIST_SOFTINT,
+		.len		= 4,
+		.handle_mmio	= handle_mmio_sgi_reg,
+	},
+	{
+		.base		= GIC_DIST_SGI_PENDING_CLEAR,
+		.len		= VGIC_NR_SGIS,
+		.handle_mmio	= handle_mmio_sgi_clear,
+	},
+	{
+		.base		= GIC_DIST_SGI_PENDING_SET,
+		.len		= VGIC_NR_SGIS,
+		.handle_mmio	= handle_mmio_sgi_set,
+	},
+	{}
+};
+
+static bool vgic_v2_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
+				struct kvm_exit_mmio *mmio)
+{
+	unsigned long base = vcpu->kvm->arch.vgic.vgic_dist_base;
+
+	if (!is_in_range(mmio->phys_addr, mmio->len, base,
+			 KVM_VGIC_V2_DIST_SIZE))
+		return false;
+
+	/* GICv2 does not support accesses wider than 32 bits */
+	if (mmio->len > 4) {
+		kvm_inject_dabt(vcpu, mmio->phys_addr);
+		return true;
+	}
+
+	return vgic_handle_mmio_range(vcpu, run, mmio, vgic_dist_ranges, base);
+}
+
+static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg)
+{
+	struct kvm *kvm = vcpu->kvm;
+	struct vgic_dist *dist = &kvm->arch.vgic;
+	int nrcpus = atomic_read(&kvm->online_vcpus);
+	u8 target_cpus;
+	int sgi, mode, c, vcpu_id;
+
+	vcpu_id = vcpu->vcpu_id;
+
+	sgi = reg & 0xf;
+	target_cpus = (reg >> 16) & 0xff;
+	mode = (reg >> 24) & 3;
+
+	switch (mode) {
+	case 0:
+		if (!target_cpus)
+			return;
+		break;
+
+	case 1:
+		target_cpus = ((1 << nrcpus) - 1) & ~(1 << vcpu_id) & 0xff;
+		break;
+
+	case 2:
+		target_cpus = 1 << vcpu_id;
+		break;
+	}
+
+	kvm_for_each_vcpu(c, vcpu, kvm) {
+		if (target_cpus & 1) {
+			/* Flag the SGI as pending */
+			vgic_dist_irq_set_pending(vcpu, sgi);
+			*vgic_get_sgi_sources(dist, c, sgi) |= 1 << vcpu_id;
+			kvm_debug("SGI%d from CPU%d to CPU%d\n",
+				  sgi, vcpu_id, c);
+		}
+
+		target_cpus >>= 1;
+	}
+}
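
(A minimal standalone sketch of how the three GICD_SGIR filter modes reduce to a target CPU mask, mirroring the switch above. The register value, CPU count and sender id are made-up examples; illustration only, not part of the patch.)

#include <stdio.h>

int main(void)
{
	unsigned int reg = (1u << 24) | (0x0cu << 16) | 5;	/* filter mode 1, target list 0x0c, SGI5 */
	int nrcpus = 4, self = 0;

	unsigned int sgi = reg & 0xf;
	unsigned int targets = (reg >> 16) & 0xff;
	unsigned int mode = (reg >> 24) & 3;

	switch (mode) {
	case 0:						/* use the target list as written */
		break;
	case 1:						/* all CPUs except the sender */
		targets = ((1u << nrcpus) - 1) & ~(1u << self) & 0xff;
		break;
	case 2:						/* only the sender itself */
		targets = 1u << self;
		break;
	}

	printf("SGI%u -> target mask %#x\n", sgi, targets);	/* prints 0xe here */
	return 0;
}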
+
+static bool vgic_v2_queue_sgi(struct kvm_vcpu *vcpu, int irq)
+{
+	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+	unsigned long sources;
+	int vcpu_id = vcpu->vcpu_id;
+	int c;
+
+	sources = *vgic_get_sgi_sources(dist, vcpu_id, irq);
+
+	for_each_set_bit(c, &sources, dist->nr_cpus) {
+		if (vgic_queue_irq(vcpu, c, irq))
+			clear_bit(c, &sources);
+	}
+
+	*vgic_get_sgi_sources(dist, vcpu_id, irq) = sources;
+
+	/*
+	 * If the sources bitmap has been cleared it means that we
+	 * could queue all the SGIs onto link registers (see the
+	 * clear_bit above), and therefore we are done with them in
+	 * our emulated gic and can get rid of them.
+	 */
+	if (!sources) {
+		vgic_dist_irq_clear_pending(vcpu, irq);
+		vgic_cpu_irq_clear(vcpu, irq);
+		return true;
+	}
+
+	return false;
+}
+
+/**
+ * vgic_v2_map_resources - Configure global VGIC state before running any VCPUs
+ * @kvm: pointer to the kvm struct
+ *
+ * Map the virtual CPU interface into the VM before running any VCPUs.  We
+ * can't do this at creation time, because user space must first set the
+ * virtual CPU interface address in the guest physical address space.
+ */
+static int vgic_v2_map_resources(struct kvm *kvm,
+				 const struct vgic_params *params)
+{
+	int ret = 0;
+
+	if (!irqchip_in_kernel(kvm))
+		return 0;
+
+	mutex_lock(&kvm->lock);
+
+	if (vgic_ready(kvm))
+		goto out;
+
+	if (IS_VGIC_ADDR_UNDEF(kvm->arch.vgic.vgic_dist_base) ||
+	    IS_VGIC_ADDR_UNDEF(kvm->arch.vgic.vgic_cpu_base)) {
+		kvm_err("Need to set vgic cpu and dist addresses first\n");
+		ret = -ENXIO;
+		goto out;
+	}
+
+	/*
+	 * Initialize the vgic if this hasn't already been done on demand by
+	 * accessing the vgic state from userspace.
+	 */
+	ret = vgic_init(kvm);
+	if (ret) {
+		kvm_err("Unable to allocate maps\n");
+		goto out;
+	}
+
+	ret = kvm_phys_addr_ioremap(kvm, kvm->arch.vgic.vgic_cpu_base,
+				    params->vcpu_base, KVM_VGIC_V2_CPU_SIZE,
+				    true);
+	if (ret) {
+		kvm_err("Unable to remap VGIC CPU to VCPU\n");
+		goto out;
+	}
+
+	kvm->arch.vgic.ready = true;
+out:
+	if (ret)
+		kvm_vgic_destroy(kvm);
+	mutex_unlock(&kvm->lock);
+	return ret;
+}
+
+static void vgic_v2_add_sgi_source(struct kvm_vcpu *vcpu, int irq, int source)
+{
+	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+
+	*vgic_get_sgi_sources(dist, vcpu->vcpu_id, irq) |= 1 << source;
+}
+
+static int vgic_v2_init_model(struct kvm *kvm)
+{
+	int i;
+
+	for (i = VGIC_NR_PRIVATE_IRQS; i < kvm->arch.vgic.nr_irqs; i += 4)
+		vgic_set_target_reg(kvm, 0, i);
+
+	return 0;
+}
+
+void vgic_v2_init_emulation(struct kvm *kvm)
+{
+	struct vgic_dist *dist = &kvm->arch.vgic;
+
+	dist->vm_ops.handle_mmio = vgic_v2_handle_mmio;
+	dist->vm_ops.queue_sgi = vgic_v2_queue_sgi;
+	dist->vm_ops.add_sgi_source = vgic_v2_add_sgi_source;
+	dist->vm_ops.init_model = vgic_v2_init_model;
+	dist->vm_ops.map_resources = vgic_v2_map_resources;
+
+	kvm->arch.max_vcpus = VGIC_V2_MAX_CPUS;
+}
+
+static bool handle_cpu_mmio_misc(struct kvm_vcpu *vcpu,
+				 struct kvm_exit_mmio *mmio, phys_addr_t offset)
+{
+	bool updated = false;
+	struct vgic_vmcr vmcr;
+	u32 *vmcr_field;
+	u32 reg;
+
+	vgic_get_vmcr(vcpu, &vmcr);
+
+	switch (offset & ~0x3) {
+	case GIC_CPU_CTRL:
+		vmcr_field = &vmcr.ctlr;
+		break;
+	case GIC_CPU_PRIMASK:
+		vmcr_field = &vmcr.pmr;
+		break;
+	case GIC_CPU_BINPOINT:
+		vmcr_field = &vmcr.bpr;
+		break;
+	case GIC_CPU_ALIAS_BINPOINT:
+		vmcr_field = &vmcr.abpr;
+		break;
+	default:
+		BUG();
+	}
+
+	if (!mmio->is_write) {
+		reg = *vmcr_field;
+		mmio_data_write(mmio, ~0, reg);
+	} else {
+		reg = mmio_data_read(mmio, ~0);
+		if (reg != *vmcr_field) {
+			*vmcr_field = reg;
+			vgic_set_vmcr(vcpu, &vmcr);
+			updated = true;
+		}
+	}
+	return updated;
+}
+
+static bool handle_mmio_abpr(struct kvm_vcpu *vcpu,
+			     struct kvm_exit_mmio *mmio, phys_addr_t offset)
+{
+	return handle_cpu_mmio_misc(vcpu, mmio, GIC_CPU_ALIAS_BINPOINT);
+}
+
+static bool handle_cpu_mmio_ident(struct kvm_vcpu *vcpu,
+				  struct kvm_exit_mmio *mmio,
+				  phys_addr_t offset)
+{
+	u32 reg;
+
+	if (mmio->is_write)
+		return false;
+
+	/* GICC_IIDR */
+	reg = (PRODUCT_ID_KVM << 20) |
+	      (GICC_ARCH_VERSION_V2 << 16) |
+	      (IMPLEMENTER_ARM << 0);
+	mmio_data_write(mmio, ~0, reg);
+	return false;
+}
+
+/*
+ * CPU Interface Register accesses - these are not accessed by the VM, but by
+ * user space for saving and restoring VGIC state.
+ */
+static const struct kvm_mmio_range vgic_cpu_ranges[] = {
+	{
+		.base		= GIC_CPU_CTRL,
+		.len		= 12,
+		.handle_mmio	= handle_cpu_mmio_misc,
+	},
+	{
+		.base		= GIC_CPU_ALIAS_BINPOINT,
+		.len		= 4,
+		.handle_mmio	= handle_mmio_abpr,
+	},
+	{
+		.base		= GIC_CPU_ACTIVEPRIO,
+		.len		= 16,
+		.handle_mmio	= handle_mmio_raz_wi,
+	},
+	{
+		.base		= GIC_CPU_IDENT,
+		.len		= 4,
+		.handle_mmio	= handle_cpu_mmio_ident,
+	},
+};
+
+static int vgic_attr_regs_access(struct kvm_device *dev,
+				 struct kvm_device_attr *attr,
+				 u32 *reg, bool is_write)
+{
+	const struct kvm_mmio_range *r = NULL, *ranges;
+	phys_addr_t offset;
+	int ret, cpuid, c;
+	struct kvm_vcpu *vcpu, *tmp_vcpu;
+	struct vgic_dist *vgic;
+	struct kvm_exit_mmio mmio;
+
+	offset = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK;
+	cpuid = (attr->attr & KVM_DEV_ARM_VGIC_CPUID_MASK) >>
+		KVM_DEV_ARM_VGIC_CPUID_SHIFT;
+
+	mutex_lock(&dev->kvm->lock);
+
+	ret = vgic_init(dev->kvm);
+	if (ret)
+		goto out;
+
+	if (cpuid >= atomic_read(&dev->kvm->online_vcpus)) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	vcpu = kvm_get_vcpu(dev->kvm, cpuid);
+	vgic = &dev->kvm->arch.vgic;
+
+	mmio.len = 4;
+	mmio.is_write = is_write;
+	if (is_write)
+		mmio_data_write(&mmio, ~0, *reg);
+	switch (attr->group) {
+	case KVM_DEV_ARM_VGIC_GRP_DIST_REGS:
+		mmio.phys_addr = vgic->vgic_dist_base + offset;
+		ranges = vgic_dist_ranges;
+		break;
+	case KVM_DEV_ARM_VGIC_GRP_CPU_REGS:
+		mmio.phys_addr = vgic->vgic_cpu_base + offset;
+		ranges = vgic_cpu_ranges;
+		break;
+	default:
+		BUG();
+	}
+	r = vgic_find_range(ranges, &mmio, offset);
+
+	if (unlikely(!r || !r->handle_mmio)) {
+		ret = -ENXIO;
+		goto out;
+	}
+
+	spin_lock(&vgic->lock);
+
+	/*
+	 * Ensure that no other VCPU is running by checking the vcpu->cpu
+	 * field.  If no other VCPUs are running we can safely access the VGIC
+	 * state, because even if another VCPU is run after this point, that
+	 * VCPU will not touch the vgic state, because it will block on
+	 * getting the vgic->lock in kvm_vgic_sync_hwstate().
+	 */
+	kvm_for_each_vcpu(c, tmp_vcpu, dev->kvm) {
+		if (unlikely(tmp_vcpu->cpu != -1)) {
+			ret = -EBUSY;
+			goto out_vgic_unlock;
+		}
+	}
+
+	/*
+	 * Move all pending IRQs from the LRs on all VCPUs so the pending
+	 * state can be properly represented in the register state accessible
+	 * through this API.
+	 */
+	kvm_for_each_vcpu(c, tmp_vcpu, dev->kvm)
+		vgic_unqueue_irqs(tmp_vcpu);
+
+	offset -= r->base;
+	r->handle_mmio(vcpu, &mmio, offset);
+
+	if (!is_write)
+		*reg = mmio_data_read(&mmio, ~0);
+
+	ret = 0;
+out_vgic_unlock:
+	spin_unlock(&vgic->lock);
+out:
+	mutex_unlock(&dev->kvm->lock);
+	return ret;
+}
+
+static int vgic_v2_create(struct kvm_device *dev, u32 type)
+{
+	return kvm_vgic_create(dev->kvm, type);
+}
+
+static void vgic_v2_destroy(struct kvm_device *dev)
+{
+	kfree(dev);
+}
+
+static int vgic_v2_set_attr(struct kvm_device *dev,
+			    struct kvm_device_attr *attr)
+{
+	int ret;
+
+	ret = vgic_set_common_attr(dev, attr);
+	if (ret != -ENXIO)
+		return ret;
+
+	switch (attr->group) {
+	case KVM_DEV_ARM_VGIC_GRP_DIST_REGS:
+	case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: {
+		u32 __user *uaddr = (u32 __user *)(long)attr->addr;
+		u32 reg;
+
+		if (get_user(reg, uaddr))
+			return -EFAULT;
+
+		return vgic_attr_regs_access(dev, attr, &reg, true);
+	}
+
+	}
+
+	return -ENXIO;
+}
+
+static int vgic_v2_get_attr(struct kvm_device *dev,
+			    struct kvm_device_attr *attr)
+{
+	int ret;
+
+	ret = vgic_get_common_attr(dev, attr);
+	if (ret != -ENXIO)
+		return ret;
+
+	switch (attr->group) {
+	case KVM_DEV_ARM_VGIC_GRP_DIST_REGS:
+	case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: {
+		u32 __user *uaddr = (u32 __user *)(long)attr->addr;
+		u32 reg = 0;
+
+		ret = vgic_attr_regs_access(dev, attr, &reg, false);
+		if (ret)
+			return ret;
+		return put_user(reg, uaddr);
+	}
+
+	}
+
+	return -ENXIO;
+}
+
+static int vgic_v2_has_attr(struct kvm_device *dev,
+			    struct kvm_device_attr *attr)
+{
+	phys_addr_t offset;
+
+	switch (attr->group) {
+	case KVM_DEV_ARM_VGIC_GRP_ADDR:
+		switch (attr->attr) {
+		case KVM_VGIC_V2_ADDR_TYPE_DIST:
+		case KVM_VGIC_V2_ADDR_TYPE_CPU:
+			return 0;
+		}
+		break;
+	case KVM_DEV_ARM_VGIC_GRP_DIST_REGS:
+		offset = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK;
+		return vgic_has_attr_regs(vgic_dist_ranges, offset);
+	case KVM_DEV_ARM_VGIC_GRP_CPU_REGS:
+		offset = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK;
+		return vgic_has_attr_regs(vgic_cpu_ranges, offset);
+	case KVM_DEV_ARM_VGIC_GRP_NR_IRQS:
+		return 0;
+	case KVM_DEV_ARM_VGIC_GRP_CTRL:
+		switch (attr->attr) {
+		case KVM_DEV_ARM_VGIC_CTRL_INIT:
+			return 0;
+		}
+	}
+	return -ENXIO;
+}
+
+struct kvm_device_ops kvm_arm_vgic_v2_ops = {
+	.name = "kvm-arm-vgic-v2",
+	.create = vgic_v2_create,
+	.destroy = vgic_v2_destroy,
+	.set_attr = vgic_v2_set_attr,
+	.get_attr = vgic_v2_get_attr,
+	.has_attr = vgic_v2_has_attr,
+};

+ 4 - 0
virt/kvm/arm/vgic-v2.c

@@ -229,12 +229,16 @@ int vgic_v2_probe(struct device_node *vgic_node,
 		goto out_unmap;
 	}
 
+	vgic->can_emulate_gicv2 = true;
+	kvm_register_device_ops(&kvm_arm_vgic_v2_ops, KVM_DEV_TYPE_ARM_VGIC_V2);
+
 	vgic->vcpu_base = vcpu_res.start;
 
 	kvm_info("%s@%llx IRQ%d\n", vgic_node->name,
 		 vctrl_res.start, vgic->maint_irq);
 
 	vgic->type = VGIC_V2;
+	vgic->max_gic_vcpus = VGIC_V2_MAX_CPUS;
 	*ops = &vgic_v2_ops;
 	*params = vgic;
 	goto out;

+ 1036 - 0
virt/kvm/arm/vgic-v3-emul.c

@@ -0,0 +1,1036 @@
+/*
+ * GICv3 distributor and redistributor emulation
+ *
+ * GICv3 emulation is currently only supported on a GICv3 host (because
+ * we rely on the hardware's CPU interface virtualization support), but
+ * supports both hardware with or without the optional GICv2 backwards
+ * compatibility features.
+ *
+ * Limitations of the emulation:
+ * (RAZ/WI: read as zero, write ignore, RAO/WI: read as one, write ignore)
+ * - We do not support LPIs (yet). TYPER.LPIS is reported as 0 and is RAZ/WI.
+ * - We do not support the message based interrupts (MBIs) triggered by
+ *   writes to the GICD_{SET,CLR}SPI_* registers. TYPER.MBIS is reported as 0.
+ * - We do not support the (optional) backwards compatibility feature.
+ *   GICD_CTLR.ARE resets to 1 and is RAO/WI. If the _host_ GIC supports
+ *   the compatibility feature, you can use a GICv2 in the guest, though.
+ * - We only support a single security state. GICD_CTLR.DS is 1 and is RAO/WI.
+ * - Priorities are not emulated (same as the GICv2 emulation). Linux
+ *   as a guest is fine with this, because it does not use priorities.
+ * - We only support Group1 interrupts. Again Linux uses only those.
+ *
+ * Copyright (C) 2014 ARM Ltd.
+ * Author: Andre Przywara <andre.przywara@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/cpu.h>
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include <linux/interrupt.h>
+
+#include <linux/irqchip/arm-gic-v3.h>
+#include <kvm/arm_vgic.h>
+
+#include <asm/kvm_emulate.h>
+#include <asm/kvm_arm.h>
+#include <asm/kvm_mmu.h>
+
+#include "vgic.h"
+
+static bool handle_mmio_rao_wi(struct kvm_vcpu *vcpu,
+			       struct kvm_exit_mmio *mmio, phys_addr_t offset)
+{
+	u32 reg = 0xffffffff;
+
+	vgic_reg_access(mmio, &reg, offset,
+			ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED);
+
+	return false;
+}
+
+static bool handle_mmio_ctlr(struct kvm_vcpu *vcpu,
+			     struct kvm_exit_mmio *mmio, phys_addr_t offset)
+{
+	u32 reg = 0;
+
+	/*
+	 * Force ARE and DS to 1, the guest cannot change this.
+	 * For the time being we only support Group1 interrupts.
+	 */
+	if (vcpu->kvm->arch.vgic.enabled)
+		reg = GICD_CTLR_ENABLE_SS_G1;
+	reg |= GICD_CTLR_ARE_NS | GICD_CTLR_DS;
+
+	vgic_reg_access(mmio, &reg, offset,
+			ACCESS_READ_VALUE | ACCESS_WRITE_VALUE);
+	if (mmio->is_write) {
+		if (reg & GICD_CTLR_ENABLE_SS_G0)
+			kvm_info("guest tried to enable unsupported Group0 interrupts\n");
+		vcpu->kvm->arch.vgic.enabled = !!(reg & GICD_CTLR_ENABLE_SS_G1);
+		vgic_update_state(vcpu->kvm);
+		return true;
+	}
+	return false;
+}
+
+/*
+ * As this implementation does not provide compatibility
+ * with GICv2 (ARE==1), we report zero CPUs in bits [5..7].
+ * LPIs and MBIs are not supported either, so we set the respective bits to 0.
+ * Finally, we report at most 2**10=1024 interrupt IDs (to match 1024 SPIs).
+ */
+#define INTERRUPT_ID_BITS 10
+static bool handle_mmio_typer(struct kvm_vcpu *vcpu,
+			      struct kvm_exit_mmio *mmio, phys_addr_t offset)
+{
+	u32 reg;
+
+	reg = (min(vcpu->kvm->arch.vgic.nr_irqs, 1024) >> 5) - 1;
+
+	reg |= (INTERRUPT_ID_BITS - 1) << 19;
+
+	vgic_reg_access(mmio, &reg, offset,
+			ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED);
+
+	return false;
+}
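
(A minimal standalone sketch of the GICD_TYPER arithmetic above: the low field encodes the supported interrupt lines in units of 32, and bits [23:19] advertise one less than the number of interrupt ID bits. The nr_irqs value is a made-up example; illustration only, not part of the patch.)

#include <stdio.h>

int main(void)
{
	int nr_irqs = 256;		/* hypothetical vgic nr_irqs */
	int id_bits = 10;		/* mirrors INTERRUPT_ID_BITS above */

	unsigned int typer = ((nr_irqs < 1024 ? nr_irqs : 1024) >> 5) - 1;
	typer |= (id_bits - 1) << 19;

	printf("nr_irqs=%d -> GICD_TYPER=%#x (ITLinesNumber=%u, IDbits field=%u)\n",
	       nr_irqs, typer, typer & 0x1f, (typer >> 19) & 0x1f);
	return 0;
}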
+
+static bool handle_mmio_iidr(struct kvm_vcpu *vcpu,
+			     struct kvm_exit_mmio *mmio, phys_addr_t offset)
+{
+	u32 reg;
+
+	reg = (PRODUCT_ID_KVM << 24) | (IMPLEMENTER_ARM << 0);
+	vgic_reg_access(mmio, &reg, offset,
+			ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED);
+
+	return false;
+}
+
+static bool handle_mmio_set_enable_reg_dist(struct kvm_vcpu *vcpu,
+					    struct kvm_exit_mmio *mmio,
+					    phys_addr_t offset)
+{
+	if (likely(offset >= VGIC_NR_PRIVATE_IRQS / 8))
+		return vgic_handle_enable_reg(vcpu->kvm, mmio, offset,
+					      vcpu->vcpu_id,
+					      ACCESS_WRITE_SETBIT);
+
+	vgic_reg_access(mmio, NULL, offset,
+			ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED);
+	return false;
+}
+
+static bool handle_mmio_clear_enable_reg_dist(struct kvm_vcpu *vcpu,
+					      struct kvm_exit_mmio *mmio,
+					      phys_addr_t offset)
+{
+	if (likely(offset >= VGIC_NR_PRIVATE_IRQS / 8))
+		return vgic_handle_enable_reg(vcpu->kvm, mmio, offset,
+					      vcpu->vcpu_id,
+					      ACCESS_WRITE_CLEARBIT);
+
+	vgic_reg_access(mmio, NULL, offset,
+			ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED);
+	return false;
+}
+
+static bool handle_mmio_set_pending_reg_dist(struct kvm_vcpu *vcpu,
+					     struct kvm_exit_mmio *mmio,
+					     phys_addr_t offset)
+{
+	if (likely(offset >= VGIC_NR_PRIVATE_IRQS / 8))
+		return vgic_handle_set_pending_reg(vcpu->kvm, mmio, offset,
+						   vcpu->vcpu_id);
+
+	vgic_reg_access(mmio, NULL, offset,
+			ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED);
+	return false;
+}
+
+static bool handle_mmio_clear_pending_reg_dist(struct kvm_vcpu *vcpu,
+					       struct kvm_exit_mmio *mmio,
+					       phys_addr_t offset)
+{
+	if (likely(offset >= VGIC_NR_PRIVATE_IRQS / 8))
+		return vgic_handle_clear_pending_reg(vcpu->kvm, mmio, offset,
+						     vcpu->vcpu_id);
+
+	vgic_reg_access(mmio, NULL, offset,
+			ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED);
+	return false;
+}
+
+static bool handle_mmio_priority_reg_dist(struct kvm_vcpu *vcpu,
+					  struct kvm_exit_mmio *mmio,
+					  phys_addr_t offset)
+{
+	u32 *reg;
+
+	if (unlikely(offset < VGIC_NR_PRIVATE_IRQS)) {
+		vgic_reg_access(mmio, NULL, offset,
+				ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED);
+		return false;
+	}
+
+	reg = vgic_bytemap_get_reg(&vcpu->kvm->arch.vgic.irq_priority,
+				   vcpu->vcpu_id, offset);
+	vgic_reg_access(mmio, reg, offset,
+		ACCESS_READ_VALUE | ACCESS_WRITE_VALUE);
+	return false;
+}
+
+static bool handle_mmio_cfg_reg_dist(struct kvm_vcpu *vcpu,
+				     struct kvm_exit_mmio *mmio,
+				     phys_addr_t offset)
+{
+	u32 *reg;
+
+	if (unlikely(offset < VGIC_NR_PRIVATE_IRQS / 4)) {
+		vgic_reg_access(mmio, NULL, offset,
+				ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED);
+		return false;
+	}
+
+	reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_cfg,
+				  vcpu->vcpu_id, offset >> 1);
+
+	return vgic_handle_cfg_reg(reg, mmio, offset);
+}
+
+/*
+ * We use a compressed version of the MPIDR (all 32 bits in one 32-bit word)
+ * when we store the target MPIDR written by the guest.
+ */
+static u32 compress_mpidr(unsigned long mpidr)
+{
+	u32 ret;
+
+	ret = MPIDR_AFFINITY_LEVEL(mpidr, 0);
+	ret |= MPIDR_AFFINITY_LEVEL(mpidr, 1) << 8;
+	ret |= MPIDR_AFFINITY_LEVEL(mpidr, 2) << 16;
+	ret |= MPIDR_AFFINITY_LEVEL(mpidr, 3) << 24;
+
+	return ret;
+}
+
+static unsigned long uncompress_mpidr(u32 value)
+{
+	unsigned long mpidr;
+
+	mpidr  = ((value >>  0) & 0xFF) << MPIDR_LEVEL_SHIFT(0);
+	mpidr |= ((value >>  8) & 0xFF) << MPIDR_LEVEL_SHIFT(1);
+	mpidr |= ((value >> 16) & 0xFF) << MPIDR_LEVEL_SHIFT(2);
+	mpidr |= (u64)((value >> 24) & 0xFF) << MPIDR_LEVEL_SHIFT(3);
+
+	return mpidr;
+}
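
(A minimal standalone sketch of the round trip between a 64-bit MPIDR and the packed 32-bit form stored in irq_spi_mpidr[]. The helpers below use plain shifts in place of the kernel's MPIDR_AFFINITY_LEVEL()/MPIDR_LEVEL_SHIFT() macros, and the MPIDR value is a made-up example; illustration only, not part of the patch.)

#include <stdio.h>
#include <stdint.h>

/* Aff0..Aff2 occupy bits 7:0, 15:8, 23:16; Aff3 sits at bits 39:32 on arm64. */
static uint32_t pack_mpidr(uint64_t mpidr)
{
	return (uint32_t)((mpidr & 0xffffff) | (((mpidr >> 32) & 0xff) << 24));
}

static uint64_t unpack_mpidr(uint32_t val)
{
	return (uint64_t)(val & 0xffffff) | ((uint64_t)((val >> 24) & 0xff) << 32);
}

int main(void)
{
	uint64_t mpidr = 0x0000000200010003ULL;	/* Aff3=2, Aff2=1, Aff1=0, Aff0=3 */
	uint32_t packed = pack_mpidr(mpidr);

	printf("mpidr %#llx -> packed %#x -> %#llx\n",
	       (unsigned long long)mpidr, (unsigned)packed,
	       (unsigned long long)unpack_mpidr(packed));
	return 0;
}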
+
+/*
+ * Lookup the given MPIDR value to get the vcpu_id (if there is one)
+ * and store that in the irq_spi_cpu[] array.
+ * This limits the number of VCPUs to 255 for now, extending the data
+ * type (or storing kvm_vcpu pointers) should lift the limit.
+ * Store the original MPIDR value in an extra array to support read-as-written.
+ * Unallocated MPIDRs are translated to a special value and caught
+ * before any array accesses.
+ */
+static bool handle_mmio_route_reg(struct kvm_vcpu *vcpu,
+				  struct kvm_exit_mmio *mmio,
+				  phys_addr_t offset)
+{
+	struct kvm *kvm = vcpu->kvm;
+	struct vgic_dist *dist = &kvm->arch.vgic;
+	int spi;
+	u32 reg;
+	int vcpu_id;
+	unsigned long *bmap, mpidr;
+
+	/*
+	 * The upper 32 bits of each 64 bit register are zero,
+	 * as we don't support Aff3.
+	 */
+	if ((offset & 4)) {
+		vgic_reg_access(mmio, NULL, offset,
+				ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED);
+		return false;
+	}
+
+	/* This region only covers SPIs, so no handling of private IRQs here. */
+	spi = offset / 8;
+
+	/* get the stored MPIDR for this IRQ */
+	mpidr = uncompress_mpidr(dist->irq_spi_mpidr[spi]);
+	reg = mpidr;
+
+	vgic_reg_access(mmio, &reg, offset,
+			ACCESS_READ_VALUE | ACCESS_WRITE_VALUE);
+
+	if (!mmio->is_write)
+		return false;
+
+	/*
+	 * Now clear the currently assigned vCPU from the map, making room
+	 * for the new one to be written below
+	 */
+	vcpu = kvm_mpidr_to_vcpu(kvm, mpidr);
+	if (likely(vcpu)) {
+		vcpu_id = vcpu->vcpu_id;
+		bmap = vgic_bitmap_get_shared_map(&dist->irq_spi_target[vcpu_id]);
+		__clear_bit(spi, bmap);
+	}
+
+	dist->irq_spi_mpidr[spi] = compress_mpidr(reg);
+	vcpu = kvm_mpidr_to_vcpu(kvm, reg & MPIDR_HWID_BITMASK);
+
+	/*
+	 * The spec says that non-existent MPIDR values should not be
+	 * forwarded to any existent (v)CPU, but should be able to become
+	 * pending anyway. We simply keep the irq_spi_target[] array empty, so
+	 * the interrupt will never be injected.
+	 * irq_spi_cpu[irq] gets a magic value in this case.
+	 */
+	if (likely(vcpu)) {
+		vcpu_id = vcpu->vcpu_id;
+		dist->irq_spi_cpu[spi] = vcpu_id;
+		bmap = vgic_bitmap_get_shared_map(&dist->irq_spi_target[vcpu_id]);
+		__set_bit(spi, bmap);
+	} else {
+		dist->irq_spi_cpu[spi] = VCPU_NOT_ALLOCATED;
+	}
+
+	vgic_update_state(kvm);
+
+	return true;
+}
+
+/*
+ * We should be careful about promising too much when a guest reads
+ * this register. Don't claim to be like any hardware implementation,
+ * but just report the GIC as version 3 - which is what a Linux guest
+ * would check.
+ */
+static bool handle_mmio_idregs(struct kvm_vcpu *vcpu,
+			       struct kvm_exit_mmio *mmio,
+			       phys_addr_t offset)
+{
+	u32 reg = 0;
+
+	switch (offset + GICD_IDREGS) {
+	case GICD_PIDR2:
+		reg = 0x3b;
+		break;
+	}
+
+	vgic_reg_access(mmio, &reg, offset,
+			ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED);
+
+	return false;
+}
+
+static const struct kvm_mmio_range vgic_v3_dist_ranges[] = {
+	{
+		.base           = GICD_CTLR,
+		.len            = 0x04,
+		.bits_per_irq   = 0,
+		.handle_mmio    = handle_mmio_ctlr,
+	},
+	{
+		.base           = GICD_TYPER,
+		.len            = 0x04,
+		.bits_per_irq   = 0,
+		.handle_mmio    = handle_mmio_typer,
+	},
+	{
+		.base           = GICD_IIDR,
+		.len            = 0x04,
+		.bits_per_irq   = 0,
+		.handle_mmio    = handle_mmio_iidr,
+	},
+	{
+		/* this register is optional, it is RAZ/WI if not implemented */
+		.base           = GICD_STATUSR,
+		.len            = 0x04,
+		.bits_per_irq   = 0,
+		.handle_mmio    = handle_mmio_raz_wi,
+	},
+	{
+		/* this write only register is WI when TYPER.MBIS=0 */
+		.base		= GICD_SETSPI_NSR,
+		.len		= 0x04,
+		.bits_per_irq	= 0,
+		.handle_mmio	= handle_mmio_raz_wi,
+	},
+	{
+		/* this write only register is WI when TYPER.MBIS=0 */
+		.base		= GICD_CLRSPI_NSR,
+		.len		= 0x04,
+		.bits_per_irq	= 0,
+		.handle_mmio	= handle_mmio_raz_wi,
+	},
+	{
+		/* this is RAZ/WI when DS=1 */
+		.base		= GICD_SETSPI_SR,
+		.len		= 0x04,
+		.bits_per_irq	= 0,
+		.handle_mmio	= handle_mmio_raz_wi,
+	},
+	{
+		/* this is RAZ/WI when DS=1 */
+		.base		= GICD_CLRSPI_SR,
+		.len		= 0x04,
+		.bits_per_irq	= 0,
+		.handle_mmio	= handle_mmio_raz_wi,
+	},
+	{
+		.base		= GICD_IGROUPR,
+		.len		= 0x80,
+		.bits_per_irq	= 1,
+		.handle_mmio	= handle_mmio_rao_wi,
+	},
+	{
+		.base		= GICD_ISENABLER,
+		.len		= 0x80,
+		.bits_per_irq	= 1,
+		.handle_mmio	= handle_mmio_set_enable_reg_dist,
+	},
+	{
+		.base		= GICD_ICENABLER,
+		.len		= 0x80,
+		.bits_per_irq	= 1,
+		.handle_mmio	= handle_mmio_clear_enable_reg_dist,
+	},
+	{
+		.base		= GICD_ISPENDR,
+		.len		= 0x80,
+		.bits_per_irq	= 1,
+		.handle_mmio	= handle_mmio_set_pending_reg_dist,
+	},
+	{
+		.base		= GICD_ICPENDR,
+		.len		= 0x80,
+		.bits_per_irq	= 1,
+		.handle_mmio	= handle_mmio_clear_pending_reg_dist,
+	},
+	{
+		.base		= GICD_ISACTIVER,
+		.len		= 0x80,
+		.bits_per_irq	= 1,
+		.handle_mmio	= handle_mmio_raz_wi,
+	},
+	{
+		.base		= GICD_ICACTIVER,
+		.len		= 0x80,
+		.bits_per_irq	= 1,
+		.handle_mmio	= handle_mmio_raz_wi,
+	},
+	{
+		.base		= GICD_IPRIORITYR,
+		.len		= 0x400,
+		.bits_per_irq	= 8,
+		.handle_mmio	= handle_mmio_priority_reg_dist,
+	},
+	{
+		/* TARGETSRn is RES0 when ARE=1 */
+		.base		= GICD_ITARGETSR,
+		.len		= 0x400,
+		.bits_per_irq	= 8,
+		.handle_mmio	= handle_mmio_raz_wi,
+	},
+	{
+		.base		= GICD_ICFGR,
+		.len		= 0x100,
+		.bits_per_irq	= 2,
+		.handle_mmio	= handle_mmio_cfg_reg_dist,
+	},
+	{
+		/* this is RAZ/WI when DS=1 */
+		.base		= GICD_IGRPMODR,
+		.len		= 0x80,
+		.bits_per_irq	= 1,
+		.handle_mmio	= handle_mmio_raz_wi,
+	},
+	{
+		/* this is RAZ/WI when DS=1 */
+		.base		= GICD_NSACR,
+		.len		= 0x100,
+		.bits_per_irq	= 2,
+		.handle_mmio	= handle_mmio_raz_wi,
+	},
+	{
+		/* this is RAZ/WI when ARE=1 */
+		.base		= GICD_SGIR,
+		.len		= 0x04,
+		.handle_mmio	= handle_mmio_raz_wi,
+	},
+	{
+		/* this is RAZ/WI when ARE=1 */
+		.base		= GICD_CPENDSGIR,
+		.len		= 0x10,
+		.handle_mmio	= handle_mmio_raz_wi,
+	},
+	{
+		/* this is RAZ/WI when ARE=1 */
+		.base           = GICD_SPENDSGIR,
+		.len            = 0x10,
+		.handle_mmio    = handle_mmio_raz_wi,
+	},
+	{
+		.base		= GICD_IROUTER + 0x100,
+		.len		= 0x1ee0,
+		.bits_per_irq	= 64,
+		.handle_mmio	= handle_mmio_route_reg,
+	},
+	{
+		.base           = GICD_IDREGS,
+		.len            = 0x30,
+		.bits_per_irq   = 0,
+		.handle_mmio    = handle_mmio_idregs,
+	},
+	{},
+};
+
+static bool handle_mmio_set_enable_reg_redist(struct kvm_vcpu *vcpu,
+					      struct kvm_exit_mmio *mmio,
+					      phys_addr_t offset)
+{
+	struct kvm_vcpu *redist_vcpu = mmio->private;
+
+	return vgic_handle_enable_reg(vcpu->kvm, mmio, offset,
+				      redist_vcpu->vcpu_id,
+				      ACCESS_WRITE_SETBIT);
+}
+
+static bool handle_mmio_clear_enable_reg_redist(struct kvm_vcpu *vcpu,
+						struct kvm_exit_mmio *mmio,
+						phys_addr_t offset)
+{
+	struct kvm_vcpu *redist_vcpu = mmio->private;
+
+	return vgic_handle_enable_reg(vcpu->kvm, mmio, offset,
+				      redist_vcpu->vcpu_id,
+				      ACCESS_WRITE_CLEARBIT);
+}
+
+static bool handle_mmio_set_pending_reg_redist(struct kvm_vcpu *vcpu,
+					       struct kvm_exit_mmio *mmio,
+					       phys_addr_t offset)
+{
+	struct kvm_vcpu *redist_vcpu = mmio->private;
+
+	return vgic_handle_set_pending_reg(vcpu->kvm, mmio, offset,
+					   redist_vcpu->vcpu_id);
+}
+
+static bool handle_mmio_clear_pending_reg_redist(struct kvm_vcpu *vcpu,
+						 struct kvm_exit_mmio *mmio,
+						 phys_addr_t offset)
+{
+	struct kvm_vcpu *redist_vcpu = mmio->private;
+
+	return vgic_handle_clear_pending_reg(vcpu->kvm, mmio, offset,
+					     redist_vcpu->vcpu_id);
+}
+
+static bool handle_mmio_priority_reg_redist(struct kvm_vcpu *vcpu,
+					    struct kvm_exit_mmio *mmio,
+					    phys_addr_t offset)
+{
+	struct kvm_vcpu *redist_vcpu = mmio->private;
+	u32 *reg;
+
+	reg = vgic_bytemap_get_reg(&vcpu->kvm->arch.vgic.irq_priority,
+				   redist_vcpu->vcpu_id, offset);
+	vgic_reg_access(mmio, reg, offset,
+			ACCESS_READ_VALUE | ACCESS_WRITE_VALUE);
+	return false;
+}
+
+static bool handle_mmio_cfg_reg_redist(struct kvm_vcpu *vcpu,
+				       struct kvm_exit_mmio *mmio,
+				       phys_addr_t offset)
+{
+	struct kvm_vcpu *redist_vcpu = mmio->private;
+
+	u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_cfg,
+				       redist_vcpu->vcpu_id, offset >> 1);
+
+	return vgic_handle_cfg_reg(reg, mmio, offset);
+}
+
+static const struct kvm_mmio_range vgic_redist_sgi_ranges[] = {
+	{
+		.base		= GICR_IGROUPR0,
+		.len		= 0x04,
+		.bits_per_irq	= 1,
+		.handle_mmio	= handle_mmio_rao_wi,
+	},
+	{
+		.base		= GICR_ISENABLER0,
+		.len		= 0x04,
+		.bits_per_irq	= 1,
+		.handle_mmio	= handle_mmio_set_enable_reg_redist,
+	},
+	{
+		.base		= GICR_ICENABLER0,
+		.len		= 0x04,
+		.bits_per_irq	= 1,
+		.handle_mmio	= handle_mmio_clear_enable_reg_redist,
+	},
+	{
+		.base		= GICR_ISPENDR0,
+		.len		= 0x04,
+		.bits_per_irq	= 1,
+		.handle_mmio	= handle_mmio_set_pending_reg_redist,
+	},
+	{
+		.base		= GICR_ICPENDR0,
+		.len		= 0x04,
+		.bits_per_irq	= 1,
+		.handle_mmio	= handle_mmio_clear_pending_reg_redist,
+	},
+	{
+		.base		= GICR_ISACTIVER0,
+		.len		= 0x04,
+		.bits_per_irq	= 1,
+		.handle_mmio	= handle_mmio_raz_wi,
+	},
+	{
+		.base		= GICR_ICACTIVER0,
+		.len		= 0x04,
+		.bits_per_irq	= 1,
+		.handle_mmio	= handle_mmio_raz_wi,
+	},
+	{
+		.base		= GICR_IPRIORITYR0,
+		.len		= 0x20,
+		.bits_per_irq	= 8,
+		.handle_mmio	= handle_mmio_priority_reg_redist,
+	},
+	{
+		.base		= GICR_ICFGR0,
+		.len		= 0x08,
+		.bits_per_irq	= 2,
+		.handle_mmio	= handle_mmio_cfg_reg_redist,
+	},
+	{
+		.base		= GICR_IGRPMODR0,
+		.len		= 0x04,
+		.bits_per_irq	= 1,
+		.handle_mmio	= handle_mmio_raz_wi,
+	},
+	{
+		.base		= GICR_NSACR,
+		.len		= 0x04,
+		.handle_mmio	= handle_mmio_raz_wi,
+	},
+	{},
+};
+
+static bool handle_mmio_ctlr_redist(struct kvm_vcpu *vcpu,
+				    struct kvm_exit_mmio *mmio,
+				    phys_addr_t offset)
+{
+	/* since we don't support LPIs, this register is zero for now */
+	vgic_reg_access(mmio, NULL, offset,
+			ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED);
+	return false;
+}
+
+static bool handle_mmio_typer_redist(struct kvm_vcpu *vcpu,
+				     struct kvm_exit_mmio *mmio,
+				     phys_addr_t offset)
+{
+	u32 reg;
+	u64 mpidr;
+	struct kvm_vcpu *redist_vcpu = mmio->private;
+	int target_vcpu_id = redist_vcpu->vcpu_id;
+
+	/* the upper 32 bits contain the affinity value */
+	if ((offset & ~3) == 4) {
+		mpidr = kvm_vcpu_get_mpidr_aff(redist_vcpu);
+		reg = compress_mpidr(mpidr);
+
+		vgic_reg_access(mmio, &reg, offset,
+				ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED);
+		return false;
+	}
+
+	reg = redist_vcpu->vcpu_id << 8;
+	if (target_vcpu_id == atomic_read(&vcpu->kvm->online_vcpus) - 1)
+		reg |= GICR_TYPER_LAST;
+	vgic_reg_access(mmio, &reg, offset,
+			ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED);
+	return false;
+}
+
+static const struct kvm_mmio_range vgic_redist_ranges[] = {
+	{
+		.base           = GICR_CTLR,
+		.len            = 0x04,
+		.bits_per_irq   = 0,
+		.handle_mmio    = handle_mmio_ctlr_redist,
+	},
+	{
+		.base           = GICR_TYPER,
+		.len            = 0x08,
+		.bits_per_irq   = 0,
+		.handle_mmio    = handle_mmio_typer_redist,
+	},
+	{
+		.base           = GICR_IIDR,
+		.len            = 0x04,
+		.bits_per_irq   = 0,
+		.handle_mmio    = handle_mmio_iidr,
+	},
+	{
+		.base           = GICR_WAKER,
+		.len            = 0x04,
+		.bits_per_irq   = 0,
+		.handle_mmio    = handle_mmio_raz_wi,
+	},
+	{
+		.base           = GICR_IDREGS,
+		.len            = 0x30,
+		.bits_per_irq   = 0,
+		.handle_mmio    = handle_mmio_idregs,
+	},
+	{},
+};
+
+/*
+ * This function splits accesses between the distributor and the two
+ * redistributor parts (private/SPI). As each redistributor is accessible
+ * from any CPU, we have to determine the affected VCPU by taking the faulting
+ * address into account. We then pass this VCPU to the handler function via
+ * the private parameter.
+ */
+#define SGI_BASE_OFFSET SZ_64K
+static bool vgic_v3_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
+				struct kvm_exit_mmio *mmio)
+{
+	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+	unsigned long dbase = dist->vgic_dist_base;
+	unsigned long rdbase = dist->vgic_redist_base;
+	int nrcpus = atomic_read(&vcpu->kvm->online_vcpus);
+	int vcpu_id;
+	const struct kvm_mmio_range *mmio_range;
+
+	if (is_in_range(mmio->phys_addr, mmio->len, dbase, GIC_V3_DIST_SIZE)) {
+		return vgic_handle_mmio_range(vcpu, run, mmio,
+					      vgic_v3_dist_ranges, dbase);
+	}
+
+	if (!is_in_range(mmio->phys_addr, mmio->len, rdbase,
+	    GIC_V3_REDIST_SIZE * nrcpus))
+		return false;
+
+	vcpu_id = (mmio->phys_addr - rdbase) / GIC_V3_REDIST_SIZE;
+	rdbase += (vcpu_id * GIC_V3_REDIST_SIZE);
+	mmio->private = kvm_get_vcpu(vcpu->kvm, vcpu_id);
+
+	if (mmio->phys_addr >= rdbase + SGI_BASE_OFFSET) {
+		rdbase += SGI_BASE_OFFSET;
+		mmio_range = vgic_redist_sgi_ranges;
+	} else {
+		mmio_range = vgic_redist_ranges;
+	}
+	return vgic_handle_mmio_range(vcpu, run, mmio, mmio_range, rdbase);
+}
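
(A minimal standalone sketch of the address arithmetic used above to find the affected redistributor and its frame. The base address, region sizes and faulting address are made-up example values; illustration only, not part of the patch.)

#include <stdio.h>
#include <stdint.h>

#define REDIST_BASE	0x08100000ULL	/* hypothetical guest GICR base */
#define REDIST_SIZE	0x20000ULL	/* two 64K frames per vCPU (RD + SGI) */
#define SGI_OFFSET	0x10000ULL	/* second frame holds the SGI registers */

int main(void)
{
	uint64_t fault = 0x08152004ULL;			/* example faulting address */
	uint64_t off = fault - REDIST_BASE;

	int vcpu_id = (int)(off / REDIST_SIZE);		/* which redistributor */
	uint64_t frame_off = off % REDIST_SIZE;
	int sgi_frame = frame_off >= SGI_OFFSET;	/* RD frame or SGI frame? */

	printf("addr %#llx -> vCPU%d, %s frame, register offset %#llx\n",
	       (unsigned long long)fault, vcpu_id, sgi_frame ? "SGI" : "RD",
	       (unsigned long long)(frame_off - (sgi_frame ? SGI_OFFSET : 0)));
	return 0;
}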
+
+static bool vgic_v3_queue_sgi(struct kvm_vcpu *vcpu, int irq)
+{
+	if (vgic_queue_irq(vcpu, 0, irq)) {
+		vgic_dist_irq_clear_pending(vcpu, irq);
+		vgic_cpu_irq_clear(vcpu, irq);
+		return true;
+	}
+
+	return false;
+}
+
+static int vgic_v3_map_resources(struct kvm *kvm,
+				 const struct vgic_params *params)
+{
+	int ret = 0;
+	struct vgic_dist *dist = &kvm->arch.vgic;
+
+	if (!irqchip_in_kernel(kvm))
+		return 0;
+
+	mutex_lock(&kvm->lock);
+
+	if (vgic_ready(kvm))
+		goto out;
+
+	if (IS_VGIC_ADDR_UNDEF(dist->vgic_dist_base) ||
+	    IS_VGIC_ADDR_UNDEF(dist->vgic_redist_base)) {
+		kvm_err("Need to set vgic distributor addresses first\n");
+		ret = -ENXIO;
+		goto out;
+	}
+
+	/*
+	 * For a VGICv3 we require the userland to explicitly initialize
+	 * the VGIC before we need to use it.
+	 */
+	if (!vgic_initialized(kvm)) {
+		ret = -EBUSY;
+		goto out;
+	}
+
+	kvm->arch.vgic.ready = true;
+out:
+	if (ret)
+		kvm_vgic_destroy(kvm);
+	mutex_unlock(&kvm->lock);
+	return ret;
+}
+
+static int vgic_v3_init_model(struct kvm *kvm)
+{
+	int i;
+	u32 mpidr;
+	struct vgic_dist *dist = &kvm->arch.vgic;
+	int nr_spis = dist->nr_irqs - VGIC_NR_PRIVATE_IRQS;
+
+	dist->irq_spi_mpidr = kcalloc(nr_spis, sizeof(dist->irq_spi_mpidr[0]),
+				      GFP_KERNEL);
+
+	if (!dist->irq_spi_mpidr)
+		return -ENOMEM;
+
+	/* Initialize the target VCPUs for each IRQ to VCPU 0 */
+	mpidr = compress_mpidr(kvm_vcpu_get_mpidr_aff(kvm_get_vcpu(kvm, 0)));
+	for (i = VGIC_NR_PRIVATE_IRQS; i < dist->nr_irqs; i++) {
+		dist->irq_spi_cpu[i - VGIC_NR_PRIVATE_IRQS] = 0;
+		dist->irq_spi_mpidr[i - VGIC_NR_PRIVATE_IRQS] = mpidr;
+		vgic_bitmap_set_irq_val(dist->irq_spi_target, 0, i, 1);
+	}
+
+	return 0;
+}
+
+/* GICv3 does not keep track of SGI sources anymore. */
+static void vgic_v3_add_sgi_source(struct kvm_vcpu *vcpu, int irq, int source)
+{
+}
+
+void vgic_v3_init_emulation(struct kvm *kvm)
+{
+	struct vgic_dist *dist = &kvm->arch.vgic;
+
+	dist->vm_ops.handle_mmio = vgic_v3_handle_mmio;
+	dist->vm_ops.queue_sgi = vgic_v3_queue_sgi;
+	dist->vm_ops.add_sgi_source = vgic_v3_add_sgi_source;
+	dist->vm_ops.init_model = vgic_v3_init_model;
+	dist->vm_ops.map_resources = vgic_v3_map_resources;
+
+	kvm->arch.max_vcpus = KVM_MAX_VCPUS;
+}
+
+/*
+ * Compare a given affinity (level 1-3 and a level 0 mask, from the SGI
+ * generation register ICC_SGI1R_EL1) with a given VCPU.
+ * If the VCPU's MPIDR matches, return the level0 affinity, otherwise
+ * return -1.
+ */
+static int match_mpidr(u64 sgi_aff, u16 sgi_cpu_mask, struct kvm_vcpu *vcpu)
+{
+	unsigned long affinity;
+	int level0;
+
+	/*
+	 * Split the current VCPU's MPIDR into affinity level 0 and the
+	 * rest as this is what we have to compare against.
+	 */
+	affinity = kvm_vcpu_get_mpidr_aff(vcpu);
+	level0 = MPIDR_AFFINITY_LEVEL(affinity, 0);
+	affinity &= ~MPIDR_LEVEL_MASK;
+
+	/* bail out if the upper three levels don't match */
+	if (sgi_aff != affinity)
+		return -1;
+
+	/* Is this VCPU's bit set in the mask ? */
+	if (!(sgi_cpu_mask & BIT(level0)))
+		return -1;
+
+	return level0;
+}
+
+#define SGI_AFFINITY_LEVEL(reg, level) \
+	((((reg) & ICC_SGI1R_AFFINITY_## level ##_MASK) \
+	>> ICC_SGI1R_AFFINITY_## level ##_SHIFT) << MPIDR_LEVEL_SHIFT(level))
+
+/**
+ * vgic_v3_dispatch_sgi - handle SGI requests from VCPUs
+ * @vcpu: The VCPU requesting a SGI
+ * @reg: The value written into the ICC_SGI1R_EL1 register by that VCPU
+ *
+ * With GICv3 (and ARE=1) CPUs trigger SGIs by writing to a system register.
+ * This will trap in sys_regs.c and call this function.
+ * This ICC_SGI1R_EL1 register contains the upper three affinity levels of the
+ * target processors as well as a bitmask of 16 Aff0 CPUs.
+ * If the interrupt routing mode bit is not set, we iterate over all VCPUs to
+ * check for matching ones. If this bit is set, we signal all, but not the
+ * calling VCPU.
+ */
+void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg)
+{
+	struct kvm *kvm = vcpu->kvm;
+	struct kvm_vcpu *c_vcpu;
+	struct vgic_dist *dist = &kvm->arch.vgic;
+	u16 target_cpus;
+	u64 mpidr;
+	int sgi, c;
+	int vcpu_id = vcpu->vcpu_id;
+	bool broadcast;
+	int updated = 0;
+
+	sgi = (reg & ICC_SGI1R_SGI_ID_MASK) >> ICC_SGI1R_SGI_ID_SHIFT;
+	broadcast = reg & BIT(ICC_SGI1R_IRQ_ROUTING_MODE_BIT);
+	target_cpus = (reg & ICC_SGI1R_TARGET_LIST_MASK) >> ICC_SGI1R_TARGET_LIST_SHIFT;
+	mpidr = SGI_AFFINITY_LEVEL(reg, 3);
+	mpidr |= SGI_AFFINITY_LEVEL(reg, 2);
+	mpidr |= SGI_AFFINITY_LEVEL(reg, 1);
+
+	/*
+	 * We take the dist lock here, because we come from the sysregs
+	 * code path and not from the MMIO one (which already takes the lock).
+	 */
+	spin_lock(&dist->lock);
+
+	/*
+	 * We iterate over all VCPUs to find the MPIDRs matching the request.
+	 * If we have handled one CPU, we clear its bit to detect early
+	 * if we are already finished. This avoids iterating through all
+	 * VCPUs when most of the time we just signal a single VCPU.
+	 */
+	kvm_for_each_vcpu(c, c_vcpu, kvm) {
+
+		/* Exit early if we have dealt with all requested CPUs */
+		if (!broadcast && target_cpus == 0)
+			break;
+
+		 /* Don't signal the calling VCPU */
+		if (broadcast && c == vcpu_id)
+			continue;
+
+		if (!broadcast) {
+			int level0;
+
+			level0 = match_mpidr(mpidr, target_cpus, c_vcpu);
+			if (level0 == -1)
+				continue;
+
+			/* remove this matching VCPU from the mask */
+			target_cpus &= ~BIT(level0);
+		}
+
+		/* Flag the SGI as pending */
+		vgic_dist_irq_set_pending(c_vcpu, sgi);
+		updated = 1;
+		kvm_debug("SGI%d from CPU%d to CPU%d\n", sgi, vcpu_id, c);
+	}
+	if (updated)
+		vgic_update_state(vcpu->kvm);
+	spin_unlock(&dist->lock);
+	if (updated)
+		vgic_kick_vcpus(vcpu->kvm);
+}
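
(A minimal standalone sketch of decoding the ICC_SGI1R_EL1 fields consumed above: SGI number, routing mode, Aff0 target list and the upper affinity levels. The register value is a made-up example; illustration only, not part of the patch.)

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t reg = (3ULL << 24) |	/* SGI 3 */
		       (1ULL << 16) |	/* Aff1 = 1 */
		       0x0005ULL;	/* target list: Aff0 CPUs 0 and 2 */

	unsigned int sgi     = (reg >> 24) & 0xf;
	unsigned int irm     = (reg >> 40) & 0x1;	/* 1 = broadcast to all but self */
	unsigned int targets = reg & 0xffff;
	unsigned int aff1    = (reg >> 16) & 0xff;
	unsigned int aff2    = (reg >> 32) & 0xff;
	unsigned int aff3    = (reg >> 48) & 0xff;

	printf("SGI%u, %s, Aff3.2.1 = %u.%u.%u, Aff0 mask %#x\n",
	       sgi, irm ? "broadcast" : "targeted", aff3, aff2, aff1, targets);
	return 0;
}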
+
+static int vgic_v3_create(struct kvm_device *dev, u32 type)
+{
+	return kvm_vgic_create(dev->kvm, type);
+}
+
+static void vgic_v3_destroy(struct kvm_device *dev)
+{
+	kfree(dev);
+}
+
+static int vgic_v3_set_attr(struct kvm_device *dev,
+			    struct kvm_device_attr *attr)
+{
+	int ret;
+
+	ret = vgic_set_common_attr(dev, attr);
+	if (ret != -ENXIO)
+		return ret;
+
+	switch (attr->group) {
+	case KVM_DEV_ARM_VGIC_GRP_DIST_REGS:
+	case KVM_DEV_ARM_VGIC_GRP_CPU_REGS:
+		return -ENXIO;
+	}
+
+	return -ENXIO;
+}
+
+static int vgic_v3_get_attr(struct kvm_device *dev,
+			    struct kvm_device_attr *attr)
+{
+	int ret;
+
+	ret = vgic_get_common_attr(dev, attr);
+	if (ret != -ENXIO)
+		return ret;
+
+	switch (attr->group) {
+	case KVM_DEV_ARM_VGIC_GRP_DIST_REGS:
+	case KVM_DEV_ARM_VGIC_GRP_CPU_REGS:
+		return -ENXIO;
+	}
+
+	return -ENXIO;
+}
+
+static int vgic_v3_has_attr(struct kvm_device *dev,
+			    struct kvm_device_attr *attr)
+{
+	switch (attr->group) {
+	case KVM_DEV_ARM_VGIC_GRP_ADDR:
+		switch (attr->attr) {
+		case KVM_VGIC_V2_ADDR_TYPE_DIST:
+		case KVM_VGIC_V2_ADDR_TYPE_CPU:
+			return -ENXIO;
+		case KVM_VGIC_V3_ADDR_TYPE_DIST:
+		case KVM_VGIC_V3_ADDR_TYPE_REDIST:
+			return 0;
+		}
+		break;
+	case KVM_DEV_ARM_VGIC_GRP_DIST_REGS:
+	case KVM_DEV_ARM_VGIC_GRP_CPU_REGS:
+		return -ENXIO;
+	case KVM_DEV_ARM_VGIC_GRP_NR_IRQS:
+		return 0;
+	case KVM_DEV_ARM_VGIC_GRP_CTRL:
+		switch (attr->attr) {
+		case KVM_DEV_ARM_VGIC_CTRL_INIT:
+			return 0;
+		}
+	}
+	return -ENXIO;
+}
+
+struct kvm_device_ops kvm_arm_vgic_v3_ops = {
+	.name = "kvm-arm-vgic-v3",
+	.create = vgic_v3_create,
+	.destroy = vgic_v3_destroy,
+	.set_attr = vgic_v3_set_attr,
+	.get_attr = vgic_v3_get_attr,
+	.has_attr = vgic_v3_has_attr,
+};

+ 57 - 25
virt/kvm/arm/vgic-v3.c

@@ -34,6 +34,7 @@
 #define GICH_LR_VIRTUALID		(0x3ffUL << 0)
 #define GICH_LR_PHYSID_CPUID_SHIFT	(10)
 #define GICH_LR_PHYSID_CPUID		(7UL << GICH_LR_PHYSID_CPUID_SHIFT)
+#define ICH_LR_VIRTUALID_MASK		(BIT_ULL(32) - 1)
 
 /*
  * LRs are stored in reverse order in memory. make sure we index them
@@ -48,12 +49,17 @@ static struct vgic_lr vgic_v3_get_lr(const struct kvm_vcpu *vcpu, int lr)
 	struct vgic_lr lr_desc;
 	u64 val = vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[LR_INDEX(lr)];
 
-	lr_desc.irq	= val & GICH_LR_VIRTUALID;
-	if (lr_desc.irq <= 15)
-		lr_desc.source	= (val >> GICH_LR_PHYSID_CPUID_SHIFT) & 0x7;
+	if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3)
+		lr_desc.irq = val & ICH_LR_VIRTUALID_MASK;
 	else
-		lr_desc.source = 0;
-	lr_desc.state	= 0;
+		lr_desc.irq = val & GICH_LR_VIRTUALID;
+
+	lr_desc.source = 0;
+	if (lr_desc.irq <= 15 &&
+	    vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V2)
+		lr_desc.source = (val >> GICH_LR_PHYSID_CPUID_SHIFT) & 0x7;
+
+	lr_desc.state = 0;
 
 	if (val & ICH_LR_PENDING_BIT)
 		lr_desc.state |= LR_STATE_PENDING;
@@ -68,8 +74,20 @@ static struct vgic_lr vgic_v3_get_lr(const struct kvm_vcpu *vcpu, int lr)
 static void vgic_v3_set_lr(struct kvm_vcpu *vcpu, int lr,
 			   struct vgic_lr lr_desc)
 {
-	u64 lr_val = (((u32)lr_desc.source << GICH_LR_PHYSID_CPUID_SHIFT) |
-		      lr_desc.irq);
+	u64 lr_val;
+
+	lr_val = lr_desc.irq;
+
+	/*
+	 * Currently all guest IRQs are Group1, as Group0 would result
+	 * in a FIQ in the guest, which it wouldn't expect.
+	 * Eventually we want to make this configurable, so we may revisit
+	 * this in the future.
+	 */
+	if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3)
+		lr_val |= ICH_LR_GROUP;
+	else
+		lr_val |= (u32)lr_desc.source << GICH_LR_PHYSID_CPUID_SHIFT;
 
 	if (lr_desc.state & LR_STATE_PENDING)
 		lr_val |= ICH_LR_PENDING_BIT;
@@ -145,15 +163,27 @@ static void vgic_v3_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
 
 static void vgic_v3_enable(struct kvm_vcpu *vcpu)
 {
+	struct vgic_v3_cpu_if *vgic_v3 = &vcpu->arch.vgic_cpu.vgic_v3;
+
 	/*
 	 * By forcing VMCR to zero, the GIC will restore the binary
 	 * points to their reset values. Anything else resets to zero
 	 * anyway.
 	 */
-	vcpu->arch.vgic_cpu.vgic_v3.vgic_vmcr = 0;
+	vgic_v3->vgic_vmcr = 0;
+
+	/*
+	 * If we are emulating a GICv3, we do it in a non-GICv2-compatible
+	 * way, so we force SRE to 1 to demonstrate this to the guest.
+	 * This goes with the spec allowing the value to be RAO/WI.
+	 */
+	if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3)
+		vgic_v3->vgic_sre = ICC_SRE_EL1_SRE;
+	else
+		vgic_v3->vgic_sre = 0;
 
 	/* Get the show on the road... */
-	vcpu->arch.vgic_cpu.vgic_v3.vgic_hcr = ICH_HCR_EN;
+	vgic_v3->vgic_hcr = ICH_HCR_EN;
 }
 
 static const struct vgic_ops vgic_v3_ops = {
@@ -205,35 +235,37 @@ int vgic_v3_probe(struct device_node *vgic_node,
 	 * maximum of 16 list registers. Just ignore bit 4...
 	 */
 	vgic->nr_lr = (ich_vtr_el2 & 0xf) + 1;
+	vgic->can_emulate_gicv2 = false;
 
 	if (of_property_read_u32(vgic_node, "#redistributor-regions", &gicv_idx))
 		gicv_idx = 1;
 
 	gicv_idx += 3; /* Also skip GICD, GICC, GICH */
 	if (of_address_to_resource(vgic_node, gicv_idx, &vcpu_res)) {
-		kvm_err("Cannot obtain GICV region\n");
-		ret = -ENXIO;
-		goto out;
-	}
-
-	if (!PAGE_ALIGNED(vcpu_res.start)) {
-		kvm_err("GICV physical address 0x%llx not page aligned\n",
+		kvm_info("GICv3: no GICV resource entry\n");
+		vgic->vcpu_base = 0;
+	} else if (!PAGE_ALIGNED(vcpu_res.start)) {
+		pr_warn("GICV physical address 0x%llx not page aligned\n",
 			(unsigned long long)vcpu_res.start);
-		ret = -ENXIO;
-		goto out;
-	}
-
-	if (!PAGE_ALIGNED(resource_size(&vcpu_res))) {
-		kvm_err("GICV size 0x%llx not a multiple of page size 0x%lx\n",
+		vgic->vcpu_base = 0;
+	} else if (!PAGE_ALIGNED(resource_size(&vcpu_res))) {
+		pr_warn("GICV size 0x%llx not a multiple of page size 0x%lx\n",
 			(unsigned long long)resource_size(&vcpu_res),
 			PAGE_SIZE);
-		ret = -ENXIO;
-		goto out;
+		vgic->vcpu_base = 0;
+	} else {
+		vgic->vcpu_base = vcpu_res.start;
+		vgic->can_emulate_gicv2 = true;
+		kvm_register_device_ops(&kvm_arm_vgic_v2_ops,
+					KVM_DEV_TYPE_ARM_VGIC_V2);
 	}
+	if (vgic->vcpu_base == 0)
+		kvm_info("disabling GICv2 emulation\n");
+	kvm_register_device_ops(&kvm_arm_vgic_v3_ops, KVM_DEV_TYPE_ARM_VGIC_V3);
 
-	vgic->vcpu_base = vcpu_res.start;
 	vgic->vctrl_base = NULL;
 	vgic->type = VGIC_V3;
+	vgic->max_gic_vcpus = KVM_MAX_VCPUS;
 
 	kvm_info("%s@%llx IRQ%d\n", vgic_node->name,
 		 vcpu_res.start, vgic->maint_irq);

+ 280 - 847
virt/kvm/arm/vgic.c

@@ -75,37 +75,31 @@
  *   inactive as long as the external input line is held high.
  */
 
-#define VGIC_ADDR_UNDEF		(-1)
-#define IS_VGIC_ADDR_UNDEF(_x)  ((_x) == VGIC_ADDR_UNDEF)
-
-#define PRODUCT_ID_KVM		0x4b	/* ASCII code K */
-#define IMPLEMENTER_ARM		0x43b
-#define GICC_ARCH_VERSION_V2	0x2
-
-#define ACCESS_READ_VALUE	(1 << 0)
-#define ACCESS_READ_RAZ		(0 << 0)
-#define ACCESS_READ_MASK(x)	((x) & (1 << 0))
-#define ACCESS_WRITE_IGNORED	(0 << 1)
-#define ACCESS_WRITE_SETBIT	(1 << 1)
-#define ACCESS_WRITE_CLEARBIT	(2 << 1)
-#define ACCESS_WRITE_VALUE	(3 << 1)
-#define ACCESS_WRITE_MASK(x)	((x) & (3 << 1))
-
-static int vgic_init(struct kvm *kvm);
+#include "vgic.h"
+
 static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu);
 static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu);
-static void vgic_update_state(struct kvm *kvm);
-static void vgic_kick_vcpus(struct kvm *kvm);
-static u8 *vgic_get_sgi_sources(struct vgic_dist *dist, int vcpu_id, int sgi);
-static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg);
 static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr);
 static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr, struct vgic_lr lr_desc);
-static void vgic_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
-static void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
 
 static const struct vgic_ops *vgic_ops;
 static const struct vgic_params *vgic;
 
+static void add_sgi_source(struct kvm_vcpu *vcpu, int irq, int source)
+{
+	vcpu->kvm->arch.vgic.vm_ops.add_sgi_source(vcpu, irq, source);
+}
+
+static bool queue_sgi(struct kvm_vcpu *vcpu, int irq)
+{
+	return vcpu->kvm->arch.vgic.vm_ops.queue_sgi(vcpu, irq);
+}
+
+int kvm_vgic_map_resources(struct kvm *kvm)
+{
+	return kvm->arch.vgic.vm_ops.map_resources(kvm, vgic);
+}
+
 /*
  * struct vgic_bitmap contains a bitmap made of unsigned longs, but
  * extracts u32s out of them.
@@ -160,8 +154,7 @@ static unsigned long *u64_to_bitmask(u64 *val)
 	return (unsigned long *)val;
 }
 
-static u32 *vgic_bitmap_get_reg(struct vgic_bitmap *x,
-				int cpuid, u32 offset)
+u32 *vgic_bitmap_get_reg(struct vgic_bitmap *x, int cpuid, u32 offset)
 {
 	offset >>= 2;
 	if (!offset)
@@ -179,8 +172,8 @@ static int vgic_bitmap_get_irq_val(struct vgic_bitmap *x,
 	return test_bit(irq - VGIC_NR_PRIVATE_IRQS, x->shared);
 }
 
-static void vgic_bitmap_set_irq_val(struct vgic_bitmap *x, int cpuid,
-				    int irq, int val)
+void vgic_bitmap_set_irq_val(struct vgic_bitmap *x, int cpuid,
+			     int irq, int val)
 {
 	unsigned long *reg;
 
@@ -202,7 +195,7 @@ static unsigned long *vgic_bitmap_get_cpu_map(struct vgic_bitmap *x, int cpuid)
 	return x->private + cpuid;
 }
 
-static unsigned long *vgic_bitmap_get_shared_map(struct vgic_bitmap *x)
+unsigned long *vgic_bitmap_get_shared_map(struct vgic_bitmap *x)
 {
 	return x->shared;
 }
@@ -229,7 +222,7 @@ static void vgic_free_bytemap(struct vgic_bytemap *b)
 	b->shared = NULL;
 }
 
-static u32 *vgic_bytemap_get_reg(struct vgic_bytemap *x, int cpuid, u32 offset)
+u32 *vgic_bytemap_get_reg(struct vgic_bytemap *x, int cpuid, u32 offset)
 {
 	u32 *reg;
 
@@ -326,14 +319,14 @@ static int vgic_dist_irq_is_pending(struct kvm_vcpu *vcpu, int irq)
 	return vgic_bitmap_get_irq_val(&dist->irq_pending, vcpu->vcpu_id, irq);
 }
 
-static void vgic_dist_irq_set_pending(struct kvm_vcpu *vcpu, int irq)
+void vgic_dist_irq_set_pending(struct kvm_vcpu *vcpu, int irq)
 {
 	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
 
 	vgic_bitmap_set_irq_val(&dist->irq_pending, vcpu->vcpu_id, irq, 1);
 }
 
-static void vgic_dist_irq_clear_pending(struct kvm_vcpu *vcpu, int irq)
+void vgic_dist_irq_clear_pending(struct kvm_vcpu *vcpu, int irq)
 {
 	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
 
@@ -349,7 +342,7 @@ static void vgic_cpu_irq_set(struct kvm_vcpu *vcpu, int irq)
 			vcpu->arch.vgic_cpu.pending_shared);
 }
 
-static void vgic_cpu_irq_clear(struct kvm_vcpu *vcpu, int irq)
+void vgic_cpu_irq_clear(struct kvm_vcpu *vcpu, int irq)
 {
 	if (irq < VGIC_NR_PRIVATE_IRQS)
 		clear_bit(irq, vcpu->arch.vgic_cpu.pending_percpu);
@@ -363,16 +356,6 @@ static bool vgic_can_sample_irq(struct kvm_vcpu *vcpu, int irq)
 	return vgic_irq_is_edge(vcpu, irq) || !vgic_irq_is_queued(vcpu, irq);
 }
 
-static u32 mmio_data_read(struct kvm_exit_mmio *mmio, u32 mask)
-{
-	return le32_to_cpu(*((u32 *)mmio->data)) & mask;
-}
-
-static void mmio_data_write(struct kvm_exit_mmio *mmio, u32 mask, u32 value)
-{
-	*((u32 *)mmio->data) = cpu_to_le32(value) & mask;
-}
-
 /**
  * vgic_reg_access - access vgic register
  * @mmio:   pointer to the data describing the mmio access
@@ -384,8 +367,8 @@ static void mmio_data_write(struct kvm_exit_mmio *mmio, u32 mask, u32 value)
  * modes defined for vgic register access
  * (read,raz,write-ignored,setbit,clearbit,write)
  */
-static void vgic_reg_access(struct kvm_exit_mmio *mmio, u32 *reg,
-			    phys_addr_t offset, int mode)
+void vgic_reg_access(struct kvm_exit_mmio *mmio, u32 *reg,
+		     phys_addr_t offset, int mode)
 {
 	int word_offset = (offset & 3) * 8;
 	u32 mask = (1UL << (mmio->len * 8)) - 1;
@@ -434,107 +417,58 @@ static void vgic_reg_access(struct kvm_exit_mmio *mmio, u32 *reg,
 	}
 }
 
-static bool handle_mmio_misc(struct kvm_vcpu *vcpu,
-			     struct kvm_exit_mmio *mmio, phys_addr_t offset)
-{
-	u32 reg;
-	u32 word_offset = offset & 3;
-
-	switch (offset & ~3) {
-	case 0:			/* GICD_CTLR */
-		reg = vcpu->kvm->arch.vgic.enabled;
-		vgic_reg_access(mmio, &reg, word_offset,
-				ACCESS_READ_VALUE | ACCESS_WRITE_VALUE);
-		if (mmio->is_write) {
-			vcpu->kvm->arch.vgic.enabled = reg & 1;
-			vgic_update_state(vcpu->kvm);
-			return true;
-		}
-		break;
-
-	case 4:			/* GICD_TYPER */
-		reg  = (atomic_read(&vcpu->kvm->online_vcpus) - 1) << 5;
-		reg |= (vcpu->kvm->arch.vgic.nr_irqs >> 5) - 1;
-		vgic_reg_access(mmio, &reg, word_offset,
-				ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED);
-		break;
-
-	case 8:			/* GICD_IIDR */
-		reg = (PRODUCT_ID_KVM << 24) | (IMPLEMENTER_ARM << 0);
-		vgic_reg_access(mmio, &reg, word_offset,
-				ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED);
-		break;
-	}
-
-	return false;
-}
-
-static bool handle_mmio_raz_wi(struct kvm_vcpu *vcpu,
-			       struct kvm_exit_mmio *mmio, phys_addr_t offset)
+bool handle_mmio_raz_wi(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio,
+			phys_addr_t offset)
 {
 	vgic_reg_access(mmio, NULL, offset,
 			ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED);
 	return false;
 }
 
-static bool handle_mmio_set_enable_reg(struct kvm_vcpu *vcpu,
-				       struct kvm_exit_mmio *mmio,
-				       phys_addr_t offset)
+bool vgic_handle_enable_reg(struct kvm *kvm, struct kvm_exit_mmio *mmio,
+			    phys_addr_t offset, int vcpu_id, int access)
 {
 {
-	u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_enabled,
-				       vcpu->vcpu_id, offset);
-	vgic_reg_access(mmio, reg, offset,
-			ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT);
-	if (mmio->is_write) {
-		vgic_update_state(vcpu->kvm);
-		return true;
-	}
-
-	return false;
-}
+	u32 *reg;
+	int mode = ACCESS_READ_VALUE | access;
+	struct kvm_vcpu *target_vcpu = kvm_get_vcpu(kvm, vcpu_id);
 
 
-static bool handle_mmio_clear_enable_reg(struct kvm_vcpu *vcpu,
-					 struct kvm_exit_mmio *mmio,
-					 phys_addr_t offset)
-{
-	u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_enabled,
-				       vcpu->vcpu_id, offset);
-	vgic_reg_access(mmio, reg, offset,
-			ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT);
+	reg = vgic_bitmap_get_reg(&kvm->arch.vgic.irq_enabled, vcpu_id, offset);
+	vgic_reg_access(mmio, reg, offset, mode);
 	if (mmio->is_write) {
 	if (mmio->is_write) {
-		if (offset < 4) /* Force SGI enabled */
-			*reg |= 0xffff;
-		vgic_retire_disabled_irqs(vcpu);
-		vgic_update_state(vcpu->kvm);
+		if (access & ACCESS_WRITE_CLEARBIT) {
+			if (offset < 4) /* Force SGI enabled */
+				*reg |= 0xffff;
+			vgic_retire_disabled_irqs(target_vcpu);
+		}
+		vgic_update_state(kvm);
 		return true;
 		return true;
 	}
 	}
 
 
 	return false;
 	return false;
 }
 }
 
 
-static bool handle_mmio_set_pending_reg(struct kvm_vcpu *vcpu,
-					struct kvm_exit_mmio *mmio,
-					phys_addr_t offset)
+bool vgic_handle_set_pending_reg(struct kvm *kvm,
+				 struct kvm_exit_mmio *mmio,
+				 phys_addr_t offset, int vcpu_id)
 {
 {
 	u32 *reg, orig;
 	u32 *reg, orig;
 	u32 level_mask;
 	u32 level_mask;
-	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+	int mode = ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT;
+	struct vgic_dist *dist = &kvm->arch.vgic;
 
 
-	reg = vgic_bitmap_get_reg(&dist->irq_cfg, vcpu->vcpu_id, offset);
+	reg = vgic_bitmap_get_reg(&dist->irq_cfg, vcpu_id, offset);
 	level_mask = (~(*reg));
 	level_mask = (~(*reg));
 
 
 	/* Mark both level and edge triggered irqs as pending */
 	/* Mark both level and edge triggered irqs as pending */
-	reg = vgic_bitmap_get_reg(&dist->irq_pending, vcpu->vcpu_id, offset);
+	reg = vgic_bitmap_get_reg(&dist->irq_pending, vcpu_id, offset);
 	orig = *reg;
 	orig = *reg;
-	vgic_reg_access(mmio, reg, offset,
-			ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT);
+	vgic_reg_access(mmio, reg, offset, mode);
 
 
 	if (mmio->is_write) {
 	if (mmio->is_write) {
 		/* Set the soft-pending flag only for level-triggered irqs */
 		/* Set the soft-pending flag only for level-triggered irqs */
 		reg = vgic_bitmap_get_reg(&dist->irq_soft_pend,
 		reg = vgic_bitmap_get_reg(&dist->irq_soft_pend,
-					  vcpu->vcpu_id, offset);
-		vgic_reg_access(mmio, reg, offset,
-				ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT);
+					  vcpu_id, offset);
+		vgic_reg_access(mmio, reg, offset, mode);
 		*reg &= level_mask;
 		*reg &= level_mask;
 
 
 		/* Ignore writes to SGIs */
 		/* Ignore writes to SGIs */
@@ -543,31 +477,30 @@ static bool handle_mmio_set_pending_reg(struct kvm_vcpu *vcpu,
 			*reg |= orig & 0xffff;
 			*reg |= orig & 0xffff;
 		}
 		}
 
 
-		vgic_update_state(vcpu->kvm);
+		vgic_update_state(kvm);
 		return true;
 		return true;
 	}
 	}
 
 
 	return false;
 	return false;
 }
 }
 
 
-static bool handle_mmio_clear_pending_reg(struct kvm_vcpu *vcpu,
-					  struct kvm_exit_mmio *mmio,
-					  phys_addr_t offset)
+bool vgic_handle_clear_pending_reg(struct kvm *kvm,
+				   struct kvm_exit_mmio *mmio,
+				   phys_addr_t offset, int vcpu_id)
 {
 {
 	u32 *level_active;
 	u32 *level_active;
 	u32 *reg, orig;
 	u32 *reg, orig;
-	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+	int mode = ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT;
+	struct vgic_dist *dist = &kvm->arch.vgic;
 
 
-	reg = vgic_bitmap_get_reg(&dist->irq_pending, vcpu->vcpu_id, offset);
+	reg = vgic_bitmap_get_reg(&dist->irq_pending, vcpu_id, offset);
 	orig = *reg;
 	orig = *reg;
-	vgic_reg_access(mmio, reg, offset,
-			ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT);
+	vgic_reg_access(mmio, reg, offset, mode);
 	if (mmio->is_write) {
 	if (mmio->is_write) {
 		/* Re-set level triggered level-active interrupts */
 		/* Re-set level triggered level-active interrupts */
 		level_active = vgic_bitmap_get_reg(&dist->irq_level,
 		level_active = vgic_bitmap_get_reg(&dist->irq_level,
-					  vcpu->vcpu_id, offset);
-		reg = vgic_bitmap_get_reg(&dist->irq_pending,
-					  vcpu->vcpu_id, offset);
+					  vcpu_id, offset);
+		reg = vgic_bitmap_get_reg(&dist->irq_pending, vcpu_id, offset);
 		*reg |= *level_active;
 		*reg |= *level_active;
 
 
 		/* Ignore writes to SGIs */
 		/* Ignore writes to SGIs */
@@ -578,101 +511,12 @@ static bool handle_mmio_clear_pending_reg(struct kvm_vcpu *vcpu,
 
 
 		/* Clear soft-pending flags */
 		/* Clear soft-pending flags */
 		reg = vgic_bitmap_get_reg(&dist->irq_soft_pend,
 		reg = vgic_bitmap_get_reg(&dist->irq_soft_pend,
-					  vcpu->vcpu_id, offset);
-		vgic_reg_access(mmio, reg, offset,
-				ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT);
+					  vcpu_id, offset);
+		vgic_reg_access(mmio, reg, offset, mode);
 
 
-		vgic_update_state(vcpu->kvm);
+		vgic_update_state(kvm);
 		return true;
 		return true;
 	}
 	}
-
-	return false;
-}
-
-static bool handle_mmio_priority_reg(struct kvm_vcpu *vcpu,
-				     struct kvm_exit_mmio *mmio,
-				     phys_addr_t offset)
-{
-	u32 *reg = vgic_bytemap_get_reg(&vcpu->kvm->arch.vgic.irq_priority,
-					vcpu->vcpu_id, offset);
-	vgic_reg_access(mmio, reg, offset,
-			ACCESS_READ_VALUE | ACCESS_WRITE_VALUE);
-	return false;
-}
-
-#define GICD_ITARGETSR_SIZE	32
-#define GICD_CPUTARGETS_BITS	8
-#define GICD_IRQS_PER_ITARGETSR	(GICD_ITARGETSR_SIZE / GICD_CPUTARGETS_BITS)
-static u32 vgic_get_target_reg(struct kvm *kvm, int irq)
-{
-	struct vgic_dist *dist = &kvm->arch.vgic;
-	int i;
-	u32 val = 0;
-
-	irq -= VGIC_NR_PRIVATE_IRQS;
-
-	for (i = 0; i < GICD_IRQS_PER_ITARGETSR; i++)
-		val |= 1 << (dist->irq_spi_cpu[irq + i] + i * 8);
-
-	return val;
-}
-
-static void vgic_set_target_reg(struct kvm *kvm, u32 val, int irq)
-{
-	struct vgic_dist *dist = &kvm->arch.vgic;
-	struct kvm_vcpu *vcpu;
-	int i, c;
-	unsigned long *bmap;
-	u32 target;
-
-	irq -= VGIC_NR_PRIVATE_IRQS;
-
-	/*
-	 * Pick the LSB in each byte. This ensures we target exactly
-	 * one vcpu per IRQ. If the byte is null, assume we target
-	 * CPU0.
-	 */
-	for (i = 0; i < GICD_IRQS_PER_ITARGETSR; i++) {
-		int shift = i * GICD_CPUTARGETS_BITS;
-		target = ffs((val >> shift) & 0xffU);
-		target = target ? (target - 1) : 0;
-		dist->irq_spi_cpu[irq + i] = target;
-		kvm_for_each_vcpu(c, vcpu, kvm) {
-			bmap = vgic_bitmap_get_shared_map(&dist->irq_spi_target[c]);
-			if (c == target)
-				set_bit(irq + i, bmap);
-			else
-				clear_bit(irq + i, bmap);
-		}
-	}
-}
-
-static bool handle_mmio_target_reg(struct kvm_vcpu *vcpu,
-				   struct kvm_exit_mmio *mmio,
-				   phys_addr_t offset)
-{
-	u32 reg;
-
-	/* We treat the banked interrupts targets as read-only */
-	if (offset < 32) {
-		u32 roreg = 1 << vcpu->vcpu_id;
-		roreg |= roreg << 8;
-		roreg |= roreg << 16;
-
-		vgic_reg_access(mmio, &roreg, offset,
-				ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED);
-		return false;
-	}
-
-	reg = vgic_get_target_reg(vcpu->kvm, offset & ~3U);
-	vgic_reg_access(mmio, &reg, offset,
-			ACCESS_READ_VALUE | ACCESS_WRITE_VALUE);
-	if (mmio->is_write) {
-		vgic_set_target_reg(vcpu->kvm, reg, offset & ~3U);
-		vgic_update_state(vcpu->kvm);
-		return true;
-	}
-
 	return false;
 	return false;
 }
 }
 
 
@@ -711,14 +555,10 @@ static u16 vgic_cfg_compress(u32 val)
  * LSB is always 0. As such, we only keep the upper bit, and use the
  * LSB is always 0. As such, we only keep the upper bit, and use the
  * two above functions to compress/expand the bits
  * two above functions to compress/expand the bits
  */
  */
-static bool handle_mmio_cfg_reg(struct kvm_vcpu *vcpu,
-				struct kvm_exit_mmio *mmio, phys_addr_t offset)
+bool vgic_handle_cfg_reg(u32 *reg, struct kvm_exit_mmio *mmio,
+			 phys_addr_t offset)
 {
 {
 	u32 val;
 	u32 val;
-	u32 *reg;
-
-	reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_cfg,
-				  vcpu->vcpu_id, offset >> 1);
 
 
 	if (offset & 4)
 	if (offset & 4)
 		val = *reg >> 16;
 		val = *reg >> 16;
@@ -747,21 +587,6 @@ static bool handle_mmio_cfg_reg(struct kvm_vcpu *vcpu,
 	return false;
 	return false;
 }
 }
 
 
-static bool handle_mmio_sgi_reg(struct kvm_vcpu *vcpu,
-				struct kvm_exit_mmio *mmio, phys_addr_t offset)
-{
-	u32 reg;
-	vgic_reg_access(mmio, &reg, offset,
-			ACCESS_READ_RAZ | ACCESS_WRITE_VALUE);
-	if (mmio->is_write) {
-		vgic_dispatch_sgi(vcpu, reg);
-		vgic_update_state(vcpu->kvm);
-		return true;
-	}
-
-	return false;
-}
-
 /**
 /**
  * vgic_unqueue_irqs - move pending IRQs from LRs to the distributor
  * vgic_unqueue_irqs - move pending IRQs from LRs to the distributor
  * @vgic_cpu: Pointer to the vgic_cpu struct holding the LRs
  * @vgic_cpu: Pointer to the vgic_cpu struct holding the LRs
@@ -774,11 +599,9 @@ static bool handle_mmio_sgi_reg(struct kvm_vcpu *vcpu,
  * to the distributor but the active state stays in the LRs, because we don't
  * to the distributor but the active state stays in the LRs, because we don't
  * track the active state on the distributor side.
  * track the active state on the distributor side.
  */
  */
-static void vgic_unqueue_irqs(struct kvm_vcpu *vcpu)
+void vgic_unqueue_irqs(struct kvm_vcpu *vcpu)
 {
 {
-	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
 	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
 	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
-	int vcpu_id = vcpu->vcpu_id;
 	int i;
 	int i;
 
 
 	for_each_set_bit(i, vgic_cpu->lr_used, vgic_cpu->nr_lr) {
 	for_each_set_bit(i, vgic_cpu->lr_used, vgic_cpu->nr_lr) {
@@ -805,7 +628,7 @@ static void vgic_unqueue_irqs(struct kvm_vcpu *vcpu)
 		 */
 		 */
 		vgic_dist_irq_set_pending(vcpu, lr.irq);
 		vgic_dist_irq_set_pending(vcpu, lr.irq);
 		if (lr.irq < VGIC_NR_SGIS)
 		if (lr.irq < VGIC_NR_SGIS)
-			*vgic_get_sgi_sources(dist, vcpu_id, lr.irq) |= 1 << lr.source;
+			add_sgi_source(vcpu, lr.irq, lr.source);
 		lr.state &= ~LR_STATE_PENDING;
 		lr.state &= ~LR_STATE_PENDING;
 		vgic_set_lr(vcpu, i, lr);
 		vgic_set_lr(vcpu, i, lr);
 
 
@@ -824,188 +647,12 @@ static void vgic_unqueue_irqs(struct kvm_vcpu *vcpu)
 	}
 	}
 }
 }
 
 
-/* Handle reads of GICD_CPENDSGIRn and GICD_SPENDSGIRn */
-static bool read_set_clear_sgi_pend_reg(struct kvm_vcpu *vcpu,
-					struct kvm_exit_mmio *mmio,
-					phys_addr_t offset)
-{
-	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
-	int sgi;
-	int min_sgi = (offset & ~0x3);
-	int max_sgi = min_sgi + 3;
-	int vcpu_id = vcpu->vcpu_id;
-	u32 reg = 0;
-
-	/* Copy source SGIs from distributor side */
-	for (sgi = min_sgi; sgi <= max_sgi; sgi++) {
-		int shift = 8 * (sgi - min_sgi);
-		reg |= ((u32)*vgic_get_sgi_sources(dist, vcpu_id, sgi)) << shift;
-	}
-
-	mmio_data_write(mmio, ~0, reg);
-	return false;
-}
-
-static bool write_set_clear_sgi_pend_reg(struct kvm_vcpu *vcpu,
-					 struct kvm_exit_mmio *mmio,
-					 phys_addr_t offset, bool set)
-{
-	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
-	int sgi;
-	int min_sgi = (offset & ~0x3);
-	int max_sgi = min_sgi + 3;
-	int vcpu_id = vcpu->vcpu_id;
-	u32 reg;
-	bool updated = false;
-
-	reg = mmio_data_read(mmio, ~0);
-
-	/* Clear pending SGIs on the distributor */
-	for (sgi = min_sgi; sgi <= max_sgi; sgi++) {
-		u8 mask = reg >> (8 * (sgi - min_sgi));
-		u8 *src = vgic_get_sgi_sources(dist, vcpu_id, sgi);
-		if (set) {
-			if ((*src & mask) != mask)
-				updated = true;
-			*src |= mask;
-		} else {
-			if (*src & mask)
-				updated = true;
-			*src &= ~mask;
-		}
-	}
-
-	if (updated)
-		vgic_update_state(vcpu->kvm);
-
-	return updated;
-}
-
-static bool handle_mmio_sgi_set(struct kvm_vcpu *vcpu,
-				struct kvm_exit_mmio *mmio,
-				phys_addr_t offset)
-{
-	if (!mmio->is_write)
-		return read_set_clear_sgi_pend_reg(vcpu, mmio, offset);
-	else
-		return write_set_clear_sgi_pend_reg(vcpu, mmio, offset, true);
-}
-
-static bool handle_mmio_sgi_clear(struct kvm_vcpu *vcpu,
-				  struct kvm_exit_mmio *mmio,
-				  phys_addr_t offset)
-{
-	if (!mmio->is_write)
-		return read_set_clear_sgi_pend_reg(vcpu, mmio, offset);
-	else
-		return write_set_clear_sgi_pend_reg(vcpu, mmio, offset, false);
-}
-
-/*
- * I would have liked to use the kvm_bus_io_*() API instead, but it
- * cannot cope with banked registers (only the VM pointer is passed
- * around, and we need the vcpu). One of these days, someone please
- * fix it!
- */
-struct mmio_range {
-	phys_addr_t base;
-	unsigned long len;
-	int bits_per_irq;
-	bool (*handle_mmio)(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio,
-			    phys_addr_t offset);
-};
-
-static const struct mmio_range vgic_dist_ranges[] = {
-	{
-		.base		= GIC_DIST_CTRL,
-		.len		= 12,
-		.bits_per_irq	= 0,
-		.handle_mmio	= handle_mmio_misc,
-	},
-	{
-		.base		= GIC_DIST_IGROUP,
-		.len		= VGIC_MAX_IRQS / 8,
-		.bits_per_irq	= 1,
-		.handle_mmio	= handle_mmio_raz_wi,
-	},
-	{
-		.base		= GIC_DIST_ENABLE_SET,
-		.len		= VGIC_MAX_IRQS / 8,
-		.bits_per_irq	= 1,
-		.handle_mmio	= handle_mmio_set_enable_reg,
-	},
-	{
-		.base		= GIC_DIST_ENABLE_CLEAR,
-		.len		= VGIC_MAX_IRQS / 8,
-		.bits_per_irq	= 1,
-		.handle_mmio	= handle_mmio_clear_enable_reg,
-	},
-	{
-		.base		= GIC_DIST_PENDING_SET,
-		.len		= VGIC_MAX_IRQS / 8,
-		.bits_per_irq	= 1,
-		.handle_mmio	= handle_mmio_set_pending_reg,
-	},
-	{
-		.base		= GIC_DIST_PENDING_CLEAR,
-		.len		= VGIC_MAX_IRQS / 8,
-		.bits_per_irq	= 1,
-		.handle_mmio	= handle_mmio_clear_pending_reg,
-	},
-	{
-		.base		= GIC_DIST_ACTIVE_SET,
-		.len		= VGIC_MAX_IRQS / 8,
-		.bits_per_irq	= 1,
-		.handle_mmio	= handle_mmio_raz_wi,
-	},
-	{
-		.base		= GIC_DIST_ACTIVE_CLEAR,
-		.len		= VGIC_MAX_IRQS / 8,
-		.bits_per_irq	= 1,
-		.handle_mmio	= handle_mmio_raz_wi,
-	},
-	{
-		.base		= GIC_DIST_PRI,
-		.len		= VGIC_MAX_IRQS,
-		.bits_per_irq	= 8,
-		.handle_mmio	= handle_mmio_priority_reg,
-	},
-	{
-		.base		= GIC_DIST_TARGET,
-		.len		= VGIC_MAX_IRQS,
-		.bits_per_irq	= 8,
-		.handle_mmio	= handle_mmio_target_reg,
-	},
-	{
-		.base		= GIC_DIST_CONFIG,
-		.len		= VGIC_MAX_IRQS / 4,
-		.bits_per_irq	= 2,
-		.handle_mmio	= handle_mmio_cfg_reg,
-	},
-	{
-		.base		= GIC_DIST_SOFTINT,
-		.len		= 4,
-		.handle_mmio	= handle_mmio_sgi_reg,
-	},
-	{
-		.base		= GIC_DIST_SGI_PENDING_CLEAR,
-		.len		= VGIC_NR_SGIS,
-		.handle_mmio	= handle_mmio_sgi_clear,
-	},
-	{
-		.base		= GIC_DIST_SGI_PENDING_SET,
-		.len		= VGIC_NR_SGIS,
-		.handle_mmio	= handle_mmio_sgi_set,
-	},
-	{}
-};
-
-static const
-struct mmio_range *find_matching_range(const struct mmio_range *ranges,
+const
+struct kvm_mmio_range *vgic_find_range(const struct kvm_mmio_range *ranges,
 				       struct kvm_exit_mmio *mmio,
 				       struct kvm_exit_mmio *mmio,
 				       phys_addr_t offset)
 				       phys_addr_t offset)
 {
 {
-	const struct mmio_range *r = ranges;
+	const struct kvm_mmio_range *r = ranges;
 
 
 	while (r->len) {
 	while (r->len) {
 		if (offset >= r->base &&
 		if (offset >= r->base &&
@@ -1018,7 +665,7 @@ struct mmio_range *find_matching_range(const struct mmio_range *ranges,
 }
 }
 
 
 static bool vgic_validate_access(const struct vgic_dist *dist,
 static bool vgic_validate_access(const struct vgic_dist *dist,
-				 const struct mmio_range *range,
+				 const struct kvm_mmio_range *range,
 				 unsigned long offset)
 				 unsigned long offset)
 {
 {
 	int irq;
 	int irq;
@@ -1033,37 +680,76 @@ static bool vgic_validate_access(const struct vgic_dist *dist,
 	return true;
 	return true;
 }
 }
 
 
+/*
+ * Call the respective handler function for the given range.
+ * We split up any 64 bit accesses into two consecutive 32 bit
+ * handler calls and merge the result afterwards.
+ * We do this in a little endian fashion regardless of the host's
+ * or guest's endianness, because the GIC is always LE and the rest of
+ * the code (vgic_reg_access) also puts it in a LE fashion already.
+ * At this point we have already identified the handle function, so
+ * range points to that one entry and offset is relative to this.
+ */
+static bool call_range_handler(struct kvm_vcpu *vcpu,
+			       struct kvm_exit_mmio *mmio,
+			       unsigned long offset,
+			       const struct kvm_mmio_range *range)
+{
+	u32 *data32 = (void *)mmio->data;
+	struct kvm_exit_mmio mmio32;
+	bool ret;
+
+	if (likely(mmio->len <= 4))
+		return range->handle_mmio(vcpu, mmio, offset);
+
+	/*
+	 * Any access bigger than 4 bytes (that we currently handle in KVM)
+	 * is actually 8 bytes long, caused by a 64-bit access
+	 */
+
+	mmio32.len = 4;
+	mmio32.is_write = mmio->is_write;
+	mmio32.private = mmio->private;
+
+	mmio32.phys_addr = mmio->phys_addr + 4;
+	if (mmio->is_write)
+		*(u32 *)mmio32.data = data32[1];
+	ret = range->handle_mmio(vcpu, &mmio32, offset + 4);
+	if (!mmio->is_write)
+		data32[1] = *(u32 *)mmio32.data;
+
+	mmio32.phys_addr = mmio->phys_addr;
+	if (mmio->is_write)
+		*(u32 *)mmio32.data = data32[0];
+	ret |= range->handle_mmio(vcpu, &mmio32, offset);
+	if (!mmio->is_write)
+		data32[0] = *(u32 *)mmio32.data;
+
+	return ret;
+}
+
 /**
 /**
- * vgic_handle_mmio - handle an in-kernel MMIO access
+ * vgic_handle_mmio_range - handle an in-kernel MMIO access
  * @vcpu:	pointer to the vcpu performing the access
  * @vcpu:	pointer to the vcpu performing the access
  * @run:	pointer to the kvm_run structure
  * @run:	pointer to the kvm_run structure
  * @mmio:	pointer to the data describing the access
  * @mmio:	pointer to the data describing the access
+ * @ranges:	array of MMIO ranges in a given region
+ * @mmio_base:	base address of that region
  *
  *
- * returns true if the MMIO access has been performed in kernel space,
- * and false if it needs to be emulated in user space.
+ * returns true if the MMIO access could be performed
  */
  */
-bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
-		      struct kvm_exit_mmio *mmio)
+bool vgic_handle_mmio_range(struct kvm_vcpu *vcpu, struct kvm_run *run,
+			    struct kvm_exit_mmio *mmio,
+			    const struct kvm_mmio_range *ranges,
+			    unsigned long mmio_base)
 {
 {
-	const struct mmio_range *range;
+	const struct kvm_mmio_range *range;
 	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
 	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
-	unsigned long base = dist->vgic_dist_base;
 	bool updated_state;
 	bool updated_state;
 	unsigned long offset;
 	unsigned long offset;
 
 
-	if (!irqchip_in_kernel(vcpu->kvm) ||
-	    mmio->phys_addr < base ||
-	    (mmio->phys_addr + mmio->len) > (base + KVM_VGIC_V2_DIST_SIZE))
-		return false;
-
-	/* We don't support ldrd / strd or ldm / stm to the emulated vgic */
-	if (mmio->len > 4) {
-		kvm_inject_dabt(vcpu, mmio->phys_addr);
-		return true;
-	}
-
-	offset = mmio->phys_addr - base;
-	range = find_matching_range(vgic_dist_ranges, mmio, offset);
+	offset = mmio->phys_addr - mmio_base;
+	range = vgic_find_range(ranges, mmio, offset);
 	if (unlikely(!range || !range->handle_mmio)) {
 	if (unlikely(!range || !range->handle_mmio)) {
 		pr_warn("Unhandled access %d %08llx %d\n",
 		pr_warn("Unhandled access %d %08llx %d\n",
 			mmio->is_write, mmio->phys_addr, mmio->len);
 			mmio->is_write, mmio->phys_addr, mmio->len);
@@ -1071,12 +757,12 @@ bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
 	}
 	}
 
 
 	spin_lock(&vcpu->kvm->arch.vgic.lock);
 	spin_lock(&vcpu->kvm->arch.vgic.lock);
-	offset = mmio->phys_addr - range->base - base;
+	offset -= range->base;
 	if (vgic_validate_access(dist, range, offset)) {
 	if (vgic_validate_access(dist, range, offset)) {
-		updated_state = range->handle_mmio(vcpu, mmio, offset);
+		updated_state = call_range_handler(vcpu, mmio, offset, range);
 	} else {
 	} else {
-		vgic_reg_access(mmio, NULL, offset,
-				ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED);
+		if (!mmio->is_write)
+			memset(mmio->data, 0, mmio->len);
 		updated_state = false;
 		updated_state = false;
 	}
 	}
 	spin_unlock(&vcpu->kvm->arch.vgic.lock);
 	spin_unlock(&vcpu->kvm->arch.vgic.lock);
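
call_range_handler() above splits an 8-byte access into two consecutive little-endian 4-byte handler calls and merges the results. A toy host-side illustration of the same split, with a made-up word-sized handler standing in for range->handle_mmio() (byte-order conversion is omitted, since the kernel path already keeps the data in LE form):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Toy stand-in for a 32-bit range handler: the "device" is two registers. */
static uint32_t toy_reg[2];

static bool handle32(uint64_t offset, bool is_write, uint32_t *data)
{
	uint32_t idx = (offset / 4) & 1;

	if (is_write)
		toy_reg[idx] = *data;
	else
		*data = toy_reg[idx];
	return true;
}

/* Split an 8-byte access into two 32-bit handler calls, high word at
 * offset + 4, low word at offset, and merge the results -- the same shape
 * as call_range_handler() above. */
static bool handle64(uint64_t offset, bool is_write, void *buf)
{
	uint32_t *data32 = buf;
	bool ret;

	ret = handle32(offset + 4, is_write, &data32[1]);
	ret |= handle32(offset, is_write, &data32[0]);
	return ret;
}

int main(void)
{
	uint64_t val = 0x1122334455667788ULL, readback = 0;

	handle64(0, true, &val);
	handle64(0, false, &readback);
	printf("readback=%#llx\n", (unsigned long long)readback);
	return 0;
}
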
@@ -1089,50 +775,28 @@ bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
 	return true;
 	return true;
 }
 }
 
 
-static u8 *vgic_get_sgi_sources(struct vgic_dist *dist, int vcpu_id, int sgi)
-{
-	return dist->irq_sgi_sources + vcpu_id * VGIC_NR_SGIS + sgi;
-}
-
-static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg)
+/**
+ * vgic_handle_mmio - handle an in-kernel MMIO access for the GIC emulation
+ * @vcpu:      pointer to the vcpu performing the access
+ * @run:       pointer to the kvm_run structure
+ * @mmio:      pointer to the data describing the access
+ *
+ * returns true if the MMIO access has been performed in kernel space,
+ * and false if it needs to be emulated in user space.
+ * Calls the actual handling routine for the selected VGIC model.
+ */
+bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
+		      struct kvm_exit_mmio *mmio)
 {
 {
-	struct kvm *kvm = vcpu->kvm;
-	struct vgic_dist *dist = &kvm->arch.vgic;
-	int nrcpus = atomic_read(&kvm->online_vcpus);
-	u8 target_cpus;
-	int sgi, mode, c, vcpu_id;
-
-	vcpu_id = vcpu->vcpu_id;
-
-	sgi = reg & 0xf;
-	target_cpus = (reg >> 16) & 0xff;
-	mode = (reg >> 24) & 3;
-
-	switch (mode) {
-	case 0:
-		if (!target_cpus)
-			return;
-		break;
-
-	case 1:
-		target_cpus = ((1 << nrcpus) - 1) & ~(1 << vcpu_id) & 0xff;
-		break;
-
-	case 2:
-		target_cpus = 1 << vcpu_id;
-		break;
-	}
-
-	kvm_for_each_vcpu(c, vcpu, kvm) {
-		if (target_cpus & 1) {
-			/* Flag the SGI as pending */
-			vgic_dist_irq_set_pending(vcpu, sgi);
-			*vgic_get_sgi_sources(dist, c, sgi) |= 1 << vcpu_id;
-			kvm_debug("SGI%d from CPU%d to CPU%d\n", sgi, vcpu_id, c);
-		}
+	if (!irqchip_in_kernel(vcpu->kvm))
+		return false;
 
 
-		target_cpus >>= 1;
-	}
+	/*
+	 * This will currently call either vgic_v2_handle_mmio() or
+	 * vgic_v3_handle_mmio(), which in turn will call
+	 * vgic_handle_mmio_range() defined above.
+	 */
+	return vcpu->kvm->arch.vgic.vm_ops.handle_mmio(vcpu, run, mmio);
 }
 }
 
 
 static int vgic_nr_shared_irqs(struct vgic_dist *dist)
 static int vgic_nr_shared_irqs(struct vgic_dist *dist)
@@ -1173,7 +837,7 @@ static int compute_pending_for_cpu(struct kvm_vcpu *vcpu)
  * Update the interrupt state and determine which CPUs have pending
  * interrupts. Must be called with distributor lock held.
  */
-static void vgic_update_state(struct kvm *kvm)
+void vgic_update_state(struct kvm *kvm)
 {
 	struct vgic_dist *dist = &kvm->arch.vgic;
 	struct kvm_vcpu *vcpu;
@@ -1234,12 +898,12 @@ static inline void vgic_disable_underflow(struct kvm_vcpu *vcpu)
 	vgic_ops->disable_underflow(vcpu);
 }
 
-static inline void vgic_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr)
+void vgic_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr)
 {
 	vgic_ops->get_vmcr(vcpu, vmcr);
 }
 
-static void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr)
+void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr)
 {
 	vgic_ops->set_vmcr(vcpu, vmcr);
 }
@@ -1288,8 +952,9 @@ static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu)
 /*
 /*
  * Queue an interrupt to a CPU virtual interface. Return true on success,
  * Queue an interrupt to a CPU virtual interface. Return true on success,
  * or false if it wasn't possible to queue it.
  * or false if it wasn't possible to queue it.
+ * sgi_source must be zero for any non-SGI interrupts.
  */
  */
-static bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq)
+bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq)
 {
 {
 	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
 	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
 	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
 	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
@@ -1338,37 +1003,6 @@ static bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq)
 	return true;
 	return true;
 }
 }
 
 
-static bool vgic_queue_sgi(struct kvm_vcpu *vcpu, int irq)
-{
-	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
-	unsigned long sources;
-	int vcpu_id = vcpu->vcpu_id;
-	int c;
-
-	sources = *vgic_get_sgi_sources(dist, vcpu_id, irq);
-
-	for_each_set_bit(c, &sources, dist->nr_cpus) {
-		if (vgic_queue_irq(vcpu, c, irq))
-			clear_bit(c, &sources);
-	}
-
-	*vgic_get_sgi_sources(dist, vcpu_id, irq) = sources;
-
-	/*
-	 * If the sources bitmap has been cleared it means that we
-	 * could queue all the SGIs onto link registers (see the
-	 * clear_bit above), and therefore we are done with them in
-	 * our emulated gic and can get rid of them.
-	 */
-	if (!sources) {
-		vgic_dist_irq_clear_pending(vcpu, irq);
-		vgic_cpu_irq_clear(vcpu, irq);
-		return true;
-	}
-
-	return false;
-}
-
 static bool vgic_queue_hwirq(struct kvm_vcpu *vcpu, int irq)
 static bool vgic_queue_hwirq(struct kvm_vcpu *vcpu, int irq)
 {
 {
 	if (!vgic_can_sample_irq(vcpu, irq))
 	if (!vgic_can_sample_irq(vcpu, irq))
@@ -1413,7 +1047,7 @@ static void __kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
 
 
 	/* SGIs */
 	/* SGIs */
 	for_each_set_bit(i, vgic_cpu->pending_percpu, VGIC_NR_SGIS) {
 	for_each_set_bit(i, vgic_cpu->pending_percpu, VGIC_NR_SGIS) {
-		if (!vgic_queue_sgi(vcpu, i))
+		if (!queue_sgi(vcpu, i))
 			overflow = 1;
 			overflow = 1;
 	}
 	}
 
 
@@ -1575,7 +1209,7 @@ int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu)
 	return test_bit(vcpu->vcpu_id, dist->irq_pending_on_cpu);
 	return test_bit(vcpu->vcpu_id, dist->irq_pending_on_cpu);
 }
 }
 
 
-static void vgic_kick_vcpus(struct kvm *kvm)
+void vgic_kick_vcpus(struct kvm *kvm)
 {
 {
 	struct kvm_vcpu *vcpu;
 	struct kvm_vcpu *vcpu;
 	int c;
 	int c;
@@ -1615,7 +1249,7 @@ static int vgic_update_irq_pending(struct kvm *kvm, int cpuid,
 	struct kvm_vcpu *vcpu;
 	struct kvm_vcpu *vcpu;
 	int edge_triggered, level_triggered;
 	int edge_triggered, level_triggered;
 	int enabled;
 	int enabled;
-	bool ret = true;
+	bool ret = true, can_inject = true;
 
 
 	spin_lock(&dist->lock);
 	spin_lock(&dist->lock);
 
 
@@ -1630,6 +1264,11 @@ static int vgic_update_irq_pending(struct kvm *kvm, int cpuid,
 
 
 	if (irq_num >= VGIC_NR_PRIVATE_IRQS) {
 	if (irq_num >= VGIC_NR_PRIVATE_IRQS) {
 		cpuid = dist->irq_spi_cpu[irq_num - VGIC_NR_PRIVATE_IRQS];
 		cpuid = dist->irq_spi_cpu[irq_num - VGIC_NR_PRIVATE_IRQS];
+		if (cpuid == VCPU_NOT_ALLOCATED) {
+			/* Pretend we use CPU0, and prevent injection */
+			cpuid = 0;
+			can_inject = false;
+		}
 		vcpu = kvm_get_vcpu(kvm, cpuid);
 		vcpu = kvm_get_vcpu(kvm, cpuid);
 	}
 	}
 
 
@@ -1652,7 +1291,7 @@ static int vgic_update_irq_pending(struct kvm *kvm, int cpuid,
 
 
 	enabled = vgic_irq_is_enabled(vcpu, irq_num);
 	enabled = vgic_irq_is_enabled(vcpu, irq_num);
 
 
-	if (!enabled) {
+	if (!enabled || !can_inject) {
 		ret = false;
 		ret = false;
 		goto out;
 		goto out;
 	}
 	}
@@ -1698,6 +1337,16 @@ int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num,
 	int vcpu_id;
 	int vcpu_id;
 
 
 	if (unlikely(!vgic_initialized(kvm))) {
 	if (unlikely(!vgic_initialized(kvm))) {
+		/*
+		 * We only provide the automatic initialization of the VGIC
+		 * for the legacy case of a GICv2. Any other type must
+		 * be explicitly initialized once setup with the respective
+		 * KVM device call.
+		 */
+		if (kvm->arch.vgic.vgic_model != KVM_DEV_TYPE_ARM_VGIC_V2) {
+			ret = -EBUSY;
+			goto out;
+		}
 		mutex_lock(&kvm->lock);
 		mutex_lock(&kvm->lock);
 		ret = vgic_init(kvm);
 		ret = vgic_init(kvm);
 		mutex_unlock(&kvm->lock);
 		mutex_unlock(&kvm->lock);
@@ -1762,6 +1411,17 @@ static int vgic_vcpu_init_maps(struct kvm_vcpu *vcpu, int nr_irqs)
 	return 0;
 	return 0;
 }
 }
 
 
+/**
+ * kvm_vgic_get_max_vcpus - Get the maximum number of VCPUs allowed by HW
+ *
+ * The host's GIC naturally limits the maximum amount of VCPUs a guest
+ * can use.
+ */
+int kvm_vgic_get_max_vcpus(void)
+{
+	return vgic->max_gic_vcpus;
+}
+
 void kvm_vgic_destroy(struct kvm *kvm)
 void kvm_vgic_destroy(struct kvm *kvm)
 {
 {
 	struct vgic_dist *dist = &kvm->arch.vgic;
 	struct vgic_dist *dist = &kvm->arch.vgic;
@@ -1784,6 +1444,7 @@ void kvm_vgic_destroy(struct kvm *kvm)
 	}
 	}
 	kfree(dist->irq_sgi_sources);
 	kfree(dist->irq_sgi_sources);
 	kfree(dist->irq_spi_cpu);
 	kfree(dist->irq_spi_cpu);
+	kfree(dist->irq_spi_mpidr);
 	kfree(dist->irq_spi_target);
 	kfree(dist->irq_spi_target);
 	kfree(dist->irq_pending_on_cpu);
 	kfree(dist->irq_pending_on_cpu);
 	dist->irq_sgi_sources = NULL;
 	dist->irq_sgi_sources = NULL;
@@ -1797,7 +1458,7 @@ void kvm_vgic_destroy(struct kvm *kvm)
  * Allocate and initialize the various data structures. Must be called
  * Allocate and initialize the various data structures. Must be called
  * with kvm->lock held!
  * with kvm->lock held!
  */
  */
-static int vgic_init(struct kvm *kvm)
+int vgic_init(struct kvm *kvm)
 {
 {
 	struct vgic_dist *dist = &kvm->arch.vgic;
 	struct vgic_dist *dist = &kvm->arch.vgic;
 	struct kvm_vcpu *vcpu;
 	struct kvm_vcpu *vcpu;
@@ -1809,7 +1470,7 @@ static int vgic_init(struct kvm *kvm)
 
 
 	nr_cpus = dist->nr_cpus = atomic_read(&kvm->online_vcpus);
 	nr_cpus = dist->nr_cpus = atomic_read(&kvm->online_vcpus);
 	if (!nr_cpus)		/* No vcpus? Can't be good... */
 	if (!nr_cpus)		/* No vcpus? Can't be good... */
-		return -EINVAL;
+		return -ENODEV;
 
 
 	/*
 	/*
 	 * If nobody configured the number of interrupts, use the
 	 * If nobody configured the number of interrupts, use the
@@ -1852,8 +1513,9 @@ static int vgic_init(struct kvm *kvm)
 	if (ret)
 	if (ret)
 		goto out;
 		goto out;
 
 
-	for (i = VGIC_NR_PRIVATE_IRQS; i < dist->nr_irqs; i += 4)
-		vgic_set_target_reg(kvm, 0, i);
+	ret = kvm->arch.vgic.vm_ops.init_model(kvm);
+	if (ret)
+		goto out;
 
 
 	kvm_for_each_vcpu(vcpu_id, vcpu, kvm) {
 	kvm_for_each_vcpu(vcpu_id, vcpu, kvm) {
 		ret = vgic_vcpu_init_maps(vcpu, nr_irqs);
 		ret = vgic_vcpu_init_maps(vcpu, nr_irqs);
@@ -1882,71 +1544,48 @@ out:
 	return ret;
 	return ret;
 }
 }
 
 
-/**
- * kvm_vgic_map_resources - Configure global VGIC state before running any VCPUs
- * @kvm: pointer to the kvm struct
- *
- * Map the virtual CPU interface into the VM before running any VCPUs.  We
- * can't do this at creation time, because user space must first set the
- * virtual CPU interface address in the guest physical address space.
- */
-int kvm_vgic_map_resources(struct kvm *kvm)
+static int init_vgic_model(struct kvm *kvm, int type)
 {
 {
-	int ret = 0;
-
-	if (!irqchip_in_kernel(kvm))
-		return 0;
-
-	mutex_lock(&kvm->lock);
-
-	if (vgic_ready(kvm))
-		goto out;
-
-	if (IS_VGIC_ADDR_UNDEF(kvm->arch.vgic.vgic_dist_base) ||
-	    IS_VGIC_ADDR_UNDEF(kvm->arch.vgic.vgic_cpu_base)) {
-		kvm_err("Need to set vgic cpu and dist addresses first\n");
-		ret = -ENXIO;
-		goto out;
-	}
-
-	/*
-	 * Initialize the vgic if this hasn't already been done on demand by
-	 * accessing the vgic state from userspace.
-	 */
-	ret = vgic_init(kvm);
-	if (ret) {
-		kvm_err("Unable to allocate maps\n");
-		goto out;
+	switch (type) {
+	case KVM_DEV_TYPE_ARM_VGIC_V2:
+		vgic_v2_init_emulation(kvm);
+		break;
+#ifdef CONFIG_ARM_GIC_V3
+	case KVM_DEV_TYPE_ARM_VGIC_V3:
+		vgic_v3_init_emulation(kvm);
+		break;
+#endif
+	default:
+		return -ENODEV;
 	}
 	}
 
 
-	ret = kvm_phys_addr_ioremap(kvm, kvm->arch.vgic.vgic_cpu_base,
-				    vgic->vcpu_base, KVM_VGIC_V2_CPU_SIZE,
-				    true);
-	if (ret) {
-		kvm_err("Unable to remap VGIC CPU to VCPU\n");
-		goto out;
-	}
+	if (atomic_read(&kvm->online_vcpus) > kvm->arch.max_vcpus)
+		return -E2BIG;
 
 
-	kvm->arch.vgic.ready = true;
-out:
-	if (ret)
-		kvm_vgic_destroy(kvm);
-	mutex_unlock(&kvm->lock);
-	return ret;
+	return 0;
 }
 }
 
 
-int kvm_vgic_create(struct kvm *kvm)
+int kvm_vgic_create(struct kvm *kvm, u32 type)
 {
 {
 	int i, vcpu_lock_idx = -1, ret;
 	int i, vcpu_lock_idx = -1, ret;
 	struct kvm_vcpu *vcpu;
 	struct kvm_vcpu *vcpu;
 
 
 	mutex_lock(&kvm->lock);
 	mutex_lock(&kvm->lock);
 
 
-	if (kvm->arch.vgic.vctrl_base) {
+	if (irqchip_in_kernel(kvm)) {
 		ret = -EEXIST;
 		ret = -EEXIST;
 		goto out;
 		goto out;
 	}
 	}
 
 
+	/*
+	 * This function is also called by the KVM_CREATE_IRQCHIP handler,
+	 * which had no chance yet to check the availability of the GICv2
+	 * emulation. So check this here again. KVM_CREATE_DEVICE does
+	 * the proper checks already.
+	 */
+	if (type == KVM_DEV_TYPE_ARM_VGIC_V2 && !vgic->can_emulate_gicv2)
+		return -ENODEV;
+
 	/*
 	/*
 	 * Any time a vcpu is run, vcpu_load is called which tries to grab the
 	 * Any time a vcpu is run, vcpu_load is called which tries to grab the
 	 * vcpu->mutex.  By grabbing the vcpu->mutex of all VCPUs we ensure
 	 * vcpu->mutex.  By grabbing the vcpu->mutex of all VCPUs we ensure
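
For the non-legacy path that the comment above contrasts with KVM_CREATE_IRQCHIP, userspace creates the device explicitly and, for anything other than a GICv2, must also issue the new KVM_DEV_ARM_VGIC_CTRL_INIT before running vCPUs. A hedged sketch (error handling trimmed; vm_fd is assumed to be an open KVM VM descriptor, and the constants come from the UAPI headers this series updates):

#include <linux/kvm.h>
#include <sys/ioctl.h>

/* Create an in-kernel GICv3 with the device API and trigger the explicit
 * init that non-GICv2 models now require (region addresses would be set in
 * between, see the address sketch further down).  Returns the device fd. */
static int create_gicv3(int vm_fd)
{
	struct kvm_create_device cd = {
		.type = KVM_DEV_TYPE_ARM_VGIC_V3,
	};
	struct kvm_device_attr init = {
		.group = KVM_DEV_ARM_VGIC_GRP_CTRL,
		.attr  = KVM_DEV_ARM_VGIC_CTRL_INIT,
	};

	if (ioctl(vm_fd, KVM_CREATE_DEVICE, &cd))
		return -1;

	/* ...KVM_DEV_ARM_VGIC_GRP_ADDR attributes go here... */

	if (ioctl(cd.fd, KVM_SET_DEVICE_ATTR, &init))
		return -1;
	return cd.fd;
}
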
@@ -1965,11 +1604,17 @@ int kvm_vgic_create(struct kvm *kvm)
 	}
 	}
 	ret = 0;
 	ret = 0;
 
 
+	ret = init_vgic_model(kvm, type);
+	if (ret)
+		goto out_unlock;
+
 	spin_lock_init(&kvm->arch.vgic.lock);
 	spin_lock_init(&kvm->arch.vgic.lock);
 	kvm->arch.vgic.in_kernel = true;
 	kvm->arch.vgic.in_kernel = true;
+	kvm->arch.vgic.vgic_model = type;
 	kvm->arch.vgic.vctrl_base = vgic->vctrl_base;
 	kvm->arch.vgic.vctrl_base = vgic->vctrl_base;
 	kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF;
 	kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF;
 	kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF;
 	kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF;
+	kvm->arch.vgic.vgic_redist_base = VGIC_ADDR_UNDEF;
 
 
 out_unlock:
 out_unlock:
 	for (; vcpu_lock_idx >= 0; vcpu_lock_idx--) {
 	for (; vcpu_lock_idx >= 0; vcpu_lock_idx--) {
@@ -2022,7 +1667,7 @@ static int vgic_ioaddr_assign(struct kvm *kvm, phys_addr_t *ioaddr,
 /**
 /**
  * kvm_vgic_addr - set or get vgic VM base addresses
  * kvm_vgic_addr - set or get vgic VM base addresses
  * @kvm:   pointer to the vm struct
  * @kvm:   pointer to the vm struct
- * @type:  the VGIC addr type, one of KVM_VGIC_V2_ADDR_TYPE_XXX
+ * @type:  the VGIC addr type, one of KVM_VGIC_V[23]_ADDR_TYPE_XXX
  * @addr:  pointer to address value
  * @addr:  pointer to address value
  * @write: if true set the address in the VM address space, if false read the
  * @write: if true set the address in the VM address space, if false read the
  *          address
  *          address
@@ -2036,216 +1681,64 @@ int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write)
 {
 {
 	int r = 0;
 	int r = 0;
 	struct vgic_dist *vgic = &kvm->arch.vgic;
 	struct vgic_dist *vgic = &kvm->arch.vgic;
+	int type_needed;
+	phys_addr_t *addr_ptr, block_size;
+	phys_addr_t alignment;
 
 
 	mutex_lock(&kvm->lock);
 	mutex_lock(&kvm->lock);
 	switch (type) {
 	switch (type) {
 	case KVM_VGIC_V2_ADDR_TYPE_DIST:
 	case KVM_VGIC_V2_ADDR_TYPE_DIST:
-		if (write) {
-			r = vgic_ioaddr_assign(kvm, &vgic->vgic_dist_base,
-					       *addr, KVM_VGIC_V2_DIST_SIZE);
-		} else {
-			*addr = vgic->vgic_dist_base;
-		}
+		type_needed = KVM_DEV_TYPE_ARM_VGIC_V2;
+		addr_ptr = &vgic->vgic_dist_base;
+		block_size = KVM_VGIC_V2_DIST_SIZE;
+		alignment = SZ_4K;
 		break;
 		break;
 	case KVM_VGIC_V2_ADDR_TYPE_CPU:
 	case KVM_VGIC_V2_ADDR_TYPE_CPU:
-		if (write) {
-			r = vgic_ioaddr_assign(kvm, &vgic->vgic_cpu_base,
-					       *addr, KVM_VGIC_V2_CPU_SIZE);
-		} else {
-			*addr = vgic->vgic_cpu_base;
-		}
+		type_needed = KVM_DEV_TYPE_ARM_VGIC_V2;
+		addr_ptr = &vgic->vgic_cpu_base;
+		block_size = KVM_VGIC_V2_CPU_SIZE;
+		alignment = SZ_4K;
 		break;
 		break;
-	default:
-		r = -ENODEV;
-	}
-
-	mutex_unlock(&kvm->lock);
-	return r;
-}
-
-static bool handle_cpu_mmio_misc(struct kvm_vcpu *vcpu,
-				 struct kvm_exit_mmio *mmio, phys_addr_t offset)
-{
-	bool updated = false;
-	struct vgic_vmcr vmcr;
-	u32 *vmcr_field;
-	u32 reg;
-
-	vgic_get_vmcr(vcpu, &vmcr);
-
-	switch (offset & ~0x3) {
-	case GIC_CPU_CTRL:
-		vmcr_field = &vmcr.ctlr;
-		break;
-	case GIC_CPU_PRIMASK:
-		vmcr_field = &vmcr.pmr;
+#ifdef CONFIG_ARM_GIC_V3
+	case KVM_VGIC_V3_ADDR_TYPE_DIST:
+		type_needed = KVM_DEV_TYPE_ARM_VGIC_V3;
+		addr_ptr = &vgic->vgic_dist_base;
+		block_size = KVM_VGIC_V3_DIST_SIZE;
+		alignment = SZ_64K;
 		break;
 		break;
-	case GIC_CPU_BINPOINT:
-		vmcr_field = &vmcr.bpr;
-		break;
-	case GIC_CPU_ALIAS_BINPOINT:
-		vmcr_field = &vmcr.abpr;
+	case KVM_VGIC_V3_ADDR_TYPE_REDIST:
+		type_needed = KVM_DEV_TYPE_ARM_VGIC_V3;
+		addr_ptr = &vgic->vgic_redist_base;
+		block_size = KVM_VGIC_V3_REDIST_SIZE;
+		alignment = SZ_64K;
 		break;
 		break;
+#endif
 	default:
 	default:
-		BUG();
-	}
-
-	if (!mmio->is_write) {
-		reg = *vmcr_field;
-		mmio_data_write(mmio, ~0, reg);
-	} else {
-		reg = mmio_data_read(mmio, ~0);
-		if (reg != *vmcr_field) {
-			*vmcr_field = reg;
-			vgic_set_vmcr(vcpu, &vmcr);
-			updated = true;
-		}
-	}
-	return updated;
-}
-
-static bool handle_mmio_abpr(struct kvm_vcpu *vcpu,
-			     struct kvm_exit_mmio *mmio, phys_addr_t offset)
-{
-	return handle_cpu_mmio_misc(vcpu, mmio, GIC_CPU_ALIAS_BINPOINT);
-}
-
-static bool handle_cpu_mmio_ident(struct kvm_vcpu *vcpu,
-				  struct kvm_exit_mmio *mmio,
-				  phys_addr_t offset)
-{
-	u32 reg;
-
-	if (mmio->is_write)
-		return false;
-
-	/* GICC_IIDR */
-	reg = (PRODUCT_ID_KVM << 20) |
-	      (GICC_ARCH_VERSION_V2 << 16) |
-	      (IMPLEMENTER_ARM << 0);
-	mmio_data_write(mmio, ~0, reg);
-	return false;
-}
-
-/*
- * CPU Interface Register accesses - these are not accessed by the VM, but by
- * user space for saving and restoring VGIC state.
- */
-static const struct mmio_range vgic_cpu_ranges[] = {
-	{
-		.base		= GIC_CPU_CTRL,
-		.len		= 12,
-		.handle_mmio	= handle_cpu_mmio_misc,
-	},
-	{
-		.base		= GIC_CPU_ALIAS_BINPOINT,
-		.len		= 4,
-		.handle_mmio	= handle_mmio_abpr,
-	},
-	{
-		.base		= GIC_CPU_ACTIVEPRIO,
-		.len		= 16,
-		.handle_mmio	= handle_mmio_raz_wi,
-	},
-	{
-		.base		= GIC_CPU_IDENT,
-		.len		= 4,
-		.handle_mmio	= handle_cpu_mmio_ident,
-	},
-};
-
-static int vgic_attr_regs_access(struct kvm_device *dev,
-				 struct kvm_device_attr *attr,
-				 u32 *reg, bool is_write)
-{
-	const struct mmio_range *r = NULL, *ranges;
-	phys_addr_t offset;
-	int ret, cpuid, c;
-	struct kvm_vcpu *vcpu, *tmp_vcpu;
-	struct vgic_dist *vgic;
-	struct kvm_exit_mmio mmio;
-
-	offset = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK;
-	cpuid = (attr->attr & KVM_DEV_ARM_VGIC_CPUID_MASK) >>
-		KVM_DEV_ARM_VGIC_CPUID_SHIFT;
-
-	mutex_lock(&dev->kvm->lock);
-
-	ret = vgic_init(dev->kvm);
-	if (ret)
-		goto out;
-
-	if (cpuid >= atomic_read(&dev->kvm->online_vcpus)) {
-		ret = -EINVAL;
+		r = -ENODEV;
 		goto out;
 		goto out;
 	}
 	}
 
 
-	vcpu = kvm_get_vcpu(dev->kvm, cpuid);
-	vgic = &dev->kvm->arch.vgic;
-
-	mmio.len = 4;
-	mmio.is_write = is_write;
-	if (is_write)
-		mmio_data_write(&mmio, ~0, *reg);
-	switch (attr->group) {
-	case KVM_DEV_ARM_VGIC_GRP_DIST_REGS:
-		mmio.phys_addr = vgic->vgic_dist_base + offset;
-		ranges = vgic_dist_ranges;
-		break;
-	case KVM_DEV_ARM_VGIC_GRP_CPU_REGS:
-		mmio.phys_addr = vgic->vgic_cpu_base + offset;
-		ranges = vgic_cpu_ranges;
-		break;
-	default:
-		BUG();
-	}
-	r = find_matching_range(ranges, &mmio, offset);
-
-	if (unlikely(!r || !r->handle_mmio)) {
-		ret = -ENXIO;
+	if (vgic->vgic_model != type_needed) {
+		r = -ENODEV;
 		goto out;
 		goto out;
 	}
 	}
 
 
-
-	spin_lock(&vgic->lock);
-
-	/*
-	 * Ensure that no other VCPU is running by checking the vcpu->cpu
-	 * field.  If no other VPCUs are running we can safely access the VGIC
-	 * state, because even if another VPU is run after this point, that
-	 * VCPU will not touch the vgic state, because it will block on
-	 * getting the vgic->lock in kvm_vgic_sync_hwstate().
-	 */
-	kvm_for_each_vcpu(c, tmp_vcpu, dev->kvm) {
-		if (unlikely(tmp_vcpu->cpu != -1)) {
-			ret = -EBUSY;
-			goto out_vgic_unlock;
-		}
+	if (write) {
+		if (!IS_ALIGNED(*addr, alignment))
+			r = -EINVAL;
+		else
+			r = vgic_ioaddr_assign(kvm, addr_ptr, *addr,
+					       block_size);
+	} else {
+		*addr = *addr_ptr;
 	}
 	}
 
 
-	/*
-	 * Move all pending IRQs from the LRs on all VCPUs so the pending
-	 * state can be properly represented in the register state accessible
-	 * through this API.
-	 */
-	kvm_for_each_vcpu(c, tmp_vcpu, dev->kvm)
-		vgic_unqueue_irqs(tmp_vcpu);
-
-	offset -= r->base;
-	r->handle_mmio(vcpu, &mmio, offset);
-
-	if (!is_write)
-		*reg = mmio_data_read(&mmio, ~0);
-
-	ret = 0;
-out_vgic_unlock:
-	spin_unlock(&vgic->lock);
 out:
 out:
-	mutex_unlock(&dev->kvm->lock);
-	return ret;
+	mutex_unlock(&kvm->lock);
+	return r;
 }
 }
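
kvm_vgic_addr() now covers both GICv2 and GICv3 region types and enforces 4K/64K alignment. A hedged userspace sketch of driving it through the device attribute API (error handling trimmed; vgic_fd is assumed to come from KVM_CREATE_DEVICE for a GICv3, and the constants are the ones this series adds to the arm64 UAPI headers):

#include <linux/kvm.h>
#include <stdint.h>
#include <sys/ioctl.h>

/* Place the emulated GICv3 distributor and redistributor regions in guest
 * physical memory; both must be 64K-aligned, matching the check added to
 * kvm_vgic_addr(). */
static int set_vgic_v3_addrs(int vgic_fd, uint64_t dist_gpa, uint64_t redist_gpa)
{
	struct kvm_device_attr attr = {
		.group = KVM_DEV_ARM_VGIC_GRP_ADDR,
		.attr  = KVM_VGIC_V3_ADDR_TYPE_DIST,
		.addr  = (uint64_t)(uintptr_t)&dist_gpa,
	};

	if (ioctl(vgic_fd, KVM_SET_DEVICE_ATTR, &attr))
		return -1;

	attr.attr = KVM_VGIC_V3_ADDR_TYPE_REDIST;
	attr.addr = (uint64_t)(uintptr_t)&redist_gpa;
	return ioctl(vgic_fd, KVM_SET_DEVICE_ATTR, &attr);
}
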
 
 
-static int vgic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
+int vgic_set_common_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
 {
 {
 	int r;
 	int r;
 
 
@@ -2261,17 +1754,6 @@ static int vgic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
 		r = kvm_vgic_addr(dev->kvm, type, &addr, true);
 		r = kvm_vgic_addr(dev->kvm, type, &addr, true);
 		return (r == -ENODEV) ? -ENXIO : r;
 		return (r == -ENODEV) ? -ENXIO : r;
 	}
 	}
-
-	case KVM_DEV_ARM_VGIC_GRP_DIST_REGS:
-	case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: {
-		u32 __user *uaddr = (u32 __user *)(long)attr->addr;
-		u32 reg;
-
-		if (get_user(reg, uaddr))
-			return -EFAULT;
-
-		return vgic_attr_regs_access(dev, attr, &reg, true);
-	}
 	case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: {
 	case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: {
 		u32 __user *uaddr = (u32 __user *)(long)attr->addr;
 		u32 __user *uaddr = (u32 __user *)(long)attr->addr;
 		u32 val;
 		u32 val;
@@ -2302,13 +1784,20 @@ static int vgic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
 
 
 		return ret;
 		return ret;
 	}
 	}
-
+	case KVM_DEV_ARM_VGIC_GRP_CTRL: {
+		switch (attr->attr) {
+		case KVM_DEV_ARM_VGIC_CTRL_INIT:
+			r = vgic_init(dev->kvm);
+			return r;
+		}
+		break;
+	}
 	}
 	}
 
 
 	return -ENXIO;
 	return -ENXIO;
 }
 }
 
 
-static int vgic_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
+int vgic_get_common_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
 {
 {
 	int r = -ENXIO;
 	int r = -ENXIO;
 
 
@@ -2326,20 +1815,9 @@ static int vgic_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
 			return -EFAULT;
 			return -EFAULT;
 		break;
 		break;
 	}
 	}
-
-	case KVM_DEV_ARM_VGIC_GRP_DIST_REGS:
-	case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: {
-		u32 __user *uaddr = (u32 __user *)(long)attr->addr;
-		u32 reg = 0;
-
-		r = vgic_attr_regs_access(dev, attr, &reg, false);
-		if (r)
-			return r;
-		r = put_user(reg, uaddr);
-		break;
-	}
 	case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: {
 	case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: {
 		u32 __user *uaddr = (u32 __user *)(long)attr->addr;
 		u32 __user *uaddr = (u32 __user *)(long)attr->addr;
+
 		r = put_user(dev->kvm->arch.vgic.nr_irqs, uaddr);
 		r = put_user(dev->kvm->arch.vgic.nr_irqs, uaddr);
 		break;
 		break;
 	}
 	}
@@ -2349,61 +1827,17 @@ static int vgic_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
 	return r;
 	return r;
 }
 }
 
 
-static int vgic_has_attr_regs(const struct mmio_range *ranges,
-			      phys_addr_t offset)
+int vgic_has_attr_regs(const struct kvm_mmio_range *ranges, phys_addr_t offset)
 {
 {
 	struct kvm_exit_mmio dev_attr_mmio;
 	struct kvm_exit_mmio dev_attr_mmio;
 
 
 	dev_attr_mmio.len = 4;
 	dev_attr_mmio.len = 4;
-	if (find_matching_range(ranges, &dev_attr_mmio, offset))
+	if (vgic_find_range(ranges, &dev_attr_mmio, offset))
 		return 0;
 		return 0;
 	else
 	else
 		return -ENXIO;
 		return -ENXIO;
 }
 }
 
 
-static int vgic_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
-{
-	phys_addr_t offset;
-
-	switch (attr->group) {
-	case KVM_DEV_ARM_VGIC_GRP_ADDR:
-		switch (attr->attr) {
-		case KVM_VGIC_V2_ADDR_TYPE_DIST:
-		case KVM_VGIC_V2_ADDR_TYPE_CPU:
-			return 0;
-		}
-		break;
-	case KVM_DEV_ARM_VGIC_GRP_DIST_REGS:
-		offset = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK;
-		return vgic_has_attr_regs(vgic_dist_ranges, offset);
-	case KVM_DEV_ARM_VGIC_GRP_CPU_REGS:
-		offset = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK;
-		return vgic_has_attr_regs(vgic_cpu_ranges, offset);
-	case KVM_DEV_ARM_VGIC_GRP_NR_IRQS:
-		return 0;
-	}
-	return -ENXIO;
-}
-
-static void vgic_destroy(struct kvm_device *dev)
-{
-	kfree(dev);
-}
-
-static int vgic_create(struct kvm_device *dev, u32 type)
-{
-	return kvm_vgic_create(dev->kvm);
-}
-
-static struct kvm_device_ops kvm_arm_vgic_v2_ops = {
-	.name = "kvm-arm-vgic",
-	.create = vgic_create,
-	.destroy = vgic_destroy,
-	.set_attr = vgic_set_attr,
-	.get_attr = vgic_get_attr,
-	.has_attr = vgic_has_attr,
-};
-
 static void vgic_init_maintenance_interrupt(void *info)
 static void vgic_init_maintenance_interrupt(void *info)
 {
 {
 	enable_percpu_irq(vgic->maint_irq, 0);
 	enable_percpu_irq(vgic->maint_irq, 0);
@@ -2474,8 +1908,7 @@ int kvm_vgic_hyp_init(void)
 
 
 	on_each_cpu(vgic_init_maintenance_interrupt, NULL, 1);
 	on_each_cpu(vgic_init_maintenance_interrupt, NULL, 1);
 
 
-	return kvm_register_device_ops(&kvm_arm_vgic_v2_ops,
-				       KVM_DEV_TYPE_ARM_VGIC_V2);
+	return 0;
 
 
 out_free_irq:
 out_free_irq:
 	free_percpu_irq(vgic->maint_irq, kvm_get_running_vcpus());
 	free_percpu_irq(vgic->maint_irq, kvm_get_running_vcpus());
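
With the GICv2-only device ops gone from kvm_vgic_hyp_init(), the common code now dispatches through per-model hooks such as vm_ops.handle_mmio and init_model, filled in by vgic_v2_init_emulation() or vgic_v3_init_emulation(). A minimal sketch of that ops-table pattern (struct and names invented for illustration, not the kernel's):

#include <stdbool.h>
#include <stdio.h>

/* Invented ops table to illustrate the per-model dispatch: the common code
 * calls through hooks that the chosen model fills in at create time. */
struct vgic_model_ops {
	const char *name;
	int  (*init_model)(void);
	bool (*handle_mmio)(unsigned long addr);
};

static int v2_init_model(void)
{
	return 0;
}

static bool v2_handle_mmio(unsigned long addr)
{
	printf("GICv2 emulation handles MMIO at %#lx\n", addr);
	return true;
}

static const struct vgic_model_ops vgic_v2_ops = {
	.name		= "vgic-v2",
	.init_model	= v2_init_model,
	.handle_mmio	= v2_handle_mmio,
};

int main(void)
{
	const struct vgic_model_ops *ops = &vgic_v2_ops;	/* picked at create time */

	if (!ops->init_model())
		ops->handle_mmio(0x8000000UL);
	return 0;
}
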

+ 123 - 0
virt/kvm/arm/vgic.h

@@ -0,0 +1,123 @@
+/*
+ * Copyright (C) 2012-2014 ARM Ltd.
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * Derived from virt/kvm/arm/vgic.c
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __KVM_VGIC_H__
+#define __KVM_VGIC_H__
+
+#define VGIC_ADDR_UNDEF		(-1)
+#define IS_VGIC_ADDR_UNDEF(_x)  ((_x) == VGIC_ADDR_UNDEF)
+
+#define PRODUCT_ID_KVM		0x4b	/* ASCII code K */
+#define IMPLEMENTER_ARM		0x43b
+
+#define ACCESS_READ_VALUE	(1 << 0)
+#define ACCESS_READ_RAZ		(0 << 0)
+#define ACCESS_READ_MASK(x)	((x) & (1 << 0))
+#define ACCESS_WRITE_IGNORED	(0 << 1)
+#define ACCESS_WRITE_SETBIT	(1 << 1)
+#define ACCESS_WRITE_CLEARBIT	(2 << 1)
+#define ACCESS_WRITE_VALUE	(3 << 1)
+#define ACCESS_WRITE_MASK(x)	((x) & (3 << 1))
+
+#define VCPU_NOT_ALLOCATED	((u8)-1)
+
+unsigned long *vgic_bitmap_get_shared_map(struct vgic_bitmap *x);
+
+void vgic_update_state(struct kvm *kvm);
+int vgic_init_common_maps(struct kvm *kvm);
+
+u32 *vgic_bitmap_get_reg(struct vgic_bitmap *x, int cpuid, u32 offset);
+u32 *vgic_bytemap_get_reg(struct vgic_bytemap *x, int cpuid, u32 offset);
+
+void vgic_dist_irq_set_pending(struct kvm_vcpu *vcpu, int irq);
+void vgic_dist_irq_clear_pending(struct kvm_vcpu *vcpu, int irq);
+void vgic_cpu_irq_clear(struct kvm_vcpu *vcpu, int irq);
+void vgic_bitmap_set_irq_val(struct vgic_bitmap *x, int cpuid,
+			     int irq, int val);
+
+void vgic_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
+void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
+
+bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq);
+void vgic_unqueue_irqs(struct kvm_vcpu *vcpu);
+
+void vgic_reg_access(struct kvm_exit_mmio *mmio, u32 *reg,
+		     phys_addr_t offset, int mode);
+bool handle_mmio_raz_wi(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio,
+			phys_addr_t offset);
+
+static inline
+u32 mmio_data_read(struct kvm_exit_mmio *mmio, u32 mask)
+{
+	return le32_to_cpu(*((u32 *)mmio->data)) & mask;
+}
+
+static inline
+void mmio_data_write(struct kvm_exit_mmio *mmio, u32 mask, u32 value)
+{
+	*((u32 *)mmio->data) = cpu_to_le32(value) & mask;
+}
+
+struct kvm_mmio_range {
+	phys_addr_t base;
+	unsigned long len;
+	int bits_per_irq;
+	bool (*handle_mmio)(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio,
+			    phys_addr_t offset);
+};
+
+static inline bool is_in_range(phys_addr_t addr, unsigned long len,
+			       phys_addr_t baseaddr, unsigned long size)
+{
+	return (addr >= baseaddr) && (addr + len <= baseaddr + size);
+}
+
+const
+struct kvm_mmio_range *vgic_find_range(const struct kvm_mmio_range *ranges,
+				       struct kvm_exit_mmio *mmio,
+				       phys_addr_t offset);
+
+bool vgic_handle_mmio_range(struct kvm_vcpu *vcpu, struct kvm_run *run,
+			    struct kvm_exit_mmio *mmio,
+			    const struct kvm_mmio_range *ranges,
+			    unsigned long mmio_base);
+
+bool vgic_handle_enable_reg(struct kvm *kvm, struct kvm_exit_mmio *mmio,
+			    phys_addr_t offset, int vcpu_id, int access);
+
+bool vgic_handle_set_pending_reg(struct kvm *kvm, struct kvm_exit_mmio *mmio,
+				 phys_addr_t offset, int vcpu_id);
+
+bool vgic_handle_clear_pending_reg(struct kvm *kvm, struct kvm_exit_mmio *mmio,
+				   phys_addr_t offset, int vcpu_id);
+
+bool vgic_handle_cfg_reg(u32 *reg, struct kvm_exit_mmio *mmio,
+			 phys_addr_t offset);
+
+void vgic_kick_vcpus(struct kvm *kvm);
+
+int vgic_has_attr_regs(const struct kvm_mmio_range *ranges, phys_addr_t offset);
+int vgic_set_common_attr(struct kvm_device *dev, struct kvm_device_attr *attr);
+int vgic_get_common_attr(struct kvm_device *dev, struct kvm_device_attr *attr);
+
+int vgic_init(struct kvm *kvm);
+void vgic_v2_init_emulation(struct kvm *kvm);
+void vgic_v3_init_emulation(struct kvm *kvm);
+
+#endif
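
The new header encodes the access mode in two independent bit fields (bit 0 selects the read behaviour, bits 1-2 the write behaviour), which is what lets handlers pass ACCESS_READ_VALUE | access straight through to vgic_reg_access(). A tiny standalone check of that encoding, with the constants copied from the header above:

#include <stdio.h>

#define ACCESS_READ_VALUE	(1 << 0)
#define ACCESS_READ_RAZ		(0 << 0)
#define ACCESS_READ_MASK(x)	((x) & (1 << 0))
#define ACCESS_WRITE_IGNORED	(0 << 1)
#define ACCESS_WRITE_SETBIT	(1 << 1)
#define ACCESS_WRITE_CLEARBIT	(2 << 1)
#define ACCESS_WRITE_VALUE	(3 << 1)
#define ACCESS_WRITE_MASK(x)	((x) & (3 << 1))

int main(void)
{
	int mode = ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT;

	/* The two halves decode independently, as vgic_reg_access() expects. */
	printf("read raz?   %d\n", ACCESS_READ_MASK(mode) == ACCESS_READ_RAZ);
	printf("clear bits? %d\n", ACCESS_WRITE_MASK(mode) == ACCESS_WRITE_CLEARBIT);
	return 0;
}
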

+ 131 - 13
virt/kvm/kvm_main.c

@@ -66,6 +66,9 @@
 MODULE_AUTHOR("Qumranet");
 MODULE_LICENSE("GPL");
 
+unsigned int halt_poll_ns = 0;
+module_param(halt_poll_ns, uint, S_IRUGO | S_IWUSR);
+
 /*
  * Ordering of locks:
  *
@@ -89,7 +92,7 @@ struct dentry *kvm_debugfs_dir;
 
 
 static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl,
 static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl,
 			   unsigned long arg);
 			   unsigned long arg);
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_KVM_COMPAT
 static long kvm_vcpu_compat_ioctl(struct file *file, unsigned int ioctl,
 static long kvm_vcpu_compat_ioctl(struct file *file, unsigned int ioctl,
 				  unsigned long arg);
 				  unsigned long arg);
 #endif
 #endif
@@ -176,6 +179,7 @@ bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req)
 	return called;
 	return called;
 }
 }
 
 
+#ifndef CONFIG_HAVE_KVM_ARCH_TLB_FLUSH_ALL
 void kvm_flush_remote_tlbs(struct kvm *kvm)
 void kvm_flush_remote_tlbs(struct kvm *kvm)
 {
 {
 	long dirty_count = kvm->tlbs_dirty;
 	long dirty_count = kvm->tlbs_dirty;
@@ -186,6 +190,7 @@ void kvm_flush_remote_tlbs(struct kvm *kvm)
 	cmpxchg(&kvm->tlbs_dirty, dirty_count, 0);
 	cmpxchg(&kvm->tlbs_dirty, dirty_count, 0);
 }
 }
 EXPORT_SYMBOL_GPL(kvm_flush_remote_tlbs);
 EXPORT_SYMBOL_GPL(kvm_flush_remote_tlbs);
+#endif
 
 
 void kvm_reload_remote_mmus(struct kvm *kvm)
 void kvm_reload_remote_mmus(struct kvm *kvm)
 {
 {
@@ -673,6 +678,7 @@ static void update_memslots(struct kvm_memslots *slots,
 	if (!new->npages) {
 	if (!new->npages) {
 		WARN_ON(!mslots[i].npages);
 		WARN_ON(!mslots[i].npages);
 		new->base_gfn = 0;
 		new->base_gfn = 0;
+		new->flags = 0;
 		if (mslots[i].npages)
 		if (mslots[i].npages)
 			slots->used_slots--;
 			slots->used_slots--;
 	} else {
 	} else {
@@ -993,6 +999,86 @@ out:
 }
 }
 EXPORT_SYMBOL_GPL(kvm_get_dirty_log);
 EXPORT_SYMBOL_GPL(kvm_get_dirty_log);
 
 
+#ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT
+/**
+ * kvm_get_dirty_log_protect - get a snapshot of dirty pages, and if any pages
+ *	are dirty write protect them for next write.
+ * @kvm:	pointer to kvm instance
+ * @log:	slot id and address to which we copy the log
+ * @is_dirty:	flag set if any page is dirty
+ *
+ * We need to keep it in mind that VCPU threads can write to the bitmap
+ * concurrently. So, to avoid losing track of dirty pages we keep the
+ * following order:
+ *
+ *    1. Take a snapshot of the bit and clear it if needed.
+ *    2. Write protect the corresponding page.
+ *    3. Copy the snapshot to the userspace.
+ *    4. Upon return caller flushes TLB's if needed.
+ *
+ * Between 2 and 4, the guest may write to the page using the remaining TLB
+ * entry.  This is not a problem because the page is reported dirty using
+ * the snapshot taken before and step 4 ensures that writes done after
+ * exiting to userspace will be logged for the next call.
+ *
+ */
+int kvm_get_dirty_log_protect(struct kvm *kvm,
+			struct kvm_dirty_log *log, bool *is_dirty)
+{
+	struct kvm_memory_slot *memslot;
+	int r, i;
+	unsigned long n;
+	unsigned long *dirty_bitmap;
+	unsigned long *dirty_bitmap_buffer;
+
+	r = -EINVAL;
+	if (log->slot >= KVM_USER_MEM_SLOTS)
+		goto out;
+
+	memslot = id_to_memslot(kvm->memslots, log->slot);
+
+	dirty_bitmap = memslot->dirty_bitmap;
+	r = -ENOENT;
+	if (!dirty_bitmap)
+		goto out;
+
+	n = kvm_dirty_bitmap_bytes(memslot);
+
+	dirty_bitmap_buffer = dirty_bitmap + n / sizeof(long);
+	memset(dirty_bitmap_buffer, 0, n);
+
+	spin_lock(&kvm->mmu_lock);
+	*is_dirty = false;
+	for (i = 0; i < n / sizeof(long); i++) {
+		unsigned long mask;
+		gfn_t offset;
+
+		if (!dirty_bitmap[i])
+			continue;
+
+		*is_dirty = true;
+
+		mask = xchg(&dirty_bitmap[i], 0);
+		dirty_bitmap_buffer[i] = mask;
+
+		offset = i * BITS_PER_LONG;
+		kvm_arch_mmu_enable_log_dirty_pt_masked(kvm, memslot, offset,
+								mask);
+	}
+
+	spin_unlock(&kvm->mmu_lock);
+
+	r = -EFAULT;
+	if (copy_to_user(log->dirty_bitmap, dirty_bitmap_buffer, n))
+		goto out;
+
+	r = 0;
+out:
+	return r;
+}
+EXPORT_SYMBOL_GPL(kvm_get_dirty_log_protect);
+#endif
+
 bool kvm_largepages_enabled(void)
 bool kvm_largepages_enabled(void)
 {
 {
 	return largepages_enabled;
 	return largepages_enabled;
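
kvm_get_dirty_log_protect() above snapshots and clears each bitmap word atomically with xchg() under mmu_lock before write-protecting the corresponding pages. A single-threaded sketch of just the snapshot-and-clear step (the atomicity and the write-protect call are only hinted at in comments):

#include <stdio.h>

/* Snapshot and clear a dirty bitmap word by word, the way
 * kvm_get_dirty_log_protect() does (minus the atomic xchg(), the lock and
 * the write-protect call).  Returns 1 if any page was dirty. */
static int snapshot_dirty(unsigned long *bitmap, unsigned long *snapshot, int words)
{
	int dirty = 0;

	for (int i = 0; i < words; i++) {
		unsigned long mask = bitmap[i];	/* kernel: mask = xchg(&bitmap[i], 0) */

		bitmap[i] = 0;
		snapshot[i] = mask;
		if (mask)
			dirty = 1;
		/* kernel: kvm_arch_mmu_enable_log_dirty_pt_masked(kvm, slot,
		 *				offset = i * BITS_PER_LONG, mask); */
	}
	return dirty;
}

int main(void)
{
	unsigned long bitmap[2] = { 0x5, 0x0 }, snap[2];
	int dirty = snapshot_dirty(bitmap, snap, 2);

	printf("dirty=%d snap[0]=%#lx bitmap[0]=%#lx\n", dirty, snap[0], bitmap[0]);
	return 0;
}
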
@@ -1551,6 +1637,7 @@ int kvm_write_guest(struct kvm *kvm, gpa_t gpa, const void *data,
 	}
 	}
 	return 0;
 	return 0;
 }
 }
+EXPORT_SYMBOL_GPL(kvm_write_guest);
 
 
 int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
 int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
 			      gpa_t gpa, unsigned long len)
 			      gpa_t gpa, unsigned long len)
@@ -1687,29 +1774,60 @@ void mark_page_dirty(struct kvm *kvm, gfn_t gfn)
 }
 }
 EXPORT_SYMBOL_GPL(mark_page_dirty);
 EXPORT_SYMBOL_GPL(mark_page_dirty);
 
 
+static int kvm_vcpu_check_block(struct kvm_vcpu *vcpu)
+{
+	if (kvm_arch_vcpu_runnable(vcpu)) {
+		kvm_make_request(KVM_REQ_UNHALT, vcpu);
+		return -EINTR;
+	}
+	if (kvm_cpu_has_pending_timer(vcpu))
+		return -EINTR;
+	if (signal_pending(current))
+		return -EINTR;
+
+	return 0;
+}
+
 /*
 /*
  * The vCPU has executed a HLT instruction with in-kernel mode enabled.
  * The vCPU has executed a HLT instruction with in-kernel mode enabled.
  */
  */
 void kvm_vcpu_block(struct kvm_vcpu *vcpu)
 void kvm_vcpu_block(struct kvm_vcpu *vcpu)
 {
 {
+	ktime_t start, cur;
 	DEFINE_WAIT(wait);
 	DEFINE_WAIT(wait);
+	bool waited = false;
+
+	start = cur = ktime_get();
+	if (halt_poll_ns) {
+		ktime_t stop = ktime_add_ns(ktime_get(), halt_poll_ns);
+		do {
+			/*
+			 * This sets KVM_REQ_UNHALT if an interrupt
+			 * arrives.
+			 */
+			if (kvm_vcpu_check_block(vcpu) < 0) {
+				++vcpu->stat.halt_successful_poll;
+				goto out;
+			}
+			cur = ktime_get();
+		} while (single_task_running() && ktime_before(cur, stop));
+	}
 
 
 	for (;;) {
 	for (;;) {
 		prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE);
 		prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE);
 
 
-		if (kvm_arch_vcpu_runnable(vcpu)) {
-			kvm_make_request(KVM_REQ_UNHALT, vcpu);
-			break;
-		}
-		if (kvm_cpu_has_pending_timer(vcpu))
-			break;
-		if (signal_pending(current))
+		if (kvm_vcpu_check_block(vcpu) < 0)
 			break;
 			break;
 
 
+		waited = true;
 		schedule();
 		schedule();
 	}
 	}
 
 
 	finish_wait(&vcpu->wq, &wait);
 	finish_wait(&vcpu->wq, &wait);
+	cur = ktime_get();
+
+out:
+	trace_kvm_vcpu_wakeup(ktime_to_ns(cur) - ktime_to_ns(start), waited);
 }
 }
 EXPORT_SYMBOL_GPL(kvm_vcpu_block);
 EXPORT_SYMBOL_GPL(kvm_vcpu_block);
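
The reworked kvm_vcpu_block() polls for up to halt_poll_ns nanoseconds before actually sleeping; the knob is writable at runtime (typically via /sys/module/kvm/parameters/halt_poll_ns, assuming the usual module_param sysfs path). A rough standalone sketch of the poll-then-block shape, with a toy predicate standing in for kvm_vcpu_check_block() and nanosleep() standing in for the scheduler:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <time.h>

static uint64_t now_ns(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return (uint64_t)ts.tv_sec * 1000000000ULL + (uint64_t)ts.tv_nsec;
}

/* Toy stand-in for kvm_vcpu_check_block(): work shows up at a fixed time. */
static uint64_t work_ready_at;

static bool work_pending(void)
{
	return now_ns() >= work_ready_at;
}

/* Poll for up to poll_ns before sleeping, mirroring the shape of the new
 * kvm_vcpu_block(): a cheap busy-wait first, a real block only if the grace
 * period expires.  Returns true if polling avoided the sleep. */
static bool halt_with_polling(uint64_t poll_ns)
{
	uint64_t stop = now_ns() + poll_ns;

	while (poll_ns && now_ns() < stop)
		if (work_pending())
			return true;

	while (!work_pending()) {
		struct timespec ts = { .tv_nsec = 100000 };	/* the kernel schedules out instead */

		nanosleep(&ts, NULL);
	}
	return false;
}

int main(void)
{
	work_ready_at = now_ns() + 200000;	/* "interrupt" arrives in 200 us */
	printf("poll avoided the sleep: %d\n", halt_with_polling(500000));
	return 0;
}
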
 
 
@@ -1892,7 +2010,7 @@ static int kvm_vcpu_release(struct inode *inode, struct file *filp)
 static struct file_operations kvm_vcpu_fops = {
 static struct file_operations kvm_vcpu_fops = {
 	.release        = kvm_vcpu_release,
 	.release        = kvm_vcpu_release,
 	.unlocked_ioctl = kvm_vcpu_ioctl,
 	.unlocked_ioctl = kvm_vcpu_ioctl,
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_KVM_COMPAT
 	.compat_ioctl   = kvm_vcpu_compat_ioctl,
 	.compat_ioctl   = kvm_vcpu_compat_ioctl,
 #endif
 #endif
 	.mmap           = kvm_vcpu_mmap,
 	.mmap           = kvm_vcpu_mmap,
@@ -2182,7 +2300,7 @@ out:
 	return r;
 	return r;
 }
 }
 
 
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_KVM_COMPAT
 static long kvm_vcpu_compat_ioctl(struct file *filp,
 static long kvm_vcpu_compat_ioctl(struct file *filp,
 				  unsigned int ioctl, unsigned long arg)
 				  unsigned int ioctl, unsigned long arg)
 {
 {
@@ -2274,7 +2392,7 @@ static int kvm_device_release(struct inode *inode, struct file *filp)
 
 
 static const struct file_operations kvm_device_fops = {
 static const struct file_operations kvm_device_fops = {
 	.unlocked_ioctl = kvm_device_ioctl,
 	.unlocked_ioctl = kvm_device_ioctl,
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_KVM_COMPAT
 	.compat_ioctl = kvm_device_ioctl,
 	.compat_ioctl = kvm_device_ioctl,
 #endif
 #endif
 	.release = kvm_device_release,
 	.release = kvm_device_release,
@@ -2561,7 +2679,7 @@ out:
 	return r;
 	return r;
 }
 }
 
 
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_KVM_COMPAT
 struct compat_kvm_dirty_log {
 struct compat_kvm_dirty_log {
 	__u32 slot;
 	__u32 slot;
 	__u32 padding1;
 	__u32 padding1;
@@ -2608,7 +2726,7 @@ out:
 static struct file_operations kvm_vm_fops = {
 static struct file_operations kvm_vm_fops = {
 	.release        = kvm_vm_release,
 	.release        = kvm_vm_release,
 	.unlocked_ioctl = kvm_vm_ioctl,
 	.unlocked_ioctl = kvm_vm_ioctl,
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_KVM_COMPAT
 	.compat_ioctl   = kvm_vm_compat_ioctl,
 	.compat_ioctl   = kvm_vm_compat_ioctl,
 #endif
 #endif
 	.llseek		= noop_llseek,
 	.llseek		= noop_llseek,