
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull KVM updates from Paolo Bonzini:
 "PPC changes will come next week.

   - s390: Support for runtime instrumentation within guests, support of
     248 VCPUs.

   - ARM: rewrite of the arm64 world switch in C, support for 16-bit VM
     identifiers.  Performance counter virtualization missed the boat.

   - x86: Support for more Hyper-V features (synthetic interrupt
     controller), MMU cleanups"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (115 commits)
  kvm: x86: Fix vmwrite to SECONDARY_VM_EXEC_CONTROL
  kvm/x86: Hyper-V SynIC timers tracepoints
  kvm/x86: Hyper-V SynIC tracepoints
  kvm/x86: Update SynIC timers on guest entry only
  kvm/x86: Skip SynIC vector check for QEMU side
  kvm/x86: Hyper-V fix SynIC timer disabling condition
  kvm/x86: Reorg stimer_expiration() to better control timer restart
  kvm/x86: Hyper-V unify stimer_start() and stimer_restart()
  kvm/x86: Drop stimer_stop() function
  kvm/x86: Hyper-V timers fix incorrect logical operation
  KVM: move architecture-dependent requests to arch/
  KVM: renumber vcpu->request bits
  KVM: document which architecture uses each request bit
  KVM: Remove unused KVM_REQ_KICK to save a bit in vcpu->requests
  kvm: x86: Check kvm_write_guest return value in kvm_write_wall_clock
  KVM: s390: implement the RI support of guest
  kvm/s390: drop unpaired smp_mb
  kvm: x86: fix comment about {mmu,nested_mmu}.gva_to_gpa
  KVM: x86: MMU: Use clear_page() instead of init_shadow_page_table()
  arm/arm64: KVM: Detect vGIC presence at runtime
  ...
Linus Torvalds, 9 years ago
parent / commit 1baa5efbeb
87 changed files with 4043 additions and 2393 deletions (per-file counts below are additions / deletions)
  1. 41 0
      Documentation/virtual/kvm/api.txt
  2. 2 1
      Documentation/virtual/kvm/devices/vm.txt
  3. 2 2
      Documentation/virtual/kvm/mmu.txt
  4. 1 0
      MAINTAINERS
  5. 17 17
      arch/arm/include/asm/kvm_arm.h
  6. 6 0
      arch/arm/include/asm/kvm_host.h
  7. 5 0
      arch/arm/include/asm/kvm_mmu.h
  8. 36 4
      arch/arm/kvm/arm.c
  9. 38 36
      arch/arm/kvm/emulate.c
  10. 6 0
      arch/arm/kvm/guest.c
  11. 3 0
      arch/arm/kvm/handle_exit.c
  12. 3 0
      arch/arm/kvm/mmio.c
  13. 3 3
      arch/arm/kvm/mmu.c
  14. 2 1
      arch/arm64/include/asm/kvm_arm.h
  15. 0 76
      arch/arm64/include/asm/kvm_asm.h
  16. 0 1
      arch/arm64/include/asm/kvm_emulate.h
  17. 86 1
      arch/arm64/include/asm/kvm_host.h
  18. 0 1
      arch/arm64/include/asm/kvm_mmio.h
  19. 8 1
      arch/arm64/include/asm/kvm_mmu.h
  20. 21 0
      arch/arm64/include/asm/sysreg.h
  21. 1 39
      arch/arm64/kernel/asm-offsets.c
  22. 1 2
      arch/arm64/kvm/Makefile
  23. 9 1
      arch/arm64/kvm/guest.c
  24. 4 0
      arch/arm64/kvm/handle_exit.c
  25. 9 0
      arch/arm64/kvm/hyp-init.S
  26. 1 1080
      arch/arm64/kvm/hyp.S
  27. 14 0
      arch/arm64/kvm/hyp/Makefile
  28. 140 0
      arch/arm64/kvm/hyp/debug-sr.c
  29. 160 0
      arch/arm64/kvm/hyp/entry.S
  30. 33 0
      arch/arm64/kvm/hyp/fpsimd.S
  31. 212 0
      arch/arm64/kvm/hyp/hyp-entry.S
  32. 90 0
      arch/arm64/kvm/hyp/hyp.h
  33. 175 0
      arch/arm64/kvm/hyp/switch.c
  34. 138 0
      arch/arm64/kvm/hyp/sysreg-sr.c
  35. 71 0
      arch/arm64/kvm/hyp/timer-sr.c
  36. 80 0
      arch/arm64/kvm/hyp/tlb.c
  37. 84 0
      arch/arm64/kvm/hyp/vgic-v2-sr.c
  38. 228 0
      arch/arm64/kvm/hyp/vgic-v3-sr.c
  39. 30 29
      arch/arm64/kvm/sys_regs.c
  40. 0 134
      arch/arm64/kvm/vgic-v2-switch.S
  41. 0 269
      arch/arm64/kvm/vgic-v3-switch.S
  42. 4 0
      arch/powerpc/include/asm/kvm_host.h
  43. 2 8
      arch/powerpc/kvm/book3s_hv.c
  44. 2 2
      arch/powerpc/kvm/book3s_pr.c
  45. 7 0
      arch/s390/include/asm/elf.h
  46. 50 7
      arch/s390/include/asm/kvm_host.h
  47. 7 1
      arch/s390/include/asm/sclp.h
  48. 5 0
      arch/s390/include/uapi/asm/kvm.h
  49. 6 0
      arch/s390/kernel/processor.c
  50. 9 0
      arch/s390/kernel/setup.c
  51. 3 8
      arch/s390/kvm/diag.c
  52. 27 11
      arch/s390/kvm/gaccess.c
  53. 3 4
      arch/s390/kvm/intercept.c
  54. 106 27
      arch/s390/kvm/interrupt.c
  55. 219 81
      arch/s390/kvm/kvm-s390.c
  56. 7 0
      arch/s390/kvm/kvm-s390.h
  57. 3 3
      arch/s390/kvm/trace-s390.h
  58. 2 2
      arch/s390/mm/pgtable.c
  59. 70 5
      arch/x86/include/asm/kvm_host.h
  60. 92 0
      arch/x86/include/uapi/asm/hyperv.h
  61. 704 4
      arch/x86/kvm/hyperv.c
  62. 55 0
      arch/x86/kvm/hyperv.h
  63. 2 2
      arch/x86/kvm/ioapic.c
  64. 4 3
      arch/x86/kvm/ioapic.h
  65. 1 1
      arch/x86/kvm/irq.c
  66. 38 3
      arch/x86/kvm/irq_comm.c
  67. 23 17
      arch/x86/kvm/lapic.c
  68. 7 2
      arch/x86/kvm/lapic.h
  69. 183 214
      arch/x86/kvm/mmu.c
  70. 8 7
      arch/x86/kvm/mmu_audit.c
  71. 9 11
      arch/x86/kvm/paging_tmpl.h
  72. 32 5
      arch/x86/kvm/svm.c
  73. 264 1
      arch/x86/kvm/trace.h
  74. 131 71
      arch/x86/kvm/vmx.c
  75. 96 14
      arch/x86/kvm/x86.c
  76. 1 87
      drivers/hv/hyperv_vmbus.h
  77. 14 2
      drivers/s390/char/sclp_early.c
  78. 6 0
      include/clocksource/arm_arch_timer.h
  79. 6 0
      include/kvm/arm_vgic.h
  80. 24 42
      include/linux/kvm_host.h
  81. 2 4
      include/linux/kvm_para.h
  82. 26 0
      include/uapi/linux/kvm.h
  83. 3 8
      virt/kvm/arm/vgic-v3.c
  84. 1 1
      virt/kvm/arm/vgic.c
  85. 1 2
      virt/kvm/async_pf.c
  86. 6 1
      virt/kvm/irqchip.c
  87. 12 34
      virt/kvm/kvm_main.c

+ 41 - 0
Documentation/virtual/kvm/api.txt

@@ -1451,6 +1451,7 @@ struct kvm_irq_routing_entry {
 		struct kvm_irq_routing_irqchip irqchip;
 		struct kvm_irq_routing_msi msi;
 		struct kvm_irq_routing_s390_adapter adapter;
+		struct kvm_irq_routing_hv_sint hv_sint;
 		__u32 pad[8];
 	} u;
 };
@@ -1459,6 +1460,7 @@ struct kvm_irq_routing_entry {
 #define KVM_IRQ_ROUTING_IRQCHIP 1
 #define KVM_IRQ_ROUTING_MSI 2
 #define KVM_IRQ_ROUTING_S390_ADAPTER 3
+#define KVM_IRQ_ROUTING_HV_SINT 4
 
 No flags are specified so far, the corresponding field must be set to zero.
 
@@ -1482,6 +1484,10 @@ struct kvm_irq_routing_s390_adapter {
 	__u32 adapter_id;
 };
 
+struct kvm_irq_routing_hv_sint {
+	__u32 vcpu;
+	__u32 sint;
+};
 
 4.53 KVM_ASSIGN_SET_MSIX_NR (deprecated)
 
@@ -3331,6 +3337,28 @@ the userspace IOAPIC should process the EOI and retrigger the interrupt if
 it is still asserted.  Vector is the LAPIC interrupt vector for which the
 EOI was received.
 
+		struct kvm_hyperv_exit {
+#define KVM_EXIT_HYPERV_SYNIC          1
+			__u32 type;
+			union {
+				struct {
+					__u32 msr;
+					__u64 control;
+					__u64 evt_page;
+					__u64 msg_page;
+				} synic;
+			} u;
+		};
+		/* KVM_EXIT_HYPERV */
+                struct kvm_hyperv_exit hyperv;
+Indicates that the VCPU exits into userspace to process some tasks
+related to Hyper-V emulation.
+Valid values for 'type' are:
+	KVM_EXIT_HYPERV_SYNIC -- synchronously notify user-space about
+Hyper-V SynIC state change. Notification is used to remap SynIC
+event/message pages and to enable/disable SynIC messages/events processing
+in userspace.
+
 		/* Fix the size of the union. */
 		char padding[256];
 	};
@@ -3685,3 +3713,16 @@ available, means that that the kernel has an implementation of the
 H_RANDOM hypercall backed by a hardware random-number generator.
 If present, the kernel H_RANDOM handler can be enabled for guest use
 with the KVM_CAP_PPC_ENABLE_HCALL capability.
+
+8.2 KVM_CAP_HYPERV_SYNIC
+
+Architectures: x86
+This capability, if KVM_CHECK_EXTENSION indicates that it is
+available, means that that the kernel has an implementation of the
+Hyper-V Synthetic interrupt controller(SynIC). Hyper-V SynIC is
+used to support Windows Hyper-V based guest paravirt drivers(VMBus).
+
+In order to use SynIC, it has to be activated by setting this
+capability via KVM_ENABLE_CAP ioctl on the vcpu fd. Note that this
+will disable the use of APIC hardware virtualization even if supported
+by the CPU, as it's incompatible with SynIC auto-EOI behavior.

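The KVM_CAP_HYPERV_SYNIC capability and the KVM_IRQ_ROUTING_HV_SINT routing type documented above are driven entirely from userspace. Below is a minimal sketch of how a VMM might wire them up, assuming a uapi linux/kvm.h that already carries this series; vm_fd, vcpu_fd and the chosen GSI/SINT numbers are purely illustrative.

    #include <string.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    /* Sketch only: vm_fd and vcpu_fd are assumed to be open KVM fds. */
    static int setup_hv_synic(int vm_fd, int vcpu_fd)
    {
    	struct kvm_enable_cap cap;
    	struct {
    		struct kvm_irq_routing info;
    		struct kvm_irq_routing_entry entries[1];
    	} route;

    	/* Activate SynIC on this VCPU (disables APIC hardware virtualization,
    	 * as noted in the capability description above). */
    	memset(&cap, 0, sizeof(cap));
    	cap.cap = KVM_CAP_HYPERV_SYNIC;
    	if (ioctl(vcpu_fd, KVM_ENABLE_CAP, &cap) < 0)
    		return -1;

    	/* Route an arbitrary GSI (5 here) to SINT 2 of VCPU 0. */
    	memset(&route, 0, sizeof(route));
    	route.info.nr = 1;
    	route.entries[0].gsi = 5;
    	route.entries[0].type = KVM_IRQ_ROUTING_HV_SINT;
    	route.entries[0].u.hv_sint.vcpu = 0;
    	route.entries[0].u.hv_sint.sint = 2;
    	return ioctl(vm_fd, KVM_SET_GSI_ROUTING, &route);
    }

Note that KVM_SET_GSI_ROUTING replaces the whole routing table, so a real VMM would pass its existing entries alongside the new HV_SINT one.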
+ 2 - 1
Documentation/virtual/kvm/devices/vm.txt

@@ -37,7 +37,8 @@ Returns: -EFAULT if the given address is not accessible
 Allows userspace to query the actual limit and set a new limit for
 the maximum guest memory size. The limit will be rounded up to
 2048 MB, 4096 GB, 8192 TB respectively, as this limit is governed by
-the number of page table levels.
+the number of page table levels. In the case that there is no limit we will set
+the limit to KVM_S390_NO_MEM_LIMIT (U64_MAX).
 
 2. GROUP: KVM_S390_VM_CPU_MODEL
 Architectures: s390

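The memory-limit attribute described above is set through the generic KVM_SET_DEVICE_ATTR ioctl on the VM fd. A rough, untested sketch, assuming the s390 uapi names KVM_S390_VM_MEM_CTRL and KVM_S390_VM_MEM_LIMIT_SIZE and an illustrative 16 GB value:

    #include <stdint.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    /* Sketch: request a 16 GB guest memory limit; KVM rounds it up as documented. */
    static int set_s390_mem_limit(int vm_fd)
    {
    	uint64_t limit = 16ULL << 30;
    	struct kvm_device_attr attr = {
    		.group = KVM_S390_VM_MEM_CTRL,
    		.attr  = KVM_S390_VM_MEM_LIMIT_SIZE,
    		.addr  = (uint64_t)&limit,
    	};

    	return ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
    }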
+ 2 - 2
Documentation/virtual/kvm/mmu.txt

@@ -203,10 +203,10 @@ Shadow pages contain the following information:
     page cannot be destroyed.  See role.invalid.
   parent_ptes:
     The reverse mapping for the pte/ptes pointing at this page's spt. If
-    parent_ptes bit 0 is zero, only one spte points at this pages and
+    parent_ptes bit 0 is zero, only one spte points at this page and
     parent_ptes points at this single spte, otherwise, there exists multiple
     sptes pointing at this page and (parent_ptes & ~0x1) points at a data
-    structure with a list of parent_ptes.
+    structure with a list of parent sptes.
   unsync:
     If true, then the translations in this page may not match the guest's
     translation.  This is equivalent to the state of the tlb when a pte is

+ 1 - 0
MAINTAINERS

@@ -6089,6 +6089,7 @@ M:	Marc Zyngier <marc.zyngier@arm.com>
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 L:	kvmarm@lists.cs.columbia.edu
 W:	http://systems.cs.columbia.edu/projects/kvm-arm
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm.git
 S:	Supported
 F:	arch/arm/include/uapi/asm/kvm*
 F:	arch/arm/include/asm/kvm*

+ 17 - 17
arch/arm/include/asm/kvm_arm.h

@@ -19,6 +19,7 @@
 #ifndef __ARM_KVM_ARM_H__
 #define __ARM_KVM_ARM_H__
 
+#include <linux/const.h>
 #include <linux/types.h>
 
 /* Hyp Configuration Register (HCR) bits */
@@ -132,10 +133,9 @@
  * space.
  */
 #define KVM_PHYS_SHIFT	(40)
-#define KVM_PHYS_SIZE	(1ULL << KVM_PHYS_SHIFT)
-#define KVM_PHYS_MASK	(KVM_PHYS_SIZE - 1ULL)
-#define PTRS_PER_S2_PGD	(1ULL << (KVM_PHYS_SHIFT - 30))
-#define S2_PGD_ORDER	get_order(PTRS_PER_S2_PGD * sizeof(pgd_t))
+#define KVM_PHYS_SIZE	(_AC(1, ULL) << KVM_PHYS_SHIFT)
+#define KVM_PHYS_MASK	(KVM_PHYS_SIZE - _AC(1, ULL))
+#define PTRS_PER_S2_PGD	(_AC(1, ULL) << (KVM_PHYS_SHIFT - 30))
 
 /* Virtualization Translation Control Register (VTCR) bits */
 #define VTCR_SH0	(3 << 12)
@@ -162,17 +162,17 @@
 #define VTTBR_X		(5 - KVM_T0SZ)
 #endif
 #define VTTBR_BADDR_SHIFT (VTTBR_X - 1)
-#define VTTBR_BADDR_MASK  (((1LLU << (40 - VTTBR_X)) - 1) << VTTBR_BADDR_SHIFT)
-#define VTTBR_VMID_SHIFT  (48LLU)
-#define VTTBR_VMID_MASK	  (0xffLLU << VTTBR_VMID_SHIFT)
+#define VTTBR_BADDR_MASK  (((_AC(1, ULL) << (40 - VTTBR_X)) - 1) << VTTBR_BADDR_SHIFT)
+#define VTTBR_VMID_SHIFT  _AC(48, ULL)
+#define VTTBR_VMID_MASK(size)	(_AT(u64, (1 << size) - 1) << VTTBR_VMID_SHIFT)
 
 /* Hyp Syndrome Register (HSR) bits */
 #define HSR_EC_SHIFT	(26)
-#define HSR_EC		(0x3fU << HSR_EC_SHIFT)
-#define HSR_IL		(1U << 25)
+#define HSR_EC		(_AC(0x3f, UL) << HSR_EC_SHIFT)
+#define HSR_IL		(_AC(1, UL) << 25)
 #define HSR_ISS		(HSR_IL - 1)
 #define HSR_ISV_SHIFT	(24)
-#define HSR_ISV		(1U << HSR_ISV_SHIFT)
+#define HSR_ISV		(_AC(1, UL) << HSR_ISV_SHIFT)
 #define HSR_SRT_SHIFT	(16)
 #define HSR_SRT_MASK	(0xf << HSR_SRT_SHIFT)
 #define HSR_FSC		(0x3f)
@@ -180,9 +180,9 @@
 #define HSR_SSE		(1 << 21)
 #define HSR_WNR		(1 << 6)
 #define HSR_CV_SHIFT	(24)
-#define HSR_CV		(1U << HSR_CV_SHIFT)
+#define HSR_CV		(_AC(1, UL) << HSR_CV_SHIFT)
 #define HSR_COND_SHIFT	(20)
-#define HSR_COND	(0xfU << HSR_COND_SHIFT)
+#define HSR_COND	(_AC(0xf, UL) << HSR_COND_SHIFT)
 
 #define FSC_FAULT	(0x04)
 #define FSC_ACCESS	(0x08)
@@ -210,13 +210,13 @@
 #define HSR_EC_DABT	(0x24)
 #define HSR_EC_DABT_HYP	(0x25)
 
-#define HSR_WFI_IS_WFE		(1U << 0)
+#define HSR_WFI_IS_WFE		(_AC(1, UL) << 0)
 
-#define HSR_HVC_IMM_MASK	((1UL << 16) - 1)
+#define HSR_HVC_IMM_MASK	((_AC(1, UL) << 16) - 1)
 
-#define HSR_DABT_S1PTW		(1U << 7)
-#define HSR_DABT_CM		(1U << 8)
-#define HSR_DABT_EA		(1U << 9)
+#define HSR_DABT_S1PTW		(_AC(1, UL) << 7)
+#define HSR_DABT_CM		(_AC(1, UL) << 8)
+#define HSR_DABT_EA		(_AC(1, UL) << 9)
 
 #define kvm_arm_exception_type	\
 	{0, "RESET" }, 		\

+ 6 - 0
arch/arm/include/asm/kvm_host.h

@@ -150,6 +150,12 @@ struct kvm_vcpu_stat {
 	u32 halt_successful_poll;
 	u32 halt_attempted_poll;
 	u32 halt_wakeup;
+	u32 hvc_exit_stat;
+	u64 wfe_exit_stat;
+	u64 wfi_exit_stat;
+	u64 mmio_exit_user;
+	u64 mmio_exit_kernel;
+	u64 exits;
 };
 
 int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init);

+ 5 - 0
arch/arm/include/asm/kvm_mmu.h

@@ -279,6 +279,11 @@ static inline void __kvm_extend_hypmap(pgd_t *boot_hyp_pgd,
 				       pgd_t *merged_hyp_pgd,
 				       unsigned long hyp_idmap_start) { }
 
+static inline unsigned int kvm_get_vmid_bits(void)
+{
+	return 8;
+}
+
 #endif	/* !__ASSEMBLY__ */
 
 #endif /* __ARM_KVM_MMU_H__ */

+ 36 - 4
arch/arm/kvm/arm.c

@@ -44,6 +44,7 @@
 #include <asm/kvm_emulate.h>
 #include <asm/kvm_coproc.h>
 #include <asm/kvm_psci.h>
+#include <asm/sections.h>
 
 #ifdef REQUIRES_VIRT
 __asm__(".arch_extension	virt");
@@ -58,9 +59,12 @@ static DEFINE_PER_CPU(struct kvm_vcpu *, kvm_arm_running_vcpu);
 
 /* The VMID used in the VTTBR */
 static atomic64_t kvm_vmid_gen = ATOMIC64_INIT(1);
-static u8 kvm_next_vmid;
+static u32 kvm_next_vmid;
+static unsigned int kvm_vmid_bits __read_mostly;
 static DEFINE_SPINLOCK(kvm_vmid_lock);
 
+static bool vgic_present;
+
 static void kvm_arm_set_running_vcpu(struct kvm_vcpu *vcpu)
 {
 	BUG_ON(preemptible());
@@ -132,7 +136,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 	kvm->arch.vmid_gen = 0;
 
 	/* The maximum number of VCPUs is limited by the host's GIC model */
-	kvm->arch.max_vcpus = kvm_vgic_get_max_vcpus();
+	kvm->arch.max_vcpus = vgic_present ?
+				kvm_vgic_get_max_vcpus() : KVM_MAX_VCPUS;
 
 	return ret;
 out_free_stage2_pgd:
@@ -172,6 +177,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 	int r;
 	switch (ext) {
 	case KVM_CAP_IRQCHIP:
+		r = vgic_present;
+		break;
 	case KVM_CAP_IOEVENTFD:
 	case KVM_CAP_DEVICE_CTRL:
 	case KVM_CAP_USER_MEMORY:
@@ -433,11 +440,12 @@ static void update_vttbr(struct kvm *kvm)
 	kvm->arch.vmid_gen = atomic64_read(&kvm_vmid_gen);
 	kvm->arch.vmid = kvm_next_vmid;
 	kvm_next_vmid++;
+	kvm_next_vmid &= (1 << kvm_vmid_bits) - 1;
 
 	/* update vttbr to be used with the new vmid */
 	pgd_phys = virt_to_phys(kvm_get_hwpgd(kvm));
 	BUG_ON(pgd_phys & ~VTTBR_BADDR_MASK);
-	vmid = ((u64)(kvm->arch.vmid) << VTTBR_VMID_SHIFT) & VTTBR_VMID_MASK;
+	vmid = ((u64)(kvm->arch.vmid) << VTTBR_VMID_SHIFT) & VTTBR_VMID_MASK(kvm_vmid_bits);
 	kvm->arch.vttbr = pgd_phys | vmid;
 
 	spin_unlock(&kvm_vmid_lock);
@@ -603,6 +611,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
 		ret = kvm_call_hyp(__kvm_vcpu_run, vcpu);
 
 		vcpu->mode = OUTSIDE_GUEST_MODE;
+		vcpu->stat.exits++;
 		/*
 		 * Back from guest
 		 *************************************************************/
@@ -913,6 +922,8 @@ static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm,
 
 	switch (dev_id) {
 	case KVM_ARM_DEVICE_VGIC_V2:
+		if (!vgic_present)
+			return -ENXIO;
 		return kvm_vgic_addr(kvm, type, &dev_addr->addr, true);
 	default:
 		return -ENODEV;
@@ -927,6 +938,8 @@ long kvm_arch_vm_ioctl(struct file *filp,
 
 	switch (ioctl) {
 	case KVM_CREATE_IRQCHIP: {
+		if (!vgic_present)
+			return -ENXIO;
 		return kvm_vgic_create(kvm, KVM_DEV_TYPE_ARM_VGIC_V2);
 	}
 	case KVM_ARM_SET_DEVICE_ADDR: {
@@ -1067,6 +1080,12 @@ static int init_hyp_mode(void)
 		goto out_free_mappings;
 	}
 
+	err = create_hyp_mappings(__start_rodata, __end_rodata);
+	if (err) {
+		kvm_err("Cannot map rodata section\n");
+		goto out_free_mappings;
+	}
+
 	/*
 	 * Map the Hyp stack pages
 	 */
@@ -1111,8 +1130,17 @@ static int init_hyp_mode(void)
 	 * Init HYP view of VGIC
 	 */
 	err = kvm_vgic_hyp_init();
-	if (err)
+	switch (err) {
+	case 0:
+		vgic_present = true;
+		break;
+	case -ENODEV:
+	case -ENXIO:
+		vgic_present = false;
+		break;
+	default:
 		goto out_free_context;
+	}
 
 	/*
 	 * Init HYP architected timer support
@@ -1127,6 +1155,10 @@ static int init_hyp_mode(void)
 
 	kvm_perf_init();
 
+	/* set size of VMID supported by CPU */
+	kvm_vmid_bits = kvm_get_vmid_bits();
+	kvm_info("%d-bit VMID\n", kvm_vmid_bits);
+
 	kvm_info("Hyp mode initialized successfully\n");
 
 	return 0;

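With the vgic_present changes above, KVM_CAP_IRQCHIP now reports whether a vGIC was actually found at init time, and KVM_CREATE_IRQCHIP / KVM_ARM_SET_DEVICE_ADDR return -ENXIO when it was not. A hypothetical userspace probe (the return convention and fallback path are made up for the example):

    #include <errno.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    /* Sketch: decide between the in-kernel vGIC and a userspace GIC model. */
    static int setup_irqchip(int vm_fd)
    {
    	if (ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_IRQCHIP) <= 0)
    		return 0;	/* no vGIC on this host: emulate the GIC in userspace */

    	if (ioctl(vm_fd, KVM_CREATE_IRQCHIP, 0) < 0)
    		return errno == ENXIO ? 0 : -1;	/* ENXIO: fall back to userspace */

    	return 1;		/* in-kernel vGIC is available */
    }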
+ 38 - 36
arch/arm/kvm/emulate.c

@@ -275,6 +275,40 @@ static u32 exc_vector_base(struct kvm_vcpu *vcpu)
 		return vbar;
 }
 
+/*
+ * Switch to an exception mode, updating both CPSR and SPSR. Follow
+ * the logic described in AArch32.EnterMode() from the ARMv8 ARM.
+ */
+static void kvm_update_psr(struct kvm_vcpu *vcpu, unsigned long mode)
+{
+	unsigned long cpsr = *vcpu_cpsr(vcpu);
+	u32 sctlr = vcpu->arch.cp15[c1_SCTLR];
+
+	*vcpu_cpsr(vcpu) = (cpsr & ~MODE_MASK) | mode;
+
+	switch (mode) {
+	case FIQ_MODE:
+		*vcpu_cpsr(vcpu) |= PSR_F_BIT;
+		/* Fall through */
+	case ABT_MODE:
+	case IRQ_MODE:
+		*vcpu_cpsr(vcpu) |= PSR_A_BIT;
+		/* Fall through */
+	default:
+		*vcpu_cpsr(vcpu) |= PSR_I_BIT;
+	}
+
+	*vcpu_cpsr(vcpu) &= ~(PSR_IT_MASK | PSR_J_BIT | PSR_E_BIT | PSR_T_BIT);
+
+	if (sctlr & SCTLR_TE)
+		*vcpu_cpsr(vcpu) |= PSR_T_BIT;
+	if (sctlr & SCTLR_EE)
+		*vcpu_cpsr(vcpu) |= PSR_E_BIT;
+
+	/* Note: These now point to the mode banked copies */
+	*vcpu_spsr(vcpu) = cpsr;
+}
+
 /**
  * kvm_inject_undefined - inject an undefined exception into the guest
  * @vcpu: The VCPU to receive the undefined exception
@@ -286,29 +320,13 @@ static u32 exc_vector_base(struct kvm_vcpu *vcpu)
  */
 void kvm_inject_undefined(struct kvm_vcpu *vcpu)
 {
-	unsigned long new_lr_value;
-	unsigned long new_spsr_value;
 	unsigned long cpsr = *vcpu_cpsr(vcpu);
 	unsigned long cpsr = *vcpu_cpsr(vcpu);
-	u32 sctlr = vcpu->arch.cp15[c1_SCTLR];
 	bool is_thumb = (cpsr & PSR_T_BIT);
 	bool is_thumb = (cpsr & PSR_T_BIT);
 	u32 vect_offset = 4;
 	u32 vect_offset = 4;
 	u32 return_offset = (is_thumb) ? 2 : 4;
 	u32 return_offset = (is_thumb) ? 2 : 4;
 
 
-	new_spsr_value = cpsr;
-	new_lr_value = *vcpu_pc(vcpu) - return_offset;
-
-	*vcpu_cpsr(vcpu) = (cpsr & ~MODE_MASK) | UND_MODE;
-	*vcpu_cpsr(vcpu) |= PSR_I_BIT;
-	*vcpu_cpsr(vcpu) &= ~(PSR_IT_MASK | PSR_J_BIT | PSR_E_BIT | PSR_T_BIT);
-
-	if (sctlr & SCTLR_TE)
-		*vcpu_cpsr(vcpu) |= PSR_T_BIT;
-	if (sctlr & SCTLR_EE)
-		*vcpu_cpsr(vcpu) |= PSR_E_BIT;
-
-	/* Note: These now point to UND banked copies */
-	*vcpu_spsr(vcpu) = cpsr;
-	*vcpu_reg(vcpu, 14) = new_lr_value;
+	kvm_update_psr(vcpu, UND_MODE);
+	*vcpu_reg(vcpu, 14) = *vcpu_pc(vcpu) - return_offset;
 
 	/* Branch to exception vector */
 	*vcpu_pc(vcpu) = exc_vector_base(vcpu) + vect_offset;
@@ -320,30 +338,14 @@ void kvm_inject_undefined(struct kvm_vcpu *vcpu)
  */
 static void inject_abt(struct kvm_vcpu *vcpu, bool is_pabt, unsigned long addr)
 {
-	unsigned long new_lr_value;
-	unsigned long new_spsr_value;
 	unsigned long cpsr = *vcpu_cpsr(vcpu);
-	u32 sctlr = vcpu->arch.cp15[c1_SCTLR];
 	bool is_thumb = (cpsr & PSR_T_BIT);
 	u32 vect_offset;
 	u32 return_offset = (is_thumb) ? 4 : 0;
 	bool is_lpae;
 
-	new_spsr_value = cpsr;
-	new_lr_value = *vcpu_pc(vcpu) + return_offset;
-
-	*vcpu_cpsr(vcpu) = (cpsr & ~MODE_MASK) | ABT_MODE;
-	*vcpu_cpsr(vcpu) |= PSR_I_BIT | PSR_A_BIT;
-	*vcpu_cpsr(vcpu) &= ~(PSR_IT_MASK | PSR_J_BIT | PSR_E_BIT | PSR_T_BIT);
-
-	if (sctlr & SCTLR_TE)
-		*vcpu_cpsr(vcpu) |= PSR_T_BIT;
-	if (sctlr & SCTLR_EE)
-		*vcpu_cpsr(vcpu) |= PSR_E_BIT;
-
-	/* Note: These now point to ABT banked copies */
-	*vcpu_spsr(vcpu) = cpsr;
-	*vcpu_reg(vcpu, 14) = new_lr_value;
+	kvm_update_psr(vcpu, ABT_MODE);
+	*vcpu_reg(vcpu, 14) = *vcpu_pc(vcpu) + return_offset;
 
 	if (is_pabt)
 		vect_offset = 12;

+ 6 - 0
arch/arm/kvm/guest.c

@@ -33,6 +33,12 @@
 #define VCPU_STAT(x) { #x, offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU }
 
 struct kvm_stats_debugfs_item debugfs_entries[] = {
+	VCPU_STAT(hvc_exit_stat),
+	VCPU_STAT(wfe_exit_stat),
+	VCPU_STAT(wfi_exit_stat),
+	VCPU_STAT(mmio_exit_user),
+	VCPU_STAT(mmio_exit_kernel),
+	VCPU_STAT(exits),
 	{ NULL }
 };
 

+ 3 - 0
arch/arm/kvm/handle_exit.c

@@ -42,6 +42,7 @@ static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run)
 
 	trace_kvm_hvc(*vcpu_pc(vcpu), *vcpu_reg(vcpu, 0),
 		      kvm_vcpu_hvc_get_imm(vcpu));
+	vcpu->stat.hvc_exit_stat++;
 
 	ret = kvm_psci_call(vcpu);
 	if (ret < 0) {
@@ -89,9 +90,11 @@ static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run)
 {
 	if (kvm_vcpu_get_hsr(vcpu) & HSR_WFI_IS_WFE) {
 		trace_kvm_wfx(*vcpu_pc(vcpu), true);
+		vcpu->stat.wfe_exit_stat++;
 		kvm_vcpu_on_spin(vcpu);
 	} else {
 		trace_kvm_wfx(*vcpu_pc(vcpu), false);
+		vcpu->stat.wfi_exit_stat++;
 		kvm_vcpu_block(vcpu);
 	}
 

+ 3 - 0
arch/arm/kvm/mmio.c

@@ -210,8 +210,11 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
 
 	if (!ret) {
 		/* We handled the access successfully in the kernel. */
+		vcpu->stat.mmio_exit_kernel++;
 		kvm_handle_mmio_return(vcpu, run);
 		return 1;
+	} else {
+		vcpu->stat.mmio_exit_user++;
 	}
 
 	run->exit_reason	= KVM_EXIT_MMIO;

+ 3 - 3
arch/arm/kvm/mmu.c

@@ -656,9 +656,9 @@ static void *kvm_alloc_hwpgd(void)
  * kvm_alloc_stage2_pgd - allocate level-1 table for stage-2 translation.
  * @kvm:	The KVM struct pointer for the VM.
  *
- * Allocates the 1st level table only of size defined by S2_PGD_ORDER (can
- * support either full 40-bit input addresses or limited to 32-bit input
- * addresses). Clears the allocated pages.
+ * Allocates only the stage-2 HW PGD level table(s) (can support either full
+ * 40-bit input addresses or limited to 32-bit input addresses). Clears the
+ * allocated pages.
  *
  * Note we don't need locking here as this is only called when the VM is
  * created, which can only be done once.

+ 2 - 1
arch/arm64/include/asm/kvm_arm.h

@@ -125,6 +125,7 @@
 #define VTCR_EL2_SL0_LVL1	(1 << 6)
 #define VTCR_EL2_T0SZ_MASK	0x3f
 #define VTCR_EL2_T0SZ_40B	24
+#define VTCR_EL2_VS		19
 
 /*
  * We configure the Stage-2 page tables to always restrict the IPA space to be
@@ -169,7 +170,7 @@
 #define VTTBR_BADDR_SHIFT (VTTBR_X - 1)
 #define VTTBR_BADDR_MASK  (((UL(1) << (PHYS_MASK_SHIFT - VTTBR_X)) - 1) << VTTBR_BADDR_SHIFT)
 #define VTTBR_VMID_SHIFT  (UL(48))
-#define VTTBR_VMID_MASK	  (UL(0xFF) << VTTBR_VMID_SHIFT)
+#define VTTBR_VMID_MASK(size) (_AT(u64, (1 << size) - 1) << VTTBR_VMID_SHIFT)
 
 /* Hyp System Trap Register */
 #define HSTR_EL2_T(x)	(1 << x)

+ 0 - 76
arch/arm64/include/asm/kvm_asm.h

@@ -20,82 +20,6 @@
 
 #include <asm/virt.h>
 
-/*
- * 0 is reserved as an invalid value.
- * Order *must* be kept in sync with the hyp switch code.
- */
-#define	MPIDR_EL1	1	/* MultiProcessor Affinity Register */
-#define	CSSELR_EL1	2	/* Cache Size Selection Register */
-#define	SCTLR_EL1	3	/* System Control Register */
-#define	ACTLR_EL1	4	/* Auxiliary Control Register */
-#define	CPACR_EL1	5	/* Coprocessor Access Control */
-#define	TTBR0_EL1	6	/* Translation Table Base Register 0 */
-#define	TTBR1_EL1	7	/* Translation Table Base Register 1 */
-#define	TCR_EL1		8	/* Translation Control Register */
-#define	ESR_EL1		9	/* Exception Syndrome Register */
-#define	AFSR0_EL1	10	/* Auxilary Fault Status Register 0 */
-#define	AFSR1_EL1	11	/* Auxilary Fault Status Register 1 */
-#define	FAR_EL1		12	/* Fault Address Register */
-#define	MAIR_EL1	13	/* Memory Attribute Indirection Register */
-#define	VBAR_EL1	14	/* Vector Base Address Register */
-#define	CONTEXTIDR_EL1	15	/* Context ID Register */
-#define	TPIDR_EL0	16	/* Thread ID, User R/W */
-#define	TPIDRRO_EL0	17	/* Thread ID, User R/O */
-#define	TPIDR_EL1	18	/* Thread ID, Privileged */
-#define	AMAIR_EL1	19	/* Aux Memory Attribute Indirection Register */
-#define	CNTKCTL_EL1	20	/* Timer Control Register (EL1) */
-#define	PAR_EL1		21	/* Physical Address Register */
-#define MDSCR_EL1	22	/* Monitor Debug System Control Register */
-#define MDCCINT_EL1	23	/* Monitor Debug Comms Channel Interrupt Enable Reg */
-
-/* 32bit specific registers. Keep them at the end of the range */
-#define	DACR32_EL2	24	/* Domain Access Control Register */
-#define	IFSR32_EL2	25	/* Instruction Fault Status Register */
-#define	FPEXC32_EL2	26	/* Floating-Point Exception Control Register */
-#define	DBGVCR32_EL2	27	/* Debug Vector Catch Register */
-#define	NR_SYS_REGS	28
-
-/* 32bit mapping */
-#define c0_MPIDR	(MPIDR_EL1 * 2)	/* MultiProcessor ID Register */
-#define c0_CSSELR	(CSSELR_EL1 * 2)/* Cache Size Selection Register */
-#define c1_SCTLR	(SCTLR_EL1 * 2)	/* System Control Register */
-#define c1_ACTLR	(ACTLR_EL1 * 2)	/* Auxiliary Control Register */
-#define c1_CPACR	(CPACR_EL1 * 2)	/* Coprocessor Access Control */
-#define c2_TTBR0	(TTBR0_EL1 * 2)	/* Translation Table Base Register 0 */
-#define c2_TTBR0_high	(c2_TTBR0 + 1)	/* TTBR0 top 32 bits */
-#define c2_TTBR1	(TTBR1_EL1 * 2)	/* Translation Table Base Register 1 */
-#define c2_TTBR1_high	(c2_TTBR1 + 1)	/* TTBR1 top 32 bits */
-#define c2_TTBCR	(TCR_EL1 * 2)	/* Translation Table Base Control R. */
-#define c3_DACR		(DACR32_EL2 * 2)/* Domain Access Control Register */
-#define c5_DFSR		(ESR_EL1 * 2)	/* Data Fault Status Register */
-#define c5_IFSR		(IFSR32_EL2 * 2)/* Instruction Fault Status Register */
-#define c5_ADFSR	(AFSR0_EL1 * 2)	/* Auxiliary Data Fault Status R */
-#define c5_AIFSR	(AFSR1_EL1 * 2)	/* Auxiliary Instr Fault Status R */
-#define c6_DFAR		(FAR_EL1 * 2)	/* Data Fault Address Register */
-#define c6_IFAR		(c6_DFAR + 1)	/* Instruction Fault Address Register */
-#define c7_PAR		(PAR_EL1 * 2)	/* Physical Address Register */
-#define c7_PAR_high	(c7_PAR + 1)	/* PAR top 32 bits */
-#define c10_PRRR	(MAIR_EL1 * 2)	/* Primary Region Remap Register */
-#define c10_NMRR	(c10_PRRR + 1)	/* Normal Memory Remap Register */
-#define c12_VBAR	(VBAR_EL1 * 2)	/* Vector Base Address Register */
-#define c13_CID		(CONTEXTIDR_EL1 * 2)	/* Context ID Register */
-#define c13_TID_URW	(TPIDR_EL0 * 2)	/* Thread ID, User R/W */
-#define c13_TID_URO	(TPIDRRO_EL0 * 2)/* Thread ID, User R/O */
-#define c13_TID_PRIV	(TPIDR_EL1 * 2)	/* Thread ID, Privileged */
-#define c10_AMAIR0	(AMAIR_EL1 * 2)	/* Aux Memory Attr Indirection Reg */
-#define c10_AMAIR1	(c10_AMAIR0 + 1)/* Aux Memory Attr Indirection Reg */
-#define c14_CNTKCTL	(CNTKCTL_EL1 * 2) /* Timer Control Register (PL1) */
-
-#define cp14_DBGDSCRext	(MDSCR_EL1 * 2)
-#define cp14_DBGBCR0	(DBGBCR0_EL1 * 2)
-#define cp14_DBGBVR0	(DBGBVR0_EL1 * 2)
-#define cp14_DBGBXVR0	(cp14_DBGBVR0 + 1)
-#define cp14_DBGWCR0	(DBGWCR0_EL1 * 2)
-#define cp14_DBGWVR0	(DBGWVR0_EL1 * 2)
-#define cp14_DBGDCCINT	(MDCCINT_EL1 * 2)
-
-#define NR_COPRO_REGS	(NR_SYS_REGS * 2)
-
 #define ARM_EXCEPTION_IRQ	  0
 #define ARM_EXCEPTION_TRAP	  1
 

+ 0 - 1
arch/arm64/include/asm/kvm_emulate.h

@@ -26,7 +26,6 @@
 
 #include <asm/esr.h>
 #include <asm/kvm_arm.h>
-#include <asm/kvm_asm.h>
 #include <asm/kvm_mmio.h>
 #include <asm/ptrace.h>
 #include <asm/cputype.h>

+ 86 - 1
arch/arm64/include/asm/kvm_host.h

@@ -25,7 +25,6 @@
 #include <linux/types.h>
 #include <linux/kvm_types.h>
 #include <asm/kvm.h>
-#include <asm/kvm_asm.h>
 #include <asm/kvm_mmio.h>
 
 #define __KVM_HAVE_ARCH_INTC_INITIALIZED
@@ -85,6 +84,86 @@ struct kvm_vcpu_fault_info {
 	u64 hpfar_el2;		/* Hyp IPA Fault Address Register */
 };
 
+/*
+ * 0 is reserved as an invalid value.
+ * Order should be kept in sync with the save/restore code.
+ */
+enum vcpu_sysreg {
+	__INVALID_SYSREG__,
+	MPIDR_EL1,	/* MultiProcessor Affinity Register */
+	CSSELR_EL1,	/* Cache Size Selection Register */
+	SCTLR_EL1,	/* System Control Register */
+	ACTLR_EL1,	/* Auxiliary Control Register */
+	CPACR_EL1,	/* Coprocessor Access Control */
+	TTBR0_EL1,	/* Translation Table Base Register 0 */
+	TTBR1_EL1,	/* Translation Table Base Register 1 */
+	TCR_EL1,	/* Translation Control Register */
+	ESR_EL1,	/* Exception Syndrome Register */
+	AFSR0_EL1,	/* Auxilary Fault Status Register 0 */
+	AFSR1_EL1,	/* Auxilary Fault Status Register 1 */
+	FAR_EL1,	/* Fault Address Register */
+	MAIR_EL1,	/* Memory Attribute Indirection Register */
+	VBAR_EL1,	/* Vector Base Address Register */
+	CONTEXTIDR_EL1,	/* Context ID Register */
+	TPIDR_EL0,	/* Thread ID, User R/W */
+	TPIDRRO_EL0,	/* Thread ID, User R/O */
+	TPIDR_EL1,	/* Thread ID, Privileged */
+	AMAIR_EL1,	/* Aux Memory Attribute Indirection Register */
+	CNTKCTL_EL1,	/* Timer Control Register (EL1) */
+	PAR_EL1,	/* Physical Address Register */
+	MDSCR_EL1,	/* Monitor Debug System Control Register */
+	MDCCINT_EL1,	/* Monitor Debug Comms Channel Interrupt Enable Reg */
+
+	/* 32bit specific registers. Keep them at the end of the range */
+	DACR32_EL2,	/* Domain Access Control Register */
+	IFSR32_EL2,	/* Instruction Fault Status Register */
+	FPEXC32_EL2,	/* Floating-Point Exception Control Register */
+	DBGVCR32_EL2,	/* Debug Vector Catch Register */
+
+	NR_SYS_REGS	/* Nothing after this line! */
+};
+
+/* 32bit mapping */
+#define c0_MPIDR	(MPIDR_EL1 * 2)	/* MultiProcessor ID Register */
+#define c0_CSSELR	(CSSELR_EL1 * 2)/* Cache Size Selection Register */
+#define c1_SCTLR	(SCTLR_EL1 * 2)	/* System Control Register */
+#define c1_ACTLR	(ACTLR_EL1 * 2)	/* Auxiliary Control Register */
+#define c1_CPACR	(CPACR_EL1 * 2)	/* Coprocessor Access Control */
+#define c2_TTBR0	(TTBR0_EL1 * 2)	/* Translation Table Base Register 0 */
+#define c2_TTBR0_high	(c2_TTBR0 + 1)	/* TTBR0 top 32 bits */
+#define c2_TTBR1	(TTBR1_EL1 * 2)	/* Translation Table Base Register 1 */
+#define c2_TTBR1_high	(c2_TTBR1 + 1)	/* TTBR1 top 32 bits */
+#define c2_TTBCR	(TCR_EL1 * 2)	/* Translation Table Base Control R. */
+#define c3_DACR		(DACR32_EL2 * 2)/* Domain Access Control Register */
+#define c5_DFSR		(ESR_EL1 * 2)	/* Data Fault Status Register */
+#define c5_IFSR		(IFSR32_EL2 * 2)/* Instruction Fault Status Register */
+#define c5_ADFSR	(AFSR0_EL1 * 2)	/* Auxiliary Data Fault Status R */
+#define c5_AIFSR	(AFSR1_EL1 * 2)	/* Auxiliary Instr Fault Status R */
+#define c6_DFAR		(FAR_EL1 * 2)	/* Data Fault Address Register */
+#define c6_IFAR		(c6_DFAR + 1)	/* Instruction Fault Address Register */
+#define c7_PAR		(PAR_EL1 * 2)	/* Physical Address Register */
+#define c7_PAR_high	(c7_PAR + 1)	/* PAR top 32 bits */
+#define c10_PRRR	(MAIR_EL1 * 2)	/* Primary Region Remap Register */
+#define c10_NMRR	(c10_PRRR + 1)	/* Normal Memory Remap Register */
+#define c12_VBAR	(VBAR_EL1 * 2)	/* Vector Base Address Register */
+#define c13_CID		(CONTEXTIDR_EL1 * 2)	/* Context ID Register */
+#define c13_TID_URW	(TPIDR_EL0 * 2)	/* Thread ID, User R/W */
+#define c13_TID_URO	(TPIDRRO_EL0 * 2)/* Thread ID, User R/O */
+#define c13_TID_PRIV	(TPIDR_EL1 * 2)	/* Thread ID, Privileged */
+#define c10_AMAIR0	(AMAIR_EL1 * 2)	/* Aux Memory Attr Indirection Reg */
+#define c10_AMAIR1	(c10_AMAIR0 + 1)/* Aux Memory Attr Indirection Reg */
+#define c14_CNTKCTL	(CNTKCTL_EL1 * 2) /* Timer Control Register (PL1) */
+
+#define cp14_DBGDSCRext	(MDSCR_EL1 * 2)
+#define cp14_DBGBCR0	(DBGBCR0_EL1 * 2)
+#define cp14_DBGBVR0	(DBGBVR0_EL1 * 2)
+#define cp14_DBGBXVR0	(cp14_DBGBVR0 + 1)
+#define cp14_DBGWCR0	(DBGWCR0_EL1 * 2)
+#define cp14_DBGWVR0	(DBGWVR0_EL1 * 2)
+#define cp14_DBGDCCINT	(MDCCINT_EL1 * 2)
+
+#define NR_COPRO_REGS	(NR_SYS_REGS * 2)
+
 struct kvm_cpu_context {
 	struct kvm_regs	gp_regs;
 	union {
@@ -197,6 +276,12 @@ struct kvm_vcpu_stat {
 	u32 halt_successful_poll;
 	u32 halt_attempted_poll;
 	u32 halt_wakeup;
+	u32 hvc_exit_stat;
+	u64 wfe_exit_stat;
+	u64 wfi_exit_stat;
+	u64 mmio_exit_user;
+	u64 mmio_exit_kernel;
+	u64 exits;
 };
 
 int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init);

+ 0 - 1
arch/arm64/include/asm/kvm_mmio.h

@@ -19,7 +19,6 @@
 #define __ARM64_KVM_MMIO_H__
 
 #include <linux/kvm_host.h>
-#include <asm/kvm_asm.h>
 #include <asm/kvm_arm.h>
 
 /*

+ 8 - 1
arch/arm64/include/asm/kvm_mmu.h

@@ -20,6 +20,7 @@
 
 #include <asm/page.h>
 #include <asm/memory.h>
+#include <asm/cpufeature.h>
 
 /*
  * As we only have the TTBR0_EL2 register, we cannot express
@@ -158,7 +159,6 @@ static inline bool kvm_s2pmd_readonly(pmd_t *pmd)
 #define PTRS_PER_S2_PGD_SHIFT	(KVM_PHYS_SHIFT - PGDIR_SHIFT)
 #endif
 #define PTRS_PER_S2_PGD		(1 << PTRS_PER_S2_PGD_SHIFT)
-#define S2_PGD_ORDER		get_order(PTRS_PER_S2_PGD * sizeof(pgd_t))
 
 #define kvm_pgd_index(addr)	(((addr) >> PGDIR_SHIFT) & (PTRS_PER_S2_PGD - 1))
 
@@ -302,5 +302,12 @@ static inline void __kvm_extend_hypmap(pgd_t *boot_hyp_pgd,
 	merged_hyp_pgd[idmap_idx] = __pgd(__pa(boot_hyp_pgd) | PMD_TYPE_TABLE);
 }
 
+static inline unsigned int kvm_get_vmid_bits(void)
+{
+	int reg = read_system_reg(SYS_ID_AA64MMFR1_EL1);
+
+	return (cpuid_feature_extract_field(reg, ID_AA64MMFR1_VMIDBITS_SHIFT) == 2) ? 16 : 8;
+}
+
 #endif /* __ASSEMBLY__ */
 #endif /* __ARM64_KVM_MMU_H__ */

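kvm_get_vmid_bits() above is what feeds the new parameterised VTTBR_VMID_MASK(size) macro; update_vttbr() in arch/arm/kvm/arm.c earlier in this diff combines the two in the same way. A stand-alone sketch of that composition (make_vttbr() is a hypothetical helper, not part of the patch):

    /* Sketch: wrap the next VMID at the detected width and fold it into a VTTBR value. */
    static u64 make_vttbr(u64 pgd_phys, u32 next_vmid, unsigned int vmid_bits)
    {
    	u32 vmid = next_vmid & ((1 << vmid_bits) - 1);	/* 8- or 16-bit wrap */

    	return pgd_phys | (((u64)vmid << VTTBR_VMID_SHIFT) & VTTBR_VMID_MASK(vmid_bits));
    }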
+ 21 - 0
arch/arm64/include/asm/sysreg.h

@@ -20,6 +20,8 @@
 #ifndef __ASM_SYSREG_H
 #define __ASM_SYSREG_H
 
+#include <linux/stringify.h>
+
 #include <asm/opcodes.h>
 
 /*
@@ -208,6 +210,8 @@
 
 #else
 
+#include <linux/types.h>
+
 asm(
 "	.irp	num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30\n"
 "	.equ	__reg_num_x\\num, \\num\n"
@@ -232,6 +236,23 @@ static inline void config_sctlr_el1(u32 clear, u32 set)
 	val |= set;
 	asm volatile("msr sctlr_el1, %0" : : "r" (val));
 }
+
+/*
+ * Unlike read_cpuid, calls to read_sysreg are never expected to be
+ * optimized away or replaced with synthetic values.
+ */
+#define read_sysreg(r) ({					\
+	u64 __val;						\
+	asm volatile("mrs %0, " __stringify(r) : "=r" (__val));	\
+	__val;							\
+})
+
+#define write_sysreg(v, r) do {					\
+	u64 __val = (u64)v;					\
+	asm volatile("msr " __stringify(r) ", %0"		\
+		     : : "r" (__val));				\
+} while (0)
+
 #endif
 
 #endif	/* __ASM_SYSREG_H */

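The read_sysreg()/write_sysreg() helpers added above are what let the rewritten arm64 world switch save and restore system registers from C rather than from hand-written mrs/msr assembly. A small illustrative use, assuming a context with a sys_regs[] array as declared in kvm_host.h (this is not the actual hyp/sysreg-sr.c code):

    /* Sketch: save and restore one EL1 register around a guest run. */
    static void save_mdscr(struct kvm_cpu_context *ctxt)
    {
    	ctxt->sys_regs[MDSCR_EL1] = read_sysreg(mdscr_el1);
    }

    static void restore_mdscr(struct kvm_cpu_context *ctxt)
    {
    	write_sysreg(ctxt->sys_regs[MDSCR_EL1], mdscr_el1);
    }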
+ 1 - 39
arch/arm64/kernel/asm-offsets.c

@@ -109,49 +109,11 @@ int main(void)
   DEFINE(CPU_GP_REGS,		offsetof(struct kvm_cpu_context, gp_regs));
   DEFINE(CPU_USER_PT_REGS,	offsetof(struct kvm_regs, regs));
   DEFINE(CPU_FP_REGS,		offsetof(struct kvm_regs, fp_regs));
-  DEFINE(CPU_SP_EL1,		offsetof(struct kvm_regs, sp_el1));
-  DEFINE(CPU_ELR_EL1,		offsetof(struct kvm_regs, elr_el1));
-  DEFINE(CPU_SPSR,		offsetof(struct kvm_regs, spsr));
-  DEFINE(CPU_SYSREGS,		offsetof(struct kvm_cpu_context, sys_regs));
+  DEFINE(VCPU_FPEXC32_EL2,	offsetof(struct kvm_vcpu, arch.ctxt.sys_regs[FPEXC32_EL2]));
   DEFINE(VCPU_ESR_EL2,		offsetof(struct kvm_vcpu, arch.fault.esr_el2));
   DEFINE(VCPU_FAR_EL2,		offsetof(struct kvm_vcpu, arch.fault.far_el2));
   DEFINE(VCPU_HPFAR_EL2,	offsetof(struct kvm_vcpu, arch.fault.hpfar_el2));
-  DEFINE(VCPU_DEBUG_FLAGS,	offsetof(struct kvm_vcpu, arch.debug_flags));
-  DEFINE(VCPU_DEBUG_PTR,	offsetof(struct kvm_vcpu, arch.debug_ptr));
-  DEFINE(DEBUG_BCR, 		offsetof(struct kvm_guest_debug_arch, dbg_bcr));
-  DEFINE(DEBUG_BVR, 		offsetof(struct kvm_guest_debug_arch, dbg_bvr));
-  DEFINE(DEBUG_WCR, 		offsetof(struct kvm_guest_debug_arch, dbg_wcr));
-  DEFINE(DEBUG_WVR, 		offsetof(struct kvm_guest_debug_arch, dbg_wvr));
-  DEFINE(VCPU_HCR_EL2,		offsetof(struct kvm_vcpu, arch.hcr_el2));
-  DEFINE(VCPU_MDCR_EL2,	offsetof(struct kvm_vcpu, arch.mdcr_el2));
-  DEFINE(VCPU_IRQ_LINES,	offsetof(struct kvm_vcpu, arch.irq_lines));
   DEFINE(VCPU_HOST_CONTEXT,	offsetof(struct kvm_vcpu, arch.host_cpu_context));
   DEFINE(VCPU_HOST_CONTEXT,	offsetof(struct kvm_vcpu, arch.host_cpu_context));
-  DEFINE(VCPU_HOST_DEBUG_STATE, offsetof(struct kvm_vcpu, arch.host_debug_state));
-  DEFINE(VCPU_TIMER_CNTV_CTL,	offsetof(struct kvm_vcpu, arch.timer_cpu.cntv_ctl));
-  DEFINE(VCPU_TIMER_CNTV_CVAL,	offsetof(struct kvm_vcpu, arch.timer_cpu.cntv_cval));
-  DEFINE(KVM_TIMER_CNTVOFF,	offsetof(struct kvm, arch.timer.cntvoff));
-  DEFINE(KVM_TIMER_ENABLED,	offsetof(struct kvm, arch.timer.enabled));
-  DEFINE(VCPU_KVM,		offsetof(struct kvm_vcpu, kvm));
-  DEFINE(VCPU_VGIC_CPU,		offsetof(struct kvm_vcpu, arch.vgic_cpu));
-  DEFINE(VGIC_V2_CPU_HCR,	offsetof(struct vgic_cpu, vgic_v2.vgic_hcr));
-  DEFINE(VGIC_V2_CPU_VMCR,	offsetof(struct vgic_cpu, vgic_v2.vgic_vmcr));
-  DEFINE(VGIC_V2_CPU_MISR,	offsetof(struct vgic_cpu, vgic_v2.vgic_misr));
-  DEFINE(VGIC_V2_CPU_EISR,	offsetof(struct vgic_cpu, vgic_v2.vgic_eisr));
-  DEFINE(VGIC_V2_CPU_ELRSR,	offsetof(struct vgic_cpu, vgic_v2.vgic_elrsr));
-  DEFINE(VGIC_V2_CPU_APR,	offsetof(struct vgic_cpu, vgic_v2.vgic_apr));
-  DEFINE(VGIC_V2_CPU_LR,	offsetof(struct vgic_cpu, vgic_v2.vgic_lr));
-  DEFINE(VGIC_V3_CPU_SRE,	offsetof(struct vgic_cpu, vgic_v3.vgic_sre));
-  DEFINE(VGIC_V3_CPU_HCR,	offsetof(struct vgic_cpu, vgic_v3.vgic_hcr));
-  DEFINE(VGIC_V3_CPU_VMCR,	offsetof(struct vgic_cpu, vgic_v3.vgic_vmcr));
-  DEFINE(VGIC_V3_CPU_MISR,	offsetof(struct vgic_cpu, vgic_v3.vgic_misr));
-  DEFINE(VGIC_V3_CPU_EISR,	offsetof(struct vgic_cpu, vgic_v3.vgic_eisr));
-  DEFINE(VGIC_V3_CPU_ELRSR,	offsetof(struct vgic_cpu, vgic_v3.vgic_elrsr));
-  DEFINE(VGIC_V3_CPU_AP0R,	offsetof(struct vgic_cpu, vgic_v3.vgic_ap0r));
-  DEFINE(VGIC_V3_CPU_AP1R,	offsetof(struct vgic_cpu, vgic_v3.vgic_ap1r));
-  DEFINE(VGIC_V3_CPU_LR,	offsetof(struct vgic_cpu, vgic_v3.vgic_lr));
-  DEFINE(VGIC_CPU_NR_LR,	offsetof(struct vgic_cpu, nr_lr));
-  DEFINE(KVM_VTTBR,		offsetof(struct kvm, arch.vttbr));
-  DEFINE(KVM_VGIC_VCTRL,	offsetof(struct kvm, arch.vgic.vctrl_base));
 #endif
 #ifdef CONFIG_CPU_PM
   DEFINE(CPU_SUSPEND_SZ,	sizeof(struct cpu_suspend_ctx));

+ 1 - 2
arch/arm64/kvm/Makefile

@@ -10,6 +10,7 @@ KVM=../../../virt/kvm
 ARM=../../../arch/arm/kvm
 
 obj-$(CONFIG_KVM_ARM_HOST) += kvm.o
+obj-$(CONFIG_KVM_ARM_HOST) += hyp/
 
 kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o $(KVM)/vfio.o
 kvm-$(CONFIG_KVM_ARM_HOST) += $(ARM)/arm.o $(ARM)/mmu.o $(ARM)/mmio.o
@@ -22,8 +23,6 @@ kvm-$(CONFIG_KVM_ARM_HOST) += guest.o debug.o reset.o sys_regs.o sys_regs_generi
 kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic.o
 kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v2.o
 kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v2-emul.o
-kvm-$(CONFIG_KVM_ARM_HOST) += vgic-v2-switch.o
 kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v3.o
 kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v3-emul.o
-kvm-$(CONFIG_KVM_ARM_HOST) += vgic-v3-switch.o
 kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/arch_timer.o

+ 9 - 1
arch/arm64/kvm/guest.c

@@ -28,13 +28,21 @@
 #include <asm/cputype.h>
 #include <asm/uaccess.h>
 #include <asm/kvm.h>
-#include <asm/kvm_asm.h>
 #include <asm/kvm_emulate.h>
 #include <asm/kvm_coproc.h>

 #include "trace.h"
 
+#define VM_STAT(x) { #x, offsetof(struct kvm, stat.x), KVM_STAT_VM }
+#define VCPU_STAT(x) { #x, offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU }
+
 struct kvm_stats_debugfs_item debugfs_entries[] = {
+	VCPU_STAT(hvc_exit_stat),
+	VCPU_STAT(wfe_exit_stat),
+	VCPU_STAT(wfi_exit_stat),
+	VCPU_STAT(mmio_exit_user),
+	VCPU_STAT(mmio_exit_kernel),
+	VCPU_STAT(exits),
 	{ NULL }
 };
 

+ 4 - 0
arch/arm64/kvm/handle_exit.c

@@ -23,6 +23,7 @@
 #include <linux/kvm_host.h>
 
 #include <asm/esr.h>
+#include <asm/kvm_asm.h>
 #include <asm/kvm_coproc.h>
 #include <asm/kvm_emulate.h>
 #include <asm/kvm_mmu.h>
@@ -39,6 +40,7 @@ static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run)
 
 	trace_kvm_hvc_arm64(*vcpu_pc(vcpu), vcpu_get_reg(vcpu, 0),
 			    kvm_vcpu_hvc_get_imm(vcpu));
+	vcpu->stat.hvc_exit_stat++;
 
 	ret = kvm_psci_call(vcpu);
 	if (ret < 0) {
@@ -71,9 +73,11 @@ static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run)
 {
 	if (kvm_vcpu_get_hsr(vcpu) & ESR_ELx_WFx_ISS_WFE) {
 		trace_kvm_wfx_arm64(*vcpu_pc(vcpu), true);
+		vcpu->stat.wfe_exit_stat++;
 		kvm_vcpu_on_spin(vcpu);
 	} else {
 		trace_kvm_wfx_arm64(*vcpu_pc(vcpu), false);
+		vcpu->stat.wfi_exit_stat++;
 		kvm_vcpu_block(vcpu);
 	}
 

+ 9 - 0
arch/arm64/kvm/hyp-init.S

@@ -94,6 +94,15 @@ __do_hyp_init:
 	 */
 	mrs	x5, ID_AA64MMFR0_EL1
 	bfi	x4, x5, #16, #3
+	/*
+	 * Read the VMIDBits bits from ID_AA64MMFR1_EL1 and set the VS bit in
+	 * VTCR_EL2.
+	 */
+	mrs	x5, ID_AA64MMFR1_EL1
+	ubfx	x5, x5, #5, #1
+	lsl	x5, x5, #VTCR_EL2_VS
+	orr	x4, x4, x5
+
 	msr	vtcr_el2, x4
 
 	mrs	x4, mair_el1

+ 1 - 1080
arch/arm64/kvm/hyp.S

@@ -17,910 +17,7 @@
 
 #include <linux/linkage.h>
 
-#include <asm/alternative.h>
-#include <asm/asm-offsets.h>
 #include <asm/assembler.h>
-#include <asm/cpufeature.h>
-#include <asm/debug-monitors.h>
-#include <asm/esr.h>
-#include <asm/fpsimdmacros.h>
-#include <asm/kvm.h>
-#include <asm/kvm_arm.h>
-#include <asm/kvm_asm.h>
-#include <asm/kvm_mmu.h>
-#include <asm/memory.h>
-
-#define CPU_GP_REG_OFFSET(x)	(CPU_GP_REGS + x)
-#define CPU_XREG_OFFSET(x)	CPU_GP_REG_OFFSET(CPU_USER_PT_REGS + 8*x)
-#define CPU_SPSR_OFFSET(x)	CPU_GP_REG_OFFSET(CPU_SPSR + 8*x)
-#define CPU_SYSREG_OFFSET(x)	(CPU_SYSREGS + 8*x)
-
-	.text
-	.pushsection	.hyp.text, "ax"
-	.align	PAGE_SHIFT
-
-.macro save_common_regs
-	// x2: base address for cpu context
-	// x3: tmp register
-
-	add	x3, x2, #CPU_XREG_OFFSET(19)
-	stp	x19, x20, [x3]
-	stp	x21, x22, [x3, #16]
-	stp	x23, x24, [x3, #32]
-	stp	x25, x26, [x3, #48]
-	stp	x27, x28, [x3, #64]
-	stp	x29, lr, [x3, #80]
-
-	mrs	x19, sp_el0
-	mrs	x20, elr_el2		// pc before entering el2
-	mrs	x21, spsr_el2		// pstate before entering el2
-
-	stp	x19, x20, [x3, #96]
-	str	x21, [x3, #112]
-
-	mrs	x22, sp_el1
-	mrs	x23, elr_el1
-	mrs	x24, spsr_el1
-
-	str	x22, [x2, #CPU_GP_REG_OFFSET(CPU_SP_EL1)]
-	str	x23, [x2, #CPU_GP_REG_OFFSET(CPU_ELR_EL1)]
-	str	x24, [x2, #CPU_SPSR_OFFSET(KVM_SPSR_EL1)]
-.endm
-
-.macro restore_common_regs
-	// x2: base address for cpu context
-	// x3: tmp register
-
-	ldr	x22, [x2, #CPU_GP_REG_OFFSET(CPU_SP_EL1)]
-	ldr	x23, [x2, #CPU_GP_REG_OFFSET(CPU_ELR_EL1)]
-	ldr	x24, [x2, #CPU_SPSR_OFFSET(KVM_SPSR_EL1)]
-
-	msr	sp_el1, x22
-	msr	elr_el1, x23
-	msr	spsr_el1, x24
-
-	add	x3, x2, #CPU_XREG_OFFSET(31)    // SP_EL0
-	ldp	x19, x20, [x3]
-	ldr	x21, [x3, #16]
-
-	msr	sp_el0, x19
-	msr	elr_el2, x20 		// pc on return from el2
-	msr	spsr_el2, x21 		// pstate on return from el2
-
-	add	x3, x2, #CPU_XREG_OFFSET(19)
-	ldp	x19, x20, [x3]
-	ldp	x21, x22, [x3, #16]
-	ldp	x23, x24, [x3, #32]
-	ldp	x25, x26, [x3, #48]
-	ldp	x27, x28, [x3, #64]
-	ldp	x29, lr, [x3, #80]
-.endm
-
-.macro save_host_regs
-	save_common_regs
-.endm
-
-.macro restore_host_regs
-	restore_common_regs
-.endm
-
-.macro save_fpsimd
-	// x2: cpu context address
-	// x3, x4: tmp regs
-	add	x3, x2, #CPU_GP_REG_OFFSET(CPU_FP_REGS)
-	fpsimd_save x3, 4
-.endm
-
-.macro restore_fpsimd
-	// x2: cpu context address
-	// x3, x4: tmp regs
-	add	x3, x2, #CPU_GP_REG_OFFSET(CPU_FP_REGS)
-	fpsimd_restore x3, 4
-.endm
-
-.macro save_guest_regs
-	// x0 is the vcpu address
-	// x1 is the return code, do not corrupt!
-	// x2 is the cpu context
-	// x3 is a tmp register
-	// Guest's x0-x3 are on the stack
-
-	// Compute base to save registers
-	add	x3, x2, #CPU_XREG_OFFSET(4)
-	stp	x4, x5, [x3]
-	stp	x6, x7, [x3, #16]
-	stp	x8, x9, [x3, #32]
-	stp	x10, x11, [x3, #48]
-	stp	x12, x13, [x3, #64]
-	stp	x14, x15, [x3, #80]
-	stp	x16, x17, [x3, #96]
-	str	x18, [x3, #112]
-
-	pop	x6, x7			// x2, x3
-	pop	x4, x5			// x0, x1
-
-	add	x3, x2, #CPU_XREG_OFFSET(0)
-	stp	x4, x5, [x3]
-	stp	x6, x7, [x3, #16]
-
-	save_common_regs
-.endm
-
-.macro restore_guest_regs
-	// x0 is the vcpu address.
-	// x2 is the cpu context
-	// x3 is a tmp register
-
-	// Prepare x0-x3 for later restore
-	add	x3, x2, #CPU_XREG_OFFSET(0)
-	ldp	x4, x5, [x3]
-	ldp	x6, x7, [x3, #16]
-	push	x4, x5		// Push x0-x3 on the stack
-	push	x6, x7
-
-	// x4-x18
-	ldp	x4, x5, [x3, #32]
-	ldp	x6, x7, [x3, #48]
-	ldp	x8, x9, [x3, #64]
-	ldp	x10, x11, [x3, #80]
-	ldp	x12, x13, [x3, #96]
-	ldp	x14, x15, [x3, #112]
-	ldp	x16, x17, [x3, #128]
-	ldr	x18, [x3, #144]
-
-	// x19-x29, lr, sp*, elr*, spsr*
-	restore_common_regs
-
-	// Last bits of the 64bit state
-	pop	x2, x3
-	pop	x0, x1
-
-	// Do not touch any register after this!
-.endm
-
-/*
- * Macros to perform system register save/restore.
- *
- * Ordering here is absolutely critical, and must be kept consistent
- * in {save,restore}_sysregs, {save,restore}_guest_32bit_state,
- * and in kvm_asm.h.
- *
- * In other words, don't touch any of these unless you know what
- * you are doing.
- */
-.macro save_sysregs
-	// x2: base address for cpu context
-	// x3: tmp register
-
-	add	x3, x2, #CPU_SYSREG_OFFSET(MPIDR_EL1)
-
-	mrs	x4,	vmpidr_el2
-	mrs	x5,	csselr_el1
-	mrs	x6,	sctlr_el1
-	mrs	x7,	actlr_el1
-	mrs	x8,	cpacr_el1
-	mrs	x9,	ttbr0_el1
-	mrs	x10,	ttbr1_el1
-	mrs	x11,	tcr_el1
-	mrs	x12,	esr_el1
-	mrs	x13, 	afsr0_el1
-	mrs	x14,	afsr1_el1
-	mrs	x15,	far_el1
-	mrs	x16,	mair_el1
-	mrs	x17,	vbar_el1
-	mrs	x18,	contextidr_el1
-	mrs	x19,	tpidr_el0
-	mrs	x20,	tpidrro_el0
-	mrs	x21,	tpidr_el1
-	mrs	x22, 	amair_el1
-	mrs	x23, 	cntkctl_el1
-	mrs	x24,	par_el1
-	mrs	x25,	mdscr_el1
-
-	stp	x4, x5, [x3]
-	stp	x6, x7, [x3, #16]
-	stp	x8, x9, [x3, #32]
-	stp	x10, x11, [x3, #48]
-	stp	x12, x13, [x3, #64]
-	stp	x14, x15, [x3, #80]
-	stp	x16, x17, [x3, #96]
-	stp	x18, x19, [x3, #112]
-	stp	x20, x21, [x3, #128]
-	stp	x22, x23, [x3, #144]
-	stp	x24, x25, [x3, #160]
-.endm
-
-.macro save_debug type
-	// x4: pointer to register set
-	// x5: number of registers to skip
-	// x6..x22 trashed
-
-	adr	x22, 1f
-	add	x22, x22, x5, lsl #2
-	br	x22
-1:
-	mrs	x21, \type\()15_el1
-	mrs	x20, \type\()14_el1
-	mrs	x19, \type\()13_el1
-	mrs	x18, \type\()12_el1
-	mrs	x17, \type\()11_el1
-	mrs	x16, \type\()10_el1
-	mrs	x15, \type\()9_el1
-	mrs	x14, \type\()8_el1
-	mrs	x13, \type\()7_el1
-	mrs	x12, \type\()6_el1
-	mrs	x11, \type\()5_el1
-	mrs	x10, \type\()4_el1
-	mrs	x9, \type\()3_el1
-	mrs	x8, \type\()2_el1
-	mrs	x7, \type\()1_el1
-	mrs	x6, \type\()0_el1
-
-	adr	x22, 1f
-	add	x22, x22, x5, lsl #2
-	br	x22
-1:
-	str	x21, [x4, #(15 * 8)]
-	str	x20, [x4, #(14 * 8)]
-	str	x19, [x4, #(13 * 8)]
-	str	x18, [x4, #(12 * 8)]
-	str	x17, [x4, #(11 * 8)]
-	str	x16, [x4, #(10 * 8)]
-	str	x15, [x4, #(9 * 8)]
-	str	x14, [x4, #(8 * 8)]
-	str	x13, [x4, #(7 * 8)]
-	str	x12, [x4, #(6 * 8)]
-	str	x11, [x4, #(5 * 8)]
-	str	x10, [x4, #(4 * 8)]
-	str	x9, [x4, #(3 * 8)]
-	str	x8, [x4, #(2 * 8)]
-	str	x7, [x4, #(1 * 8)]
-	str	x6, [x4, #(0 * 8)]
-.endm
-
-.macro restore_sysregs
-	// x2: base address for cpu context
-	// x3: tmp register
-
-	add	x3, x2, #CPU_SYSREG_OFFSET(MPIDR_EL1)
-
-	ldp	x4, x5, [x3]
-	ldp	x6, x7, [x3, #16]
-	ldp	x8, x9, [x3, #32]
-	ldp	x10, x11, [x3, #48]
-	ldp	x12, x13, [x3, #64]
-	ldp	x14, x15, [x3, #80]
-	ldp	x16, x17, [x3, #96]
-	ldp	x18, x19, [x3, #112]
-	ldp	x20, x21, [x3, #128]
-	ldp	x22, x23, [x3, #144]
-	ldp	x24, x25, [x3, #160]
-
-	msr	vmpidr_el2,	x4
-	msr	csselr_el1,	x5
-	msr	sctlr_el1,	x6
-	msr	actlr_el1,	x7
-	msr	cpacr_el1,	x8
-	msr	ttbr0_el1,	x9
-	msr	ttbr1_el1,	x10
-	msr	tcr_el1,	x11
-	msr	esr_el1,	x12
-	msr	afsr0_el1,	x13
-	msr	afsr1_el1,	x14
-	msr	far_el1,	x15
-	msr	mair_el1,	x16
-	msr	vbar_el1,	x17
-	msr	contextidr_el1,	x18
-	msr	tpidr_el0,	x19
-	msr	tpidrro_el0,	x20
-	msr	tpidr_el1,	x21
-	msr	amair_el1,	x22
-	msr	cntkctl_el1,	x23
-	msr	par_el1,	x24
-	msr	mdscr_el1,	x25
-.endm
-
-.macro restore_debug type
-	// x4: pointer to register set
-	// x5: number of registers to skip
-	// x6..x22 trashed
-
-	adr	x22, 1f
-	add	x22, x22, x5, lsl #2
-	br	x22
-1:
-	ldr	x21, [x4, #(15 * 8)]
-	ldr	x20, [x4, #(14 * 8)]
-	ldr	x19, [x4, #(13 * 8)]
-	ldr	x18, [x4, #(12 * 8)]
-	ldr	x17, [x4, #(11 * 8)]
-	ldr	x16, [x4, #(10 * 8)]
-	ldr	x15, [x4, #(9 * 8)]
-	ldr	x14, [x4, #(8 * 8)]
-	ldr	x13, [x4, #(7 * 8)]
-	ldr	x12, [x4, #(6 * 8)]
-	ldr	x11, [x4, #(5 * 8)]
-	ldr	x10, [x4, #(4 * 8)]
-	ldr	x9, [x4, #(3 * 8)]
-	ldr	x8, [x4, #(2 * 8)]
-	ldr	x7, [x4, #(1 * 8)]
-	ldr	x6, [x4, #(0 * 8)]
-
-	adr	x22, 1f
-	add	x22, x22, x5, lsl #2
-	br	x22
-1:
-	msr	\type\()15_el1, x21
-	msr	\type\()14_el1, x20
-	msr	\type\()13_el1, x19
-	msr	\type\()12_el1, x18
-	msr	\type\()11_el1, x17
-	msr	\type\()10_el1, x16
-	msr	\type\()9_el1, x15
-	msr	\type\()8_el1, x14
-	msr	\type\()7_el1, x13
-	msr	\type\()6_el1, x12
-	msr	\type\()5_el1, x11
-	msr	\type\()4_el1, x10
-	msr	\type\()3_el1, x9
-	msr	\type\()2_el1, x8
-	msr	\type\()1_el1, x7
-	msr	\type\()0_el1, x6
-.endm
-
-.macro skip_32bit_state tmp, target
-	// Skip 32bit state if not needed
-	mrs	\tmp, hcr_el2
-	tbnz	\tmp, #HCR_RW_SHIFT, \target
-.endm
-
-.macro skip_tee_state tmp, target
-	// Skip ThumbEE state if not needed
-	mrs	\tmp, id_pfr0_el1
-	tbz	\tmp, #12, \target
-.endm
-
-.macro skip_debug_state tmp, target
-	ldr	\tmp, [x0, #VCPU_DEBUG_FLAGS]
-	tbz	\tmp, #KVM_ARM64_DEBUG_DIRTY_SHIFT, \target
-.endm
-
-/*
- * Branch to target if CPTR_EL2.TFP bit is set (VFP/SIMD trapping enabled)
- */
-.macro skip_fpsimd_state tmp, target
-	mrs	\tmp, cptr_el2
-	tbnz	\tmp, #CPTR_EL2_TFP_SHIFT, \target
-.endm
-
-.macro compute_debug_state target
-	// Compute debug state: If any of KDE, MDE or KVM_ARM64_DEBUG_DIRTY
-	// is set, we do a full save/restore cycle and disable trapping.
-	add	x25, x0, #VCPU_CONTEXT
-
-	// Check the state of MDSCR_EL1
-	ldr	x25, [x25, #CPU_SYSREG_OFFSET(MDSCR_EL1)]
-	and	x26, x25, #DBG_MDSCR_KDE
-	and	x25, x25, #DBG_MDSCR_MDE
-	adds	xzr, x25, x26
-	b.eq	9998f		// Nothing to see there
-
-	// If any interesting bits was set, we must set the flag
-	mov	x26, #KVM_ARM64_DEBUG_DIRTY
-	str	x26, [x0, #VCPU_DEBUG_FLAGS]
-	b	9999f		// Don't skip restore
-
-9998:
-	// Otherwise load the flags from memory in case we recently
-	// trapped
-	skip_debug_state x25, \target
-9999:
-.endm
-
-.macro save_guest_32bit_state
-	skip_32bit_state x3, 1f
-
-	add	x3, x2, #CPU_SPSR_OFFSET(KVM_SPSR_ABT)
-	mrs	x4, spsr_abt
-	mrs	x5, spsr_und
-	mrs	x6, spsr_irq
-	mrs	x7, spsr_fiq
-	stp	x4, x5, [x3]
-	stp	x6, x7, [x3, #16]
-
-	add	x3, x2, #CPU_SYSREG_OFFSET(DACR32_EL2)
-	mrs	x4, dacr32_el2
-	mrs	x5, ifsr32_el2
-	stp	x4, x5, [x3]
-
-	skip_fpsimd_state x8, 2f
-	mrs	x6, fpexc32_el2
-	str	x6, [x3, #16]
-2:
-	skip_debug_state x8, 1f
-	mrs	x7, dbgvcr32_el2
-	str	x7, [x3, #24]
-1:
-.endm
-
-.macro restore_guest_32bit_state
-	skip_32bit_state x3, 1f
-
-	add	x3, x2, #CPU_SPSR_OFFSET(KVM_SPSR_ABT)
-	ldp	x4, x5, [x3]
-	ldp	x6, x7, [x3, #16]
-	msr	spsr_abt, x4
-	msr	spsr_und, x5
-	msr	spsr_irq, x6
-	msr	spsr_fiq, x7
-
-	add	x3, x2, #CPU_SYSREG_OFFSET(DACR32_EL2)
-	ldp	x4, x5, [x3]
-	msr	dacr32_el2, x4
-	msr	ifsr32_el2, x5
-
-	skip_debug_state x8, 1f
-	ldr	x7, [x3, #24]
-	msr	dbgvcr32_el2, x7
-1:
-.endm
-
-.macro activate_traps
-	ldr     x2, [x0, #VCPU_HCR_EL2]
-
-	/*
-	 * We are about to set CPTR_EL2.TFP to trap all floating point
-	 * register accesses to EL2, however, the ARM ARM clearly states that
-	 * traps are only taken to EL2 if the operation would not otherwise
-	 * trap to EL1.  Therefore, always make sure that for 32-bit guests,
-	 * we set FPEXC.EN to prevent traps to EL1, when setting the TFP bit.
-	 */
-	tbnz	x2, #HCR_RW_SHIFT, 99f // open code skip_32bit_state
-	mov	x3, #(1 << 30)
-	msr	fpexc32_el2, x3
-	isb
-99:
-	msr     hcr_el2, x2
-	mov	x2, #CPTR_EL2_TTA
-	orr     x2, x2, #CPTR_EL2_TFP
-	msr	cptr_el2, x2
-
-	mov	x2, #(1 << 15)	// Trap CP15 Cr=15
-	msr	hstr_el2, x2
-
-	// Monitor Debug Config - see kvm_arm_setup_debug()
-	ldr	x2, [x0, #VCPU_MDCR_EL2]
-	msr	mdcr_el2, x2
-.endm
-
-.macro deactivate_traps
-	mov	x2, #HCR_RW
-	msr	hcr_el2, x2
-	msr	hstr_el2, xzr
-
-	mrs	x2, mdcr_el2
-	and	x2, x2, #MDCR_EL2_HPMN_MASK
-	msr	mdcr_el2, x2
-.endm
-
-.macro activate_vm
-	ldr	x1, [x0, #VCPU_KVM]
-	kern_hyp_va	x1
-	ldr	x2, [x1, #KVM_VTTBR]
-	msr	vttbr_el2, x2
-.endm
-
-.macro deactivate_vm
-	msr	vttbr_el2, xzr
-.endm
-
-/*
- * Call into the vgic backend for state saving
- */
-.macro save_vgic_state
-alternative_if_not ARM64_HAS_SYSREG_GIC_CPUIF
-	bl	__save_vgic_v2_state
-alternative_else
-	bl	__save_vgic_v3_state
-alternative_endif
-	mrs	x24, hcr_el2
-	mov	x25, #HCR_INT_OVERRIDE
-	neg	x25, x25
-	and	x24, x24, x25
-	msr	hcr_el2, x24
-.endm
-
-/*
- * Call into the vgic backend for state restoring
- */
-.macro restore_vgic_state
-	mrs	x24, hcr_el2
-	ldr	x25, [x0, #VCPU_IRQ_LINES]
-	orr	x24, x24, #HCR_INT_OVERRIDE
-	orr	x24, x24, x25
-	msr	hcr_el2, x24
-alternative_if_not ARM64_HAS_SYSREG_GIC_CPUIF
-	bl	__restore_vgic_v2_state
-alternative_else
-	bl	__restore_vgic_v3_state
-alternative_endif
-.endm
-
-.macro save_timer_state
-	// x0: vcpu pointer
-	ldr	x2, [x0, #VCPU_KVM]
-	kern_hyp_va x2
-	ldr	w3, [x2, #KVM_TIMER_ENABLED]
-	cbz	w3, 1f
-
-	mrs	x3, cntv_ctl_el0
-	and	x3, x3, #3
-	str	w3, [x0, #VCPU_TIMER_CNTV_CTL]
-
-	isb
-
-	mrs	x3, cntv_cval_el0
-	str	x3, [x0, #VCPU_TIMER_CNTV_CVAL]
-
-1:
-	// Disable the virtual timer
-	msr	cntv_ctl_el0, xzr
-
-	// Allow physical timer/counter access for the host
-	mrs	x2, cnthctl_el2
-	orr	x2, x2, #3
-	msr	cnthctl_el2, x2
-
-	// Clear cntvoff for the host
-	msr	cntvoff_el2, xzr
-.endm
-
-.macro restore_timer_state
-	// x0: vcpu pointer
-	// Disallow physical timer access for the guest
-	// Physical counter access is allowed
-	mrs	x2, cnthctl_el2
-	orr	x2, x2, #1
-	bic	x2, x2, #2
-	msr	cnthctl_el2, x2
-
-	ldr	x2, [x0, #VCPU_KVM]
-	kern_hyp_va x2
-	ldr	w3, [x2, #KVM_TIMER_ENABLED]
-	cbz	w3, 1f
-
-	ldr	x3, [x2, #KVM_TIMER_CNTVOFF]
-	msr	cntvoff_el2, x3
-	ldr	x2, [x0, #VCPU_TIMER_CNTV_CVAL]
-	msr	cntv_cval_el0, x2
-	isb
-
-	ldr	w2, [x0, #VCPU_TIMER_CNTV_CTL]
-	and	x2, x2, #3
-	msr	cntv_ctl_el0, x2
-1:
-.endm
-
-__save_sysregs:
-	save_sysregs
-	ret
-
-__restore_sysregs:
-	restore_sysregs
-	ret
-
-/* Save debug state */
-__save_debug:
-	// x2: ptr to CPU context
-	// x3: ptr to debug reg struct
-	// x4/x5/x6-22/x24-26: trashed
-
-	mrs	x26, id_aa64dfr0_el1
-	ubfx	x24, x26, #12, #4	// Extract BRPs
-	ubfx	x25, x26, #20, #4	// Extract WRPs
-	mov	w26, #15
-	sub	w24, w26, w24		// How many BPs to skip
-	sub	w25, w26, w25		// How many WPs to skip
-
-	mov	x5, x24
-	add	x4, x3, #DEBUG_BCR
-	save_debug dbgbcr
-	add	x4, x3, #DEBUG_BVR
-	save_debug dbgbvr
-
-	mov	x5, x25
-	add	x4, x3, #DEBUG_WCR
-	save_debug dbgwcr
-	add	x4, x3, #DEBUG_WVR
-	save_debug dbgwvr
-
-	mrs	x21, mdccint_el1
-	str	x21, [x2, #CPU_SYSREG_OFFSET(MDCCINT_EL1)]
-	ret
-
-/* Restore debug state */
-__restore_debug:
-	// x2: ptr to CPU context
-	// x3: ptr to debug reg struct
-	// x4/x5/x6-22/x24-26: trashed
-
-	mrs	x26, id_aa64dfr0_el1
-	ubfx	x24, x26, #12, #4	// Extract BRPs
-	ubfx	x25, x26, #20, #4	// Extract WRPs
-	mov	w26, #15
-	sub	w24, w26, w24		// How many BPs to skip
-	sub	w25, w26, w25		// How many WPs to skip
-
-	mov	x5, x24
-	add	x4, x3, #DEBUG_BCR
-	restore_debug dbgbcr
-	add	x4, x3, #DEBUG_BVR
-	restore_debug dbgbvr
-
-	mov	x5, x25
-	add	x4, x3, #DEBUG_WCR
-	restore_debug dbgwcr
-	add	x4, x3, #DEBUG_WVR
-	restore_debug dbgwvr
-
-	ldr	x21, [x2, #CPU_SYSREG_OFFSET(MDCCINT_EL1)]
-	msr	mdccint_el1, x21
-
-	ret
-
-__save_fpsimd:
-	skip_fpsimd_state x3, 1f
-	save_fpsimd
-1:	ret
-
-__restore_fpsimd:
-	skip_fpsimd_state x3, 1f
-	restore_fpsimd
-1:	ret
-
-switch_to_guest_fpsimd:
-	push	x4, lr
-
-	mrs	x2, cptr_el2
-	bic	x2, x2, #CPTR_EL2_TFP
-	msr	cptr_el2, x2
-	isb
-
-	mrs	x0, tpidr_el2
-
-	ldr	x2, [x0, #VCPU_HOST_CONTEXT]
-	kern_hyp_va x2
-	bl __save_fpsimd
-
-	add	x2, x0, #VCPU_CONTEXT
-	bl __restore_fpsimd
-
-	skip_32bit_state x3, 1f
-	ldr	x4, [x2, #CPU_SYSREG_OFFSET(FPEXC32_EL2)]
-	msr	fpexc32_el2, x4
-1:
-	pop	x4, lr
-	pop	x2, x3
-	pop	x0, x1
-
-	eret
-
-/*
- * u64 __kvm_vcpu_run(struct kvm_vcpu *vcpu);
- *
- * This is the world switch. The first half of the function
- * deals with entering the guest, and anything from __kvm_vcpu_return
- * to the end of the function deals with reentering the host.
- * On the enter path, only x0 (vcpu pointer) must be preserved until
- * the last moment. On the exit path, x0 (vcpu pointer) and x1 (exception
- * code) must both be preserved until the epilogue.
- * In both cases, x2 points to the CPU context we're saving/restoring from/to.
- */
-ENTRY(__kvm_vcpu_run)
-	kern_hyp_va	x0
-	msr	tpidr_el2, x0	// Save the vcpu register
-
-	// Host context
-	ldr	x2, [x0, #VCPU_HOST_CONTEXT]
-	kern_hyp_va x2
-
-	save_host_regs
-	bl __save_sysregs
-
-	compute_debug_state 1f
-	add	x3, x0, #VCPU_HOST_DEBUG_STATE
-	bl	__save_debug
-1:
-	activate_traps
-	activate_vm
-
-	restore_vgic_state
-	restore_timer_state
-
-	// Guest context
-	add	x2, x0, #VCPU_CONTEXT
-
-	// We must restore the 32-bit state before the sysregs, thanks
-	// to Cortex-A57 erratum #852523.
-	restore_guest_32bit_state
-	bl __restore_sysregs
-
-	skip_debug_state x3, 1f
-	ldr	x3, [x0, #VCPU_DEBUG_PTR]
-	kern_hyp_va x3
-	bl	__restore_debug
-1:
-	restore_guest_regs
-
-	// That's it, no more messing around.
-	eret
-
-__kvm_vcpu_return:
-	// Assume x0 is the vcpu pointer, x1 the return code
-	// Guest's x0-x3 are on the stack
-
-	// Guest context
-	add	x2, x0, #VCPU_CONTEXT
-
-	save_guest_regs
-	bl __save_fpsimd
-	bl __save_sysregs
-
-	skip_debug_state x3, 1f
-	ldr	x3, [x0, #VCPU_DEBUG_PTR]
-	kern_hyp_va x3
-	bl	__save_debug
-1:
-	save_guest_32bit_state
-
-	save_timer_state
-	save_vgic_state
-
-	deactivate_traps
-	deactivate_vm
-
-	// Host context
-	ldr	x2, [x0, #VCPU_HOST_CONTEXT]
-	kern_hyp_va x2
-
-	bl __restore_sysregs
-	bl __restore_fpsimd
-	/* Clear FPSIMD and Trace trapping */
-	msr     cptr_el2, xzr
-
-	skip_debug_state x3, 1f
-	// Clear the dirty flag for the next run, as all the state has
-	// already been saved. Note that we nuke the whole 64bit word.
-	// If we ever add more flags, we'll have to be more careful...
-	str	xzr, [x0, #VCPU_DEBUG_FLAGS]
-	add	x3, x0, #VCPU_HOST_DEBUG_STATE
-	bl	__restore_debug
-1:
-	restore_host_regs
-
-	mov	x0, x1
-	ret
-END(__kvm_vcpu_run)
-
-// void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa);
-ENTRY(__kvm_tlb_flush_vmid_ipa)
-	dsb	ishst
-
-	kern_hyp_va	x0
-	ldr	x2, [x0, #KVM_VTTBR]
-	msr	vttbr_el2, x2
-	isb
-
-	/*
-	 * We could do so much better if we had the VA as well.
-	 * Instead, we invalidate Stage-2 for this IPA, and the
-	 * whole of Stage-1. Weep...
-	 */
-	lsr	x1, x1, #12
-	tlbi	ipas2e1is, x1
-	/*
-	 * We have to ensure completion of the invalidation at Stage-2,
-	 * since a table walk on another CPU could refill a TLB with a
-	 * complete (S1 + S2) walk based on the old Stage-2 mapping if
-	 * the Stage-1 invalidation happened first.
-	 */
-	dsb	ish
-	tlbi	vmalle1is
-	dsb	ish
-	isb
-
-	msr	vttbr_el2, xzr
-	ret
-ENDPROC(__kvm_tlb_flush_vmid_ipa)
-
-/**
- * void __kvm_tlb_flush_vmid(struct kvm *kvm) - Flush per-VMID TLBs
- * @struct kvm *kvm - pointer to kvm structure
- *
- * Invalidates all Stage 1 and 2 TLB entries for current VMID.
- */
-ENTRY(__kvm_tlb_flush_vmid)
-	dsb     ishst
-
-	kern_hyp_va     x0
-	ldr     x2, [x0, #KVM_VTTBR]
-	msr     vttbr_el2, x2
-	isb
-
-	tlbi    vmalls12e1is
-	dsb     ish
-	isb
-
-	msr     vttbr_el2, xzr
-	ret
-ENDPROC(__kvm_tlb_flush_vmid)
-
-ENTRY(__kvm_flush_vm_context)
-	dsb	ishst
-	tlbi	alle1is
-	ic	ialluis
-	dsb	ish
-	ret
-ENDPROC(__kvm_flush_vm_context)
-
-__kvm_hyp_panic:
-	// Stash PAR_EL1 before corrupting it in __restore_sysregs
-	mrs	x0, par_el1
-	push	x0, xzr
-
-	// Guess the context by looking at VTTBR:
-	// If zero, then we're already a host.
-	// Otherwise restore a minimal host context before panicing.
-	mrs	x0, vttbr_el2
-	cbz	x0, 1f
-
-	mrs	x0, tpidr_el2
-
-	deactivate_traps
-	deactivate_vm
-
-	ldr	x2, [x0, #VCPU_HOST_CONTEXT]
-	kern_hyp_va x2
-
-	bl __restore_sysregs
-
-	/*
-	 * Make sure we have a valid host stack, and don't leave junk in the
-	 * frame pointer that will give us a misleading host stack unwinding.
-	 */
-	ldr	x22, [x2, #CPU_GP_REG_OFFSET(CPU_SP_EL1)]
-	msr	sp_el1, x22
-	mov	x29, xzr
-
-1:	adr	x0, __hyp_panic_str
-	adr	x1, 2f
-	ldp	x2, x3, [x1]
-	sub	x0, x0, x2
-	add	x0, x0, x3
-	mrs	x1, spsr_el2
-	mrs	x2, elr_el2
-	mrs	x3, esr_el2
-	mrs	x4, far_el2
-	mrs	x5, hpfar_el2
-	pop	x6, xzr		// active context PAR_EL1
-	mrs	x7, tpidr_el2
-
-	mov	lr, #(PSR_F_BIT | PSR_I_BIT | PSR_A_BIT | PSR_D_BIT |\
-		      PSR_MODE_EL1h)
-	msr	spsr_el2, lr
-	ldr	lr, =panic
-	msr	elr_el2, lr
-	eret
-
-	.align	3
-2:	.quad	HYP_PAGE_OFFSET
-	.quad	PAGE_OFFSET
-ENDPROC(__kvm_hyp_panic)
-
-__hyp_panic_str:
-	.ascii	"HYP panic:\nPS:%08x PC:%016x ESR:%08x\nFAR:%016x HPFAR:%016x PAR:%016x\nVCPU:%p\n\0"
-
-	.align	2
 
 
 /*
  * u64 kvm_call_hyp(void *hypfn, ...);
@@ -934,7 +31,7 @@ __hyp_panic_str:
  * passed as x0, x1, and x2 (a maximum of 3 arguments in addition to the
  * function pointer can be passed).  The function being called must be mapped
  * in Hyp mode (see init_hyp_mode in arch/arm/kvm/arm.c).  Return values are
- * passed in r0 and r1.
+ * passed in x0.
  *
  * A function pointer with a value of 0 has a special meaning, and is
  * used to implement __hyp_get_vectors in the same way as in
@@ -944,179 +41,3 @@ ENTRY(kvm_call_hyp)
 	hvc	#0
 	ret
 ENDPROC(kvm_call_hyp)
-
-.macro invalid_vector	label, target
-	.align	2
-\label:
-	b \target
-ENDPROC(\label)
-.endm
-
-	/* None of these should ever happen */
-	invalid_vector	el2t_sync_invalid, __kvm_hyp_panic
-	invalid_vector	el2t_irq_invalid, __kvm_hyp_panic
-	invalid_vector	el2t_fiq_invalid, __kvm_hyp_panic
-	invalid_vector	el2t_error_invalid, __kvm_hyp_panic
-	invalid_vector	el2h_sync_invalid, __kvm_hyp_panic
-	invalid_vector	el2h_irq_invalid, __kvm_hyp_panic
-	invalid_vector	el2h_fiq_invalid, __kvm_hyp_panic
-	invalid_vector	el2h_error_invalid, __kvm_hyp_panic
-	invalid_vector	el1_sync_invalid, __kvm_hyp_panic
-	invalid_vector	el1_irq_invalid, __kvm_hyp_panic
-	invalid_vector	el1_fiq_invalid, __kvm_hyp_panic
-	invalid_vector	el1_error_invalid, __kvm_hyp_panic
-
-el1_sync:					// Guest trapped into EL2
-	push	x0, x1
-	push	x2, x3
-
-	mrs	x1, esr_el2
-	lsr	x2, x1, #ESR_ELx_EC_SHIFT
-
-	cmp	x2, #ESR_ELx_EC_HVC64
-	b.ne	el1_trap
-
-	mrs	x3, vttbr_el2			// If vttbr is valid, the 64bit guest
-	cbnz	x3, el1_trap			// called HVC
-
-	/* Here, we're pretty sure the host called HVC. */
-	pop	x2, x3
-	pop	x0, x1
-
-	/* Check for __hyp_get_vectors */
-	cbnz	x0, 1f
-	mrs	x0, vbar_el2
-	b	2f
-
-1:	push	lr, xzr
-
-	/*
-	 * Compute the function address in EL2, and shuffle the parameters.
-	 */
-	kern_hyp_va	x0
-	mov	lr, x0
-	mov	x0, x1
-	mov	x1, x2
-	mov	x2, x3
-	blr	lr
-
-	pop	lr, xzr
-2:	eret
-
-el1_trap:
-	/*
-	 * x1: ESR
-	 * x2: ESR_EC
-	 */
-
-	/* Guest accessed VFP/SIMD registers, save host, restore Guest */
-	cmp	x2, #ESR_ELx_EC_FP_ASIMD
-	b.eq	switch_to_guest_fpsimd
-
-	cmp	x2, #ESR_ELx_EC_DABT_LOW
-	mov	x0, #ESR_ELx_EC_IABT_LOW
-	ccmp	x2, x0, #4, ne
-	b.ne	1f		// Not an abort we care about
-
-	/* This is an abort. Check for permission fault */
-alternative_if_not ARM64_WORKAROUND_834220
-	and	x2, x1, #ESR_ELx_FSC_TYPE
-	cmp	x2, #FSC_PERM
-	b.ne	1f		// Not a permission fault
-alternative_else
-	nop			// Use the permission fault path to
-	nop			// check for a valid S1 translation,
-	nop			// regardless of the ESR value.
-alternative_endif
-
-	/*
-	 * Check for Stage-1 page table walk, which is guaranteed
-	 * to give a valid HPFAR_EL2.
-	 */
-	tbnz	x1, #7, 1f	// S1PTW is set
-
-	/* Preserve PAR_EL1 */
-	mrs	x3, par_el1
-	push	x3, xzr
-
-	/*
-	 * Permission fault, HPFAR_EL2 is invalid.
-	 * Resolve the IPA the hard way using the guest VA.
-	 * Stage-1 translation already validated the memory access rights.
-	 * As such, we can use the EL1 translation regime, and don't have
-	 * to distinguish between EL0 and EL1 access.
-	 */
-	mrs	x2, far_el2
-	at	s1e1r, x2
-	isb
-
-	/* Read result */
-	mrs	x3, par_el1
-	pop	x0, xzr			// Restore PAR_EL1 from the stack
-	msr	par_el1, x0
-	tbnz	x3, #0, 3f		// Bail out if we failed the translation
-	ubfx	x3, x3, #12, #36	// Extract IPA
-	lsl	x3, x3, #4		// and present it like HPFAR
-	b	2f
-
-1:	mrs	x3, hpfar_el2
-	mrs	x2, far_el2
-
-2:	mrs	x0, tpidr_el2
-	str	w1, [x0, #VCPU_ESR_EL2]
-	str	x2, [x0, #VCPU_FAR_EL2]
-	str	x3, [x0, #VCPU_HPFAR_EL2]
-
-	mov	x1, #ARM_EXCEPTION_TRAP
-	b	__kvm_vcpu_return
-
-	/*
-	 * Translation failed. Just return to the guest and
-	 * let it fault again. Another CPU is probably playing
-	 * behind our back.
-	 */
-3:	pop	x2, x3
-	pop	x0, x1
-
-	eret
-
-el1_irq:
-	push	x0, x1
-	push	x2, x3
-	mrs	x0, tpidr_el2
-	mov	x1, #ARM_EXCEPTION_IRQ
-	b	__kvm_vcpu_return
-
-	.ltorg
-
-	.align 11
-
-ENTRY(__kvm_hyp_vector)
-	ventry	el2t_sync_invalid		// Synchronous EL2t
-	ventry	el2t_irq_invalid		// IRQ EL2t
-	ventry	el2t_fiq_invalid		// FIQ EL2t
-	ventry	el2t_error_invalid		// Error EL2t
-
-	ventry	el2h_sync_invalid		// Synchronous EL2h
-	ventry	el2h_irq_invalid		// IRQ EL2h
-	ventry	el2h_fiq_invalid		// FIQ EL2h
-	ventry	el2h_error_invalid		// Error EL2h
-
-	ventry	el1_sync			// Synchronous 64-bit EL1
-	ventry	el1_irq				// IRQ 64-bit EL1
-	ventry	el1_fiq_invalid			// FIQ 64-bit EL1
-	ventry	el1_error_invalid		// Error 64-bit EL1
-
-	ventry	el1_sync			// Synchronous 32-bit EL1
-	ventry	el1_irq				// IRQ 32-bit EL1
-	ventry	el1_fiq_invalid			// FIQ 32-bit EL1
-	ventry	el1_error_invalid		// Error 32-bit EL1
-ENDPROC(__kvm_hyp_vector)
-
-
-ENTRY(__kvm_get_mdcr_el2)
-	mrs	x0, mdcr_el2
-	ret
-ENDPROC(__kvm_get_mdcr_el2)
-
-	.popsection
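
The kvm_call_hyp() contract documented in the hunk above (arguments in x0-x2, return value in x0) is unchanged by this rewrite; only the hyp-side implementations move into the new arch/arm64/kvm/hyp/ files that follow. A host-side usage sketch, with the wrapper names invented for illustration (the real call sites are in arch/arm/kvm/arm.c and arch/arm/kvm/mmu.c):

	/* Illustration only: run_vcpu()/flush_ipa() are made-up wrappers.
	 * kvm_call_hyp() issues the HVC that el1_sync in hyp-entry.S
	 * dispatches to the named hyp function. */
	static int run_vcpu(struct kvm_vcpu *vcpu)
	{
		return kvm_call_hyp(__kvm_vcpu_run, vcpu);
	}

	static void flush_ipa(struct kvm *kvm, phys_addr_t ipa)
	{
		kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, kvm, ipa);
	}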

+ 14 - 0
arch/arm64/kvm/hyp/Makefile

@@ -0,0 +1,14 @@
+#
+# Makefile for Kernel-based Virtual Machine module, HYP part
+#
+
+obj-$(CONFIG_KVM_ARM_HOST) += vgic-v2-sr.o
+obj-$(CONFIG_KVM_ARM_HOST) += vgic-v3-sr.o
+obj-$(CONFIG_KVM_ARM_HOST) += timer-sr.o
+obj-$(CONFIG_KVM_ARM_HOST) += sysreg-sr.o
+obj-$(CONFIG_KVM_ARM_HOST) += debug-sr.o
+obj-$(CONFIG_KVM_ARM_HOST) += entry.o
+obj-$(CONFIG_KVM_ARM_HOST) += switch.o
+obj-$(CONFIG_KVM_ARM_HOST) += fpsimd.o
+obj-$(CONFIG_KVM_ARM_HOST) += tlb.o
+obj-$(CONFIG_KVM_ARM_HOST) += hyp-entry.o

+ 140 - 0
arch/arm64/kvm/hyp/debug-sr.c

@@ -0,0 +1,140 @@
+/*
+ * Copyright (C) 2015 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/compiler.h>
+#include <linux/kvm_host.h>
+
+#include <asm/kvm_asm.h>
+#include <asm/kvm_mmu.h>
+
+#include "hyp.h"
+
+#define read_debug(r,n)		read_sysreg(r##n##_el1)
+#define write_debug(v,r,n)	write_sysreg(v, r##n##_el1)
+
+#define save_debug(ptr,reg,nr)						\
+	switch (nr) {							\
+	case 15:	ptr[15] = read_debug(reg, 15);			\
+	case 14:	ptr[14] = read_debug(reg, 14);			\
+	case 13:	ptr[13] = read_debug(reg, 13);			\
+	case 12:	ptr[12] = read_debug(reg, 12);			\
+	case 11:	ptr[11] = read_debug(reg, 11);			\
+	case 10:	ptr[10] = read_debug(reg, 10);			\
+	case 9:		ptr[9] = read_debug(reg, 9);			\
+	case 8:		ptr[8] = read_debug(reg, 8);			\
+	case 7:		ptr[7] = read_debug(reg, 7);			\
+	case 6:		ptr[6] = read_debug(reg, 6);			\
+	case 5:		ptr[5] = read_debug(reg, 5);			\
+	case 4:		ptr[4] = read_debug(reg, 4);			\
+	case 3:		ptr[3] = read_debug(reg, 3);			\
+	case 2:		ptr[2] = read_debug(reg, 2);			\
+	case 1:		ptr[1] = read_debug(reg, 1);			\
+	default:	ptr[0] = read_debug(reg, 0);			\
+	}
+
+#define restore_debug(ptr,reg,nr)					\
+	switch (nr) {							\
+	case 15:	write_debug(ptr[15], reg, 15);			\
+	case 14:	write_debug(ptr[14], reg, 14);			\
+	case 13:	write_debug(ptr[13], reg, 13);			\
+	case 12:	write_debug(ptr[12], reg, 12);			\
+	case 11:	write_debug(ptr[11], reg, 11);			\
+	case 10:	write_debug(ptr[10], reg, 10);			\
+	case 9:		write_debug(ptr[9], reg, 9);			\
+	case 8:		write_debug(ptr[8], reg, 8);			\
+	case 7:		write_debug(ptr[7], reg, 7);			\
+	case 6:		write_debug(ptr[6], reg, 6);			\
+	case 5:		write_debug(ptr[5], reg, 5);			\
+	case 4:		write_debug(ptr[4], reg, 4);			\
+	case 3:		write_debug(ptr[3], reg, 3);			\
+	case 2:		write_debug(ptr[2], reg, 2);			\
+	case 1:		write_debug(ptr[1], reg, 1);			\
+	default:	write_debug(ptr[0], reg, 0);			\
+	}
+
+void __hyp_text __debug_save_state(struct kvm_vcpu *vcpu,
+				   struct kvm_guest_debug_arch *dbg,
+				   struct kvm_cpu_context *ctxt)
+{
+	u64 aa64dfr0;
+	int brps, wrps;
+
+	if (!(vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY))
+		return;
+
+	aa64dfr0 = read_sysreg(id_aa64dfr0_el1);
+	brps = (aa64dfr0 >> 12) & 0xf;
+	wrps = (aa64dfr0 >> 20) & 0xf;
+
+	save_debug(dbg->dbg_bcr, dbgbcr, brps);
+	save_debug(dbg->dbg_bvr, dbgbvr, brps);
+	save_debug(dbg->dbg_wcr, dbgwcr, wrps);
+	save_debug(dbg->dbg_wvr, dbgwvr, wrps);
+
+	ctxt->sys_regs[MDCCINT_EL1] = read_sysreg(mdccint_el1);
+}
+
+void __hyp_text __debug_restore_state(struct kvm_vcpu *vcpu,
+				      struct kvm_guest_debug_arch *dbg,
+				      struct kvm_cpu_context *ctxt)
+{
+	u64 aa64dfr0;
+	int brps, wrps;
+
+	if (!(vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY))
+		return;
+
+	aa64dfr0 = read_sysreg(id_aa64dfr0_el1);
+
+	brps = (aa64dfr0 >> 12) & 0xf;
+	wrps = (aa64dfr0 >> 20) & 0xf;
+
+	restore_debug(dbg->dbg_bcr, dbgbcr, brps);
+	restore_debug(dbg->dbg_bvr, dbgbvr, brps);
+	restore_debug(dbg->dbg_wcr, dbgwcr, wrps);
+	restore_debug(dbg->dbg_wvr, dbgwvr, wrps);
+
+	write_sysreg(ctxt->sys_regs[MDCCINT_EL1], mdccint_el1);
+}
+
+void __hyp_text __debug_cond_save_host_state(struct kvm_vcpu *vcpu)
+{
+	/* If any of KDE, MDE or KVM_ARM64_DEBUG_DIRTY is set, perform
+	 * a full save/restore cycle. */
+	if ((vcpu->arch.ctxt.sys_regs[MDSCR_EL1] & DBG_MDSCR_KDE) ||
+	    (vcpu->arch.ctxt.sys_regs[MDSCR_EL1] & DBG_MDSCR_MDE))
+		vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY;
+
+	__debug_save_state(vcpu, &vcpu->arch.host_debug_state,
+			   kern_hyp_va(vcpu->arch.host_cpu_context));
+}
+
+void __hyp_text __debug_cond_restore_host_state(struct kvm_vcpu *vcpu)
+{
+	__debug_restore_state(vcpu, &vcpu->arch.host_debug_state,
+			      kern_hyp_va(vcpu->arch.host_cpu_context));
+
+	if (vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY)
+		vcpu->arch.debug_flags &= ~KVM_ARM64_DEBUG_DIRTY;
+}
+
+static u32 __hyp_text __debug_read_mdcr_el2(void)
+{
+	return read_sysreg(mdcr_el2);
+}
+
+__alias(__debug_read_mdcr_el2) u32 __kvm_get_mdcr_el2(void);
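
The save_debug()/restore_debug() macros above replace the old computed branches with deliberate switch fall-through: the BRPs/WRPs fields of ID_AA64DFR0_EL1 give the highest implemented breakpoint/watchpoint index, and entering the switch at that index touches only registers that actually exist. A standalone sketch of the same idiom (not kernel code; fake_read() stands in for read_debug()):

	#include <stdio.h>

	static unsigned long fake_read(int n)	/* stand-in for read_debug(reg, n) */
	{
		return 0x1000 + n;
	}

	/* Entering at 'nr' and falling through writes indices nr..0 only. */
	static void save_from(unsigned long *ptr, int nr)
	{
		switch (nr) {
		case 5: ptr[5] = fake_read(5);	/* fall through */
		case 4: ptr[4] = fake_read(4);	/* fall through */
		case 3: ptr[3] = fake_read(3);	/* fall through */
		case 2: ptr[2] = fake_read(2);	/* fall through */
		case 1: ptr[1] = fake_read(1);	/* fall through */
		default: ptr[0] = fake_read(0);
		}
	}

	int main(void)
	{
		unsigned long dbg[6] = { 0 };

		save_from(dbg, 3);	/* a part with 4 breakpoints: only dbg[3..0] are written */
		printf("%lx %lx %lx\n", dbg[5], dbg[3], dbg[0]);
		return 0;
	}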

+ 160 - 0
arch/arm64/kvm/hyp/entry.S

@@ -0,0 +1,160 @@
+/*
+ * Copyright (C) 2015 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/linkage.h>
+
+#include <asm/asm-offsets.h>
+#include <asm/assembler.h>
+#include <asm/fpsimdmacros.h>
+#include <asm/kvm.h>
+#include <asm/kvm_arm.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_mmu.h>
+
+#define CPU_GP_REG_OFFSET(x)	(CPU_GP_REGS + x)
+#define CPU_XREG_OFFSET(x)	CPU_GP_REG_OFFSET(CPU_USER_PT_REGS + 8*x)
+
+	.text
+	.pushsection	.hyp.text, "ax"
+
+.macro save_callee_saved_regs ctxt
+	stp	x19, x20, [\ctxt, #CPU_XREG_OFFSET(19)]
+	stp	x21, x22, [\ctxt, #CPU_XREG_OFFSET(21)]
+	stp	x23, x24, [\ctxt, #CPU_XREG_OFFSET(23)]
+	stp	x25, x26, [\ctxt, #CPU_XREG_OFFSET(25)]
+	stp	x27, x28, [\ctxt, #CPU_XREG_OFFSET(27)]
+	stp	x29, lr,  [\ctxt, #CPU_XREG_OFFSET(29)]
+.endm
+
+.macro restore_callee_saved_regs ctxt
+	ldp	x19, x20, [\ctxt, #CPU_XREG_OFFSET(19)]
+	ldp	x21, x22, [\ctxt, #CPU_XREG_OFFSET(21)]
+	ldp	x23, x24, [\ctxt, #CPU_XREG_OFFSET(23)]
+	ldp	x25, x26, [\ctxt, #CPU_XREG_OFFSET(25)]
+	ldp	x27, x28, [\ctxt, #CPU_XREG_OFFSET(27)]
+	ldp	x29, lr,  [\ctxt, #CPU_XREG_OFFSET(29)]
+.endm
+
+/*
+ * u64 __guest_enter(struct kvm_vcpu *vcpu,
+ *		     struct kvm_cpu_context *host_ctxt);
+ */
+ENTRY(__guest_enter)
+	// x0: vcpu
+	// x1: host/guest context
+	// x2-x18: clobbered by macros
+
+	// Store the host regs
+	save_callee_saved_regs x1
+
+	// Preserve vcpu & host_ctxt for use at exit time
+	stp	x0, x1, [sp, #-16]!
+
+	add	x1, x0, #VCPU_CONTEXT
+
+	// Prepare x0-x1 for later restore by pushing them onto the stack
+	ldp	x2, x3, [x1, #CPU_XREG_OFFSET(0)]
+	stp	x2, x3, [sp, #-16]!
+
+	// x2-x18
+	ldp	x2, x3,   [x1, #CPU_XREG_OFFSET(2)]
+	ldp	x4, x5,   [x1, #CPU_XREG_OFFSET(4)]
+	ldp	x6, x7,   [x1, #CPU_XREG_OFFSET(6)]
+	ldp	x8, x9,   [x1, #CPU_XREG_OFFSET(8)]
+	ldp	x10, x11, [x1, #CPU_XREG_OFFSET(10)]
+	ldp	x12, x13, [x1, #CPU_XREG_OFFSET(12)]
+	ldp	x14, x15, [x1, #CPU_XREG_OFFSET(14)]
+	ldp	x16, x17, [x1, #CPU_XREG_OFFSET(16)]
+	ldr	x18,      [x1, #CPU_XREG_OFFSET(18)]
+
+	// x19-x29, lr
+	restore_callee_saved_regs x1
+
+	// Last bits of the 64bit state
+	ldp	x0, x1, [sp], #16
+
+	// Do not touch any register after this!
+	eret
+ENDPROC(__guest_enter)
+
+ENTRY(__guest_exit)
+	// x0: vcpu
+	// x1: return code
+	// x2-x3: free
+	// x4-x29,lr: vcpu regs
+	// vcpu x0-x3 on the stack
+
+	add	x2, x0, #VCPU_CONTEXT
+
+	stp	x4, x5,   [x2, #CPU_XREG_OFFSET(4)]
+	stp	x6, x7,   [x2, #CPU_XREG_OFFSET(6)]
+	stp	x8, x9,   [x2, #CPU_XREG_OFFSET(8)]
+	stp	x10, x11, [x2, #CPU_XREG_OFFSET(10)]
+	stp	x12, x13, [x2, #CPU_XREG_OFFSET(12)]
+	stp	x14, x15, [x2, #CPU_XREG_OFFSET(14)]
+	stp	x16, x17, [x2, #CPU_XREG_OFFSET(16)]
+	str	x18,      [x2, #CPU_XREG_OFFSET(18)]
+
+	ldp	x6, x7, [sp], #16	// x2, x3
+	ldp	x4, x5, [sp], #16	// x0, x1
+
+	stp	x4, x5, [x2, #CPU_XREG_OFFSET(0)]
+	stp	x6, x7, [x2, #CPU_XREG_OFFSET(2)]
+
+	save_callee_saved_regs x2
+
+	// Restore vcpu & host_ctxt from the stack
+	// (preserving return code in x1)
+	ldp	x0, x2, [sp], #16
+	// Now restore the host regs
+	restore_callee_saved_regs x2
+
+	mov	x0, x1
+	ret
+ENDPROC(__guest_exit)
+
+ENTRY(__fpsimd_guest_restore)
+	stp	x4, lr, [sp, #-16]!
+
+	mrs	x2, cptr_el2
+	bic	x2, x2, #CPTR_EL2_TFP
+	msr	cptr_el2, x2
+	isb
+
+	mrs	x3, tpidr_el2
+
+	ldr	x0, [x3, #VCPU_HOST_CONTEXT]
+	kern_hyp_va x0
+	add	x0, x0, #CPU_GP_REG_OFFSET(CPU_FP_REGS)
+	bl	__fpsimd_save_state
+
+	add	x2, x3, #VCPU_CONTEXT
+	add	x0, x2, #CPU_GP_REG_OFFSET(CPU_FP_REGS)
+	bl	__fpsimd_restore_state
+
+	// Skip restoring fpexc32 for AArch64 guests
+	mrs	x1, hcr_el2
+	tbnz	x1, #HCR_RW_SHIFT, 1f
+	ldr	x4, [x3, #VCPU_FPEXC32_EL2]
+	msr	fpexc32_el2, x4
+1:
+	ldp	x4, lr, [sp], #16
+	ldp	x2, x3, [sp], #16
+	ldp	x0, x1, [sp], #16
+
+	eret
+ENDPROC(__fpsimd_guest_restore)
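
__fpsimd_guest_restore above is reached only through the CPTR_EL2.TFP trap, which is what makes the FPSIMD switch lazy: the registers are not swapped on every world switch, only on the guest's first FP/SIMD access. The exit side of that policy lives in __guest_run() in switch.c further down; condensed here purely as a restatement, not separate code:

	/* Restatement of the exit-side check in switch.c: TFP having been
	 * cleared by the trap handler above means the guest used FP/SIMD,
	 * so its state is live in the registers and must be swapped back. */
	bool fp_enabled = !(read_sysreg(cptr_el2) & CPTR_EL2_TFP);

	if (fp_enabled) {
		__fpsimd_save_state(&guest_ctxt->gp_regs.fp_regs);
		__fpsimd_restore_state(&host_ctxt->gp_regs.fp_regs);
	}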

+ 33 - 0
arch/arm64/kvm/hyp/fpsimd.S

@@ -0,0 +1,33 @@
+/*
+ * Copyright (C) 2015 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/linkage.h>
+
+#include <asm/fpsimdmacros.h>
+
+	.text
+	.pushsection	.hyp.text, "ax"
+
+ENTRY(__fpsimd_save_state)
+	fpsimd_save	x0, 1
+	ret
+ENDPROC(__fpsimd_save_state)
+
+ENTRY(__fpsimd_restore_state)
+	fpsimd_restore	x0, 1
+	ret
+ENDPROC(__fpsimd_restore_state)

+ 212 - 0
arch/arm64/kvm/hyp/hyp-entry.S

@@ -0,0 +1,212 @@
+/*
+ * Copyright (C) 2015 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/linkage.h>
+
+#include <asm/alternative.h>
+#include <asm/assembler.h>
+#include <asm/asm-offsets.h>
+#include <asm/cpufeature.h>
+#include <asm/kvm_arm.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_mmu.h>
+
+	.text
+	.pushsection	.hyp.text, "ax"
+
+.macro	save_x0_to_x3
+	stp	x0, x1, [sp, #-16]!
+	stp	x2, x3, [sp, #-16]!
+.endm
+
+.macro	restore_x0_to_x3
+	ldp	x2, x3, [sp], #16
+	ldp	x0, x1, [sp], #16
+.endm
+
+el1_sync:				// Guest trapped into EL2
+	save_x0_to_x3
+
+	mrs	x1, esr_el2
+	lsr	x2, x1, #ESR_ELx_EC_SHIFT
+
+	cmp	x2, #ESR_ELx_EC_HVC64
+	b.ne	el1_trap
+
+	mrs	x3, vttbr_el2		// If vttbr is valid, the 64bit guest
+	cbnz	x3, el1_trap		// called HVC
+
+	/* Here, we're pretty sure the host called HVC. */
+	restore_x0_to_x3
+
+	/* Check for __hyp_get_vectors */
+	cbnz	x0, 1f
+	mrs	x0, vbar_el2
+	b	2f
+
+1:	stp	lr, xzr, [sp, #-16]!
+
+	/*
+	 * Compute the function address in EL2, and shuffle the parameters.
+	 */
+	kern_hyp_va	x0
+	mov	lr, x0
+	mov	x0, x1
+	mov	x1, x2
+	mov	x2, x3
+	blr	lr
+
+	ldp	lr, xzr, [sp], #16
+2:	eret
+
+el1_trap:
+	/*
+	 * x1: ESR
+	 * x2: ESR_EC
+	 */
+
+	/* Guest accessed VFP/SIMD registers, save host, restore Guest */
+	cmp	x2, #ESR_ELx_EC_FP_ASIMD
+	b.eq	__fpsimd_guest_restore
+
+	cmp	x2, #ESR_ELx_EC_DABT_LOW
+	mov	x0, #ESR_ELx_EC_IABT_LOW
+	ccmp	x2, x0, #4, ne
+	b.ne	1f		// Not an abort we care about
+
+	/* This is an abort. Check for permission fault */
+alternative_if_not ARM64_WORKAROUND_834220
+	and	x2, x1, #ESR_ELx_FSC_TYPE
+	cmp	x2, #FSC_PERM
+	b.ne	1f		// Not a permission fault
+alternative_else
+	nop			// Use the permission fault path to
+	nop			// check for a valid S1 translation,
+	nop			// regardless of the ESR value.
+alternative_endif
+
+	/*
+	 * Check for Stage-1 page table walk, which is guaranteed
+	 * to give a valid HPFAR_EL2.
+	 */
+	tbnz	x1, #7, 1f	// S1PTW is set
+
+	/* Preserve PAR_EL1 */
+	mrs	x3, par_el1
+	stp	x3, xzr, [sp, #-16]!
+
+	/*
+	 * Permission fault, HPFAR_EL2 is invalid.
+	 * Resolve the IPA the hard way using the guest VA.
+	 * Stage-1 translation already validated the memory access rights.
+	 * As such, we can use the EL1 translation regime, and don't have
+	 * to distinguish between EL0 and EL1 access.
+	 */
+	mrs	x2, far_el2
+	at	s1e1r, x2
+	isb
+
+	/* Read result */
+	mrs	x3, par_el1
+	ldp	x0, xzr, [sp], #16	// Restore PAR_EL1 from the stack
+	msr	par_el1, x0
+	tbnz	x3, #0, 3f		// Bail out if we failed the translation
+	ubfx	x3, x3, #12, #36	// Extract IPA
+	lsl	x3, x3, #4		// and present it like HPFAR
+	b	2f
+
+1:	mrs	x3, hpfar_el2
+	mrs	x2, far_el2
+
+2:	mrs	x0, tpidr_el2
+	str	w1, [x0, #VCPU_ESR_EL2]
+	str	x2, [x0, #VCPU_FAR_EL2]
+	str	x3, [x0, #VCPU_HPFAR_EL2]
+
+	mov	x1, #ARM_EXCEPTION_TRAP
+	b	__guest_exit
+
+	/*
+	 * Translation failed. Just return to the guest and
+	 * let it fault again. Another CPU is probably playing
+	 * behind our back.
+	 */
+3:	restore_x0_to_x3
+
+	eret
+
+el1_irq:
+	save_x0_to_x3
+	mrs	x0, tpidr_el2
+	mov	x1, #ARM_EXCEPTION_IRQ
+	b	__guest_exit
+
+ENTRY(__hyp_do_panic)
+	mov	lr, #(PSR_F_BIT | PSR_I_BIT | PSR_A_BIT | PSR_D_BIT |\
+		      PSR_MODE_EL1h)
+	msr	spsr_el2, lr
+	ldr	lr, =panic
+	msr	elr_el2, lr
+	eret
+ENDPROC(__hyp_do_panic)
+
+.macro invalid_vector	label, target = __hyp_panic
+	.align	2
+\label:
+	b \target
+ENDPROC(\label)
+.endm
+
+	/* None of these should ever happen */
+	invalid_vector	el2t_sync_invalid
+	invalid_vector	el2t_irq_invalid
+	invalid_vector	el2t_fiq_invalid
+	invalid_vector	el2t_error_invalid
+	invalid_vector	el2h_sync_invalid
+	invalid_vector	el2h_irq_invalid
+	invalid_vector	el2h_fiq_invalid
+	invalid_vector	el2h_error_invalid
+	invalid_vector	el1_sync_invalid
+	invalid_vector	el1_irq_invalid
+	invalid_vector	el1_fiq_invalid
+	invalid_vector	el1_error_invalid
+
+	.ltorg
+
+	.align 11
+
+ENTRY(__kvm_hyp_vector)
+	ventry	el2t_sync_invalid		// Synchronous EL2t
+	ventry	el2t_irq_invalid		// IRQ EL2t
+	ventry	el2t_fiq_invalid		// FIQ EL2t
+	ventry	el2t_error_invalid		// Error EL2t
+
+	ventry	el2h_sync_invalid		// Synchronous EL2h
+	ventry	el2h_irq_invalid		// IRQ EL2h
+	ventry	el2h_fiq_invalid		// FIQ EL2h
+	ventry	el2h_error_invalid		// Error EL2h
+
+	ventry	el1_sync			// Synchronous 64-bit EL1
+	ventry	el1_irq				// IRQ 64-bit EL1
+	ventry	el1_fiq_invalid			// FIQ 64-bit EL1
+	ventry	el1_error_invalid		// Error 64-bit EL1
+
+	ventry	el1_sync			// Synchronous 32-bit EL1
+	ventry	el1_irq				// IRQ 32-bit EL1
+	ventry	el1_fiq_invalid			// FIQ 32-bit EL1
+	ventry	el1_error_invalid		// Error 32-bit EL1
+ENDPROC(__kvm_hyp_vector)

+ 90 - 0
arch/arm64/kvm/hyp/hyp.h

@@ -0,0 +1,90 @@
+/*
+ * Copyright (C) 2015 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ARM64_KVM_HYP_H__
+#define __ARM64_KVM_HYP_H__
+
+#include <linux/compiler.h>
+#include <linux/kvm_host.h>
+#include <asm/kvm_mmu.h>
+#include <asm/sysreg.h>
+
+#define __hyp_text __section(.hyp.text) notrace
+
+#define kern_hyp_va(v) (typeof(v))((unsigned long)(v) & HYP_PAGE_OFFSET_MASK)
+#define hyp_kern_va(v) (typeof(v))((unsigned long)(v) - HYP_PAGE_OFFSET \
+						      + PAGE_OFFSET)
+
+/**
+ * hyp_alternate_select - Generates patchable code sequences that are
+ * used to switch between two implementations of a function, depending
+ * on the availability of a feature.
+ *
+ * @fname: a symbol name that will be defined as a function returning a
+ * function pointer whose type will match @orig and @alt
+ * @orig: A pointer to the default function, as returned by @fname when
+ * @cond doesn't hold
+ * @alt: A pointer to the alternate function, as returned by @fname
+ * when @cond holds
+ * @cond: a CPU feature (as described in asm/cpufeature.h)
+ */
+#define hyp_alternate_select(fname, orig, alt, cond)			\
+typeof(orig) * __hyp_text fname(void)					\
+{									\
+	typeof(alt) *val = orig;					\
+	asm volatile(ALTERNATIVE("nop		\n",			\
+				 "mov	%0, %1	\n",			\
+				 cond)					\
+		     : "+r" (val) : "r" (alt));				\
+	return val;							\
+}
+
+void __vgic_v2_save_state(struct kvm_vcpu *vcpu);
+void __vgic_v2_restore_state(struct kvm_vcpu *vcpu);
+
+void __vgic_v3_save_state(struct kvm_vcpu *vcpu);
+void __vgic_v3_restore_state(struct kvm_vcpu *vcpu);
+
+void __timer_save_state(struct kvm_vcpu *vcpu);
+void __timer_restore_state(struct kvm_vcpu *vcpu);
+
+void __sysreg_save_state(struct kvm_cpu_context *ctxt);
+void __sysreg_restore_state(struct kvm_cpu_context *ctxt);
+void __sysreg32_save_state(struct kvm_vcpu *vcpu);
+void __sysreg32_restore_state(struct kvm_vcpu *vcpu);
+
+void __debug_save_state(struct kvm_vcpu *vcpu,
+			struct kvm_guest_debug_arch *dbg,
+			struct kvm_cpu_context *ctxt);
+void __debug_restore_state(struct kvm_vcpu *vcpu,
+			   struct kvm_guest_debug_arch *dbg,
+			   struct kvm_cpu_context *ctxt);
+void __debug_cond_save_host_state(struct kvm_vcpu *vcpu);
+void __debug_cond_restore_host_state(struct kvm_vcpu *vcpu);
+
+void __fpsimd_save_state(struct user_fpsimd_state *fp_regs);
+void __fpsimd_restore_state(struct user_fpsimd_state *fp_regs);
+static inline bool __fpsimd_enabled(void)
+{
+	return !(read_sysreg(cptr_el2) & CPTR_EL2_TFP);
+}
+
+u64 __guest_enter(struct kvm_vcpu *vcpu, struct kvm_cpu_context *host_ctxt);
+void __noreturn __hyp_do_panic(unsigned long, ...);
+
+#endif /* __ARM64_KVM_HYP_H__ */
+
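
The hyp_alternate_select() kernel-doc above reads most easily next to a usage sketch. The names below are invented for illustration (the real users are the vgic save/restore selectors in switch.c further down); the generated fname() returns a pointer to one of the two functions, with the choice patched in at boot by the alternatives framework:

	/* Illustration only: pick_v1()/pick_v2() and __call_pick are made-up
	 * names; ARM64_HAS_SYSREG_GIC_CPUIF is borrowed from the real users.
	 * __call_pick() returns &pick_v1 unless the capability is present,
	 * in which case the patched-in mov makes it return &pick_v2. */
	static void __hyp_text pick_v1(struct kvm_vcpu *vcpu) { }
	static void __hyp_text pick_v2(struct kvm_vcpu *vcpu) { }

	static hyp_alternate_select(__call_pick, pick_v1, pick_v2,
				    ARM64_HAS_SYSREG_GIC_CPUIF);

	static void __hyp_text do_pick(struct kvm_vcpu *vcpu)
	{
		__call_pick()(vcpu);	/* one indirect call, no runtime branch */
	}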

+ 175 - 0
arch/arm64/kvm/hyp/switch.c

@@ -0,0 +1,175 @@
+/*
+ * Copyright (C) 2015 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "hyp.h"
+
+static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu)
+{
+	u64 val;
+
+	/*
+	 * We are about to set CPTR_EL2.TFP to trap all floating point
+	 * register accesses to EL2, however, the ARM ARM clearly states that
+	 * traps are only taken to EL2 if the operation would not otherwise
+	 * trap to EL1.  Therefore, always make sure that for 32-bit guests,
+	 * we set FPEXC.EN to prevent traps to EL1, when setting the TFP bit.
+	 */
+	val = vcpu->arch.hcr_el2;
+	if (!(val & HCR_RW)) {
+		write_sysreg(1 << 30, fpexc32_el2);
+		isb();
+	}
+	write_sysreg(val, hcr_el2);
+	/* Trap on AArch32 cp15 c15 accesses (EL1 or EL0) */
+	write_sysreg(1 << 15, hstr_el2);
+	write_sysreg(CPTR_EL2_TTA | CPTR_EL2_TFP, cptr_el2);
+	write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2);
+}
+
+static void __hyp_text __deactivate_traps(struct kvm_vcpu *vcpu)
+{
+	write_sysreg(HCR_RW, hcr_el2);
+	write_sysreg(0, hstr_el2);
+	write_sysreg(read_sysreg(mdcr_el2) & MDCR_EL2_HPMN_MASK, mdcr_el2);
+	write_sysreg(0, cptr_el2);
+}
+
+static void __hyp_text __activate_vm(struct kvm_vcpu *vcpu)
+{
+	struct kvm *kvm = kern_hyp_va(vcpu->kvm);
+	write_sysreg(kvm->arch.vttbr, vttbr_el2);
+}
+
+static void __hyp_text __deactivate_vm(struct kvm_vcpu *vcpu)
+{
+	write_sysreg(0, vttbr_el2);
+}
+
+static hyp_alternate_select(__vgic_call_save_state,
+			    __vgic_v2_save_state, __vgic_v3_save_state,
+			    ARM64_HAS_SYSREG_GIC_CPUIF);
+
+static hyp_alternate_select(__vgic_call_restore_state,
+			    __vgic_v2_restore_state, __vgic_v3_restore_state,
+			    ARM64_HAS_SYSREG_GIC_CPUIF);
+
+static void __hyp_text __vgic_save_state(struct kvm_vcpu *vcpu)
+{
+	__vgic_call_save_state()(vcpu);
+	write_sysreg(read_sysreg(hcr_el2) & ~HCR_INT_OVERRIDE, hcr_el2);
+}
+
+static void __hyp_text __vgic_restore_state(struct kvm_vcpu *vcpu)
+{
+	u64 val;
+
+	val = read_sysreg(hcr_el2);
+	val |= 	HCR_INT_OVERRIDE;
+	val |= vcpu->arch.irq_lines;
+	write_sysreg(val, hcr_el2);
+
+	__vgic_call_restore_state()(vcpu);
+}
+
+static int __hyp_text __guest_run(struct kvm_vcpu *vcpu)
+{
+	struct kvm_cpu_context *host_ctxt;
+	struct kvm_cpu_context *guest_ctxt;
+	bool fp_enabled;
+	u64 exit_code;
+
+	vcpu = kern_hyp_va(vcpu);
+	write_sysreg(vcpu, tpidr_el2);
+
+	host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context);
+	guest_ctxt = &vcpu->arch.ctxt;
+
+	__sysreg_save_state(host_ctxt);
+	__debug_cond_save_host_state(vcpu);
+
+	__activate_traps(vcpu);
+	__activate_vm(vcpu);
+
+	__vgic_restore_state(vcpu);
+	__timer_restore_state(vcpu);
+
+	/*
+	 * We must restore the 32-bit state before the sysregs, thanks
+	 * to Cortex-A57 erratum #852523.
+	 */
+	__sysreg32_restore_state(vcpu);
+	__sysreg_restore_state(guest_ctxt);
+	__debug_restore_state(vcpu, kern_hyp_va(vcpu->arch.debug_ptr), guest_ctxt);
+
+	/* Jump in the fire! */
+	exit_code = __guest_enter(vcpu, host_ctxt);
+	/* And we're baaack! */
+
+	fp_enabled = __fpsimd_enabled();
+
+	__sysreg_save_state(guest_ctxt);
+	__sysreg32_save_state(vcpu);
+	__timer_save_state(vcpu);
+	__vgic_save_state(vcpu);
+
+	__deactivate_traps(vcpu);
+	__deactivate_vm(vcpu);
+
+	__sysreg_restore_state(host_ctxt);
+
+	if (fp_enabled) {
+		__fpsimd_save_state(&guest_ctxt->gp_regs.fp_regs);
+		__fpsimd_restore_state(&host_ctxt->gp_regs.fp_regs);
+	}
+
+	__debug_save_state(vcpu, kern_hyp_va(vcpu->arch.debug_ptr), guest_ctxt);
+	__debug_cond_restore_host_state(vcpu);
+
+	return exit_code;
+}
+
+__alias(__guest_run) int __kvm_vcpu_run(struct kvm_vcpu *vcpu);
+
+static const char __hyp_panic_string[] = "HYP panic:\nPS:%08llx PC:%016llx ESR:%08llx\nFAR:%016llx HPFAR:%016llx PAR:%016llx\nVCPU:%p\n";
+
+void __hyp_text __noreturn __hyp_panic(void)
+{
+	unsigned long str_va = (unsigned long)__hyp_panic_string;
+	u64 spsr = read_sysreg(spsr_el2);
+	u64 elr = read_sysreg(elr_el2);
+	u64 par = read_sysreg(par_el1);
+
+	if (read_sysreg(vttbr_el2)) {
+		struct kvm_vcpu *vcpu;
+		struct kvm_cpu_context *host_ctxt;
+
+		vcpu = (struct kvm_vcpu *)read_sysreg(tpidr_el2);
+		host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context);
+		__deactivate_traps(vcpu);
+		__deactivate_vm(vcpu);
+		__sysreg_restore_state(host_ctxt);
+	}
+
+	/* Call panic for real */
+	__hyp_do_panic(hyp_kern_va(str_va),
+		       spsr,  elr,
+		       read_sysreg(esr_el2),   read_sysreg(far_el2),
+		       read_sysreg(hpfar_el2), par,
+		       (void *)read_sysreg(tpidr_el2));
+
+	unreachable();
+}
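
One pattern used here and in the other hyp/*.c files is worth spelling out: the implementation is a static __hyp_text C function, and the __kvm_* symbol that the host side and kvm_call_hyp() already reference is produced with __alias(), the kernel's wrapper around the compiler's alias attribute (linux/compiler-gcc.h). Roughly, the last __alias() line above amounts to:

	/* Rough expansion of the __alias() line; the attribute spelling
	 * comes from the kernel's compiler headers, not from this series. */
	int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
		__attribute__((alias("__guest_run")));

This keeps the existing hyp entry points (__kvm_vcpu_run, __kvm_flush_vm_context, the TLB flushers) stable while letting their bodies be ordinary C compiled into the .hyp.text section.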

+ 138 - 0
arch/arm64/kvm/hyp/sysreg-sr.c

@@ -0,0 +1,138 @@
+/*
+ * Copyright (C) 2012-2015 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/compiler.h>
+#include <linux/kvm_host.h>
+
+#include <asm/kvm_asm.h>
+#include <asm/kvm_mmu.h>
+
+#include "hyp.h"
+
+/* ctxt is already in the HYP VA space */
+void __hyp_text __sysreg_save_state(struct kvm_cpu_context *ctxt)
+{
+	ctxt->sys_regs[MPIDR_EL1]	= read_sysreg(vmpidr_el2);
+	ctxt->sys_regs[CSSELR_EL1]	= read_sysreg(csselr_el1);
+	ctxt->sys_regs[SCTLR_EL1]	= read_sysreg(sctlr_el1);
+	ctxt->sys_regs[ACTLR_EL1]	= read_sysreg(actlr_el1);
+	ctxt->sys_regs[CPACR_EL1]	= read_sysreg(cpacr_el1);
+	ctxt->sys_regs[TTBR0_EL1]	= read_sysreg(ttbr0_el1);
+	ctxt->sys_regs[TTBR1_EL1]	= read_sysreg(ttbr1_el1);
+	ctxt->sys_regs[TCR_EL1]		= read_sysreg(tcr_el1);
+	ctxt->sys_regs[ESR_EL1]		= read_sysreg(esr_el1);
+	ctxt->sys_regs[AFSR0_EL1]	= read_sysreg(afsr0_el1);
+	ctxt->sys_regs[AFSR1_EL1]	= read_sysreg(afsr1_el1);
+	ctxt->sys_regs[FAR_EL1]		= read_sysreg(far_el1);
+	ctxt->sys_regs[MAIR_EL1]	= read_sysreg(mair_el1);
+	ctxt->sys_regs[VBAR_EL1]	= read_sysreg(vbar_el1);
+	ctxt->sys_regs[CONTEXTIDR_EL1]	= read_sysreg(contextidr_el1);
+	ctxt->sys_regs[TPIDR_EL0]	= read_sysreg(tpidr_el0);
+	ctxt->sys_regs[TPIDRRO_EL0]	= read_sysreg(tpidrro_el0);
+	ctxt->sys_regs[TPIDR_EL1]	= read_sysreg(tpidr_el1);
+	ctxt->sys_regs[AMAIR_EL1]	= read_sysreg(amair_el1);
+	ctxt->sys_regs[CNTKCTL_EL1]	= read_sysreg(cntkctl_el1);
+	ctxt->sys_regs[PAR_EL1]		= read_sysreg(par_el1);
+	ctxt->sys_regs[MDSCR_EL1]	= read_sysreg(mdscr_el1);
+
+	ctxt->gp_regs.regs.sp		= read_sysreg(sp_el0);
+	ctxt->gp_regs.regs.pc		= read_sysreg(elr_el2);
+	ctxt->gp_regs.regs.pstate	= read_sysreg(spsr_el2);
+	ctxt->gp_regs.sp_el1		= read_sysreg(sp_el1);
+	ctxt->gp_regs.elr_el1		= read_sysreg(elr_el1);
+	ctxt->gp_regs.spsr[KVM_SPSR_EL1]= read_sysreg(spsr_el1);
+}
+
+void __hyp_text __sysreg_restore_state(struct kvm_cpu_context *ctxt)
+{
+	write_sysreg(ctxt->sys_regs[MPIDR_EL1],	  vmpidr_el2);
+	write_sysreg(ctxt->sys_regs[CSSELR_EL1],  csselr_el1);
+	write_sysreg(ctxt->sys_regs[SCTLR_EL1],	  sctlr_el1);
+	write_sysreg(ctxt->sys_regs[ACTLR_EL1],	  actlr_el1);
+	write_sysreg(ctxt->sys_regs[CPACR_EL1],	  cpacr_el1);
+	write_sysreg(ctxt->sys_regs[TTBR0_EL1],	  ttbr0_el1);
+	write_sysreg(ctxt->sys_regs[TTBR1_EL1],	  ttbr1_el1);
+	write_sysreg(ctxt->sys_regs[TCR_EL1],	  tcr_el1);
+	write_sysreg(ctxt->sys_regs[ESR_EL1],	  esr_el1);
+	write_sysreg(ctxt->sys_regs[AFSR0_EL1],	  afsr0_el1);
+	write_sysreg(ctxt->sys_regs[AFSR1_EL1],	  afsr1_el1);
+	write_sysreg(ctxt->sys_regs[FAR_EL1],	  far_el1);
+	write_sysreg(ctxt->sys_regs[MAIR_EL1],	  mair_el1);
+	write_sysreg(ctxt->sys_regs[VBAR_EL1],	  vbar_el1);
+	write_sysreg(ctxt->sys_regs[CONTEXTIDR_EL1], contextidr_el1);
+	write_sysreg(ctxt->sys_regs[TPIDR_EL0],	  tpidr_el0);
+	write_sysreg(ctxt->sys_regs[TPIDRRO_EL0], tpidrro_el0);
+	write_sysreg(ctxt->sys_regs[TPIDR_EL1],	  tpidr_el1);
+	write_sysreg(ctxt->sys_regs[AMAIR_EL1],	  amair_el1);
+	write_sysreg(ctxt->sys_regs[CNTKCTL_EL1], cntkctl_el1);
+	write_sysreg(ctxt->sys_regs[PAR_EL1],	  par_el1);
+	write_sysreg(ctxt->sys_regs[MDSCR_EL1],	  mdscr_el1);
+
+	write_sysreg(ctxt->gp_regs.regs.sp,	sp_el0);
+	write_sysreg(ctxt->gp_regs.regs.pc,	elr_el2);
+	write_sysreg(ctxt->gp_regs.regs.pstate,	spsr_el2);
+	write_sysreg(ctxt->gp_regs.sp_el1,	sp_el1);
+	write_sysreg(ctxt->gp_regs.elr_el1,	elr_el1);
+	write_sysreg(ctxt->gp_regs.spsr[KVM_SPSR_EL1], spsr_el1);
+}
+
+void __hyp_text __sysreg32_save_state(struct kvm_vcpu *vcpu)
+{
+	u64 *spsr, *sysreg;
+
+	if (read_sysreg(hcr_el2) & HCR_RW)
+		return;
+
+	spsr = vcpu->arch.ctxt.gp_regs.spsr;
+	sysreg = vcpu->arch.ctxt.sys_regs;
+
+	spsr[KVM_SPSR_ABT] = read_sysreg(spsr_abt);
+	spsr[KVM_SPSR_UND] = read_sysreg(spsr_und);
+	spsr[KVM_SPSR_IRQ] = read_sysreg(spsr_irq);
+	spsr[KVM_SPSR_FIQ] = read_sysreg(spsr_fiq);
+
+	sysreg[DACR32_EL2] = read_sysreg(dacr32_el2);
+	sysreg[IFSR32_EL2] = read_sysreg(ifsr32_el2);
+
+	if (__fpsimd_enabled())
+		sysreg[FPEXC32_EL2] = read_sysreg(fpexc32_el2);
+
+	if (vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY)
+		sysreg[DBGVCR32_EL2] = read_sysreg(dbgvcr32_el2);
+}
+
+void __hyp_text __sysreg32_restore_state(struct kvm_vcpu *vcpu)
+{
+	u64 *spsr, *sysreg;
+
+	if (read_sysreg(hcr_el2) & HCR_RW)
+		return;
+
+	spsr = vcpu->arch.ctxt.gp_regs.spsr;
+	sysreg = vcpu->arch.ctxt.sys_regs;
+
+	write_sysreg(spsr[KVM_SPSR_ABT], spsr_abt);
+	write_sysreg(spsr[KVM_SPSR_UND], spsr_und);
+	write_sysreg(spsr[KVM_SPSR_IRQ], spsr_irq);
+	write_sysreg(spsr[KVM_SPSR_FIQ], spsr_fiq);
+
+	write_sysreg(sysreg[DACR32_EL2], dacr32_el2);
+	write_sysreg(sysreg[IFSR32_EL2], ifsr32_el2);
+
+	if (vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY)
+		write_sysreg(sysreg[DBGVCR32_EL2], dbgvcr32_el2);
+}

+ 71 - 0
arch/arm64/kvm/hyp/timer-sr.c

@@ -0,0 +1,71 @@
+/*
+ * Copyright (C) 2012-2015 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <clocksource/arm_arch_timer.h>
+#include <linux/compiler.h>
+#include <linux/kvm_host.h>
+
+#include <asm/kvm_mmu.h>
+
+#include "hyp.h"
+
+/* vcpu is already in the HYP VA space */
+void __hyp_text __timer_save_state(struct kvm_vcpu *vcpu)
+{
+	struct kvm *kvm = kern_hyp_va(vcpu->kvm);
+	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+	u64 val;
+
+	if (kvm->arch.timer.enabled) {
+		timer->cntv_ctl = read_sysreg(cntv_ctl_el0);
+		timer->cntv_cval = read_sysreg(cntv_cval_el0);
+	}
+
+	/* Disable the virtual timer */
+	write_sysreg(0, cntv_ctl_el0);
+
+	/* Allow physical timer/counter access for the host */
+	val = read_sysreg(cnthctl_el2);
+	val |= CNTHCTL_EL1PCTEN | CNTHCTL_EL1PCEN;
+	write_sysreg(val, cnthctl_el2);
+
+	/* Clear cntvoff for the host */
+	write_sysreg(0, cntvoff_el2);
+}
+
+void __hyp_text __timer_restore_state(struct kvm_vcpu *vcpu)
+{
+	struct kvm *kvm = kern_hyp_va(vcpu->kvm);
+	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+	u64 val;
+
+	/*
+	 * Disallow physical timer access for the guest
+	 * Physical counter access is allowed
+	 */
+	val = read_sysreg(cnthctl_el2);
+	val &= ~CNTHCTL_EL1PCEN;
+	val |= CNTHCTL_EL1PCTEN;
+	write_sysreg(val, cnthctl_el2);
+
+	if (kvm->arch.timer.enabled) {
+		write_sysreg(kvm->arch.timer.cntvoff, cntvoff_el2);
+		write_sysreg(timer->cntv_cval, cntv_cval_el0);
+		isb();
+		write_sysreg(timer->cntv_ctl, cntv_ctl_el0);
+	}
+}

+ 80 - 0
arch/arm64/kvm/hyp/tlb.c

@@ -0,0 +1,80 @@
+/*
+ * Copyright (C) 2015 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "hyp.h"
+
+static void __hyp_text __tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
+{
+	dsb(ishst);
+
+	/* Switch to requested VMID */
+	kvm = kern_hyp_va(kvm);
+	write_sysreg(kvm->arch.vttbr, vttbr_el2);
+	isb();
+
+	/*
+	 * We could do so much better if we had the VA as well.
+	 * Instead, we invalidate Stage-2 for this IPA, and the
+	 * whole of Stage-1. Weep...
+	 */
+	ipa >>= 12;
+	asm volatile("tlbi ipas2e1is, %0" : : "r" (ipa));
+
+	/*
+	 * We have to ensure completion of the invalidation at Stage-2,
+	 * since a table walk on another CPU could refill a TLB with a
+	 * complete (S1 + S2) walk based on the old Stage-2 mapping if
+	 * the Stage-1 invalidation happened first.
+	 */
+	dsb(ish);
+	asm volatile("tlbi vmalle1is" : : );
+	dsb(ish);
+	isb();
+
+	write_sysreg(0, vttbr_el2);
+}
+
+__alias(__tlb_flush_vmid_ipa) void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm,
+							    phys_addr_t ipa);
+
+static void __hyp_text __tlb_flush_vmid(struct kvm *kvm)
+{
+	dsb(ishst);
+
+	/* Switch to requested VMID */
+	kvm = kern_hyp_va(kvm);
+	write_sysreg(kvm->arch.vttbr, vttbr_el2);
+	isb();
+
+	asm volatile("tlbi vmalls12e1is" : : );
+	dsb(ish);
+	isb();
+
+	write_sysreg(0, vttbr_el2);
+}
+
+__alias(__tlb_flush_vmid) void __kvm_tlb_flush_vmid(struct kvm *kvm);
+
+static void __hyp_text __tlb_flush_vm_context(void)
+{
+	dsb(ishst);
+	asm volatile("tlbi alle1is	\n"
+		     "ic ialluis	  ": : );
+	dsb(ish);
+}
+
+__alias(__tlb_flush_vm_context) void __kvm_flush_vm_context(void);

+ 84 - 0
arch/arm64/kvm/hyp/vgic-v2-sr.c

@@ -0,0 +1,84 @@
+/*
+ * Copyright (C) 2012-2015 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/compiler.h>
+#include <linux/irqchip/arm-gic.h>
+#include <linux/kvm_host.h>
+
+#include <asm/kvm_mmu.h>
+
+#include "hyp.h"
+
+/* vcpu is already in the HYP VA space */
+void __hyp_text __vgic_v2_save_state(struct kvm_vcpu *vcpu)
+{
+	struct kvm *kvm = kern_hyp_va(vcpu->kvm);
+	struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2;
+	struct vgic_dist *vgic = &kvm->arch.vgic;
+	void __iomem *base = kern_hyp_va(vgic->vctrl_base);
+	u32 eisr0, eisr1, elrsr0, elrsr1;
+	int i, nr_lr;
+
+	if (!base)
+		return;
+
+	nr_lr = vcpu->arch.vgic_cpu.nr_lr;
+	cpu_if->vgic_vmcr = readl_relaxed(base + GICH_VMCR);
+	cpu_if->vgic_misr = readl_relaxed(base + GICH_MISR);
+	eisr0  = readl_relaxed(base + GICH_EISR0);
+	elrsr0 = readl_relaxed(base + GICH_ELRSR0);
+	if (unlikely(nr_lr > 32)) {
+		eisr1  = readl_relaxed(base + GICH_EISR1);
+		elrsr1 = readl_relaxed(base + GICH_ELRSR1);
+	} else {
+		eisr1 = elrsr1 = 0;
+	}
+#ifdef CONFIG_CPU_BIG_ENDIAN
+	cpu_if->vgic_eisr  = ((u64)eisr0 << 32) | eisr1;
+	cpu_if->vgic_elrsr = ((u64)elrsr0 << 32) | elrsr1;
+#else
+	cpu_if->vgic_eisr  = ((u64)eisr1 << 32) | eisr0;
+	cpu_if->vgic_elrsr = ((u64)elrsr1 << 32) | elrsr0;
+#endif
+	cpu_if->vgic_apr    = readl_relaxed(base + GICH_APR);
+
+	writel_relaxed(0, base + GICH_HCR);
+
+	for (i = 0; i < nr_lr; i++)
+		cpu_if->vgic_lr[i] = readl_relaxed(base + GICH_LR0 + (i * 4));
+}
+
+/* vcpu is already in the HYP VA space */
+void __hyp_text __vgic_v2_restore_state(struct kvm_vcpu *vcpu)
+{
+	struct kvm *kvm = kern_hyp_va(vcpu->kvm);
+	struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2;
+	struct vgic_dist *vgic = &kvm->arch.vgic;
+	void __iomem *base = kern_hyp_va(vgic->vctrl_base);
+	int i, nr_lr;
+
+	if (!base)
+		return;
+
+	writel_relaxed(cpu_if->vgic_hcr, base + GICH_HCR);
+	writel_relaxed(cpu_if->vgic_vmcr, base + GICH_VMCR);
+	writel_relaxed(cpu_if->vgic_apr, base + GICH_APR);
+
+	nr_lr = vcpu->arch.vgic_cpu.nr_lr;
+	for (i = 0; i < nr_lr; i++)
+		writel_relaxed(cpu_if->vgic_lr[i], base + GICH_LR0 + (i * 4));
+}

+ 228 - 0
arch/arm64/kvm/hyp/vgic-v3-sr.c

@@ -0,0 +1,228 @@
+/*
+ * Copyright (C) 2012-2015 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/compiler.h>
+#include <linux/irqchip/arm-gic-v3.h>
+#include <linux/kvm_host.h>
+
+#include <asm/kvm_mmu.h>
+
+#include "hyp.h"
+
+#define vtr_to_max_lr_idx(v)		((v) & 0xf)
+#define vtr_to_nr_pri_bits(v)		(((u32)(v) >> 29) + 1)
+
+#define read_gicreg(r)							\
+	({								\
+		u64 reg;						\
+		asm volatile("mrs_s %0, " __stringify(r) : "=r" (reg));	\
+		reg;							\
+	})
+
+#define write_gicreg(v,r)						\
+	do {								\
+		u64 __val = (v);					\
+		asm volatile("msr_s " __stringify(r) ", %0" : : "r" (__val));\
+	} while (0)
+
+/* vcpu is already in the HYP VA space */
+void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu)
+{
+	struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
+	u64 val;
+	u32 max_lr_idx, nr_pri_bits;
+
+	/*
+	 * Make sure stores to the GIC via the memory mapped interface
+	 * are now visible to the system register interface.
+	 */
+	dsb(st);
+
+	cpu_if->vgic_vmcr  = read_gicreg(ICH_VMCR_EL2);
+	cpu_if->vgic_misr  = read_gicreg(ICH_MISR_EL2);
+	cpu_if->vgic_eisr  = read_gicreg(ICH_EISR_EL2);
+	cpu_if->vgic_elrsr = read_gicreg(ICH_ELSR_EL2);
+
+	write_gicreg(0, ICH_HCR_EL2);
+	val = read_gicreg(ICH_VTR_EL2);
+	max_lr_idx = vtr_to_max_lr_idx(val);
+	nr_pri_bits = vtr_to_nr_pri_bits(val);
+
+	switch (max_lr_idx) {
+	case 15:
+		cpu_if->vgic_lr[VGIC_V3_LR_INDEX(15)] = read_gicreg(ICH_LR15_EL2);
+	case 14:
+		cpu_if->vgic_lr[VGIC_V3_LR_INDEX(14)] = read_gicreg(ICH_LR14_EL2);
+	case 13:
+		cpu_if->vgic_lr[VGIC_V3_LR_INDEX(13)] = read_gicreg(ICH_LR13_EL2);
+	case 12:
+		cpu_if->vgic_lr[VGIC_V3_LR_INDEX(12)] = read_gicreg(ICH_LR12_EL2);
+	case 11:
+		cpu_if->vgic_lr[VGIC_V3_LR_INDEX(11)] = read_gicreg(ICH_LR11_EL2);
+	case 10:
+		cpu_if->vgic_lr[VGIC_V3_LR_INDEX(10)] = read_gicreg(ICH_LR10_EL2);
+	case 9:
+		cpu_if->vgic_lr[VGIC_V3_LR_INDEX(9)] = read_gicreg(ICH_LR9_EL2);
+	case 8:
+		cpu_if->vgic_lr[VGIC_V3_LR_INDEX(8)] = read_gicreg(ICH_LR8_EL2);
+	case 7:
+		cpu_if->vgic_lr[VGIC_V3_LR_INDEX(7)] = read_gicreg(ICH_LR7_EL2);
+	case 6:
+		cpu_if->vgic_lr[VGIC_V3_LR_INDEX(6)] = read_gicreg(ICH_LR6_EL2);
+	case 5:
+		cpu_if->vgic_lr[VGIC_V3_LR_INDEX(5)] = read_gicreg(ICH_LR5_EL2);
+	case 4:
+		cpu_if->vgic_lr[VGIC_V3_LR_INDEX(4)] = read_gicreg(ICH_LR4_EL2);
+	case 3:
+		cpu_if->vgic_lr[VGIC_V3_LR_INDEX(3)] = read_gicreg(ICH_LR3_EL2);
+	case 2:
+		cpu_if->vgic_lr[VGIC_V3_LR_INDEX(2)] = read_gicreg(ICH_LR2_EL2);
+	case 1:
+		cpu_if->vgic_lr[VGIC_V3_LR_INDEX(1)] = read_gicreg(ICH_LR1_EL2);
+	case 0:
+		cpu_if->vgic_lr[VGIC_V3_LR_INDEX(0)] = read_gicreg(ICH_LR0_EL2);
+	}
+
+	switch (nr_pri_bits) {
+	case 7:
+		cpu_if->vgic_ap0r[3] = read_gicreg(ICH_AP0R3_EL2);
+		cpu_if->vgic_ap0r[2] = read_gicreg(ICH_AP0R2_EL2);
+	case 6:
+		cpu_if->vgic_ap0r[1] = read_gicreg(ICH_AP0R1_EL2);
+	default:
+		cpu_if->vgic_ap0r[0] = read_gicreg(ICH_AP0R0_EL2);
+	}
+
+	switch (nr_pri_bits) {
+	case 7:
+		cpu_if->vgic_ap1r[3] = read_gicreg(ICH_AP1R3_EL2);
+		cpu_if->vgic_ap1r[2] = read_gicreg(ICH_AP1R2_EL2);
+	case 6:
+		cpu_if->vgic_ap1r[1] = read_gicreg(ICH_AP1R1_EL2);
+	default:
+		cpu_if->vgic_ap1r[0] = read_gicreg(ICH_AP1R0_EL2);
+	}
+
+	val = read_gicreg(ICC_SRE_EL2);
+	write_gicreg(val | ICC_SRE_EL2_ENABLE, ICC_SRE_EL2);
+	isb(); /* Make sure ENABLE is set at EL2 before setting SRE at EL1 */
+	write_gicreg(1, ICC_SRE_EL1);
+}
+
+void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu)
+{
+	struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
+	u64 val;
+	u32 max_lr_idx, nr_pri_bits;
+
+	/*
+	 * VFIQEn is RES1 if ICC_SRE_EL1.SRE is 1. This causes a
+	 * Group0 interrupt (as generated in GICv2 mode) to be
+	 * delivered as a FIQ to the guest, with potentially fatal
+	 * consequences. So we must make sure that ICC_SRE_EL1 has
+	 * been actually programmed with the value we want before
+	 * starting to mess with the rest of the GIC.
+	 */
+	write_gicreg(cpu_if->vgic_sre, ICC_SRE_EL1);
+	isb();
+
+	write_gicreg(cpu_if->vgic_hcr, ICH_HCR_EL2);
+	write_gicreg(cpu_if->vgic_vmcr, ICH_VMCR_EL2);
+
+	val = read_gicreg(ICH_VTR_EL2);
+	max_lr_idx = vtr_to_max_lr_idx(val);
+	nr_pri_bits = vtr_to_nr_pri_bits(val);
+
+	switch (nr_pri_bits) {
+	case 7:
+		 write_gicreg(cpu_if->vgic_ap1r[3], ICH_AP1R3_EL2);
+		 write_gicreg(cpu_if->vgic_ap1r[2], ICH_AP1R2_EL2);
+	case 6:
+		 write_gicreg(cpu_if->vgic_ap1r[1], ICH_AP1R1_EL2);
+	default:
+		 write_gicreg(cpu_if->vgic_ap1r[0], ICH_AP1R0_EL2);
+	}
+
+	switch (nr_pri_bits) {
+	case 7:
+		 write_gicreg(cpu_if->vgic_ap0r[3], ICH_AP0R3_EL2);
+		 write_gicreg(cpu_if->vgic_ap0r[2], ICH_AP0R2_EL2);
+	case 6:
+		 write_gicreg(cpu_if->vgic_ap0r[1], ICH_AP0R1_EL2);
+	default:
+		 write_gicreg(cpu_if->vgic_ap0r[0], ICH_AP0R0_EL2);
+	}
+
+	switch (max_lr_idx) {
+	case 15:
+		write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(15)], ICH_LR15_EL2);
+	case 14:
+		write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(14)], ICH_LR14_EL2);
+	case 13:
+		write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(13)], ICH_LR13_EL2);
+	case 12:
+		write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(12)], ICH_LR12_EL2);
+	case 11:
+		write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(11)], ICH_LR11_EL2);
+	case 10:
+		write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(10)], ICH_LR10_EL2);
+	case 9:
+		write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(9)], ICH_LR9_EL2);
+	case 8:
+		write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(8)], ICH_LR8_EL2);
+	case 7:
+		write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(7)], ICH_LR7_EL2);
+	case 6:
+		write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(6)], ICH_LR6_EL2);
+	case 5:
+		write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(5)], ICH_LR5_EL2);
+	case 4:
+		write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(4)], ICH_LR4_EL2);
+	case 3:
+		write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(3)], ICH_LR3_EL2);
+	case 2:
+		write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(2)], ICH_LR2_EL2);
+	case 1:
+		write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(1)], ICH_LR1_EL2);
+	case 0:
+		write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(0)], ICH_LR0_EL2);
+	}
+
+	/*
+	 * Ensures that the above will have reached the
+	 * (re)distributors. This ensure the guest will read the
+	 * correct values from the memory-mapped interface.
+	 */
+	isb();
+	dsb(sy);
+
+	/*
+	 * Prevent the guest from touching the GIC system registers if
+	 * SRE isn't enabled for GICv3 emulation.
+	 */
+	if (!cpu_if->vgic_sre) {
+		write_gicreg(read_gicreg(ICC_SRE_EL2) & ~ICC_SRE_EL2_ENABLE,
+			     ICC_SRE_EL2);
+	}
+}
+
+static u64 __hyp_text __vgic_v3_read_ich_vtr_el2(void)
+{
+	return read_gicreg(ICH_VTR_EL2);
+}
+
+__alias(__vgic_v3_read_ich_vtr_el2) u64 __vgic_v3_get_ich_vtr_el2(void);
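A note on the switch statements in both functions above: they rely on deliberate case fall-through, so entering at case max_lr_idx touches that list register and every lower-numbered one as well. The walk is equivalent to a simple descending loop; a small illustration with hypothetical stand-ins, not the EL2 code itself:

#include <stdio.h>

int main(void)
{
	int max_lr_idx = 3;	/* as if ICH_VTR_EL2 reported four list registers */
	int i;

	/* Same set the fall-through switch visits: LR3, LR2, LR1, LR0 */
	for (i = max_lr_idx; i >= 0; i--)
		printf("save LR%d\n", i);
	return 0;
}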

+ 30 - 29
arch/arm64/kvm/sys_regs.c

@@ -29,6 +29,7 @@
 #include <asm/debug-monitors.h>
 #include <asm/esr.h>
 #include <asm/kvm_arm.h>
+#include <asm/kvm_asm.h>
 #include <asm/kvm_coproc.h>
 #include <asm/kvm_emulate.h>
 #include <asm/kvm_host.h>
@@ -219,9 +220,9 @@ static bool trap_debug_regs(struct kvm_vcpu *vcpu,
  * All writes will set the KVM_ARM64_DEBUG_DIRTY flag to ensure the
  * hyp.S code switches between host and guest values in future.
  */
-static inline void reg_to_dbg(struct kvm_vcpu *vcpu,
-			      struct sys_reg_params *p,
-			      u64 *dbg_reg)
+static void reg_to_dbg(struct kvm_vcpu *vcpu,
+		       struct sys_reg_params *p,
+		       u64 *dbg_reg)
 {
 	u64 val = p->regval;
 
@@ -234,18 +235,18 @@ static inline void reg_to_dbg(struct kvm_vcpu *vcpu,
 	vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY;
 }
 
-static inline void dbg_to_reg(struct kvm_vcpu *vcpu,
-			      struct sys_reg_params *p,
-			      u64 *dbg_reg)
+static void dbg_to_reg(struct kvm_vcpu *vcpu,
+		       struct sys_reg_params *p,
+		       u64 *dbg_reg)
 {
 	p->regval = *dbg_reg;
 	if (p->is_32bit)
 		p->regval &= 0xffffffffUL;
 }
 
-static inline bool trap_bvr(struct kvm_vcpu *vcpu,
-			    struct sys_reg_params *p,
-			    const struct sys_reg_desc *rd)
+static bool trap_bvr(struct kvm_vcpu *vcpu,
+		     struct sys_reg_params *p,
+		     const struct sys_reg_desc *rd)
 {
 	u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg];
 
@@ -279,15 +280,15 @@ static int get_bvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
 	return 0;
 }
 
-static inline void reset_bvr(struct kvm_vcpu *vcpu,
-			     const struct sys_reg_desc *rd)
+static void reset_bvr(struct kvm_vcpu *vcpu,
+		      const struct sys_reg_desc *rd)
 {
 	vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg] = rd->val;
 }
 
-static inline bool trap_bcr(struct kvm_vcpu *vcpu,
-			    struct sys_reg_params *p,
-			    const struct sys_reg_desc *rd)
+static bool trap_bcr(struct kvm_vcpu *vcpu,
+		     struct sys_reg_params *p,
+		     const struct sys_reg_desc *rd)
 {
 	u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bcr[rd->reg];
 
@@ -322,15 +323,15 @@ static int get_bcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
 	return 0;
 }
 
-static inline void reset_bcr(struct kvm_vcpu *vcpu,
-			     const struct sys_reg_desc *rd)
+static void reset_bcr(struct kvm_vcpu *vcpu,
+		      const struct sys_reg_desc *rd)
 {
 	vcpu->arch.vcpu_debug_state.dbg_bcr[rd->reg] = rd->val;
 }
 
-static inline bool trap_wvr(struct kvm_vcpu *vcpu,
-			    struct sys_reg_params *p,
-			    const struct sys_reg_desc *rd)
+static bool trap_wvr(struct kvm_vcpu *vcpu,
+		     struct sys_reg_params *p,
+		     const struct sys_reg_desc *rd)
 {
 	u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_wvr[rd->reg];
 
@@ -365,15 +366,15 @@ static int get_wvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
 	return 0;
 }
 
-static inline void reset_wvr(struct kvm_vcpu *vcpu,
-			     const struct sys_reg_desc *rd)
+static void reset_wvr(struct kvm_vcpu *vcpu,
+		      const struct sys_reg_desc *rd)
 {
 	vcpu->arch.vcpu_debug_state.dbg_wvr[rd->reg] = rd->val;
 }
 
-static inline bool trap_wcr(struct kvm_vcpu *vcpu,
-			    struct sys_reg_params *p,
-			    const struct sys_reg_desc *rd)
+static bool trap_wcr(struct kvm_vcpu *vcpu,
+		     struct sys_reg_params *p,
+		     const struct sys_reg_desc *rd)
 {
 	u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_wcr[rd->reg];
 
@@ -407,8 +408,8 @@ static int get_wcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
 	return 0;
 }
 
-static inline void reset_wcr(struct kvm_vcpu *vcpu,
-			     const struct sys_reg_desc *rd)
+static void reset_wcr(struct kvm_vcpu *vcpu,
+		      const struct sys_reg_desc *rd)
 {
 	vcpu->arch.vcpu_debug_state.dbg_wcr[rd->reg] = rd->val;
 }
@@ -722,9 +723,9 @@ static bool trap_debug32(struct kvm_vcpu *vcpu,
  * system is in.
  */
 
-static inline bool trap_xvr(struct kvm_vcpu *vcpu,
-			    struct sys_reg_params *p,
-			    const struct sys_reg_desc *rd)
+static bool trap_xvr(struct kvm_vcpu *vcpu,
+		     struct sys_reg_params *p,
+		     const struct sys_reg_desc *rd)
 {
 	u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg];
 

+ 0 - 134
arch/arm64/kvm/vgic-v2-switch.S

@@ -1,134 +0,0 @@
-/*
- * Copyright (C) 2012,2013 - ARM Ltd
- * Author: Marc Zyngier <marc.zyngier@arm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include <linux/linkage.h>
-#include <linux/irqchip/arm-gic.h>
-
-#include <asm/assembler.h>
-#include <asm/memory.h>
-#include <asm/asm-offsets.h>
-#include <asm/kvm.h>
-#include <asm/kvm_asm.h>
-#include <asm/kvm_arm.h>
-#include <asm/kvm_mmu.h>
-
-	.text
-	.pushsection	.hyp.text, "ax"
-
-/*
- * Save the VGIC CPU state into memory
- * x0: Register pointing to VCPU struct
- * Do not corrupt x1!!!
- */
-ENTRY(__save_vgic_v2_state)
-__save_vgic_v2_state:
-	/* Get VGIC VCTRL base into x2 */
-	ldr	x2, [x0, #VCPU_KVM]
-	kern_hyp_va	x2
-	ldr	x2, [x2, #KVM_VGIC_VCTRL]
-	kern_hyp_va	x2
-	cbz	x2, 2f		// disabled
-
-	/* Compute the address of struct vgic_cpu */
-	add	x3, x0, #VCPU_VGIC_CPU
-
-	/* Save all interesting registers */
-	ldr	w5, [x2, #GICH_VMCR]
-	ldr	w6, [x2, #GICH_MISR]
-	ldr	w7, [x2, #GICH_EISR0]
-	ldr	w8, [x2, #GICH_EISR1]
-	ldr	w9, [x2, #GICH_ELRSR0]
-	ldr	w10, [x2, #GICH_ELRSR1]
-	ldr	w11, [x2, #GICH_APR]
-CPU_BE(	rev	w5,  w5  )
-CPU_BE(	rev	w6,  w6  )
-CPU_BE(	rev	w7,  w7  )
-CPU_BE(	rev	w8,  w8  )
-CPU_BE(	rev	w9,  w9  )
-CPU_BE(	rev	w10, w10 )
-CPU_BE(	rev	w11, w11 )
-
-	str	w5, [x3, #VGIC_V2_CPU_VMCR]
-	str	w6, [x3, #VGIC_V2_CPU_MISR]
-CPU_LE(	str	w7, [x3, #VGIC_V2_CPU_EISR] )
-CPU_LE(	str	w8, [x3, #(VGIC_V2_CPU_EISR + 4)] )
-CPU_LE(	str	w9, [x3, #VGIC_V2_CPU_ELRSR] )
-CPU_LE(	str	w10, [x3, #(VGIC_V2_CPU_ELRSR + 4)] )
-CPU_BE(	str	w7, [x3, #(VGIC_V2_CPU_EISR + 4)] )
-CPU_BE(	str	w8, [x3, #VGIC_V2_CPU_EISR] )
-CPU_BE(	str	w9, [x3, #(VGIC_V2_CPU_ELRSR + 4)] )
-CPU_BE(	str	w10, [x3, #VGIC_V2_CPU_ELRSR] )
-	str	w11, [x3, #VGIC_V2_CPU_APR]
-
-	/* Clear GICH_HCR */
-	str	wzr, [x2, #GICH_HCR]
-
-	/* Save list registers */
-	add	x2, x2, #GICH_LR0
-	ldr	w4, [x3, #VGIC_CPU_NR_LR]
-	add	x3, x3, #VGIC_V2_CPU_LR
-1:	ldr	w5, [x2], #4
-CPU_BE(	rev	w5, w5 )
-	str	w5, [x3], #4
-	sub	w4, w4, #1
-	cbnz	w4, 1b
-2:
-	ret
-ENDPROC(__save_vgic_v2_state)
-
-/*
- * Restore the VGIC CPU state from memory
- * x0: Register pointing to VCPU struct
- */
-ENTRY(__restore_vgic_v2_state)
-__restore_vgic_v2_state:
-	/* Get VGIC VCTRL base into x2 */
-	ldr	x2, [x0, #VCPU_KVM]
-	kern_hyp_va	x2
-	ldr	x2, [x2, #KVM_VGIC_VCTRL]
-	kern_hyp_va	x2
-	cbz	x2, 2f		// disabled
-
-	/* Compute the address of struct vgic_cpu */
-	add	x3, x0, #VCPU_VGIC_CPU
-
-	/* We only restore a minimal set of registers */
-	ldr	w4, [x3, #VGIC_V2_CPU_HCR]
-	ldr	w5, [x3, #VGIC_V2_CPU_VMCR]
-	ldr	w6, [x3, #VGIC_V2_CPU_APR]
-CPU_BE(	rev	w4, w4 )
-CPU_BE(	rev	w5, w5 )
-CPU_BE(	rev	w6, w6 )
-
-	str	w4, [x2, #GICH_HCR]
-	str	w5, [x2, #GICH_VMCR]
-	str	w6, [x2, #GICH_APR]
-
-	/* Restore list registers */
-	add	x2, x2, #GICH_LR0
-	ldr	w4, [x3, #VGIC_CPU_NR_LR]
-	add	x3, x3, #VGIC_V2_CPU_LR
-1:	ldr	w5, [x3], #4
-CPU_BE(	rev	w5, w5 )
-	str	w5, [x2], #4
-	sub	w4, w4, #1
-	cbnz	w4, 1b
-2:
-	ret
-ENDPROC(__restore_vgic_v2_state)
-
-	.popsection

+ 0 - 269
arch/arm64/kvm/vgic-v3-switch.S

@@ -1,269 +0,0 @@
-/*
- * Copyright (C) 2012,2013 - ARM Ltd
- * Author: Marc Zyngier <marc.zyngier@arm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include <linux/linkage.h>
-#include <linux/irqchip/arm-gic-v3.h>
-
-#include <asm/assembler.h>
-#include <asm/memory.h>
-#include <asm/asm-offsets.h>
-#include <asm/kvm.h>
-#include <asm/kvm_asm.h>
-#include <asm/kvm_arm.h>
-
-	.text
-	.pushsection	.hyp.text, "ax"
-
-/*
- * We store LRs in reverse order to let the CPU deal with streaming
- * access. Use this macro to make it look saner...
- */
-#define LR_OFFSET(n)	(VGIC_V3_CPU_LR + (15 - n) * 8)
-
-/*
- * Save the VGIC CPU state into memory
- * x0: Register pointing to VCPU struct
- * Do not corrupt x1!!!
- */
-.macro	save_vgic_v3_state
-	// Compute the address of struct vgic_cpu
-	add	x3, x0, #VCPU_VGIC_CPU
-
-	// Make sure stores to the GIC via the memory mapped interface
-	// are now visible to the system register interface
-	dsb	st
-
-	// Save all interesting registers
-	mrs_s	x5, ICH_VMCR_EL2
-	mrs_s	x6, ICH_MISR_EL2
-	mrs_s	x7, ICH_EISR_EL2
-	mrs_s	x8, ICH_ELSR_EL2
-
-	str	w5, [x3, #VGIC_V3_CPU_VMCR]
-	str	w6, [x3, #VGIC_V3_CPU_MISR]
-	str	w7, [x3, #VGIC_V3_CPU_EISR]
-	str	w8, [x3, #VGIC_V3_CPU_ELRSR]
-
-	msr_s	ICH_HCR_EL2, xzr
-
-	mrs_s	x21, ICH_VTR_EL2
-	mvn	w22, w21
-	ubfiz	w23, w22, 2, 4	// w23 = (15 - ListRegs) * 4
-
-	adr	x24, 1f
-	add	x24, x24, x23
-	br	x24
-
-1:
-	mrs_s	x20, ICH_LR15_EL2
-	mrs_s	x19, ICH_LR14_EL2
-	mrs_s	x18, ICH_LR13_EL2
-	mrs_s	x17, ICH_LR12_EL2
-	mrs_s	x16, ICH_LR11_EL2
-	mrs_s	x15, ICH_LR10_EL2
-	mrs_s	x14, ICH_LR9_EL2
-	mrs_s	x13, ICH_LR8_EL2
-	mrs_s	x12, ICH_LR7_EL2
-	mrs_s	x11, ICH_LR6_EL2
-	mrs_s	x10, ICH_LR5_EL2
-	mrs_s	x9, ICH_LR4_EL2
-	mrs_s	x8, ICH_LR3_EL2
-	mrs_s	x7, ICH_LR2_EL2
-	mrs_s	x6, ICH_LR1_EL2
-	mrs_s	x5, ICH_LR0_EL2
-
-	adr	x24, 1f
-	add	x24, x24, x23
-	br	x24
-
-1:
-	str	x20, [x3, #LR_OFFSET(15)]
-	str	x19, [x3, #LR_OFFSET(14)]
-	str	x18, [x3, #LR_OFFSET(13)]
-	str	x17, [x3, #LR_OFFSET(12)]
-	str	x16, [x3, #LR_OFFSET(11)]
-	str	x15, [x3, #LR_OFFSET(10)]
-	str	x14, [x3, #LR_OFFSET(9)]
-	str	x13, [x3, #LR_OFFSET(8)]
-	str	x12, [x3, #LR_OFFSET(7)]
-	str	x11, [x3, #LR_OFFSET(6)]
-	str	x10, [x3, #LR_OFFSET(5)]
-	str	x9, [x3, #LR_OFFSET(4)]
-	str	x8, [x3, #LR_OFFSET(3)]
-	str	x7, [x3, #LR_OFFSET(2)]
-	str	x6, [x3, #LR_OFFSET(1)]
-	str	x5, [x3, #LR_OFFSET(0)]
-
-	tbnz	w21, #29, 6f	// 6 bits
-	tbz	w21, #30, 5f	// 5 bits
-				// 7 bits
-	mrs_s	x20, ICH_AP0R3_EL2
-	str	w20, [x3, #(VGIC_V3_CPU_AP0R + 3*4)]
-	mrs_s	x19, ICH_AP0R2_EL2
-	str	w19, [x3, #(VGIC_V3_CPU_AP0R + 2*4)]
-6:	mrs_s	x18, ICH_AP0R1_EL2
-	str	w18, [x3, #(VGIC_V3_CPU_AP0R + 1*4)]
-5:	mrs_s	x17, ICH_AP0R0_EL2
-	str	w17, [x3, #VGIC_V3_CPU_AP0R]
-
-	tbnz	w21, #29, 6f	// 6 bits
-	tbz	w21, #30, 5f	// 5 bits
-				// 7 bits
-	mrs_s	x20, ICH_AP1R3_EL2
-	str	w20, [x3, #(VGIC_V3_CPU_AP1R + 3*4)]
-	mrs_s	x19, ICH_AP1R2_EL2
-	str	w19, [x3, #(VGIC_V3_CPU_AP1R + 2*4)]
-6:	mrs_s	x18, ICH_AP1R1_EL2
-	str	w18, [x3, #(VGIC_V3_CPU_AP1R + 1*4)]
-5:	mrs_s	x17, ICH_AP1R0_EL2
-	str	w17, [x3, #VGIC_V3_CPU_AP1R]
-
-	// Restore SRE_EL1 access and re-enable SRE at EL1.
-	mrs_s	x5, ICC_SRE_EL2
-	orr	x5, x5, #ICC_SRE_EL2_ENABLE
-	msr_s	ICC_SRE_EL2, x5
-	isb
-	mov	x5, #1
-	msr_s	ICC_SRE_EL1, x5
-.endm
-
-/*
- * Restore the VGIC CPU state from memory
- * x0: Register pointing to VCPU struct
- */
-.macro	restore_vgic_v3_state
-	// Compute the address of struct vgic_cpu
-	add	x3, x0, #VCPU_VGIC_CPU
-
-	// Restore all interesting registers
-	ldr	w4, [x3, #VGIC_V3_CPU_HCR]
-	ldr	w5, [x3, #VGIC_V3_CPU_VMCR]
-	ldr	w25, [x3, #VGIC_V3_CPU_SRE]
-
-	msr_s	ICC_SRE_EL1, x25
-
-	// make sure SRE is valid before writing the other registers
-	isb
-
-	msr_s	ICH_HCR_EL2, x4
-	msr_s	ICH_VMCR_EL2, x5
-
-	mrs_s	x21, ICH_VTR_EL2
-
-	tbnz	w21, #29, 6f	// 6 bits
-	tbz	w21, #30, 5f	// 5 bits
-				// 7 bits
-	ldr	w20, [x3, #(VGIC_V3_CPU_AP1R + 3*4)]
-	msr_s	ICH_AP1R3_EL2, x20
-	ldr	w19, [x3, #(VGIC_V3_CPU_AP1R + 2*4)]
-	msr_s	ICH_AP1R2_EL2, x19
-6:	ldr	w18, [x3, #(VGIC_V3_CPU_AP1R + 1*4)]
-	msr_s	ICH_AP1R1_EL2, x18
-5:	ldr	w17, [x3, #VGIC_V3_CPU_AP1R]
-	msr_s	ICH_AP1R0_EL2, x17
-
-	tbnz	w21, #29, 6f	// 6 bits
-	tbz	w21, #30, 5f	// 5 bits
-				// 7 bits
-	ldr	w20, [x3, #(VGIC_V3_CPU_AP0R + 3*4)]
-	msr_s	ICH_AP0R3_EL2, x20
-	ldr	w19, [x3, #(VGIC_V3_CPU_AP0R + 2*4)]
-	msr_s	ICH_AP0R2_EL2, x19
-6:	ldr	w18, [x3, #(VGIC_V3_CPU_AP0R + 1*4)]
-	msr_s	ICH_AP0R1_EL2, x18
-5:	ldr	w17, [x3, #VGIC_V3_CPU_AP0R]
-	msr_s	ICH_AP0R0_EL2, x17
-
-	and	w22, w21, #0xf
-	mvn	w22, w21
-	ubfiz	w23, w22, 2, 4	// w23 = (15 - ListRegs) * 4
-
-	adr	x24, 1f
-	add	x24, x24, x23
-	br	x24
-
-1:
-	ldr	x20, [x3, #LR_OFFSET(15)]
-	ldr	x19, [x3, #LR_OFFSET(14)]
-	ldr	x18, [x3, #LR_OFFSET(13)]
-	ldr	x17, [x3, #LR_OFFSET(12)]
-	ldr	x16, [x3, #LR_OFFSET(11)]
-	ldr	x15, [x3, #LR_OFFSET(10)]
-	ldr	x14, [x3, #LR_OFFSET(9)]
-	ldr	x13, [x3, #LR_OFFSET(8)]
-	ldr	x12, [x3, #LR_OFFSET(7)]
-	ldr	x11, [x3, #LR_OFFSET(6)]
-	ldr	x10, [x3, #LR_OFFSET(5)]
-	ldr	x9, [x3, #LR_OFFSET(4)]
-	ldr	x8, [x3, #LR_OFFSET(3)]
-	ldr	x7, [x3, #LR_OFFSET(2)]
-	ldr	x6, [x3, #LR_OFFSET(1)]
-	ldr	x5, [x3, #LR_OFFSET(0)]
-
-	adr	x24, 1f
-	add	x24, x24, x23
-	br	x24
-
-1:
-	msr_s	ICH_LR15_EL2, x20
-	msr_s	ICH_LR14_EL2, x19
-	msr_s	ICH_LR13_EL2, x18
-	msr_s	ICH_LR12_EL2, x17
-	msr_s	ICH_LR11_EL2, x16
-	msr_s	ICH_LR10_EL2, x15
-	msr_s	ICH_LR9_EL2,  x14
-	msr_s	ICH_LR8_EL2,  x13
-	msr_s	ICH_LR7_EL2,  x12
-	msr_s	ICH_LR6_EL2,  x11
-	msr_s	ICH_LR5_EL2,  x10
-	msr_s	ICH_LR4_EL2,   x9
-	msr_s	ICH_LR3_EL2,   x8
-	msr_s	ICH_LR2_EL2,   x7
-	msr_s	ICH_LR1_EL2,   x6
-	msr_s	ICH_LR0_EL2,   x5
-
-	// Ensure that the above will have reached the
-	// (re)distributors. This ensure the guest will read
-	// the correct values from the memory-mapped interface.
-	isb
-	dsb	sy
-
-	// Prevent the guest from touching the GIC system registers
-	// if SRE isn't enabled for GICv3 emulation
-	cbnz	x25, 1f
-	mrs_s	x5, ICC_SRE_EL2
-	and	x5, x5, #~ICC_SRE_EL2_ENABLE
-	msr_s	ICC_SRE_EL2, x5
-1:
-.endm
-
-ENTRY(__save_vgic_v3_state)
-	save_vgic_v3_state
-	ret
-ENDPROC(__save_vgic_v3_state)
-
-ENTRY(__restore_vgic_v3_state)
-	restore_vgic_v3_state
-	ret
-ENDPROC(__restore_vgic_v3_state)
-
-ENTRY(__vgic_v3_get_ich_vtr_el2)
-	mrs_s	x0, ICH_VTR_EL2
-	ret
-ENDPROC(__vgic_v3_get_ich_vtr_el2)
-
-	.popsection
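As the LR_OFFSET comment in this removed file notes, the list registers are kept in reverse order in memory so the CPU can stream the accesses; the C replacement in vgic-v3-sr.c above presumably keeps the same reversed layout behind its VGIC_V3_LR_INDEX() macro, whose definition is outside this diff. A quick illustration of that index mapping, with made-up names and the 8-byte slot size taken from the removed macro:

#include <stdio.h>

#define MAX_LRS		16
#define LR_SLOT(n)	(MAX_LRS - 1 - (n))	/* LR15 -> slot 0, LR0 -> slot 15 */

int main(void)
{
	int n;

	for (n = 0; n < MAX_LRS; n++)
		printf("LR%-2d -> array slot %2d (byte offset %3d)\n",
		       n, LR_SLOT(n), LR_SLOT(n) * 8);
	return 0;
}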

+ 4 - 0
arch/powerpc/include/asm/kvm_host.h

@@ -50,6 +50,10 @@
 #define KVM_NR_IRQCHIPS          1
 #define KVM_IRQCHIP_NUM_PINS     256
 
+/* PPC-specific vcpu->requests bit members */
+#define KVM_REQ_WATCHDOG           8
+#define KVM_REQ_EPR_EXIT           9
+
 #include <linux/mmu_notifier.h>
 
 #define KVM_ARCH_WANT_MMU_NOTIFIER

+ 2 - 8
arch/powerpc/kvm/book3s_hv.c

@@ -314,16 +314,10 @@ static void kvmppc_dump_regs(struct kvm_vcpu *vcpu)
 
 static struct kvm_vcpu *kvmppc_find_vcpu(struct kvm *kvm, int id)
 {
-	int r;
-	struct kvm_vcpu *v, *ret = NULL;
+	struct kvm_vcpu *ret;
 
 	mutex_lock(&kvm->lock);
-	kvm_for_each_vcpu(r, v, kvm) {
-		if (v->vcpu_id == id) {
-			ret = v;
-			break;
-		}
-	}
+	ret = kvm_get_vcpu_by_id(kvm, id);
 	mutex_unlock(&kvm->lock);
 	return ret;
 }

+ 2 - 2
arch/powerpc/kvm/book3s_pr.c

@@ -512,7 +512,7 @@ static void kvmppc_patch_dcbz(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte)
 	put_page(hpage);
 }
 
-static int kvmppc_visible_gpa(struct kvm_vcpu *vcpu, gpa_t gpa)
+static bool kvmppc_visible_gpa(struct kvm_vcpu *vcpu, gpa_t gpa)
 {
 	ulong mp_pa = vcpu->arch.magic_page_pa;
 
@@ -521,7 +521,7 @@ static int kvmppc_visible_gpa(struct kvm_vcpu *vcpu, gpa_t gpa)
 
 	gpa &= ~0xFFFULL;
 	if (unlikely(mp_pa) && unlikely((mp_pa & KVM_PAM) == (gpa & KVM_PAM))) {
-		return 1;
+		return true;
 	}
 
 	return kvm_is_visible_gfn(vcpu->kvm, gpa >> PAGE_SHIFT);

+ 7 - 0
arch/s390/include/asm/elf.h

@@ -104,6 +104,9 @@
 #define HWCAP_S390_TE		1024
 #define HWCAP_S390_VXRS		2048
 
+/* Internal bits, not exposed via elf */
+#define HWCAP_INT_SIE		1UL
+
 /*
  * These are used to set parameters in the core dumps.
  */
@@ -169,6 +172,10 @@ extern unsigned int vdso_enabled;
 extern unsigned long elf_hwcap;
 #define ELF_HWCAP (elf_hwcap)
 
+/* Internal hardware capabilities, not exposed via elf */
+
+extern unsigned long int_hwcap;
+
 /* This yields a string that ld.so will use to load implementation
    specific libraries for optimization.  This is more specific in
    intent than poking at uname or /proc/cpuinfo.

+ 50 - 7
arch/s390/include/asm/kvm_host.h

@@ -25,7 +25,9 @@
 #include <asm/fpu/api.h>
 #include <asm/isc.h>
 
-#define KVM_MAX_VCPUS 64
+#define KVM_S390_BSCA_CPU_SLOTS 64
+#define KVM_S390_ESCA_CPU_SLOTS 248
+#define KVM_MAX_VCPUS KVM_S390_ESCA_CPU_SLOTS
 #define KVM_USER_MEM_SLOTS 32
 
 /*
@@ -37,12 +39,41 @@
 #define KVM_IRQCHIP_NUM_PINS 4096
 #define KVM_HALT_POLL_NS_DEFAULT 0
 
+/* s390-specific vcpu->requests bit members */
+#define KVM_REQ_ENABLE_IBS         8
+#define KVM_REQ_DISABLE_IBS        9
+
 #define SIGP_CTRL_C		0x80
 #define SIGP_CTRL_SCN_MASK	0x3f
 
-struct sca_entry {
+union bsca_sigp_ctrl {
+	__u8 value;
+	struct {
+		__u8 c : 1;
+		__u8 r : 1;
+		__u8 scn : 6;
+	};
+} __packed;
+
+union esca_sigp_ctrl {
+	__u16 value;
+	struct {
+		__u8 c : 1;
+		__u8 reserved: 7;
+		__u8 scn;
+	};
+} __packed;
+
+struct esca_entry {
+	union esca_sigp_ctrl sigp_ctrl;
+	__u16   reserved1[3];
+	__u64   sda;
+	__u64   reserved2[6];
+} __packed;
+
+struct bsca_entry {
 	__u8	reserved0;
-	__u8	sigp_ctrl;
+	union bsca_sigp_ctrl	sigp_ctrl;
 	__u16	reserved[3];
 	__u64	sda;
 	__u64	reserved2[2];
@@ -57,14 +88,22 @@ union ipte_control {
 	};
 };
 
-struct sca_block {
+struct bsca_block {
 	union ipte_control ipte_control;
 	__u64	reserved[5];
 	__u64	mcn;
 	__u64	reserved2;
-	struct sca_entry cpu[64];
+	struct bsca_entry cpu[KVM_S390_BSCA_CPU_SLOTS];
 } __attribute__((packed));
 
+struct esca_block {
+	union ipte_control ipte_control;
+	__u64   reserved1[7];
+	__u64   mcn[4];
+	__u64   reserved2[20];
+	struct esca_entry cpu[KVM_S390_ESCA_CPU_SLOTS];
+} __packed;
+
 #define CPUSTAT_STOPPED    0x80000000
 #define CPUSTAT_WAIT       0x10000000
 #define CPUSTAT_ECALL_PEND 0x08000000
@@ -182,7 +221,8 @@ struct kvm_s390_sie_block {
 	__u64	pp;			/* 0x01de */
 	__u8	reserved1e6[2];		/* 0x01e6 */
 	__u64	itdba;			/* 0x01e8 */
-	__u8	reserved1f0[16];	/* 0x01f0 */
+	__u64   riccbd;			/* 0x01f0 */
+	__u8    reserved1f8[8];		/* 0x01f8 */
 } __attribute__((packed));
 
 struct kvm_s390_itdb {
@@ -585,11 +625,14 @@ struct kvm_s390_crypto_cb {
 };
 
 struct kvm_arch{
-	struct sca_block *sca;
+	void *sca;
+	int use_esca;
+	rwlock_t sca_lock;
 	debug_info_t *dbf;
 	struct kvm_s390_float_interrupt float_int;
 	struct kvm_device *flic;
 	struct gmap *gmap;
+	unsigned long mem_limit;
 	int css_support;
 	int use_irqchip;
 	int use_cmma;
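A quick sanity check on the two SIGP control formats introduced above: the basic (bsca) format packs the pending bit and a 6-bit source CPU number into a single byte, while the extended (esca) format widens this to a halfword with an 8-bit SCN. The sketch below mirrors those definitions with standard C11 types purely as an illustration; on a big-endian s390 host the bit order matches the hardware layout, elsewhere only the sizes are meaningful:

#include <assert.h>
#include <stdint.h>

union bsca_sigp_ctrl_demo {
	uint8_t value;
	struct {
		uint8_t c : 1;
		uint8_t r : 1;
		uint8_t scn : 6;
	};
};

union esca_sigp_ctrl_demo {
	uint16_t value;
	struct {
		uint8_t c : 1;
		uint8_t reserved : 7;
		uint8_t scn;
	};
};

int main(void)
{
	static_assert(sizeof(union bsca_sigp_ctrl_demo) == 1, "basic format is one byte");
	static_assert(sizeof(union esca_sigp_ctrl_demo) == 2, "extended format is two bytes");
	return 0;
}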

+ 7 - 1
arch/s390/include/asm/sclp.h

@@ -29,7 +29,10 @@ struct sclp_ipl_info {
 
 struct sclp_core_entry {
 	u8 core_id;
-	u8 reserved0[2];
+	u8 reserved0;
+	u8 : 4;
+	u8 sief2 : 1;
+	u8 : 3;
 	u8 : 3;
 	u8 siif : 1;
 	u8 siif : 1;
 	u8 sigpif : 1;
 	u8 sigpif : 1;
@@ -53,6 +56,9 @@ struct sclp_info {
 	unsigned char has_sigpif : 1;
 	unsigned char has_sigpif : 1;
 	unsigned char has_core_type : 1;
 	unsigned char has_core_type : 1;
 	unsigned char has_sprp : 1;
 	unsigned char has_sprp : 1;
+	unsigned char has_hvs : 1;
+	unsigned char has_esca : 1;
+	unsigned char has_sief2 : 1;
 	unsigned int ibc;
 	unsigned int ibc;
 	unsigned int mtid;
 	unsigned int mtid;
 	unsigned int mtid_cp;
 	unsigned int mtid_cp;

+ 5 - 0
arch/s390/include/uapi/asm/kvm.h

@@ -66,6 +66,8 @@ struct kvm_s390_io_adapter_req {
 #define KVM_S390_VM_MEM_CLR_CMMA	1
 #define KVM_S390_VM_MEM_CLR_CMMA	1
 #define KVM_S390_VM_MEM_LIMIT_SIZE	2
 #define KVM_S390_VM_MEM_LIMIT_SIZE	2
 
 
+#define KVM_S390_NO_MEM_LIMIT		U64_MAX
+
 /* kvm attributes for KVM_S390_VM_TOD */
 /* kvm attributes for KVM_S390_VM_TOD */
 #define KVM_S390_VM_TOD_LOW		0
 #define KVM_S390_VM_TOD_LOW		0
 #define KVM_S390_VM_TOD_HIGH		1
 #define KVM_S390_VM_TOD_HIGH		1
@@ -151,6 +153,7 @@ struct kvm_guest_debug_arch {
 #define KVM_SYNC_ARCH0  (1UL << 4)
 #define KVM_SYNC_ARCH0  (1UL << 4)
 #define KVM_SYNC_PFAULT (1UL << 5)
 #define KVM_SYNC_PFAULT (1UL << 5)
 #define KVM_SYNC_VRS    (1UL << 6)
 #define KVM_SYNC_VRS    (1UL << 6)
+#define KVM_SYNC_RICCB  (1UL << 7)
 /* definition of registers in kvm_run */
 /* definition of registers in kvm_run */
 struct kvm_sync_regs {
 struct kvm_sync_regs {
 	__u64 prefix;	/* prefix register */
 	__u64 prefix;	/* prefix register */
@@ -168,6 +171,8 @@ struct kvm_sync_regs {
 	__u64 vrs[32][2];	/* vector registers */
 	__u64 vrs[32][2];	/* vector registers */
 	__u8  reserved[512];	/* for future vector expansion */
 	__u8  reserved[512];	/* for future vector expansion */
 	__u32 fpc;	/* only valid with vector registers */
 	__u32 fpc;	/* only valid with vector registers */
+	__u8 padding[52];	/* riccb needs to be 64byte aligned */
+	__u8 riccb[64];		/* runtime instrumentation controls block */
 };
 };
 
 
 #define KVM_REG_S390_TODPR	(KVM_REG_S390 | KVM_REG_SIZE_U32 | 0x1)
 #define KVM_REG_S390_TODPR	(KVM_REG_S390 | KVM_REG_SIZE_U32 | 0x1)

+ 6 - 0
arch/s390/kernel/processor.c

@@ -61,6 +61,9 @@ static int show_cpuinfo(struct seq_file *m, void *v)
 		"esan3", "zarch", "stfle", "msa", "ldisp", "eimm", "dfp",
 		"esan3", "zarch", "stfle", "msa", "ldisp", "eimm", "dfp",
 		"edat", "etf3eh", "highgprs", "te", "vx"
 		"edat", "etf3eh", "highgprs", "te", "vx"
 	};
 	};
+	static const char * const int_hwcap_str[] = {
+		"sie"
+	};
 	unsigned long n = (unsigned long) v - 1;
 	unsigned long n = (unsigned long) v - 1;
 	int i;
 	int i;
 
 
@@ -75,6 +78,9 @@ static int show_cpuinfo(struct seq_file *m, void *v)
 		for (i = 0; i < ARRAY_SIZE(hwcap_str); i++)
 		for (i = 0; i < ARRAY_SIZE(hwcap_str); i++)
 			if (hwcap_str[i] && (elf_hwcap & (1UL << i)))
 			if (hwcap_str[i] && (elf_hwcap & (1UL << i)))
 				seq_printf(m, "%s ", hwcap_str[i]);
 				seq_printf(m, "%s ", hwcap_str[i]);
+		for (i = 0; i < ARRAY_SIZE(int_hwcap_str); i++)
+			if (int_hwcap_str[i] && (int_hwcap & (1UL << i)))
+				seq_printf(m, "%s ", int_hwcap_str[i]);
 		seq_puts(m, "\n");
 		seq_puts(m, "\n");
 		show_cacheinfo(m);
 		show_cacheinfo(m);
 	}
 	}

+ 9 - 0
arch/s390/kernel/setup.c

@@ -80,6 +80,8 @@ EXPORT_SYMBOL(console_irq);
 unsigned long elf_hwcap __read_mostly = 0;
 unsigned long elf_hwcap __read_mostly = 0;
 char elf_platform[ELF_PLATFORM_SIZE];
 char elf_platform[ELF_PLATFORM_SIZE];
 
 
+unsigned long int_hwcap = 0;
+
 int __initdata memory_end_set;
 int __initdata memory_end_set;
 unsigned long __initdata memory_end;
 unsigned long __initdata memory_end;
 unsigned long __initdata max_physmem_end;
 unsigned long __initdata max_physmem_end;
@@ -793,6 +795,13 @@ static int __init setup_hwcaps(void)
 		strcpy(elf_platform, "z13");
 		strcpy(elf_platform, "z13");
 		break;
 		break;
 	}
 	}
+
+	/*
+	 * Virtualization support HWCAP_INT_SIE is bit 0.
+	 */
+	if (sclp.has_sief2)
+		int_hwcap |= HWCAP_INT_SIE;
+
 	return 0;
 	return 0;
 }
 }
 arch_initcall(setup_hwcaps);
 arch_initcall(setup_hwcaps);

+ 3 - 8
arch/s390/kvm/diag.c

@@ -155,10 +155,8 @@ static int __diag_time_slice_end(struct kvm_vcpu *vcpu)
 
 
 static int __diag_time_slice_end_directed(struct kvm_vcpu *vcpu)
 static int __diag_time_slice_end_directed(struct kvm_vcpu *vcpu)
 {
 {
-	struct kvm *kvm = vcpu->kvm;
 	struct kvm_vcpu *tcpu;
 	struct kvm_vcpu *tcpu;
 	int tid;
 	int tid;
-	int i;
 
 
 	tid = vcpu->run->s.regs.gprs[(vcpu->arch.sie_block->ipa & 0xf0) >> 4];
 	tid = vcpu->run->s.regs.gprs[(vcpu->arch.sie_block->ipa & 0xf0) >> 4];
 	vcpu->stat.diagnose_9c++;
 	vcpu->stat.diagnose_9c++;
@@ -167,12 +165,9 @@ static int __diag_time_slice_end_directed(struct kvm_vcpu *vcpu)
 	if (tid == vcpu->vcpu_id)
 	if (tid == vcpu->vcpu_id)
 		return 0;
 		return 0;
 
 
-	kvm_for_each_vcpu(i, tcpu, kvm)
-		if (tcpu->vcpu_id == tid) {
-			kvm_vcpu_yield_to(tcpu);
-			break;
-		}
-
+	tcpu = kvm_get_vcpu_by_id(vcpu->kvm, tid);
+	if (tcpu)
+		kvm_vcpu_yield_to(tcpu);
 	return 0;
 	return 0;
 }
 }
 
 

+ 27 - 11
arch/s390/kvm/gaccess.c

@@ -259,10 +259,14 @@ struct aste {
 
 
 int ipte_lock_held(struct kvm_vcpu *vcpu)
 int ipte_lock_held(struct kvm_vcpu *vcpu)
 {
 {
-	union ipte_control *ic = &vcpu->kvm->arch.sca->ipte_control;
+	if (vcpu->arch.sie_block->eca & 1) {
+		int rc;
 
 
-	if (vcpu->arch.sie_block->eca & 1)
-		return ic->kh != 0;
+		read_lock(&vcpu->kvm->arch.sca_lock);
+		rc = kvm_s390_get_ipte_control(vcpu->kvm)->kh != 0;
+		read_unlock(&vcpu->kvm->arch.sca_lock);
+		return rc;
+	}
 	return vcpu->kvm->arch.ipte_lock_count != 0;
 	return vcpu->kvm->arch.ipte_lock_count != 0;
 }
 }
 
 
@@ -274,16 +278,20 @@ static void ipte_lock_simple(struct kvm_vcpu *vcpu)
 	vcpu->kvm->arch.ipte_lock_count++;
 	vcpu->kvm->arch.ipte_lock_count++;
 	if (vcpu->kvm->arch.ipte_lock_count > 1)
 	if (vcpu->kvm->arch.ipte_lock_count > 1)
 		goto out;
 		goto out;
-	ic = &vcpu->kvm->arch.sca->ipte_control;
+retry:
+	read_lock(&vcpu->kvm->arch.sca_lock);
+	ic = kvm_s390_get_ipte_control(vcpu->kvm);
 	do {
 	do {
 		old = READ_ONCE(*ic);
 		old = READ_ONCE(*ic);
-		while (old.k) {
+		if (old.k) {
+			read_unlock(&vcpu->kvm->arch.sca_lock);
 			cond_resched();
 			cond_resched();
-			old = READ_ONCE(*ic);
+			goto retry;
 		}
 		}
 		new = old;
 		new = old;
 		new.k = 1;
 		new.k = 1;
 	} while (cmpxchg(&ic->val, old.val, new.val) != old.val);
 	} while (cmpxchg(&ic->val, old.val, new.val) != old.val);
+	read_unlock(&vcpu->kvm->arch.sca_lock);
 out:
 out:
 	mutex_unlock(&vcpu->kvm->arch.ipte_mutex);
 	mutex_unlock(&vcpu->kvm->arch.ipte_mutex);
 }
 }
@@ -296,12 +304,14 @@ static void ipte_unlock_simple(struct kvm_vcpu *vcpu)
 	vcpu->kvm->arch.ipte_lock_count--;
 	vcpu->kvm->arch.ipte_lock_count--;
 	if (vcpu->kvm->arch.ipte_lock_count)
 	if (vcpu->kvm->arch.ipte_lock_count)
 		goto out;
 		goto out;
-	ic = &vcpu->kvm->arch.sca->ipte_control;
+	read_lock(&vcpu->kvm->arch.sca_lock);
+	ic = kvm_s390_get_ipte_control(vcpu->kvm);
 	do {
 	do {
 		old = READ_ONCE(*ic);
 		old = READ_ONCE(*ic);
 		new = old;
 		new = old;
 		new.k = 0;
 		new.k = 0;
 	} while (cmpxchg(&ic->val, old.val, new.val) != old.val);
 	} while (cmpxchg(&ic->val, old.val, new.val) != old.val);
+	read_unlock(&vcpu->kvm->arch.sca_lock);
 	wake_up(&vcpu->kvm->arch.ipte_wq);
 	wake_up(&vcpu->kvm->arch.ipte_wq);
 out:
 out:
 	mutex_unlock(&vcpu->kvm->arch.ipte_mutex);
 	mutex_unlock(&vcpu->kvm->arch.ipte_mutex);
@@ -311,24 +321,29 @@ static void ipte_lock_siif(struct kvm_vcpu *vcpu)
 {
 {
 	union ipte_control old, new, *ic;
 	union ipte_control old, new, *ic;
 
 
-	ic = &vcpu->kvm->arch.sca->ipte_control;
+retry:
+	read_lock(&vcpu->kvm->arch.sca_lock);
+	ic = kvm_s390_get_ipte_control(vcpu->kvm);
 	do {
 	do {
 		old = READ_ONCE(*ic);
 		old = READ_ONCE(*ic);
-		while (old.kg) {
+		if (old.kg) {
+			read_unlock(&vcpu->kvm->arch.sca_lock);
 			cond_resched();
 			cond_resched();
-			old = READ_ONCE(*ic);
+			goto retry;
 		}
 		}
 		new = old;
 		new = old;
 		new.k = 1;
 		new.k = 1;
 		new.kh++;
 		new.kh++;
 	} while (cmpxchg(&ic->val, old.val, new.val) != old.val);
 	} while (cmpxchg(&ic->val, old.val, new.val) != old.val);
+	read_unlock(&vcpu->kvm->arch.sca_lock);
 }
 }
 
 
 static void ipte_unlock_siif(struct kvm_vcpu *vcpu)
 static void ipte_unlock_siif(struct kvm_vcpu *vcpu)
 {
 {
 	union ipte_control old, new, *ic;
 	union ipte_control old, new, *ic;
 
 
-	ic = &vcpu->kvm->arch.sca->ipte_control;
+	read_lock(&vcpu->kvm->arch.sca_lock);
+	ic = kvm_s390_get_ipte_control(vcpu->kvm);
 	do {
 	do {
 		old = READ_ONCE(*ic);
 		old = READ_ONCE(*ic);
 		new = old;
 		new = old;
@@ -336,6 +351,7 @@ static void ipte_unlock_siif(struct kvm_vcpu *vcpu)
 		if (!new.kh)
 		if (!new.kh)
 			new.k = 0;
 			new.k = 0;
 	} while (cmpxchg(&ic->val, old.val, new.val) != old.val);
 	} while (cmpxchg(&ic->val, old.val, new.val) != old.val);
+	read_unlock(&vcpu->kvm->arch.sca_lock);
 	if (!new.kh)
 	if (!new.kh)
 		wake_up(&vcpu->kvm->arch.ipte_wq);
 		wake_up(&vcpu->kvm->arch.ipte_wq);
 }
 }

+ 3 - 4
arch/s390/kvm/intercept.c

@@ -54,9 +54,6 @@ void kvm_s390_rewind_psw(struct kvm_vcpu *vcpu, int ilc)
 static int handle_noop(struct kvm_vcpu *vcpu)
 static int handle_noop(struct kvm_vcpu *vcpu)
 {
 {
 	switch (vcpu->arch.sie_block->icptcode) {
 	switch (vcpu->arch.sie_block->icptcode) {
-	case 0x0:
-		vcpu->stat.exit_null++;
-		break;
 	case 0x10:
 	case 0x10:
 		vcpu->stat.exit_external_request++;
 		vcpu->stat.exit_external_request++;
 		break;
 		break;
@@ -338,8 +335,10 @@ static int handle_partial_execution(struct kvm_vcpu *vcpu)
 
 
 int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu)
 int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu)
 {
 {
+	if (kvm_is_ucontrol(vcpu->kvm))
+		return -EOPNOTSUPP;
+
 	switch (vcpu->arch.sie_block->icptcode) {
 	switch (vcpu->arch.sie_block->icptcode) {
-	case 0x00:
 	case 0x10:
 	case 0x10:
 	case 0x18:
 	case 0x18:
 		return handle_noop(vcpu);
 		return handle_noop(vcpu);

+ 106 - 27
arch/s390/kvm/interrupt.c

@@ -34,6 +34,106 @@
 #define PFAULT_DONE 0x0680
 #define PFAULT_DONE 0x0680
 #define VIRTIO_PARAM 0x0d00
 #define VIRTIO_PARAM 0x0d00
 
 
+/* handle external calls via sigp interpretation facility */
+static int sca_ext_call_pending(struct kvm_vcpu *vcpu, int *src_id)
+{
+	int c, scn;
+
+	if (!(atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_ECALL_PEND))
+		return 0;
+
+	read_lock(&vcpu->kvm->arch.sca_lock);
+	if (vcpu->kvm->arch.use_esca) {
+		struct esca_block *sca = vcpu->kvm->arch.sca;
+		union esca_sigp_ctrl sigp_ctrl =
+			sca->cpu[vcpu->vcpu_id].sigp_ctrl;
+
+		c = sigp_ctrl.c;
+		scn = sigp_ctrl.scn;
+	} else {
+		struct bsca_block *sca = vcpu->kvm->arch.sca;
+		union bsca_sigp_ctrl sigp_ctrl =
+			sca->cpu[vcpu->vcpu_id].sigp_ctrl;
+
+		c = sigp_ctrl.c;
+		scn = sigp_ctrl.scn;
+	}
+	read_unlock(&vcpu->kvm->arch.sca_lock);
+
+	if (src_id)
+		*src_id = scn;
+
+	return c;
+}
+
+static int sca_inject_ext_call(struct kvm_vcpu *vcpu, int src_id)
+{
+	int expect, rc;
+
+	read_lock(&vcpu->kvm->arch.sca_lock);
+	if (vcpu->kvm->arch.use_esca) {
+		struct esca_block *sca = vcpu->kvm->arch.sca;
+		union esca_sigp_ctrl *sigp_ctrl =
+			&(sca->cpu[vcpu->vcpu_id].sigp_ctrl);
+		union esca_sigp_ctrl new_val = {0}, old_val = *sigp_ctrl;
+
+		new_val.scn = src_id;
+		new_val.c = 1;
+		old_val.c = 0;
+
+		expect = old_val.value;
+		rc = cmpxchg(&sigp_ctrl->value, old_val.value, new_val.value);
+	} else {
+		struct bsca_block *sca = vcpu->kvm->arch.sca;
+		union bsca_sigp_ctrl *sigp_ctrl =
+			&(sca->cpu[vcpu->vcpu_id].sigp_ctrl);
+		union bsca_sigp_ctrl new_val = {0}, old_val = *sigp_ctrl;
+
+		new_val.scn = src_id;
+		new_val.c = 1;
+		old_val.c = 0;
+
+		expect = old_val.value;
+		rc = cmpxchg(&sigp_ctrl->value, old_val.value, new_val.value);
+	}
+	read_unlock(&vcpu->kvm->arch.sca_lock);
+
+	if (rc != expect) {
+		/* another external call is pending */
+		return -EBUSY;
+	}
+	atomic_or(CPUSTAT_ECALL_PEND, &vcpu->arch.sie_block->cpuflags);
+	return 0;
+}
+
+static void sca_clear_ext_call(struct kvm_vcpu *vcpu)
+{
+	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+	int rc, expect;
+
+	atomic_andnot(CPUSTAT_ECALL_PEND, li->cpuflags);
+	read_lock(&vcpu->kvm->arch.sca_lock);
+	if (vcpu->kvm->arch.use_esca) {
+		struct esca_block *sca = vcpu->kvm->arch.sca;
+		union esca_sigp_ctrl *sigp_ctrl =
+			&(sca->cpu[vcpu->vcpu_id].sigp_ctrl);
+		union esca_sigp_ctrl old = *sigp_ctrl;
+
+		expect = old.value;
+		rc = cmpxchg(&sigp_ctrl->value, old.value, 0);
+	} else {
+		struct bsca_block *sca = vcpu->kvm->arch.sca;
+		union bsca_sigp_ctrl *sigp_ctrl =
+			&(sca->cpu[vcpu->vcpu_id].sigp_ctrl);
+		union bsca_sigp_ctrl old = *sigp_ctrl;
+
+		expect = old.value;
+		rc = cmpxchg(&sigp_ctrl->value, old.value, 0);
+	}
+	read_unlock(&vcpu->kvm->arch.sca_lock);
+	WARN_ON(rc != expect); /* cannot clear? */
+}
+
 int psw_extint_disabled(struct kvm_vcpu *vcpu)
 int psw_extint_disabled(struct kvm_vcpu *vcpu)
 {
 {
 	return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_EXT);
 	return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_EXT);
@@ -792,13 +892,11 @@ static const deliver_irq_t deliver_irq_funcs[] = {
 int kvm_s390_ext_call_pending(struct kvm_vcpu *vcpu)
 int kvm_s390_ext_call_pending(struct kvm_vcpu *vcpu)
 {
 {
 	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
 	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
-	uint8_t sigp_ctrl = vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sigp_ctrl;
 
 
 	if (!sclp.has_sigpif)
 	if (!sclp.has_sigpif)
 		return test_bit(IRQ_PEND_EXT_EXTERNAL, &li->pending_irqs);
 		return test_bit(IRQ_PEND_EXT_EXTERNAL, &li->pending_irqs);
 
 
-	return (sigp_ctrl & SIGP_CTRL_C) &&
-	       (atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_ECALL_PEND);
+	return sca_ext_call_pending(vcpu, NULL);
 }
 }
 
 
 int kvm_s390_vcpu_has_irq(struct kvm_vcpu *vcpu, int exclude_stop)
 int kvm_s390_vcpu_has_irq(struct kvm_vcpu *vcpu, int exclude_stop)
@@ -909,9 +1007,7 @@ void kvm_s390_clear_local_irqs(struct kvm_vcpu *vcpu)
 	memset(&li->irq, 0, sizeof(li->irq));
 	memset(&li->irq, 0, sizeof(li->irq));
 	spin_unlock(&li->lock);
 	spin_unlock(&li->lock);
 
 
-	/* clear pending external calls set by sigp interpretation facility */
-	atomic_andnot(CPUSTAT_ECALL_PEND, li->cpuflags);
-	vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sigp_ctrl = 0;
+	sca_clear_ext_call(vcpu);
 }
 }
 
 
 int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
 int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
@@ -1003,21 +1099,6 @@ static int __inject_pfault_init(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
 	return 0;
 	return 0;
 }
 }
 
 
-static int __inject_extcall_sigpif(struct kvm_vcpu *vcpu, uint16_t src_id)
-{
-	unsigned char new_val, old_val;
-	uint8_t *sigp_ctrl = &vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sigp_ctrl;
-
-	new_val = SIGP_CTRL_C | (src_id & SIGP_CTRL_SCN_MASK);
-	old_val = *sigp_ctrl & ~SIGP_CTRL_C;
-	if (cmpxchg(sigp_ctrl, old_val, new_val) != old_val) {
-		/* another external call is pending */
-		return -EBUSY;
-	}
-	atomic_or(CPUSTAT_ECALL_PEND, &vcpu->arch.sie_block->cpuflags);
-	return 0;
-}
-
 static int __inject_extcall(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
 static int __inject_extcall(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
 {
 {
 	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
 	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
@@ -1034,7 +1115,7 @@ static int __inject_extcall(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
 		return -EINVAL;
 		return -EINVAL;
 
 
 	if (sclp.has_sigpif)
 	if (sclp.has_sigpif)
-		return __inject_extcall_sigpif(vcpu, src_id);
+		return sca_inject_ext_call(vcpu, src_id);
 
 
 	if (test_and_set_bit(IRQ_PEND_EXT_EXTERNAL, &li->pending_irqs))
 	if (test_and_set_bit(IRQ_PEND_EXT_EXTERNAL, &li->pending_irqs))
 		return -EBUSY;
 		return -EBUSY;
@@ -2203,7 +2284,7 @@ static void store_local_irq(struct kvm_s390_local_interrupt *li,
 
 
 int kvm_s390_get_irq_state(struct kvm_vcpu *vcpu, __u8 __user *buf, int len)
 int kvm_s390_get_irq_state(struct kvm_vcpu *vcpu, __u8 __user *buf, int len)
 {
 {
-	uint8_t sigp_ctrl = vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sigp_ctrl;
+	int scn;
 	unsigned long sigp_emerg_pending[BITS_TO_LONGS(KVM_MAX_VCPUS)];
 	unsigned long sigp_emerg_pending[BITS_TO_LONGS(KVM_MAX_VCPUS)];
 	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
 	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
 	unsigned long pending_irqs;
 	unsigned long pending_irqs;
@@ -2243,14 +2324,12 @@ int kvm_s390_get_irq_state(struct kvm_vcpu *vcpu, __u8 __user *buf, int len)
 		}
 		}
 	}
 	}
 
 
-	if ((sigp_ctrl & SIGP_CTRL_C) &&
-	    (atomic_read(&vcpu->arch.sie_block->cpuflags) &
-	     CPUSTAT_ECALL_PEND)) {
+	if (sca_ext_call_pending(vcpu, &scn)) {
 		if (n + sizeof(irq) > len)
 		if (n + sizeof(irq) > len)
 			return -ENOBUFS;
 			return -ENOBUFS;
 		memset(&irq, 0, sizeof(irq));
 		memset(&irq, 0, sizeof(irq));
 		irq.type = KVM_S390_INT_EXTERNAL_CALL;
 		irq.type = KVM_S390_INT_EXTERNAL_CALL;
-		irq.u.extcall.code = sigp_ctrl & SIGP_CTRL_SCN_MASK;
+		irq.u.extcall.code = scn;
 		if (copy_to_user(&buf[n], &irq, sizeof(irq)))
 		if (copy_to_user(&buf[n], &irq, sizeof(irq)))
 			return -EFAULT;
 			return -EFAULT;
 		n += sizeof(irq);
 		n += sizeof(irq);

+ 219 - 81
arch/s390/kvm/kvm-s390.c

@@ -246,7 +246,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 		break;
 		break;
 	case KVM_CAP_NR_VCPUS:
 	case KVM_CAP_NR_VCPUS:
 	case KVM_CAP_MAX_VCPUS:
 	case KVM_CAP_MAX_VCPUS:
-		r = KVM_MAX_VCPUS;
+		r = sclp.has_esca ? KVM_S390_ESCA_CPU_SLOTS
+				  : KVM_S390_BSCA_CPU_SLOTS;
 		break;
 		break;
 	case KVM_CAP_NR_MEMSLOTS:
 	case KVM_CAP_NR_MEMSLOTS:
 		r = KVM_USER_MEM_SLOTS;
 		r = KVM_USER_MEM_SLOTS;
@@ -257,6 +258,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 	case KVM_CAP_S390_VECTOR_REGISTERS:
 	case KVM_CAP_S390_VECTOR_REGISTERS:
 		r = MACHINE_HAS_VX;
 		r = MACHINE_HAS_VX;
 		break;
 		break;
+	case KVM_CAP_S390_RI:
+		r = test_facility(64);
+		break;
 	default:
 	default:
 		r = 0;
 		r = 0;
 	}
 	}
@@ -283,6 +287,8 @@ static void kvm_s390_sync_dirty_log(struct kvm *kvm,
 }
 }
 
 
 /* Section: vm related */
 /* Section: vm related */
+static void sca_del_vcpu(struct kvm_vcpu *vcpu);
+
 /*
 /*
  * Get (and clear) the dirty memory log for a memory slot.
  * Get (and clear) the dirty memory log for a memory slot.
  */
  */
@@ -355,6 +361,20 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
 			 r ? "(not available)" : "(success)");
 			 r ? "(not available)" : "(success)");
 		break;
 		break;
+	case KVM_CAP_S390_RI:
+		r = -EINVAL;
+		mutex_lock(&kvm->lock);
+		if (atomic_read(&kvm->online_vcpus)) {
+			r = -EBUSY;
+		} else if (test_facility(64)) {
+			set_kvm_facility(kvm->arch.model.fac->mask, 64);
+			set_kvm_facility(kvm->arch.model.fac->list, 64);
+			r = 0;
+		}
+		mutex_unlock(&kvm->lock);
+		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
+			 r ? "(not available)" : "(success)");
+		break;
 	case KVM_CAP_S390_USER_STSI:
 	case KVM_CAP_S390_USER_STSI:
 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
 		kvm->arch.user_stsi = 1;
 		kvm->arch.user_stsi = 1;
@@ -375,8 +395,8 @@ static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *att
 	case KVM_S390_VM_MEM_LIMIT_SIZE:
 	case KVM_S390_VM_MEM_LIMIT_SIZE:
 		ret = 0;
 		ret = 0;
 		VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
 		VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
-			 kvm->arch.gmap->asce_end);
-		if (put_user(kvm->arch.gmap->asce_end, (u64 __user *)attr->addr))
+			 kvm->arch.mem_limit);
+		if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
 			ret = -EFAULT;
 			ret = -EFAULT;
 		break;
 		break;
 	default:
 	default:
@@ -428,9 +448,17 @@ static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *att
 		if (get_user(new_limit, (u64 __user *)attr->addr))
 		if (get_user(new_limit, (u64 __user *)attr->addr))
 			return -EFAULT;
 			return -EFAULT;
 
 
-		if (new_limit > kvm->arch.gmap->asce_end)
+		if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
+		    new_limit > kvm->arch.mem_limit)
 			return -E2BIG;
 			return -E2BIG;
 
 
+		if (!new_limit)
+			return -EINVAL;
+
+		/* gmap_alloc takes last usable address */
+		if (new_limit != KVM_S390_NO_MEM_LIMIT)
+			new_limit -= 1;
+
 		ret = -EBUSY;
 		ret = -EBUSY;
 		mutex_lock(&kvm->lock);
 		mutex_lock(&kvm->lock);
 		if (atomic_read(&kvm->online_vcpus) == 0) {
 		if (atomic_read(&kvm->online_vcpus) == 0) {
@@ -447,7 +475,9 @@ static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *att
 			}
 			}
 		}
 		}
 		mutex_unlock(&kvm->lock);
 		mutex_unlock(&kvm->lock);
-		VM_EVENT(kvm, 3, "SET: max guest memory: %lu bytes", new_limit);
+		VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
+		VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
+			 (void *) kvm->arch.gmap->asce);
 		break;
 		break;
 	}
 	}
 	default:
 	default:
@@ -1024,7 +1054,7 @@ static int kvm_s390_apxa_installed(void)
 	u8 config[128];
 	u8 config[128];
 	int cc;
 	int cc;
 
 
-	if (test_facility(2) && test_facility(12)) {
+	if (test_facility(12)) {
 		cc = kvm_s390_query_ap_config(config);
 		cc = kvm_s390_query_ap_config(config);
 
 
 		if (cc)
 		if (cc)
@@ -1075,6 +1105,15 @@ static int kvm_s390_crypto_init(struct kvm *kvm)
 	return 0;
 	return 0;
 }
 }
 
 
+static void sca_dispose(struct kvm *kvm)
+{
+	if (kvm->arch.use_esca)
+		free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
+	else
+		free_page((unsigned long)(kvm->arch.sca));
+	kvm->arch.sca = NULL;
+}
+
 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 {
 {
 	int i, rc;
 	int i, rc;
@@ -1098,14 +1137,17 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 
 
 	rc = -ENOMEM;
 	rc = -ENOMEM;
 
 
-	kvm->arch.sca = (struct sca_block *) get_zeroed_page(GFP_KERNEL);
+	kvm->arch.use_esca = 0; /* start with basic SCA */
+	rwlock_init(&kvm->arch.sca_lock);
+	kvm->arch.sca = (struct bsca_block *) get_zeroed_page(GFP_KERNEL);
 	if (!kvm->arch.sca)
 	if (!kvm->arch.sca)
 		goto out_err;
 		goto out_err;
 	spin_lock(&kvm_lock);
 	spin_lock(&kvm_lock);
 	sca_offset += 16;
 	sca_offset += 16;
-	if (sca_offset + sizeof(struct sca_block) > PAGE_SIZE)
+	if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
 		sca_offset = 0;
 		sca_offset = 0;
-	kvm->arch.sca = (struct sca_block *) ((char *) kvm->arch.sca + sca_offset);
+	kvm->arch.sca = (struct bsca_block *)
+			((char *) kvm->arch.sca + sca_offset);
 	spin_unlock(&kvm_lock);
 	spin_unlock(&kvm_lock);
 
 
 	sprintf(debug_name, "kvm-%u", current->pid);
 	sprintf(debug_name, "kvm-%u", current->pid);
@@ -1157,8 +1199,14 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 
 
 	if (type & KVM_VM_S390_UCONTROL) {
 	if (type & KVM_VM_S390_UCONTROL) {
 		kvm->arch.gmap = NULL;
 		kvm->arch.gmap = NULL;
+		kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
 	} else {
 	} else {
-		kvm->arch.gmap = gmap_alloc(current->mm, (1UL << 44) - 1);
+		if (sclp.hamax == U64_MAX)
+			kvm->arch.mem_limit = TASK_MAX_SIZE;
+		else
+			kvm->arch.mem_limit = min_t(unsigned long, TASK_MAX_SIZE,
+						    sclp.hamax + 1);
+		kvm->arch.gmap = gmap_alloc(current->mm, kvm->arch.mem_limit - 1);
 		if (!kvm->arch.gmap)
 		if (!kvm->arch.gmap)
 			goto out_err;
 			goto out_err;
 		kvm->arch.gmap->private = kvm;
 		kvm->arch.gmap->private = kvm;
@@ -1170,14 +1218,14 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 	kvm->arch.epoch = 0;
 	kvm->arch.epoch = 0;
 
 
 	spin_lock_init(&kvm->arch.start_stop_lock);
 	spin_lock_init(&kvm->arch.start_stop_lock);
-	KVM_EVENT(3, "vm 0x%p created by pid %u", kvm, current->pid);
+	KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
 
 
 	return 0;
 	return 0;
 out_err:
 out_err:
 	kfree(kvm->arch.crypto.crycb);
 	kfree(kvm->arch.crypto.crycb);
 	free_page((unsigned long)kvm->arch.model.fac);
 	free_page((unsigned long)kvm->arch.model.fac);
 	debug_unregister(kvm->arch.dbf);
 	debug_unregister(kvm->arch.dbf);
-	free_page((unsigned long)(kvm->arch.sca));
+	sca_dispose(kvm);
 	KVM_EVENT(3, "creation of vm failed: %d", rc);
 	KVM_EVENT(3, "creation of vm failed: %d", rc);
 	return rc;
 	return rc;
 }
 }
@@ -1188,14 +1236,8 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
 	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
 	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
 	kvm_s390_clear_local_irqs(vcpu);
 	kvm_s390_clear_local_irqs(vcpu);
 	kvm_clear_async_pf_completion_queue(vcpu);
 	kvm_clear_async_pf_completion_queue(vcpu);
-	if (!kvm_is_ucontrol(vcpu->kvm)) {
-		clear_bit(63 - vcpu->vcpu_id,
-			  (unsigned long *) &vcpu->kvm->arch.sca->mcn);
-		if (vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda ==
-		    (__u64) vcpu->arch.sie_block)
-			vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda = 0;
-	}
-	smp_mb();
+	if (!kvm_is_ucontrol(vcpu->kvm))
+		sca_del_vcpu(vcpu);
 
 
 	if (kvm_is_ucontrol(vcpu->kvm))
 	if (kvm_is_ucontrol(vcpu->kvm))
 		gmap_free(vcpu->arch.gmap);
 		gmap_free(vcpu->arch.gmap);
@@ -1228,14 +1270,14 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
 {
 {
 	kvm_free_vcpus(kvm);
 	kvm_free_vcpus(kvm);
 	free_page((unsigned long)kvm->arch.model.fac);
 	free_page((unsigned long)kvm->arch.model.fac);
-	free_page((unsigned long)(kvm->arch.sca));
+	sca_dispose(kvm);
 	debug_unregister(kvm->arch.dbf);
 	debug_unregister(kvm->arch.dbf);
 	kfree(kvm->arch.crypto.crycb);
 	kfree(kvm->arch.crypto.crycb);
 	if (!kvm_is_ucontrol(kvm))
 	if (!kvm_is_ucontrol(kvm))
 		gmap_free(kvm->arch.gmap);
 		gmap_free(kvm->arch.gmap);
 	kvm_s390_destroy_adapters(kvm);
 	kvm_s390_destroy_adapters(kvm);
 	kvm_s390_clear_float_irqs(kvm);
 	kvm_s390_clear_float_irqs(kvm);
-	KVM_EVENT(3, "vm 0x%p destroyed", kvm);
+	KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
 }
 }
 
 
 /* Section: vcpu related */
 /* Section: vcpu related */
@@ -1249,6 +1291,117 @@ static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
 	return 0;
 	return 0;
 }
 }
 
 
+static void sca_del_vcpu(struct kvm_vcpu *vcpu)
+{
+	read_lock(&vcpu->kvm->arch.sca_lock);
+	if (vcpu->kvm->arch.use_esca) {
+		struct esca_block *sca = vcpu->kvm->arch.sca;
+
+		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
+		sca->cpu[vcpu->vcpu_id].sda = 0;
+	} else {
+		struct bsca_block *sca = vcpu->kvm->arch.sca;
+
+		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
+		sca->cpu[vcpu->vcpu_id].sda = 0;
+	}
+	read_unlock(&vcpu->kvm->arch.sca_lock);
+}
+
+static void sca_add_vcpu(struct kvm_vcpu *vcpu)
+{
+	read_lock(&vcpu->kvm->arch.sca_lock);
+	if (vcpu->kvm->arch.use_esca) {
+		struct esca_block *sca = vcpu->kvm->arch.sca;
+
+		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
+		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
+		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
+		vcpu->arch.sie_block->ecb2 |= 0x04U;
+		set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
+	} else {
+		struct bsca_block *sca = vcpu->kvm->arch.sca;
+
+		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
+		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
+		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
+		set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
+	}
+	read_unlock(&vcpu->kvm->arch.sca_lock);
+}
+
+/* Basic SCA to Extended SCA data copy routines */
+static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
+{
+	d->sda = s->sda;
+	d->sigp_ctrl.c = s->sigp_ctrl.c;
+	d->sigp_ctrl.scn = s->sigp_ctrl.scn;
+}
+
+static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
+{
+	int i;
+
+	d->ipte_control = s->ipte_control;
+	d->mcn[0] = s->mcn;
+	for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
+		sca_copy_entry(&d->cpu[i], &s->cpu[i]);
+}
+
+static int sca_switch_to_extended(struct kvm *kvm)
+{
+	struct bsca_block *old_sca = kvm->arch.sca;
+	struct esca_block *new_sca;
+	struct kvm_vcpu *vcpu;
+	unsigned int vcpu_idx;
+	u32 scaol, scaoh;
+
+	new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
+	if (!new_sca)
+		return -ENOMEM;
+
+	scaoh = (u32)((u64)(new_sca) >> 32);
+	scaol = (u32)(u64)(new_sca) & ~0x3fU;
+
+	kvm_s390_vcpu_block_all(kvm);
+	write_lock(&kvm->arch.sca_lock);
+
+	sca_copy_b_to_e(new_sca, old_sca);
+
+	kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
+		vcpu->arch.sie_block->scaoh = scaoh;
+		vcpu->arch.sie_block->scaol = scaol;
+		vcpu->arch.sie_block->ecb2 |= 0x04U;
+	}
+	kvm->arch.sca = new_sca;
+	kvm->arch.use_esca = 1;
+
+	write_unlock(&kvm->arch.sca_lock);
+	kvm_s390_vcpu_unblock_all(kvm);
+
+	free_page((unsigned long)old_sca);
+
+	VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
+		 old_sca, kvm->arch.sca);
+	return 0;
+}
+
+static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
+{
+	int rc;
+
+	if (id < KVM_S390_BSCA_CPU_SLOTS)
+		return true;
+	if (!sclp.has_esca)
+		return false;
+
+	mutex_lock(&kvm->lock);
+	rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
+	mutex_unlock(&kvm->lock);
+
+	return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
+}
+
int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
{
	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
@@ -1259,6 +1412,8 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
				    KVM_SYNC_CRS |
				    KVM_SYNC_ARCH0 |
				    KVM_SYNC_PFAULT;
+	if (test_kvm_facility(vcpu->kvm, 64))
+		vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
	if (test_kvm_facility(vcpu->kvm, 129))
		vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
 
@@ -1369,8 +1524,11 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
	vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
	preempt_enable();
	mutex_unlock(&vcpu->kvm->lock);
-	if (!kvm_is_ucontrol(vcpu->kvm))
+	if (!kvm_is_ucontrol(vcpu->kvm)) {
		vcpu->arch.gmap = vcpu->kvm->arch.gmap;
+		sca_add_vcpu(vcpu);
+	}
+
}

static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
@@ -1439,10 +1597,13 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
		vcpu->arch.sie_block->eca |= 1;
	if (sclp.has_sigpif)
		vcpu->arch.sie_block->eca |= 0x10000000U;
+	if (test_kvm_facility(vcpu->kvm, 64))
+		vcpu->arch.sie_block->ecb3 |= 0x01;
	if (test_kvm_facility(vcpu->kvm, 129)) {
		vcpu->arch.sie_block->eca |= 0x00020000;
		vcpu->arch.sie_block->ecd |= 0x20000000;
	}
+	vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
	vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;

	if (vcpu->kvm->arch.use_cmma) {
@@ -1465,7 +1626,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
	struct sie_page *sie_page;
	int rc = -EINVAL;
 
-	if (id >= KVM_MAX_VCPUS)
+	if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
		goto out;

	rc = -ENOMEM;
@@ -1482,20 +1643,6 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
	vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;

	vcpu->arch.sie_block->icpua = id;
-	if (!kvm_is_ucontrol(kvm)) {
-		if (!kvm->arch.sca) {
-			WARN_ON_ONCE(1);
-			goto out_free_cpu;
-		}
-		if (!kvm->arch.sca->cpu[id].sda)
-			kvm->arch.sca->cpu[id].sda =
-				(__u64) vcpu->arch.sie_block;
-		vcpu->arch.sie_block->scaoh =
-			(__u32)(((__u64)kvm->arch.sca) >> 32);
-		vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca;
-		set_bit(63 - id, (unsigned long *) &kvm->arch.sca->mcn);
-	}
-
	spin_lock_init(&vcpu->arch.local_int.lock);
	vcpu->arch.local_int.float_int = &kvm->arch.float_int;
	vcpu->arch.local_int.wq = &vcpu->wq;
@@ -1509,15 +1656,13 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
	 */
	vcpu->arch.guest_fpregs.fprs = kzalloc(sizeof(freg_t) * __NUM_FPRS,
					       GFP_KERNEL);
-	if (!vcpu->arch.guest_fpregs.fprs) {
-		rc = -ENOMEM;
+	if (!vcpu->arch.guest_fpregs.fprs)
		goto out_free_sie_block;
-	}

	rc = kvm_vcpu_init(vcpu, kvm, id);
	if (rc)
		goto out_free_sie_block;
-	VM_EVENT(kvm, 3, "create cpu %d at %p, sie block at %p", id, vcpu,
+	VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
		 vcpu->arch.sie_block);
	trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
 
@@ -2013,7 +2158,8 @@ static int vcpu_pre_run(struct kvm_vcpu *vcpu)
	 */
	kvm_check_async_pf_completion(vcpu);
 
-	memcpy(&vcpu->arch.sie_block->gg14, &vcpu->run->s.regs.gprs[14], 16);
+	vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
+	vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];

	if (need_resched())
		schedule();
@@ -2071,8 +2217,6 @@ static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)

static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
{
-	int rc = -1;
-
	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
		   vcpu->arch.sie_block->icptcode);
	trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
@@ -2080,40 +2224,36 @@ static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
	if (guestdbg_enabled(vcpu))
		kvm_s390_restore_guest_per_regs(vcpu);
 
-	if (exit_reason >= 0) {
-		rc = 0;
+	vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
+	vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
+
+	if (vcpu->arch.sie_block->icptcode > 0) {
+		int rc = kvm_handle_sie_intercept(vcpu);
+
+		if (rc != -EOPNOTSUPP)
+			return rc;
+		vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
+		vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
+		vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
+		vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
+		return -EREMOTE;
+	} else if (exit_reason != -EFAULT) {
+		vcpu->stat.exit_null++;
+		return 0;
	} else if (kvm_is_ucontrol(vcpu->kvm)) {
		vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
		vcpu->run->s390_ucontrol.trans_exc_code =
						current->thread.gmap_addr;
		vcpu->run->s390_ucontrol.pgm_code = 0x10;
-		rc = -EREMOTE;
-
+		return -EREMOTE;
	} else if (current->thread.gmap_pfault) {
		trace_kvm_s390_major_guest_pfault(vcpu);
		current->thread.gmap_pfault = 0;
-		if (kvm_arch_setup_async_pf(vcpu)) {
-			rc = 0;
-		} else {
-			gpa_t gpa = current->thread.gmap_addr;
-			rc = kvm_arch_fault_in_page(vcpu, gpa, 1);
-		}
+		if (kvm_arch_setup_async_pf(vcpu))
+			return 0;
+		return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
	}
-
-	if (rc == -1)
-		rc = vcpu_post_run_fault_in_sie(vcpu);
-
-	memcpy(&vcpu->run->s.regs.gprs[14], &vcpu->arch.sie_block->gg14, 16);
-
-	if (rc == 0) {
-		if (kvm_is_ucontrol(vcpu->kvm))
-			/* Don't exit for host interrupts. */
-			rc = vcpu->arch.sie_block->icptcode ? -EOPNOTSUPP : 0;
-		else
-			rc = kvm_handle_sie_intercept(vcpu);
-	}
-
-	return rc;
+	return vcpu_post_run_fault_in_sie(vcpu);
}

static int __vcpu_run(struct kvm_vcpu *vcpu)
@@ -2233,18 +2373,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
		rc = 0;
	}
 
-	if (rc == -EOPNOTSUPP) {
-		/* intercept cannot be handled in-kernel, prepare kvm-run */
-		kvm_run->exit_reason         = KVM_EXIT_S390_SIEIC;
-		kvm_run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
-		kvm_run->s390_sieic.ipa      = vcpu->arch.sie_block->ipa;
-		kvm_run->s390_sieic.ipb      = vcpu->arch.sie_block->ipb;
-		rc = 0;
-	}
-
	if (rc == -EREMOTE) {
-		/* intercept was handled, but userspace support is needed
-		 * kvm_run has been prepared by the handler */
+		/* userspace support is needed, kvm_run has been prepared */
		rc = 0;
	}
 
@@ -2736,6 +2866,9 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
	if (mem->memory_size & 0xffffful)
		return -EINVAL;
 
+	if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
+		return -EINVAL;
+
	return 0;
}
 
@@ -2767,6 +2900,11 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,

static int __init kvm_s390_init(void)
{
+	if (!sclp.has_sief2) {
+		pr_info("SIE not available\n");
+		return -ENODEV;
+	}
+
	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
}
 

+ 7 - 0
arch/s390/kvm/kvm-s390.h

@@ -340,4 +340,11 @@ void kvm_s390_clear_bp_data(struct kvm_vcpu *vcpu);
void kvm_s390_prepare_debug_exit(struct kvm_vcpu *vcpu);
void kvm_s390_handle_per_event(struct kvm_vcpu *vcpu);
 
+/* support for Basic/Extended SCA handling */
+static inline union ipte_control *kvm_s390_get_ipte_control(struct kvm *kvm)
+{
+	struct bsca_block *sca = kvm->arch.sca; /* SCA version doesn't matter */
+
+	return &sca->ipte_control;
+}
#endif

+ 3 - 3
arch/s390/kvm/trace-s390.h

@@ -55,8 +55,8 @@ TRACE_EVENT(kvm_s390_create_vcpu,
		    __entry->sie_block = sie_block;
		    ),
 
-	    TP_printk("create cpu %d at %p, sie block at %p", __entry->id,
-		      __entry->vcpu, __entry->sie_block)
+	    TP_printk("create cpu %d at 0x%pK, sie block at 0x%pK",
+		      __entry->id, __entry->vcpu, __entry->sie_block)
	);

TRACE_EVENT(kvm_s390_destroy_vcpu,
@@ -254,7 +254,7 @@ TRACE_EVENT(kvm_s390_enable_css,
		    __entry->kvm = kvm;
		    ),
 
-	    TP_printk("enabling channel I/O support (kvm @ %p)\n",
+	    TP_printk("enabling channel I/O support (kvm @ %pK)\n",
		      __entry->kvm)
	);
 

+ 2 - 2
arch/s390/mm/pgtable.c

@@ -133,7 +133,7 @@ void crst_table_downgrade(struct mm_struct *mm, unsigned long limit)
/**
 * gmap_alloc - allocate a guest address space
 * @mm: pointer to the parent mm_struct
- * @limit: maximum size of the gmap address space
+ * @limit: maximum address of the gmap address space
 *
 * Returns a guest address space structure.
 */
@@ -402,7 +402,7 @@ int gmap_map_segment(struct gmap *gmap, unsigned long from,
	if ((from | to | len) & (PMD_SIZE - 1))
		return -EINVAL;
	if (len == 0 || from + len < from || to + len < to ||
-	    from + len > TASK_MAX_SIZE || to + len > gmap->asce_end)
+	    from + len - 1 > TASK_MAX_SIZE || to + len - 1 > gmap->asce_end)
		return -EINVAL;

	flush = 0;

+ 70 - 5
arch/x86/include/asm/kvm_host.h

@@ -25,6 +25,7 @@
#include <linux/pvclock_gtod.h>
#include <linux/clocksource.h>
#include <linux/irqbypass.h>
+#include <linux/hyperv.h>

#include <asm/pvclock-abi.h>
#include <asm/desc.h>
@@ -45,6 +46,31 @@

#define KVM_IRQCHIP_NUM_PINS  KVM_IOAPIC_NUM_PINS
 
+/* x86-specific vcpu->requests bit members */
+#define KVM_REQ_MIGRATE_TIMER      8
+#define KVM_REQ_REPORT_TPR_ACCESS  9
+#define KVM_REQ_TRIPLE_FAULT      10
+#define KVM_REQ_MMU_SYNC          11
+#define KVM_REQ_CLOCK_UPDATE      12
+#define KVM_REQ_DEACTIVATE_FPU    13
+#define KVM_REQ_EVENT             14
+#define KVM_REQ_APF_HALT          15
+#define KVM_REQ_STEAL_UPDATE      16
+#define KVM_REQ_NMI               17
+#define KVM_REQ_PMU               18
+#define KVM_REQ_PMI               19
+#define KVM_REQ_SMI               20
+#define KVM_REQ_MASTERCLOCK_UPDATE 21
+#define KVM_REQ_MCLOCK_INPROGRESS 22
+#define KVM_REQ_SCAN_IOAPIC       23
+#define KVM_REQ_GLOBAL_CLOCK_UPDATE 24
+#define KVM_REQ_APIC_PAGE_RELOAD  25
+#define KVM_REQ_HV_CRASH          26
+#define KVM_REQ_IOAPIC_EOI_EXIT   27
+#define KVM_REQ_HV_RESET          28
+#define KVM_REQ_HV_EXIT           29
+#define KVM_REQ_HV_STIMER         30
+
#define CR0_RESERVED_BITS                                               \
	(~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \
			  | X86_CR0_ET | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM \
@@ -213,6 +239,10 @@ union kvm_mmu_page_role {
	};
};
 
+struct kvm_rmap_head {
+	unsigned long val;
+};
+
struct kvm_mmu_page {
	struct list_head link;
	struct hlist_node hash_link;
@@ -230,7 +260,7 @@ struct kvm_mmu_page {
	bool unsync;
	int root_count;          /* Currently serving as active root */
	unsigned int unsync_children;
-	unsigned long parent_ptes;	/* Reverse mapping for parent_pte */
+	struct kvm_rmap_head parent_ptes; /* rmap pointers to parent sptes */

	/* The page is obsolete if mmu_valid_gen != kvm->arch.mmu_valid_gen.  */
	unsigned long mmu_valid_gen;
@@ -374,10 +404,38 @@ struct kvm_mtrr {
	struct list_head head;
};
 
+/* Hyper-V SynIC timer */
+struct kvm_vcpu_hv_stimer {
+	struct hrtimer timer;
+	int index;
+	u64 config;
+	u64 count;
+	u64 exp_time;
+	struct hv_message msg;
+	bool msg_pending;
+};
+
+/* Hyper-V synthetic interrupt controller (SynIC)*/
+struct kvm_vcpu_hv_synic {
+	u64 version;
+	u64 control;
+	u64 msg_page;
+	u64 evt_page;
+	atomic64_t sint[HV_SYNIC_SINT_COUNT];
+	atomic_t sint_to_gsi[HV_SYNIC_SINT_COUNT];
+	DECLARE_BITMAP(auto_eoi_bitmap, 256);
+	DECLARE_BITMAP(vec_bitmap, 256);
+	bool active;
+};
+
/* Hyper-V per vcpu emulation context */
struct kvm_vcpu_hv {
	u64 hv_vapic;
	s64 runtime_offset;
+	struct kvm_vcpu_hv_synic synic;
+	struct kvm_hyperv_exit exit;
+	struct kvm_vcpu_hv_stimer stimer[HV_SYNIC_STIMER_COUNT];
+	DECLARE_BITMAP(stimer_pending_bitmap, HV_SYNIC_STIMER_COUNT);
};

struct kvm_vcpu_arch {
@@ -400,7 +458,8 @@ struct kvm_vcpu_arch {
	u64 efer;
	u64 apic_base;
	struct kvm_lapic *apic;    /* kernel irqchip context */
-	u64 eoi_exit_bitmap[4];
+	bool apicv_active;
+	DECLARE_BITMAP(ioapic_handled_vectors, 256);
	unsigned long apic_attention;
	int32_t apic_arb_prio;
	int mp_state;
@@ -589,7 +648,7 @@ struct kvm_lpage_info {
};

struct kvm_arch_memory_slot {
-	unsigned long *rmap[KVM_NR_PAGE_SIZES];
+	struct kvm_rmap_head *rmap[KVM_NR_PAGE_SIZES];
	struct kvm_lpage_info *lpage_info[KVM_NR_PAGE_SIZES - 1];
};
 
@@ -831,10 +890,11 @@ struct kvm_x86_ops {
	void (*enable_nmi_window)(struct kvm_vcpu *vcpu);
	void (*enable_irq_window)(struct kvm_vcpu *vcpu);
	void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr);
-	int (*cpu_uses_apicv)(struct kvm_vcpu *vcpu);
+	bool (*get_enable_apicv)(void);
+	void (*refresh_apicv_exec_ctrl)(struct kvm_vcpu *vcpu);
	void (*hwapic_irr_update)(struct kvm_vcpu *vcpu, int max_irr);
	void (*hwapic_isr_update)(struct kvm *kvm, int isr);
-	void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu);
+	void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
	void (*set_virtual_x2apic_mode)(struct kvm_vcpu *vcpu, bool set);
	void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu, hpa_t hpa);
	void (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector);
@@ -1086,6 +1146,8 @@ gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva,
gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva,
				struct x86_exception *exception);
 
+void kvm_vcpu_deactivate_apicv(struct kvm_vcpu *vcpu);
+
int kvm_emulate_hypercall(struct kvm_vcpu *vcpu);

int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, u32 error_code,
@@ -1231,6 +1293,9 @@ u64 kvm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc);
unsigned long kvm_get_linear_rip(struct kvm_vcpu *vcpu);
bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip);
 
+void kvm_make_mclock_inprogress_request(struct kvm *kvm);
+void kvm_make_scan_ioapic_request(struct kvm *kvm);
+
void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
				     struct kvm_async_pf *work);
void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,

+ 92 - 0
arch/x86/include/uapi/asm/hyperv.h

@@ -269,4 +269,96 @@ typedef struct _HV_REFERENCE_TSC_PAGE {
#define HV_SYNIC_SINT_AUTO_EOI		(1ULL << 17)
#define HV_SYNIC_SINT_VECTOR_MASK	(0xFF)
 
+#define HV_SYNIC_STIMER_COUNT		(4)
+
+/* Define synthetic interrupt controller message constants. */
+#define HV_MESSAGE_SIZE			(256)
+#define HV_MESSAGE_PAYLOAD_BYTE_COUNT	(240)
+#define HV_MESSAGE_PAYLOAD_QWORD_COUNT	(30)
+
+/* Define hypervisor message types. */
+enum hv_message_type {
+	HVMSG_NONE			= 0x00000000,
+
+	/* Memory access messages. */
+	HVMSG_UNMAPPED_GPA		= 0x80000000,
+	HVMSG_GPA_INTERCEPT		= 0x80000001,
+
+	/* Timer notification messages. */
+	HVMSG_TIMER_EXPIRED			= 0x80000010,
+
+	/* Error messages. */
+	HVMSG_INVALID_VP_REGISTER_VALUE	= 0x80000020,
+	HVMSG_UNRECOVERABLE_EXCEPTION	= 0x80000021,
+	HVMSG_UNSUPPORTED_FEATURE		= 0x80000022,
+
+	/* Trace buffer complete messages. */
+	HVMSG_EVENTLOG_BUFFERCOMPLETE	= 0x80000040,
+
+	/* Platform-specific processor intercept messages. */
+	HVMSG_X64_IOPORT_INTERCEPT		= 0x80010000,
+	HVMSG_X64_MSR_INTERCEPT		= 0x80010001,
+	HVMSG_X64_CPUID_INTERCEPT		= 0x80010002,
+	HVMSG_X64_EXCEPTION_INTERCEPT	= 0x80010003,
+	HVMSG_X64_APIC_EOI			= 0x80010004,
+	HVMSG_X64_LEGACY_FP_ERROR		= 0x80010005
+};
+
+/* Define synthetic interrupt controller message flags. */
+union hv_message_flags {
+	__u8 asu8;
+	struct {
+		__u8 msg_pending:1;
+		__u8 reserved:7;
+	};
+};
+
+/* Define port identifier type. */
+union hv_port_id {
+	__u32 asu32;
+	struct {
+		__u32 id:24;
+		__u32 reserved:8;
+	} u;
+};
+
+/* Define synthetic interrupt controller message header. */
+struct hv_message_header {
+	__u32 message_type;
+	__u8 payload_size;
+	union hv_message_flags message_flags;
+	__u8 reserved[2];
+	union {
+		__u64 sender;
+		union hv_port_id port;
+	};
+};
+
+/* Define synthetic interrupt controller message format. */
+struct hv_message {
+	struct hv_message_header header;
+	union {
+		__u64 payload[HV_MESSAGE_PAYLOAD_QWORD_COUNT];
+	} u;
+};
+
+/* Define the synthetic interrupt message page layout. */
+struct hv_message_page {
+	struct hv_message sint_message[HV_SYNIC_SINT_COUNT];
+};
+
+/* Define timer message payload structure. */
+struct hv_timer_message_payload {
+	__u32 timer_index;
+	__u32 reserved;
+	__u64 expiration_time;	/* When the timer expired */
+	__u64 delivery_time;	/* When the message was delivered */
+};
+
+#define HV_STIMER_ENABLE		(1ULL << 0)
+#define HV_STIMER_PERIODIC		(1ULL << 1)
+#define HV_STIMER_LAZY			(1ULL << 2)
+#define HV_STIMER_AUTOENABLE		(1ULL << 3)
+#define HV_STIMER_SINT(config)		(__u8)(((config) >> 16) & 0x0F)
+
#endif

+ 704 - 4
arch/x86/kvm/hyperv.c

@@ -23,13 +23,665 @@

#include "x86.h"
#include "lapic.h"
+#include "ioapic.h"
#include "hyperv.h"

#include <linux/kvm_host.h>
+#include <linux/highmem.h>
+#include <asm/apicdef.h>
#include <trace/events/kvm.h>

#include "trace.h"
 
+static inline u64 synic_read_sint(struct kvm_vcpu_hv_synic *synic, int sint)
+{
+	return atomic64_read(&synic->sint[sint]);
+}
+
+static inline int synic_get_sint_vector(u64 sint_value)
+{
+	if (sint_value & HV_SYNIC_SINT_MASKED)
+		return -1;
+	return sint_value & HV_SYNIC_SINT_VECTOR_MASK;
+}
+
+static bool synic_has_vector_connected(struct kvm_vcpu_hv_synic *synic,
+				      int vector)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(synic->sint); i++) {
+		if (synic_get_sint_vector(synic_read_sint(synic, i)) == vector)
+			return true;
+	}
+	return false;
+}
+
+static bool synic_has_vector_auto_eoi(struct kvm_vcpu_hv_synic *synic,
+				     int vector)
+{
+	int i;
+	u64 sint_value;
+
+	for (i = 0; i < ARRAY_SIZE(synic->sint); i++) {
+		sint_value = synic_read_sint(synic, i);
+		if (synic_get_sint_vector(sint_value) == vector &&
+		    sint_value & HV_SYNIC_SINT_AUTO_EOI)
+			return true;
+	}
+	return false;
+}
+
+static int synic_set_sint(struct kvm_vcpu_hv_synic *synic, int sint,
+			  u64 data, bool host)
+{
+	int vector;
+
+	vector = data & HV_SYNIC_SINT_VECTOR_MASK;
+	if (vector < 16 && !host)
+		return 1;
+	/*
+	 * Guest may configure multiple SINTs to use the same vector, so
+	 * we maintain a bitmap of vectors handled by synic, and a
+	 * bitmap of vectors with auto-eoi behavior.  The bitmaps are
+	 * updated here, and atomically queried on fast paths.
+	 */
+
+	atomic64_set(&synic->sint[sint], data);
+
+	if (synic_has_vector_connected(synic, vector))
+		__set_bit(vector, synic->vec_bitmap);
+	else
+		__clear_bit(vector, synic->vec_bitmap);
+
+	if (synic_has_vector_auto_eoi(synic, vector))
+		__set_bit(vector, synic->auto_eoi_bitmap);
+	else
+		__clear_bit(vector, synic->auto_eoi_bitmap);
+
+	/* Load SynIC vectors into EOI exit bitmap */
+	kvm_make_request(KVM_REQ_SCAN_IOAPIC, synic_to_vcpu(synic));
+	return 0;
+}
+
+static struct kvm_vcpu_hv_synic *synic_get(struct kvm *kvm, u32 vcpu_id)
+{
+	struct kvm_vcpu *vcpu;
+	struct kvm_vcpu_hv_synic *synic;
+
+	if (vcpu_id >= atomic_read(&kvm->online_vcpus))
+		return NULL;
+	vcpu = kvm_get_vcpu(kvm, vcpu_id);
+	if (!vcpu)
+		return NULL;
+	synic = vcpu_to_synic(vcpu);
+	return (synic->active) ? synic : NULL;
+}
+
+static void synic_clear_sint_msg_pending(struct kvm_vcpu_hv_synic *synic,
+					u32 sint)
+{
+	struct kvm_vcpu *vcpu = synic_to_vcpu(synic);
+	struct page *page;
+	gpa_t gpa;
+	struct hv_message *msg;
+	struct hv_message_page *msg_page;
+
+	gpa = synic->msg_page & PAGE_MASK;
+	page = kvm_vcpu_gfn_to_page(vcpu, gpa >> PAGE_SHIFT);
+	if (is_error_page(page)) {
+		vcpu_err(vcpu, "Hyper-V SynIC can't get msg page, gpa 0x%llx\n",
+			 gpa);
+		return;
+	}
+	msg_page = kmap_atomic(page);
+
+	msg = &msg_page->sint_message[sint];
+	msg->header.message_flags.msg_pending = 0;
+
+	kunmap_atomic(msg_page);
+	kvm_release_page_dirty(page);
+	kvm_vcpu_mark_page_dirty(vcpu, gpa >> PAGE_SHIFT);
+}
+
+static void kvm_hv_notify_acked_sint(struct kvm_vcpu *vcpu, u32 sint)
+{
+	struct kvm *kvm = vcpu->kvm;
+	struct kvm_vcpu_hv_synic *synic = vcpu_to_synic(vcpu);
+	struct kvm_vcpu_hv *hv_vcpu = vcpu_to_hv_vcpu(vcpu);
+	struct kvm_vcpu_hv_stimer *stimer;
+	int gsi, idx, stimers_pending;
+
+	trace_kvm_hv_notify_acked_sint(vcpu->vcpu_id, sint);
+
+	if (synic->msg_page & HV_SYNIC_SIMP_ENABLE)
+		synic_clear_sint_msg_pending(synic, sint);
+
+	/* Try to deliver pending Hyper-V SynIC timers messages */
+	stimers_pending = 0;
+	for (idx = 0; idx < ARRAY_SIZE(hv_vcpu->stimer); idx++) {
+		stimer = &hv_vcpu->stimer[idx];
+		if (stimer->msg_pending &&
+		    (stimer->config & HV_STIMER_ENABLE) &&
+		    HV_STIMER_SINT(stimer->config) == sint) {
+			set_bit(stimer->index,
+				hv_vcpu->stimer_pending_bitmap);
+			stimers_pending++;
+		}
+	}
+	if (stimers_pending)
+		kvm_make_request(KVM_REQ_HV_STIMER, vcpu);
+
+	idx = srcu_read_lock(&kvm->irq_srcu);
+	gsi = atomic_read(&synic->sint_to_gsi[sint]);
+	if (gsi != -1)
+		kvm_notify_acked_gsi(kvm, gsi);
+	srcu_read_unlock(&kvm->irq_srcu, idx);
+}
+
+static void synic_exit(struct kvm_vcpu_hv_synic *synic, u32 msr)
+{
+	struct kvm_vcpu *vcpu = synic_to_vcpu(synic);
+	struct kvm_vcpu_hv *hv_vcpu = &vcpu->arch.hyperv;
+
+	hv_vcpu->exit.type = KVM_EXIT_HYPERV_SYNIC;
+	hv_vcpu->exit.u.synic.msr = msr;
+	hv_vcpu->exit.u.synic.control = synic->control;
+	hv_vcpu->exit.u.synic.evt_page = synic->evt_page;
+	hv_vcpu->exit.u.synic.msg_page = synic->msg_page;
+
+	kvm_make_request(KVM_REQ_HV_EXIT, vcpu);
+}
+
+static int synic_set_msr(struct kvm_vcpu_hv_synic *synic,
+			 u32 msr, u64 data, bool host)
+{
+	struct kvm_vcpu *vcpu = synic_to_vcpu(synic);
+	int ret;
+
+	if (!synic->active)
+		return 1;
+
+	trace_kvm_hv_synic_set_msr(vcpu->vcpu_id, msr, data, host);
+
+	ret = 0;
+	switch (msr) {
+	case HV_X64_MSR_SCONTROL:
+		synic->control = data;
+		if (!host)
+			synic_exit(synic, msr);
+		break;
+	case HV_X64_MSR_SVERSION:
+		if (!host) {
+			ret = 1;
+			break;
+		}
+		synic->version = data;
+		break;
+	case HV_X64_MSR_SIEFP:
+		if (data & HV_SYNIC_SIEFP_ENABLE)
+			if (kvm_clear_guest(vcpu->kvm,
+					    data & PAGE_MASK, PAGE_SIZE)) {
+				ret = 1;
+				break;
+			}
+		synic->evt_page = data;
+		if (!host)
+			synic_exit(synic, msr);
+		break;
+	case HV_X64_MSR_SIMP:
+		if (data & HV_SYNIC_SIMP_ENABLE)
+			if (kvm_clear_guest(vcpu->kvm,
+					    data & PAGE_MASK, PAGE_SIZE)) {
+				ret = 1;
+				break;
+			}
+		synic->msg_page = data;
+		if (!host)
+			synic_exit(synic, msr);
+		break;
+	case HV_X64_MSR_EOM: {
+		int i;
+
+		for (i = 0; i < ARRAY_SIZE(synic->sint); i++)
+			kvm_hv_notify_acked_sint(vcpu, i);
+		break;
+	}
+	case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15:
+		ret = synic_set_sint(synic, msr - HV_X64_MSR_SINT0, data, host);
+		break;
+	default:
+		ret = 1;
+		break;
+	}
+	return ret;
+}
+
+static int synic_get_msr(struct kvm_vcpu_hv_synic *synic, u32 msr, u64 *pdata)
+{
+	int ret;
+
+	if (!synic->active)
+		return 1;
+
+	ret = 0;
+	switch (msr) {
+	case HV_X64_MSR_SCONTROL:
+		*pdata = synic->control;
+		break;
+	case HV_X64_MSR_SVERSION:
+		*pdata = synic->version;
+		break;
+	case HV_X64_MSR_SIEFP:
+		*pdata = synic->evt_page;
+		break;
+	case HV_X64_MSR_SIMP:
+		*pdata = synic->msg_page;
+		break;
+	case HV_X64_MSR_EOM:
+		*pdata = 0;
+		break;
+	case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15:
+		*pdata = atomic64_read(&synic->sint[msr - HV_X64_MSR_SINT0]);
+		break;
+	default:
+		ret = 1;
+		break;
+	}
+	return ret;
+}
+
+int synic_set_irq(struct kvm_vcpu_hv_synic *synic, u32 sint)
+{
+	struct kvm_vcpu *vcpu = synic_to_vcpu(synic);
+	struct kvm_lapic_irq irq;
+	int ret, vector;
+
+	if (sint >= ARRAY_SIZE(synic->sint))
+		return -EINVAL;
+
+	vector = synic_get_sint_vector(synic_read_sint(synic, sint));
+	if (vector < 0)
+		return -ENOENT;
+
+	memset(&irq, 0, sizeof(irq));
+	irq.dest_id = kvm_apic_id(vcpu->arch.apic);
+	irq.dest_mode = APIC_DEST_PHYSICAL;
+	irq.delivery_mode = APIC_DM_FIXED;
+	irq.vector = vector;
+	irq.level = 1;
+
+	ret = kvm_irq_delivery_to_apic(vcpu->kvm, NULL, &irq, NULL);
+	trace_kvm_hv_synic_set_irq(vcpu->vcpu_id, sint, irq.vector, ret);
+	return ret;
+}
+
+int kvm_hv_synic_set_irq(struct kvm *kvm, u32 vcpu_id, u32 sint)
+{
+	struct kvm_vcpu_hv_synic *synic;
+
+	synic = synic_get(kvm, vcpu_id);
+	if (!synic)
+		return -EINVAL;
+
+	return synic_set_irq(synic, sint);
+}
+
+void kvm_hv_synic_send_eoi(struct kvm_vcpu *vcpu, int vector)
+{
+	struct kvm_vcpu_hv_synic *synic = vcpu_to_synic(vcpu);
+	int i;
+
+	trace_kvm_hv_synic_send_eoi(vcpu->vcpu_id, vector);
+
+	for (i = 0; i < ARRAY_SIZE(synic->sint); i++)
+		if (synic_get_sint_vector(synic_read_sint(synic, i)) == vector)
+			kvm_hv_notify_acked_sint(vcpu, i);
+}
+
+static int kvm_hv_set_sint_gsi(struct kvm *kvm, u32 vcpu_id, u32 sint, int gsi)
+{
+	struct kvm_vcpu_hv_synic *synic;
+
+	synic = synic_get(kvm, vcpu_id);
+	if (!synic)
+		return -EINVAL;
+
+	if (sint >= ARRAY_SIZE(synic->sint_to_gsi))
+		return -EINVAL;
+
+	atomic_set(&synic->sint_to_gsi[sint], gsi);
+	return 0;
+}
+
+void kvm_hv_irq_routing_update(struct kvm *kvm)
+{
+	struct kvm_irq_routing_table *irq_rt;
+	struct kvm_kernel_irq_routing_entry *e;
+	u32 gsi;
+
+	irq_rt = srcu_dereference_check(kvm->irq_routing, &kvm->irq_srcu,
+					lockdep_is_held(&kvm->irq_lock));
+
+	for (gsi = 0; gsi < irq_rt->nr_rt_entries; gsi++) {
+		hlist_for_each_entry(e, &irq_rt->map[gsi], link) {
+			if (e->type == KVM_IRQ_ROUTING_HV_SINT)
+				kvm_hv_set_sint_gsi(kvm, e->hv_sint.vcpu,
+						    e->hv_sint.sint, gsi);
+		}
+	}
+}
+
+static void synic_init(struct kvm_vcpu_hv_synic *synic)
+{
+	int i;
+
+	memset(synic, 0, sizeof(*synic));
+	synic->version = HV_SYNIC_VERSION_1;
+	for (i = 0; i < ARRAY_SIZE(synic->sint); i++) {
+		atomic64_set(&synic->sint[i], HV_SYNIC_SINT_MASKED);
+		atomic_set(&synic->sint_to_gsi[i], -1);
+	}
+}
+
+static u64 get_time_ref_counter(struct kvm *kvm)
+{
+	return div_u64(get_kernel_ns() + kvm->arch.kvmclock_offset, 100);
+}
+
+static void stimer_mark_pending(struct kvm_vcpu_hv_stimer *stimer,
+				bool vcpu_kick)
+{
+	struct kvm_vcpu *vcpu = stimer_to_vcpu(stimer);
+
+	set_bit(stimer->index,
+		vcpu_to_hv_vcpu(vcpu)->stimer_pending_bitmap);
+	kvm_make_request(KVM_REQ_HV_STIMER, vcpu);
+	if (vcpu_kick)
+		kvm_vcpu_kick(vcpu);
+}
+
+static void stimer_cleanup(struct kvm_vcpu_hv_stimer *stimer)
+{
+	struct kvm_vcpu *vcpu = stimer_to_vcpu(stimer);
+
+	trace_kvm_hv_stimer_cleanup(stimer_to_vcpu(stimer)->vcpu_id,
+				    stimer->index);
+
+	hrtimer_cancel(&stimer->timer);
+	clear_bit(stimer->index,
+		  vcpu_to_hv_vcpu(vcpu)->stimer_pending_bitmap);
+	stimer->msg_pending = false;
+	stimer->exp_time = 0;
+}
+
+static enum hrtimer_restart stimer_timer_callback(struct hrtimer *timer)
+{
+	struct kvm_vcpu_hv_stimer *stimer;
+
+	stimer = container_of(timer, struct kvm_vcpu_hv_stimer, timer);
+	trace_kvm_hv_stimer_callback(stimer_to_vcpu(stimer)->vcpu_id,
+				     stimer->index);
+	stimer_mark_pending(stimer, true);
+
+	return HRTIMER_NORESTART;
+}
+
+/*
+ * stimer_start() assumptions:
+ * a) stimer->count is not equal to 0
+ * b) stimer->config has HV_STIMER_ENABLE flag
+ */
+static int stimer_start(struct kvm_vcpu_hv_stimer *stimer)
+{
+	u64 time_now;
+	ktime_t ktime_now;
+
+	time_now = get_time_ref_counter(stimer_to_vcpu(stimer)->kvm);
+	ktime_now = ktime_get();
+
+	if (stimer->config & HV_STIMER_PERIODIC) {
+		if (stimer->exp_time) {
+			if (time_now >= stimer->exp_time) {
+				u64 remainder;
+
+				div64_u64_rem(time_now - stimer->exp_time,
+					      stimer->count, &remainder);
+				stimer->exp_time =
+					time_now + (stimer->count - remainder);
+			}
+		} else
+			stimer->exp_time = time_now + stimer->count;
+
+		trace_kvm_hv_stimer_start_periodic(
+					stimer_to_vcpu(stimer)->vcpu_id,
+					stimer->index,
+					time_now, stimer->exp_time);
+
+		hrtimer_start(&stimer->timer,
+			      ktime_add_ns(ktime_now,
+					   100 * (stimer->exp_time - time_now)),
+			      HRTIMER_MODE_ABS);
+		return 0;
+	}
+	stimer->exp_time = stimer->count;
+	if (time_now >= stimer->count) {
+		/*
+		 * Expire timer according to Hypervisor Top-Level Functional
+		 * specification v4(15.3.1):
+		 * "If a one shot is enabled and the specified count is in
+		 * the past, it will expire immediately."
+		 */
+		stimer_mark_pending(stimer, false);
+		return 0;
+	}
+
+	trace_kvm_hv_stimer_start_one_shot(stimer_to_vcpu(stimer)->vcpu_id,
+					   stimer->index,
+					   time_now, stimer->count);
+
+	hrtimer_start(&stimer->timer,
+		      ktime_add_ns(ktime_now, 100 * (stimer->count - time_now)),
+		      HRTIMER_MODE_ABS);
+	return 0;
+}
+
+static int stimer_set_config(struct kvm_vcpu_hv_stimer *stimer, u64 config,
+			     bool host)
+{
+	trace_kvm_hv_stimer_set_config(stimer_to_vcpu(stimer)->vcpu_id,
+				       stimer->index, config, host);
+
+	stimer_cleanup(stimer);
+	if ((stimer->config & HV_STIMER_ENABLE) && HV_STIMER_SINT(config) == 0)
+		config &= ~HV_STIMER_ENABLE;
+	stimer->config = config;
+	stimer_mark_pending(stimer, false);
+	return 0;
+}
+
+static int stimer_set_count(struct kvm_vcpu_hv_stimer *stimer, u64 count,
+			    bool host)
+{
+	trace_kvm_hv_stimer_set_count(stimer_to_vcpu(stimer)->vcpu_id,
+				      stimer->index, count, host);
+
+	stimer_cleanup(stimer);
+	stimer->count = count;
+	if (stimer->count == 0)
+		stimer->config &= ~HV_STIMER_ENABLE;
+	else if (stimer->config & HV_STIMER_AUTOENABLE)
+		stimer->config |= HV_STIMER_ENABLE;
+	stimer_mark_pending(stimer, false);
+	return 0;
+}
+
+static int stimer_get_config(struct kvm_vcpu_hv_stimer *stimer, u64 *pconfig)
+{
+	*pconfig = stimer->config;
+	return 0;
+}
+
+static int stimer_get_count(struct kvm_vcpu_hv_stimer *stimer, u64 *pcount)
+{
+	*pcount = stimer->count;
+	return 0;
+}
+
+static int synic_deliver_msg(struct kvm_vcpu_hv_synic *synic, u32 sint,
+			     struct hv_message *src_msg)
+{
+	struct kvm_vcpu *vcpu = synic_to_vcpu(synic);
+	struct page *page;
+	gpa_t gpa;
+	struct hv_message *dst_msg;
+	int r;
+	struct hv_message_page *msg_page;
+
+	if (!(synic->msg_page & HV_SYNIC_SIMP_ENABLE))
+		return -ENOENT;
+
+	gpa = synic->msg_page & PAGE_MASK;
+	page = kvm_vcpu_gfn_to_page(vcpu, gpa >> PAGE_SHIFT);
+	if (is_error_page(page))
+		return -EFAULT;
+
+	msg_page = kmap_atomic(page);
+	dst_msg = &msg_page->sint_message[sint];
+	if (sync_cmpxchg(&dst_msg->header.message_type, HVMSG_NONE,
+			 src_msg->header.message_type) != HVMSG_NONE) {
+		dst_msg->header.message_flags.msg_pending = 1;
+		r = -EAGAIN;
+	} else {
+		memcpy(&dst_msg->u.payload, &src_msg->u.payload,
+		       src_msg->header.payload_size);
+		dst_msg->header.message_type = src_msg->header.message_type;
+		dst_msg->header.payload_size = src_msg->header.payload_size;
+		r = synic_set_irq(synic, sint);
+		if (r >= 1)
+			r = 0;
+		else if (r == 0)
+			r = -EFAULT;
+	}
+	kunmap_atomic(msg_page);
+	kvm_release_page_dirty(page);
+	kvm_vcpu_mark_page_dirty(vcpu, gpa >> PAGE_SHIFT);
+	return r;
+}
+
+static int stimer_send_msg(struct kvm_vcpu_hv_stimer *stimer)
+{
+	struct kvm_vcpu *vcpu = stimer_to_vcpu(stimer);
+	struct hv_message *msg = &stimer->msg;
+	struct hv_timer_message_payload *payload =
+			(struct hv_timer_message_payload *)&msg->u.payload;
+
+	payload->expiration_time = stimer->exp_time;
+	payload->delivery_time = get_time_ref_counter(vcpu->kvm);
+	return synic_deliver_msg(vcpu_to_synic(vcpu),
+				 HV_STIMER_SINT(stimer->config), msg);
+}
+
+static void stimer_expiration(struct kvm_vcpu_hv_stimer *stimer)
+{
+	int r;
+
+	stimer->msg_pending = true;
+	r = stimer_send_msg(stimer);
+	trace_kvm_hv_stimer_expiration(stimer_to_vcpu(stimer)->vcpu_id,
+				       stimer->index, r);
+	if (!r) {
+		stimer->msg_pending = false;
+		if (!(stimer->config & HV_STIMER_PERIODIC))
+			stimer->config &= ~HV_STIMER_ENABLE;
+	}
+}
+
+void kvm_hv_process_stimers(struct kvm_vcpu *vcpu)
+{
+	struct kvm_vcpu_hv *hv_vcpu = vcpu_to_hv_vcpu(vcpu);
+	struct kvm_vcpu_hv_stimer *stimer;
+	u64 time_now, exp_time;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(hv_vcpu->stimer); i++)
+		if (test_and_clear_bit(i, hv_vcpu->stimer_pending_bitmap)) {
+			stimer = &hv_vcpu->stimer[i];
+			if (stimer->config & HV_STIMER_ENABLE) {
+				exp_time = stimer->exp_time;
+
+				if (exp_time) {
+					time_now =
+						get_time_ref_counter(vcpu->kvm);
+					if (time_now >= exp_time)
+						stimer_expiration(stimer);
+				}
+
+				if ((stimer->config & HV_STIMER_ENABLE) &&
+				    stimer->count)
+					stimer_start(stimer);
+				else
+					stimer_cleanup(stimer);
+			}
+		}
+}
+
+void kvm_hv_vcpu_uninit(struct kvm_vcpu *vcpu)
+{
+	struct kvm_vcpu_hv *hv_vcpu = vcpu_to_hv_vcpu(vcpu);
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(hv_vcpu->stimer); i++)
+		stimer_cleanup(&hv_vcpu->stimer[i]);
+}
+
+static void stimer_prepare_msg(struct kvm_vcpu_hv_stimer *stimer)
+{
+	struct hv_message *msg = &stimer->msg;
+	struct hv_timer_message_payload *payload =
+			(struct hv_timer_message_payload *)&msg->u.payload;
+
+	memset(&msg->header, 0, sizeof(msg->header));
+	msg->header.message_type = HVMSG_TIMER_EXPIRED;
+	msg->header.payload_size = sizeof(*payload);
+
+	payload->timer_index = stimer->index;
+	payload->expiration_time = 0;
+	payload->delivery_time = 0;
+}
+
+static void stimer_init(struct kvm_vcpu_hv_stimer *stimer, int timer_index)
+{
+	memset(stimer, 0, sizeof(*stimer));
+	stimer->index = timer_index;
+	hrtimer_init(&stimer->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+	stimer->timer.function = stimer_timer_callback;
+	stimer_prepare_msg(stimer);
+}
+
+void kvm_hv_vcpu_init(struct kvm_vcpu *vcpu)
+{
+	struct kvm_vcpu_hv *hv_vcpu = vcpu_to_hv_vcpu(vcpu);
+	int i;
+
+	synic_init(&hv_vcpu->synic);
+
+	bitmap_zero(hv_vcpu->stimer_pending_bitmap, HV_SYNIC_STIMER_COUNT);
+	for (i = 0; i < ARRAY_SIZE(hv_vcpu->stimer); i++)
+		stimer_init(&hv_vcpu->stimer[i], i);
+}
+
+int kvm_hv_activate_synic(struct kvm_vcpu *vcpu)
+{
+	/*
+	 * Hyper-V SynIC auto EOI SINT's are
+	 * not compatible with APICV, so deactivate APICV
+	 */
+	kvm_vcpu_deactivate_apicv(vcpu);
+	vcpu_to_synic(vcpu)->active = true;
+	return 0;
+}
+
static bool kvm_hv_msr_partition_wide(u32 msr)
{
	bool r = false;
@@ -226,6 +878,31 @@ static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
			return 1;
		hv->runtime_offset = data - current_task_runtime_100ns();
		break;
+	case HV_X64_MSR_SCONTROL:
+	case HV_X64_MSR_SVERSION:
+	case HV_X64_MSR_SIEFP:
+	case HV_X64_MSR_SIMP:
+	case HV_X64_MSR_EOM:
+	case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15:
+		return synic_set_msr(vcpu_to_synic(vcpu), msr, data, host);
+	case HV_X64_MSR_STIMER0_CONFIG:
+	case HV_X64_MSR_STIMER1_CONFIG:
+	case HV_X64_MSR_STIMER2_CONFIG:
+	case HV_X64_MSR_STIMER3_CONFIG: {
+		int timer_index = (msr - HV_X64_MSR_STIMER0_CONFIG)/2;
+
+		return stimer_set_config(vcpu_to_stimer(vcpu, timer_index),
+					 data, host);
+	}
+	case HV_X64_MSR_STIMER0_COUNT:
+	case HV_X64_MSR_STIMER1_COUNT:
+	case HV_X64_MSR_STIMER2_COUNT:
+	case HV_X64_MSR_STIMER3_COUNT: {
+		int timer_index = (msr - HV_X64_MSR_STIMER0_COUNT)/2;
+
+		return stimer_set_count(vcpu_to_stimer(vcpu, timer_index),
+					data, host);
+	}
	default:
		vcpu_unimpl(vcpu, "Hyper-V uhandled wrmsr: 0x%x data 0x%llx\n",
			    msr, data);
@@ -248,11 +925,9 @@ static int kvm_hv_get_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
	case HV_X64_MSR_HYPERCALL:
		data = hv->hv_hypercall;
		break;
-	case HV_X64_MSR_TIME_REF_COUNT: {
-		data =
-		     div_u64(get_kernel_ns() + kvm->arch.kvmclock_offset, 100);
+	case HV_X64_MSR_TIME_REF_COUNT:
+		data = get_time_ref_counter(kvm);
		break;
-	}
	case HV_X64_MSR_REFERENCE_TSC:
		data = hv->hv_tsc_page;
		break;
@@ -304,6 +979,31 @@ static int kvm_hv_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
	case HV_X64_MSR_VP_RUNTIME:
		data = current_task_runtime_100ns() + hv->runtime_offset;
		break;
+	case HV_X64_MSR_SCONTROL:
+	case HV_X64_MSR_SVERSION:
+	case HV_X64_MSR_SIEFP:
+	case HV_X64_MSR_SIMP:
+	case HV_X64_MSR_EOM:
+	case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15:
+		return synic_get_msr(vcpu_to_synic(vcpu), msr, pdata);
+	case HV_X64_MSR_STIMER0_CONFIG:
+	case HV_X64_MSR_STIMER1_CONFIG:
+	case HV_X64_MSR_STIMER2_CONFIG:
+	case HV_X64_MSR_STIMER3_CONFIG: {
+		int timer_index = (msr - HV_X64_MSR_STIMER0_CONFIG)/2;
+
+		return stimer_get_config(vcpu_to_stimer(vcpu, timer_index),
+					 pdata);
+	}
+	case HV_X64_MSR_STIMER0_COUNT:
+	case HV_X64_MSR_STIMER1_COUNT:
+	case HV_X64_MSR_STIMER2_COUNT:
+	case HV_X64_MSR_STIMER3_COUNT: {
+		int timer_index = (msr - HV_X64_MSR_STIMER0_COUNT)/2;
+
+		return stimer_get_count(vcpu_to_stimer(vcpu, timer_index),
+					pdata);
+	}
	default:
		vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
		return 1;

+ 55 - 0
arch/x86/kvm/hyperv.h

@@ -24,9 +24,64 @@
#ifndef __ARCH_X86_KVM_HYPERV_H__
#define __ARCH_X86_KVM_HYPERV_H__
 
+static inline struct kvm_vcpu_hv *vcpu_to_hv_vcpu(struct kvm_vcpu *vcpu)
+{
+	return &vcpu->arch.hyperv;
+}
+
+static inline struct kvm_vcpu *hv_vcpu_to_vcpu(struct kvm_vcpu_hv *hv_vcpu)
+{
+	struct kvm_vcpu_arch *arch;
+
+	arch = container_of(hv_vcpu, struct kvm_vcpu_arch, hyperv);
+	return container_of(arch, struct kvm_vcpu, arch);
+}
+
+static inline struct kvm_vcpu_hv_synic *vcpu_to_synic(struct kvm_vcpu *vcpu)
+{
+	return &vcpu->arch.hyperv.synic;
+}
+
+static inline struct kvm_vcpu *synic_to_vcpu(struct kvm_vcpu_hv_synic *synic)
+{
+	return hv_vcpu_to_vcpu(container_of(synic, struct kvm_vcpu_hv, synic));
+}
+
int kvm_hv_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host);
int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata);
+
bool kvm_hv_hypercall_enabled(struct kvm *kvm);
int kvm_hv_hypercall(struct kvm_vcpu *vcpu);
 
+void kvm_hv_irq_routing_update(struct kvm *kvm);
+int kvm_hv_synic_set_irq(struct kvm *kvm, u32 vcpu_id, u32 sint);
+void kvm_hv_synic_send_eoi(struct kvm_vcpu *vcpu, int vector);
+int kvm_hv_activate_synic(struct kvm_vcpu *vcpu);
+
+void kvm_hv_vcpu_init(struct kvm_vcpu *vcpu);
+void kvm_hv_vcpu_uninit(struct kvm_vcpu *vcpu);
+
+static inline struct kvm_vcpu_hv_stimer *vcpu_to_stimer(struct kvm_vcpu *vcpu,
+							int timer_index)
+{
+	return &vcpu_to_hv_vcpu(vcpu)->stimer[timer_index];
+}
+
+static inline struct kvm_vcpu *stimer_to_vcpu(struct kvm_vcpu_hv_stimer *stimer)
+{
+	struct kvm_vcpu_hv *hv_vcpu;
+
+	hv_vcpu = container_of(stimer - stimer->index, struct kvm_vcpu_hv,
+			       stimer[0]);
+	return hv_vcpu_to_vcpu(hv_vcpu);
+}
+
+static inline bool kvm_hv_has_stimer_pending(struct kvm_vcpu *vcpu)
+{
+	return !bitmap_empty(vcpu->arch.hyperv.stimer_pending_bitmap,
+			     HV_SYNIC_STIMER_COUNT);
+}
+
+void kvm_hv_process_stimers(struct kvm_vcpu *vcpu);
+
#endif

+ 2 - 2
arch/x86/kvm/ioapic.c

@@ -233,7 +233,7 @@ static void kvm_ioapic_inject_all(struct kvm_ioapic *ioapic, unsigned long irr)
}
 
 
-void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
+void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, ulong *ioapic_handled_vectors)
{
	struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic;
	union kvm_ioapic_redirect_entry *e;
@@ -250,7 +250,7 @@ void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
			    (e->fields.trig_mode == IOAPIC_EDGE_TRIG &&
			     kvm_apic_pending_eoi(vcpu, e->fields.vector)))
				__set_bit(e->fields.vector,
-					(unsigned long *)eoi_exit_bitmap);
+					  ioapic_handled_vectors);
		}
	}
	spin_unlock(&ioapic->lock);

+ 4 - 3
arch/x86/kvm/ioapic.h

@@ -121,7 +121,8 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
		struct kvm_lapic_irq *irq, unsigned long *dest_map);
int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state);
int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state);
-void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
-void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
-
+void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu,
+			   ulong *ioapic_handled_vectors);
+void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu,
+			    ulong *ioapic_handled_vectors);
#endif

+ 1 - 1
arch/x86/kvm/irq.c

@@ -76,7 +76,7 @@ int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v)
	if (kvm_cpu_has_extint(v))
		return 1;
 
-	if (kvm_vcpu_apic_vid_enabled(v))
+	if (kvm_vcpu_apicv_active(v))
		return 0;

	return kvm_apic_has_interrupt(v) != -1; /* LAPIC */

+ 38 - 3
arch/x86/kvm/irq_comm.c

@@ -33,6 +33,8 @@

#include "lapic.h"
 
+#include "hyperv.h"
+
static int kvm_set_pic_irq(struct kvm_kernel_irq_routing_entry *e,
			   struct kvm *kvm, int irq_source_id, int level,
			   bool line_status)
@@ -219,6 +221,16 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin,
	srcu_read_unlock(&kvm->irq_srcu, idx);
}
 
+static int kvm_hv_set_sint(struct kvm_kernel_irq_routing_entry *e,
+		    struct kvm *kvm, int irq_source_id, int level,
+		    bool line_status)
+{
+	if (!level)
+		return -1;
+
+	return kvm_hv_synic_set_irq(kvm, e->hv_sint.vcpu, e->hv_sint.sint);
+}
+
int kvm_set_routing_entry(struct kvm_kernel_irq_routing_entry *e,
			  const struct kvm_irq_routing_entry *ue)
{
@@ -257,6 +269,11 @@ int kvm_set_routing_entry(struct kvm_kernel_irq_routing_entry *e,
		e->msi.address_hi = ue->u.msi.address_hi;
		e->msi.data = ue->u.msi.data;
		break;
+	case KVM_IRQ_ROUTING_HV_SINT:
+		e->set = kvm_hv_set_sint;
+		e->hv_sint.vcpu = ue->u.hv_sint.vcpu;
+		e->hv_sint.sint = ue->u.hv_sint.sint;
+		break;
	default:
		goto out;
	}
@@ -332,14 +349,15 @@ int kvm_setup_empty_irq_routing(struct kvm *kvm)
	return kvm_set_irq_routing(kvm, empty_routing, 0, 0);
}
 
-void kvm_arch_irq_routing_update(struct kvm *kvm)
+void kvm_arch_post_irq_routing_update(struct kvm *kvm)
{
	if (ioapic_in_kernel(kvm) || !irqchip_in_kernel(kvm))
		return;
	kvm_make_scan_ioapic_request(kvm);
}
 
-void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
+void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu,
+			    ulong *ioapic_handled_vectors)
{
	struct kvm *kvm = vcpu->kvm;
	struct kvm_kernel_irq_routing_entry *entry;
@@ -369,9 +387,26 @@ void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
				u32 vector = entry->msi.data & 0xff;

				__set_bit(vector,
-					  (unsigned long *) eoi_exit_bitmap);
+					  ioapic_handled_vectors);
			}
		}
	}
	srcu_read_unlock(&kvm->irq_srcu, idx);
}
+
+int kvm_arch_set_irq(struct kvm_kernel_irq_routing_entry *irq, struct kvm *kvm,
+		     int irq_source_id, int level, bool line_status)
+{
+	switch (irq->type) {
+	case KVM_IRQ_ROUTING_HV_SINT:
+		return kvm_hv_set_sint(irq, kvm, irq_source_id, level,
+				       line_status);
+	default:
+		return -EWOULDBLOCK;
+	}
+}
+
+void kvm_arch_irq_routing_update(struct kvm *kvm)
+{
+	kvm_hv_irq_routing_update(kvm);
+}

+ 23 - 17
arch/x86/kvm/lapic.c

@@ -41,6 +41,7 @@
#include "trace.h"
#include "x86.h"
#include "cpuid.h"
+#include "hyperv.h"

#ifndef CONFIG_X86_64
#define mod_64(x, y) ((x) - (y) * div64_u64(x, y))
@@ -128,11 +129,6 @@ static inline int apic_enabled(struct kvm_lapic *apic)
	(LVT_MASK | APIC_MODE_MASK | APIC_INPUT_POLARITY | \
	 APIC_LVT_REMOTE_IRR | APIC_LVT_LEVEL_TRIGGER)
 
-static inline int kvm_apic_id(struct kvm_lapic *apic)
-{
-	return (kvm_apic_get_reg(apic, APIC_ID) >> 24) & 0xff;
-}
-
/* The logical map is definitely wrong if we have multiple
 * modes at the same time.  (Physical map is always right.)
 */
@@ -379,7 +375,8 @@ static inline int apic_find_highest_irr(struct kvm_lapic *apic)
	if (!apic->irr_pending)
		return -1;
 
-	kvm_x86_ops->sync_pir_to_irr(apic->vcpu);
+	if (apic->vcpu->arch.apicv_active)
+		kvm_x86_ops->sync_pir_to_irr(apic->vcpu);
	result = apic_search_irr(apic);
	ASSERT(result == -1 || result >= 16);
 
@@ -392,7 +389,7 @@ static inline void apic_clear_irr(int vec, struct kvm_lapic *apic)

	vcpu = apic->vcpu;
 
-	if (unlikely(kvm_vcpu_apic_vid_enabled(vcpu))) {
+	if (unlikely(vcpu->arch.apicv_active)) {
		/* try to update RVI */
		apic_clear_vector(vec, apic->regs + APIC_IRR);
		kvm_make_request(KVM_REQ_EVENT, vcpu);
@@ -418,7 +415,7 @@ static inline void apic_set_isr(int vec, struct kvm_lapic *apic)
	 * because the processor can modify ISR under the hood.  Instead
	 * just set SVI.
	 */
-	if (unlikely(kvm_x86_ops->hwapic_isr_update))
+	if (unlikely(vcpu->arch.apicv_active))
		kvm_x86_ops->hwapic_isr_update(vcpu->kvm, vec);
	else {
		++apic->isr_count;
@@ -466,7 +463,7 @@ static inline void apic_clear_isr(int vec, struct kvm_lapic *apic)
	 * on the other hand isr_count and highest_isr_cache are unused
	 * and must be left alone.
	 */
-	if (unlikely(kvm_x86_ops->hwapic_isr_update))
+	if (unlikely(vcpu->arch.apicv_active))
		kvm_x86_ops->hwapic_isr_update(vcpu->kvm,
					       apic_find_highest_isr(apic));
	else {
@@ -852,7 +849,7 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
				apic_clear_vector(vector, apic->regs + APIC_TMR);
		}
 
-		if (kvm_x86_ops->deliver_posted_interrupt)
+		if (vcpu->arch.apicv_active)
			kvm_x86_ops->deliver_posted_interrupt(vcpu, vector);
		else {
			apic_set_irr(vector, apic);
@@ -932,7 +929,7 @@ int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2)

static bool kvm_ioapic_handles_vector(struct kvm_lapic *apic, int vector)
{
-	return test_bit(vector, (ulong *)apic->vcpu->arch.eoi_exit_bitmap);
+	return test_bit(vector, apic->vcpu->arch.ioapic_handled_vectors);
}

static void kvm_ioapic_send_eoi(struct kvm_lapic *apic, int vector)
@@ -974,6 +971,9 @@ static int apic_set_eoi(struct kvm_lapic *apic)
	apic_clear_isr(vector, apic);
	apic_update_ppr(apic);
 
+	if (test_bit(vector, vcpu_to_synic(apic->vcpu)->vec_bitmap))
+		kvm_hv_synic_send_eoi(apic->vcpu, vector);
+
	kvm_ioapic_send_eoi(apic, vector);
	kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
	return vector;
@@ -1225,7 +1225,7 @@ static bool lapic_timer_int_injected(struct kvm_vcpu *vcpu)
		int vec = reg & APIC_VECTOR_MASK;
		void *bitmap = apic->regs + APIC_ISR;
 
-		if (kvm_x86_ops->deliver_posted_interrupt)
+		if (vcpu->arch.apicv_active)
			bitmap = apic->regs + APIC_IRR;

		if (apic_test_vector(vec, bitmap))
@@ -1693,8 +1693,8 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event)
		apic_set_reg(apic, APIC_ISR + 0x10 * i, 0);
		apic_set_reg(apic, APIC_TMR + 0x10 * i, 0);
	}
-	apic->irr_pending = kvm_vcpu_apic_vid_enabled(vcpu);
-	apic->isr_count = kvm_x86_ops->hwapic_isr_update ? 1 : 0;
+	apic->irr_pending = vcpu->arch.apicv_active;
+	apic->isr_count = vcpu->arch.apicv_active ? 1 : 0;
	apic->highest_isr_cache = -1;
	update_divide_count(apic);
	atomic_set(&apic->lapic_timer.pending, 0);
@@ -1883,6 +1883,12 @@ int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu)
	apic_set_isr(vector, apic);
	apic_update_ppr(apic);
	apic_clear_irr(vector, apic);
+
+	if (test_bit(vector, vcpu_to_synic(vcpu)->auto_eoi_bitmap)) {
+		apic_clear_isr(vector, apic);
+		apic_update_ppr(apic);
+	}
+
 	return vector;
 	return vector;
 }
 }
 
 
@@ -1906,15 +1912,15 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu,
 	update_divide_count(apic);
 	start_apic_timer(apic);
 	apic->irr_pending = true;
-	apic->isr_count = kvm_x86_ops->hwapic_isr_update ?
+	apic->isr_count = vcpu->arch.apicv_active ?
 				1 : count_vectors(apic->regs + APIC_ISR);
 	apic->highest_isr_cache = -1;
-	if (kvm_x86_ops->hwapic_irr_update)
+	if (vcpu->arch.apicv_active) {
 		kvm_x86_ops->hwapic_irr_update(vcpu,
 				apic_find_highest_irr(apic));
-	if (unlikely(kvm_x86_ops->hwapic_isr_update))
 		kvm_x86_ops->hwapic_isr_update(vcpu->kvm,
 				apic_find_highest_isr(apic));
+	}
 	kvm_make_request(KVM_REQ_EVENT, vcpu);
 	if (ioapic_in_kernel(vcpu->kvm))
 		kvm_rtc_eoi_tracking_restore_one(vcpu);

+ 7 - 2
arch/x86/kvm/lapic.h

@@ -143,9 +143,9 @@ static inline int apic_x2apic_mode(struct kvm_lapic *apic)
 	return apic->vcpu->arch.apic_base & X2APIC_ENABLE;
 }
 
-static inline bool kvm_vcpu_apic_vid_enabled(struct kvm_vcpu *vcpu)
+static inline bool kvm_vcpu_apicv_active(struct kvm_vcpu *vcpu)
 {
-	return kvm_x86_ops->cpu_uses_apicv(vcpu);
+	return vcpu->arch.apic && vcpu->arch.apicv_active;
 }
 
 static inline bool kvm_apic_has_events(struct kvm_vcpu *vcpu)
@@ -164,6 +164,11 @@ static inline int kvm_lapic_latched_init(struct kvm_vcpu *vcpu)
 	return kvm_vcpu_has_lapic(vcpu) && test_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events);
 }
 
+static inline int kvm_apic_id(struct kvm_lapic *apic)
+{
+	return (kvm_apic_get_reg(apic, APIC_ID) >> 24) & 0xff;
+}
+
 bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector);
 
 void wait_lapic_expire(struct kvm_vcpu *vcpu);
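
A quick illustration of what the new kvm_apic_id() helper computes (example values, not from the patch): in xAPIC mode the 8-bit APIC ID lives in bits 31:24 of the APIC_ID register, so a register value of 0x05000000 decodes to ID 5.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t apic_id_reg = 0x05000000;	/* example xAPIC APIC_ID register value */
	int id = (apic_id_reg >> 24) & 0xff;	/* same shift/mask as kvm_apic_id() */

	printf("apic id = %d\n", id);		/* prints 5 */
	return 0;
}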

+ 183 - 214
arch/x86/kvm/mmu.c

@@ -311,11 +311,6 @@ static int is_large_pte(u64 pte)
 	return pte & PT_PAGE_SIZE_MASK;
 }
 
-static int is_rmap_spte(u64 pte)
-{
-	return is_shadow_present_pte(pte);
-}
-
 static int is_last_spte(u64 pte, int level)
 {
 	if (level == PT_PAGE_TABLE_LEVEL)
@@ -540,7 +535,7 @@ static bool mmu_spte_update(u64 *sptep, u64 new_spte)
 	u64 old_spte = *sptep;
 	bool ret = false;
 
-	WARN_ON(!is_rmap_spte(new_spte));
+	WARN_ON(!is_shadow_present_pte(new_spte));
 
 	if (!is_shadow_present_pte(old_spte)) {
 		mmu_spte_set(sptep, new_spte);
@@ -595,7 +590,7 @@ static int mmu_spte_clear_track_bits(u64 *sptep)
 	else
 		old_spte = __update_clear_spte_slow(sptep, 0ull);
 
-	if (!is_rmap_spte(old_spte))
+	if (!is_shadow_present_pte(old_spte))
 		return 0;
 
 	pfn = spte_to_pfn(old_spte);
@@ -909,36 +904,35 @@ static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn,
 }
 
 /*
- * Pte mapping structures:
+ * About rmap_head encoding:
  *
- * If pte_list bit zero is zero, then pte_list point to the spte.
- *
- * If pte_list bit zero is one, (then pte_list & ~1) points to a struct
+ * If the bit zero of rmap_head->val is clear, then it points to the only spte
+ * in this rmap chain. Otherwise, (rmap_head->val & ~1) points to a struct
  * pte_list_desc containing more mappings.
- *
- * Returns the number of pte entries before the spte was added or zero if
- * the spte was not added.
- *
+ */
+
+/*
+ * Returns the number of pointers in the rmap chain, not counting the new one.
  */
 static int pte_list_add(struct kvm_vcpu *vcpu, u64 *spte,
-			unsigned long *pte_list)
+			struct kvm_rmap_head *rmap_head)
 {
 	struct pte_list_desc *desc;
 	int i, count = 0;
 
-	if (!*pte_list) {
+	if (!rmap_head->val) {
 		rmap_printk("pte_list_add: %p %llx 0->1\n", spte, *spte);
-		*pte_list = (unsigned long)spte;
-	} else if (!(*pte_list & 1)) {
+		rmap_head->val = (unsigned long)spte;
+	} else if (!(rmap_head->val & 1)) {
 		rmap_printk("pte_list_add: %p %llx 1->many\n", spte, *spte);
 		desc = mmu_alloc_pte_list_desc(vcpu);
-		desc->sptes[0] = (u64 *)*pte_list;
+		desc->sptes[0] = (u64 *)rmap_head->val;
 		desc->sptes[1] = spte;
-		*pte_list = (unsigned long)desc | 1;
+		rmap_head->val = (unsigned long)desc | 1;
 		++count;
 	} else {
 		rmap_printk("pte_list_add: %p %llx many->many\n", spte, *spte);
-		desc = (struct pte_list_desc *)(*pte_list & ~1ul);
+		desc = (struct pte_list_desc *)(rmap_head->val & ~1ul);
 		while (desc->sptes[PTE_LIST_EXT-1] && desc->more) {
 			desc = desc->more;
 			count += PTE_LIST_EXT;
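
The rmap_head encoding described in the comment above is a tagged-pointer trick. Below is a standalone sketch (not part of the commit, with simplified stand-in types) showing how bit 0 of rmap_head->val distinguishes "points straight at the single spte" from "points at a pte_list_desc chain":

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define PTE_LIST_EXT 3

struct pte_list_desc {
	uint64_t *sptes[PTE_LIST_EXT];
	struct pte_list_desc *more;
};

struct rmap_head {
	unsigned long val;	/* bit 0 clear: single spte; bit 0 set: desc chain */
};

static void rmap_add(struct rmap_head *head, uint64_t *spte)
{
	if (!head->val) {
		head->val = (unsigned long)spte;	/* 0 -> 1: store the spte directly */
	} else if (!(head->val & 1)) {
		struct pte_list_desc *desc = calloc(1, sizeof(*desc));

		desc->sptes[0] = (uint64_t *)head->val;	/* 1 -> many: move old entry into a desc */
		desc->sptes[1] = spte;
		head->val = (unsigned long)desc | 1;	/* tag bit 0 to mark "chain" */
	}
	/* the many -> many case (walking ->more) is omitted in this sketch */
}

int main(void)
{
	static uint64_t a, b;
	struct rmap_head head = { 0 };

	rmap_add(&head, &a);
	printf("chain? %s\n", (head.val & 1) ? "yes" : "no");	/* no  */
	rmap_add(&head, &b);
	printf("chain? %s\n", (head.val & 1) ? "yes" : "no");	/* yes */
	return 0;
}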
@@ -955,8 +949,9 @@ static int pte_list_add(struct kvm_vcpu *vcpu, u64 *spte,
 }
 }
 
 
 static void
 static void
-pte_list_desc_remove_entry(unsigned long *pte_list, struct pte_list_desc *desc,
-			   int i, struct pte_list_desc *prev_desc)
+pte_list_desc_remove_entry(struct kvm_rmap_head *rmap_head,
+			   struct pte_list_desc *desc, int i,
+			   struct pte_list_desc *prev_desc)
 {
 {
 	int j;
 	int j;
 
 
@@ -967,43 +962,43 @@ pte_list_desc_remove_entry(unsigned long *pte_list, struct pte_list_desc *desc,
 	if (j != 0)
 	if (j != 0)
 		return;
 		return;
 	if (!prev_desc && !desc->more)
 	if (!prev_desc && !desc->more)
-		*pte_list = (unsigned long)desc->sptes[0];
+		rmap_head->val = (unsigned long)desc->sptes[0];
 	else
 	else
 		if (prev_desc)
 		if (prev_desc)
 			prev_desc->more = desc->more;
 			prev_desc->more = desc->more;
 		else
 		else
-			*pte_list = (unsigned long)desc->more | 1;
+			rmap_head->val = (unsigned long)desc->more | 1;
 	mmu_free_pte_list_desc(desc);
 	mmu_free_pte_list_desc(desc);
 }
 }
 
 
-static void pte_list_remove(u64 *spte, unsigned long *pte_list)
+static void pte_list_remove(u64 *spte, struct kvm_rmap_head *rmap_head)
 {
 {
 	struct pte_list_desc *desc;
 	struct pte_list_desc *desc;
 	struct pte_list_desc *prev_desc;
 	struct pte_list_desc *prev_desc;
 	int i;
 	int i;
 
 
-	if (!*pte_list) {
+	if (!rmap_head->val) {
 		printk(KERN_ERR "pte_list_remove: %p 0->BUG\n", spte);
 		printk(KERN_ERR "pte_list_remove: %p 0->BUG\n", spte);
 		BUG();
 		BUG();
-	} else if (!(*pte_list & 1)) {
+	} else if (!(rmap_head->val & 1)) {
 		rmap_printk("pte_list_remove:  %p 1->0\n", spte);
 		rmap_printk("pte_list_remove:  %p 1->0\n", spte);
-		if ((u64 *)*pte_list != spte) {
+		if ((u64 *)rmap_head->val != spte) {
 			printk(KERN_ERR "pte_list_remove:  %p 1->BUG\n", spte);
 			printk(KERN_ERR "pte_list_remove:  %p 1->BUG\n", spte);
 			BUG();
 			BUG();
 		}
 		}
-		*pte_list = 0;
+		rmap_head->val = 0;
 	} else {
 	} else {
 		rmap_printk("pte_list_remove:  %p many->many\n", spte);
 		rmap_printk("pte_list_remove:  %p many->many\n", spte);
-		desc = (struct pte_list_desc *)(*pte_list & ~1ul);
+		desc = (struct pte_list_desc *)(rmap_head->val & ~1ul);
 		prev_desc = NULL;
 		prev_desc = NULL;
 		while (desc) {
 		while (desc) {
-			for (i = 0; i < PTE_LIST_EXT && desc->sptes[i]; ++i)
+			for (i = 0; i < PTE_LIST_EXT && desc->sptes[i]; ++i) {
 				if (desc->sptes[i] == spte) {
 				if (desc->sptes[i] == spte) {
-					pte_list_desc_remove_entry(pte_list,
-							       desc, i,
-							       prev_desc);
+					pte_list_desc_remove_entry(rmap_head,
+							desc, i, prev_desc);
 					return;
 					return;
 				}
 				}
+			}
 			prev_desc = desc;
 			prev_desc = desc;
 			desc = desc->more;
 			desc = desc->more;
 		}
 		}
@@ -1012,28 +1007,8 @@ static void pte_list_remove(u64 *spte, unsigned long *pte_list)
 	}
 	}
 }
 }
 
 
-typedef void (*pte_list_walk_fn) (u64 *spte);
-static void pte_list_walk(unsigned long *pte_list, pte_list_walk_fn fn)
-{
-	struct pte_list_desc *desc;
-	int i;
-
-	if (!*pte_list)
-		return;
-
-	if (!(*pte_list & 1))
-		return fn((u64 *)*pte_list);
-
-	desc = (struct pte_list_desc *)(*pte_list & ~1ul);
-	while (desc) {
-		for (i = 0; i < PTE_LIST_EXT && desc->sptes[i]; ++i)
-			fn(desc->sptes[i]);
-		desc = desc->more;
-	}
-}
-
-static unsigned long *__gfn_to_rmap(gfn_t gfn, int level,
-				    struct kvm_memory_slot *slot)
+static struct kvm_rmap_head *__gfn_to_rmap(gfn_t gfn, int level,
+					   struct kvm_memory_slot *slot)
 {
 {
 	unsigned long idx;
 	unsigned long idx;
 
 
@@ -1041,10 +1016,8 @@ static unsigned long *__gfn_to_rmap(gfn_t gfn, int level,
 	return &slot->arch.rmap[level - PT_PAGE_TABLE_LEVEL][idx];
 	return &slot->arch.rmap[level - PT_PAGE_TABLE_LEVEL][idx];
 }
 }
 
 
-/*
- * Take gfn and return the reverse mapping to it.
- */
-static unsigned long *gfn_to_rmap(struct kvm *kvm, gfn_t gfn, struct kvm_mmu_page *sp)
+static struct kvm_rmap_head *gfn_to_rmap(struct kvm *kvm, gfn_t gfn,
+					 struct kvm_mmu_page *sp)
 {
 {
 	struct kvm_memslots *slots;
 	struct kvm_memslots *slots;
 	struct kvm_memory_slot *slot;
 	struct kvm_memory_slot *slot;
@@ -1065,24 +1038,24 @@ static bool rmap_can_add(struct kvm_vcpu *vcpu)
 static int rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn)
 static int rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn)
 {
 {
 	struct kvm_mmu_page *sp;
 	struct kvm_mmu_page *sp;
-	unsigned long *rmapp;
+	struct kvm_rmap_head *rmap_head;
 
 
 	sp = page_header(__pa(spte));
 	sp = page_header(__pa(spte));
 	kvm_mmu_page_set_gfn(sp, spte - sp->spt, gfn);
 	kvm_mmu_page_set_gfn(sp, spte - sp->spt, gfn);
-	rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp);
-	return pte_list_add(vcpu, spte, rmapp);
+	rmap_head = gfn_to_rmap(vcpu->kvm, gfn, sp);
+	return pte_list_add(vcpu, spte, rmap_head);
 }
 }
 
 
 static void rmap_remove(struct kvm *kvm, u64 *spte)
 static void rmap_remove(struct kvm *kvm, u64 *spte)
 {
 {
 	struct kvm_mmu_page *sp;
 	struct kvm_mmu_page *sp;
 	gfn_t gfn;
 	gfn_t gfn;
-	unsigned long *rmapp;
+	struct kvm_rmap_head *rmap_head;
 
 
 	sp = page_header(__pa(spte));
 	sp = page_header(__pa(spte));
 	gfn = kvm_mmu_page_get_gfn(sp, spte - sp->spt);
 	gfn = kvm_mmu_page_get_gfn(sp, spte - sp->spt);
-	rmapp = gfn_to_rmap(kvm, gfn, sp);
-	pte_list_remove(spte, rmapp);
+	rmap_head = gfn_to_rmap(kvm, gfn, sp);
+	pte_list_remove(spte, rmap_head);
 }
 }
 
 
 /*
 /*
@@ -1102,19 +1075,26 @@ struct rmap_iterator {
  *
  *
  * Returns sptep if found, NULL otherwise.
  * Returns sptep if found, NULL otherwise.
  */
  */
-static u64 *rmap_get_first(unsigned long rmap, struct rmap_iterator *iter)
+static u64 *rmap_get_first(struct kvm_rmap_head *rmap_head,
+			   struct rmap_iterator *iter)
 {
 {
-	if (!rmap)
+	u64 *sptep;
+
+	if (!rmap_head->val)
 		return NULL;
 		return NULL;
 
 
-	if (!(rmap & 1)) {
+	if (!(rmap_head->val & 1)) {
 		iter->desc = NULL;
 		iter->desc = NULL;
-		return (u64 *)rmap;
+		sptep = (u64 *)rmap_head->val;
+		goto out;
 	}
 	}
 
 
-	iter->desc = (struct pte_list_desc *)(rmap & ~1ul);
+	iter->desc = (struct pte_list_desc *)(rmap_head->val & ~1ul);
 	iter->pos = 0;
 	iter->pos = 0;
-	return iter->desc->sptes[iter->pos];
+	sptep = iter->desc->sptes[iter->pos];
+out:
+	BUG_ON(!is_shadow_present_pte(*sptep));
+	return sptep;
 }
 }
 
 
 /*
 /*
@@ -1124,14 +1104,14 @@ static u64 *rmap_get_first(unsigned long rmap, struct rmap_iterator *iter)
  */
  */
 static u64 *rmap_get_next(struct rmap_iterator *iter)
 static u64 *rmap_get_next(struct rmap_iterator *iter)
 {
 {
+	u64 *sptep;
+
 	if (iter->desc) {
 	if (iter->desc) {
 		if (iter->pos < PTE_LIST_EXT - 1) {
 		if (iter->pos < PTE_LIST_EXT - 1) {
-			u64 *sptep;
-
 			++iter->pos;
 			++iter->pos;
 			sptep = iter->desc->sptes[iter->pos];
 			sptep = iter->desc->sptes[iter->pos];
 			if (sptep)
 			if (sptep)
-				return sptep;
+				goto out;
 		}
 		}
 
 
 		iter->desc = iter->desc->more;
 		iter->desc = iter->desc->more;
@@ -1139,17 +1119,20 @@ static u64 *rmap_get_next(struct rmap_iterator *iter)
 		if (iter->desc) {
 			iter->pos = 0;
 			/* desc->sptes[0] cannot be NULL */
-			return iter->desc->sptes[iter->pos];
+			sptep = iter->desc->sptes[iter->pos];
+			goto out;
 		}
 	}
 
 	return NULL;
+out:
+	BUG_ON(!is_shadow_present_pte(*sptep));
+	return sptep;
 }
 
-#define for_each_rmap_spte(_rmap_, _iter_, _spte_)			    \
-	   for (_spte_ = rmap_get_first(*_rmap_, _iter_);		    \
-		_spte_ && ({BUG_ON(!is_shadow_present_pte(*_spte_)); 1;});  \
-			_spte_ = rmap_get_next(_iter_))
+#define for_each_rmap_spte(_rmap_head_, _iter_, _spte_)			\
+	for (_spte_ = rmap_get_first(_rmap_head_, _iter_);		\
+	     _spte_; _spte_ = rmap_get_next(_iter_))
 
 static void drop_spte(struct kvm *kvm, u64 *sptep)
 {
@@ -1207,14 +1190,15 @@ static bool spte_write_protect(struct kvm *kvm, u64 *sptep, bool pt_protect)
 	return mmu_spte_update(sptep, spte);
 	return mmu_spte_update(sptep, spte);
 }
 }
 
 
-static bool __rmap_write_protect(struct kvm *kvm, unsigned long *rmapp,
+static bool __rmap_write_protect(struct kvm *kvm,
+				 struct kvm_rmap_head *rmap_head,
 				 bool pt_protect)
 				 bool pt_protect)
 {
 {
 	u64 *sptep;
 	u64 *sptep;
 	struct rmap_iterator iter;
 	struct rmap_iterator iter;
 	bool flush = false;
 	bool flush = false;
 
 
-	for_each_rmap_spte(rmapp, &iter, sptep)
+	for_each_rmap_spte(rmap_head, &iter, sptep)
 		flush |= spte_write_protect(kvm, sptep, pt_protect);
 		flush |= spte_write_protect(kvm, sptep, pt_protect);
 
 
 	return flush;
 	return flush;
@@ -1231,13 +1215,13 @@ static bool spte_clear_dirty(struct kvm *kvm, u64 *sptep)
 	return mmu_spte_update(sptep, spte);
 	return mmu_spte_update(sptep, spte);
 }
 }
 
 
-static bool __rmap_clear_dirty(struct kvm *kvm, unsigned long *rmapp)
+static bool __rmap_clear_dirty(struct kvm *kvm, struct kvm_rmap_head *rmap_head)
 {
 {
 	u64 *sptep;
 	u64 *sptep;
 	struct rmap_iterator iter;
 	struct rmap_iterator iter;
 	bool flush = false;
 	bool flush = false;
 
 
-	for_each_rmap_spte(rmapp, &iter, sptep)
+	for_each_rmap_spte(rmap_head, &iter, sptep)
 		flush |= spte_clear_dirty(kvm, sptep);
 		flush |= spte_clear_dirty(kvm, sptep);
 
 
 	return flush;
 	return flush;
@@ -1254,13 +1238,13 @@ static bool spte_set_dirty(struct kvm *kvm, u64 *sptep)
 	return mmu_spte_update(sptep, spte);
 	return mmu_spte_update(sptep, spte);
 }
 }
 
 
-static bool __rmap_set_dirty(struct kvm *kvm, unsigned long *rmapp)
+static bool __rmap_set_dirty(struct kvm *kvm, struct kvm_rmap_head *rmap_head)
 {
 {
 	u64 *sptep;
 	u64 *sptep;
 	struct rmap_iterator iter;
 	struct rmap_iterator iter;
 	bool flush = false;
 	bool flush = false;
 
 
-	for_each_rmap_spte(rmapp, &iter, sptep)
+	for_each_rmap_spte(rmap_head, &iter, sptep)
 		flush |= spte_set_dirty(kvm, sptep);
 		flush |= spte_set_dirty(kvm, sptep);
 
 
 	return flush;
 	return flush;
@@ -1280,12 +1264,12 @@ static void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
 				     struct kvm_memory_slot *slot,
 				     struct kvm_memory_slot *slot,
 				     gfn_t gfn_offset, unsigned long mask)
 				     gfn_t gfn_offset, unsigned long mask)
 {
 {
-	unsigned long *rmapp;
+	struct kvm_rmap_head *rmap_head;
 
 
 	while (mask) {
 	while (mask) {
-		rmapp = __gfn_to_rmap(slot->base_gfn + gfn_offset + __ffs(mask),
-				      PT_PAGE_TABLE_LEVEL, slot);
-		__rmap_write_protect(kvm, rmapp, false);
+		rmap_head = __gfn_to_rmap(slot->base_gfn + gfn_offset + __ffs(mask),
+					  PT_PAGE_TABLE_LEVEL, slot);
+		__rmap_write_protect(kvm, rmap_head, false);
 
 
 		/* clear the first set bit */
 		/* clear the first set bit */
 		mask &= mask - 1;
 		mask &= mask - 1;
@@ -1305,12 +1289,12 @@ void kvm_mmu_clear_dirty_pt_masked(struct kvm *kvm,
 				     struct kvm_memory_slot *slot,
 				     struct kvm_memory_slot *slot,
 				     gfn_t gfn_offset, unsigned long mask)
 				     gfn_t gfn_offset, unsigned long mask)
 {
 {
-	unsigned long *rmapp;
+	struct kvm_rmap_head *rmap_head;
 
 
 	while (mask) {
 	while (mask) {
-		rmapp = __gfn_to_rmap(slot->base_gfn + gfn_offset + __ffs(mask),
-				      PT_PAGE_TABLE_LEVEL, slot);
-		__rmap_clear_dirty(kvm, rmapp);
+		rmap_head = __gfn_to_rmap(slot->base_gfn + gfn_offset + __ffs(mask),
+					  PT_PAGE_TABLE_LEVEL, slot);
+		__rmap_clear_dirty(kvm, rmap_head);
 
 
 		/* clear the first set bit */
 		/* clear the first set bit */
 		mask &= mask - 1;
 		mask &= mask - 1;
@@ -1342,28 +1326,27 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
 static bool rmap_write_protect(struct kvm_vcpu *vcpu, u64 gfn)
 static bool rmap_write_protect(struct kvm_vcpu *vcpu, u64 gfn)
 {
 {
 	struct kvm_memory_slot *slot;
 	struct kvm_memory_slot *slot;
-	unsigned long *rmapp;
+	struct kvm_rmap_head *rmap_head;
 	int i;
 	int i;
 	bool write_protected = false;
 	bool write_protected = false;
 
 
 	slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
 	slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
 
 
 	for (i = PT_PAGE_TABLE_LEVEL; i <= PT_MAX_HUGEPAGE_LEVEL; ++i) {
 	for (i = PT_PAGE_TABLE_LEVEL; i <= PT_MAX_HUGEPAGE_LEVEL; ++i) {
-		rmapp = __gfn_to_rmap(gfn, i, slot);
-		write_protected |= __rmap_write_protect(vcpu->kvm, rmapp, true);
+		rmap_head = __gfn_to_rmap(gfn, i, slot);
+		write_protected |= __rmap_write_protect(vcpu->kvm, rmap_head, true);
 	}
 	}
 
 
 	return write_protected;
 	return write_protected;
 }
 }
 
 
-static bool kvm_zap_rmapp(struct kvm *kvm, unsigned long *rmapp)
+static bool kvm_zap_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head)
 {
 {
 	u64 *sptep;
 	u64 *sptep;
 	struct rmap_iterator iter;
 	struct rmap_iterator iter;
 	bool flush = false;
 	bool flush = false;
 
 
-	while ((sptep = rmap_get_first(*rmapp, &iter))) {
-		BUG_ON(!(*sptep & PT_PRESENT_MASK));
+	while ((sptep = rmap_get_first(rmap_head, &iter))) {
 		rmap_printk("%s: spte %p %llx.\n", __func__, sptep, *sptep);
 		rmap_printk("%s: spte %p %llx.\n", __func__, sptep, *sptep);
 
 
 		drop_spte(kvm, sptep);
 		drop_spte(kvm, sptep);
@@ -1373,14 +1356,14 @@ static bool kvm_zap_rmapp(struct kvm *kvm, unsigned long *rmapp)
 	return flush;
 	return flush;
 }
 }
 
 
-static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
+static int kvm_unmap_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
 			   struct kvm_memory_slot *slot, gfn_t gfn, int level,
 			   struct kvm_memory_slot *slot, gfn_t gfn, int level,
 			   unsigned long data)
 			   unsigned long data)
 {
 {
-	return kvm_zap_rmapp(kvm, rmapp);
+	return kvm_zap_rmapp(kvm, rmap_head);
 }
 }
 
 
-static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp,
+static int kvm_set_pte_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
 			     struct kvm_memory_slot *slot, gfn_t gfn, int level,
 			     struct kvm_memory_slot *slot, gfn_t gfn, int level,
 			     unsigned long data)
 			     unsigned long data)
 {
 {
@@ -1395,7 +1378,7 @@ static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp,
 	new_pfn = pte_pfn(*ptep);
 	new_pfn = pte_pfn(*ptep);
 
 
 restart:
 restart:
-	for_each_rmap_spte(rmapp, &iter, sptep) {
+	for_each_rmap_spte(rmap_head, &iter, sptep) {
 		rmap_printk("kvm_set_pte_rmapp: spte %p %llx gfn %llx (%d)\n",
 		rmap_printk("kvm_set_pte_rmapp: spte %p %llx gfn %llx (%d)\n",
 			     sptep, *sptep, gfn, level);
 			     sptep, *sptep, gfn, level);
 
 
@@ -1433,11 +1416,11 @@ struct slot_rmap_walk_iterator {
 
 
 	/* output fields. */
 	/* output fields. */
 	gfn_t gfn;
 	gfn_t gfn;
-	unsigned long *rmap;
+	struct kvm_rmap_head *rmap;
 	int level;
 	int level;
 
 
 	/* private field. */
 	/* private field. */
-	unsigned long *end_rmap;
+	struct kvm_rmap_head *end_rmap;
 };
 };
 
 
 static void
 static void
@@ -1496,7 +1479,7 @@ static int kvm_handle_hva_range(struct kvm *kvm,
 				unsigned long end,
 				unsigned long end,
 				unsigned long data,
 				unsigned long data,
 				int (*handler)(struct kvm *kvm,
 				int (*handler)(struct kvm *kvm,
-					       unsigned long *rmapp,
+					       struct kvm_rmap_head *rmap_head,
 					       struct kvm_memory_slot *slot,
 					       struct kvm_memory_slot *slot,
 					       gfn_t gfn,
 					       gfn_t gfn,
 					       int level,
 					       int level,
@@ -1540,7 +1523,8 @@ static int kvm_handle_hva_range(struct kvm *kvm,
 
 
 static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
 static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
 			  unsigned long data,
 			  unsigned long data,
-			  int (*handler)(struct kvm *kvm, unsigned long *rmapp,
+			  int (*handler)(struct kvm *kvm,
+					 struct kvm_rmap_head *rmap_head,
 					 struct kvm_memory_slot *slot,
 					 struct kvm_memory_slot *slot,
 					 gfn_t gfn, int level,
 					 gfn_t gfn, int level,
 					 unsigned long data))
 					 unsigned long data))
@@ -1563,7 +1547,7 @@ void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
 	kvm_handle_hva(kvm, hva, (unsigned long)&pte, kvm_set_pte_rmapp);
 	kvm_handle_hva(kvm, hva, (unsigned long)&pte, kvm_set_pte_rmapp);
 }
 }
 
 
-static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
+static int kvm_age_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
 			 struct kvm_memory_slot *slot, gfn_t gfn, int level,
 			 struct kvm_memory_slot *slot, gfn_t gfn, int level,
 			 unsigned long data)
 			 unsigned long data)
 {
 {
@@ -1573,18 +1557,19 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
 
 
 	BUG_ON(!shadow_accessed_mask);
 	BUG_ON(!shadow_accessed_mask);
 
 
-	for_each_rmap_spte(rmapp, &iter, sptep)
+	for_each_rmap_spte(rmap_head, &iter, sptep) {
 		if (*sptep & shadow_accessed_mask) {
 		if (*sptep & shadow_accessed_mask) {
 			young = 1;
 			young = 1;
 			clear_bit((ffs(shadow_accessed_mask) - 1),
 			clear_bit((ffs(shadow_accessed_mask) - 1),
 				 (unsigned long *)sptep);
 				 (unsigned long *)sptep);
 		}
 		}
+	}
 
 
 	trace_kvm_age_page(gfn, level, slot, young);
 	trace_kvm_age_page(gfn, level, slot, young);
 	return young;
 	return young;
 }
 }
 
 
-static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
+static int kvm_test_age_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
 			      struct kvm_memory_slot *slot, gfn_t gfn,
 			      struct kvm_memory_slot *slot, gfn_t gfn,
 			      int level, unsigned long data)
 			      int level, unsigned long data)
 {
 {
@@ -1600,11 +1585,12 @@ static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
 	if (!shadow_accessed_mask)
 	if (!shadow_accessed_mask)
 		goto out;
 		goto out;
 
 
-	for_each_rmap_spte(rmapp, &iter, sptep)
+	for_each_rmap_spte(rmap_head, &iter, sptep) {
 		if (*sptep & shadow_accessed_mask) {
 		if (*sptep & shadow_accessed_mask) {
 			young = 1;
 			young = 1;
 			break;
 			break;
 		}
 		}
+	}
 out:
 out:
 	return young;
 	return young;
 }
 }
@@ -1613,14 +1599,14 @@ out:
 
 
 static void rmap_recycle(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn)
 static void rmap_recycle(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn)
 {
 {
-	unsigned long *rmapp;
+	struct kvm_rmap_head *rmap_head;
 	struct kvm_mmu_page *sp;
 	struct kvm_mmu_page *sp;
 
 
 	sp = page_header(__pa(spte));
 	sp = page_header(__pa(spte));
 
 
-	rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp);
+	rmap_head = gfn_to_rmap(vcpu->kvm, gfn, sp);
 
 
-	kvm_unmap_rmapp(vcpu->kvm, rmapp, NULL, gfn, sp->role.level, 0);
+	kvm_unmap_rmapp(vcpu->kvm, rmap_head, NULL, gfn, sp->role.level, 0);
 	kvm_flush_remote_tlbs(vcpu->kvm);
 	kvm_flush_remote_tlbs(vcpu->kvm);
 }
 }
 
 
@@ -1720,8 +1706,7 @@ static void drop_parent_pte(struct kvm_mmu_page *sp,
 	mmu_spte_clear_no_track(parent_pte);
 	mmu_spte_clear_no_track(parent_pte);
 }
 }
 
 
-static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu,
-					       u64 *parent_pte, int direct)
+static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, int direct)
 {
 {
 	struct kvm_mmu_page *sp;
 	struct kvm_mmu_page *sp;
 
 
@@ -1737,8 +1722,6 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu,
 	 * this feature. See the comments in kvm_zap_obsolete_pages().
 	 * this feature. See the comments in kvm_zap_obsolete_pages().
 	 */
 	 */
 	list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages);
 	list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages);
-	sp->parent_ptes = 0;
-	mmu_page_add_parent_pte(vcpu, sp, parent_pte);
 	kvm_mod_used_mmu_pages(vcpu->kvm, +1);
 	kvm_mod_used_mmu_pages(vcpu->kvm, +1);
 	return sp;
 	return sp;
 }
 }
@@ -1746,7 +1729,12 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu,
 static void mark_unsync(u64 *spte);
 static void mark_unsync(u64 *spte);
 static void kvm_mmu_mark_parents_unsync(struct kvm_mmu_page *sp)
 static void kvm_mmu_mark_parents_unsync(struct kvm_mmu_page *sp)
 {
 {
-	pte_list_walk(&sp->parent_ptes, mark_unsync);
+	u64 *sptep;
+	struct rmap_iterator iter;
+
+	for_each_rmap_spte(&sp->parent_ptes, &iter, sptep) {
+		mark_unsync(sptep);
+	}
 }
 }
 
 
 static void mark_unsync(u64 *spte)
 static void mark_unsync(u64 *spte)
@@ -1806,6 +1794,13 @@ static int mmu_pages_add(struct kvm_mmu_pages *pvec, struct kvm_mmu_page *sp,
 	return (pvec->nr == KVM_PAGE_ARRAY_NR);
 	return (pvec->nr == KVM_PAGE_ARRAY_NR);
 }
 }
 
 
+static inline void clear_unsync_child_bit(struct kvm_mmu_page *sp, int idx)
+{
+	--sp->unsync_children;
+	WARN_ON((int)sp->unsync_children < 0);
+	__clear_bit(idx, sp->unsync_child_bitmap);
+}
+
 static int __mmu_unsync_walk(struct kvm_mmu_page *sp,
 static int __mmu_unsync_walk(struct kvm_mmu_page *sp,
 			   struct kvm_mmu_pages *pvec)
 			   struct kvm_mmu_pages *pvec)
 {
 {
@@ -1815,8 +1810,10 @@ static int __mmu_unsync_walk(struct kvm_mmu_page *sp,
 		struct kvm_mmu_page *child;
 		struct kvm_mmu_page *child;
 		u64 ent = sp->spt[i];
 		u64 ent = sp->spt[i];
 
 
-		if (!is_shadow_present_pte(ent) || is_large_pte(ent))
-			goto clear_child_bitmap;
+		if (!is_shadow_present_pte(ent) || is_large_pte(ent)) {
+			clear_unsync_child_bit(sp, i);
+			continue;
+		}
 
 
 		child = page_header(ent & PT64_BASE_ADDR_MASK);
 		child = page_header(ent & PT64_BASE_ADDR_MASK);
 
 
@@ -1825,28 +1822,21 @@ static int __mmu_unsync_walk(struct kvm_mmu_page *sp,
 				return -ENOSPC;
 				return -ENOSPC;
 
 
 			ret = __mmu_unsync_walk(child, pvec);
 			ret = __mmu_unsync_walk(child, pvec);
-			if (!ret)
-				goto clear_child_bitmap;
-			else if (ret > 0)
+			if (!ret) {
+				clear_unsync_child_bit(sp, i);
+				continue;
+			} else if (ret > 0) {
 				nr_unsync_leaf += ret;
 				nr_unsync_leaf += ret;
-			else
+			} else
 				return ret;
 				return ret;
 		} else if (child->unsync) {
 		} else if (child->unsync) {
 			nr_unsync_leaf++;
 			nr_unsync_leaf++;
 			if (mmu_pages_add(pvec, child, i))
 			if (mmu_pages_add(pvec, child, i))
 				return -ENOSPC;
 				return -ENOSPC;
 		} else
 		} else
-			 goto clear_child_bitmap;
-
-		continue;
-
-clear_child_bitmap:
-		__clear_bit(i, sp->unsync_child_bitmap);
-		sp->unsync_children--;
-		WARN_ON((int)sp->unsync_children < 0);
+			clear_unsync_child_bit(sp, i);
 	}
 	}
 
 
-
 	return nr_unsync_leaf;
 	return nr_unsync_leaf;
 }
 }
 
 
@@ -2009,9 +1999,7 @@ static void mmu_pages_clear_parents(struct mmu_page_path *parents)
 		if (!sp)
 		if (!sp)
 			return;
 			return;
 
 
-		--sp->unsync_children;
-		WARN_ON((int)sp->unsync_children < 0);
-		__clear_bit(idx, sp->unsync_child_bitmap);
+		clear_unsync_child_bit(sp, idx);
 		level++;
 		level++;
 	} while (level < PT64_ROOT_LEVEL-1 && !sp->unsync_children);
 	} while (level < PT64_ROOT_LEVEL-1 && !sp->unsync_children);
 }
 }
@@ -2053,14 +2041,6 @@ static void mmu_sync_children(struct kvm_vcpu *vcpu,
 	}
 	}
 }
 }
 
 
-static void init_shadow_page_table(struct kvm_mmu_page *sp)
-{
-	int i;
-
-	for (i = 0; i < PT64_ENT_PER_PAGE; ++i)
-		sp->spt[i] = 0ull;
-}
-
 static void __clear_sp_write_flooding_count(struct kvm_mmu_page *sp)
 static void __clear_sp_write_flooding_count(struct kvm_mmu_page *sp)
 {
 {
 	sp->write_flooding_count = 0;
 	sp->write_flooding_count = 0;
@@ -2083,8 +2063,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
 					     gva_t gaddr,
 					     gva_t gaddr,
 					     unsigned level,
 					     unsigned level,
 					     int direct,
 					     int direct,
-					     unsigned access,
-					     u64 *parent_pte)
+					     unsigned access)
 {
 {
 	union kvm_mmu_page_role role;
 	union kvm_mmu_page_role role;
 	unsigned quadrant;
 	unsigned quadrant;
@@ -2116,21 +2095,18 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
 		if (sp->unsync && kvm_sync_page_transient(vcpu, sp))
 		if (sp->unsync && kvm_sync_page_transient(vcpu, sp))
 			break;
 			break;
 
 
-		mmu_page_add_parent_pte(vcpu, sp, parent_pte);
-		if (sp->unsync_children) {
+		if (sp->unsync_children)
 			kvm_make_request(KVM_REQ_MMU_SYNC, vcpu);
 			kvm_make_request(KVM_REQ_MMU_SYNC, vcpu);
-			kvm_mmu_mark_parents_unsync(sp);
-		} else if (sp->unsync)
-			kvm_mmu_mark_parents_unsync(sp);
 
 
 		__clear_sp_write_flooding_count(sp);
 		__clear_sp_write_flooding_count(sp);
 		trace_kvm_mmu_get_page(sp, false);
 		trace_kvm_mmu_get_page(sp, false);
 		return sp;
 		return sp;
 	}
 	}
+
 	++vcpu->kvm->stat.mmu_cache_miss;
 	++vcpu->kvm->stat.mmu_cache_miss;
-	sp = kvm_mmu_alloc_page(vcpu, parent_pte, direct);
-	if (!sp)
-		return sp;
+
+	sp = kvm_mmu_alloc_page(vcpu, direct);
+
 	sp->gfn = gfn;
 	sp->gfn = gfn;
 	sp->role = role;
 	sp->role = role;
 	hlist_add_head(&sp->hash_link,
 	hlist_add_head(&sp->hash_link,
@@ -2144,7 +2120,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
 		account_shadowed(vcpu->kvm, sp);
 		account_shadowed(vcpu->kvm, sp);
 	}
 	}
 	sp->mmu_valid_gen = vcpu->kvm->arch.mmu_valid_gen;
 	sp->mmu_valid_gen = vcpu->kvm->arch.mmu_valid_gen;
-	init_shadow_page_table(sp);
+	clear_page(sp->spt);
 	trace_kvm_mmu_get_page(sp, true);
 	trace_kvm_mmu_get_page(sp, true);
 	return sp;
 	return sp;
 }
 }
@@ -2198,7 +2174,8 @@ static void shadow_walk_next(struct kvm_shadow_walk_iterator *iterator)
 	return __shadow_walk_next(iterator, *iterator->sptep);
 	return __shadow_walk_next(iterator, *iterator->sptep);
 }
 }
 
 
-static void link_shadow_page(u64 *sptep, struct kvm_mmu_page *sp, bool accessed)
+static void link_shadow_page(struct kvm_vcpu *vcpu, u64 *sptep,
+			     struct kvm_mmu_page *sp)
 {
 {
 	u64 spte;
 	u64 spte;
 
 
@@ -2206,12 +2183,14 @@ static void link_shadow_page(u64 *sptep, struct kvm_mmu_page *sp, bool accessed)
 			VMX_EPT_WRITABLE_MASK != PT_WRITABLE_MASK);
 			VMX_EPT_WRITABLE_MASK != PT_WRITABLE_MASK);
 
 
 	spte = __pa(sp->spt) | PT_PRESENT_MASK | PT_WRITABLE_MASK |
 	spte = __pa(sp->spt) | PT_PRESENT_MASK | PT_WRITABLE_MASK |
-	       shadow_user_mask | shadow_x_mask;
-
-	if (accessed)
-		spte |= shadow_accessed_mask;
+	       shadow_user_mask | shadow_x_mask | shadow_accessed_mask;
 
 
 	mmu_spte_set(sptep, spte);
 	mmu_spte_set(sptep, spte);
+
+	mmu_page_add_parent_pte(vcpu, sp, sptep);
+
+	if (sp->unsync_children || sp->unsync)
+		mark_unsync(sptep);
 }
 }
 
 
 static void validate_direct_spte(struct kvm_vcpu *vcpu, u64 *sptep,
 static void validate_direct_spte(struct kvm_vcpu *vcpu, u64 *sptep,
@@ -2270,17 +2249,12 @@ static void kvm_mmu_page_unlink_children(struct kvm *kvm,
 		mmu_page_zap_pte(kvm, sp, sp->spt + i);
 		mmu_page_zap_pte(kvm, sp, sp->spt + i);
 }
 }
 
 
-static void kvm_mmu_put_page(struct kvm_mmu_page *sp, u64 *parent_pte)
-{
-	mmu_page_remove_parent_pte(sp, parent_pte);
-}
-
 static void kvm_mmu_unlink_parents(struct kvm *kvm, struct kvm_mmu_page *sp)
 static void kvm_mmu_unlink_parents(struct kvm *kvm, struct kvm_mmu_page *sp)
 {
 {
 	u64 *sptep;
 	u64 *sptep;
 	struct rmap_iterator iter;
 	struct rmap_iterator iter;
 
 
-	while ((sptep = rmap_get_first(sp->parent_ptes, &iter)))
+	while ((sptep = rmap_get_first(&sp->parent_ptes, &iter)))
 		drop_parent_pte(sp, sptep);
 		drop_parent_pte(sp, sptep);
 }
 }
 
 
@@ -2564,18 +2538,18 @@ done:
 	return ret;
 	return ret;
 }
 }
 
 
-static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
-			 unsigned pte_access, int write_fault, int *emulate,
-			 int level, gfn_t gfn, pfn_t pfn, bool speculative,
-			 bool host_writable)
+static bool mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, unsigned pte_access,
+			 int write_fault, int level, gfn_t gfn, pfn_t pfn,
+			 bool speculative, bool host_writable)
 {
 {
 	int was_rmapped = 0;
 	int was_rmapped = 0;
 	int rmap_count;
 	int rmap_count;
+	bool emulate = false;
 
 
 	pgprintk("%s: spte %llx write_fault %d gfn %llx\n", __func__,
 	pgprintk("%s: spte %llx write_fault %d gfn %llx\n", __func__,
 		 *sptep, write_fault, gfn);
 		 *sptep, write_fault, gfn);
 
 
-	if (is_rmap_spte(*sptep)) {
+	if (is_shadow_present_pte(*sptep)) {
 		/*
 		/*
 		 * If we overwrite a PTE page pointer with a 2MB PMD, unlink
 		 * If we overwrite a PTE page pointer with a 2MB PMD, unlink
 		 * the parent of the now unreachable PTE.
 		 * the parent of the now unreachable PTE.
@@ -2600,12 +2574,12 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
 	if (set_spte(vcpu, sptep, pte_access, level, gfn, pfn, speculative,
 	if (set_spte(vcpu, sptep, pte_access, level, gfn, pfn, speculative,
 	      true, host_writable)) {
 	      true, host_writable)) {
 		if (write_fault)
 		if (write_fault)
-			*emulate = 1;
+			emulate = true;
 		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
 		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
 	}
 	}
 
 
-	if (unlikely(is_mmio_spte(*sptep) && emulate))
-		*emulate = 1;
+	if (unlikely(is_mmio_spte(*sptep)))
+		emulate = true;
 
 
 	pgprintk("%s: setting spte %llx\n", __func__, *sptep);
 	pgprintk("%s: setting spte %llx\n", __func__, *sptep);
 	pgprintk("instantiating %s PTE (%s) at %llx (%llx) addr %p\n",
 	pgprintk("instantiating %s PTE (%s) at %llx (%llx) addr %p\n",
@@ -2624,6 +2598,8 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
 	}
 	}
 
 
 	kvm_release_pfn_clean(pfn);
 	kvm_release_pfn_clean(pfn);
+
+	return emulate;
 }
 }
 
 
 static pfn_t pte_prefetch_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn,
 static pfn_t pte_prefetch_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn,
@@ -2658,9 +2634,8 @@ static int direct_pte_prefetch_many(struct kvm_vcpu *vcpu,
 		return -1;
 		return -1;
 
 
 	for (i = 0; i < ret; i++, gfn++, start++)
 	for (i = 0; i < ret; i++, gfn++, start++)
-		mmu_set_spte(vcpu, start, access, 0, NULL,
-			     sp->role.level, gfn, page_to_pfn(pages[i]),
-			     true, true);
+		mmu_set_spte(vcpu, start, access, 0, sp->role.level, gfn,
+			     page_to_pfn(pages[i]), true, true);
 
 
 	return 0;
 	return 0;
 }
 }
@@ -2708,9 +2683,8 @@ static void direct_pte_prefetch(struct kvm_vcpu *vcpu, u64 *sptep)
 	__direct_pte_prefetch(vcpu, sp, sptep);
 	__direct_pte_prefetch(vcpu, sp, sptep);
 }
 }
 
 
-static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
-			int map_writable, int level, gfn_t gfn, pfn_t pfn,
-			bool prefault)
+static int __direct_map(struct kvm_vcpu *vcpu, int write, int map_writable,
+			int level, gfn_t gfn, pfn_t pfn, bool prefault)
 {
 {
 	struct kvm_shadow_walk_iterator iterator;
 	struct kvm_shadow_walk_iterator iterator;
 	struct kvm_mmu_page *sp;
 	struct kvm_mmu_page *sp;
@@ -2722,9 +2696,9 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
 
 
 	for_each_shadow_entry(vcpu, (u64)gfn << PAGE_SHIFT, iterator) {
 	for_each_shadow_entry(vcpu, (u64)gfn << PAGE_SHIFT, iterator) {
 		if (iterator.level == level) {
 		if (iterator.level == level) {
-			mmu_set_spte(vcpu, iterator.sptep, ACC_ALL,
-				     write, &emulate, level, gfn, pfn,
-				     prefault, map_writable);
+			emulate = mmu_set_spte(vcpu, iterator.sptep, ACC_ALL,
+					       write, level, gfn, pfn, prefault,
+					       map_writable);
 			direct_pte_prefetch(vcpu, iterator.sptep);
 			direct_pte_prefetch(vcpu, iterator.sptep);
 			++vcpu->stat.pf_fixed;
 			++vcpu->stat.pf_fixed;
 			break;
 			break;
@@ -2737,10 +2711,9 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
 			base_addr &= PT64_LVL_ADDR_MASK(iterator.level);
 			base_addr &= PT64_LVL_ADDR_MASK(iterator.level);
 			pseudo_gfn = base_addr >> PAGE_SHIFT;
 			pseudo_gfn = base_addr >> PAGE_SHIFT;
 			sp = kvm_mmu_get_page(vcpu, pseudo_gfn, iterator.addr,
 			sp = kvm_mmu_get_page(vcpu, pseudo_gfn, iterator.addr,
-					      iterator.level - 1,
-					      1, ACC_ALL, iterator.sptep);
+					      iterator.level - 1, 1, ACC_ALL);
 
 
-			link_shadow_page(iterator.sptep, sp, true);
+			link_shadow_page(vcpu, iterator.sptep, sp);
 		}
 		}
 	}
 	}
 	return emulate;
 	return emulate;
@@ -2919,7 +2892,7 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level,
 	 * If the mapping has been changed, let the vcpu fault on the
 	 * If the mapping has been changed, let the vcpu fault on the
 	 * same address again.
 	 * same address again.
 	 */
 	 */
-	if (!is_rmap_spte(spte)) {
+	if (!is_shadow_present_pte(spte)) {
 		ret = true;
 		ret = true;
 		goto exit;
 		goto exit;
 	}
 	}
@@ -3018,11 +2991,9 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code,
 	make_mmu_pages_available(vcpu);
 	make_mmu_pages_available(vcpu);
 	if (likely(!force_pt_level))
 	if (likely(!force_pt_level))
 		transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level);
 		transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level);
-	r = __direct_map(vcpu, v, write, map_writable, level, gfn, pfn,
-			 prefault);
+	r = __direct_map(vcpu, write, map_writable, level, gfn, pfn, prefault);
 	spin_unlock(&vcpu->kvm->mmu_lock);
 	spin_unlock(&vcpu->kvm->mmu_lock);
 
 
-
 	return r;
 	return r;
 
 
 out_unlock:
 out_unlock:
@@ -3097,8 +3068,7 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)
 	if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) {
 	if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) {
 		spin_lock(&vcpu->kvm->mmu_lock);
 		spin_lock(&vcpu->kvm->mmu_lock);
 		make_mmu_pages_available(vcpu);
 		make_mmu_pages_available(vcpu);
-		sp = kvm_mmu_get_page(vcpu, 0, 0, PT64_ROOT_LEVEL,
-				      1, ACC_ALL, NULL);
+		sp = kvm_mmu_get_page(vcpu, 0, 0, PT64_ROOT_LEVEL, 1, ACC_ALL);
 		++sp->root_count;
 		++sp->root_count;
 		spin_unlock(&vcpu->kvm->mmu_lock);
 		spin_unlock(&vcpu->kvm->mmu_lock);
 		vcpu->arch.mmu.root_hpa = __pa(sp->spt);
 		vcpu->arch.mmu.root_hpa = __pa(sp->spt);
@@ -3110,9 +3080,7 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)
 			spin_lock(&vcpu->kvm->mmu_lock);
 			spin_lock(&vcpu->kvm->mmu_lock);
 			make_mmu_pages_available(vcpu);
 			make_mmu_pages_available(vcpu);
 			sp = kvm_mmu_get_page(vcpu, i << (30 - PAGE_SHIFT),
 			sp = kvm_mmu_get_page(vcpu, i << (30 - PAGE_SHIFT),
-					      i << 30,
-					      PT32_ROOT_LEVEL, 1, ACC_ALL,
-					      NULL);
+					i << 30, PT32_ROOT_LEVEL, 1, ACC_ALL);
 			root = __pa(sp->spt);
 			root = __pa(sp->spt);
 			++sp->root_count;
 			++sp->root_count;
 			spin_unlock(&vcpu->kvm->mmu_lock);
 			spin_unlock(&vcpu->kvm->mmu_lock);
@@ -3149,7 +3117,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
 		spin_lock(&vcpu->kvm->mmu_lock);
 		spin_lock(&vcpu->kvm->mmu_lock);
 		make_mmu_pages_available(vcpu);
 		make_mmu_pages_available(vcpu);
 		sp = kvm_mmu_get_page(vcpu, root_gfn, 0, PT64_ROOT_LEVEL,
 		sp = kvm_mmu_get_page(vcpu, root_gfn, 0, PT64_ROOT_LEVEL,
-				      0, ACC_ALL, NULL);
+				      0, ACC_ALL);
 		root = __pa(sp->spt);
 		root = __pa(sp->spt);
 		++sp->root_count;
 		++sp->root_count;
 		spin_unlock(&vcpu->kvm->mmu_lock);
 		spin_unlock(&vcpu->kvm->mmu_lock);
@@ -3182,9 +3150,8 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
 		}
 		}
 		spin_lock(&vcpu->kvm->mmu_lock);
 		spin_lock(&vcpu->kvm->mmu_lock);
 		make_mmu_pages_available(vcpu);
 		make_mmu_pages_available(vcpu);
-		sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30,
-				      PT32_ROOT_LEVEL, 0,
-				      ACC_ALL, NULL);
+		sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30, PT32_ROOT_LEVEL,
+				      0, ACC_ALL);
 		root = __pa(sp->spt);
 		root = __pa(sp->spt);
 		++sp->root_count;
 		++sp->root_count;
 		spin_unlock(&vcpu->kvm->mmu_lock);
 		spin_unlock(&vcpu->kvm->mmu_lock);
@@ -3531,8 +3498,7 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code,
 	make_mmu_pages_available(vcpu);
 	make_mmu_pages_available(vcpu);
 	if (likely(!force_pt_level))
 	if (likely(!force_pt_level))
 		transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level);
 		transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level);
-	r = __direct_map(vcpu, gpa, write, map_writable,
-			 level, gfn, pfn, prefault);
+	r = __direct_map(vcpu, write, map_writable, level, gfn, pfn, prefault);
 	spin_unlock(&vcpu->kvm->mmu_lock);
 	spin_unlock(&vcpu->kvm->mmu_lock);
 
 
 	return r;
 	return r;
@@ -4058,10 +4024,12 @@ static void init_kvm_nested_mmu(struct kvm_vcpu *vcpu)
 	g_context->inject_page_fault = kvm_inject_page_fault;
 
 	/*
-	 * Note that arch.mmu.gva_to_gpa translates l2_gva to l1_gpa. The
-	 * translation of l2_gpa to l1_gpa addresses is done using the
-	 * arch.nested_mmu.gva_to_gpa function. Basically the gva_to_gpa
-	 * functions between mmu and nested_mmu are swapped.
+	 * Note that arch.mmu.gva_to_gpa translates l2_gpa to l1_gpa using
+	 * L1's nested page tables (e.g. EPT12). The nested translation
+	 * of l2_gva to l1_gpa is done by arch.nested_mmu.gva_to_gpa using
+	 * L2's page tables as the first level of translation and L1's
+	 * nested page tables as the second level of translation. Basically
+	 * the gva_to_gpa functions between mmu and nested_mmu are swapped.
	 */
 	if (!is_paging(vcpu)) {
 		g_context->nx = false;
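
A conceptual sketch of the two translations the rewritten comment distinguishes (not code from the patch; translate_l2_page_tables() and translate_ept12() are hypothetical names standing in for the walk of L2's own page tables and the walk of L1's nested page tables):

#include <stdint.h>

typedef uint64_t gpa_t;
typedef unsigned long gva_t;

/* hypothetical helpers, named for illustration only */
gpa_t translate_l2_page_tables(gva_t l2_gva);	/* walk L2's own page tables    */
gpa_t translate_ept12(gpa_t l2_gpa);		/* walk L1's nested page tables */

/*
 * arch.mmu.gva_to_gpa plays the role of translate_ept12() here: l2_gpa -> l1_gpa.
 * arch.nested_mmu.gva_to_gpa composes both steps: l2_gva -> l1_gpa.  (In the
 * kernel, the table pointers encountered while walking L2's page tables are
 * themselves pushed through the nested tables in the same way.)
 */
static gpa_t nested_gva_to_gpa(gva_t l2_gva)
{
	gpa_t l2_gpa = translate_l2_page_tables(l2_gva);

	return translate_ept12(l2_gpa);
}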
@@ -4495,7 +4463,7 @@ void kvm_mmu_setup(struct kvm_vcpu *vcpu)
 }
 }
 
 
 /* The return value indicates if tlb flush on all vcpus is needed. */
 /* The return value indicates if tlb flush on all vcpus is needed. */
-typedef bool (*slot_level_handler) (struct kvm *kvm, unsigned long *rmap);
+typedef bool (*slot_level_handler) (struct kvm *kvm, struct kvm_rmap_head *rmap_head);
 
 
 /* The caller should hold mmu-lock before calling this function. */
 /* The caller should hold mmu-lock before calling this function. */
 static bool
 static bool
@@ -4589,9 +4557,10 @@ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end)
 	spin_unlock(&kvm->mmu_lock);
 	spin_unlock(&kvm->mmu_lock);
 }
 }
 
 
-static bool slot_rmap_write_protect(struct kvm *kvm, unsigned long *rmapp)
+static bool slot_rmap_write_protect(struct kvm *kvm,
+				    struct kvm_rmap_head *rmap_head)
 {
 {
-	return __rmap_write_protect(kvm, rmapp, false);
+	return __rmap_write_protect(kvm, rmap_head, false);
 }
 }
 
 
 void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
 void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
@@ -4627,7 +4596,7 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
 }
 }
 
 
 static bool kvm_mmu_zap_collapsible_spte(struct kvm *kvm,
 static bool kvm_mmu_zap_collapsible_spte(struct kvm *kvm,
-		unsigned long *rmapp)
+					 struct kvm_rmap_head *rmap_head)
 {
 {
 	u64 *sptep;
 	u64 *sptep;
 	struct rmap_iterator iter;
 	struct rmap_iterator iter;
@@ -4636,7 +4605,7 @@ static bool kvm_mmu_zap_collapsible_spte(struct kvm *kvm,
 	struct kvm_mmu_page *sp;
 	struct kvm_mmu_page *sp;
 
 
 restart:
 restart:
-	for_each_rmap_spte(rmapp, &iter, sptep) {
+	for_each_rmap_spte(rmap_head, &iter, sptep) {
 		sp = page_header(__pa(sptep));
 		sp = page_header(__pa(sptep));
 		pfn = spte_to_pfn(*sptep);
 		pfn = spte_to_pfn(*sptep);
 
 

+ 8 - 7
arch/x86/kvm/mmu_audit.c

@@ -129,7 +129,7 @@ static void audit_mappings(struct kvm_vcpu *vcpu, u64 *sptep, int level)
 static void inspect_spte_has_rmap(struct kvm *kvm, u64 *sptep)
 static void inspect_spte_has_rmap(struct kvm *kvm, u64 *sptep)
 {
 {
 	static DEFINE_RATELIMIT_STATE(ratelimit_state, 5 * HZ, 10);
 	static DEFINE_RATELIMIT_STATE(ratelimit_state, 5 * HZ, 10);
-	unsigned long *rmapp;
+	struct kvm_rmap_head *rmap_head;
 	struct kvm_mmu_page *rev_sp;
 	struct kvm_mmu_page *rev_sp;
 	struct kvm_memslots *slots;
 	struct kvm_memslots *slots;
 	struct kvm_memory_slot *slot;
 	struct kvm_memory_slot *slot;
@@ -150,8 +150,8 @@ static void inspect_spte_has_rmap(struct kvm *kvm, u64 *sptep)
 		return;
 		return;
 	}
 	}
 
 
-	rmapp = __gfn_to_rmap(gfn, rev_sp->role.level, slot);
-	if (!*rmapp) {
+	rmap_head = __gfn_to_rmap(gfn, rev_sp->role.level, slot);
+	if (!rmap_head->val) {
 		if (!__ratelimit(&ratelimit_state))
 		if (!__ratelimit(&ratelimit_state))
 			return;
 			return;
 		audit_printk(kvm, "no rmap for writable spte %llx\n",
 		audit_printk(kvm, "no rmap for writable spte %llx\n",
@@ -183,7 +183,7 @@ static void check_mappings_rmap(struct kvm *kvm, struct kvm_mmu_page *sp)
 		return;
 		return;
 
 
 	for (i = 0; i < PT64_ENT_PER_PAGE; ++i) {
 	for (i = 0; i < PT64_ENT_PER_PAGE; ++i) {
-		if (!is_rmap_spte(sp->spt[i]))
+		if (!is_shadow_present_pte(sp->spt[i]))
 			continue;
 			continue;
 
 
 		inspect_spte_has_rmap(kvm, sp->spt + i);
 		inspect_spte_has_rmap(kvm, sp->spt + i);
@@ -192,7 +192,7 @@ static void check_mappings_rmap(struct kvm *kvm, struct kvm_mmu_page *sp)
 
 
 static void audit_write_protection(struct kvm *kvm, struct kvm_mmu_page *sp)
 static void audit_write_protection(struct kvm *kvm, struct kvm_mmu_page *sp)
 {
 {
-	unsigned long *rmapp;
+	struct kvm_rmap_head *rmap_head;
 	u64 *sptep;
 	u64 *sptep;
 	struct rmap_iterator iter;
 	struct rmap_iterator iter;
 	struct kvm_memslots *slots;
 	struct kvm_memslots *slots;
@@ -203,13 +203,14 @@ static void audit_write_protection(struct kvm *kvm, struct kvm_mmu_page *sp)
 
 
 	slots = kvm_memslots_for_spte_role(kvm, sp->role);
 	slots = kvm_memslots_for_spte_role(kvm, sp->role);
 	slot = __gfn_to_memslot(slots, sp->gfn);
 	slot = __gfn_to_memslot(slots, sp->gfn);
-	rmapp = __gfn_to_rmap(sp->gfn, PT_PAGE_TABLE_LEVEL, slot);
+	rmap_head = __gfn_to_rmap(sp->gfn, PT_PAGE_TABLE_LEVEL, slot);
 
 
-	for_each_rmap_spte(rmapp, &iter, sptep)
+	for_each_rmap_spte(rmap_head, &iter, sptep) {
 		if (is_writable_pte(*sptep))
 		if (is_writable_pte(*sptep))
 			audit_printk(kvm, "shadow page has writable "
 			audit_printk(kvm, "shadow page has writable "
 				     "mappings: gfn %llx role %x\n",
 				     "mappings: gfn %llx role %x\n",
 				     sp->gfn, sp->role.word);
 				     sp->gfn, sp->role.word);
+	}
 }
 }
 
 
 static void audit_sp(struct kvm *kvm, struct kvm_mmu_page *sp)
 static void audit_sp(struct kvm *kvm, struct kvm_mmu_page *sp)

+ 9 - 11
arch/x86/kvm/paging_tmpl.h

@@ -475,8 +475,8 @@ FNAME(prefetch_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
 	 * we call mmu_set_spte() with host_writable = true because
 	 * we call mmu_set_spte() with host_writable = true because
 	 * pte_prefetch_gfn_to_pfn always gets a writable pfn.
 	 * pte_prefetch_gfn_to_pfn always gets a writable pfn.
 	 */
 	 */
-	mmu_set_spte(vcpu, spte, pte_access, 0, NULL, PT_PAGE_TABLE_LEVEL,
-		     gfn, pfn, true, true);
+	mmu_set_spte(vcpu, spte, pte_access, 0, PT_PAGE_TABLE_LEVEL, gfn, pfn,
+		     true, true);
 
 
 	return true;
 	return true;
 }
 }
@@ -556,7 +556,7 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
 	struct kvm_mmu_page *sp = NULL;
 	struct kvm_mmu_page *sp = NULL;
 	struct kvm_shadow_walk_iterator it;
 	struct kvm_shadow_walk_iterator it;
 	unsigned direct_access, access = gw->pt_access;
 	unsigned direct_access, access = gw->pt_access;
-	int top_level, emulate = 0;
+	int top_level, emulate;
 
 
 	direct_access = gw->pte_access;
 	direct_access = gw->pte_access;
 
 
@@ -587,7 +587,7 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
 		if (!is_shadow_present_pte(*it.sptep)) {
 		if (!is_shadow_present_pte(*it.sptep)) {
 			table_gfn = gw->table_gfn[it.level - 2];
 			table_gfn = gw->table_gfn[it.level - 2];
 			sp = kvm_mmu_get_page(vcpu, table_gfn, addr, it.level-1,
 			sp = kvm_mmu_get_page(vcpu, table_gfn, addr, it.level-1,
-					      false, access, it.sptep);
+					      false, access);
 		}
 		}
 
 
 		/*
 		/*
@@ -598,7 +598,7 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
 			goto out_gpte_changed;
 			goto out_gpte_changed;
 
 
 		if (sp)
 		if (sp)
-			link_shadow_page(it.sptep, sp, PT_GUEST_ACCESSED_MASK);
+			link_shadow_page(vcpu, it.sptep, sp);
 	}
 	}
 
 
 	for (;
 	for (;
@@ -617,20 +617,18 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
 		direct_gfn = gw->gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1);
 		direct_gfn = gw->gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1);
 
 
 		sp = kvm_mmu_get_page(vcpu, direct_gfn, addr, it.level-1,
 		sp = kvm_mmu_get_page(vcpu, direct_gfn, addr, it.level-1,
-				      true, direct_access, it.sptep);
-		link_shadow_page(it.sptep, sp, PT_GUEST_ACCESSED_MASK);
+				      true, direct_access);
+		link_shadow_page(vcpu, it.sptep, sp);
 	}
 	}
 
 
 	clear_sp_write_flooding_count(it.sptep);
 	clear_sp_write_flooding_count(it.sptep);
-	mmu_set_spte(vcpu, it.sptep, gw->pte_access, write_fault, &emulate,
-		     it.level, gw->gfn, pfn, prefault, map_writable);
+	emulate = mmu_set_spte(vcpu, it.sptep, gw->pte_access, write_fault,
+			       it.level, gw->gfn, pfn, prefault, map_writable);
 	FNAME(pte_prefetch)(vcpu, gw, it.sptep);
 	FNAME(pte_prefetch)(vcpu, gw, it.sptep);
 
 
 	return emulate;
 	return emulate;
 
 
 out_gpte_changed:
 out_gpte_changed:
-	if (sp)
-		kvm_mmu_put_page(sp, it.sptep);
 	kvm_release_pfn_clean(pfn);
 	kvm_release_pfn_clean(pfn);
 	return 0;
 	return 0;
 }
 }

+ 32 - 5
arch/x86/kvm/svm.c

@@ -86,6 +86,7 @@ static const u32 host_save_user_msrs[] = {
 	MSR_FS_BASE,
 #endif
 	MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
+	MSR_TSC_AUX,
 };
 
 #define NR_HOST_SAVE_USER_MSRS ARRAY_SIZE(host_save_user_msrs)
@@ -135,6 +136,7 @@ struct vcpu_svm {
 	uint64_t asid_generation;
 	uint64_t sysenter_esp;
 	uint64_t sysenter_eip;
+	uint64_t tsc_aux;
 
 	u64 next_rip;
 
@@ -1238,6 +1240,9 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 			wrmsrl(MSR_AMD64_TSC_RATIO, tsc_ratio);
 		}
 	}
+	/* This assumes that the kernel never uses MSR_TSC_AUX */
+	if (static_cpu_has(X86_FEATURE_RDTSCP))
+		wrmsrl(MSR_TSC_AUX, svm->tsc_aux);
 }
 
 static void svm_vcpu_put(struct kvm_vcpu *vcpu)
@@ -3024,6 +3029,11 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 	case MSR_IA32_SYSENTER_ESP:
 		msr_info->data = svm->sysenter_esp;
 		break;
+	case MSR_TSC_AUX:
+		if (!boot_cpu_has(X86_FEATURE_RDTSCP))
+			return 1;
+		msr_info->data = svm->tsc_aux;
+		break;
 	/*
 	 * Nobody will change the following 5 values in the VMCB so we can
 	 * safely return them on rdmsr. They will always be 0 until LBRV is
@@ -3162,6 +3172,18 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
 		svm->sysenter_esp = data;
 		svm->vmcb->save.sysenter_esp = data;
 		break;
+	case MSR_TSC_AUX:
+		if (!boot_cpu_has(X86_FEATURE_RDTSCP))
+			return 1;
+
+		/*
+		 * This is rare, so we update the MSR here instead of using
+		 * direct_access_msrs.  Doing that would require a rdmsr in
+		 * svm_vcpu_put.
+		 */
+		svm->tsc_aux = data;
+		wrmsrl(MSR_TSC_AUX, svm->tsc_aux);
+		break;
 	case MSR_IA32_DEBUGCTLMSR:
 		if (!boot_cpu_has(X86_FEATURE_LBRV)) {
 			vcpu_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTL 0x%llx, nop\n",
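
For context (not part of the patch): MSR_TSC_AUX has to be switched in at vcpu_load time because RDTSCP hands the current value of that MSR straight back to whatever code executes the instruction, as the small userspace example below shows (Linux normally publishes the CPU and node number through it).

#include <stdio.h>
#include <x86intrin.h>

int main(void)
{
	unsigned int aux;
	unsigned long long tsc = __rdtscp(&aux);	/* aux <- current IA32_TSC_AUX */

	printf("tsc=%llu tsc_aux=%u\n", tsc, aux);
	return 0;
}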
@@ -3578,12 +3600,16 @@ static void svm_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set)
 	return;
 	return;
 }
 }
 
 
-static int svm_cpu_uses_apicv(struct kvm_vcpu *vcpu)
+static bool svm_get_enable_apicv(void)
+{
+	return false;
+}
+
+static void svm_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
 {
 {
-	return 0;
 }
 }
 
 
-static void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu)
+static void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
 {
 {
 	return;
 	return;
 }
 }
@@ -4054,7 +4080,7 @@ static int svm_get_lpage_level(void)
 
 
 static bool svm_rdtscp_supported(void)
 static bool svm_rdtscp_supported(void)
 {
 {
-	return false;
+	return boot_cpu_has(X86_FEATURE_RDTSCP);
 }
 }
 
 
 static bool svm_invpcid_supported(void)
 static bool svm_invpcid_supported(void)
@@ -4345,7 +4371,8 @@ static struct kvm_x86_ops svm_x86_ops = {
 	.enable_irq_window = enable_irq_window,
 	.enable_irq_window = enable_irq_window,
 	.update_cr8_intercept = update_cr8_intercept,
 	.update_cr8_intercept = update_cr8_intercept,
 	.set_virtual_x2apic_mode = svm_set_virtual_x2apic_mode,
 	.set_virtual_x2apic_mode = svm_set_virtual_x2apic_mode,
-	.cpu_uses_apicv = svm_cpu_uses_apicv,
+	.get_enable_apicv = svm_get_enable_apicv,
+	.refresh_apicv_exec_ctrl = svm_refresh_apicv_exec_ctrl,
 	.load_eoi_exitmap = svm_load_eoi_exitmap,
 	.load_eoi_exitmap = svm_load_eoi_exitmap,
 	.sync_pir_to_irr = svm_sync_pir_to_irr,
 	.sync_pir_to_irr = svm_sync_pir_to_irr,
 
 

+ 264 - 1
arch/x86/kvm/trace.h

@@ -268,7 +268,7 @@ TRACE_EVENT(kvm_inj_virq,
 #define kvm_trace_sym_exc						\
 	EXS(DE), EXS(DB), EXS(BP), EXS(OF), EXS(BR), EXS(UD), EXS(NM),	\
 	EXS(DF), EXS(TS), EXS(NP), EXS(SS), EXS(GP), EXS(PF),		\
-	EXS(MF), EXS(MC)
+	EXS(MF), EXS(AC), EXS(MC)
 
 /*
  * Tracepoint for kvm interrupt injection:
@@ -1025,6 +1025,269 @@ TRACE_EVENT(kvm_pi_irte_update,
 		  __entry->pi_desc_addr)
 );
 
+/*
+ * Tracepoint for kvm_hv_notify_acked_sint.
+ */
+TRACE_EVENT(kvm_hv_notify_acked_sint,
+	TP_PROTO(int vcpu_id, u32 sint),
+	TP_ARGS(vcpu_id, sint),
+
+	TP_STRUCT__entry(
+		__field(int, vcpu_id)
+		__field(u32, sint)
+	),
+
+	TP_fast_assign(
+		__entry->vcpu_id = vcpu_id;
+		__entry->sint = sint;
+	),
+
+	TP_printk("vcpu_id %d sint %u", __entry->vcpu_id, __entry->sint)
+);
+
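/*
 * (Illustrative aside, not part of the diff: TRACE_EVENT() generates a
 * trace_kvm_hv_notify_acked_sint() helper, which the Hyper-V SynIC code is
 * expected to invoke roughly as
 *	trace_kvm_hv_notify_acked_sint(vcpu->vcpu_id, sint);
 * at the point where an acked SINT is noticed.)
 */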
+/*
+ * Tracepoint for synic_set_irq.
+ */
+TRACE_EVENT(kvm_hv_synic_set_irq,
+	TP_PROTO(int vcpu_id, u32 sint, int vector, int ret),
+	TP_ARGS(vcpu_id, sint, vector, ret),
+
+	TP_STRUCT__entry(
+		__field(int, vcpu_id)
+		__field(u32, sint)
+		__field(int, vector)
+		__field(int, ret)
+	),
+
+	TP_fast_assign(
+		__entry->vcpu_id = vcpu_id;
+		__entry->sint = sint;
+		__entry->vector = vector;
+		__entry->ret = ret;
+	),
+
+	TP_printk("vcpu_id %d sint %u vector %d ret %d",
+		  __entry->vcpu_id, __entry->sint, __entry->vector,
+		  __entry->ret)
+);
+
+/*
+ * Tracepoint for kvm_hv_synic_send_eoi.
+ */
+TRACE_EVENT(kvm_hv_synic_send_eoi,
+	TP_PROTO(int vcpu_id, int vector),
+	TP_ARGS(vcpu_id, vector),
+
+	TP_STRUCT__entry(
+		__field(int, vcpu_id)
+		__field(u32, sint)
+		__field(int, vector)
+		__field(int, ret)
+	),
+
+	TP_fast_assign(
+		__entry->vcpu_id = vcpu_id;
+		__entry->vector	= vector;
+	),
+
+	TP_printk("vcpu_id %d vector %d", __entry->vcpu_id, __entry->vector)
+);
+
+/*
+ * Tracepoint for synic_set_msr.
+ */
+TRACE_EVENT(kvm_hv_synic_set_msr,
+	TP_PROTO(int vcpu_id, u32 msr, u64 data, bool host),
+	TP_ARGS(vcpu_id, msr, data, host),
+
+	TP_STRUCT__entry(
+		__field(int, vcpu_id)
+		__field(u32, msr)
+		__field(u64, data)
+		__field(bool, host)
+	),
+
+	TP_fast_assign(
+		__entry->vcpu_id = vcpu_id;
+		__entry->msr = msr;
+		__entry->data = data;
+		__entry->host = host
+	),
+
+	TP_printk("vcpu_id %d msr 0x%x data 0x%llx host %d",
+		  __entry->vcpu_id, __entry->msr, __entry->data, __entry->host)
+);
+
+/*
+ * Tracepoint for stimer_set_config.
+ */
+TRACE_EVENT(kvm_hv_stimer_set_config,
+	TP_PROTO(int vcpu_id, int timer_index, u64 config, bool host),
+	TP_ARGS(vcpu_id, timer_index, config, host),
+
+	TP_STRUCT__entry(
+		__field(int, vcpu_id)
+		__field(int, timer_index)
+		__field(u64, config)
+		__field(bool, host)
+	),
+
+	TP_fast_assign(
+		__entry->vcpu_id = vcpu_id;
+		__entry->timer_index = timer_index;
+		__entry->config = config;
+		__entry->host = host;
+	),
+
+	TP_printk("vcpu_id %d timer %d config 0x%llx host %d",
+		  __entry->vcpu_id, __entry->timer_index, __entry->config,
+		  __entry->host)
+);
+
+/*
+ * Tracepoint for stimer_set_count.
+ */
+TRACE_EVENT(kvm_hv_stimer_set_count,
+	TP_PROTO(int vcpu_id, int timer_index, u64 count, bool host),
+	TP_ARGS(vcpu_id, timer_index, count, host),
+
+	TP_STRUCT__entry(
+		__field(int, vcpu_id)
+		__field(int, timer_index)
+		__field(u64, count)
+		__field(bool, host)
+	),
+
+	TP_fast_assign(
+		__entry->vcpu_id = vcpu_id;
+		__entry->timer_index = timer_index;
+		__entry->count = count;
+		__entry->host = host;
+	),
+
+	TP_printk("vcpu_id %d timer %d count %llu host %d",
+		  __entry->vcpu_id, __entry->timer_index, __entry->count,
+		  __entry->host)
+);
+
+/*
+ * Tracepoint for stimer_start(periodic timer case).
+ */
+TRACE_EVENT(kvm_hv_stimer_start_periodic,
+	TP_PROTO(int vcpu_id, int timer_index, u64 time_now, u64 exp_time),
+	TP_ARGS(vcpu_id, timer_index, time_now, exp_time),
+
+	TP_STRUCT__entry(
+		__field(int, vcpu_id)
+		__field(int, timer_index)
+		__field(u64, time_now)
+		__field(u64, exp_time)
+	),
+
+	TP_fast_assign(
+		__entry->vcpu_id = vcpu_id;
+		__entry->timer_index = timer_index;
+		__entry->time_now = time_now;
+		__entry->exp_time = exp_time;
+	),
+
+	TP_printk("vcpu_id %d timer %d time_now %llu exp_time %llu",
+		  __entry->vcpu_id, __entry->timer_index, __entry->time_now,
+		  __entry->exp_time)
+);
+
+/*
+ * Tracepoint for stimer_start(one-shot timer case).
+ */
+TRACE_EVENT(kvm_hv_stimer_start_one_shot,
+	TP_PROTO(int vcpu_id, int timer_index, u64 time_now, u64 count),
+	TP_ARGS(vcpu_id, timer_index, time_now, count),
+
+	TP_STRUCT__entry(
+		__field(int, vcpu_id)
+		__field(int, timer_index)
+		__field(u64, time_now)
+		__field(u64, count)
+	),
+
+	TP_fast_assign(
+		__entry->vcpu_id = vcpu_id;
+		__entry->timer_index = timer_index;
+		__entry->time_now = time_now;
+		__entry->count = count;
+	),
+
+	TP_printk("vcpu_id %d timer %d time_now %llu count %llu",
+		  __entry->vcpu_id, __entry->timer_index, __entry->time_now,
+		  __entry->count)
+);
+
+/*
+ * Tracepoint for stimer_timer_callback.
+ */
+TRACE_EVENT(kvm_hv_stimer_callback,
+	TP_PROTO(int vcpu_id, int timer_index),
+	TP_ARGS(vcpu_id, timer_index),
+
+	TP_STRUCT__entry(
+		__field(int, vcpu_id)
+		__field(int, timer_index)
+	),
+
+	TP_fast_assign(
+		__entry->vcpu_id = vcpu_id;
+		__entry->timer_index = timer_index;
+	),
+
+	TP_printk("vcpu_id %d timer %d",
+		  __entry->vcpu_id, __entry->timer_index)
+);
+
+/*
+ * Tracepoint for stimer_expiration.
+ */
+TRACE_EVENT(kvm_hv_stimer_expiration,
+	TP_PROTO(int vcpu_id, int timer_index, int msg_send_result),
+	TP_ARGS(vcpu_id, timer_index, msg_send_result),
+
+	TP_STRUCT__entry(
+		__field(int, vcpu_id)
+		__field(int, timer_index)
+		__field(int, msg_send_result)
+	),
+
+	TP_fast_assign(
+		__entry->vcpu_id = vcpu_id;
+		__entry->timer_index = timer_index;
+		__entry->msg_send_result = msg_send_result;
+	),
+
+	TP_printk("vcpu_id %d timer %d msg send result %d",
+		  __entry->vcpu_id, __entry->timer_index,
+		  __entry->msg_send_result)
+);
+
+/*
+ * Tracepoint for stimer_cleanup.
+ */
+TRACE_EVENT(kvm_hv_stimer_cleanup,
+	TP_PROTO(int vcpu_id, int timer_index),
+	TP_ARGS(vcpu_id, timer_index),
+
+	TP_STRUCT__entry(
+		__field(int, vcpu_id)
+		__field(int, timer_index)
+	),
+
+	TP_fast_assign(
+		__entry->vcpu_id = vcpu_id;
+		__entry->timer_index = timer_index;
+	),
+
+	TP_printk("vcpu_id %d timer %d",
+		  __entry->vcpu_id, __entry->timer_index)
+);
+
 #endif /* _TRACE_KVM_H */
 #endif /* _TRACE_KVM_H */
 
 
 #undef TRACE_INCLUDE_PATH
 #undef TRACE_INCLUDE_PATH
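
The new kvm_hv_* events land in the same "kvm" trace system as the existing x86 KVM tracepoints, so once a kernel with this series is running they can be enabled like any other ftrace event. A rough userspace sketch, assuming tracefs is mounted at /sys/kernel/debug/tracing and the program runs as root; the event name should be verified against the target system's events/kvm/ directory:

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

#define TRACEFS "/sys/kernel/debug/tracing"

/* Write a short string to a tracefs control file; returns 0 on success. */
static int tracefs_write(const char *path, const char *val)
{
	int fd = open(path, O_WRONLY);

	if (fd < 0)
		return -1;
	if (write(fd, val, strlen(val)) < 0) {
		close(fd);
		return -1;
	}
	return close(fd);
}

int main(void)
{
	char buf[4096];
	ssize_t n;
	int fd;

	/* Enable one of the Hyper-V stimer tracepoints added above. */
	if (tracefs_write(TRACEFS "/events/kvm/kvm_hv_stimer_expiration/enable", "1"))
		perror("enable kvm_hv_stimer_expiration");

	/* Stream records; each line carries vcpu_id, timer index and the
	 * message send result, matching the TP_printk format above. */
	fd = open(TRACEFS "/trace_pipe", O_RDONLY);
	if (fd < 0) {
		perror("open trace_pipe");
		return 1;
	}
	while ((n = read(fd, buf, sizeof(buf) - 1)) > 0) {
		buf[n] = '\0';
		fputs(buf, stdout);
	}
	close(fd);
	return 0;
}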

+ 131 - 71
arch/x86/kvm/vmx.c

@@ -19,6 +19,7 @@
 #include "irq.h"
 #include "irq.h"
 #include "mmu.h"
 #include "mmu.h"
 #include "cpuid.h"
 #include "cpuid.h"
+#include "lapic.h"
 
 
 #include <linux/kvm_host.h>
 #include <linux/kvm_host.h>
 #include <linux/module.h>
 #include <linux/module.h>
@@ -862,7 +863,6 @@ static void kvm_cpu_vmxon(u64 addr);
 static void kvm_cpu_vmxoff(void);
 static void kvm_cpu_vmxoff(void);
 static bool vmx_mpx_supported(void);
 static bool vmx_mpx_supported(void);
 static bool vmx_xsaves_supported(void);
 static bool vmx_xsaves_supported(void);
-static int vmx_cpu_uses_apicv(struct kvm_vcpu *vcpu);
 static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr);
 static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr);
 static void vmx_set_segment(struct kvm_vcpu *vcpu,
 static void vmx_set_segment(struct kvm_vcpu *vcpu,
 			    struct kvm_segment *var, int seg);
 			    struct kvm_segment *var, int seg);
@@ -870,7 +870,6 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu,
 			    struct kvm_segment *var, int seg);
 			    struct kvm_segment *var, int seg);
 static bool guest_state_valid(struct kvm_vcpu *vcpu);
 static bool guest_state_valid(struct kvm_vcpu *vcpu);
 static u32 vmx_segment_access_rights(struct kvm_segment *var);
 static u32 vmx_segment_access_rights(struct kvm_segment *var);
-static void vmx_sync_pir_to_irr_dummy(struct kvm_vcpu *vcpu);
 static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx);
 static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx);
 static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx);
 static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx);
 static int alloc_identity_pagetable(struct kvm *kvm);
 static int alloc_identity_pagetable(struct kvm *kvm);
@@ -1448,7 +1447,51 @@ static inline void ept_sync_context(u64 eptp)
 	}
 	}
 }
 }
 
 
-static __always_inline unsigned long vmcs_readl(unsigned long field)
+static __always_inline void vmcs_check16(unsigned long field)
+{
+        BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6001) == 0x2000,
+			 "16-bit accessor invalid for 64-bit field");
+        BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6001) == 0x2001,
+			 "16-bit accessor invalid for 64-bit high field");
+        BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x4000,
+			 "16-bit accessor invalid for 32-bit high field");
+        BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x6000,
+			 "16-bit accessor invalid for natural width field");
+}
+
+static __always_inline void vmcs_check32(unsigned long field)
+{
+        BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0,
+			 "32-bit accessor invalid for 16-bit field");
+        BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x6000,
+			 "32-bit accessor invalid for natural width field");
+}
+
+static __always_inline void vmcs_check64(unsigned long field)
+{
+        BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0,
+			 "64-bit accessor invalid for 16-bit field");
+        BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6001) == 0x2001,
+			 "64-bit accessor invalid for 64-bit high field");
+        BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x4000,
+			 "64-bit accessor invalid for 32-bit field");
+        BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x6000,
+			 "64-bit accessor invalid for natural width field");
+}
+
+static __always_inline void vmcs_checkl(unsigned long field)
+{
+        BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0,
+			 "Natural width accessor invalid for 16-bit field");
+        BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6001) == 0x2000,
+			 "Natural width accessor invalid for 64-bit field");
+        BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6001) == 0x2001,
+			 "Natural width accessor invalid for 64-bit high field");
+        BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x4000,
+			 "Natural width accessor invalid for 32-bit field");
+}
+
+static __always_inline unsigned long __vmcs_readl(unsigned long field)
 {
 {
 	unsigned long value;
 	unsigned long value;
 
 
@@ -1459,23 +1502,32 @@ static __always_inline unsigned long vmcs_readl(unsigned long field)
 
 
 static __always_inline u16 vmcs_read16(unsigned long field)
 static __always_inline u16 vmcs_read16(unsigned long field)
 {
 {
-	return vmcs_readl(field);
+	vmcs_check16(field);
+	return __vmcs_readl(field);
 }
 }
 
 
 static __always_inline u32 vmcs_read32(unsigned long field)
 static __always_inline u32 vmcs_read32(unsigned long field)
 {
 {
-	return vmcs_readl(field);
+	vmcs_check32(field);
+	return __vmcs_readl(field);
 }
 }
 
 
 static __always_inline u64 vmcs_read64(unsigned long field)
 static __always_inline u64 vmcs_read64(unsigned long field)
 {
 {
+	vmcs_check64(field);
 #ifdef CONFIG_X86_64
 #ifdef CONFIG_X86_64
-	return vmcs_readl(field);
+	return __vmcs_readl(field);
 #else
 #else
-	return vmcs_readl(field) | ((u64)vmcs_readl(field+1) << 32);
+	return __vmcs_readl(field) | ((u64)__vmcs_readl(field+1) << 32);
 #endif
 #endif
 }
 }
 
 
+static __always_inline unsigned long vmcs_readl(unsigned long field)
+{
+	vmcs_checkl(field);
+	return __vmcs_readl(field);
+}
+
 static noinline void vmwrite_error(unsigned long field, unsigned long value)
 static noinline void vmwrite_error(unsigned long field, unsigned long value)
 {
 {
 	printk(KERN_ERR "vmwrite error: reg %lx value %lx (err %d)\n",
 	printk(KERN_ERR "vmwrite error: reg %lx value %lx (err %d)\n",
@@ -1483,7 +1535,7 @@ static noinline void vmwrite_error(unsigned long field, unsigned long value)
 	dump_stack();
 	dump_stack();
 }
 }
 
 
-static void vmcs_writel(unsigned long field, unsigned long value)
+static __always_inline void __vmcs_writel(unsigned long field, unsigned long value)
 {
 {
 	u8 error;
 	u8 error;
 
 
@@ -1493,33 +1545,46 @@ static void vmcs_writel(unsigned long field, unsigned long value)
 		vmwrite_error(field, value);
 		vmwrite_error(field, value);
 }
 }
 
 
-static void vmcs_write16(unsigned long field, u16 value)
+static __always_inline void vmcs_write16(unsigned long field, u16 value)
 {
 {
-	vmcs_writel(field, value);
+	vmcs_check16(field);
+	__vmcs_writel(field, value);
 }
 }
 
 
-static void vmcs_write32(unsigned long field, u32 value)
+static __always_inline void vmcs_write32(unsigned long field, u32 value)
 {
 {
-	vmcs_writel(field, value);
+	vmcs_check32(field);
+	__vmcs_writel(field, value);
 }
 }
 
 
-static void vmcs_write64(unsigned long field, u64 value)
+static __always_inline void vmcs_write64(unsigned long field, u64 value)
 {
 {
-	vmcs_writel(field, value);
+	vmcs_check64(field);
+	__vmcs_writel(field, value);
 #ifndef CONFIG_X86_64
 #ifndef CONFIG_X86_64
 	asm volatile ("");
 	asm volatile ("");
-	vmcs_writel(field+1, value >> 32);
+	__vmcs_writel(field+1, value >> 32);
 #endif
 #endif
 }
 }
 
 
-static void vmcs_clear_bits(unsigned long field, u32 mask)
+static __always_inline void vmcs_writel(unsigned long field, unsigned long value)
 {
 {
-	vmcs_writel(field, vmcs_readl(field) & ~mask);
+	vmcs_checkl(field);
+	__vmcs_writel(field, value);
 }
 }
 
 
-static void vmcs_set_bits(unsigned long field, u32 mask)
+static __always_inline void vmcs_clear_bits(unsigned long field, u32 mask)
 {
 {
-	vmcs_writel(field, vmcs_readl(field) | mask);
+        BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x2000,
+			 "vmcs_clear_bits does not support 64-bit fields");
+	__vmcs_writel(field, __vmcs_readl(field) & ~mask);
+}
+
+static __always_inline void vmcs_set_bits(unsigned long field, u32 mask)
+{
+        BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x2000,
+			 "vmcs_set_bits does not support 64-bit fields");
+	__vmcs_writel(field, __vmcs_readl(field) | mask);
 }
 }
 
 
 static inline void vm_entry_controls_init(struct vcpu_vmx *vmx, u32 val)
 static inline void vm_entry_controls_init(struct vcpu_vmx *vmx, u32 val)
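
The vmcs_check* helpers key off the width bits of the VMCS field encoding: bits 14:13 select 16-bit (0x0000), 64-bit (0x2000), 32-bit (0x4000) or natural-width (0x6000) fields, and bit 0 on a 64-bit field selects its high half, which is why the masks 0x6000 and 0x6001 appear above. A small standalone sketch of the same classification; the example encodings are quoted from the SDM layout and meant purely as illustration:

#include <stdio.h>

/* Decode the access-width class of a VMCS field encoding. */
static const char *vmcs_field_width(unsigned long field)
{
	if ((field & 0x6001) == 0x2001)
		return "64-bit high half";
	switch (field & 0x6000) {
	case 0x0000: return "16-bit";
	case 0x2000: return "64-bit";
	case 0x4000: return "32-bit";
	default:     return "natural width";
	}
}

int main(void)
{
	/* Example encodings: POSTED_INTR_NV, TSC_OFFSET, GUEST_CS_BASE. */
	unsigned long fields[] = { 0x0002, 0x2010, 0x6808 };

	for (unsigned i = 0; i < sizeof(fields) / sizeof(fields[0]); i++)
		printf("0x%04lx -> %s\n", fields[i], vmcs_field_width(fields[i]));
	return 0;
}

This is also what motivates the later hunks that switch POSTED_INTR_NV to vmcs_write16, GUEST_IA32_EFER and friends to vmcs_read64, and GUEST_CS_BASE / GUEST_PENDING_DBG_EXCEPTIONS to vmcs_writel: with the build-time checks in place, the accessor has to match the field's declared width or compilation fails.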
@@ -2498,7 +2563,7 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
 	vmx->nested.nested_vmx_pinbased_ctls_high |=
 	vmx->nested.nested_vmx_pinbased_ctls_high |=
 		PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR |
 		PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR |
 		PIN_BASED_VMX_PREEMPTION_TIMER;
 		PIN_BASED_VMX_PREEMPTION_TIMER;
-	if (vmx_cpu_uses_apicv(&vmx->vcpu))
+	if (kvm_vcpu_apicv_active(&vmx->vcpu))
 		vmx->nested.nested_vmx_pinbased_ctls_high |=
 		vmx->nested.nested_vmx_pinbased_ctls_high |=
 			PIN_BASED_POSTED_INTR;
 			PIN_BASED_POSTED_INTR;
 
 
@@ -4462,9 +4527,9 @@ static void vmx_disable_intercept_msr_write_x2apic(u32 msr)
 			msr, MSR_TYPE_W);
 			msr, MSR_TYPE_W);
 }
 }
 
 
-static int vmx_cpu_uses_apicv(struct kvm_vcpu *vcpu)
+static bool vmx_get_enable_apicv(void)
 {
 {
-	return enable_apicv && lapic_in_kernel(vcpu);
+	return enable_apicv;
 }
 }
 
 
 static int vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu)
 static int vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu)
@@ -4586,11 +4651,6 @@ static void vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu)
 	kvm_apic_update_irr(vcpu, vmx->pi_desc.pir);
 	kvm_apic_update_irr(vcpu, vmx->pi_desc.pir);
 }
 }
 
 
-static void vmx_sync_pir_to_irr_dummy(struct kvm_vcpu *vcpu)
-{
-	return;
-}
-
 /*
 /*
  * Set up the vmcs's constant host-state fields, i.e., host-state fields that
  * Set up the vmcs's constant host-state fields, i.e., host-state fields that
  * will not change in the lifetime of the guest.
  * will not change in the lifetime of the guest.
@@ -4660,11 +4720,18 @@ static u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx)
 {
 {
 	u32 pin_based_exec_ctrl = vmcs_config.pin_based_exec_ctrl;
 	u32 pin_based_exec_ctrl = vmcs_config.pin_based_exec_ctrl;
 
 
-	if (!vmx_cpu_uses_apicv(&vmx->vcpu))
+	if (!kvm_vcpu_apicv_active(&vmx->vcpu))
 		pin_based_exec_ctrl &= ~PIN_BASED_POSTED_INTR;
 		pin_based_exec_ctrl &= ~PIN_BASED_POSTED_INTR;
 	return pin_based_exec_ctrl;
 	return pin_based_exec_ctrl;
 }
 }
 
 
+static void vmx_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
+{
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+	vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, vmx_pin_based_exec_ctrl(vmx));
+}
+
 static u32 vmx_exec_control(struct vcpu_vmx *vmx)
 static u32 vmx_exec_control(struct vcpu_vmx *vmx)
 {
 {
 	u32 exec_control = vmcs_config.cpu_based_exec_ctrl;
 	u32 exec_control = vmcs_config.cpu_based_exec_ctrl;
@@ -4703,7 +4770,7 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx)
 		exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST;
 		exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST;
 	if (!ple_gap)
 	if (!ple_gap)
 		exec_control &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING;
 		exec_control &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING;
-	if (!vmx_cpu_uses_apicv(&vmx->vcpu))
+	if (!kvm_vcpu_apicv_active(&vmx->vcpu))
 		exec_control &= ~(SECONDARY_EXEC_APIC_REGISTER_VIRT |
 		exec_control &= ~(SECONDARY_EXEC_APIC_REGISTER_VIRT |
 				  SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
 				  SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
 	exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
 	exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
@@ -4767,7 +4834,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
 		vmcs_write32(SECONDARY_VM_EXEC_CONTROL,
 		vmcs_write32(SECONDARY_VM_EXEC_CONTROL,
 				vmx_secondary_exec_control(vmx));
 				vmx_secondary_exec_control(vmx));
 
 
-	if (vmx_cpu_uses_apicv(&vmx->vcpu)) {
+	if (kvm_vcpu_apicv_active(&vmx->vcpu)) {
 		vmcs_write64(EOI_EXIT_BITMAP0, 0);
 		vmcs_write64(EOI_EXIT_BITMAP0, 0);
 		vmcs_write64(EOI_EXIT_BITMAP1, 0);
 		vmcs_write64(EOI_EXIT_BITMAP1, 0);
 		vmcs_write64(EOI_EXIT_BITMAP2, 0);
 		vmcs_write64(EOI_EXIT_BITMAP2, 0);
@@ -4775,7 +4842,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
 
 
 		vmcs_write16(GUEST_INTR_STATUS, 0);
 		vmcs_write16(GUEST_INTR_STATUS, 0);
 
 
-		vmcs_write64(POSTED_INTR_NV, POSTED_INTR_VECTOR);
+		vmcs_write16(POSTED_INTR_NV, POSTED_INTR_VECTOR);
 		vmcs_write64(POSTED_INTR_DESC_ADDR, __pa((&vmx->pi_desc)));
 		vmcs_write64(POSTED_INTR_DESC_ADDR, __pa((&vmx->pi_desc)));
 	}
 	}
 
 
@@ -4867,7 +4934,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
 
 
 	seg_setup(VCPU_SREG_CS);
 	seg_setup(VCPU_SREG_CS);
 	vmcs_write16(GUEST_CS_SELECTOR, 0xf000);
 	vmcs_write16(GUEST_CS_SELECTOR, 0xf000);
-	vmcs_write32(GUEST_CS_BASE, 0xffff0000);
+	vmcs_writel(GUEST_CS_BASE, 0xffff0000ul);
 
 
 	seg_setup(VCPU_SREG_DS);
 	seg_setup(VCPU_SREG_DS);
 	seg_setup(VCPU_SREG_ES);
 	seg_setup(VCPU_SREG_ES);
@@ -4903,7 +4970,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
 
 
 	vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE);
 	vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE);
 	vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, 0);
 	vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, 0);
-	vmcs_write32(GUEST_PENDING_DBG_EXCEPTIONS, 0);
+	vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS, 0);
 
 
 	setup_msrs(vmx);
 	setup_msrs(vmx);
 
 
@@ -4919,7 +4986,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
 
 
 	kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu);
 	kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu);
 
 
-	if (vmx_cpu_uses_apicv(vcpu))
+	if (kvm_vcpu_apicv_active(vcpu))
 		memset(&vmx->pi_desc, 0, sizeof(struct pi_desc));
 		memset(&vmx->pi_desc, 0, sizeof(struct pi_desc));
 
 
 	if (vmx->vpid != 0)
 	if (vmx->vpid != 0)
@@ -6203,15 +6270,6 @@ static __init int hardware_setup(void)
 		kvm_tsc_scaling_ratio_frac_bits = 48;
 		kvm_tsc_scaling_ratio_frac_bits = 48;
 	}
 	}
 
 
-	if (enable_apicv)
-		kvm_x86_ops->update_cr8_intercept = NULL;
-	else {
-		kvm_x86_ops->hwapic_irr_update = NULL;
-		kvm_x86_ops->hwapic_isr_update = NULL;
-		kvm_x86_ops->deliver_posted_interrupt = NULL;
-		kvm_x86_ops->sync_pir_to_irr = vmx_sync_pir_to_irr_dummy;
-	}
-
 	vmx_disable_intercept_for_msr(MSR_FS_BASE, false);
 	vmx_disable_intercept_for_msr(MSR_FS_BASE, false);
 	vmx_disable_intercept_for_msr(MSR_GS_BASE, false);
 	vmx_disable_intercept_for_msr(MSR_GS_BASE, false);
 	vmx_disable_intercept_for_msr(MSR_KERNEL_GS_BASE, true);
 	vmx_disable_intercept_for_msr(MSR_KERNEL_GS_BASE, true);
@@ -7901,7 +7959,7 @@ static void dump_vmcs(void)
 	u32 pin_based_exec_ctrl = vmcs_read32(PIN_BASED_VM_EXEC_CONTROL);
 	u32 pin_based_exec_ctrl = vmcs_read32(PIN_BASED_VM_EXEC_CONTROL);
 	u32 secondary_exec_control = 0;
 	u32 secondary_exec_control = 0;
 	unsigned long cr4 = vmcs_readl(GUEST_CR4);
 	unsigned long cr4 = vmcs_readl(GUEST_CR4);
-	u64 efer = vmcs_readl(GUEST_IA32_EFER);
+	u64 efer = vmcs_read64(GUEST_IA32_EFER);
 	int i, n;
 	int i, n;
 
 
 	if (cpu_has_secondary_exec_ctrls())
 	if (cpu_has_secondary_exec_ctrls())
@@ -7917,10 +7975,10 @@ static void dump_vmcs(void)
 	if ((secondary_exec_control & SECONDARY_EXEC_ENABLE_EPT) &&
 	if ((secondary_exec_control & SECONDARY_EXEC_ENABLE_EPT) &&
 	    (cr4 & X86_CR4_PAE) && !(efer & EFER_LMA))
 	    (cr4 & X86_CR4_PAE) && !(efer & EFER_LMA))
 	{
 	{
-		pr_err("PDPTR0 = 0x%016lx  PDPTR1 = 0x%016lx\n",
-		       vmcs_readl(GUEST_PDPTR0), vmcs_readl(GUEST_PDPTR1));
-		pr_err("PDPTR2 = 0x%016lx  PDPTR3 = 0x%016lx\n",
-		       vmcs_readl(GUEST_PDPTR2), vmcs_readl(GUEST_PDPTR3));
+		pr_err("PDPTR0 = 0x%016llx  PDPTR1 = 0x%016llx\n",
+		       vmcs_read64(GUEST_PDPTR0), vmcs_read64(GUEST_PDPTR1));
+		pr_err("PDPTR2 = 0x%016llx  PDPTR3 = 0x%016llx\n",
+		       vmcs_read64(GUEST_PDPTR2), vmcs_read64(GUEST_PDPTR3));
 	}
 	}
 	pr_err("RSP = 0x%016lx  RIP = 0x%016lx\n",
 	pr_err("RSP = 0x%016lx  RIP = 0x%016lx\n",
 	       vmcs_readl(GUEST_RSP), vmcs_readl(GUEST_RIP));
 	       vmcs_readl(GUEST_RSP), vmcs_readl(GUEST_RIP));
@@ -7941,16 +7999,16 @@ static void dump_vmcs(void)
 	vmx_dump_sel("TR:  ", GUEST_TR_SELECTOR);
 	vmx_dump_sel("TR:  ", GUEST_TR_SELECTOR);
 	if ((vmexit_ctl & (VM_EXIT_SAVE_IA32_PAT | VM_EXIT_SAVE_IA32_EFER)) ||
 	if ((vmexit_ctl & (VM_EXIT_SAVE_IA32_PAT | VM_EXIT_SAVE_IA32_EFER)) ||
 	    (vmentry_ctl & (VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_LOAD_IA32_EFER)))
 	    (vmentry_ctl & (VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_LOAD_IA32_EFER)))
-		pr_err("EFER =     0x%016llx  PAT = 0x%016lx\n",
-		       efer, vmcs_readl(GUEST_IA32_PAT));
-	pr_err("DebugCtl = 0x%016lx  DebugExceptions = 0x%016lx\n",
-	       vmcs_readl(GUEST_IA32_DEBUGCTL),
+		pr_err("EFER =     0x%016llx  PAT = 0x%016llx\n",
+		       efer, vmcs_read64(GUEST_IA32_PAT));
+	pr_err("DebugCtl = 0x%016llx  DebugExceptions = 0x%016lx\n",
+	       vmcs_read64(GUEST_IA32_DEBUGCTL),
 	       vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS));
 	       vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS));
 	if (vmentry_ctl & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL)
 	if (vmentry_ctl & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL)
-		pr_err("PerfGlobCtl = 0x%016lx\n",
-		       vmcs_readl(GUEST_IA32_PERF_GLOBAL_CTRL));
+		pr_err("PerfGlobCtl = 0x%016llx\n",
+		       vmcs_read64(GUEST_IA32_PERF_GLOBAL_CTRL));
 	if (vmentry_ctl & VM_ENTRY_LOAD_BNDCFGS)
 	if (vmentry_ctl & VM_ENTRY_LOAD_BNDCFGS)
-		pr_err("BndCfgS = 0x%016lx\n", vmcs_readl(GUEST_BNDCFGS));
+		pr_err("BndCfgS = 0x%016llx\n", vmcs_read64(GUEST_BNDCFGS));
 	pr_err("Interruptibility = %08x  ActivityState = %08x\n",
 	pr_err("Interruptibility = %08x  ActivityState = %08x\n",
 	       vmcs_read32(GUEST_INTERRUPTIBILITY_INFO),
 	       vmcs_read32(GUEST_INTERRUPTIBILITY_INFO),
 	       vmcs_read32(GUEST_ACTIVITY_STATE));
 	       vmcs_read32(GUEST_ACTIVITY_STATE));
@@ -7979,11 +8037,12 @@ static void dump_vmcs(void)
 	       vmcs_read32(HOST_IA32_SYSENTER_CS),
 	       vmcs_read32(HOST_IA32_SYSENTER_CS),
 	       vmcs_readl(HOST_IA32_SYSENTER_EIP));
 	       vmcs_readl(HOST_IA32_SYSENTER_EIP));
 	if (vmexit_ctl & (VM_EXIT_LOAD_IA32_PAT | VM_EXIT_LOAD_IA32_EFER))
 	if (vmexit_ctl & (VM_EXIT_LOAD_IA32_PAT | VM_EXIT_LOAD_IA32_EFER))
-		pr_err("EFER = 0x%016lx  PAT = 0x%016lx\n",
-		       vmcs_readl(HOST_IA32_EFER), vmcs_readl(HOST_IA32_PAT));
+		pr_err("EFER = 0x%016llx  PAT = 0x%016llx\n",
+		       vmcs_read64(HOST_IA32_EFER),
+		       vmcs_read64(HOST_IA32_PAT));
 	if (vmexit_ctl & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL)
 	if (vmexit_ctl & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL)
-		pr_err("PerfGlobCtl = 0x%016lx\n",
-		       vmcs_readl(HOST_IA32_PERF_GLOBAL_CTRL));
+		pr_err("PerfGlobCtl = 0x%016llx\n",
+		       vmcs_read64(HOST_IA32_PERF_GLOBAL_CTRL));
 
 
 	pr_err("*** Control State ***\n");
 	pr_err("*** Control State ***\n");
 	pr_err("PinBased=%08x CPUBased=%08x SecondaryExec=%08x\n",
 	pr_err("PinBased=%08x CPUBased=%08x SecondaryExec=%08x\n",
@@ -8006,16 +8065,16 @@ static void dump_vmcs(void)
 	pr_err("IDTVectoring: info=%08x errcode=%08x\n",
 	pr_err("IDTVectoring: info=%08x errcode=%08x\n",
 	       vmcs_read32(IDT_VECTORING_INFO_FIELD),
 	       vmcs_read32(IDT_VECTORING_INFO_FIELD),
 	       vmcs_read32(IDT_VECTORING_ERROR_CODE));
 	       vmcs_read32(IDT_VECTORING_ERROR_CODE));
-	pr_err("TSC Offset = 0x%016lx\n", vmcs_readl(TSC_OFFSET));
+	pr_err("TSC Offset = 0x%016llx\n", vmcs_read64(TSC_OFFSET));
 	if (secondary_exec_control & SECONDARY_EXEC_TSC_SCALING)
 	if (secondary_exec_control & SECONDARY_EXEC_TSC_SCALING)
-		pr_err("TSC Multiplier = 0x%016lx\n",
-		       vmcs_readl(TSC_MULTIPLIER));
+		pr_err("TSC Multiplier = 0x%016llx\n",
+		       vmcs_read64(TSC_MULTIPLIER));
 	if (cpu_based_exec_ctrl & CPU_BASED_TPR_SHADOW)
 	if (cpu_based_exec_ctrl & CPU_BASED_TPR_SHADOW)
 		pr_err("TPR Threshold = 0x%02x\n", vmcs_read32(TPR_THRESHOLD));
 		pr_err("TPR Threshold = 0x%02x\n", vmcs_read32(TPR_THRESHOLD));
 	if (pin_based_exec_ctrl & PIN_BASED_POSTED_INTR)
 	if (pin_based_exec_ctrl & PIN_BASED_POSTED_INTR)
 		pr_err("PostedIntrVec = 0x%02x\n", vmcs_read16(POSTED_INTR_NV));
 		pr_err("PostedIntrVec = 0x%02x\n", vmcs_read16(POSTED_INTR_NV));
 	if ((secondary_exec_control & SECONDARY_EXEC_ENABLE_EPT))
 	if ((secondary_exec_control & SECONDARY_EXEC_ENABLE_EPT))
-		pr_err("EPT pointer = 0x%016lx\n", vmcs_readl(EPT_POINTER));
+		pr_err("EPT pointer = 0x%016llx\n", vmcs_read64(EPT_POINTER));
 	n = vmcs_read32(CR3_TARGET_COUNT);
 	n = vmcs_read32(CR3_TARGET_COUNT);
 	for (i = 0; i + 1 < n; i += 4)
 	for (i = 0; i + 1 < n; i += 4)
 		pr_err("CR3 target%u=%016lx target%u=%016lx\n",
 		pr_err("CR3 target%u=%016lx target%u=%016lx\n",
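
The dump_vmcs() conversions from vmcs_readl() to vmcs_read64() matter mostly on 32-bit hosts, where unsigned long is 32 bits and a single VMREAD would silently drop the upper half of fields such as GUEST_IA32_EFER or the EPT pointer; vmcs_read64() instead composes the value from two reads of field and field+1, as shown earlier in this file. A tiny sketch of that composition against a mocked backing store; the array merely stands in for the VMCS, nothing here issues a real VMREAD:

#include <inttypes.h>
#include <stdio.h>

/* Fake per-field storage: each slot models one 32-bit VMREAD result. */
static uint32_t fake_vmcs[2] = { 0x89abcdefu, 0x01234567u };

static uint32_t vmread32(unsigned long field)
{
	return fake_vmcs[field];	/* real code would execute VMREAD */
}

/*
 * Mirrors the !CONFIG_X86_64 branch of vmcs_read64(): low word at
 * 'field', high word at 'field + 1'.
 */
static uint64_t vmread64(unsigned long field)
{
	return vmread32(field) | ((uint64_t)vmread32(field + 1) << 32);
}

int main(void)
{
	printf("64-bit field = 0x%016" PRIx64 "\n", vmread64(0));
	printf("truncated    = 0x%08"  PRIx32 "\n", vmread32(0));
	return 0;
}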
@@ -8154,7 +8213,7 @@ static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set)
 	 * apicv
 	 * apicv
 	 */
 	 */
 	if (!cpu_has_vmx_virtualize_x2apic_mode() ||
 	if (!cpu_has_vmx_virtualize_x2apic_mode() ||
-				!vmx_cpu_uses_apicv(vcpu))
+				!kvm_vcpu_apicv_active(vcpu))
 		return;
 		return;
 
 
 	if (!cpu_need_tpr_shadow(vcpu))
 	if (!cpu_need_tpr_shadow(vcpu))
@@ -8259,10 +8318,9 @@ static void vmx_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr)
 	}
 	}
 }
 }
 
 
-static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu)
+static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
 {
 {
-	u64 *eoi_exit_bitmap = vcpu->arch.eoi_exit_bitmap;
-	if (!vmx_cpu_uses_apicv(vcpu))
+	if (!kvm_vcpu_apicv_active(vcpu))
 		return;
 		return;
 
 
 	vmcs_write64(EOI_EXIT_BITMAP0, eoi_exit_bitmap[0]);
 	vmcs_write64(EOI_EXIT_BITMAP0, eoi_exit_bitmap[0]);
@@ -8932,7 +8990,8 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
 			best->ebx &= ~bit(X86_FEATURE_INVPCID);
 			best->ebx &= ~bit(X86_FEATURE_INVPCID);
 	}
 	}
 
 
-	vmcs_set_secondary_exec_control(secondary_exec_ctl);
+	if (cpu_has_secondary_exec_ctrls())
+		vmcs_set_secondary_exec_control(secondary_exec_ctl);
 
 
 	if (static_cpu_has(X86_FEATURE_PCOMMIT) && nested) {
 	if (static_cpu_has(X86_FEATURE_PCOMMIT) && nested) {
 		if (guest_cpuid_has_pcommit(vcpu))
 		if (guest_cpuid_has_pcommit(vcpu))
@@ -9508,7 +9567,7 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 		 */
 		 */
 		vmx->nested.posted_intr_nv = vmcs12->posted_intr_nv;
 		vmx->nested.posted_intr_nv = vmcs12->posted_intr_nv;
 		vmx->nested.pi_pending = false;
 		vmx->nested.pi_pending = false;
-		vmcs_write64(POSTED_INTR_NV, POSTED_INTR_VECTOR);
+		vmcs_write16(POSTED_INTR_NV, POSTED_INTR_VECTOR);
 		vmcs_write64(POSTED_INTR_DESC_ADDR,
 		vmcs_write64(POSTED_INTR_DESC_ADDR,
 			page_to_phys(vmx->nested.pi_desc_page) +
 			page_to_phys(vmx->nested.pi_desc_page) +
 			(unsigned long)(vmcs12->posted_intr_desc_addr &
 			(unsigned long)(vmcs12->posted_intr_desc_addr &
@@ -10169,7 +10228,7 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
 	 * Additionally, restore L2's PDPTR to vmcs12.
 	 * Additionally, restore L2's PDPTR to vmcs12.
 	 */
 	 */
 	if (enable_ept) {
 	if (enable_ept) {
-		vmcs12->guest_cr3 = vmcs_read64(GUEST_CR3);
+		vmcs12->guest_cr3 = vmcs_readl(GUEST_CR3);
 		vmcs12->guest_pdptr0 = vmcs_read64(GUEST_PDPTR0);
 		vmcs12->guest_pdptr0 = vmcs_read64(GUEST_PDPTR0);
 		vmcs12->guest_pdptr1 = vmcs_read64(GUEST_PDPTR1);
 		vmcs12->guest_pdptr1 = vmcs_read64(GUEST_PDPTR1);
 		vmcs12->guest_pdptr2 = vmcs_read64(GUEST_PDPTR2);
 		vmcs12->guest_pdptr2 = vmcs_read64(GUEST_PDPTR2);
@@ -10805,7 +10864,8 @@ static struct kvm_x86_ops vmx_x86_ops = {
 	.update_cr8_intercept = update_cr8_intercept,
 	.update_cr8_intercept = update_cr8_intercept,
 	.set_virtual_x2apic_mode = vmx_set_virtual_x2apic_mode,
 	.set_virtual_x2apic_mode = vmx_set_virtual_x2apic_mode,
 	.set_apic_access_page_addr = vmx_set_apic_access_page_addr,
 	.set_apic_access_page_addr = vmx_set_apic_access_page_addr,
-	.cpu_uses_apicv = vmx_cpu_uses_apicv,
+	.get_enable_apicv = vmx_get_enable_apicv,
+	.refresh_apicv_exec_ctrl = vmx_refresh_apicv_exec_ctrl,
 	.load_eoi_exitmap = vmx_load_eoi_exitmap,
 	.load_eoi_exitmap = vmx_load_eoi_exitmap,
 	.hwapic_irr_update = vmx_hwapic_irr_update,
 	.hwapic_irr_update = vmx_hwapic_irr_update,
 	.hwapic_isr_update = vmx_hwapic_isr_update,
 	.hwapic_isr_update = vmx_hwapic_isr_update,

+ 96 - 14
arch/x86/kvm/x86.c

@@ -951,7 +951,7 @@ static u32 msrs_to_save[] = {
 	MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
 	MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
 #endif
 #endif
 	MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA,
 	MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA,
-	MSR_IA32_FEATURE_CONTROL, MSR_IA32_BNDCFGS
+	MSR_IA32_FEATURE_CONTROL, MSR_IA32_BNDCFGS, MSR_TSC_AUX,
 };
 };
 
 
 static unsigned num_msrs_to_save;
 static unsigned num_msrs_to_save;
@@ -966,6 +966,8 @@ static u32 emulated_msrs[] = {
 	HV_X64_MSR_RESET,
 	HV_X64_MSR_RESET,
 	HV_X64_MSR_VP_INDEX,
 	HV_X64_MSR_VP_INDEX,
 	HV_X64_MSR_VP_RUNTIME,
 	HV_X64_MSR_VP_RUNTIME,
+	HV_X64_MSR_SCONTROL,
+	HV_X64_MSR_STIMER0_CONFIG,
 	HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
 	HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
 	MSR_KVM_PV_EOI_EN,
 	MSR_KVM_PV_EOI_EN,
 
 
@@ -1167,7 +1169,8 @@ static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock)
 
 
 	++version;
 	++version;
 
 
-	kvm_write_guest(kvm, wall_clock, &version, sizeof(version));
+	if (kvm_write_guest(kvm, wall_clock, &version, sizeof(version)))
+		return;
 
 
 	/*
 	/*
 	 * The guest calculates current wall clock time by adding
 	 * The guest calculates current wall clock time by adding
@@ -1683,6 +1686,11 @@ static void pvclock_update_vm_gtod_copy(struct kvm *kvm)
 #endif
 #endif
 }
 }
 
 
+void kvm_make_mclock_inprogress_request(struct kvm *kvm)
+{
+	kvm_make_all_cpus_request(kvm, KVM_REQ_MCLOCK_INPROGRESS);
+}
+
 static void kvm_gen_update_masterclock(struct kvm *kvm)
 static void kvm_gen_update_masterclock(struct kvm *kvm)
 {
 {
 #ifdef CONFIG_X86_64
 #ifdef CONFIG_X86_64
@@ -2198,6 +2206,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 	case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15:
 	case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15:
 	case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
 	case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
 	case HV_X64_MSR_CRASH_CTL:
 	case HV_X64_MSR_CRASH_CTL:
+	case HV_X64_MSR_STIMER0_CONFIG ... HV_X64_MSR_STIMER3_COUNT:
 		return kvm_hv_set_msr_common(vcpu, msr, data,
 		return kvm_hv_set_msr_common(vcpu, msr, data,
 					     msr_info->host_initiated);
 					     msr_info->host_initiated);
 	case MSR_IA32_BBL_CR_CTL3:
 	case MSR_IA32_BBL_CR_CTL3:
@@ -2402,6 +2411,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 	case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15:
 	case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15:
 	case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
 	case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
 	case HV_X64_MSR_CRASH_CTL:
 	case HV_X64_MSR_CRASH_CTL:
+	case HV_X64_MSR_STIMER0_CONFIG ... HV_X64_MSR_STIMER3_COUNT:
 		return kvm_hv_get_msr_common(vcpu,
 		return kvm_hv_get_msr_common(vcpu,
 					     msr_info->index, &msr_info->data);
 					     msr_info->index, &msr_info->data);
 		break;
 		break;
@@ -2541,6 +2551,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 	case KVM_CAP_HYPERV:
 	case KVM_CAP_HYPERV:
 	case KVM_CAP_HYPERV_VAPIC:
 	case KVM_CAP_HYPERV_VAPIC:
 	case KVM_CAP_HYPERV_SPIN:
 	case KVM_CAP_HYPERV_SPIN:
+	case KVM_CAP_HYPERV_SYNIC:
 	case KVM_CAP_PCI_SEGMENT:
 	case KVM_CAP_PCI_SEGMENT:
 	case KVM_CAP_DEBUGREGS:
 	case KVM_CAP_DEBUGREGS:
 	case KVM_CAP_X86_ROBUST_SINGLESTEP:
 	case KVM_CAP_X86_ROBUST_SINGLESTEP:
@@ -2693,6 +2704,11 @@ static bool need_emulate_wbinvd(struct kvm_vcpu *vcpu)
 	return kvm_arch_has_noncoherent_dma(vcpu->kvm);
 	return kvm_arch_has_noncoherent_dma(vcpu->kvm);
 }
 }
 
 
+static inline void kvm_migrate_timers(struct kvm_vcpu *vcpu)
+{
+	set_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests);
+}
+
 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
 {
 	/* Address WBINVD may be executed by guest */
 	/* Address WBINVD may be executed by guest */
@@ -2748,7 +2764,9 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
 static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
 static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
 				    struct kvm_lapic_state *s)
 				    struct kvm_lapic_state *s)
 {
 {
-	kvm_x86_ops->sync_pir_to_irr(vcpu);
+	if (vcpu->arch.apicv_active)
+		kvm_x86_ops->sync_pir_to_irr(vcpu);
+
 	memcpy(s->regs, vcpu->arch.apic->regs, sizeof *s);
 	memcpy(s->regs, vcpu->arch.apic->regs, sizeof *s);
 
 
 	return 0;
 	return 0;
@@ -3191,6 +3209,20 @@ static int kvm_set_guest_paused(struct kvm_vcpu *vcpu)
 	return 0;
 	return 0;
 }
 }
 
 
+static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
+				     struct kvm_enable_cap *cap)
+{
+	if (cap->flags)
+		return -EINVAL;
+
+	switch (cap->cap) {
+	case KVM_CAP_HYPERV_SYNIC:
+		return kvm_hv_activate_synic(vcpu);
+	default:
+		return -EINVAL;
+	}
+}
+
 long kvm_arch_vcpu_ioctl(struct file *filp,
 long kvm_arch_vcpu_ioctl(struct file *filp,
 			 unsigned int ioctl, unsigned long arg)
 			 unsigned int ioctl, unsigned long arg)
 {
 {
@@ -3455,6 +3487,15 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 		r = kvm_set_guest_paused(vcpu);
 		r = kvm_set_guest_paused(vcpu);
 		goto out;
 		goto out;
 	}
 	}
+	case KVM_ENABLE_CAP: {
+		struct kvm_enable_cap cap;
+
+		r = -EFAULT;
+		if (copy_from_user(&cap, argp, sizeof(cap)))
+			goto out;
+		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
+		break;
+	}
 	default:
 	default:
 		r = -EINVAL;
 		r = -EINVAL;
 	}
 	}
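
With KVM_CAP_HYPERV_SYNIC both advertised by kvm_vm_ioctl_check_extension() and wired into the new per-vcpu KVM_ENABLE_CAP handler, userspace opts a vcpu into the synthetic interrupt controller roughly as below. This is a bare-bones sketch with error handling trimmed; it assumes kernel headers that already carry this series and that SynIC wants the in-kernel irqchip in place (QEMU creates one before enabling the cap):

#include <fcntl.h>
#include <linux/kvm.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>

int main(void)
{
	struct kvm_enable_cap cap;
	int kvm, vm, vcpu;

	kvm = open("/dev/kvm", O_RDWR);
	if (kvm < 0 || !ioctl(kvm, KVM_CHECK_EXTENSION, KVM_CAP_HYPERV_SYNIC)) {
		fprintf(stderr, "KVM_CAP_HYPERV_SYNIC not available\n");
		return 1;
	}

	vm = ioctl(kvm, KVM_CREATE_VM, 0);
	/* SynIC delivery goes through the in-kernel APIC, so create the
	 * irqchip before the vcpu. */
	ioctl(vm, KVM_CREATE_IRQCHIP, 0);
	vcpu = ioctl(vm, KVM_CREATE_VCPU, 0);

	memset(&cap, 0, sizeof(cap));
	cap.cap = KVM_CAP_HYPERV_SYNIC;		/* flags and args stay zero */
	if (ioctl(vcpu, KVM_ENABLE_CAP, &cap))
		perror("KVM_ENABLE_CAP(KVM_CAP_HYPERV_SYNIC)");
	return 0;
}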
@@ -4006,16 +4047,17 @@ static void kvm_init_msr_list(void)
 
 
 		/*
 		/*
 		 * Even MSRs that are valid in the host may not be exposed
 		 * Even MSRs that are valid in the host may not be exposed
-		 * to the guests in some cases.  We could work around this
-		 * in VMX with the generic MSR save/load machinery, but it
-		 * is not really worthwhile since it will really only
-		 * happen with nested virtualization.
+		 * to the guests in some cases.
 		 */
 		 */
 		switch (msrs_to_save[i]) {
 		switch (msrs_to_save[i]) {
 		case MSR_IA32_BNDCFGS:
 		case MSR_IA32_BNDCFGS:
 			if (!kvm_x86_ops->mpx_supported())
 			if (!kvm_x86_ops->mpx_supported())
 				continue;
 				continue;
 			break;
 			break;
+		case MSR_TSC_AUX:
+			if (!kvm_x86_ops->rdtscp_supported())
+				continue;
+			break;
 		default:
 		default:
 			break;
 			break;
 		}
 		}
@@ -5872,6 +5914,12 @@ static void kvm_pv_kick_cpu_op(struct kvm *kvm, unsigned long flags, int apicid)
 	kvm_irq_delivery_to_apic(kvm, NULL, &lapic_irq, NULL);
 	kvm_irq_delivery_to_apic(kvm, NULL, &lapic_irq, NULL);
 }
 }
 
 
+void kvm_vcpu_deactivate_apicv(struct kvm_vcpu *vcpu)
+{
+	vcpu->arch.apicv_active = false;
+	kvm_x86_ops->refresh_apicv_exec_ctrl(vcpu);
+}
+
 int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
 int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
 {
 {
 	unsigned long nr, a0, a1, a2, a3, ret;
 	unsigned long nr, a0, a1, a2, a3, ret;
@@ -5965,6 +6013,9 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu)
 	if (!vcpu->arch.apic)
 	if (!vcpu->arch.apic)
 		return;
 		return;
 
 
+	if (vcpu->arch.apicv_active)
+		return;
+
 	if (!vcpu->arch.apic->vapic_addr)
 	if (!vcpu->arch.apic->vapic_addr)
 		max_irr = kvm_lapic_find_highest_irr(vcpu);
 		max_irr = kvm_lapic_find_highest_irr(vcpu);
 	else
 	else
@@ -6301,20 +6352,30 @@ static void process_smi(struct kvm_vcpu *vcpu)
 	kvm_mmu_reset_context(vcpu);
 	kvm_mmu_reset_context(vcpu);
 }
 }
 
 
+void kvm_make_scan_ioapic_request(struct kvm *kvm)
+{
+	kvm_make_all_cpus_request(kvm, KVM_REQ_SCAN_IOAPIC);
+}
+
 static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu)
 static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu)
 {
 {
+	u64 eoi_exit_bitmap[4];
+
 	if (!kvm_apic_hw_enabled(vcpu->arch.apic))
 	if (!kvm_apic_hw_enabled(vcpu->arch.apic))
 		return;
 		return;
 
 
-	memset(vcpu->arch.eoi_exit_bitmap, 0, 256 / 8);
+	bitmap_zero(vcpu->arch.ioapic_handled_vectors, 256);
 
 
 	if (irqchip_split(vcpu->kvm))
 	if (irqchip_split(vcpu->kvm))
-		kvm_scan_ioapic_routes(vcpu, vcpu->arch.eoi_exit_bitmap);
+		kvm_scan_ioapic_routes(vcpu, vcpu->arch.ioapic_handled_vectors);
 	else {
 	else {
-		kvm_x86_ops->sync_pir_to_irr(vcpu);
-		kvm_ioapic_scan_entry(vcpu, vcpu->arch.eoi_exit_bitmap);
+		if (vcpu->arch.apicv_active)
+			kvm_x86_ops->sync_pir_to_irr(vcpu);
+		kvm_ioapic_scan_entry(vcpu, vcpu->arch.ioapic_handled_vectors);
 	}
 	}
-	kvm_x86_ops->load_eoi_exitmap(vcpu);
+	bitmap_or((ulong *)eoi_exit_bitmap, vcpu->arch.ioapic_handled_vectors,
+		  vcpu_to_synic(vcpu)->vec_bitmap, 256);
+	kvm_x86_ops->load_eoi_exitmap(vcpu, eoi_exit_bitmap);
 }
 }
 
 
 static void kvm_vcpu_flush_tlb(struct kvm_vcpu *vcpu)
 static void kvm_vcpu_flush_tlb(struct kvm_vcpu *vcpu)
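
vcpu_scan_ioapic() now keeps the per-source bitmaps separate: the IOAPIC-derived vectors land in ioapic_handled_vectors, the SynIC vectors come from vcpu_to_synic(vcpu)->vec_bitmap, and only their union, built on the stack, is handed to load_eoi_exitmap(). The merge itself is just a 256-bit OR over four 64-bit words; a standalone sketch:

#include <inttypes.h>
#include <stdio.h>

#define VEC_WORDS 4	/* 256 vectors / 64 bits per word */

/* OR two 256-bit vector bitmaps, as bitmap_or(..., 256) does above. */
static void merge_eoi_exit_bitmap(uint64_t *dst, const uint64_t *ioapic,
				  const uint64_t *synic)
{
	for (int i = 0; i < VEC_WORDS; i++)
		dst[i] = ioapic[i] | synic[i];
}

int main(void)
{
	uint64_t ioapic[VEC_WORDS] = { 1ull << 33, 0, 0, 0 };		/* vector 33 */
	uint64_t synic[VEC_WORDS]  = { 0, 1ull << (80 - 64), 0, 0 };	/* vector 80 */
	uint64_t eoi[VEC_WORDS];

	merge_eoi_exit_bitmap(eoi, ioapic, synic);
	for (int i = 0; i < VEC_WORDS; i++)
		printf("word %d: 0x%016" PRIx64 "\n", i, eoi[i]);
	return 0;
}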
@@ -6422,7 +6483,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 		if (kvm_check_request(KVM_REQ_IOAPIC_EOI_EXIT, vcpu)) {
 		if (kvm_check_request(KVM_REQ_IOAPIC_EOI_EXIT, vcpu)) {
 			BUG_ON(vcpu->arch.pending_ioapic_eoi > 255);
 			BUG_ON(vcpu->arch.pending_ioapic_eoi > 255);
 			if (test_bit(vcpu->arch.pending_ioapic_eoi,
 			if (test_bit(vcpu->arch.pending_ioapic_eoi,
-				     (void *) vcpu->arch.eoi_exit_bitmap)) {
+				     vcpu->arch.ioapic_handled_vectors)) {
 				vcpu->run->exit_reason = KVM_EXIT_IOAPIC_EOI;
 				vcpu->run->exit_reason = KVM_EXIT_IOAPIC_EOI;
 				vcpu->run->eoi.vector =
 				vcpu->run->eoi.vector =
 						vcpu->arch.pending_ioapic_eoi;
 						vcpu->arch.pending_ioapic_eoi;
@@ -6446,6 +6507,20 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 			r = 0;
 			r = 0;
 			goto out;
 			goto out;
 		}
 		}
+		if (kvm_check_request(KVM_REQ_HV_EXIT, vcpu)) {
+			vcpu->run->exit_reason = KVM_EXIT_HYPERV;
+			vcpu->run->hyperv = vcpu->arch.hyperv.exit;
+			r = 0;
+			goto out;
+		}
+
+		/*
+		 * KVM_REQ_HV_STIMER has to be processed after
+		 * KVM_REQ_CLOCK_UPDATE, because Hyper-V SynIC timers
+		 * depend on the guest clock being up-to-date
+		 */
+		if (kvm_check_request(KVM_REQ_HV_STIMER, vcpu))
+			kvm_hv_process_stimers(vcpu);
 	}
 	}
 
 
 	/*
 	/*
@@ -6457,7 +6532,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 		 * Update architecture specific hints for APIC
 		 * Update architecture specific hints for APIC
 		 * virtual interrupt delivery.
 		 * virtual interrupt delivery.
 		 */
 		 */
-		if (kvm_x86_ops->hwapic_irr_update)
+		if (vcpu->arch.apicv_active)
 			kvm_x86_ops->hwapic_irr_update(vcpu,
 			kvm_x86_ops->hwapic_irr_update(vcpu,
 				kvm_lapic_find_highest_irr(vcpu));
 				kvm_lapic_find_highest_irr(vcpu));
 	}
 	}
@@ -7528,6 +7603,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 	BUG_ON(vcpu->kvm == NULL);
 	BUG_ON(vcpu->kvm == NULL);
 	kvm = vcpu->kvm;
 	kvm = vcpu->kvm;
 
 
+	vcpu->arch.apicv_active = kvm_x86_ops->get_enable_apicv();
 	vcpu->arch.pv.pv_unhalted = false;
 	vcpu->arch.pv.pv_unhalted = false;
 	vcpu->arch.emulate_ctxt.ops = &emulate_ops;
 	vcpu->arch.emulate_ctxt.ops = &emulate_ops;
 	if (!irqchip_in_kernel(kvm) || kvm_vcpu_is_reset_bsp(vcpu))
 	if (!irqchip_in_kernel(kvm) || kvm_vcpu_is_reset_bsp(vcpu))
@@ -7585,6 +7661,8 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 
 
 	vcpu->arch.pending_external_vector = -1;
 	vcpu->arch.pending_external_vector = -1;
 
 
+	kvm_hv_vcpu_init(vcpu);
+
 	return 0;
 	return 0;
 
 
 fail_free_mce_banks:
 fail_free_mce_banks:
@@ -7603,6 +7681,7 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
 {
 {
 	int idx;
 	int idx;
 
 
+	kvm_hv_vcpu_uninit(vcpu);
 	kvm_pmu_destroy(vcpu);
 	kvm_pmu_destroy(vcpu);
 	kfree(vcpu->arch.mce_banks);
 	kfree(vcpu->arch.mce_banks);
 	kvm_free_lapic(vcpu);
 	kvm_free_lapic(vcpu);
@@ -7997,6 +8076,9 @@ static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu)
 	    kvm_cpu_has_interrupt(vcpu))
 	    kvm_cpu_has_interrupt(vcpu))
 		return true;
 		return true;
 
 
+	if (kvm_hv_has_stimer_pending(vcpu))
+		return true;
+
 	return false;
 	return false;
 }
 }
 
 

+ 1 - 87
drivers/hv/hyperv_vmbus.h

@@ -63,10 +63,6 @@ enum hv_cpuid_function {
 /* Define version of the synthetic interrupt controller. */
 /* Define version of the synthetic interrupt controller. */
 #define HV_SYNIC_VERSION		(1)
 #define HV_SYNIC_VERSION		(1)
 
 
-/* Define synthetic interrupt controller message constants. */
-#define HV_MESSAGE_SIZE			(256)
-#define HV_MESSAGE_PAYLOAD_BYTE_COUNT	(240)
-#define HV_MESSAGE_PAYLOAD_QWORD_COUNT	(30)
 #define HV_ANY_VP			(0xFFFFFFFF)
 #define HV_ANY_VP			(0xFFFFFFFF)
 
 
 /* Define synthetic interrupt controller flag constants. */
 /* Define synthetic interrupt controller flag constants. */
@@ -74,48 +70,9 @@ enum hv_cpuid_function {
 #define HV_EVENT_FLAGS_BYTE_COUNT	(256)
 #define HV_EVENT_FLAGS_BYTE_COUNT	(256)
 #define HV_EVENT_FLAGS_DWORD_COUNT	(256 / sizeof(u32))
 #define HV_EVENT_FLAGS_DWORD_COUNT	(256 / sizeof(u32))
 
 
-/* Define hypervisor message types. */
-enum hv_message_type {
-	HVMSG_NONE			= 0x00000000,
-
-	/* Memory access messages. */
-	HVMSG_UNMAPPED_GPA		= 0x80000000,
-	HVMSG_GPA_INTERCEPT		= 0x80000001,
-
-	/* Timer notification messages. */
-	HVMSG_TIMER_EXPIRED			= 0x80000010,
-
-	/* Error messages. */
-	HVMSG_INVALID_VP_REGISTER_VALUE	= 0x80000020,
-	HVMSG_UNRECOVERABLE_EXCEPTION	= 0x80000021,
-	HVMSG_UNSUPPORTED_FEATURE		= 0x80000022,
-
-	/* Trace buffer complete messages. */
-	HVMSG_EVENTLOG_BUFFERCOMPLETE	= 0x80000040,
-
-	/* Platform-specific processor intercept messages. */
-	HVMSG_X64_IOPORT_INTERCEPT		= 0x80010000,
-	HVMSG_X64_MSR_INTERCEPT		= 0x80010001,
-	HVMSG_X64_CPUID_INTERCEPT		= 0x80010002,
-	HVMSG_X64_EXCEPTION_INTERCEPT	= 0x80010003,
-	HVMSG_X64_APIC_EOI			= 0x80010004,
-	HVMSG_X64_LEGACY_FP_ERROR		= 0x80010005
-};
-
-#define HV_SYNIC_STIMER_COUNT		(4)
-
 /* Define invalid partition identifier. */
 /* Define invalid partition identifier. */
 #define HV_PARTITION_ID_INVALID		((u64)0x0)
 #define HV_PARTITION_ID_INVALID		((u64)0x0)
 
 
-/* Define port identifier type. */
-union hv_port_id {
-	u32 asu32;
-	struct {
-		u32 id:24;
-		u32 reserved:8;
-	} u ;
-};
-
 /* Define port type. */
 /* Define port type. */
 enum hv_port_type {
 enum hv_port_type {
 	HVPORT_MSG	= 1,
 	HVPORT_MSG	= 1,
@@ -163,27 +120,6 @@ struct hv_connection_info {
 	};
 	};
 };
 };
 
 
-/* Define synthetic interrupt controller message flags. */
-union hv_message_flags {
-	u8 asu8;
-	struct {
-		u8 msg_pending:1;
-		u8 reserved:7;
-	};
-};
-
-/* Define synthetic interrupt controller message header. */
-struct hv_message_header {
-	enum hv_message_type message_type;
-	u8 payload_size;
-	union hv_message_flags message_flags;
-	u8 reserved[2];
-	union {
-		u64 sender;
-		union hv_port_id port;
-	};
-};
-
 /*
 /*
  * Timer configuration register.
  * Timer configuration register.
  */
  */
@@ -200,31 +136,9 @@ union hv_timer_config {
 	};
 	};
 };
 };
 
 
-
-/* Define timer message payload structure. */
-struct hv_timer_message_payload {
-	u32 timer_index;
-	u32 reserved;
-	u64 expiration_time;	/* When the timer expired */
-	u64 delivery_time;	/* When the message was delivered */
-};
-
-/* Define synthetic interrupt controller message format. */
-struct hv_message {
-	struct hv_message_header header;
-	union {
-		u64 payload[HV_MESSAGE_PAYLOAD_QWORD_COUNT];
-	} u ;
-};
-
 /* Define the number of message buffers associated with each port. */
 /* Define the number of message buffers associated with each port. */
 #define HV_PORT_MESSAGE_BUFFER_COUNT	(16)
 #define HV_PORT_MESSAGE_BUFFER_COUNT	(16)
 
 
-/* Define the synthetic interrupt message page layout. */
-struct hv_message_page {
-	struct hv_message sint_message[HV_SYNIC_SINT_COUNT];
-};
-
 /* Define the synthetic interrupt controller event flags format. */
 /* Define the synthetic interrupt controller event flags format. */
 union hv_synic_event_flags {
 union hv_synic_event_flags {
 	u8 flags8[HV_EVENT_FLAGS_BYTE_COUNT];
 	u8 flags8[HV_EVENT_FLAGS_BYTE_COUNT];
@@ -347,7 +261,7 @@ enum hv_call_code {
 struct hv_input_post_message {
 struct hv_input_post_message {
 	union hv_connection_id connectionid;
 	union hv_connection_id connectionid;
 	u32 reserved;
 	u32 reserved;
-	enum hv_message_type message_type;
+	u32 message_type;
 	u32 payload_size;
 	u32 payload_size;
 	u64 payload[HV_MESSAGE_PAYLOAD_QWORD_COUNT];
 	u64 payload[HV_MESSAGE_PAYLOAD_QWORD_COUNT];
 };
 };
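
The SynIC message definitions removed here do not disappear; they are expected to live on in a header shared with KVM (the move itself is outside this excerpt), since KVM now has to post the very same structures into the guest's message page. The layout is rigid: a 16-byte header plus HV_MESSAGE_PAYLOAD_BYTE_COUNT (240) bytes of payload gives the 256-byte HV_MESSAGE_SIZE. A standalone sketch that restates the removed structures with fixed-width types and lets the compiler confirm the arithmetic; this is a model of the layout, not the shared kernel header itself:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define HV_MESSAGE_SIZE			256
#define HV_MESSAGE_PAYLOAD_BYTE_COUNT	240
#define HV_MESSAGE_PAYLOAD_QWORD_COUNT	30

struct hv_message_header {
	uint32_t message_type;	/* the post-message hypercall input above is
				 * likewise switched to a plain u32 */
	uint8_t  payload_size;
	uint8_t  message_flags;	/* bit 0: msg_pending */
	uint8_t  reserved[2];
	uint64_t sender;	/* or a 24-bit port id plus 8 reserved bits */
};

struct hv_message {
	struct hv_message_header header;
	uint64_t payload[HV_MESSAGE_PAYLOAD_QWORD_COUNT];
};

int main(void)
{
	static_assert(sizeof(struct hv_message_header) == 16,
		      "header must be 16 bytes");
	static_assert(sizeof(struct hv_message) == HV_MESSAGE_SIZE,
		      "message must be 256 bytes");
	static_assert(HV_MESSAGE_PAYLOAD_QWORD_COUNT * 8 ==
		      HV_MESSAGE_PAYLOAD_BYTE_COUNT, "payload size mismatch");
	printf("hv_message is %zu bytes\n", sizeof(struct hv_message));
	return 0;
}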

+ 14 - 2
drivers/s390/char/sclp_early.c

@@ -40,10 +40,14 @@ struct read_info_sccb {
 	u8	fac85;			/* 85 */
 	u8	fac85;			/* 85 */
 	u8	_pad_86[91 - 86];	/* 86-90 */
 	u8	_pad_86[91 - 86];	/* 86-90 */
 	u8	flags;			/* 91 */
 	u8	flags;			/* 91 */
-	u8	_pad_92[100 - 92];	/* 92-99 */
+	u8	_pad_92[99 - 92];	/* 92-98 */
+	u8	hamaxpow;		/* 99 */
 	u32	rnsize2;		/* 100-103 */
 	u32	rnsize2;		/* 100-103 */
 	u64	rnmax2;			/* 104-111 */
 	u64	rnmax2;			/* 104-111 */
-	u8	_pad_112[120 - 112];	/* 112-119 */
+	u8	_pad_112[116 - 112];	/* 112-115 */
+	u8	fac116;			/* 116 */
+	u8	_pad_117[119 - 117];	/* 117-118 */
+	u8	fac119;			/* 119 */
 	u16	hcpua;			/* 120-121 */
 	u16	hcpua;			/* 120-121 */
 	u8	_pad_122[4096 - 122];	/* 122-4095 */
 	u8	_pad_122[4096 - 122];	/* 122-4095 */
 } __packed __aligned(PAGE_SIZE);
 } __packed __aligned(PAGE_SIZE);
@@ -108,6 +112,8 @@ static void __init sclp_facilities_detect(struct read_info_sccb *sccb)
 	sclp.facilities = sccb->facilities;
 	sclp.facilities = sccb->facilities;
 	sclp.has_sprp = !!(sccb->fac84 & 0x02);
 	sclp.has_sprp = !!(sccb->fac84 & 0x02);
 	sclp.has_core_type = !!(sccb->fac84 & 0x01);
 	sclp.has_core_type = !!(sccb->fac84 & 0x01);
+	sclp.has_esca = !!(sccb->fac116 & 0x08);
+	sclp.has_hvs = !!(sccb->fac119 & 0x80);
 	if (sccb->fac85 & 0x02)
 	if (sccb->fac85 & 0x02)
 		S390_lowcore.machine_flags |= MACHINE_FLAG_ESOP;
 		S390_lowcore.machine_flags |= MACHINE_FLAG_ESOP;
 	sclp.rnmax = sccb->rnmax ? sccb->rnmax : sccb->rnmax2;
 	sclp.rnmax = sccb->rnmax ? sccb->rnmax : sccb->rnmax2;
@@ -115,6 +121,11 @@ static void __init sclp_facilities_detect(struct read_info_sccb *sccb)
 	sclp.rzm <<= 20;
 	sclp.rzm <<= 20;
 	sclp.ibc = sccb->ibc;
 	sclp.ibc = sccb->ibc;
 
 
+	if (sccb->hamaxpow && sccb->hamaxpow < 64)
+		sclp.hamax = (1UL << sccb->hamaxpow) - 1;
+	else
+		sclp.hamax = U64_MAX;
+
 	if (!sccb->hcpua) {
 	if (!sccb->hcpua) {
 		if (MACHINE_IS_VM)
 		if (MACHINE_IS_VM)
 			sclp.max_cores = 64;
 			sclp.max_cores = 64;
@@ -131,6 +142,7 @@ static void __init sclp_facilities_detect(struct read_info_sccb *sccb)
 			continue;
 			continue;
 		sclp.has_siif = cpue->siif;
 		sclp.has_siif = cpue->siif;
 		sclp.has_sigpif = cpue->sigpif;
 		sclp.has_sigpif = cpue->sigpif;
+		sclp.has_sief2 = cpue->sief2;
 		break;
 		break;
 	}
 	}
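
sclp_early now also latches the ESCA and host-virtualization facility bits and derives the guest address-space limit from hamaxpow, a power-of-two exponent read from byte 99 of the read-info SCCB, where 0 or anything of 64 and above means "no limit". The derivation in isolation, as a small sketch:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Same rule as sclp_facilities_detect(): 2^hamaxpow - 1, else unlimited. */
static uint64_t sclp_hamax(uint8_t hamaxpow)
{
	if (hamaxpow && hamaxpow < 64)
		return (UINT64_C(1) << hamaxpow) - 1;
	return UINT64_MAX;
}

int main(void)
{
	uint8_t samples[] = { 0, 31, 53, 64 };

	for (unsigned i = 0; i < sizeof(samples); i++)
		printf("hamaxpow=%3u -> hamax=0x%016" PRIx64 "\n",
		       (unsigned)samples[i], sclp_hamax(samples[i]));
	return 0;
}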
 
 

+ 6 - 0
include/clocksource/arm_arch_timer.h

@@ -23,6 +23,12 @@
 #define ARCH_TIMER_CTRL_IT_MASK		(1 << 1)
 #define ARCH_TIMER_CTRL_IT_MASK		(1 << 1)
 #define ARCH_TIMER_CTRL_IT_STAT		(1 << 2)
 #define ARCH_TIMER_CTRL_IT_STAT		(1 << 2)
 
 
+#define CNTHCTL_EL1PCTEN		(1 << 0)
+#define CNTHCTL_EL1PCEN			(1 << 1)
+#define CNTHCTL_EVNTEN			(1 << 2)
+#define CNTHCTL_EVNTDIR			(1 << 3)
+#define CNTHCTL_EVNTI			(0xF << 4)
+
 enum arch_timer_reg {
 enum arch_timer_reg {
 	ARCH_TIMER_REG_CTRL,
 	ARCH_TIMER_REG_CTRL,
 	ARCH_TIMER_REG_TVAL,
 	ARCH_TIMER_REG_TVAL,
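
These CNTHCTL bit definitions back the timer save/restore in the new C world switch: the usual policy at EL2 is to leave the physical counter readable from the guest (EL1PCTEN set) while trapping the physical timer registers (EL1PCEN clear), and to hand both back to the host on exit. A small sketch of composing the two values, under that assumption about the hyp code's policy:

#include <stdint.h>
#include <stdio.h>

#define CNTHCTL_EL1PCTEN	(1u << 0)	/* EL1/EL0 physical counter access */
#define CNTHCTL_EL1PCEN		(1u << 1)	/* EL1/EL0 physical timer access */

/* Guest entry: allow the counter, trap the timer registers to EL2. */
static uint32_t cnthctl_for_guest(uint32_t val)
{
	val &= ~CNTHCTL_EL1PCEN;
	val |= CNTHCTL_EL1PCTEN;
	return val;
}

/* Host resume: give the host both counter and timer back. */
static uint32_t cnthctl_for_host(uint32_t val)
{
	return val | CNTHCTL_EL1PCEN | CNTHCTL_EL1PCTEN;
}

int main(void)
{
	uint32_t val = 0;

	printf("guest CNTHCTL = 0x%x\n", cnthctl_for_guest(val));
	printf("host  CNTHCTL = 0x%x\n", cnthctl_for_host(val));
	return 0;
}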

+ 6 - 0
include/kvm/arm_vgic.h

@@ -279,6 +279,12 @@ struct vgic_v2_cpu_if {
 	u32		vgic_lr[VGIC_V2_MAX_LRS];
 	u32		vgic_lr[VGIC_V2_MAX_LRS];
 };
 };
 
 
+/*
+ * LRs are stored in reverse order in memory. make sure we index them
+ * correctly.
+ */
+#define VGIC_V3_LR_INDEX(lr)		(VGIC_V3_MAX_LRS - 1 - lr)
+
 struct vgic_v3_cpu_if {
 struct vgic_v3_cpu_if {
 #ifdef CONFIG_KVM_ARM_VGIC_V3
 #ifdef CONFIG_KVM_ARM_VGIC_V3
 	u32		vgic_hcr;
 	u32		vgic_hcr;
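
Moving the reverse-index macro into the public vgic header lets the new C world switch and the existing virt/kvm code agree on how GICv3 list registers are laid out in memory: lr 0 lives in the last array slot. A trivial sketch (VGIC_V3_MAX_LRS is 16 in this series, but the exact value is only illustrative here):

#include <stdio.h>

#define VGIC_V3_MAX_LRS		16
#define VGIC_V3_LR_INDEX(lr)	(VGIC_V3_MAX_LRS - 1 - (lr))

int main(void)
{
	/* lr 0 maps to the last array slot, lr 15 to slot 0. */
	for (int lr = 0; lr < VGIC_V3_MAX_LRS; lr += 5)
		printf("lr %2d -> vgic_lr[%2d]\n", lr, VGIC_V3_LR_INDEX(lr));
	return 0;
}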

+ 24 - 42
include/linux/kvm_host.h

@@ -111,38 +111,13 @@ static inline bool is_error_page(struct page *page)
 }
 }
 
 
 /*
 /*
- * vcpu->requests bit members
+ * Architecture-independent vcpu->requests bit members
+ * Bits 4-7 are reserved for more arch-independent bits.
  */
  */
 #define KVM_REQ_TLB_FLUSH          0
 #define KVM_REQ_TLB_FLUSH          0
-#define KVM_REQ_MIGRATE_TIMER      1
-#define KVM_REQ_REPORT_TPR_ACCESS  2
-#define KVM_REQ_MMU_RELOAD         3
-#define KVM_REQ_TRIPLE_FAULT       4
-#define KVM_REQ_PENDING_TIMER      5
-#define KVM_REQ_UNHALT             6
-#define KVM_REQ_MMU_SYNC           7
-#define KVM_REQ_CLOCK_UPDATE       8
-#define KVM_REQ_KICK               9
-#define KVM_REQ_DEACTIVATE_FPU    10
-#define KVM_REQ_EVENT             11
-#define KVM_REQ_APF_HALT          12
-#define KVM_REQ_STEAL_UPDATE      13
-#define KVM_REQ_NMI               14
-#define KVM_REQ_PMU               15
-#define KVM_REQ_PMI               16
-#define KVM_REQ_WATCHDOG          17
-#define KVM_REQ_MASTERCLOCK_UPDATE 18
-#define KVM_REQ_MCLOCK_INPROGRESS 19
-#define KVM_REQ_EPR_EXIT          20
-#define KVM_REQ_SCAN_IOAPIC       21
-#define KVM_REQ_GLOBAL_CLOCK_UPDATE 22
-#define KVM_REQ_ENABLE_IBS        23
-#define KVM_REQ_DISABLE_IBS       24
-#define KVM_REQ_APIC_PAGE_RELOAD  25
-#define KVM_REQ_SMI               26
-#define KVM_REQ_HV_CRASH          27
-#define KVM_REQ_IOAPIC_EOI_EXIT   28
-#define KVM_REQ_HV_RESET          29
+#define KVM_REQ_MMU_RELOAD         1
+#define KVM_REQ_PENDING_TIMER      2
+#define KVM_REQ_UNHALT             3
 
 
 #define KVM_USERSPACE_IRQ_SOURCE_ID		0
 #define KVM_USERSPACE_IRQ_SOURCE_ID		0
 #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID	1
 #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID	1
@@ -318,6 +293,11 @@ struct kvm_s390_adapter_int {
 	u32 adapter_id;
 	u32 adapter_id;
 };
 };
 
 
+struct kvm_hv_sint {
+	u32 vcpu;
+	u32 sint;
+};
+
 struct kvm_kernel_irq_routing_entry {
 struct kvm_kernel_irq_routing_entry {
 	u32 gsi;
 	u32 gsi;
 	u32 type;
 	u32 type;
@@ -331,6 +311,7 @@ struct kvm_kernel_irq_routing_entry {
 		} irqchip;
 		} irqchip;
 		struct msi_msg msi;
 		struct msi_msg msi;
 		struct kvm_s390_adapter_int adapter;
 		struct kvm_s390_adapter_int adapter;
+		struct kvm_hv_sint hv_sint;
 	};
 	};
 	struct hlist_node link;
 	struct hlist_node link;
 };
 };
@@ -439,10 +420,13 @@ struct kvm {
 
 
 /* The guest did something we don't support. */
 /* The guest did something we don't support. */
 #define vcpu_unimpl(vcpu, fmt, ...)					\
 #define vcpu_unimpl(vcpu, fmt, ...)					\
-	kvm_pr_unimpl("vcpu%i " fmt, (vcpu)->vcpu_id, ## __VA_ARGS__)
+	kvm_pr_unimpl("vcpu%i, guest rIP: 0x%lx " fmt,			\
+			(vcpu)->vcpu_id, kvm_rip_read(vcpu), ## __VA_ARGS__)
 
 
 #define vcpu_debug(vcpu, fmt, ...)					\
 #define vcpu_debug(vcpu, fmt, ...)					\
 	kvm_debug("vcpu%i " fmt, (vcpu)->vcpu_id, ## __VA_ARGS__)
 	kvm_debug("vcpu%i " fmt, (vcpu)->vcpu_id, ## __VA_ARGS__)
+#define vcpu_err(vcpu, fmt, ...)					\
+	kvm_err("vcpu%i " fmt, (vcpu)->vcpu_id, ## __VA_ARGS__)
 
 
 static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i)
 static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i)
 {
 {
@@ -465,6 +449,11 @@ static inline struct kvm_vcpu *kvm_get_vcpu_by_id(struct kvm *kvm, int id)
 	struct kvm_vcpu *vcpu;
 	struct kvm_vcpu *vcpu;
 	int i;
 	int i;
 
 
+	if (id < 0 || id >= KVM_MAX_VCPUS)
+		return NULL;
+	vcpu = kvm_get_vcpu(kvm, id);
+	if (vcpu && vcpu->vcpu_id == id)
+		return vcpu;
 	kvm_for_each_vcpu(i, vcpu, kvm)
 	kvm_for_each_vcpu(i, vcpu, kvm)
 		if (vcpu->vcpu_id == id)
 		if (vcpu->vcpu_id == id)
 			return vcpu;
 			return vcpu;
@@ -484,12 +473,12 @@ void vcpu_put(struct kvm_vcpu *vcpu);
 
 
 #ifdef __KVM_HAVE_IOAPIC
 #ifdef __KVM_HAVE_IOAPIC
 void kvm_vcpu_request_scan_ioapic(struct kvm *kvm);
 void kvm_vcpu_request_scan_ioapic(struct kvm *kvm);
-void kvm_arch_irq_routing_update(struct kvm *kvm);
+void kvm_arch_post_irq_routing_update(struct kvm *kvm);
 #else
 #else
 static inline void kvm_vcpu_request_scan_ioapic(struct kvm *kvm)
 static inline void kvm_vcpu_request_scan_ioapic(struct kvm *kvm)
 {
 {
 }
 }
-static inline void kvm_arch_irq_routing_update(struct kvm *kvm)
+static inline void kvm_arch_post_irq_routing_update(struct kvm *kvm)
 {
 {
 }
 }
 #endif
 #endif
@@ -634,7 +623,7 @@ int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
 int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len);
 int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len);
 int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len);
 int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len);
 struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn);
 struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn);
-int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn);
+bool kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn);
 unsigned long kvm_host_page_size(struct kvm *kvm, gfn_t gfn);
 unsigned long kvm_host_page_size(struct kvm *kvm, gfn_t gfn);
 void mark_page_dirty(struct kvm *kvm, gfn_t gfn);
 void mark_page_dirty(struct kvm *kvm, gfn_t gfn);
 
 
@@ -668,8 +657,6 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu);
 
 
 void kvm_flush_remote_tlbs(struct kvm *kvm);
 void kvm_flush_remote_tlbs(struct kvm *kvm);
 void kvm_reload_remote_mmus(struct kvm *kvm);
 void kvm_reload_remote_mmus(struct kvm *kvm);
-void kvm_make_mclock_inprogress_request(struct kvm *kvm);
-void kvm_make_scan_ioapic_request(struct kvm *kvm);
 bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req);
 bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req);
 
 
 long kvm_arch_dev_ioctl(struct file *filp,
 long kvm_arch_dev_ioctl(struct file *filp,
@@ -990,11 +977,6 @@ static inline bool kvm_is_error_gpa(struct kvm *kvm, gpa_t gpa)
 	return kvm_is_error_hva(hva);
 	return kvm_is_error_hva(hva);
 }
 }
 
 
-static inline void kvm_migrate_timers(struct kvm_vcpu *vcpu)
-{
-	set_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests);
-}
-
 enum kvm_stat_kind {
 enum kvm_stat_kind {
 	KVM_STAT_VM,
 	KVM_STAT_VM,
 	KVM_STAT_VCPU,
 	KVM_STAT_VCPU,
@@ -1004,7 +986,6 @@ struct kvm_stats_debugfs_item {
 	const char *name;
 	const char *name;
 	int offset;
 	int offset;
 	enum kvm_stat_kind kind;
 	enum kvm_stat_kind kind;
-	struct dentry *dentry;
 };
 };
 extern struct kvm_stats_debugfs_item debugfs_entries[];
 extern struct kvm_stats_debugfs_item debugfs_entries[];
 extern struct dentry *kvm_debugfs_dir;
 extern struct dentry *kvm_debugfs_dir;
@@ -1091,6 +1072,7 @@ static inline void kvm_irq_routing_update(struct kvm *kvm)
 {
 {
 }
 }
 #endif
 #endif
+void kvm_arch_irq_routing_update(struct kvm *kvm);
 
 
 static inline int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
 static inline int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
 {
 {
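
With only the four architecture-independent request bits left here, and bits 4-7 reserved for future common ones, each architecture now numbers its own requests in its asm/kvm_host.h (x86 starts its own list at bit 8 in this series). The requests themselves are still just bits in vcpu->requests driven by atomic bitops; a compressed, non-atomic userspace model of the make/check pattern, with a hypothetical KVM_REQ_ARCH_FIRST standing in for an arch-defined bit:

#include <stdio.h>

/* Architecture-independent bits, as in the header above. */
#define KVM_REQ_TLB_FLUSH	0
#define KVM_REQ_MMU_RELOAD	1
#define KVM_REQ_PENDING_TIMER	2
#define KVM_REQ_UNHALT		3
/* Bits 4-7 reserved; an architecture would start its own bits at 8. */
#define KVM_REQ_ARCH_FIRST	8	/* hypothetical arch-defined request */

struct vcpu { unsigned long requests; };

static void kvm_make_request(int req, struct vcpu *vcpu)
{
	vcpu->requests |= 1ul << req;	/* the kernel uses set_bit() */
}

/* Reports a posted request once and clears it (test_and_clear_bit). */
static int kvm_check_request(int req, struct vcpu *vcpu)
{
	if (!(vcpu->requests & (1ul << req)))
		return 0;
	vcpu->requests &= ~(1ul << req);
	return 1;
}

int main(void)
{
	struct vcpu v = { 0 };

	kvm_make_request(KVM_REQ_TLB_FLUSH, &v);
	kvm_make_request(KVM_REQ_ARCH_FIRST, &v);
	printf("tlb flush pending: %d\n", kvm_check_request(KVM_REQ_TLB_FLUSH, &v));
	printf("tlb flush pending: %d\n", kvm_check_request(KVM_REQ_TLB_FLUSH, &v));
	printf("arch request pending: %d\n", kvm_check_request(KVM_REQ_ARCH_FIRST, &v));
	return 0;
}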

+ 2 - 4
include/linux/kvm_para.h

@@ -4,10 +4,8 @@
 #include <uapi/linux/kvm_para.h>
 #include <uapi/linux/kvm_para.h>
 
 
 
 
-static inline int kvm_para_has_feature(unsigned int feature)
+static inline bool kvm_para_has_feature(unsigned int feature)
 {
 {
-	if (kvm_arch_para_features() & (1UL << feature))
-		return 1;
-	return 0;
+	return !!(kvm_arch_para_features() & (1UL << feature));
 }
 }
 #endif /* __LINUX_KVM_PARA_H */
 #endif /* __LINUX_KVM_PARA_H */

+ 26 - 0
include/uapi/linux/kvm.h

@@ -154,6 +154,20 @@ struct kvm_s390_skeys {
 	__u32 flags;
 	__u32 reserved[9];
 };
+
+struct kvm_hyperv_exit {
+#define KVM_EXIT_HYPERV_SYNIC          1
+	__u32 type;
+	union {
+		struct {
+			__u32 msr;
+			__u64 control;
+			__u64 evt_page;
+			__u64 msg_page;
+		} synic;
+	} u;
+};
+
 #define KVM_S390_GET_SKEYS_NONE   1
 #define KVM_S390_SKEYS_MAX        1048576
 
@@ -184,6 +198,7 @@ struct kvm_s390_skeys {
 #define KVM_EXIT_SYSTEM_EVENT     24
 #define KVM_EXIT_S390_STSI        25
 #define KVM_EXIT_IOAPIC_EOI       26
+#define KVM_EXIT_HYPERV           27
 
 /* For KVM_EXIT_INTERNAL_ERROR */
 /* Emulate instruction failed. */
@@ -338,6 +353,8 @@ struct kvm_run {
 		struct {
 			__u8 vector;
 		} eoi;
+		/* KVM_EXIT_HYPERV */
+		struct kvm_hyperv_exit hyperv;
 		/* Fix the size of the union. */
 		char padding[256];
 	};
@@ -831,6 +848,8 @@ struct kvm_ppc_smmu_info {
 #define KVM_CAP_GUEST_DEBUG_HW_WPS 120
 #define KVM_CAP_SPLIT_IRQCHIP 121
 #define KVM_CAP_IOEVENTFD_ANY_LENGTH 122
+#define KVM_CAP_HYPERV_SYNIC 123
+#define KVM_CAP_S390_RI 124
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -854,10 +873,16 @@ struct kvm_irq_routing_s390_adapter {
 	__u32 adapter_id;
 };
 
+struct kvm_irq_routing_hv_sint {
+	__u32 vcpu;
+	__u32 sint;
+};
+
 /* gsi routing entry types */
 #define KVM_IRQ_ROUTING_IRQCHIP 1
 #define KVM_IRQ_ROUTING_MSI 2
 #define KVM_IRQ_ROUTING_S390_ADAPTER 3
+#define KVM_IRQ_ROUTING_HV_SINT 4
 
 struct kvm_irq_routing_entry {
 	__u32 gsi;
@@ -868,6 +893,7 @@ struct kvm_irq_routing_entry {
 		struct kvm_irq_routing_irqchip irqchip;
 		struct kvm_irq_routing_msi msi;
 		struct kvm_irq_routing_s390_adapter adapter;
+		struct kvm_irq_routing_hv_sint hv_sint;
 		__u32 pad[8];
 	} u;
 };

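The new KVM_EXIT_HYPERV exit reason and kvm_hyperv_exit payload above are consumed by userspace from the shared kvm_run structure. A hedged sketch of what a VMM-side handler could look like (the function and its wiring into the run loop are assumptions for illustration; only the structure fields come from this header):

#include <linux/kvm.h>
#include <stdio.h>

/* Illustrative handler: "run" is the vcpu's mmap()ed struct kvm_run,
 * inspected after KVM_RUN returns. */
static void handle_hyperv_exit(struct kvm_run *run)
{
	if (run->exit_reason != KVM_EXIT_HYPERV)
		return;

	switch (run->hyperv.type) {
	case KVM_EXIT_HYPERV_SYNIC:
		/* The guest touched a SynIC MSR; mirror the new state in the VMM. */
		printf("SynIC msr %#x control %#llx evt_page %#llx msg_page %#llx\n",
		       run->hyperv.u.synic.msr,
		       (unsigned long long)run->hyperv.u.synic.control,
		       (unsigned long long)run->hyperv.u.synic.evt_page,
		       (unsigned long long)run->hyperv.u.synic.msg_page);
		break;
	default:
		fprintf(stderr, "unhandled Hyper-V exit type %u\n", run->hyperv.type);
		break;
	}
}
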
+ 3 - 8
virt/kvm/arm/vgic-v3.c

@@ -28,6 +28,7 @@
 
 #include <asm/kvm_emulate.h>
 #include <asm/kvm_arm.h>
+#include <asm/kvm_asm.h>
 #include <asm/kvm_mmu.h>
 
 /* These are for GICv2 emulation only */
@@ -36,18 +37,12 @@
 #define GICH_LR_PHYSID_CPUID		(7UL << GICH_LR_PHYSID_CPUID_SHIFT)
 #define ICH_LR_VIRTUALID_MASK		(BIT_ULL(32) - 1)
 
-/*
- * LRs are stored in reverse order in memory. make sure we index them
- * correctly.
- */
-#define LR_INDEX(lr)			(VGIC_V3_MAX_LRS - 1 - lr)
-
 static u32 ich_vtr_el2;
 
 static struct vgic_lr vgic_v3_get_lr(const struct kvm_vcpu *vcpu, int lr)
 {
 	struct vgic_lr lr_desc;
-	u64 val = vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[LR_INDEX(lr)];
+	u64 val = vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[VGIC_V3_LR_INDEX(lr)];
 
 	if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3)
 		lr_desc.irq = val & ICH_LR_VIRTUALID_MASK;
@@ -111,7 +106,7 @@ static void vgic_v3_set_lr(struct kvm_vcpu *vcpu, int lr,
 		lr_val |= ((u64)lr_desc.hwirq) << ICH_LR_PHYS_ID_SHIFT;
 	}
 
-	vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[LR_INDEX(lr)] = lr_val;
+	vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[VGIC_V3_LR_INDEX(lr)] = lr_val;
 
 	if (!(lr_desc.state & LR_STATE_MASK))
 		vcpu->arch.vgic_cpu.vgic_v3.vgic_elrsr |= (1U << lr);

+ 1 - 1
virt/kvm/arm/vgic.c

@@ -878,7 +878,7 @@ static int vgic_handle_mmio_write(struct kvm_vcpu *vcpu,
 				       true);
 }
 
-struct kvm_io_device_ops vgic_io_ops = {
+static struct kvm_io_device_ops vgic_io_ops = {
 	.read	= vgic_handle_mmio_read,
 	.write	= vgic_handle_mmio_write,
 };

+ 1 - 2
virt/kvm/async_pf.c

@@ -57,8 +57,7 @@ int kvm_async_pf_init(void)
 
 void kvm_async_pf_deinit(void)
 {
-	if (async_pf_cache)
-		kmem_cache_destroy(async_pf_cache);
+	kmem_cache_destroy(async_pf_cache);
 	async_pf_cache = NULL;
 }
 

+ 6 - 1
virt/kvm/irqchip.c

@@ -166,6 +166,10 @@ out:
 	return r;
 }
 
+void __attribute__((weak)) kvm_arch_irq_routing_update(struct kvm *kvm)
+{
+}
+
 int kvm_set_irq_routing(struct kvm *kvm,
 			const struct kvm_irq_routing_entry *ue,
 			unsigned nr,
@@ -219,9 +223,10 @@ int kvm_set_irq_routing(struct kvm *kvm,
 	old = kvm->irq_routing;
 	rcu_assign_pointer(kvm->irq_routing, new);
 	kvm_irq_routing_update(kvm);
+	kvm_arch_irq_routing_update(kvm);
 	mutex_unlock(&kvm->irq_lock);
 
-	kvm_arch_irq_routing_update(kvm);
+	kvm_arch_post_irq_routing_update(kvm);
 
 	synchronize_srcu_expedited(&kvm->irq_srcu);
 

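The weak kvm_arch_irq_routing_update() definition above lets kvm_set_irq_routing() call the hook unconditionally: it now runs under kvm->irq_lock, while the call after the unlock goes to the renamed kvm_arch_post_irq_routing_update(). An architecture that cares about routing changes simply supplies a strong symbol; a hypothetical arch-side override might look like this (the helper it calls is made up purely for illustration):

/* arch/<arch>/kvm: hypothetical strong definition overriding the weak symbol */
void kvm_arch_irq_routing_update(struct kvm *kvm)
{
	/*
	 * Runs with kvm->irq_lock held, before readers are synchronized,
	 * so any arch-private routing cache can be refreshed here.
	 * example_refresh_arch_routing_cache() is an illustrative name.
	 */
	example_refresh_arch_routing_cache(kvm);
}
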
+ 12 - 34
virt/kvm/kvm_main.c

@@ -206,16 +206,6 @@ void kvm_reload_remote_mmus(struct kvm *kvm)
 	kvm_make_all_cpus_request(kvm, KVM_REQ_MMU_RELOAD);
 }
 
-void kvm_make_mclock_inprogress_request(struct kvm *kvm)
-{
-	kvm_make_all_cpus_request(kvm, KVM_REQ_MCLOCK_INPROGRESS);
-}
-
-void kvm_make_scan_ioapic_request(struct kvm *kvm)
-{
-	kvm_make_all_cpus_request(kvm, KVM_REQ_SCAN_IOAPIC);
-}
-
 int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
 {
 	struct page *page;
@@ -1164,15 +1154,15 @@ struct kvm_memory_slot *kvm_vcpu_gfn_to_memslot(struct kvm_vcpu *vcpu, gfn_t gfn
 	return __gfn_to_memslot(kvm_vcpu_memslots(vcpu), gfn);
 }
 
-int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn)
+bool kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn)
 {
 	struct kvm_memory_slot *memslot = gfn_to_memslot(kvm, gfn);
 
 	if (!memslot || memslot->id >= KVM_USER_MEM_SLOTS ||
 	      memslot->flags & KVM_MEMSLOT_INVALID)
-		return 0;
+		return false;
 
-	return 1;
+	return true;
 }
 EXPORT_SYMBOL_GPL(kvm_is_visible_gfn);
 
@@ -2257,7 +2247,7 @@ static int create_vcpu_fd(struct kvm_vcpu *vcpu)
 static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id)
 {
 	int r;
-	struct kvm_vcpu *vcpu, *v;
+	struct kvm_vcpu *vcpu;
 
 	if (id >= KVM_MAX_VCPUS)
 		return -EINVAL;
@@ -2281,12 +2271,10 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id)
 		r = -EINVAL;
 		goto unlock_vcpu_destroy;
 	}
-
-	kvm_for_each_vcpu(r, v, kvm)
-		if (v->vcpu_id == id) {
-			r = -EEXIST;
-			goto unlock_vcpu_destroy;
-		}
+	if (kvm_get_vcpu_by_id(kvm, id)) {
+		r = -EEXIST;
+		goto unlock_vcpu_destroy;
+	}
 
 	BUG_ON(kvm->vcpus[atomic_read(&kvm->online_vcpus)]);
 
@@ -3449,10 +3437,9 @@ static int kvm_init_debug(void)
 		goto out;
 
 	for (p = debugfs_entries; p->name; ++p) {
-		p->dentry = debugfs_create_file(p->name, 0444, kvm_debugfs_dir,
-						(void *)(long)p->offset,
-						stat_fops[p->kind]);
-		if (p->dentry == NULL)
+		if (!debugfs_create_file(p->name, 0444, kvm_debugfs_dir,
+					 (void *)(long)p->offset,
+					 stat_fops[p->kind]))
 			goto out_dir;
 	}
 
@@ -3464,15 +3451,6 @@ out:
 	return r;
 }
 
-static void kvm_exit_debug(void)
-{
-	struct kvm_stats_debugfs_item *p;
-
-	for (p = debugfs_entries; p->name; ++p)
-		debugfs_remove(p->dentry);
-	debugfs_remove(kvm_debugfs_dir);
-}
-
 static int kvm_suspend(void)
 {
 	if (kvm_usage_count)
@@ -3630,7 +3608,7 @@ EXPORT_SYMBOL_GPL(kvm_init);
 
 void kvm_exit(void)
 {
-	kvm_exit_debug();
+	debugfs_remove_recursive(kvm_debugfs_dir);
 	misc_deregister(&kvm_dev);
 	kmem_cache_destroy(kvm_vcpu_cache);
 	kvm_async_pf_deinit();