
Merge tag 'kvm-arm-for-v4.12' of git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm into HEAD

KVM/ARM Changes for v4.12.

Changes include:
 - Using the common sysreg definitions between KVM and arm64
 - Improved hyp-stub implementation with support for kexec and kdump on the 32-bit side
 - Proper PMU exception handling
 - Performance improvements of our GIC handling
 - Support for irqchip in userspace with in-kernel arch-timers and PMU support
 - A fix for a race condition in our PSCI code

Conflicts:
	Documentation/virtual/kvm/api.txt
	include/uapi/linux/kvm.h
Paolo Bonzini committed 8 years ago
commit c24a7be211
50 files changed, 1174 insertions(+), 938 deletions(-)
  1. Documentation/virtual/kvm/api.txt                 (+41, -0)
  2. Documentation/virtual/kvm/arm/hyp-abi.txt         (+53, -0)
  3. arch/arm/boot/compressed/head.S                   (+11, -1)
  4. arch/arm/include/asm/kvm_asm.h                    (+4, -3)
  5. arch/arm/include/asm/kvm_host.h                   (+0, -6)
  6. arch/arm/include/asm/kvm_mmu.h                    (+0, -1)
  7. arch/arm/include/asm/proc-fns.h                   (+2, -2)
  8. arch/arm/include/asm/virt.h                       (+13, -1)
  9. arch/arm/include/uapi/asm/kvm.h                   (+2, -0)
 10. arch/arm/kernel/hyp-stub.S                        (+34, -9)
 11. arch/arm/kernel/reboot.c                          (+5, -2)
 12. arch/arm/kvm/arm.c                                (+36, -30)
 13. arch/arm/kvm/coproc.c                             (+21, -3)
 14. arch/arm/kvm/coproc.h                             (+0, -18)
 15. arch/arm/kvm/handle_exit.c                        (+8, -0)
 16. arch/arm/kvm/hyp/hyp-entry.S                      (+23, -5)
 17. arch/arm/kvm/init.S                               (+43, -8)
 18. arch/arm/kvm/interrupts.S                         (+0, -4)
 19. arch/arm/kvm/mmu.c                                (+13, -23)
 20. arch/arm/kvm/psci.c                               (+7, -1)
 21. arch/arm/mm/mmu.c                                 (+5, -0)
 22. arch/arm/mm/proc-v7.S                             (+10, -5)
 23. arch/arm64/include/asm/arch_gicv3.h               (+13, -68)
 24. arch/arm64/include/asm/kvm_asm.h                  (+3, -2)
 25. arch/arm64/include/asm/kvm_host.h                 (+0, -7)
 26. arch/arm64/include/asm/kvm_mmu.h                  (+0, -1)
 27. arch/arm64/include/asm/sysreg.h                   (+155, -7)
 28. arch/arm64/include/asm/virt.h                     (+22, -9)
 29. arch/arm64/include/uapi/asm/kvm.h                 (+2, -0)
 30. arch/arm64/kernel/head.S                          (+4, -4)
 31. arch/arm64/kernel/hyp-stub.S                      (+14, -24)
 32. arch/arm64/kvm/hyp-init.S                         (+36, -10)
 33. arch/arm64/kvm/hyp.S                              (+1, -4)
 34. arch/arm64/kvm/hyp/hyp-entry.S                    (+21, -22)
 35. arch/arm64/kvm/sys_regs.c                         (+180, -316)
 36. arch/arm64/kvm/sys_regs.h                         (+5, -18)
 37. arch/arm64/kvm/sys_regs_generic_v8.c              (+1, -3)
 38. include/kvm/arm_arch_timer.h                      (+2, -0)
 39. include/kvm/arm_pmu.h                             (+7, -0)
 40. include/kvm/arm_vgic.h                            (+3, -6)
 41. include/uapi/linux/kvm.h                          (+8, -0)
 42. virt/kvm/arm/arch_timer.c                         (+98, -26)
 43. virt/kvm/arm/hyp/vgic-v2-sr.c                     (+7, -71)
 44. virt/kvm/arm/hyp/vgic-v3-sr.c                     (+24, -63)
 45. virt/kvm/arm/pmu.c                                (+35, -4)
 46. virt/kvm/arm/vgic/vgic-init.c                     (+68, -40)
 47. virt/kvm/arm/vgic/vgic-v2.c                       (+39, -51)
 48. virt/kvm/arm/vgic/vgic-v3.c                       (+45, -42)
 49. virt/kvm/arm/vgic/vgic.c                          (+44, -16)
 50. virt/kvm/arm/vgic/vgic.h                          (+6, -2)

+ 41 - 0
Documentation/virtual/kvm/api.txt

@@ -4120,3 +4120,44 @@ This capability indicates that guest using memory monitoring instructions
 (MWAIT/MWAITX) to stop the virtual CPU will not cause a VM exit.  As such time
 spent while virtual CPU is halted in this way will then be accounted for as
 guest running time on the host (as opposed to e.g. HLT).
+
+8.9 KVM_CAP_ARM_USER_IRQ
+
+Architectures: arm, arm64
+This capability, if KVM_CHECK_EXTENSION indicates that it is available, means
+that if userspace creates a VM without an in-kernel interrupt controller, it
+will be notified of changes to the output level of in-kernel emulated devices,
+which can generate virtual interrupts presented to the VM.
+For such VMs, on every return to userspace, the kernel
+updates the vcpu's run->s.regs.device_irq_level field to represent the actual
+output level of the device.
+
+Whenever kvm detects a change in the device output level, kvm guarantees at
+least one return to userspace before running the VM.  This exit could either
+be a KVM_EXIT_INTR or any other exit event, like KVM_EXIT_MMIO. This way,
+userspace can always sample the device output level and re-compute the state of
+the userspace interrupt controller.  Userspace should always check the state
+of run->s.regs.device_irq_level on every kvm exit.
+The value in run->s.regs.device_irq_level can represent both level and edge
+triggered interrupt signals, depending on the device.  Edge triggered interrupt
+signals will exit to userspace with the bit in run->s.regs.device_irq_level
+set exactly once per edge signal.
+
+The field run->s.regs.device_irq_level is available independent of
+run->kvm_valid_regs or run->kvm_dirty_regs bits.
+
+If KVM_CAP_ARM_USER_IRQ is supported, the KVM_CHECK_EXTENSION ioctl returns a
+number larger than 0 indicating the version of this capability is implemented
+and thereby which bits in run->s.regs.device_irq_level can signal values.
+
+Currently the following bits are defined for the device_irq_level bitmap:
+
+  KVM_CAP_ARM_USER_IRQ >= 1:
+
+    KVM_ARM_DEV_EL1_VTIMER -  EL1 virtual timer
+    KVM_ARM_DEV_EL1_PTIMER -  EL1 physical timer
+    KVM_ARM_DEV_PMU        -  ARM PMU overflow interrupt signal
+
+Future versions of kvm may implement additional events. These will get
+indicated by returning a higher number from KVM_CHECK_EXTENSION and will be
+listed above.
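
To make the protocol concrete, a minimal userspace sketch follows. This is illustrative only, not part of the patch: vcpu_fd, the mmap'ed run structure, and the set_irq_line() helper are assumed to exist in the VMM. It samples run->s.regs.device_irq_level after each KVM_RUN and forwards the three defined bits to a userspace interrupt controller model:

    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    enum { VTIMER_LINE, PTIMER_LINE, PMU_LINE };        /* hypothetical line numbers */
    extern void set_irq_line(int line, int asserted);   /* VMM-provided (assumed) */

    static void vcpu_loop(int vcpu_fd, struct kvm_run *run)
    {
            for (;;) {
                    ioctl(vcpu_fd, KVM_RUN, 0);

                    /* Valid on every exit, independent of kvm_valid_regs. */
                    __u64 level = run->s.regs.device_irq_level;

                    set_irq_line(VTIMER_LINE, !!(level & KVM_ARM_DEV_EL1_VTIMER));
                    set_irq_line(PTIMER_LINE, !!(level & KVM_ARM_DEV_EL1_PTIMER));
                    set_irq_line(PMU_LINE,    !!(level & KVM_ARM_DEV_PMU));

                    /* ... then dispatch on run->exit_reason as usual ... */
            }
    }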

+ 53 - 0
Documentation/virtual/kvm/arm/hyp-abi.txt

@@ -0,0 +1,53 @@
+* Internal ABI between the kernel and HYP
+
+This file documents the interaction between the Linux kernel and the
+hypervisor layer when running Linux as a hypervisor (for example
+KVM). It doesn't cover the interaction of the kernel with the
+hypervisor when running as a guest (under Xen, KVM or any other
+hypervisor), or any hypervisor-specific interaction when the kernel is
+used as a host.
+
+On arm and arm64 (without VHE), the kernel doesn't run in hypervisor
+mode, but still needs to interact with it, allowing a built-in
+hypervisor to be either installed or torn down.
+
+In order to achieve this, the kernel must be booted at HYP (arm) or
+EL2 (arm64), allowing it to install a set of stubs before dropping to
+SVC/EL1. These stubs are accessible by using a 'hvc #0' instruction,
+and only act on individual CPUs.
+
+Unless specified otherwise, any built-in hypervisor must implement
+these functions (see arch/arm{,64}/include/asm/virt.h):
+
+* r0/x0 = HVC_SET_VECTORS
+  r1/x1 = vectors
+
+  Set HVBAR/VBAR_EL2 to 'vectors' to enable a hypervisor. 'vectors'
+  must be a physical address, and respect the alignment requirements
+  of the architecture. Only implemented by the initial stubs, not by
+  Linux hypervisors.
+
+* r0/x0 = HVC_RESET_VECTORS
+
+  Turn HYP/EL2 MMU off, and reset HVBAR/VBAR_EL2 to the initial
+  stubs' exception vector value. This effectively disables an existing
+  hypervisor.
+
+* r0/x0 = HVC_SOFT_RESTART
+  r1/x1 = restart address
+  x2 = x0's value when entering the next payload (arm64)
+  x3 = x1's value when entering the next payload (arm64)
+  x4 = x2's value when entering the next payload (arm64)
+
+  Mask all exceptions, disable the MMU, move the arguments into place
+  (arm64 only), and jump to the restart address while at HYP/EL2. This
+  hypercall is not expected to return to its caller.
+
+Any other value of r0/x0 triggers a hypervisor-specific handling,
+which is not documented here.
+
+The return value of a stub hypercall is held by r0/x0, and is 0 on
+success, and HVC_STUB_ERR on error. A stub hypercall is allowed to
+clobber any of the caller-saved registers (x0-x18 on arm64, r0-r3 and
+ip on arm). It is thus recommended to use a function call to perform
+the hypercall.
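
As an illustration of the last point, a stub hypercall could be issued from C roughly as follows. This is a sketch under the ABI above, not code from the patch; the constant 2 is HVC_RESET_VECTORS as defined in asm/virt.h:

    /* Minimal arm64 sketch: issue HVC_RESET_VECTORS and return x0,
     * which is 0 on success or HVC_STUB_ERR on error.  Declaring the
     * caller-saved registers as clobbered matches the ABI above, and
     * wrapping the hvc in a function gives the recommended call-based
     * indirection. */
    static unsigned long hyp_stub_reset_vectors(void)
    {
            register unsigned long x0 asm("x0") = 2;    /* HVC_RESET_VECTORS */

            asm volatile("hvc #0"
                         : "+r" (x0)
                         :
                         : "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8",
                           "x9", "x10", "x11", "x12", "x13", "x14", "x15",
                           "x16", "x17", "x18", "memory");
            return x0;
    }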

+ 11 - 1
arch/arm/boot/compressed/head.S

@@ -422,7 +422,17 @@ dtb_check_done:
 		cmp	r0, #HYP_MODE
 		bne	1f
 
-		bl	__hyp_get_vectors
+		/*
+		 * Compute the address of the hyp vectors after relocation.
+		 * This requires some arithmetic since we cannot directly
+		 * reference __hyp_stub_vectors in a PC-relative way.
+		 * Call __hyp_set_vectors with the new address so that we
+		 * can HVC again after the copy.
+		 */
+0:		adr	r0, 0b
+		movw	r1, #:lower16:__hyp_stub_vectors - 0b
+		movt	r1, #:upper16:__hyp_stub_vectors - 0b
+		add	r0, r0, r1
 		sub	r0, r0, r5
 		add	r0, r0, r10
 		bl	__hyp_set_vectors

+ 4 - 3
arch/arm/include/asm/kvm_asm.h

@@ -33,7 +33,7 @@
 #define ARM_EXCEPTION_IRQ	  5
 #define ARM_EXCEPTION_FIQ	  6
 #define ARM_EXCEPTION_HVC	  7
-
+#define ARM_EXCEPTION_HYP_GONE	  HVC_STUB_ERR
 /*
  * The rr_lo_hi macro swaps a pair of registers depending on
  * current endianness. It is used in conjunction with ldrd and strd
@@ -72,10 +72,11 @@ extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu);
 
 extern void __init_stage2_translation(void);
 
-extern void __kvm_hyp_reset(unsigned long);
-
 extern u64 __vgic_v3_get_ich_vtr_el2(void);
+extern u64 __vgic_v3_read_vmcr(void);
+extern void __vgic_v3_write_vmcr(u32 vmcr);
 extern void __vgic_v3_init_lrs(void);
+
 #endif
 
 #endif /* __ARM_KVM_ASM_H__ */

+ 0 - 6
arch/arm/include/asm/kvm_host.h

@@ -269,12 +269,6 @@ static inline void __cpu_init_stage2(void)
 	kvm_call_hyp(__init_stage2_translation);
 }
 
-static inline void __cpu_reset_hyp_mode(unsigned long vector_ptr,
-					phys_addr_t phys_idmap_start)
-{
-	kvm_call_hyp((void *)virt_to_idmap(__kvm_hyp_reset), vector_ptr);
-}
-
 static inline int kvm_arch_dev_ioctl_check_extension(struct kvm *kvm, long ext)
 {
 	return 0;

+ 0 - 1
arch/arm/include/asm/kvm_mmu.h

@@ -56,7 +56,6 @@ void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu);
 
 phys_addr_t kvm_mmu_get_httbr(void);
 phys_addr_t kvm_get_idmap_vector(void);
-phys_addr_t kvm_get_idmap_start(void);
 int kvm_mmu_init(void);
 void kvm_clear_hyp_idmap(void);
 

+ 2 - 2
arch/arm/include/asm/proc-fns.h

@@ -43,7 +43,7 @@ extern struct processor {
 	/*
 	 * Special stuff for a reset
 	 */
-	void (*reset)(unsigned long addr) __attribute__((noreturn));
+	void (*reset)(unsigned long addr, bool hvc) __attribute__((noreturn));
 	/*
 	 * Idle the processor
 	 */
@@ -88,7 +88,7 @@ extern void cpu_set_pte_ext(pte_t *ptep, pte_t pte);
 #else
 extern void cpu_set_pte_ext(pte_t *ptep, pte_t pte, unsigned int ext);
 #endif
-extern void cpu_reset(unsigned long addr) __attribute__((noreturn));
+extern void cpu_reset(unsigned long addr, bool hvc) __attribute__((noreturn));
 
 /* These three are private to arch/arm/kernel/suspend.c */
 extern void cpu_do_suspend(void *);

+ 13 - 1
arch/arm/include/asm/virt.h

@@ -53,7 +53,7 @@ static inline void sync_boot_mode(void)
 }
 
 void __hyp_set_vectors(unsigned long phys_vector_base);
-unsigned long __hyp_get_vectors(void);
+void __hyp_reset_vectors(void);
 #else
 #define __boot_cpu_mode	(SVC_MODE)
 #define sync_boot_mode()
@@ -94,6 +94,18 @@ extern char __hyp_text_start[];
 extern char __hyp_text_end[];
 #endif
 
+#else
+
+/* Only assembly code should need those */
+
+#define HVC_SET_VECTORS 0
+#define HVC_SOFT_RESTART 1
+#define HVC_RESET_VECTORS 2
+
+#define HVC_STUB_HCALL_NR 3
+
 #endif /* __ASSEMBLY__ */
 
+#define HVC_STUB_ERR	0xbadca11
+
 #endif /* ! VIRT_H */

+ 2 - 0
arch/arm/include/uapi/asm/kvm.h

@@ -116,6 +116,8 @@ struct kvm_debug_exit_arch {
 };
 
 struct kvm_sync_regs {
+	/* Used with KVM_CAP_ARM_USER_IRQ */
+	__u64 device_irq_level;
 };
 
 struct kvm_arch_memory_slot {

+ 34 - 9
arch/arm/kernel/hyp-stub.S

@@ -125,7 +125,7 @@ ENTRY(__hyp_stub_install_secondary)
  * (see safe_svcmode_maskall).
  */
 	@ Now install the hypervisor stub:
-	adr	r7, __hyp_stub_vectors
+	W(adr)	r7, __hyp_stub_vectors
 	mcr	p15, 4, r7, c12, c0, 0	@ set hypervisor vector base (HVBAR)
 
 	@ Disable all traps, so we don't get any nasty surprise
@@ -202,9 +202,23 @@ ARM_BE8(orr	r7, r7, #(1 << 25))     @ HSCTLR.EE
 ENDPROC(__hyp_stub_install_secondary)
 
 __hyp_stub_do_trap:
-	cmp	r0, #-1
-	mrceq	p15, 4, r0, c12, c0, 0	@ get HVBAR
-	mcrne	p15, 4, r0, c12, c0, 0	@ set HVBAR
+	teq	r0, #HVC_SET_VECTORS
+	bne	1f
+	mcr	p15, 4, r1, c12, c0, 0	@ set HVBAR
+	b	__hyp_stub_exit
+
+1:	teq	r0, #HVC_SOFT_RESTART
+	bne	1f
+	bx	r1
+
+1:	teq	r0, #HVC_RESET_VECTORS
+	beq	__hyp_stub_exit
+
+	ldr	r0, =HVC_STUB_ERR
+	__ERET
+
+__hyp_stub_exit:
+	mov	r0, #0
 	__ERET
 ENDPROC(__hyp_stub_do_trap)
 
@@ -230,15 +244,26 @@ ENDPROC(__hyp_stub_do_trap)
 * so you will need to set that to something sensible at the new hypervisor's
 * initialisation entry point.
 */
-ENTRY(__hyp_get_vectors)
-	mov	r0, #-1
-ENDPROC(__hyp_get_vectors)
-	@ fall through
 ENTRY(__hyp_set_vectors)
+	mov	r1, r0
+	mov	r0, #HVC_SET_VECTORS
 	__HVC(0)
 	ret	lr
 ENDPROC(__hyp_set_vectors)
 
+ENTRY(__hyp_soft_restart)
+	mov	r1, r0
+	mov	r0, #HVC_SOFT_RESTART
+	__HVC(0)
+	ret	lr
+ENDPROC(__hyp_soft_restart)
+
+ENTRY(__hyp_reset_vectors)
+	mov	r0, #HVC_RESET_VECTORS
+	__HVC(0)
+	ret	lr
+ENDPROC(__hyp_reset_vectors)
+
 #ifndef ZIMAGE
 .align 2
 .L__boot_cpu_mode_offset:
@@ -246,7 +271,7 @@ ENDPROC(__hyp_set_vectors)
 #endif
 
 .align 5
-__hyp_stub_vectors:
+ENTRY(__hyp_stub_vectors)
 __hyp_stub_reset:	W(b)	.
 __hyp_stub_und:		W(b)	.
 __hyp_stub_svc:		W(b)	.

+ 5 - 2
arch/arm/kernel/reboot.c

@@ -12,10 +12,11 @@
 
 #include <asm/cacheflush.h>
 #include <asm/idmap.h>
+#include <asm/virt.h>
 
 #include "reboot.h"
 
-typedef void (*phys_reset_t)(unsigned long);
+typedef void (*phys_reset_t)(unsigned long, bool);
 
 /*
  * Function pointers to optional machine specific functions
@@ -51,7 +52,9 @@ static void __soft_restart(void *addr)
 
 	/* Switch to the identity mapping. */
 	phys_reset = (phys_reset_t)virt_to_idmap(cpu_reset);
-	phys_reset((unsigned long)addr);
+
+	/* original stub should be restored by kvm */
+	phys_reset((unsigned long)addr, is_hyp_mode_available());
 
 	/* Should never get here. */
 	BUG();

+ 36 - 30
arch/arm/kvm/arm.c

@@ -53,7 +53,6 @@ __asm__(".arch_extension	virt");
 
 static DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page);
 static kvm_cpu_context_t __percpu *kvm_host_cpu_state;
-static unsigned long hyp_default_vectors;
 
 /* Per-CPU variable containing the currently running vcpu. */
 static DEFINE_PER_CPU(struct kvm_vcpu *, kvm_arm_running_vcpu);
@@ -227,6 +226,13 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 		else
 			r = kvm->arch.vgic.msis_require_devid;
 		break;
+	case KVM_CAP_ARM_USER_IRQ:
+		/*
+		 * 1: EL1_VTIMER, EL1_PTIMER, and PMU.
+		 * (bump this number if adding more devices)
+		 */
+		r = 1;
+		break;
 	default:
 		r = kvm_arch_dev_ioctl_check_extension(kvm, ext);
 		break;
@@ -348,15 +354,14 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 	vcpu->arch.host_cpu_context = this_cpu_ptr(kvm_host_cpu_state);
 
 	kvm_arm_set_running_vcpu(vcpu);
+
+	kvm_vgic_load(vcpu);
 }
 
 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
 {
-	/*
-	 * The arch-generic KVM code expects the cpu field of a vcpu to be -1
-	 * if the vcpu is no longer assigned to a cpu.  This is used for the
-	 * optimized make_all_cpus_request path.
-	 */
+	kvm_vgic_put(vcpu);
+
 	vcpu->cpu = -1;
 
 	kvm_arm_set_running_vcpu(NULL);
@@ -514,13 +519,7 @@ static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
 			return ret;
 	}
 
-	/*
-	 * Enable the arch timers only if we have an in-kernel VGIC
-	 * and it has been properly initialized, since we cannot handle
-	 * interrupts from the virtual timer with a userspace gic.
-	 */
-	if (irqchip_in_kernel(kvm) && vgic_initialized(kvm))
-		ret = kvm_timer_enable(vcpu);
+	ret = kvm_timer_enable(vcpu);
 
 	return ret;
 }
@@ -630,16 +629,23 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
 		 * non-preemptible context.
 		 */
 		preempt_disable();
+
 		kvm_pmu_flush_hwstate(vcpu);
+
 		kvm_timer_flush_hwstate(vcpu);
 		kvm_vgic_flush_hwstate(vcpu);
 
 		local_irq_disable();
 
 		/*
-		 * Re-check atomic conditions
+		 * If we have a signal pending, or need to notify a userspace
+		 * irqchip about timer or PMU level changes, then we exit (and
+		 * update the timer level state in kvm_timer_update_run
+		 * below).
 		 */
-		if (signal_pending(current)) {
+		if (signal_pending(current) ||
+		    kvm_timer_should_notify_user(vcpu) ||
+		    kvm_pmu_should_notify_user(vcpu)) {
 			ret = -EINTR;
 			run->exit_reason = KVM_EXIT_INTR;
 		}
@@ -711,6 +717,12 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
 		ret = handle_exit(vcpu, run, ret);
 	}
 
+	/* Tell userspace about in-kernel device output levels */
+	if (unlikely(!irqchip_in_kernel(vcpu->kvm))) {
+		kvm_timer_update_run(vcpu);
+		kvm_pmu_update_run(vcpu);
+	}
+
 	if (vcpu->sigset_active)
 		sigprocmask(SIG_SETMASK, &sigsaved, NULL);
 	return ret;
@@ -1109,8 +1121,16 @@ static void cpu_init_hyp_mode(void *dummy)
 	kvm_arm_init_debug();
 }
 
+static void cpu_hyp_reset(void)
+{
+	if (!is_kernel_in_hyp_mode())
+		__hyp_reset_vectors();
+}
+
 static void cpu_hyp_reinit(void)
 {
+	cpu_hyp_reset();
+
 	if (is_kernel_in_hyp_mode()) {
 		/*
 		 * __cpu_init_stage2() is safe to call even if the PM
@@ -1118,18 +1138,10 @@ static void cpu_hyp_reinit(void)
 		 */
 		__cpu_init_stage2();
 	} else {
-		if (__hyp_get_vectors() == hyp_default_vectors)
-			cpu_init_hyp_mode(NULL);
+		cpu_init_hyp_mode(NULL);
 	}
 }
 
-static void cpu_hyp_reset(void)
-{
-	if (!is_kernel_in_hyp_mode())
-		__cpu_reset_hyp_mode(hyp_default_vectors,
-				     kvm_get_idmap_start());
-}
-
 static void _kvm_arch_hardware_enable(void *discard)
 {
 	if (!__this_cpu_read(kvm_arm_hardware_enabled)) {
@@ -1312,12 +1324,6 @@ static int init_hyp_mode(void)
 	if (err)
 		goto out_err;
 
-	/*
-	 * It is probably enough to obtain the default on one
-	 * CPU. It's unlikely to be different on the others.
-	 */
-	hyp_default_vectors = __hyp_get_vectors();
-
 	/*
 	 * Allocate stack pages for Hypervisor-mode
 	 */

+ 21 - 3
arch/arm/kvm/coproc.c

@@ -40,6 +40,24 @@
  * Co-processor emulation
  *****************************************************************************/
 
+static bool write_to_read_only(struct kvm_vcpu *vcpu,
+			       const struct coproc_params *params)
+{
+	WARN_ONCE(1, "CP15 write to read-only register\n");
+	print_cp_instr(params);
+	kvm_inject_undefined(vcpu);
+	return false;
+}
+
+static bool read_from_write_only(struct kvm_vcpu *vcpu,
+				 const struct coproc_params *params)
+{
+	WARN_ONCE(1, "CP15 read to write-only register\n");
+	print_cp_instr(params);
+	kvm_inject_undefined(vcpu);
+	return false;
+}
+
 /* 3 bits per cache level, as per CLIDR, but non-existent caches always 0 */
 static u32 cache_levels;
 
@@ -502,15 +520,15 @@ static int emulate_cp15(struct kvm_vcpu *vcpu,
 		if (likely(r->access(vcpu, params, r))) {
 			/* Skip instruction, since it was emulated */
 			kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
-			return 1;
 		}
-		/* If access function fails, it should complain. */
 	} else {
+		/* If access function fails, it should complain. */
 		kvm_err("Unsupported guest CP15 access at: %08lx\n",
 			*vcpu_pc(vcpu));
 		print_cp_instr(params);
+		kvm_inject_undefined(vcpu);
 	}
-	kvm_inject_undefined(vcpu);
+
 	return 1;
 }

+ 0 - 18
arch/arm/kvm/coproc.h

@@ -81,24 +81,6 @@ static inline bool read_zero(struct kvm_vcpu *vcpu,
 	return true;
 }
 
-static inline bool write_to_read_only(struct kvm_vcpu *vcpu,
-				      const struct coproc_params *params)
-{
-	kvm_debug("CP15 write to read-only register at: %08lx\n",
-		  *vcpu_pc(vcpu));
-	print_cp_instr(params);
-	return false;
-}
-
-static inline bool read_from_write_only(struct kvm_vcpu *vcpu,
-					const struct coproc_params *params)
-{
-	kvm_debug("CP15 read to write-only register at: %08lx\n",
-		  *vcpu_pc(vcpu));
-	print_cp_instr(params);
-	return false;
-}
-
 /* Reset functions */
 static inline void reset_unknown(struct kvm_vcpu *vcpu,
 				 const struct coproc_reg *r)

+ 8 - 0
arch/arm/kvm/handle_exit.c

@@ -160,6 +160,14 @@ int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
 	case ARM_EXCEPTION_DATA_ABORT:
 		kvm_inject_vabt(vcpu);
 		return 1;
+	case ARM_EXCEPTION_HYP_GONE:
+		/*
+		 * HYP has been reset to the hyp-stub. This happens
+		 * when a guest is pre-empted by kvm_reboot()'s
+		 * shutdown call.
+		 */
+		run->exit_reason = KVM_EXIT_FAIL_ENTRY;
+		return 0;
 	default:
 		kvm_pr_unimpl("Unsupported exception type: %d",
 			      exception_index);

+ 23 - 5
arch/arm/kvm/hyp/hyp-entry.S

@@ -126,11 +126,29 @@ hyp_hvc:
 	 */
 	pop	{r0, r1, r2}
 
-	/* Check for __hyp_get_vectors */
-	cmp	r0, #-1
-	mrceq	p15, 4, r0, c12, c0, 0	@ get HVBAR
-	beq	1f
+	/*
+	 * Check if we have a kernel function, which is guaranteed to be
+	 * bigger than the maximum hyp stub hypercall
+	 */
+	cmp	r0, #HVC_STUB_HCALL_NR
+	bhs	1f
 
+	/*
+	 * Not a kernel function, treat it as a stub hypercall.
+	 * Compute the physical address for __kvm_handle_stub_hvc
+	 * (as the code lives in the idmapped page) and branch there.
+	 * We hijack ip (r12) as a tmp register.
+	 */
+	push	{r1}
+	ldr	r1, =kimage_voffset
+	ldr	r1, [r1]
+	ldr	ip, =__kvm_handle_stub_hvc
+	sub	ip, ip, r1
+	pop	{r1}
+
+	bx	ip
+
+1:
 	push	{lr}
 
 	mov	lr, r0
@@ -142,7 +160,7 @@ THUMB(	orr	lr, #1)
 	blx	lr			@ Call the HYP function
 
 	pop	{lr}
-1:	eret
+	eret
 
 guest_trap:
 	load_vcpu r0			@ Load VCPU pointer to r0

+ 43 - 8
arch/arm/kvm/init.S

@@ -23,6 +23,7 @@
 #include <asm/kvm_asm.h>
 #include <asm/kvm_arm.h>
 #include <asm/kvm_mmu.h>
+#include <asm/virt.h>
 
 /********************************************************************
  * Hypervisor initialization
@@ -39,6 +40,10 @@
  * - Setup the page tables
 * - Enable the MMU
 * - Profit! (or eret, if you only care about the code).
+ *
+ * Another possibility is to get a HYP stub hypercall.
+ * We discriminate between the two by checking if r0 contains a value
+ * that is less than HVC_STUB_HCALL_NR.
 */
 
 	.text
@@ -58,6 +63,10 @@ __kvm_hyp_init:
 	W(b)	.
 
 __do_hyp_init:
+	@ Check for a stub hypercall
+	cmp	r0, #HVC_STUB_HCALL_NR
+	blo	__kvm_handle_stub_hvc
+
 	@ Set stack pointer
 	mov	sp, r0
 
@@ -112,20 +121,46 @@ __do_hyp_init:
 
 	eret
 
-	@ r0 : stub vectors address
-ENTRY(__kvm_hyp_reset)
+ENTRY(__kvm_handle_stub_hvc)
+	cmp	r0, #HVC_SOFT_RESTART
+	bne	1f
+
+	/* The target is expected in r1 */
+	msr	ELR_hyp, r1
+	mrs	r0, cpsr
+	bic	r0, r0, #MODE_MASK
+	orr	r0, r0, #HYP_MODE
+THUMB(	orr	r0, r0, #PSR_T_BIT	)
+	msr	spsr_cxsf, r0
+	b	reset
+
+1:	cmp	r0, #HVC_RESET_VECTORS
+	bne	1f
+
+reset:
 	/* We're now in idmap, disable MMU */
 	mrc	p15, 4, r1, c1, c0, 0	@ HSCTLR
-	ldr	r2, =(HSCTLR_M | HSCTLR_A | HSCTLR_C | HSCTLR_I)
-	bic	r1, r1, r2
+	ldr	r0, =(HSCTLR_M | HSCTLR_A | HSCTLR_C | HSCTLR_I)
+	bic	r1, r1, r0
 	mcr	p15, 4, r1, c1, c0, 0	@ HSCTLR
 
-	/* Install stub vectors */
-	mcr	p15, 4, r0, c12, c0, 0	@ HVBAR
-	isb
+	/*
+	 * Install stub vectors, using ardb's VA->PA trick.
+	 */
+0:	adr	r0, 0b					@ PA(0)
+	movw	r1, #:lower16:__hyp_stub_vectors - 0b   @ VA(stub) - VA(0)
+	movt	r1, #:upper16:__hyp_stub_vectors - 0b
+	add	r1, r1, r0				@ PA(stub)
+	mcr	p15, 4, r1, c12, c0, 0	@ HVBAR
+	b	exit
+
+1:	ldr	r0, =HVC_STUB_ERR
+	eret
+
+exit:
+	mov	r0, #0
 	eret
-ENDPROC(__kvm_hyp_reset)
+ENDPROC(__kvm_handle_stub_hvc)
 
 	.ltorg

+ 0 - 4
arch/arm/kvm/interrupts.S

@@ -37,10 +37,6 @@
 * in Hyp mode (see init_hyp_mode in arch/arm/kvm/arm.c).  Return values are
 * passed in r0 (strictly 32bit).
 *
- * A function pointer with a value of 0xffffffff has a special meaning,
- * and is used to implement __hyp_get_vectors in the same way as in
- * arch/arm/kernel/hyp_stub.S.
- *
 * The calling convention follows the standard AAPCS:
 *   r0 - r3: caller save
 *   r12:     caller save

+ 13 - 23
arch/arm/kvm/mmu.c

@@ -1512,7 +1512,8 @@ static int handle_hva_to_gpa(struct kvm *kvm,
 			     unsigned long start,
 			     unsigned long end,
 			     int (*handler)(struct kvm *kvm,
-					    gpa_t gpa, void *data),
+					    gpa_t gpa, u64 size,
+					    void *data),
 			     void *data)
 {
 	struct kvm_memslots *slots;
@@ -1524,7 +1525,7 @@ static int handle_hva_to_gpa(struct kvm *kvm,
 	/* we only care about the pages that the guest sees */
 	kvm_for_each_memslot(memslot, slots) {
 		unsigned long hva_start, hva_end;
-		gfn_t gfn, gfn_end;
+		gfn_t gpa;
 
 		hva_start = max(start, memslot->userspace_addr);
 		hva_end = min(end, memslot->userspace_addr +
@@ -1532,25 +1533,16 @@ static int handle_hva_to_gpa(struct kvm *kvm,
 		if (hva_start >= hva_end)
 			continue;
 
-		/*
-		 * {gfn(page) | page intersects with [hva_start, hva_end)} =
-		 * {gfn_start, gfn_start+1, ..., gfn_end-1}.
-		 */
-		gfn = hva_to_gfn_memslot(hva_start, memslot);
-		gfn_end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, memslot);
-
-		for (; gfn < gfn_end; ++gfn) {
-			gpa_t gpa = gfn << PAGE_SHIFT;
-			ret |= handler(kvm, gpa, data);
-		}
+		gpa = hva_to_gfn_memslot(hva_start, memslot) << PAGE_SHIFT;
+		ret |= handler(kvm, gpa, (u64)(hva_end - hva_start), data);
 	}
 
 	return ret;
 }
 
-static int kvm_unmap_hva_handler(struct kvm *kvm, gpa_t gpa, void *data)
+static int kvm_unmap_hva_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *data)
 {
-	unmap_stage2_range(kvm, gpa, PAGE_SIZE);
+	unmap_stage2_range(kvm, gpa, size);
 	return 0;
 }
 
@@ -1577,10 +1569,11 @@ int kvm_unmap_hva_range(struct kvm *kvm,
 	return 0;
 }
 
-static int kvm_set_spte_handler(struct kvm *kvm, gpa_t gpa, void *data)
+static int kvm_set_spte_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *data)
 {
 	pte_t *pte = (pte_t *)data;
 
+	WARN_ON(size != PAGE_SIZE);
 	/*
 	 * We can always call stage2_set_pte with KVM_S2PTE_FLAG_LOGGING_ACTIVE
 	 * flag clear because MMU notifiers will have unmapped a huge PMD before
@@ -1606,11 +1599,12 @@ void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
 	handle_hva_to_gpa(kvm, hva, end, &kvm_set_spte_handler, &stage2_pte);
 }
 
-static int kvm_age_hva_handler(struct kvm *kvm, gpa_t gpa, void *data)
+static int kvm_age_hva_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *data)
 {
 	pmd_t *pmd;
 	pte_t *pte;
 
+	WARN_ON(size != PAGE_SIZE && size != PMD_SIZE);
 	pmd = stage2_get_pmd(kvm, NULL, gpa);
 	if (!pmd || pmd_none(*pmd))	/* Nothing there */
 		return 0;
@@ -1625,11 +1619,12 @@ static int kvm_age_hva_handler(struct kvm *kvm, gpa_t gpa, void *data)
 	return stage2_ptep_test_and_clear_young(pte);
 }
 
-static int kvm_test_age_hva_handler(struct kvm *kvm, gpa_t gpa, void *data)
+static int kvm_test_age_hva_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *data)
 {
 	pmd_t *pmd;
 	pte_t *pte;
 
+	WARN_ON(size != PAGE_SIZE && size != PMD_SIZE);
 	pmd = stage2_get_pmd(kvm, NULL, gpa);
 	if (!pmd || pmd_none(*pmd))	/* Nothing there */
 		return 0;
@@ -1674,11 +1669,6 @@ phys_addr_t kvm_get_idmap_vector(void)
 	return hyp_idmap_vector;
 }
 
-phys_addr_t kvm_get_idmap_start(void)
-{
-	return hyp_idmap_start;
-}
-
 static int kvm_map_idmap_text(pgd_t *pgd)
 {
 	int err;

+ 7 - 1
arch/arm/kvm/psci.c

@@ -208,9 +208,10 @@ int kvm_psci_version(struct kvm_vcpu *vcpu)
 
 static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu)
 {
-	int ret = 1;
+	struct kvm *kvm = vcpu->kvm;
 	unsigned long psci_fn = vcpu_get_reg(vcpu, 0) & ~((u32) 0);
 	unsigned long val;
+	int ret = 1;
 
 	switch (psci_fn) {
 	case PSCI_0_2_FN_PSCI_VERSION:
@@ -230,7 +231,9 @@ static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu)
 		break;
 	case PSCI_0_2_FN_CPU_ON:
 	case PSCI_0_2_FN64_CPU_ON:
+		mutex_lock(&kvm->lock);
 		val = kvm_psci_vcpu_on(vcpu);
+		mutex_unlock(&kvm->lock);
 		break;
 	case PSCI_0_2_FN_AFFINITY_INFO:
 	case PSCI_0_2_FN64_AFFINITY_INFO:
@@ -279,6 +282,7 @@ static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu)
 
 static int kvm_psci_0_1_call(struct kvm_vcpu *vcpu)
 {
+	struct kvm *kvm = vcpu->kvm;
 	unsigned long psci_fn = vcpu_get_reg(vcpu, 0) & ~((u32) 0);
 	unsigned long val;
 
@@ -288,7 +292,9 @@ static int kvm_psci_0_1_call(struct kvm_vcpu *vcpu)
 		val = PSCI_RET_SUCCESS;
 		break;
 	case KVM_PSCI_FN_CPU_ON:
+		mutex_lock(&kvm->lock);
 		val = kvm_psci_vcpu_on(vcpu);
+		mutex_unlock(&kvm->lock);
 		break;
 	default:
 		val = PSCI_RET_NOT_SUPPORTED;

+ 5 - 0
arch/arm/mm/mmu.c

@@ -87,6 +87,8 @@ struct cachepolicy {
 #define s2_policy(policy)	0
 #endif
 
+unsigned long kimage_voffset __ro_after_init;
+
 static struct cachepolicy cache_policies[] __initdata = {
 	{
 		.policy		= "uncached",
@@ -1635,4 +1637,7 @@ void __init paging_init(const struct machine_desc *mdesc)
 
 	empty_zero_page = virt_to_page(zero_page);
 	__flush_dcache_page(NULL, empty_zero_page);
+
+	/* Compute the virt/idmap offset, mostly for the sake of KVM */
+	kimage_voffset = (unsigned long)&kimage_voffset - virt_to_idmap(&kimage_voffset);
}

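The kimage_voffset value computed above is what the 32-bit hyp-entry.S code earlier in this merge loads in order to turn a kernel virtual address into the physical address valid under the identity map ('ldr r1, =kimage_voffset ... sub ip, ip, r1'). In C terms the relationship is roughly the following — an illustrative sketch, not code from the patch; some_hyp_symbol is a hypothetical placeholder:

    extern unsigned long kimage_voffset;
    extern char some_hyp_symbol[];	/* hypothetical symbol in the idmapped text */

    static inline unsigned long idmap_addr_of(void *kernel_va)
    {
    	/* VA - (VA - PA) == PA: the same arithmetic paging_init()
    	 * performs above, applied in reverse; the result should match
    	 * virt_to_idmap(kernel_va). */
    	return (unsigned long)kernel_va - kimage_voffset;
    }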
+ 10 - 5
arch/arm/mm/proc-v7.S

@@ -39,13 +39,14 @@ ENTRY(cpu_v7_proc_fin)
 ENDPROC(cpu_v7_proc_fin)
 
 /*
- *	cpu_v7_reset(loc)
+ *	cpu_v7_reset(loc, hyp)
 *
 *	Perform a soft reset of the system.  Put the CPU into the
 *	same state as it would be if it had been reset, and branch
 *	to what would be the reset vector.
 *
 *	- loc   - location to jump to for soft reset
+ *	- hyp   - indicate if restart occurs in HYP mode
 *
 *	This code must be executed using a flat identity mapping with
 *      caches disabled.
@@ -53,11 +54,15 @@ ENDPROC(cpu_v7_proc_fin)
 	.align	5
 	.pushsection	.idmap.text, "ax"
 ENTRY(cpu_v7_reset)
-	mrc	p15, 0, r1, c1, c0, 0		@ ctrl register
-	bic	r1, r1, #0x1			@ ...............m
- THUMB(	bic	r1, r1, #1 << 30 )		@ SCTLR.TE (Thumb exceptions)
-	mcr	p15, 0, r1, c1, c0, 0		@ disable MMU
+	mrc	p15, 0, r2, c1, c0, 0		@ ctrl register
+	bic	r2, r2, #0x1			@ ...............m
+ THUMB(	bic	r2, r2, #1 << 30 )		@ SCTLR.TE (Thumb exceptions)
+	mcr	p15, 0, r2, c1, c0, 0		@ disable MMU
 	isb
+#ifdef CONFIG_ARM_VIRT_EXT
+	teq	r1, #0
+	bne	__hyp_soft_restart
+#endif
 	bx	r0
 ENDPROC(cpu_v7_reset)
 	.popsection

+ 13 - 68
arch/arm64/include/asm/arch_gicv3.h

@@ -20,69 +20,14 @@
 
 #include <asm/sysreg.h>
 
-#define ICC_EOIR1_EL1			sys_reg(3, 0, 12, 12, 1)
-#define ICC_DIR_EL1			sys_reg(3, 0, 12, 11, 1)
-#define ICC_IAR1_EL1			sys_reg(3, 0, 12, 12, 0)
-#define ICC_SGI1R_EL1			sys_reg(3, 0, 12, 11, 5)
-#define ICC_PMR_EL1			sys_reg(3, 0, 4, 6, 0)
-#define ICC_CTLR_EL1			sys_reg(3, 0, 12, 12, 4)
-#define ICC_SRE_EL1			sys_reg(3, 0, 12, 12, 5)
-#define ICC_GRPEN1_EL1			sys_reg(3, 0, 12, 12, 7)
-#define ICC_BPR1_EL1			sys_reg(3, 0, 12, 12, 3)
-
-#define ICC_SRE_EL2			sys_reg(3, 4, 12, 9, 5)
-
-/*
- * System register definitions
- */
-#define ICH_VSEIR_EL2			sys_reg(3, 4, 12, 9, 4)
-#define ICH_HCR_EL2			sys_reg(3, 4, 12, 11, 0)
-#define ICH_VTR_EL2			sys_reg(3, 4, 12, 11, 1)
-#define ICH_MISR_EL2			sys_reg(3, 4, 12, 11, 2)
-#define ICH_EISR_EL2			sys_reg(3, 4, 12, 11, 3)
-#define ICH_ELSR_EL2			sys_reg(3, 4, 12, 11, 5)
-#define ICH_VMCR_EL2			sys_reg(3, 4, 12, 11, 7)
-
-#define __LR0_EL2(x)			sys_reg(3, 4, 12, 12, x)
-#define __LR8_EL2(x)			sys_reg(3, 4, 12, 13, x)
-
-#define ICH_LR0_EL2			__LR0_EL2(0)
-#define ICH_LR1_EL2			__LR0_EL2(1)
-#define ICH_LR2_EL2			__LR0_EL2(2)
-#define ICH_LR3_EL2			__LR0_EL2(3)
-#define ICH_LR4_EL2			__LR0_EL2(4)
-#define ICH_LR5_EL2			__LR0_EL2(5)
-#define ICH_LR6_EL2			__LR0_EL2(6)
-#define ICH_LR7_EL2			__LR0_EL2(7)
-#define ICH_LR8_EL2			__LR8_EL2(0)
-#define ICH_LR9_EL2			__LR8_EL2(1)
-#define ICH_LR10_EL2			__LR8_EL2(2)
-#define ICH_LR11_EL2			__LR8_EL2(3)
-#define ICH_LR12_EL2			__LR8_EL2(4)
-#define ICH_LR13_EL2			__LR8_EL2(5)
-#define ICH_LR14_EL2			__LR8_EL2(6)
-#define ICH_LR15_EL2			__LR8_EL2(7)
-
-#define __AP0Rx_EL2(x)			sys_reg(3, 4, 12, 8, x)
-#define ICH_AP0R0_EL2			__AP0Rx_EL2(0)
-#define ICH_AP0R1_EL2			__AP0Rx_EL2(1)
-#define ICH_AP0R2_EL2			__AP0Rx_EL2(2)
-#define ICH_AP0R3_EL2			__AP0Rx_EL2(3)
-
-#define __AP1Rx_EL2(x)			sys_reg(3, 4, 12, 9, x)
-#define ICH_AP1R0_EL2			__AP1Rx_EL2(0)
-#define ICH_AP1R1_EL2			__AP1Rx_EL2(1)
-#define ICH_AP1R2_EL2			__AP1Rx_EL2(2)
-#define ICH_AP1R3_EL2			__AP1Rx_EL2(3)
-
 #ifndef __ASSEMBLY__
 
 #include <linux/stringify.h>
 #include <asm/barrier.h>
 #include <asm/cacheflush.h>
 
-#define read_gicreg			read_sysreg_s
-#define write_gicreg			write_sysreg_s
+#define read_gicreg(r)			read_sysreg_s(SYS_ ## r)
+#define write_gicreg(v, r)		write_sysreg_s(v, SYS_ ## r)
 
 /*
 * Low-level accessors
@@ -93,13 +38,13 @@
 
 static inline void gic_write_eoir(u32 irq)
 {
-	write_sysreg_s(irq, ICC_EOIR1_EL1);
+	write_sysreg_s(irq, SYS_ICC_EOIR1_EL1);
 	isb();
 }
 
 static inline void gic_write_dir(u32 irq)
 {
-	write_sysreg_s(irq, ICC_DIR_EL1);
+	write_sysreg_s(irq, SYS_ICC_DIR_EL1);
 	isb();
 }
 
@@ -107,7 +52,7 @@ static inline u64 gic_read_iar_common(void)
 {
 	u64 irqstat;
 
-	irqstat = read_sysreg_s(ICC_IAR1_EL1);
+	irqstat = read_sysreg_s(SYS_ICC_IAR1_EL1);
 	dsb(sy);
 	return irqstat;
 }
@@ -124,7 +69,7 @@ static inline u64 gic_read_iar_cavium_thunderx(void)
 	u64 irqstat;
 
 	nops(8);
-	irqstat = read_sysreg_s(ICC_IAR1_EL1);
+	irqstat = read_sysreg_s(SYS_ICC_IAR1_EL1);
 	nops(4);
 	mb();
 
@@ -133,40 +78,40 @@ static inline u64 gic_read_iar_cavium_thunderx(void)
 
 static inline void gic_write_pmr(u32 val)
 {
-	write_sysreg_s(val, ICC_PMR_EL1);
+	write_sysreg_s(val, SYS_ICC_PMR_EL1);
 }
 
 static inline void gic_write_ctlr(u32 val)
 {
-	write_sysreg_s(val, ICC_CTLR_EL1);
+	write_sysreg_s(val, SYS_ICC_CTLR_EL1);
 	isb();
 }
 
 static inline void gic_write_grpen1(u32 val)
 {
-	write_sysreg_s(val, ICC_GRPEN1_EL1);
+	write_sysreg_s(val, SYS_ICC_GRPEN1_EL1);
 	isb();
 }
 
 static inline void gic_write_sgi1r(u64 val)
 {
-	write_sysreg_s(val, ICC_SGI1R_EL1);
+	write_sysreg_s(val, SYS_ICC_SGI1R_EL1);
 }
 
 static inline u32 gic_read_sre(void)
 {
-	return read_sysreg_s(ICC_SRE_EL1);
+	return read_sysreg_s(SYS_ICC_SRE_EL1);
 }
 
 static inline void gic_write_sre(u32 val)
 {
-	write_sysreg_s(val, ICC_SRE_EL1);
+	write_sysreg_s(val, SYS_ICC_SRE_EL1);
 	isb();
 }
 
 static inline void gic_write_bpr1(u32 val)
 {
-	asm volatile("msr_s " __stringify(ICC_BPR1_EL1) ", %0" : : "r" (val));
+	write_sysreg_s(val, SYS_ICC_BPR1_EL1);
 }
 
 #define gic_read_typer(c)		readq_relaxed(c)

+ 3 - 2
arch/arm64/include/asm/kvm_asm.h

@@ -28,7 +28,7 @@
 #define ARM_EXCEPTION_EL1_SERROR  1
 #define ARM_EXCEPTION_TRAP	  2
 /* The hyp-stub will return this for any kvm_call_hyp() call */
-#define ARM_EXCEPTION_HYP_GONE	  3
+#define ARM_EXCEPTION_HYP_GONE	  HVC_STUB_ERR
 
 #define KVM_ARM64_DEBUG_DIRTY_SHIFT	0
 #define KVM_ARM64_DEBUG_DIRTY		(1 << KVM_ARM64_DEBUG_DIRTY_SHIFT)
@@ -47,7 +47,6 @@ struct kvm_vcpu;
 
 extern char __kvm_hyp_init[];
 extern char __kvm_hyp_init_end[];
-extern char __kvm_hyp_reset[];
 
 extern char __kvm_hyp_vector[];
 
@@ -59,6 +58,8 @@ extern void __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu);
 extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu);
 
 extern u64 __vgic_v3_get_ich_vtr_el2(void);
+extern u64 __vgic_v3_read_vmcr(void);
+extern void __vgic_v3_write_vmcr(u32 vmcr);
 extern void __vgic_v3_init_lrs(void);
 
 extern u32 __kvm_get_mdcr_el2(void);

+ 0 - 7
arch/arm64/include/asm/kvm_host.h

@@ -361,13 +361,6 @@ static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr,
 	__kvm_call_hyp((void *)pgd_ptr, hyp_stack_ptr, vector_ptr);
 }
 
-void __kvm_hyp_teardown(void);
-static inline void __cpu_reset_hyp_mode(unsigned long vector_ptr,
-					phys_addr_t phys_idmap_start)
-{
-	kvm_call_hyp(__kvm_hyp_teardown, phys_idmap_start);
-}
-
 static inline void kvm_arch_hardware_unsetup(void) {}
 static inline void kvm_arch_sync_events(struct kvm *kvm) {}
 static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}

+ 0 - 1
arch/arm64/include/asm/kvm_mmu.h

@@ -155,7 +155,6 @@ void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu);
 
 phys_addr_t kvm_mmu_get_httbr(void);
 phys_addr_t kvm_get_idmap_vector(void);
-phys_addr_t kvm_get_idmap_start(void);
 int kvm_mmu_init(void);
 void kvm_clear_hyp_idmap(void);
 

+ 155 - 7
arch/arm64/include/asm/sysreg.h

@@ -48,6 +48,8 @@
 	 ((crn) << CRn_shift) | ((crm) << CRm_shift) | \
 	 ((op2) << Op2_shift))
 
+#define sys_insn	sys_reg
+
 #define sys_reg_Op0(id)	(((id) >> Op0_shift) & Op0_mask)
 #define sys_reg_Op1(id)	(((id) >> Op1_shift) & Op1_mask)
 #define sys_reg_CRn(id)	(((id) >> CRn_shift) & CRn_mask)
@@ -81,6 +83,41 @@
 
 #endif	/* CONFIG_BROKEN_GAS_INST */
 
+#define REG_PSTATE_PAN_IMM		sys_reg(0, 0, 4, 0, 4)
+#define REG_PSTATE_UAO_IMM		sys_reg(0, 0, 4, 0, 3)
+
+#define SET_PSTATE_PAN(x) __emit_inst(0xd5000000 | REG_PSTATE_PAN_IMM |	\
+				      (!!x)<<8 | 0x1f)
+#define SET_PSTATE_UAO(x) __emit_inst(0xd5000000 | REG_PSTATE_UAO_IMM |	\
+				      (!!x)<<8 | 0x1f)
+
+#define SYS_DC_ISW			sys_insn(1, 0, 7, 6, 2)
+#define SYS_DC_CSW			sys_insn(1, 0, 7, 10, 2)
+#define SYS_DC_CISW			sys_insn(1, 0, 7, 14, 2)
+
+#define SYS_OSDTRRX_EL1			sys_reg(2, 0, 0, 0, 2)
+#define SYS_MDCCINT_EL1			sys_reg(2, 0, 0, 2, 0)
+#define SYS_MDSCR_EL1			sys_reg(2, 0, 0, 2, 2)
+#define SYS_OSDTRTX_EL1			sys_reg(2, 0, 0, 3, 2)
+#define SYS_OSECCR_EL1			sys_reg(2, 0, 0, 6, 2)
+#define SYS_DBGBVRn_EL1(n)		sys_reg(2, 0, 0, n, 4)
+#define SYS_DBGBCRn_EL1(n)		sys_reg(2, 0, 0, n, 5)
+#define SYS_DBGWVRn_EL1(n)		sys_reg(2, 0, 0, n, 6)
+#define SYS_DBGWCRn_EL1(n)		sys_reg(2, 0, 0, n, 7)
+#define SYS_MDRAR_EL1			sys_reg(2, 0, 1, 0, 0)
+#define SYS_OSLAR_EL1			sys_reg(2, 0, 1, 0, 4)
+#define SYS_OSLSR_EL1			sys_reg(2, 0, 1, 1, 4)
+#define SYS_OSDLR_EL1			sys_reg(2, 0, 1, 3, 4)
+#define SYS_DBGPRCR_EL1			sys_reg(2, 0, 1, 4, 4)
+#define SYS_DBGCLAIMSET_EL1		sys_reg(2, 0, 7, 8, 6)
+#define SYS_DBGCLAIMCLR_EL1		sys_reg(2, 0, 7, 9, 6)
+#define SYS_DBGAUTHSTATUS_EL1		sys_reg(2, 0, 7, 14, 6)
+#define SYS_MDCCSR_EL0			sys_reg(2, 3, 0, 1, 0)
+#define SYS_DBGDTR_EL0			sys_reg(2, 3, 0, 4, 0)
+#define SYS_DBGDTRRX_EL0		sys_reg(2, 3, 0, 5, 0)
+#define SYS_DBGDTRTX_EL0		sys_reg(2, 3, 0, 5, 0)
+#define SYS_DBGVCR32_EL2		sys_reg(2, 4, 0, 7, 0)
+
 #define SYS_MIDR_EL1			sys_reg(3, 0, 0, 0, 0)
 #define SYS_MPIDR_EL1			sys_reg(3, 0, 0, 0, 5)
 #define SYS_REVIDR_EL1			sys_reg(3, 0, 0, 0, 6)
@@ -88,6 +125,7 @@
 #define SYS_ID_PFR0_EL1			sys_reg(3, 0, 0, 1, 0)
 #define SYS_ID_PFR1_EL1			sys_reg(3, 0, 0, 1, 1)
 #define SYS_ID_DFR0_EL1			sys_reg(3, 0, 0, 1, 2)
+#define SYS_ID_AFR0_EL1			sys_reg(3, 0, 0, 1, 3)
 #define SYS_ID_MMFR0_EL1		sys_reg(3, 0, 0, 1, 4)
 #define SYS_ID_MMFR1_EL1		sys_reg(3, 0, 0, 1, 5)
 #define SYS_ID_MMFR2_EL1		sys_reg(3, 0, 0, 1, 6)
@@ -118,17 +156,127 @@
 #define SYS_ID_AA64MMFR1_EL1		sys_reg(3, 0, 0, 7, 1)
 #define SYS_ID_AA64MMFR2_EL1		sys_reg(3, 0, 0, 7, 2)
 
-#define SYS_CNTFRQ_EL0			sys_reg(3, 3, 14, 0, 0)
+#define SYS_SCTLR_EL1			sys_reg(3, 0, 1, 0, 0)
+#define SYS_ACTLR_EL1			sys_reg(3, 0, 1, 0, 1)
+#define SYS_CPACR_EL1			sys_reg(3, 0, 1, 0, 2)
+
+#define SYS_TTBR0_EL1			sys_reg(3, 0, 2, 0, 0)
+#define SYS_TTBR1_EL1			sys_reg(3, 0, 2, 0, 1)
+#define SYS_TCR_EL1			sys_reg(3, 0, 2, 0, 2)
+
+#define SYS_ICC_PMR_EL1			sys_reg(3, 0, 4, 6, 0)
+
+#define SYS_AFSR0_EL1			sys_reg(3, 0, 5, 1, 0)
+#define SYS_AFSR1_EL1			sys_reg(3, 0, 5, 1, 1)
+#define SYS_ESR_EL1			sys_reg(3, 0, 5, 2, 0)
+#define SYS_FAR_EL1			sys_reg(3, 0, 6, 0, 0)
+#define SYS_PAR_EL1			sys_reg(3, 0, 7, 4, 0)
+
+#define SYS_PMINTENSET_EL1		sys_reg(3, 0, 9, 14, 1)
+#define SYS_PMINTENCLR_EL1		sys_reg(3, 0, 9, 14, 2)
+
+#define SYS_MAIR_EL1			sys_reg(3, 0, 10, 2, 0)
+#define SYS_AMAIR_EL1			sys_reg(3, 0, 10, 3, 0)
+
+#define SYS_VBAR_EL1			sys_reg(3, 0, 12, 0, 0)
+
+#define SYS_ICC_DIR_EL1			sys_reg(3, 0, 12, 11, 1)
+#define SYS_ICC_SGI1R_EL1		sys_reg(3, 0, 12, 11, 5)
+#define SYS_ICC_IAR1_EL1		sys_reg(3, 0, 12, 12, 0)
+#define SYS_ICC_EOIR1_EL1		sys_reg(3, 0, 12, 12, 1)
+#define SYS_ICC_BPR1_EL1		sys_reg(3, 0, 12, 12, 3)
+#define SYS_ICC_CTLR_EL1		sys_reg(3, 0, 12, 12, 4)
+#define SYS_ICC_SRE_EL1			sys_reg(3, 0, 12, 12, 5)
+#define SYS_ICC_GRPEN1_EL1		sys_reg(3, 0, 12, 12, 7)
+
+#define SYS_CONTEXTIDR_EL1		sys_reg(3, 0, 13, 0, 1)
+#define SYS_TPIDR_EL1			sys_reg(3, 0, 13, 0, 4)
+
+#define SYS_CNTKCTL_EL1			sys_reg(3, 0, 14, 1, 0)
+
+#define SYS_CLIDR_EL1			sys_reg(3, 1, 0, 0, 1)
+#define SYS_AIDR_EL1			sys_reg(3, 1, 0, 0, 7)
+
+#define SYS_CSSELR_EL1			sys_reg(3, 2, 0, 0, 0)
+
 #define SYS_CTR_EL0			sys_reg(3, 3, 0, 0, 1)
 #define SYS_DCZID_EL0			sys_reg(3, 3, 0, 0, 7)
 
-#define REG_PSTATE_PAN_IMM		sys_reg(0, 0, 4, 0, 4)
-#define REG_PSTATE_UAO_IMM		sys_reg(0, 0, 4, 0, 3)
+#define SYS_PMCR_EL0			sys_reg(3, 3, 9, 12, 0)
+#define SYS_PMCNTENSET_EL0		sys_reg(3, 3, 9, 12, 1)
+#define SYS_PMCNTENCLR_EL0		sys_reg(3, 3, 9, 12, 2)
+#define SYS_PMOVSCLR_EL0		sys_reg(3, 3, 9, 12, 3)
+#define SYS_PMSWINC_EL0			sys_reg(3, 3, 9, 12, 4)
+#define SYS_PMSELR_EL0			sys_reg(3, 3, 9, 12, 5)
+#define SYS_PMCEID0_EL0			sys_reg(3, 3, 9, 12, 6)
+#define SYS_PMCEID1_EL0			sys_reg(3, 3, 9, 12, 7)
+#define SYS_PMCCNTR_EL0			sys_reg(3, 3, 9, 13, 0)
+#define SYS_PMXEVTYPER_EL0		sys_reg(3, 3, 9, 13, 1)
+#define SYS_PMXEVCNTR_EL0		sys_reg(3, 3, 9, 13, 2)
+#define SYS_PMUSERENR_EL0		sys_reg(3, 3, 9, 14, 0)
+#define SYS_PMOVSSET_EL0		sys_reg(3, 3, 9, 14, 3)
+
+#define SYS_TPIDR_EL0			sys_reg(3, 3, 13, 0, 2)
+#define SYS_TPIDRRO_EL0			sys_reg(3, 3, 13, 0, 3)
 
-#define SET_PSTATE_PAN(x) __emit_inst(0xd5000000 | REG_PSTATE_PAN_IMM |	\
-				      (!!x)<<8 | 0x1f)
-#define SET_PSTATE_UAO(x) __emit_inst(0xd5000000 | REG_PSTATE_UAO_IMM |	\
-				      (!!x)<<8 | 0x1f)
+#define SYS_CNTFRQ_EL0			sys_reg(3, 3, 14, 0, 0)
+
+#define SYS_CNTP_TVAL_EL0		sys_reg(3, 3, 14, 2, 0)
+#define SYS_CNTP_CTL_EL0		sys_reg(3, 3, 14, 2, 1)
+#define SYS_CNTP_CVAL_EL0		sys_reg(3, 3, 14, 2, 2)
+
+#define __PMEV_op2(n)			((n) & 0x7)
+#define __CNTR_CRm(n)			(0x8 | (((n) >> 3) & 0x3))
+#define SYS_PMEVCNTRn_EL0(n)		sys_reg(3, 3, 14, __CNTR_CRm(n), __PMEV_op2(n))
+#define __TYPER_CRm(n)			(0xc | (((n) >> 3) & 0x3))
+#define SYS_PMEVTYPERn_EL0(n)		sys_reg(3, 3, 14, __TYPER_CRm(n), __PMEV_op2(n))
+
+#define SYS_PMCCFILTR_EL0		sys_reg (3, 3, 14, 15, 7)
+
+#define SYS_DACR32_EL2			sys_reg(3, 4, 3, 0, 0)
+#define SYS_IFSR32_EL2			sys_reg(3, 4, 5, 0, 1)
+#define SYS_FPEXC32_EL2			sys_reg(3, 4, 5, 3, 0)
+
+#define __SYS__AP0Rx_EL2(x)		sys_reg(3, 4, 12, 8, x)
+#define SYS_ICH_AP0R0_EL2		__SYS__AP0Rx_EL2(0)
+#define SYS_ICH_AP0R1_EL2		__SYS__AP0Rx_EL2(1)
+#define SYS_ICH_AP0R2_EL2		__SYS__AP0Rx_EL2(2)
+#define SYS_ICH_AP0R3_EL2		__SYS__AP0Rx_EL2(3)
+
+#define __SYS__AP1Rx_EL2(x)		sys_reg(3, 4, 12, 9, x)
+#define SYS_ICH_AP1R0_EL2		__SYS__AP1Rx_EL2(0)
+#define SYS_ICH_AP1R1_EL2		__SYS__AP1Rx_EL2(1)
+#define SYS_ICH_AP1R2_EL2		__SYS__AP1Rx_EL2(2)
+#define SYS_ICH_AP1R3_EL2		__SYS__AP1Rx_EL2(3)
+
+#define SYS_ICH_VSEIR_EL2		sys_reg(3, 4, 12, 9, 4)
+#define SYS_ICC_SRE_EL2			sys_reg(3, 4, 12, 9, 5)
+#define SYS_ICH_HCR_EL2			sys_reg(3, 4, 12, 11, 0)
+#define SYS_ICH_VTR_EL2			sys_reg(3, 4, 12, 11, 1)
+#define SYS_ICH_MISR_EL2		sys_reg(3, 4, 12, 11, 2)
+#define SYS_ICH_EISR_EL2		sys_reg(3, 4, 12, 11, 3)
+#define SYS_ICH_ELSR_EL2		sys_reg(3, 4, 12, 11, 5)
+#define SYS_ICH_VMCR_EL2		sys_reg(3, 4, 12, 11, 7)
+
+#define __SYS__LR0_EL2(x)		sys_reg(3, 4, 12, 12, x)
+#define SYS_ICH_LR0_EL2			__SYS__LR0_EL2(0)
+#define SYS_ICH_LR1_EL2			__SYS__LR0_EL2(1)
+#define SYS_ICH_LR2_EL2			__SYS__LR0_EL2(2)
+#define SYS_ICH_LR3_EL2			__SYS__LR0_EL2(3)
+#define SYS_ICH_LR4_EL2			__SYS__LR0_EL2(4)
+#define SYS_ICH_LR5_EL2			__SYS__LR0_EL2(5)
+#define SYS_ICH_LR6_EL2			__SYS__LR0_EL2(6)
+#define SYS_ICH_LR7_EL2			__SYS__LR0_EL2(7)
+
+#define __SYS__LR8_EL2(x)		sys_reg(3, 4, 12, 13, x)
+#define SYS_ICH_LR8_EL2			__SYS__LR8_EL2(0)
+#define SYS_ICH_LR9_EL2			__SYS__LR8_EL2(1)
+#define SYS_ICH_LR10_EL2		__SYS__LR8_EL2(2)
+#define SYS_ICH_LR11_EL2		__SYS__LR8_EL2(3)
+#define SYS_ICH_LR12_EL2		__SYS__LR8_EL2(4)
+#define SYS_ICH_LR13_EL2		__SYS__LR8_EL2(5)
+#define SYS_ICH_LR14_EL2		__SYS__LR8_EL2(6)
+#define SYS_ICH_LR15_EL2		__SYS__LR8_EL2(7)
 
 /* Common SCTLR_ELx flags. */
 #define SCTLR_ELx_EE    (1 << 25)

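As a sanity check on the PMU event-counter encodings above (a worked example, not code from the patch): for n = 13, __PMEV_op2(13) = 13 & 0x7 = 5 and __CNTR_CRm(13) = 0x8 | ((13 >> 3) & 0x3) = 0x9, so SYS_PMEVCNTRn_EL0(13) expands to sys_reg(3, 3, 14, 9, 5), the architectural encoding of PMEVCNTR13_EL0. This could be asserted at build time, for instance:

    /* Illustrative only; BUILD_BUG_ON() must live inside a function body. */
    BUILD_BUG_ON(SYS_PMEVCNTRn_EL0(13) != sys_reg(3, 3, 14, 9, 5));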
+ 22 - 9
arch/arm64/include/asm/virt.h

@@ -19,25 +19,38 @@
 #define __ASM__VIRT_H
 
 /*
- * The arm64 hcall implementation uses x0 to specify the hcall type. A value
- * less than 0xfff indicates a special hcall, such as get/set vector.
- * Any other value is used as a pointer to the function to call.
+ * The arm64 hcall implementation uses x0 to specify the hcall
+ * number. A value less than HVC_STUB_HCALL_NR indicates a special
+ * hcall, such as set vector. Any other value is handled in a
+ * hypervisor specific way.
+ *
+ * The hypercall is allowed to clobber any of the caller-saved
+ * registers (x0-x18), so it is advisable to use it through the
+ * indirection of a function call (as implemented in hyp-stub.S).
 */
 
-/* HVC_GET_VECTORS - Return the value of the vbar_el2 register. */
-#define HVC_GET_VECTORS 0
-
 /*
 * HVC_SET_VECTORS - Set the value of the vbar_el2 register.
 *
 * @x1: Physical address of the new vector table.
 */
-#define HVC_SET_VECTORS 1
+#define HVC_SET_VECTORS 0
 
 /*
 * HVC_SOFT_RESTART - CPU soft reset, used by the cpu_soft_restart routine.
 */
-#define HVC_SOFT_RESTART 2
+#define HVC_SOFT_RESTART 1
+
+/*
+ * HVC_RESET_VECTORS - Restore the vectors to the original HYP stubs
+ */
+#define HVC_RESET_VECTORS 2
+
+/* Max number of HYP stub hypercalls */
+#define HVC_STUB_HCALL_NR 3
+
+/* Error returned when an invalid stub number is passed into x0 */
+#define HVC_STUB_ERR	0xbadca11
 
 #define BOOT_CPU_MODE_EL1	(0xe11)
 #define BOOT_CPU_MODE_EL2	(0xe12)
@@ -61,7 +74,7 @@
 extern u32 __boot_cpu_mode[2];
 
 void __hyp_set_vectors(phys_addr_t phys_vector_base);
-phys_addr_t __hyp_get_vectors(void);
+void __hyp_reset_vectors(void);
 
 /* Reports the availability of HYP mode */
 static inline bool is_hyp_mode_available(void)

+ 2 - 0
arch/arm64/include/uapi/asm/kvm.h

@@ -145,6 +145,8 @@ struct kvm_debug_exit_arch {
 #define KVM_GUESTDBG_USE_HW		(1 << 17)
 
 struct kvm_sync_regs {
+	/* Used with KVM_CAP_ARM_USER_IRQ */
+	__u64 device_irq_level;
 };
 
 struct kvm_arch_memory_slot {

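With device_irq_level in the synchronized register block, a VMM that models the interrupt controller in userspace can pick the in-kernel timer/PMU output levels straight out of the shared kvm_run page after each exit. A hedged sketch -- user_gic_set_line() and the *_LINE constants are placeholders for the VMM's own GIC model, and the KVM_ARM_DEV_* bits are the ones added to the generic uapi header further down:

	/* Illustrative only: forward in-kernel device levels to a userspace GIC */
	static void sync_device_irq_lines(struct kvm_run *run)
	{
		__u64 level = run->s.regs.device_irq_level;

		user_gic_set_line(VTIMER_LINE, !!(level & KVM_ARM_DEV_EL1_VTIMER));
		user_gic_set_line(PTIMER_LINE, !!(level & KVM_ARM_DEV_EL1_PTIMER));
		user_gic_set_line(PMU_LINE,    !!(level & KVM_ARM_DEV_PMU));
	}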
+ 4 - 4
arch/arm64/kernel/head.S

@@ -594,14 +594,14 @@ set_hcr:
 	cmp	x0, #1
 	b.ne	3f
 
-	mrs_s	x0, ICC_SRE_EL2
+	mrs_s	x0, SYS_ICC_SRE_EL2
 	orr	x0, x0, #ICC_SRE_EL2_SRE	// Set ICC_SRE_EL2.SRE==1
 	orr	x0, x0, #ICC_SRE_EL2_ENABLE	// Set ICC_SRE_EL2.Enable==1
-	msr_s	ICC_SRE_EL2, x0
+	msr_s	SYS_ICC_SRE_EL2, x0
 	isb					// Make sure SRE is now set
-	mrs_s	x0, ICC_SRE_EL2			// Read SRE back,
+	mrs_s	x0, SYS_ICC_SRE_EL2		// Read SRE back,
 	tbz	x0, #0, 3f			// and check that it sticks
-	msr_s	ICH_HCR_EL2, xzr		// Reset ICC_HCR_EL2 to defaults
+	msr_s	SYS_ICH_HCR_EL2, xzr		// Reset ICC_HCR_EL2 to defaults
 
 3:
 #endif

+ 14 - 24
arch/arm64/kernel/hyp-stub.S

@@ -55,18 +55,7 @@ ENDPROC(__hyp_stub_vectors)
 	.align 11
 
 el1_sync:
-	mrs	x30, esr_el2
-	lsr	x30, x30, #ESR_ELx_EC_SHIFT
-
-	cmp	x30, #ESR_ELx_EC_HVC64
-	b.ne	9f				// Not an HVC trap
-
-	cmp	x0, #HVC_GET_VECTORS
-	b.ne	1f
-	mrs	x0, vbar_el2
-	b	9f
-
-1:	cmp	x0, #HVC_SET_VECTORS
+	cmp	x0, #HVC_SET_VECTORS
 	b.ne	2f
 	msr	vbar_el2, x1
 	b	9f
@@ -79,10 +68,15 @@ el1_sync:
 	mov	x1, x3
 	br	x4				// no return
 
+3:	cmp	x0, #HVC_RESET_VECTORS
+	beq	9f				// Nothing to reset!
+
 	/* Someone called kvm_call_hyp() against the hyp-stub... */
-3:	mov	x0, #ARM_EXCEPTION_HYP_GONE
+	ldr	x0, =HVC_STUB_ERR
+	eret
 
-9:	eret
+9:	mov	x0, xzr
+	eret
 ENDPROC(el1_sync)
 
 .macro invalid_vector	label
@@ -121,19 +115,15 @@ ENDPROC(\label)
  * initialisation entry point.
  */
 
-ENTRY(__hyp_get_vectors)
-	str	lr, [sp, #-16]!
-	mov	x0, #HVC_GET_VECTORS
-	hvc	#0
-	ldr	lr, [sp], #16
-	ret
-ENDPROC(__hyp_get_vectors)
-
 ENTRY(__hyp_set_vectors)
-	str	lr, [sp, #-16]!
 	mov	x1, x0
 	mov	x0, #HVC_SET_VECTORS
 	hvc	#0
-	ldr	lr, [sp], #16
 	ret
 ENDPROC(__hyp_set_vectors)
+
+ENTRY(__hyp_reset_vectors)
+	mov	x0, #HVC_RESET_VECTORS
+	hvc	#0
+	ret
+ENDPROC(__hyp_reset_vectors)

+ 36 - 10
arch/arm64/kvm/hyp-init.S

@@ -22,6 +22,7 @@
 #include <asm/kvm_mmu.h>
 #include <asm/pgtable-hwdef.h>
 #include <asm/sysreg.h>
+#include <asm/virt.h>
 
 	.text
 	.pushsection	.hyp.idmap.text, "ax"
@@ -58,6 +59,9 @@ __invalid:
 	 * x2: HYP vectors
 	 */
 __do_hyp_init:
+	/* Check for a stub HVC call */
+	cmp	x0, #HVC_STUB_HCALL_NR
+	b.lo	__kvm_handle_stub_hvc
 
 	msr	ttbr0_el2, x0
 
@@ -119,23 +123,45 @@ __do_hyp_init:
 	eret
 ENDPROC(__kvm_hyp_init)
 
+ENTRY(__kvm_handle_stub_hvc)
+	cmp	x0, #HVC_SOFT_RESTART
+	b.ne	1f
+
+	/* This is where we're about to jump, staying at EL2 */
+	msr	elr_el2, x1
+	mov	x0, #(PSR_F_BIT | PSR_I_BIT | PSR_A_BIT | PSR_D_BIT | PSR_MODE_EL2h)
+	msr	spsr_el2, x0
+
+	/* Shuffle the arguments, and don't come back */
+	mov	x0, x2
+	mov	x1, x3
+	mov	x2, x4
+	b	reset
+
+1:	cmp	x0, #HVC_RESET_VECTORS
+	b.ne	1f
+reset:
 	/*
-	 * Reset kvm back to the hyp stub.
+	 * Reset kvm back to the hyp stub. Do not clobber x0-x4 in
+	 * case we are coming via HVC_SOFT_RESTART.
 	 */
-ENTRY(__kvm_hyp_reset)
-	/* We're now in idmap, disable MMU */
-	mrs	x0, sctlr_el2
-	ldr	x1, =SCTLR_ELx_FLAGS
-	bic	x0, x0, x1		// Clear SCTL_M and etc
-	msr	sctlr_el2, x0
+	mrs	x5, sctlr_el2
+	ldr	x6, =SCTLR_ELx_FLAGS
+	bic	x5, x5, x6		// Clear SCTL_M and etc
+	msr	sctlr_el2, x5
 	isb
 
 	/* Install stub vectors */
-	adr_l	x0, __hyp_stub_vectors
-	msr	vbar_el2, x0
+	adr_l	x5, __hyp_stub_vectors
+	msr	vbar_el2, x5
+	mov	x0, xzr
+	eret
 
+1:	/* Bad stub call */
+	ldr	x0, =HVC_STUB_ERR
 	eret
-ENDPROC(__kvm_hyp_reset)
+
+ENDPROC(__kvm_handle_stub_hvc)
 
 	.ltorg
 

+ 1 - 4
arch/arm64/kvm/hyp.S

@@ -36,15 +36,12 @@
  * passed in x0.
  *
  * A function pointer with a value less than 0xfff has a special meaning,
- * and is used to implement __hyp_get_vectors in the same way as in
+ * and is used to implement hyp stubs in the same way as in
  * arch/arm64/kernel/hyp_stub.S.
- * HVC behaves as a 'bl' call and will clobber lr.
  */
 ENTRY(__kvm_call_hyp)
 alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
-	str     lr, [sp, #-16]!
 	hvc	#0
-	ldr     lr, [sp], #16
 	ret
 alternative_else_nop_endif
 	b	__vhe_hyp_call

+ 21 - 22
arch/arm64/kvm/hyp/hyp-entry.S

@@ -32,17 +32,17 @@
 	 * Shuffle the parameters before calling the function
 	 * pointed to in x0. Assumes parameters in x[1,2,3].
 	 */
+	str	lr, [sp, #-16]!
 	mov	lr, x0
 	mov	x0, x1
 	mov	x1, x2
 	mov	x2, x3
 	blr	lr
+	ldr	lr, [sp], #16
 .endm
 
 ENTRY(__vhe_hyp_call)
-	str	lr, [sp, #-16]!
 	do_el2_call
-	ldr	lr, [sp], #16
 	/*
 	 * We used to rely on having an exception return to get
 	 * an implicit isb. In the E2H case, we don't have it anymore.
@@ -53,21 +53,6 @@ ENTRY(__vhe_hyp_call)
 	ret
 ENDPROC(__vhe_hyp_call)
 
-/*
- * Compute the idmap address of __kvm_hyp_reset based on the idmap
- * start passed as a parameter, and jump there.
- *
- * x0: HYP phys_idmap_start
- */
-ENTRY(__kvm_hyp_teardown)
-	mov	x4, x0
-	adr_l	x3, __kvm_hyp_reset
-
-	/* insert __kvm_hyp_reset()s offset into phys_idmap_start */
-	bfi	x4, x3, #0, #PAGE_SHIFT
-	br	x4
-ENDPROC(__kvm_hyp_teardown)
-
 el1_sync:				// Guest trapped into EL2
 	stp	x0, x1, [sp, #-16]!
 
@@ -87,10 +72,24 @@ alternative_endif
 	/* Here, we're pretty sure the host called HVC. */
 	ldp	x0, x1, [sp], #16
 
-	cmp	x0, #HVC_GET_VECTORS
-	b.ne	1f
-	mrs	x0, vbar_el2
-	b	2f
+	/* Check for a stub HVC call */
+	cmp	x0, #HVC_STUB_HCALL_NR
+	b.hs	1f
+
+	/*
+	 * Compute the idmap address of __kvm_handle_stub_hvc and
+	 * jump there. Since we use kimage_voffset, do not use the
+	 * HYP VA for __kvm_handle_stub_hvc, but the kernel VA instead
+	 * (by loading it from the constant pool).
+	 *
+	 * Preserve x0-x4, which may contain stub parameters.
+	 */
+	ldr	x5, =__kvm_handle_stub_hvc
+	ldr_l	x6, kimage_voffset
+
+	/* x5 = __pa(x5) */
+	sub	x5, x5, x6
+	br	x5
 
 1:
 	/*
@@ -99,7 +98,7 @@ alternative_endif
 	kern_hyp_va	x0
 	do_el2_call
 
-2:	eret
+	eret
 
 el1_trap:
 	/*

+ 180 - 316
arch/arm64/kvm/sys_regs.c

@@ -55,6 +55,15 @@
  * 64bit interface.
  */
 
+static bool read_from_write_only(struct kvm_vcpu *vcpu,
+				 const struct sys_reg_params *params)
+{
+	WARN_ONCE(1, "Unexpected sys_reg read to write-only register\n");
+	print_sys_reg_instr(params);
+	kvm_inject_undefined(vcpu);
+	return false;
+}
+
 /* 3 bits per cache level, as per CLIDR, but non-existent caches always 0 */
 static u32 cache_levels;
 
@@ -460,35 +469,35 @@ static void reset_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
 	vcpu_sys_reg(vcpu, PMCR_EL0) = val;
 }
 
-static bool pmu_access_el0_disabled(struct kvm_vcpu *vcpu)
+static bool check_pmu_access_disabled(struct kvm_vcpu *vcpu, u64 flags)
 {
 	u64 reg = vcpu_sys_reg(vcpu, PMUSERENR_EL0);
+	bool enabled = (reg & flags) || vcpu_mode_priv(vcpu);
 
-	return !((reg & ARMV8_PMU_USERENR_EN) || vcpu_mode_priv(vcpu));
+	if (!enabled)
+		kvm_inject_undefined(vcpu);
+
+	return !enabled;
 }
 
-static bool pmu_write_swinc_el0_disabled(struct kvm_vcpu *vcpu)
+static bool pmu_access_el0_disabled(struct kvm_vcpu *vcpu)
 {
-	u64 reg = vcpu_sys_reg(vcpu, PMUSERENR_EL0);
+	return check_pmu_access_disabled(vcpu, ARMV8_PMU_USERENR_EN);
+}
 
-	return !((reg & (ARMV8_PMU_USERENR_SW | ARMV8_PMU_USERENR_EN))
-		 || vcpu_mode_priv(vcpu));
+static bool pmu_write_swinc_el0_disabled(struct kvm_vcpu *vcpu)
+{
+	return check_pmu_access_disabled(vcpu, ARMV8_PMU_USERENR_SW | ARMV8_PMU_USERENR_EN);
 }
 
 static bool pmu_access_cycle_counter_el0_disabled(struct kvm_vcpu *vcpu)
 {
-	u64 reg = vcpu_sys_reg(vcpu, PMUSERENR_EL0);
-
-	return !((reg & (ARMV8_PMU_USERENR_CR | ARMV8_PMU_USERENR_EN))
-		 || vcpu_mode_priv(vcpu));
+	return check_pmu_access_disabled(vcpu, ARMV8_PMU_USERENR_CR | ARMV8_PMU_USERENR_EN);
 }
 
 static bool pmu_access_event_counter_el0_disabled(struct kvm_vcpu *vcpu)
 {
-	u64 reg = vcpu_sys_reg(vcpu, PMUSERENR_EL0);
-
-	return !((reg & (ARMV8_PMU_USERENR_ER | ARMV8_PMU_USERENR_EN))
-		 || vcpu_mode_priv(vcpu));
+	return check_pmu_access_disabled(vcpu, ARMV8_PMU_USERENR_ER | ARMV8_PMU_USERENR_EN);
 }
 
 static bool access_pmcr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
@@ -567,8 +576,10 @@ static bool pmu_counter_idx_valid(struct kvm_vcpu *vcpu, u64 idx)
 
 	pmcr = vcpu_sys_reg(vcpu, PMCR_EL0);
 	val = (pmcr >> ARMV8_PMU_PMCR_N_SHIFT) & ARMV8_PMU_PMCR_N_MASK;
-	if (idx >= val && idx != ARMV8_PMU_CYCLE_IDX)
+	if (idx >= val && idx != ARMV8_PMU_CYCLE_IDX) {
+		kvm_inject_undefined(vcpu);
 		return false;
+	}
 
 	return true;
 }
@@ -707,8 +718,10 @@ static bool access_pminten(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
 	if (!kvm_arm_pmu_v3_ready(vcpu))
 		return trap_raz_wi(vcpu, p, r);
 
-	if (!vcpu_mode_priv(vcpu))
+	if (!vcpu_mode_priv(vcpu)) {
+		kvm_inject_undefined(vcpu);
 		return false;
+	}
 
 	if (p->is_write) {
 		u64 val = p->regval & mask;
@@ -759,16 +772,15 @@ static bool access_pmswinc(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
 	if (!kvm_arm_pmu_v3_ready(vcpu))
 		return trap_raz_wi(vcpu, p, r);
 
+	if (!p->is_write)
+		return read_from_write_only(vcpu, p);
+
 	if (pmu_write_swinc_el0_disabled(vcpu))
 		return false;
 
-	if (p->is_write) {
-		mask = kvm_pmu_valid_counter_mask(vcpu);
-		kvm_pmu_software_increment(vcpu, p->regval & mask);
-		return true;
-	}
-
-	return false;
+	mask = kvm_pmu_valid_counter_mask(vcpu);
+	kvm_pmu_software_increment(vcpu, p->regval & mask);
+	return true;
 }
 
 static bool access_pmuserenr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
@@ -778,8 +790,10 @@ static bool access_pmuserenr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
 		return trap_raz_wi(vcpu, p, r);
 
 	if (p->is_write) {
-		if (!vcpu_mode_priv(vcpu))
+		if (!vcpu_mode_priv(vcpu)) {
+			kvm_inject_undefined(vcpu);
 			return false;
+		}
 
 		vcpu_sys_reg(vcpu, PMUSERENR_EL0) = p->regval
 						    & ARMV8_PMU_USERENR_MASK;
@@ -793,31 +807,23 @@ static bool access_pmuserenr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
 
 
 /* Silly macro to expand the DBG{BCR,BVR,WVR,WCR}n_EL1 registers in one go */
 #define DBG_BCR_BVR_WCR_WVR_EL1(n)					\
-	/* DBGBVRn_EL1 */						\
-	{ Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b100),	\
+	{ SYS_DESC(SYS_DBGBVRn_EL1(n)),					\
 	  trap_bvr, reset_bvr, n, 0, get_bvr, set_bvr },		\
-	/* DBGBCRn_EL1 */						\
-	{ Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b101),	\
+	{ SYS_DESC(SYS_DBGBCRn_EL1(n)),					\
 	  trap_bcr, reset_bcr, n, 0, get_bcr, set_bcr },		\
-	/* DBGWVRn_EL1 */						\
-	{ Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b110),	\
+	{ SYS_DESC(SYS_DBGWVRn_EL1(n)),					\
 	  trap_wvr, reset_wvr, n, 0,  get_wvr, set_wvr },		\
-	/* DBGWCRn_EL1 */						\
-	{ Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b111),	\
+	{ SYS_DESC(SYS_DBGWCRn_EL1(n)),					\
 	  trap_wcr, reset_wcr, n, 0,  get_wcr, set_wcr }
 
 /* Macro to expand the PMEVCNTRn_EL0 register */
 #define PMU_PMEVCNTR_EL0(n)						\
-	/* PMEVCNTRn_EL0 */						\
-	{ Op0(0b11), Op1(0b011), CRn(0b1110),				\
-	  CRm((0b1000 | (((n) >> 3) & 0x3))), Op2(((n) & 0x7)),		\
+	{ SYS_DESC(SYS_PMEVCNTRn_EL0(n)),					\
 	  access_pmu_evcntr, reset_unknown, (PMEVCNTR0_EL0 + n), }
 
 /* Macro to expand the PMEVTYPERn_EL0 register */
 #define PMU_PMEVTYPER_EL0(n)						\
-	/* PMEVTYPERn_EL0 */						\
-	{ Op0(0b11), Op1(0b011), CRn(0b1110),				\
-	  CRm((0b1100 | (((n) >> 3) & 0x3))), Op2(((n) & 0x7)),		\
+	{ SYS_DESC(SYS_PMEVTYPERn_EL0(n)),					\
 	  access_pmu_evtyper, reset_unknown, (PMEVTYPER0_EL0 + n), }
 
 static bool access_cntp_tval(struct kvm_vcpu *vcpu,
@@ -887,24 +893,14 @@ static bool access_cntp_cval(struct kvm_vcpu *vcpu,
  * more demanding guest...
  */
 static const struct sys_reg_desc sys_reg_descs[] = {
-	/* DC ISW */
-	{ Op0(0b01), Op1(0b000), CRn(0b0111), CRm(0b0110), Op2(0b010),
-	  access_dcsw },
-	/* DC CSW */
-	{ Op0(0b01), Op1(0b000), CRn(0b0111), CRm(0b1010), Op2(0b010),
-	  access_dcsw },
-	/* DC CISW */
-	{ Op0(0b01), Op1(0b000), CRn(0b0111), CRm(0b1110), Op2(0b010),
-	  access_dcsw },
+	{ SYS_DESC(SYS_DC_ISW), access_dcsw },
+	{ SYS_DESC(SYS_DC_CSW), access_dcsw },
+	{ SYS_DESC(SYS_DC_CISW), access_dcsw },
 
 	DBG_BCR_BVR_WCR_WVR_EL1(0),
 	DBG_BCR_BVR_WCR_WVR_EL1(1),
-	/* MDCCINT_EL1 */
-	{ Op0(0b10), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b000),
-	  trap_debug_regs, reset_val, MDCCINT_EL1, 0 },
-	/* MDSCR_EL1 */
-	{ Op0(0b10), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b010),
-	  trap_debug_regs, reset_val, MDSCR_EL1, 0 },
+	{ SYS_DESC(SYS_MDCCINT_EL1), trap_debug_regs, reset_val, MDCCINT_EL1, 0 },
+	{ SYS_DESC(SYS_MDSCR_EL1), trap_debug_regs, reset_val, MDSCR_EL1, 0 },
 	DBG_BCR_BVR_WCR_WVR_EL1(2),
 	DBG_BCR_BVR_WCR_WVR_EL1(3),
 	DBG_BCR_BVR_WCR_WVR_EL1(4),
@@ -920,179 +916,77 @@ static const struct sys_reg_desc sys_reg_descs[] = {
 	DBG_BCR_BVR_WCR_WVR_EL1(14),
 	DBG_BCR_BVR_WCR_WVR_EL1(15),
 
-	/* MDRAR_EL1 */
-	{ Op0(0b10), Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b000),
-	  trap_raz_wi },
-	/* OSLAR_EL1 */
-	{ Op0(0b10), Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b100),
-	  trap_raz_wi },
-	/* OSLSR_EL1 */
-	{ Op0(0b10), Op1(0b000), CRn(0b0001), CRm(0b0001), Op2(0b100),
-	  trap_oslsr_el1 },
-	/* OSDLR_EL1 */
-	{ Op0(0b10), Op1(0b000), CRn(0b0001), CRm(0b0011), Op2(0b100),
-	  trap_raz_wi },
-	/* DBGPRCR_EL1 */
-	{ Op0(0b10), Op1(0b000), CRn(0b0001), CRm(0b0100), Op2(0b100),
-	  trap_raz_wi },
-	/* DBGCLAIMSET_EL1 */
-	{ Op0(0b10), Op1(0b000), CRn(0b0111), CRm(0b1000), Op2(0b110),
-	  trap_raz_wi },
-	/* DBGCLAIMCLR_EL1 */
-	{ Op0(0b10), Op1(0b000), CRn(0b0111), CRm(0b1001), Op2(0b110),
-	  trap_raz_wi },
-	/* DBGAUTHSTATUS_EL1 */
-	{ Op0(0b10), Op1(0b000), CRn(0b0111), CRm(0b1110), Op2(0b110),
-	  trap_dbgauthstatus_el1 },
-
-	/* MDCCSR_EL1 */
-	{ Op0(0b10), Op1(0b011), CRn(0b0000), CRm(0b0001), Op2(0b000),
-	  trap_raz_wi },
-	/* DBGDTR_EL0 */
-	{ Op0(0b10), Op1(0b011), CRn(0b0000), CRm(0b0100), Op2(0b000),
-	  trap_raz_wi },
-	/* DBGDTR[TR]X_EL0 */
-	{ Op0(0b10), Op1(0b011), CRn(0b0000), CRm(0b0101), Op2(0b000),
-	  trap_raz_wi },
-
-	/* DBGVCR32_EL2 */
-	{ Op0(0b10), Op1(0b100), CRn(0b0000), CRm(0b0111), Op2(0b000),
-	  NULL, reset_val, DBGVCR32_EL2, 0 },
-
-	/* MPIDR_EL1 */
-	{ Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0000), Op2(0b101),
-	  NULL, reset_mpidr, MPIDR_EL1 },
-	/* SCTLR_EL1 */
-	{ Op0(0b11), Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b000),
-	  access_vm_reg, reset_val, SCTLR_EL1, 0x00C50078 },
-	/* CPACR_EL1 */
-	{ Op0(0b11), Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b010),
-	  NULL, reset_val, CPACR_EL1, 0 },
-	/* TTBR0_EL1 */
-	{ Op0(0b11), Op1(0b000), CRn(0b0010), CRm(0b0000), Op2(0b000),
-	  access_vm_reg, reset_unknown, TTBR0_EL1 },
-	/* TTBR1_EL1 */
-	{ Op0(0b11), Op1(0b000), CRn(0b0010), CRm(0b0000), Op2(0b001),
-	  access_vm_reg, reset_unknown, TTBR1_EL1 },
-	/* TCR_EL1 */
-	{ Op0(0b11), Op1(0b000), CRn(0b0010), CRm(0b0000), Op2(0b010),
-	  access_vm_reg, reset_val, TCR_EL1, 0 },
-
-	/* AFSR0_EL1 */
-	{ Op0(0b11), Op1(0b000), CRn(0b0101), CRm(0b0001), Op2(0b000),
-	  access_vm_reg, reset_unknown, AFSR0_EL1 },
-	/* AFSR1_EL1 */
-	{ Op0(0b11), Op1(0b000), CRn(0b0101), CRm(0b0001), Op2(0b001),
-	  access_vm_reg, reset_unknown, AFSR1_EL1 },
-	/* ESR_EL1 */
-	{ Op0(0b11), Op1(0b000), CRn(0b0101), CRm(0b0010), Op2(0b000),
-	  access_vm_reg, reset_unknown, ESR_EL1 },
-	/* FAR_EL1 */
-	{ Op0(0b11), Op1(0b000), CRn(0b0110), CRm(0b0000), Op2(0b000),
-	  access_vm_reg, reset_unknown, FAR_EL1 },
-	/* PAR_EL1 */
-	{ Op0(0b11), Op1(0b000), CRn(0b0111), CRm(0b0100), Op2(0b000),
-	  NULL, reset_unknown, PAR_EL1 },
-
-	/* PMINTENSET_EL1 */
-	{ Op0(0b11), Op1(0b000), CRn(0b1001), CRm(0b1110), Op2(0b001),
-	  access_pminten, reset_unknown, PMINTENSET_EL1 },
-	/* PMINTENCLR_EL1 */
-	{ Op0(0b11), Op1(0b000), CRn(0b1001), CRm(0b1110), Op2(0b010),
-	  access_pminten, NULL, PMINTENSET_EL1 },
-
-	/* MAIR_EL1 */
-	{ Op0(0b11), Op1(0b000), CRn(0b1010), CRm(0b0010), Op2(0b000),
-	  access_vm_reg, reset_unknown, MAIR_EL1 },
-	/* AMAIR_EL1 */
-	{ Op0(0b11), Op1(0b000), CRn(0b1010), CRm(0b0011), Op2(0b000),
-	  access_vm_reg, reset_amair_el1, AMAIR_EL1 },
-
-	/* VBAR_EL1 */
-	{ Op0(0b11), Op1(0b000), CRn(0b1100), CRm(0b0000), Op2(0b000),
-	  NULL, reset_val, VBAR_EL1, 0 },
-
-	/* ICC_SGI1R_EL1 */
-	{ Op0(0b11), Op1(0b000), CRn(0b1100), CRm(0b1011), Op2(0b101),
-	  access_gic_sgi },
-	/* ICC_SRE_EL1 */
-	{ Op0(0b11), Op1(0b000), CRn(0b1100), CRm(0b1100), Op2(0b101),
-	  access_gic_sre },
-
-	/* CONTEXTIDR_EL1 */
-	{ Op0(0b11), Op1(0b000), CRn(0b1101), CRm(0b0000), Op2(0b001),
-	  access_vm_reg, reset_val, CONTEXTIDR_EL1, 0 },
-	/* TPIDR_EL1 */
-	{ Op0(0b11), Op1(0b000), CRn(0b1101), CRm(0b0000), Op2(0b100),
-	  NULL, reset_unknown, TPIDR_EL1 },
-
-	/* CNTKCTL_EL1 */
-	{ Op0(0b11), Op1(0b000), CRn(0b1110), CRm(0b0001), Op2(0b000),
-	  NULL, reset_val, CNTKCTL_EL1, 0},
-
-	/* CSSELR_EL1 */
-	{ Op0(0b11), Op1(0b010), CRn(0b0000), CRm(0b0000), Op2(0b000),
-	  NULL, reset_unknown, CSSELR_EL1 },
-
-	/* PMCR_EL0 */
-	{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b000),
-	  access_pmcr, reset_pmcr, },
-	/* PMCNTENSET_EL0 */
-	{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b001),
-	  access_pmcnten, reset_unknown, PMCNTENSET_EL0 },
-	/* PMCNTENCLR_EL0 */
-	{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b010),
-	  access_pmcnten, NULL, PMCNTENSET_EL0 },
-	/* PMOVSCLR_EL0 */
-	{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b011),
-	  access_pmovs, NULL, PMOVSSET_EL0 },
-	/* PMSWINC_EL0 */
-	{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b100),
-	  access_pmswinc, reset_unknown, PMSWINC_EL0 },
-	/* PMSELR_EL0 */
-	{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b101),
-	  access_pmselr, reset_unknown, PMSELR_EL0 },
-	/* PMCEID0_EL0 */
-	{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b110),
-	  access_pmceid },
-	/* PMCEID1_EL0 */
-	{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b111),
-	  access_pmceid },
-	/* PMCCNTR_EL0 */
-	{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1101), Op2(0b000),
-	  access_pmu_evcntr, reset_unknown, PMCCNTR_EL0 },
-	/* PMXEVTYPER_EL0 */
-	{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1101), Op2(0b001),
-	  access_pmu_evtyper },
-	/* PMXEVCNTR_EL0 */
-	{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1101), Op2(0b010),
-	  access_pmu_evcntr },
-	/* PMUSERENR_EL0
-	 * This register resets as unknown in 64bit mode while it resets as zero
+	{ SYS_DESC(SYS_MDRAR_EL1), trap_raz_wi },
+	{ SYS_DESC(SYS_OSLAR_EL1), trap_raz_wi },
+	{ SYS_DESC(SYS_OSLSR_EL1), trap_oslsr_el1 },
+	{ SYS_DESC(SYS_OSDLR_EL1), trap_raz_wi },
+	{ SYS_DESC(SYS_DBGPRCR_EL1), trap_raz_wi },
+	{ SYS_DESC(SYS_DBGCLAIMSET_EL1), trap_raz_wi },
+	{ SYS_DESC(SYS_DBGCLAIMCLR_EL1), trap_raz_wi },
+	{ SYS_DESC(SYS_DBGAUTHSTATUS_EL1), trap_dbgauthstatus_el1 },
+
+	{ SYS_DESC(SYS_MDCCSR_EL0), trap_raz_wi },
+	{ SYS_DESC(SYS_DBGDTR_EL0), trap_raz_wi },
+	// DBGDTR[TR]X_EL0 share the same encoding
+	{ SYS_DESC(SYS_DBGDTRTX_EL0), trap_raz_wi },
+
+	{ SYS_DESC(SYS_DBGVCR32_EL2), NULL, reset_val, DBGVCR32_EL2, 0 },
+
+	{ SYS_DESC(SYS_MPIDR_EL1), NULL, reset_mpidr, MPIDR_EL1 },
+	{ SYS_DESC(SYS_SCTLR_EL1), access_vm_reg, reset_val, SCTLR_EL1, 0x00C50078 },
+	{ SYS_DESC(SYS_CPACR_EL1), NULL, reset_val, CPACR_EL1, 0 },
+	{ SYS_DESC(SYS_TTBR0_EL1), access_vm_reg, reset_unknown, TTBR0_EL1 },
+	{ SYS_DESC(SYS_TTBR1_EL1), access_vm_reg, reset_unknown, TTBR1_EL1 },
+	{ SYS_DESC(SYS_TCR_EL1), access_vm_reg, reset_val, TCR_EL1, 0 },
+
+	{ SYS_DESC(SYS_AFSR0_EL1), access_vm_reg, reset_unknown, AFSR0_EL1 },
+	{ SYS_DESC(SYS_AFSR1_EL1), access_vm_reg, reset_unknown, AFSR1_EL1 },
+	{ SYS_DESC(SYS_ESR_EL1), access_vm_reg, reset_unknown, ESR_EL1 },
+	{ SYS_DESC(SYS_FAR_EL1), access_vm_reg, reset_unknown, FAR_EL1 },
+	{ SYS_DESC(SYS_PAR_EL1), NULL, reset_unknown, PAR_EL1 },
+
+	{ SYS_DESC(SYS_PMINTENSET_EL1), access_pminten, reset_unknown, PMINTENSET_EL1 },
+	{ SYS_DESC(SYS_PMINTENCLR_EL1), access_pminten, NULL, PMINTENSET_EL1 },
+
+	{ SYS_DESC(SYS_MAIR_EL1), access_vm_reg, reset_unknown, MAIR_EL1 },
+	{ SYS_DESC(SYS_AMAIR_EL1), access_vm_reg, reset_amair_el1, AMAIR_EL1 },
+
+	{ SYS_DESC(SYS_VBAR_EL1), NULL, reset_val, VBAR_EL1, 0 },
+
+	{ SYS_DESC(SYS_ICC_SGI1R_EL1), access_gic_sgi },
+	{ SYS_DESC(SYS_ICC_SRE_EL1), access_gic_sre },
+
+	{ SYS_DESC(SYS_CONTEXTIDR_EL1), access_vm_reg, reset_val, CONTEXTIDR_EL1, 0 },
+	{ SYS_DESC(SYS_TPIDR_EL1), NULL, reset_unknown, TPIDR_EL1 },
+
+	{ SYS_DESC(SYS_CNTKCTL_EL1), NULL, reset_val, CNTKCTL_EL1, 0},
+
+	{ SYS_DESC(SYS_CSSELR_EL1), NULL, reset_unknown, CSSELR_EL1 },
+
+	{ SYS_DESC(SYS_PMCR_EL0), access_pmcr, reset_pmcr, },
+	{ SYS_DESC(SYS_PMCNTENSET_EL0), access_pmcnten, reset_unknown, PMCNTENSET_EL0 },
+	{ SYS_DESC(SYS_PMCNTENCLR_EL0), access_pmcnten, NULL, PMCNTENSET_EL0 },
+	{ SYS_DESC(SYS_PMOVSCLR_EL0), access_pmovs, NULL, PMOVSSET_EL0 },
+	{ SYS_DESC(SYS_PMSWINC_EL0), access_pmswinc, reset_unknown, PMSWINC_EL0 },
+	{ SYS_DESC(SYS_PMSELR_EL0), access_pmselr, reset_unknown, PMSELR_EL0 },
+	{ SYS_DESC(SYS_PMCEID0_EL0), access_pmceid },
+	{ SYS_DESC(SYS_PMCEID1_EL0), access_pmceid },
+	{ SYS_DESC(SYS_PMCCNTR_EL0), access_pmu_evcntr, reset_unknown, PMCCNTR_EL0 },
+	{ SYS_DESC(SYS_PMXEVTYPER_EL0), access_pmu_evtyper },
+	{ SYS_DESC(SYS_PMXEVCNTR_EL0), access_pmu_evcntr },
+	/*
+	 * PMUSERENR_EL0 resets as unknown in 64bit mode while it resets as zero
 	 * in 32bit mode. Here we choose to reset it as zero for consistency.
 	 * in 32bit mode. Here we choose to reset it as zero for consistency.
 	 */
-	  access_pmuserenr, reset_val, PMUSERENR_EL0, 0 },
-	/* PMOVSSET_EL0 */
-	{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1110), Op2(0b011),
-	  access_pmovs, reset_unknown, PMOVSSET_EL0 },
-
-	/* TPIDR_EL0 */
-	{ Op0(0b11), Op1(0b011), CRn(0b1101), CRm(0b0000), Op2(0b010),
-	  NULL, reset_unknown, TPIDR_EL0 },
-	/* TPIDRRO_EL0 */
-	{ Op0(0b11), Op1(0b011), CRn(0b1101), CRm(0b0000), Op2(0b011),
-	  NULL, reset_unknown, TPIDRRO_EL0 },
-
-	/* CNTP_TVAL_EL0 */
-	{ Op0(0b11), Op1(0b011), CRn(0b1110), CRm(0b0010), Op2(0b000),
-	  access_cntp_tval },
-	/* CNTP_CTL_EL0 */
-	{ Op0(0b11), Op1(0b011), CRn(0b1110), CRm(0b0010), Op2(0b001),
-	  access_cntp_ctl },
-	/* CNTP_CVAL_EL0 */
-	{ Op0(0b11), Op1(0b011), CRn(0b1110), CRm(0b0010), Op2(0b010),
-	  access_cntp_cval },
+	{ SYS_DESC(SYS_PMUSERENR_EL0), access_pmuserenr, reset_val, PMUSERENR_EL0, 0 },
+	{ SYS_DESC(SYS_PMOVSSET_EL0), access_pmovs, reset_unknown, PMOVSSET_EL0 },
+
+	{ SYS_DESC(SYS_TPIDR_EL0), NULL, reset_unknown, TPIDR_EL0 },
+	{ SYS_DESC(SYS_TPIDRRO_EL0), NULL, reset_unknown, TPIDRRO_EL0 },
+
+	{ SYS_DESC(SYS_CNTP_TVAL_EL0), access_cntp_tval },
+	{ SYS_DESC(SYS_CNTP_CTL_EL0), access_cntp_ctl },
+	{ SYS_DESC(SYS_CNTP_CVAL_EL0), access_cntp_cval },
 
 
 	/* PMEVCNTRn_EL0 */
 	PMU_PMEVCNTR_EL0(0),
@@ -1158,22 +1052,15 @@ static const struct sys_reg_desc sys_reg_descs[] = {
 	PMU_PMEVTYPER_EL0(28),
 	PMU_PMEVTYPER_EL0(29),
 	PMU_PMEVTYPER_EL0(30),
-	/* PMCCFILTR_EL0
-	 * This register resets as unknown in 64bit mode while it resets as zero
+	/*
+	 * PMCCFILTR_EL0 resets as unknown in 64bit mode while it resets as zero
 	 * in 32bit mode. Here we choose to reset it as zero for consistency.
 	 */
-	{ Op0(0b11), Op1(0b011), CRn(0b1110), CRm(0b1111), Op2(0b111),
-	  access_pmu_evtyper, reset_val, PMCCFILTR_EL0, 0 },
-
-	/* DACR32_EL2 */
-	{ Op0(0b11), Op1(0b100), CRn(0b0011), CRm(0b0000), Op2(0b000),
-	  NULL, reset_unknown, DACR32_EL2 },
-	/* IFSR32_EL2 */
-	{ Op0(0b11), Op1(0b100), CRn(0b0101), CRm(0b0000), Op2(0b001),
-	  NULL, reset_unknown, IFSR32_EL2 },
-	/* FPEXC32_EL2 */
-	{ Op0(0b11), Op1(0b100), CRn(0b0101), CRm(0b0011), Op2(0b000),
-	  NULL, reset_val, FPEXC32_EL2, 0x70 },
+	{ SYS_DESC(SYS_PMCCFILTR_EL0), access_pmu_evtyper, reset_val, PMCCFILTR_EL0, 0 },
+
+	{ SYS_DESC(SYS_DACR32_EL2), NULL, reset_unknown, DACR32_EL2 },
+	{ SYS_DESC(SYS_IFSR32_EL2), NULL, reset_unknown, IFSR32_EL2 },
+	{ SYS_DESC(SYS_FPEXC32_EL2), NULL, reset_val, FPEXC32_EL2, 0x70 },
 };
 
 static bool trap_dbgidr(struct kvm_vcpu *vcpu,
@@ -1557,6 +1444,22 @@ int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu, struct kvm_run *run)
 	return 1;
 }
 
+static void perform_access(struct kvm_vcpu *vcpu,
+			   struct sys_reg_params *params,
+			   const struct sys_reg_desc *r)
+{
+	/*
+	 * Not having an accessor means that we have configured a trap
+	 * that we don't know how to handle. This certainly qualifies
+	 * as a gross bug that should be fixed right away.
+	 */
+	BUG_ON(!r->access);
+
+	/* Skip instruction if instructed so */
+	if (likely(r->access(vcpu, params, r)))
+		kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
+}
+
 /*
  * emulate_cp --  tries to match a sys_reg access in a handling table, and
  *                call the corresponding trap handler.
@@ -1580,20 +1483,8 @@ static int emulate_cp(struct kvm_vcpu *vcpu,
 	r = find_reg(params, table, num);
 
 	if (r) {
-		/*
-		 * Not having an accessor means that we have
-		 * configured a trap that we don't know how to
-		 * handle. This certainly qualifies as a gross bug
-		 * that should be fixed right away.
-		 */
-		BUG_ON(!r->access);
-
-		if (likely(r->access(vcpu, params, r))) {
-			/* Skip instruction, since it was emulated */
-			kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
-			/* Handled */
-			return 0;
-		}
+		perform_access(vcpu, params, r);
+		return 0;
 	}
 
 	/* Not handled */
@@ -1660,20 +1551,25 @@ static int kvm_handle_cp_64(struct kvm_vcpu *vcpu,
 		params.regval |= vcpu_get_reg(vcpu, Rt2) << 32;
 	}
 
-	if (!emulate_cp(vcpu, &params, target_specific, nr_specific))
-		goto out;
-	if (!emulate_cp(vcpu, &params, global, nr_global))
-		goto out;
-
-	unhandled_cp_access(vcpu, &params);
+	/*
+	 * Try to emulate the coprocessor access using the target
+	 * specific table first, and using the global table afterwards.
+	 * If either of the tables contains a handler, handle the
+	 * potential register operation in the case of a read and return
+	 * with success.
+	 */
+	if (!emulate_cp(vcpu, &params, target_specific, nr_specific) ||
+	    !emulate_cp(vcpu, &params, global, nr_global)) {
+		/* Split up the value between registers for the read side */
+		if (!params.is_write) {
+			vcpu_set_reg(vcpu, Rt, lower_32_bits(params.regval));
+			vcpu_set_reg(vcpu, Rt2, upper_32_bits(params.regval));
+		}
 
 
-out:
-	/* Split up the value between registers for the read side */
-	if (!params.is_write) {
-		vcpu_set_reg(vcpu, Rt, lower_32_bits(params.regval));
-		vcpu_set_reg(vcpu, Rt2, upper_32_bits(params.regval));
+		return 1;
 	}
 	}
 
 
+	unhandled_cp_access(vcpu, &params);
 	return 1;
 	return 1;
 }
 }
 
 
@@ -1763,26 +1659,13 @@ static int emulate_sys_reg(struct kvm_vcpu *vcpu,
 		r = find_reg(params, sys_reg_descs, ARRAY_SIZE(sys_reg_descs));
 		r = find_reg(params, sys_reg_descs, ARRAY_SIZE(sys_reg_descs));
 
 	if (likely(r)) {
-		 * Not having an accessor means that we have
-		 * configured a trap that we don't know how to
-		 * handle. This certainly qualifies as a gross bug
-		 * that should be fixed right away.
-		 */
-		BUG_ON(!r->access);
-
-		if (likely(r->access(vcpu, params, r))) {
-			/* Skip instruction, since it was emulated */
-			kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
-			return 1;
-		}
-		/* If access function fails, it should complain. */
+		perform_access(vcpu, params, r);
 	} else {
 		kvm_err("Unsupported guest sys_reg access at: %lx\n",
 			*vcpu_pc(vcpu));
 		print_sys_reg_instr(params);
+		kvm_inject_undefined(vcpu);
 	}
-	kvm_inject_undefined(vcpu);
 	return 1;
 }
 
@@ -1932,44 +1815,25 @@ FUNCTION_INVARIANT(aidr_el1)
 
 
 /* ->val is filled in by kvm_sys_reg_table_init() */
 static struct sys_reg_desc invariant_sys_regs[] = {
-	{ Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0000), Op2(0b000),
-	  NULL, get_midr_el1 },
-	{ Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0000), Op2(0b110),
-	  NULL, get_revidr_el1 },
-	{ Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0001), Op2(0b000),
-	  NULL, get_id_pfr0_el1 },
-	{ Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0001), Op2(0b001),
-	  NULL, get_id_pfr1_el1 },
-	{ Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0001), Op2(0b010),
-	  NULL, get_id_dfr0_el1 },
-	{ Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0001), Op2(0b011),
-	  NULL, get_id_afr0_el1 },
-	{ Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0001), Op2(0b100),
-	  NULL, get_id_mmfr0_el1 },
-	{ Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0001), Op2(0b101),
-	  NULL, get_id_mmfr1_el1 },
-	{ Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0001), Op2(0b110),
-	  NULL, get_id_mmfr2_el1 },
-	{ Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0001), Op2(0b111),
-	  NULL, get_id_mmfr3_el1 },
-	{ Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b000),
-	  NULL, get_id_isar0_el1 },
-	{ Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b001),
-	  NULL, get_id_isar1_el1 },
-	{ Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b010),
-	  NULL, get_id_isar2_el1 },
-	{ Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b011),
-	  NULL, get_id_isar3_el1 },
-	{ Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b100),
-	  NULL, get_id_isar4_el1 },
-	{ Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b101),
-	  NULL, get_id_isar5_el1 },
-	{ Op0(0b11), Op1(0b001), CRn(0b0000), CRm(0b0000), Op2(0b001),
-	  NULL, get_clidr_el1 },
-	{ Op0(0b11), Op1(0b001), CRn(0b0000), CRm(0b0000), Op2(0b111),
-	  NULL, get_aidr_el1 },
-	{ Op0(0b11), Op1(0b011), CRn(0b0000), CRm(0b0000), Op2(0b001),
-	  NULL, get_ctr_el0 },
+	{ SYS_DESC(SYS_MIDR_EL1), NULL, get_midr_el1 },
+	{ SYS_DESC(SYS_REVIDR_EL1), NULL, get_revidr_el1 },
+	{ SYS_DESC(SYS_ID_PFR0_EL1), NULL, get_id_pfr0_el1 },
+	{ SYS_DESC(SYS_ID_PFR1_EL1), NULL, get_id_pfr1_el1 },
+	{ SYS_DESC(SYS_ID_DFR0_EL1), NULL, get_id_dfr0_el1 },
+	{ SYS_DESC(SYS_ID_AFR0_EL1), NULL, get_id_afr0_el1 },
+	{ SYS_DESC(SYS_ID_MMFR0_EL1), NULL, get_id_mmfr0_el1 },
+	{ SYS_DESC(SYS_ID_MMFR1_EL1), NULL, get_id_mmfr1_el1 },
+	{ SYS_DESC(SYS_ID_MMFR2_EL1), NULL, get_id_mmfr2_el1 },
+	{ SYS_DESC(SYS_ID_MMFR3_EL1), NULL, get_id_mmfr3_el1 },
+	{ SYS_DESC(SYS_ID_ISAR0_EL1), NULL, get_id_isar0_el1 },
+	{ SYS_DESC(SYS_ID_ISAR1_EL1), NULL, get_id_isar1_el1 },
+	{ SYS_DESC(SYS_ID_ISAR2_EL1), NULL, get_id_isar2_el1 },
+	{ SYS_DESC(SYS_ID_ISAR3_EL1), NULL, get_id_isar3_el1 },
+	{ SYS_DESC(SYS_ID_ISAR4_EL1), NULL, get_id_isar4_el1 },
+	{ SYS_DESC(SYS_ID_ISAR5_EL1), NULL, get_id_isar5_el1 },
+	{ SYS_DESC(SYS_CLIDR_EL1), NULL, get_clidr_el1 },
+	{ SYS_DESC(SYS_AIDR_EL1), NULL, get_aidr_el1 },
+	{ SYS_DESC(SYS_CTR_EL0), NULL, get_ctr_el0 },
 };
 
 static int reg_from_user(u64 *val, const void __user *uaddr, u64 id)

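The refactoring funnels both emulate_cp() and emulate_sys_reg() through perform_access(), with one behavioural change: an accessor that returns false is now expected to have injected the exception itself, instead of the caller unconditionally raising an UNDEF. A condensed sketch of the resulting dispatch (illustrative only, signatures approximated from the code above):

	static int emulate_one(struct kvm_vcpu *vcpu, struct sys_reg_params *params,
			       const struct sys_reg_desc *table, unsigned int num)
	{
		const struct sys_reg_desc *r = find_reg(params, table, num);

		if (!r)
			return -ENOENT;	/* caller tries the next table or injects UNDEF */

		/*
		 * perform_access() skips the trapped instruction only when the
		 * accessor reports success; on failure the accessor has already
		 * injected an exception into the guest.
		 */
		perform_access(vcpu, params, r);
		return 0;
	}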
+ 5 - 18
arch/arm64/kvm/sys_regs.h

@@ -83,24 +83,6 @@ static inline bool read_zero(struct kvm_vcpu *vcpu,
 	return true;
 }
 
-static inline bool write_to_read_only(struct kvm_vcpu *vcpu,
-				      const struct sys_reg_params *params)
-{
-	kvm_debug("sys_reg write to read-only register at: %lx\n",
-		  *vcpu_pc(vcpu));
-	print_sys_reg_instr(params);
-	return false;
-}
-
-static inline bool read_from_write_only(struct kvm_vcpu *vcpu,
-					const struct sys_reg_params *params)
-{
-	kvm_debug("sys_reg read to write-only register at: %lx\n",
-		  *vcpu_pc(vcpu));
-	print_sys_reg_instr(params);
-	return false;
-}
-
 /* Reset functions */
 static inline void reset_unknown(struct kvm_vcpu *vcpu,
 				 const struct sys_reg_desc *r)
@@ -147,4 +129,9 @@ const struct sys_reg_desc *find_reg_by_id(u64 id,
 #define CRm(_x) 	.CRm = _x
 #define Op2(_x) 	.Op2 = _x
 
+#define SYS_DESC(reg)					\
+	Op0(sys_reg_Op0(reg)), Op1(sys_reg_Op1(reg)),	\
+	CRn(sys_reg_CRn(reg)), CRm(sys_reg_CRm(reg)),	\
+	Op2(sys_reg_Op2(reg))
+
 #endif /* __ARM64_KVM_SYS_REGS_LOCAL_H__ */

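The effect of SYS_DESC() is purely mechanical: it unpacks an encoded register constant into the five designated initializers. Taking PMCR_EL0 as an example (its encoding is visible in the removed table lines earlier in this series), the two forms are equivalent:

	{ SYS_DESC(SYS_PMCR_EL0), access_pmcr, reset_pmcr, },
	/* ...is the same table entry as the old open-coded form: */
	{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b000),
	  access_pmcr, reset_pmcr, },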
+ 1 - 3
arch/arm64/kvm/sys_regs_generic_v8.c

@@ -52,9 +52,7 @@ static void reset_actlr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
  * Important: Must be sorted ascending by Op0, Op1, CRn, CRm, Op2
  */
 static const struct sys_reg_desc genericv8_sys_regs[] = {
-	/* ACTLR_EL1 */
-	{ Op0(0b11), Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b001),
-	  access_actlr, reset_actlr, ACTLR_EL1 },
+	{ SYS_DESC(SYS_ACTLR_EL1), access_actlr, reset_actlr, ACTLR_EL1 },
 };
 
 static const struct sys_reg_desc genericv8_cp15_regs[] = {

+ 2 - 0
include/kvm/arm_arch_timer.h

@@ -63,6 +63,8 @@ int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu,
 void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu);
 void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu);
 void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu);
+bool kvm_timer_should_notify_user(struct kvm_vcpu *vcpu);
+void kvm_timer_update_run(struct kvm_vcpu *vcpu);
 void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu);
 
 u64 kvm_arm_timer_get_reg(struct kvm_vcpu *, u64 regid);

+ 7 - 0
include/kvm/arm_pmu.h

@@ -50,6 +50,8 @@ void kvm_pmu_enable_counter(struct kvm_vcpu *vcpu, u64 val);
 void kvm_pmu_overflow_set(struct kvm_vcpu *vcpu, u64 val);
 void kvm_pmu_flush_hwstate(struct kvm_vcpu *vcpu);
 void kvm_pmu_sync_hwstate(struct kvm_vcpu *vcpu);
+bool kvm_pmu_should_notify_user(struct kvm_vcpu *vcpu);
+void kvm_pmu_update_run(struct kvm_vcpu *vcpu);
 void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val);
 void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val);
 void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data,
@@ -85,6 +87,11 @@ static inline void kvm_pmu_enable_counter(struct kvm_vcpu *vcpu, u64 val) {}
 static inline void kvm_pmu_overflow_set(struct kvm_vcpu *vcpu, u64 val) {}
 static inline void kvm_pmu_flush_hwstate(struct kvm_vcpu *vcpu) {}
 static inline void kvm_pmu_sync_hwstate(struct kvm_vcpu *vcpu) {}
+static inline bool kvm_pmu_should_notify_user(struct kvm_vcpu *vcpu)
+{
+	return false;
+}
+static inline void kvm_pmu_update_run(struct kvm_vcpu *vcpu) {}
 static inline void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val) {}
 static inline void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val) {}
 static inline void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu,

+ 3 - 6
include/kvm/arm_vgic.h

@@ -225,8 +225,6 @@ struct vgic_dist {
 struct vgic_v2_cpu_if {
 	u32		vgic_hcr;
 	u32		vgic_vmcr;
-	u32		vgic_misr;	/* Saved only */
-	u64		vgic_eisr;	/* Saved only */
 	u64		vgic_elrsr;	/* Saved only */
 	u32		vgic_apr;
 	u32		vgic_lr[VGIC_V2_MAX_LRS];
@@ -236,8 +234,6 @@ struct vgic_v3_cpu_if {
 	u32		vgic_hcr;
 	u32		vgic_vmcr;
 	u32		vgic_sre;	/* Restored only, change ignored */
-	u32		vgic_misr;	/* Saved only */
-	u32		vgic_eisr;	/* Saved only */
 	u32		vgic_elrsr;	/* Saved only */
 	u32		vgic_ap0r[4];
 	u32		vgic_ap1r[4];
@@ -264,8 +260,6 @@ struct vgic_cpu {
 	 */
 	struct list_head ap_list_head;
 
-	u64 live_lrs;
-
 	/*
 	 * Members below are used with GICv3 emulation only and represent
 	 * parts of the redistributor.
@@ -306,6 +300,9 @@ bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int virt_irq);
 
 
 int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu);
 
+void kvm_vgic_load(struct kvm_vcpu *vcpu);
+void kvm_vgic_put(struct kvm_vcpu *vcpu);
+
 #define irqchip_in_kernel(k)	(!!((k)->arch.vgic.in_kernel))
 #define vgic_initialized(k)	((k)->arch.vgic.initialized)
 #define vgic_ready(k)		((k)->arch.vgic.ready)

+ 8 - 0
include/uapi/linux/kvm.h

@@ -894,6 +894,7 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_S390_AIS 141
 #define KVM_CAP_SPAPR_TCE_VFIO 142
 #define KVM_CAP_X86_GUEST_MWAIT 143
+#define KVM_CAP_ARM_USER_IRQ 144
 
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -1371,4 +1372,11 @@ struct kvm_assigned_msix_entry {
 #define KVM_X2APIC_API_USE_32BIT_IDS            (1ULL << 0)
 #define KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK  (1ULL << 1)
 
+/* Available with KVM_CAP_ARM_USER_IRQ */
+
+/* Bits for run->s.regs.device_irq_level */
+#define KVM_ARM_DEV_EL1_VTIMER		(1 << 0)
+#define KVM_ARM_DEV_EL1_PTIMER		(1 << 1)
+#define KVM_ARM_DEV_PMU			(1 << 2)
+
 #endif /* __LINUX_KVM_H */

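Userspace probes this like any other capability before relying on run->s.regs.device_irq_level. A short sketch (standard KVM_CHECK_EXTENSION usage; fd setup and error handling omitted):

	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	static int have_arm_user_irq(int vm_fd)
	{
		/* KVM_CHECK_EXTENSION returns 0 when the capability is absent */
		return ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_ARM_USER_IRQ) > 0;
	}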
+ 98 - 26
virt/kvm/arm/arch_timer.c

@@ -184,28 +184,47 @@ bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx)
 	return cval <= now;
 }
 
+/*
+ * Reflect the timer output level into the kvm_run structure
+ */
+void kvm_timer_update_run(struct kvm_vcpu *vcpu)
+{
+	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
+	struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
+	struct kvm_sync_regs *regs = &vcpu->run->s.regs;
+
+	/* Populate the device bitmap with the timer states */
+	regs->device_irq_level &= ~(KVM_ARM_DEV_EL1_VTIMER |
+				    KVM_ARM_DEV_EL1_PTIMER);
+	if (vtimer->irq.level)
+		regs->device_irq_level |= KVM_ARM_DEV_EL1_VTIMER;
+	if (ptimer->irq.level)
+		regs->device_irq_level |= KVM_ARM_DEV_EL1_PTIMER;
+}
+
 static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level,
 				 struct arch_timer_context *timer_ctx)
 {
 	int ret;
 
-	BUG_ON(!vgic_initialized(vcpu->kvm));
-
 	timer_ctx->active_cleared_last = false;
 	timer_ctx->irq.level = new_level;
 	trace_kvm_timer_update_irq(vcpu->vcpu_id, timer_ctx->irq.irq,
 				   timer_ctx->irq.level);
 
-	ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id, timer_ctx->irq.irq,
-				  timer_ctx->irq.level);
-	WARN_ON(ret);
+	if (likely(irqchip_in_kernel(vcpu->kvm))) {
+		ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id,
+					  timer_ctx->irq.irq,
+					  timer_ctx->irq.level);
+		WARN_ON(ret);
+	}
 }
 
 /*
  * Check if there was a change in the timer state (should we raise or lower
  * the line level to the GIC).
  */
-static int kvm_timer_update_state(struct kvm_vcpu *vcpu)
+static void kvm_timer_update_state(struct kvm_vcpu *vcpu)
 {
 	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
 	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
@@ -217,16 +236,14 @@ static int kvm_timer_update_state(struct kvm_vcpu *vcpu)
 	 * because the guest would never see the interrupt.  Instead wait
 	 * until we call this function from kvm_timer_flush_hwstate.
 	 */
-	if (!vgic_initialized(vcpu->kvm) || !timer->enabled)
-		return -ENODEV;
+	if (unlikely(!timer->enabled))
+		return;
 
 
 	if (kvm_timer_should_fire(vtimer) != vtimer->irq.level)
 		kvm_timer_update_irq(vcpu, !vtimer->irq.level, vtimer);
 
 	if (kvm_timer_should_fire(ptimer) != ptimer->irq.level)
 		kvm_timer_update_irq(vcpu, !ptimer->irq.level, ptimer);
-
-	return 0;
 }
 
 /* Schedule the background timer for the emulated timer. */
@@ -286,25 +303,12 @@ void kvm_timer_unschedule(struct kvm_vcpu *vcpu)
 	timer_disarm(timer);
 }
 
-/**
- * kvm_timer_flush_hwstate - prepare to move the virt timer to the cpu
- * @vcpu: The vcpu pointer
- *
- * Check if the virtual timer has expired while we were running in the host,
- * and inject an interrupt if that was the case.
- */
-void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu)
+static void kvm_timer_flush_hwstate_vgic(struct kvm_vcpu *vcpu)
 {
 	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
 	bool phys_active;
 	int ret;
 
-	if (kvm_timer_update_state(vcpu))
-		return;
-
-	/* Set the background timer for the physical timer emulation. */
-	kvm_timer_emulate(vcpu, vcpu_ptimer(vcpu));
-
 	/*
 	* If we enter the guest with the virtual input level to the VGIC
 	* asserted, then we have already told the VGIC what we need to, and
@@ -356,11 +360,72 @@ void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu)
 	vtimer->active_cleared_last = !phys_active;
 }
 
+bool kvm_timer_should_notify_user(struct kvm_vcpu *vcpu)
+{
+	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
+	struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
+	struct kvm_sync_regs *sregs = &vcpu->run->s.regs;
+	bool vlevel, plevel;
+
+	if (likely(irqchip_in_kernel(vcpu->kvm)))
+		return false;
+
+	vlevel = sregs->device_irq_level & KVM_ARM_DEV_EL1_VTIMER;
+	plevel = sregs->device_irq_level & KVM_ARM_DEV_EL1_PTIMER;
+
+	return vtimer->irq.level != vlevel ||
+	       ptimer->irq.level != plevel;
+}
+
+static void kvm_timer_flush_hwstate_user(struct kvm_vcpu *vcpu)
+{
+	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
+
+	/*
+	 * To prevent continuously exiting from the guest, we mask the
+	 * physical interrupt such that the guest can make forward progress.
+	 * Once we detect the output level being deasserted, we unmask the
+	 * interrupt again so that we exit from the guest when the timer
+	 * fires.
+	*/
+	if (vtimer->irq.level)
+		disable_percpu_irq(host_vtimer_irq);
+	else
+		enable_percpu_irq(host_vtimer_irq, 0);
+}
+
+/**
+ * kvm_timer_flush_hwstate - prepare timers before running the vcpu
+ * @vcpu: The vcpu pointer
+ *
+ * Check if the virtual timer has expired while we were running in the host,
+ * and inject an interrupt if that was the case, making sure the timer is
+ * masked or disabled on the host so that we keep executing.  Also schedule a
+ * software timer for the physical timer if it is enabled.
+ */
+void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu)
+{
+	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+
+	if (unlikely(!timer->enabled))
+		return;
+
+	kvm_timer_update_state(vcpu);
+
+	/* Set the background timer for the physical timer emulation. */
+	kvm_timer_emulate(vcpu, vcpu_ptimer(vcpu));
+
+	if (unlikely(!irqchip_in_kernel(vcpu->kvm)))
+		kvm_timer_flush_hwstate_user(vcpu);
+	else
+		kvm_timer_flush_hwstate_vgic(vcpu);
+}
+
 /**
  * kvm_timer_sync_hwstate - sync timer state from cpu
  * @vcpu: The vcpu pointer
  *
- * Check if the virtual timer has expired while we were running in the guest,
+ * Check if any of the timers have expired while we were running in the guest,
  * and inject an interrupt if that was the case.
  */
 void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu)
@@ -560,6 +625,13 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu)
 	if (timer->enabled)
 		return 0;
 
+	/* Without a VGIC we do not map virtual IRQs to physical IRQs */
+	if (!irqchip_in_kernel(vcpu->kvm))
+		goto no_vgic;
+
+	if (!vgic_initialized(vcpu->kvm))
+		return -ENODEV;
+
 	/*
 	 * Find the physical IRQ number corresponding to the host_vtimer_irq
 	 */
@@ -583,8 +655,8 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu)
 	if (ret)
 		return ret;
 
+no_vgic:
 	timer->enabled = 1;
-
 	return 0;
 }
 

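Tying the pieces together on the run-loop side: when no in-kernel irqchip is present, the vcpu has to bounce out to userspace whenever a device output level changed, after refreshing the bitmap via the *_update_run() helpers. A condensed sketch of that decision (the function names are the ones introduced in this series; the surrounding kvm_arch_vcpu_ioctl_run() plumbing is elided and this is not the literal kernel code):

	/* Illustrative: decide whether to exit to userspace after a guest run */
	static bool vcpu_needs_user_irq_exit(struct kvm_vcpu *vcpu)
	{
		if (likely(irqchip_in_kernel(vcpu->kvm)))
			return false;	/* levels are injected into the VGIC instead */

		if (kvm_timer_should_notify_user(vcpu) ||
		    kvm_pmu_should_notify_user(vcpu)) {
			kvm_timer_update_run(vcpu);
			kvm_pmu_update_run(vcpu);
			return true;	/* run->s.regs.device_irq_level is current */
		}
		return false;
	}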
+ 7 - 71
virt/kvm/arm/hyp/vgic-v2-sr.c

@@ -22,49 +22,6 @@
 #include <asm/kvm_emulate.h>
 #include <asm/kvm_hyp.h>
 
-static void __hyp_text save_maint_int_state(struct kvm_vcpu *vcpu,
-					    void __iomem *base)
-{
-	struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2;
-	int nr_lr = (kern_hyp_va(&kvm_vgic_global_state))->nr_lr;
-	u32 eisr0, eisr1;
-	int i;
-	bool expect_mi;
-
-	expect_mi = !!(cpu_if->vgic_hcr & GICH_HCR_UIE);
-
-	for (i = 0; i < nr_lr; i++) {
-		if (!(vcpu->arch.vgic_cpu.live_lrs & (1UL << i)))
-				continue;
-
-		expect_mi |= (!(cpu_if->vgic_lr[i] & GICH_LR_HW) &&
-			      (cpu_if->vgic_lr[i] & GICH_LR_EOI));
-	}
-
-	if (expect_mi) {
-		cpu_if->vgic_misr = readl_relaxed(base + GICH_MISR);
-
-		if (cpu_if->vgic_misr & GICH_MISR_EOI) {
-			eisr0  = readl_relaxed(base + GICH_EISR0);
-			if (unlikely(nr_lr > 32))
-				eisr1  = readl_relaxed(base + GICH_EISR1);
-			else
-				eisr1 = 0;
-		} else {
-			eisr0 = eisr1 = 0;
-		}
-	} else {
-		cpu_if->vgic_misr = 0;
-		eisr0 = eisr1 = 0;
-	}
-
-#ifdef CONFIG_CPU_BIG_ENDIAN
-	cpu_if->vgic_eisr = ((u64)eisr0 << 32) | eisr1;
-#else
-	cpu_if->vgic_eisr = ((u64)eisr1 << 32) | eisr0;
-#endif
-}
-
 static void __hyp_text save_elrsr(struct kvm_vcpu *vcpu, void __iomem *base)
 {
 	struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2;
@@ -87,13 +44,10 @@ static void __hyp_text save_elrsr(struct kvm_vcpu *vcpu, void __iomem *base)
 static void __hyp_text save_lrs(struct kvm_vcpu *vcpu, void __iomem *base)
 {
 	struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2;
-	int nr_lr = (kern_hyp_va(&kvm_vgic_global_state))->nr_lr;
 	int i;
+	u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs;
 
-	for (i = 0; i < nr_lr; i++) {
-		if (!(vcpu->arch.vgic_cpu.live_lrs & (1UL << i)))
-			continue;
-
+	for (i = 0; i < used_lrs; i++) {
 		if (cpu_if->vgic_elrsr & (1UL << i))
 			cpu_if->vgic_lr[i] &= ~GICH_LR_STATE;
 		else
@@ -110,26 +64,20 @@ void __hyp_text __vgic_v2_save_state(struct kvm_vcpu *vcpu)
 	struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2;
 	struct vgic_dist *vgic = &kvm->arch.vgic;
 	void __iomem *base = kern_hyp_va(vgic->vctrl_base);
+	u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs;
 
 	if (!base)
 		return;
 
-	cpu_if->vgic_vmcr = readl_relaxed(base + GICH_VMCR);
-
-	if (vcpu->arch.vgic_cpu.live_lrs) {
+	if (used_lrs) {
 		cpu_if->vgic_apr = readl_relaxed(base + GICH_APR);
 
-		save_maint_int_state(vcpu, base);
 		save_elrsr(vcpu, base);
 		save_lrs(vcpu, base);
 
 		writel_relaxed(0, base + GICH_HCR);
-
-		vcpu->arch.vgic_cpu.live_lrs = 0;
 	} else {
-		cpu_if->vgic_eisr = 0;
 		cpu_if->vgic_elrsr = ~0UL;
-		cpu_if->vgic_misr = 0;
 		cpu_if->vgic_apr = 0;
 	}
 }
@@ -141,32 +89,20 @@ void __hyp_text __vgic_v2_restore_state(struct kvm_vcpu *vcpu)
 	struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2;
 	struct vgic_dist *vgic = &kvm->arch.vgic;
 	void __iomem *base = kern_hyp_va(vgic->vctrl_base);
-	int nr_lr = (kern_hyp_va(&kvm_vgic_global_state))->nr_lr;
 	int i;
-	u64 live_lrs = 0;
+	u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs;
 
 
 	if (!base)
 		return;
 
-
-	for (i = 0; i < nr_lr; i++)
-		if (cpu_if->vgic_lr[i] & GICH_LR_STATE)
-			live_lrs |= 1UL << i;
-
-	if (live_lrs) {
+	if (used_lrs) {
 		writel_relaxed(cpu_if->vgic_hcr, base + GICH_HCR);
 		writel_relaxed(cpu_if->vgic_apr, base + GICH_APR);
-		for (i = 0; i < nr_lr; i++) {
-			if (!(live_lrs & (1UL << i)))
-				continue;
-
+		for (i = 0; i < used_lrs; i++) {
 			writel_relaxed(cpu_if->vgic_lr[i],
 				       base + GICH_LR0 + (i * 4));
 		}
 	}
-
-	writel_relaxed(cpu_if->vgic_vmcr, base + GICH_VMCR);
-	vcpu->arch.vgic_cpu.live_lrs = live_lrs;
 }
 
 #ifdef CONFIG_ARM64

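The key simplification in the vgic-v2 code above and the vgic-v3 code that follows is the switch from the live_lrs bitmap to a dense used_lrs count: the vgic core now packs all occupied list registers at the bottom, so the hyp code can iterate linearly instead of testing a bitmap. Schematically (a sketch; save_lr() stands in for the actual register accesses):

	/* before: sparse scan over a bitmap of live LRs */
	for (i = 0; i < nr_lr; i++) {
		if (!(live_lrs & (1UL << i)))
			continue;
		save_lr(i);
	}

	/* after: the first used_lrs entries are always the occupied ones */
	for (i = 0; i < used_lrs; i++)
		save_lr(i);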
+ 24 - 63
virt/kvm/arm/hyp/vgic-v3-sr.c

@@ -118,66 +118,32 @@ static void __hyp_text __gic_v3_set_lr(u64 val, int lr)
 	}
 }
 
-static void __hyp_text save_maint_int_state(struct kvm_vcpu *vcpu, int nr_lr)
-{
-	struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
-	int i;
-	bool expect_mi;
-
-	expect_mi = !!(cpu_if->vgic_hcr & ICH_HCR_UIE);
-
-	for (i = 0; i < nr_lr; i++) {
-		if (!(vcpu->arch.vgic_cpu.live_lrs & (1UL << i)))
-				continue;
-
-		expect_mi |= (!(cpu_if->vgic_lr[i] & ICH_LR_HW) &&
-			      (cpu_if->vgic_lr[i] & ICH_LR_EOI));
-	}
-
-	if (expect_mi) {
-		cpu_if->vgic_misr  = read_gicreg(ICH_MISR_EL2);
-
-		if (cpu_if->vgic_misr & ICH_MISR_EOI)
-			cpu_if->vgic_eisr = read_gicreg(ICH_EISR_EL2);
-		else
-			cpu_if->vgic_eisr = 0;
-	} else {
-		cpu_if->vgic_misr = 0;
-		cpu_if->vgic_eisr = 0;
-	}
-}
-
 void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu)
 {
 	struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
+	u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs;
 	u64 val;
 	u64 val;
 
 
 	/*
 	/*
 	 * Make sure stores to the GIC via the memory mapped interface
 	 * Make sure stores to the GIC via the memory mapped interface
 	 * are now visible to the system register interface.
 	 * are now visible to the system register interface.
 	 */
 	 */
-	if (!cpu_if->vgic_sre)
+	if (!cpu_if->vgic_sre) {
 		dsb(st);
 		dsb(st);
+		cpu_if->vgic_vmcr = read_gicreg(ICH_VMCR_EL2);
+	}
 
 
-	cpu_if->vgic_vmcr  = read_gicreg(ICH_VMCR_EL2);
-
-	if (vcpu->arch.vgic_cpu.live_lrs) {
+	if (used_lrs) {
		int i;
-		u32 max_lr_idx, nr_pri_bits;
+		u32 nr_pri_bits;

		cpu_if->vgic_elrsr = read_gicreg(ICH_ELSR_EL2);

		write_gicreg(0, ICH_HCR_EL2);
		val = read_gicreg(ICH_VTR_EL2);
-		max_lr_idx = vtr_to_max_lr_idx(val);
		nr_pri_bits = vtr_to_nr_pri_bits(val);

-		save_maint_int_state(vcpu, max_lr_idx + 1);
-
-		for (i = 0; i <= max_lr_idx; i++) {
-			if (!(vcpu->arch.vgic_cpu.live_lrs & (1UL << i)))
-				continue;
-
+		for (i = 0; i < used_lrs; i++) {
			if (cpu_if->vgic_elrsr & (1 << i))
				cpu_if->vgic_lr[i] &= ~ICH_LR_STATE;
			else
@@ -205,11 +171,7 @@ void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu)
		default:
			cpu_if->vgic_ap1r[0] = read_gicreg(ICH_AP1R0_EL2);
		}
-
-		vcpu->arch.vgic_cpu.live_lrs = 0;
	} else {
-		cpu_if->vgic_misr  = 0;
-		cpu_if->vgic_eisr  = 0;
		cpu_if->vgic_elrsr = 0xffff;
		cpu_if->vgic_ap0r[0] = 0;
		cpu_if->vgic_ap0r[1] = 0;
@@ -234,9 +196,9 @@ void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu)
void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu)
{
	struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
+	u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs;
	u64 val;
-	u32 max_lr_idx, nr_pri_bits;
-	u16 live_lrs = 0;
+	u32 nr_pri_bits;
	int i;

	/*
@@ -245,25 +207,19 @@ void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu)
	 * delivered as a FIQ to the guest, with potentially fatal
	 * consequences. So we must make sure that ICC_SRE_EL1 has
	 * been actually programmed with the value we want before
-	 * starting to mess with the rest of the GIC.
+	 * starting to mess with the rest of the GIC, and VMCR_EL2 in
+	 * particular.
	 */
	if (!cpu_if->vgic_sre) {
		write_gicreg(0, ICC_SRE_EL1);
		isb();
+		write_gicreg(cpu_if->vgic_vmcr, ICH_VMCR_EL2);
	}

	val = read_gicreg(ICH_VTR_EL2);
-	max_lr_idx = vtr_to_max_lr_idx(val);
	nr_pri_bits = vtr_to_nr_pri_bits(val);

-	for (i = 0; i <= max_lr_idx; i++) {
-		if (cpu_if->vgic_lr[i] & ICH_LR_STATE)
-			live_lrs |= (1 << i);
-	}
-
-	write_gicreg(cpu_if->vgic_vmcr, ICH_VMCR_EL2);
-
-	if (live_lrs) {
+	if (used_lrs) {
		write_gicreg(cpu_if->vgic_hcr, ICH_HCR_EL2);

		switch (nr_pri_bits) {
@@ -286,12 +242,8 @@ void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu)
			write_gicreg(cpu_if->vgic_ap1r[0], ICH_AP1R0_EL2);
		}

-		for (i = 0; i <= max_lr_idx; i++) {
-			if (!(live_lrs & (1 << i)))
-				continue;
-
+		for (i = 0; i < used_lrs; i++)
			__gic_v3_set_lr(cpu_if->vgic_lr[i], i);
-		}
	}

	/*
@@ -303,7 +255,6 @@ void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu)
		isb();
		dsb(sy);
	}
-	vcpu->arch.vgic_cpu.live_lrs = live_lrs;

	/*
	 * Prevent the guest from touching the GIC system registers if
@@ -326,3 +277,13 @@ u64 __hyp_text __vgic_v3_get_ich_vtr_el2(void)
{
	return read_gicreg(ICH_VTR_EL2);
}
+
+u64 __hyp_text __vgic_v3_read_vmcr(void)
+{
+	return read_gicreg(ICH_VMCR_EL2);
+}
+
+void __hyp_text __vgic_v3_write_vmcr(u32 vmcr)
+{
+	write_gicreg(vmcr, ICH_VMCR_EL2);
+}

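On the v3 side the same used_lrs logic applies, and the save path above folds ICH_ELRSR_EL2 into the saved LR copies: a set bit means the guest drained that LR, so its state bits must not survive in memory. A minimal model of that folding step (ICH_LR_STATE follows the architectural bit positions; everything else is illustrative):

    #include <stdint.h>
    #include <stdio.h>

    #define ICH_LR_STATE (3ULL << 62)  /* pending+active bits of an ICH_LRn_EL2 */

    /*
     * Fold the "empty LR" status register into the saved LR copies: a set
     * ELRSR bit means the guest finished with that LR, so the in-memory
     * copy must drop its stale state bits.
     */
    static void fold_elrsr(uint64_t *lr, unsigned int used_lrs, uint64_t elrsr)
    {
        for (unsigned int i = 0; i < used_lrs; i++) {
            if (elrsr & (1ULL << i))
                lr[i] &= ~ICH_LR_STATE;   /* LR drained by the guest */
        }
    }

    int main(void)
    {
        uint64_t lr[2] = { ICH_LR_STATE | 27, ICH_LR_STATE | 30 };

        fold_elrsr(lr, 2, 0x1);           /* LR0 is empty, LR1 still live */
        printf("lr0=%#llx lr1=%#llx\n",
               (unsigned long long)lr[0], (unsigned long long)lr[1]);
        return 0;
    }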
+ 35 - 4
virt/kvm/arm/pmu.c

@@ -230,13 +230,44 @@ static void kvm_pmu_update_state(struct kvm_vcpu *vcpu)
		return;

	overflow = !!kvm_pmu_overflow_status(vcpu);
-	if (pmu->irq_level != overflow) {
-		pmu->irq_level = overflow;
-		kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id,
-				    pmu->irq_num, overflow);
+	if (pmu->irq_level == overflow)
+		return;
+
+	pmu->irq_level = overflow;
+
+	if (likely(irqchip_in_kernel(vcpu->kvm))) {
+		int ret;
+		ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id,
+					  pmu->irq_num, overflow);
+		WARN_ON(ret);
	}
}

+bool kvm_pmu_should_notify_user(struct kvm_vcpu *vcpu)
+{
+	struct kvm_pmu *pmu = &vcpu->arch.pmu;
+	struct kvm_sync_regs *sregs = &vcpu->run->s.regs;
+	bool run_level = sregs->device_irq_level & KVM_ARM_DEV_PMU;
+
+	if (likely(irqchip_in_kernel(vcpu->kvm)))
+		return false;
+
+	return pmu->irq_level != run_level;
+}
+
+/*
+ * Reflect the PMU overflow interrupt output level into the kvm_run structure
+ */
+void kvm_pmu_update_run(struct kvm_vcpu *vcpu)
+{
+	struct kvm_sync_regs *regs = &vcpu->run->s.regs;
+
+	/* Populate the PMU overflow bit for user space */
+	regs->device_irq_level &= ~KVM_ARM_DEV_PMU;
+	if (vcpu->arch.pmu.irq_level)
+		regs->device_irq_level |= KVM_ARM_DEV_PMU;
+}
+
/**
 * kvm_pmu_flush_hwstate - flush pmu state to cpu
 * @vcpu: The vcpu pointer

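With a userspace irqchip, kvm_pmu_update_run() above mirrors the PMU overflow line into kvm_run->s.regs.device_irq_level, where a VMM can pick it up after each KVM_RUN. A hedged sketch of the consuming side (the bit value here is an assumption for illustration; a real VMM should use the KVM_ARM_DEV_PMU define from the uapi headers):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    /* PMU bit in kvm_run->s.regs.device_irq_level (KVM_ARM_DEV_PMU in the uapi). */
    #define DEV_PMU (1u << 2)

    static bool pmu_line;   /* the VMM's model of the PMU overflow line */

    /* React to the level mirrored into the run structure after KVM_RUN. */
    static void sync_pmu_line(uint64_t device_irq_level)
    {
        bool overflow = device_irq_level & DEV_PMU;

        if (overflow != pmu_line) {
            pmu_line = overflow;
            printf("PMU line is now %s\n", overflow ? "high" : "low");
            /* raise/lower the PMU PPI in the userspace GIC model here */
        }
    }

    int main(void)
    {
        sync_pmu_line(DEV_PMU);   /* overflow asserted */
        sync_pmu_line(0);         /* overflow retired */
        return 0;
    }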
+ 68 - 40
virt/kvm/arm/vgic/vgic-init.c

@@ -24,7 +24,12 @@

/*
 * Initialization rules: there are multiple stages to the vgic
- * initialization, both for the distributor and the CPU interfaces.
+ * initialization, both for the distributor and the CPU interfaces.  The basic
+ * idea is that even though the VGIC is not functional or not requested from
+ * user space, the critical path of the run loop can still call VGIC functions
+ * that just won't do anything, without them having to check additional
+ * initialization flags to ensure they don't look at uninitialized data
+ * structures.
 *
 * Distributor:
 *
@@ -39,23 +44,67 @@
 *
 * CPU Interface:
 *
- * - kvm_vgic_cpu_early_init(): initialization of static data that
+ * - kvm_vgic_vcpu_early_init(): initialization of static data that
 *   doesn't depend on any sizing information or emulation type. No
 *   allocation is allowed there.
 */

/* EARLY INIT */

-/*
- * Those 2 functions should not be needed anymore but they
- * still are called from arm.c
+/**
+ * kvm_vgic_early_init() - Initialize static VGIC VM data structures
+ * @kvm: The VM whose VGIC distributor should be initialized
+ *
+ * Only do initialization of static structures that don't require any
+ * allocation or sizing information from userspace.  vgic_init() calls
+ * kvm_vgic_dist_init(), which takes care of the rest.
 */
void kvm_vgic_early_init(struct kvm *kvm)
{
+	struct vgic_dist *dist = &kvm->arch.vgic;
+
+	INIT_LIST_HEAD(&dist->lpi_list_head);
+	spin_lock_init(&dist->lpi_list_lock);
}

+/**
+ * kvm_vgic_vcpu_early_init() - Initialize static VGIC VCPU data structures
+ * @vcpu: The VCPU whose VGIC data structures should be initialized
+ *
+ * Only do initialization, but do not actually enable the VGIC CPU interface
+ * yet.
+ */
void kvm_vgic_vcpu_early_init(struct kvm_vcpu *vcpu)
{
+	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
+	int i;
+
+	INIT_LIST_HEAD(&vgic_cpu->ap_list_head);
+	spin_lock_init(&vgic_cpu->ap_list_lock);
+
+	/*
+	 * Enable and configure all SGIs to be edge-triggered and
+	 * configure all PPIs as level-triggered.
+	 */
+	for (i = 0; i < VGIC_NR_PRIVATE_IRQS; i++) {
+		struct vgic_irq *irq = &vgic_cpu->private_irqs[i];
+
+		INIT_LIST_HEAD(&irq->ap_list);
+		spin_lock_init(&irq->irq_lock);
+		irq->intid = i;
+		irq->vcpu = NULL;
+		irq->target_vcpu = vcpu;
+		irq->targets = 1U << vcpu->vcpu_id;
+		kref_init(&irq->refcount);
+		if (vgic_irq_is_sgi(i)) {
+			/* SGIs */
+			irq->enabled = 1;
+			irq->config = VGIC_CONFIG_EDGE;
+		} else {
+			/* PPIs */
+			irq->config = VGIC_CONFIG_LEVEL;
+		}
+	}
}

/* CREATION */
@@ -148,9 +197,6 @@ static int kvm_vgic_dist_init(struct kvm *kvm, unsigned int nr_spis)
	struct kvm_vcpu *vcpu0 = kvm_get_vcpu(kvm, 0);
	int i;

-	INIT_LIST_HEAD(&dist->lpi_list_head);
-	spin_lock_init(&dist->lpi_list_lock);
-
	dist->spis = kcalloc(nr_spis, sizeof(struct vgic_irq), GFP_KERNEL);
	if (!dist->spis)
		return  -ENOMEM;
@@ -181,41 +227,11 @@ static int kvm_vgic_dist_init(struct kvm *kvm, unsigned int nr_spis)
}

/**
- * kvm_vgic_vcpu_init: initialize the vcpu data structures and
- * enable the VCPU interface
- * @vcpu: the VCPU which's VGIC should be initialized
+ * kvm_vgic_vcpu_init() - Enable the VCPU interface
+ * @vcpu: the VCPU whose VGIC should be enabled
 */
static void kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
{
-	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
-	int i;
-
-	INIT_LIST_HEAD(&vgic_cpu->ap_list_head);
-	spin_lock_init(&vgic_cpu->ap_list_lock);
-
-	/*
-	 * Enable and configure all SGIs to be edge-triggered and
-	 * configure all PPIs as level-triggered.
-	 */
-	for (i = 0; i < VGIC_NR_PRIVATE_IRQS; i++) {
-		struct vgic_irq *irq = &vgic_cpu->private_irqs[i];
-
-		INIT_LIST_HEAD(&irq->ap_list);
-		spin_lock_init(&irq->irq_lock);
-		irq->intid = i;
-		irq->vcpu = NULL;
-		irq->target_vcpu = vcpu;
-		irq->targets = 1U << vcpu->vcpu_id;
-		kref_init(&irq->refcount);
-		if (vgic_irq_is_sgi(i)) {
-			/* SGIs */
-			irq->enabled = 1;
-			irq->config = VGIC_CONFIG_EDGE;
-		} else {
-			/* PPIs */
-			irq->config = VGIC_CONFIG_LEVEL;
-		}
-	}
	if (kvm_vgic_global_state.type == VGIC_V2)
		vgic_v2_enable(vcpu);
	else
@@ -262,6 +278,18 @@ int vgic_init(struct kvm *kvm)
	vgic_debug_init(kvm);

	dist->initialized = true;
+
+	/*
+	 * If we're initializing GICv2 on-demand when first running the VCPU
+	 * then we need to load the VGIC state onto the CPU.  We can detect
+	 * this easily by checking if we are in between vcpu_load and vcpu_put
+	 * when we just initialized the VGIC.
+	 */
+	preempt_disable();
+	vcpu = kvm_arm_get_running_vcpu();
+	if (vcpu)
+		kvm_vgic_load(vcpu);
+	preempt_enable();
out:
	return ret;
}

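The per-VCPU early-init loop moved above configures the 32 private interrupts: INTIDs 0-15 are SGIs (edge-triggered and enabled at reset), 16-31 are PPIs (level-triggered). A tiny standalone model of that classification (the constants mirror the vgic defines, reproduced here for illustration):

    #include <stdio.h>

    #define VGIC_NR_SGIS         16
    #define VGIC_NR_PRIVATE_IRQS 32

    /* Mirror of the per-VCPU init loop above: classify each private INTID. */
    static const char *config_of(int intid)
    {
        return intid < VGIC_NR_SGIS ? "edge (SGI, enabled at reset)"
                                    : "level (PPI)";
    }

    int main(void)
    {
        for (int i = 0; i < VGIC_NR_PRIVATE_IRQS; i += 8)
            printf("intid %2d -> %s\n", i, config_of(i));
        return 0;
    }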
+ 39 - 51
virt/kvm/arm/vgic/vgic-v2.c

@@ -22,59 +22,17 @@

#include "vgic.h"

-/*
- * Call this function to convert a u64 value to an unsigned long * bitmask
- * in a way that works on both 32-bit and 64-bit LE and BE platforms.
- *
- * Warning: Calling this function may modify *val.
- */
-static unsigned long *u64_to_bitmask(u64 *val)
-{
-#if defined(CONFIG_CPU_BIG_ENDIAN) && BITS_PER_LONG == 32
-	*val = (*val >> 32) | (*val << 32);
-#endif
-	return (unsigned long *)val;
-}
-
-void vgic_v2_process_maintenance(struct kvm_vcpu *vcpu)
+void vgic_v2_set_underflow(struct kvm_vcpu *vcpu)
 {
{
	struct vgic_v2_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v2;

-		u64 eisr = cpuif->vgic_eisr;
-		unsigned long *eisr_bmap = u64_to_bitmask(&eisr);
-		int lr;
-
-		for_each_set_bit(lr, eisr_bmap, kvm_vgic_global_state.nr_lr) {
-			u32 intid = cpuif->vgic_lr[lr] & GICH_LR_VIRTUALID;
-
-			WARN_ON(cpuif->vgic_lr[lr] & GICH_LR_STATE);
-
-			/* Only SPIs require notification */
-			if (vgic_valid_spi(vcpu->kvm, intid))
-				kvm_notify_acked_irq(vcpu->kvm, 0,
-						     intid - VGIC_NR_PRIVATE_IRQS);
-		}
-	}
-
-	/* check and disable underflow maintenance IRQ */
-	cpuif->vgic_hcr &= ~GICH_HCR_UIE;
-
-	/*
-	 * In the next iterations of the vcpu loop, if we sync the
-	 * vgic state after flushing it, but before entering the guest
-	 * (this happens for pending signals and vmid rollovers), then
-	 * make sure we don't pick up any old maintenance interrupts
-	 * here.
-	 */
-	cpuif->vgic_eisr = 0;
+	cpuif->vgic_hcr |= GICH_HCR_UIE;
}

-void vgic_v2_set_underflow(struct kvm_vcpu *vcpu)
+static bool lr_signals_eoi_mi(u32 lr_val)
{
-	struct vgic_v2_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v2;
-
-	cpuif->vgic_hcr |= GICH_HCR_UIE;
+	return !(lr_val & GICH_LR_STATE) && (lr_val & GICH_LR_EOI) &&
+	       !(lr_val & GICH_LR_HW);
}

/*
@@ -86,14 +44,22 @@ void vgic_v2_set_underflow(struct kvm_vcpu *vcpu)
 */
void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu)
{
-	struct vgic_v2_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v2;
+	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
+	struct vgic_v2_cpu_if *cpuif = &vgic_cpu->vgic_v2;
	int lr;

-	for (lr = 0; lr < vcpu->arch.vgic_cpu.used_lrs; lr++) {
+	cpuif->vgic_hcr &= ~GICH_HCR_UIE;
+
+	for (lr = 0; lr < vgic_cpu->used_lrs; lr++) {
		u32 val = cpuif->vgic_lr[lr];
		u32 intid = val & GICH_LR_VIRTUALID;
		struct vgic_irq *irq;

+		/* Notify fds when the guest EOI'ed a level-triggered SPI */
+		if (lr_signals_eoi_mi(val) && vgic_valid_spi(vcpu->kvm, intid))
+			kvm_notify_acked_irq(vcpu->kvm, 0,
+					     intid - VGIC_NR_PRIVATE_IRQS);
+
		irq = vgic_get_irq(vcpu->kvm, vcpu, intid);

		spin_lock(&irq->irq_lock);
@@ -126,6 +92,8 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu)
		spin_unlock(&irq->irq_lock);
		vgic_put_irq(vcpu->kvm, irq);
	}
+
+	vgic_cpu->used_lrs = 0;
}

/*
@@ -184,6 +152,7 @@ void vgic_v2_clear_lr(struct kvm_vcpu *vcpu, int lr)

void vgic_v2_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
{
+	struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2;
	u32 vmcr;

	vmcr  = (vmcrp->ctlr << GICH_VMCR_CTRL_SHIFT) & GICH_VMCR_CTRL_MASK;
@@ -194,12 +163,15 @@ void vgic_v2_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
	vmcr |= (vmcrp->pmr << GICH_VMCR_PRIMASK_SHIFT) &
		GICH_VMCR_PRIMASK_MASK;

-	vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = vmcr;
+	cpu_if->vgic_vmcr = vmcr;
}

void vgic_v2_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
{
-	u32 vmcr = vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr;
+	struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2;
+	u32 vmcr;
+
+	vmcr = cpu_if->vgic_vmcr;

	vmcrp->ctlr = (vmcr & GICH_VMCR_CTRL_MASK) >>
			GICH_VMCR_CTRL_SHIFT;
@@ -375,3 +347,19 @@ out:

	return ret;
}
+
+void vgic_v2_load(struct kvm_vcpu *vcpu)
+{
+	struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2;
+	struct vgic_dist *vgic = &vcpu->kvm->arch.vgic;
+
+	writel_relaxed(cpu_if->vgic_vmcr, vgic->vctrl_base + GICH_VMCR);
+}
+
+void vgic_v2_put(struct kvm_vcpu *vcpu)
+{
+	struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2;
+	struct vgic_dist *vgic = &vcpu->kvm->arch.vgic;
+
+	cpu_if->vgic_vmcr = readl_relaxed(vgic->vctrl_base + GICH_VMCR);
+}

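The new lr_signals_eoi_mi() helper above replaces the old maintenance-interrupt processing: a software-originated LR that carries the EOI flag and has become invalid means the guest just completed a level-triggered interrupt. The predicate can be modelled in isolation (bit positions follow the GICv2 GICH_LRn layout, reproduced here for illustration):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    /* GICH_LRn bit positions as in the GICv2 virtual interface spec. */
    #define LR_STATE (3u << 28)   /* pending/active */
    #define LR_EOI   (1u << 19)   /* maintenance interrupt on EOI */
    #define LR_HW    (1u << 31)   /* hardware-linked interrupt */

    /*
     * Mirror of lr_signals_eoi_mi(): a software-originated, EOI-flagged LR
     * that has become invalid means the guest just EOI'ed a level IRQ.
     */
    static bool lr_signals_eoi_mi(uint32_t lr_val)
    {
        return !(lr_val & LR_STATE) && (lr_val & LR_EOI) && !(lr_val & LR_HW);
    }

    int main(void)
    {
        printf("%d\n", lr_signals_eoi_mi(LR_EOI | 42));            /* 1 */
        printf("%d\n", lr_signals_eoi_mi(LR_STATE | LR_EOI | 42)); /* 0 */
        return 0;
    }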
+ 45 - 42
virt/kvm/arm/vgic/vgic-v3.c

@@ -21,59 +21,29 @@

#include "vgic.h"

-void vgic_v3_process_maintenance(struct kvm_vcpu *vcpu)
+void vgic_v3_set_underflow(struct kvm_vcpu *vcpu)
{
	struct vgic_v3_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v3;
-	u32 model = vcpu->kvm->arch.vgic.vgic_model;
-
-	if (cpuif->vgic_misr & ICH_MISR_EOI) {
-		unsigned long eisr_bmap = cpuif->vgic_eisr;
-		int lr;
-
-		for_each_set_bit(lr, &eisr_bmap, kvm_vgic_global_state.nr_lr) {
-			u32 intid;
-			u64 val = cpuif->vgic_lr[lr];
-
-			if (model == KVM_DEV_TYPE_ARM_VGIC_V3)
-				intid = val & ICH_LR_VIRTUAL_ID_MASK;
-			else
-				intid = val & GICH_LR_VIRTUALID;
-
-			WARN_ON(cpuif->vgic_lr[lr] & ICH_LR_STATE);
-
-			/* Only SPIs require notification */
-			if (vgic_valid_spi(vcpu->kvm, intid))
-				kvm_notify_acked_irq(vcpu->kvm, 0,
-						     intid - VGIC_NR_PRIVATE_IRQS);
-		}
-
-		/*
-		 * In the next iterations of the vcpu loop, if we sync
-		 * the vgic state after flushing it, but before
-		 * entering the guest (this happens for pending
-		 * signals and vmid rollovers), then make sure we
-		 * don't pick up any old maintenance interrupts here.
-		 */
-		cpuif->vgic_eisr = 0;
-	}

-	cpuif->vgic_hcr &= ~ICH_HCR_UIE;
+	cpuif->vgic_hcr |= ICH_HCR_UIE;
}

-void vgic_v3_set_underflow(struct kvm_vcpu *vcpu)
+static bool lr_signals_eoi_mi(u64 lr_val)
{
-	struct vgic_v3_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v3;
-
-	cpuif->vgic_hcr |= ICH_HCR_UIE;
+	return !(lr_val & ICH_LR_STATE) && (lr_val & ICH_LR_EOI) &&
+	       !(lr_val & ICH_LR_HW);
}

void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu)
{
-	struct vgic_v3_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v3;
+	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
+	struct vgic_v3_cpu_if *cpuif = &vgic_cpu->vgic_v3;
	u32 model = vcpu->kvm->arch.vgic.vgic_model;
	int lr;

-	for (lr = 0; lr < vcpu->arch.vgic_cpu.used_lrs; lr++) {
+	cpuif->vgic_hcr &= ~ICH_HCR_UIE;
+
+	for (lr = 0; lr < vgic_cpu->used_lrs; lr++) {
		u64 val = cpuif->vgic_lr[lr];
		u32 intid;
		struct vgic_irq *irq;
@@ -82,6 +52,12 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu)
			intid = val & ICH_LR_VIRTUAL_ID_MASK;
		else
			intid = val & GICH_LR_VIRTUALID;
+
+		/* Notify fds when the guest EOI'ed a level-triggered IRQ */
+		if (lr_signals_eoi_mi(val) && vgic_valid_spi(vcpu->kvm, intid))
+			kvm_notify_acked_irq(vcpu->kvm, 0,
+					     intid - VGIC_NR_PRIVATE_IRQS);
+
		irq = vgic_get_irq(vcpu->kvm, vcpu, intid);
		if (!irq)	/* An LPI could have been unmapped. */
			continue;
@@ -117,6 +93,8 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu)
		spin_unlock(&irq->irq_lock);
		vgic_put_irq(vcpu->kvm, irq);
	}
+
+	vgic_cpu->used_lrs = 0;
}

/* Requires the irq to be locked already */
@@ -173,6 +151,7 @@ void vgic_v3_clear_lr(struct kvm_vcpu *vcpu, int lr)

void vgic_v3_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
{
+	struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
	u32 vmcr;

	/*
@@ -188,12 +167,15 @@ void vgic_v3_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
	vmcr |= (vmcrp->grpen0 << ICH_VMCR_ENG0_SHIFT) & ICH_VMCR_ENG0_MASK;
	vmcr |= (vmcrp->grpen1 << ICH_VMCR_ENG1_SHIFT) & ICH_VMCR_ENG1_MASK;

-	vcpu->arch.vgic_cpu.vgic_v3.vgic_vmcr = vmcr;
+	cpu_if->vgic_vmcr = vmcr;
}

void vgic_v3_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
{
-	u32 vmcr = vcpu->arch.vgic_cpu.vgic_v3.vgic_vmcr;
+	struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
+	u32 vmcr;
+
+	vmcr = cpu_if->vgic_vmcr;

	/*
	 * Ignore the FIQen bit, because GIC emulation always implies
@@ -386,3 +368,24 @@ int vgic_v3_probe(const struct gic_kvm_info *info)

	return 0;
}
+
+void vgic_v3_load(struct kvm_vcpu *vcpu)
+{
+	struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
+
+	/*
+	 * If dealing with a GICv2 emulation on GICv3, VMCR_EL2.VFIQen
+	 * is dependent on ICC_SRE_EL1.SRE, and we have to perform the
+	 * VMCR_EL2 save/restore in the world switch.
+	 */
+	if (likely(cpu_if->vgic_sre))
+		kvm_call_hyp(__vgic_v3_write_vmcr, cpu_if->vgic_vmcr);
+}
+
+void vgic_v3_put(struct kvm_vcpu *vcpu)
+{
+	struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
+
+	if (likely(cpu_if->vgic_sre))
+		cpu_if->vgic_vmcr = kvm_call_hyp(__vgic_v3_read_vmcr);
+}

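vgic_v3_load()/vgic_v3_put() above move the VMCR save/restore from the world switch to vcpu_load/vcpu_put when the guest uses the system-register interface, so repeated guest entries and exits can run without touching ICH_VMCR_EL2 at all. A minimal model of that caching scheme (hw_vmcr stands in for the real register):

    #include <stdint.h>
    #include <stdio.h>

    static uint32_t hw_vmcr;      /* stand-in for ICH_VMCR_EL2 */

    struct vcpu { uint32_t cached_vmcr; };

    /* vcpu_load: push the cached copy into the (emulated) register. */
    static void vgic_load(struct vcpu *v)  { hw_vmcr = v->cached_vmcr; }

    /* vcpu_put: pull the register back into memory; world switches in
     * between can skip the register access entirely. */
    static void vgic_put(struct vcpu *v)   { v->cached_vmcr = hw_vmcr; }

    int main(void)
    {
        struct vcpu v = { .cached_vmcr = 0xf0 };

        vgic_load(&v);
        hw_vmcr |= 0x1;          /* guest ran and changed its VMCR */
        vgic_put(&v);
        printf("cached=%#x\n", v.cached_vmcr);   /* prints 0xf1 */
        return 0;
    }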
+ 44 - 16
virt/kvm/arm/vgic/vgic.c

@@ -527,14 +527,6 @@ retry:
	spin_unlock(&vgic_cpu->ap_list_lock);
}

-static inline void vgic_process_maintenance_interrupt(struct kvm_vcpu *vcpu)
-{
-	if (kvm_vgic_global_state.type == VGIC_V2)
-		vgic_v2_process_maintenance(vcpu);
-	else
-		vgic_v3_process_maintenance(vcpu);
-}
-
static inline void vgic_fold_lr_state(struct kvm_vcpu *vcpu)
{
	if (kvm_vgic_global_state.type == VGIC_V2)
@@ -601,10 +593,8 @@ static void vgic_flush_lr_state(struct kvm_vcpu *vcpu)

	DEBUG_SPINLOCK_BUG_ON(!spin_is_locked(&vgic_cpu->ap_list_lock));

-	if (compute_ap_list_depth(vcpu) > kvm_vgic_global_state.nr_lr) {
-		vgic_set_underflow(vcpu);
+	if (compute_ap_list_depth(vcpu) > kvm_vgic_global_state.nr_lr)
 		vgic_sort_ap_list(vcpu);
 		vgic_sort_ap_list(vcpu);
-	}
 
 
 	list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) {
 	list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) {
 		spin_lock(&irq->irq_lock);
 		spin_lock(&irq->irq_lock);
@@ -623,8 +613,12 @@ static void vgic_flush_lr_state(struct kvm_vcpu *vcpu)
 next:
 next:
 		spin_unlock(&irq->irq_lock);
 		spin_unlock(&irq->irq_lock);
 
 
-		if (count == kvm_vgic_global_state.nr_lr)
+		if (count == kvm_vgic_global_state.nr_lr) {
+			if (!list_is_last(&irq->ap_list,
+					  &vgic_cpu->ap_list_head))
+				vgic_set_underflow(vcpu);
 			break;
 			break;
+		}
 	}
 	}
 
 
 	vcpu->arch.vgic_cpu.used_lrs = count;
 	vcpu->arch.vgic_cpu.used_lrs = count;
@@ -637,18 +631,30 @@ next:
 /* Sync back the hardware VGIC state into our emulation after a guest's run. */
 /* Sync back the hardware VGIC state into our emulation after a guest's run. */
 void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
 void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
 {
 {
-	if (unlikely(!vgic_initialized(vcpu->kvm)))
+	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
+
+	/* An empty ap_list_head implies used_lrs == 0 */
+	if (list_empty(&vcpu->arch.vgic_cpu.ap_list_head))
		return;

-	vgic_process_maintenance_interrupt(vcpu);
-	vgic_fold_lr_state(vcpu);
+	if (vgic_cpu->used_lrs)
+		vgic_fold_lr_state(vcpu);
	vgic_prune_ap_list(vcpu);
}

/* Flush our emulation state into the GIC hardware before entering the guest. */
void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
{
-	if (unlikely(!vgic_initialized(vcpu->kvm)))
+	/*
+	 * If there are no virtual interrupts active or pending for this
+	 * VCPU, then there is no work to do and we can bail out without
+	 * taking any lock.  There is a potential race with someone injecting
+	 * interrupts to the VCPU, but it is a benign race as the VCPU will
+	 * either observe the new interrupt before or after doing this check,
+	 * and introducing additional synchronization mechanism doesn't change
+	 * this.
+	 */
+	if (list_empty(&vcpu->arch.vgic_cpu.ap_list_head))
		return;

	spin_lock(&vcpu->arch.vgic_cpu.ap_list_lock);
@@ -656,6 +662,28 @@ void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
	spin_unlock(&vcpu->arch.vgic_cpu.ap_list_lock);
}

+void kvm_vgic_load(struct kvm_vcpu *vcpu)
+{
+	if (unlikely(!vgic_initialized(vcpu->kvm)))
+		return;
+
+	if (kvm_vgic_global_state.type == VGIC_V2)
+		vgic_v2_load(vcpu);
+	else
+		vgic_v3_load(vcpu);
+}
+
+void kvm_vgic_put(struct kvm_vcpu *vcpu)
+{
+	if (unlikely(!vgic_initialized(vcpu->kvm)))
+		return;
+
+	if (kvm_vgic_global_state.type == VGIC_V2)
+		vgic_v2_put(vcpu);
+	else
+		vgic_v3_put(vcpu);
+}
+
int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu)
{
	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;

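The flush-path change above only requests the underflow maintenance interrupt when interrupts remain queued beyond what the list registers can hold, instead of on every oversubscribed flush. The decision reduces to a simple comparison, modelled here (NR_LR is an illustrative capacity, not the probed hardware value):

    #include <stdbool.h>
    #include <stdio.h>

    #define NR_LR 4

    /*
     * Model of the flush logic: fill up to NR_LR entries and request the
     * underflow maintenance interrupt only if the ap_list still has more
     * interrupts queued than the list registers can hold.
     */
    static bool flush(int pending, int *used_lrs)
    {
        int count = pending < NR_LR ? pending : NR_LR;

        *used_lrs = count;
        return pending > NR_LR;   /* true -> set the UIE underflow bit */
    }

    int main(void)
    {
        int used;
        printf("underflow=%d used=%d\n", flush(3, &used), used); /* 0, 3 */
        printf("underflow=%d used=%d\n", flush(6, &used), used); /* 1, 4 */
        return 0;
    }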
+ 6 - 2
virt/kvm/arm/vgic/vgic.h

@@ -112,7 +112,6 @@ void vgic_kick_vcpus(struct kvm *kvm);
int vgic_check_ioaddr(struct kvm *kvm, phys_addr_t *ioaddr,
		      phys_addr_t addr, phys_addr_t alignment);

-void vgic_v2_process_maintenance(struct kvm_vcpu *vcpu);
void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu);
void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr);
void vgic_v2_clear_lr(struct kvm_vcpu *vcpu, int lr);
@@ -130,6 +129,9 @@ int vgic_v2_map_resources(struct kvm *kvm);
int vgic_register_dist_iodev(struct kvm *kvm, gpa_t dist_base_address,
			     enum vgic_type);

+void vgic_v2_load(struct kvm_vcpu *vcpu);
+void vgic_v2_put(struct kvm_vcpu *vcpu);
+
static inline void vgic_get_irq_kref(struct vgic_irq *irq)
{
	if (irq->intid < VGIC_MIN_LPI)
@@ -138,7 +140,6 @@ static inline void vgic_get_irq_kref(struct vgic_irq *irq)
	kref_get(&irq->refcount);
}

-void vgic_v3_process_maintenance(struct kvm_vcpu *vcpu);
void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu);
void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr);
void vgic_v3_clear_lr(struct kvm_vcpu *vcpu, int lr);
@@ -150,6 +151,9 @@ int vgic_v3_probe(const struct gic_kvm_info *info);
int vgic_v3_map_resources(struct kvm *kvm);
int vgic_register_redist_iodevs(struct kvm *kvm, gpa_t dist_base_address);

+void vgic_v3_load(struct kvm_vcpu *vcpu);
+void vgic_v3_put(struct kvm_vcpu *vcpu);
+
int vgic_register_its_iodevs(struct kvm *kvm);
bool vgic_has_its(struct kvm *kvm);
int kvm_vgic_register_its_device(void);