
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull more kvm updates from Paolo Bonzini:
 "ARM:
   - Full debug support for arm64
   - Active state switching for timer interrupts
   - Lazy FP/SIMD save/restore for arm64
   - Generic ARMv8 target

  PPC:
   - Book3S: A few bug fixes
   - Book3S: Allow micro-threading on POWER8

  x86:
   - Compiler warnings

  Generic:
   - Adaptive polling for guest halt"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (49 commits)
  kvm: irqchip: fix memory leak
  kvm: move new trace event outside #ifdef CONFIG_KVM_ASYNC_PF
  KVM: trace kvm_halt_poll_ns grow/shrink
  KVM: dynamic halt-polling
  KVM: make halt_poll_ns per-vCPU
  Silence compiler warning in arch/x86/kvm/emulate.c
  kvm: compile process_smi_save_seg_64() only for x86_64
  KVM: x86: avoid uninitialized variable warning
  KVM: PPC: Book3S: Fix typo in top comment about locking
  KVM: PPC: Book3S: Fix size of the PSPB register
  KVM: PPC: Book3S HV: Exit on H_DOORBELL if HOST_IPI is set
  KVM: PPC: Book3S HV: Fix race in starting secondary threads
  KVM: PPC: Book3S: correct width in XER handling
  KVM: PPC: Book3S HV: Fix preempted vcore stolen time calculation
  KVM: PPC: Book3S HV: Fix preempted vcore list locking
  KVM: PPC: Book3S HV: Implement H_CLEAR_REF and H_CLEAR_MOD
  KVM: PPC: Book3S HV: Fix bug in dirty page tracking
  KVM: PPC: Book3S HV: Fix race in reading change bit when removing HPTE
  KVM: PPC: Book3S HV: Implement dynamic micro-threading on POWER8
  KVM: PPC: Book3S HV: Make use of unused threads when running guests
  ...
Linus Torvalds, 10 years ago
Commit: 519f526d39
62 changed files with 2,653 additions and 700 deletions
  1. Documentation/virtual/kvm/api.txt (+11, -4)
  2. arch/arm/include/asm/kvm_host.h (+5, -0)
  3. arch/arm/kvm/arm.c (+25, -11)
  4. arch/arm/kvm/guest.c (+6, -0)
  5. arch/arm/kvm/interrupts.S (+8, -6)
  6. arch/arm/kvm/reset.c (+1, -3)
  7. arch/arm64/include/asm/hw_breakpoint.h (+14, -0)
  8. arch/arm64/include/asm/kvm_arm.h (+4, -1)
  9. arch/arm64/include/asm/kvm_asm.h (+10, -16)
  10. arch/arm64/include/asm/kvm_host.h (+36, -6)
  11. arch/arm64/include/uapi/asm/kvm.h (+35, -2)
  12. arch/arm64/kernel/asm-offsets.c (+7, -2)
  13. arch/arm64/kernel/hw_breakpoint.c (+0, -12)
  14. arch/arm64/kvm/Makefile (+1, -1)
  15. arch/arm64/kvm/debug.c (+217, -0)
  16. arch/arm64/kvm/guest.c (+42, -1)
  17. arch/arm64/kvm/handle_exit.c (+44, -0)
  18. arch/arm64/kvm/hyp.S (+218, -399)
  19. arch/arm64/kvm/reset.c (+17, -3)
  20. arch/arm64/kvm/sys_regs.c (+269, -22)
  21. arch/arm64/kvm/sys_regs.h (+6, -0)
  22. arch/arm64/kvm/sys_regs_generic_v8.c (+2, -0)
  23. arch/arm64/kvm/trace.h (+123, -0)
  24. arch/powerpc/include/asm/kvm_book3s.h (+3, -2)
  25. arch/powerpc/include/asm/kvm_book3s_asm.h (+21, -1)
  26. arch/powerpc/include/asm/kvm_booke.h (+2, -2)
  27. arch/powerpc/include/asm/kvm_host.h (+20, -6)
  28. arch/powerpc/include/asm/ppc-opcode.h (+1, -1)
  29. arch/powerpc/kernel/asm-offsets.c (+9, -0)
  30. arch/powerpc/kvm/Kconfig (+4, -4)
  31. arch/powerpc/kvm/book3s.c (+2, -1)
  32. arch/powerpc/kvm/book3s_32_mmu_host.c (+1, -0)
  33. arch/powerpc/kvm/book3s_64_mmu_host.c (+1, -0)
  34. arch/powerpc/kvm/book3s_64_mmu_hv.c (+7, -1)
  35. arch/powerpc/kvm/book3s_emulate.c (+1, -0)
  36. arch/powerpc/kvm/book3s_hv.c (+582, -82)
  37. arch/powerpc/kvm/book3s_hv_builtin.c (+28, -4)
  38. arch/powerpc/kvm/book3s_hv_rm_mmu.c (+148, -13)
  39. arch/powerpc/kvm/book3s_hv_rm_xics.c (+1, -3)
  40. arch/powerpc/kvm/book3s_hv_rmhandlers.S (+117, -20)
  41. arch/powerpc/kvm/book3s_paired_singles.c (+1, -1)
  42. arch/powerpc/kvm/book3s_segment.S (+2, -2)
  43. arch/powerpc/kvm/book3s_xics.c (+1, -1)
  44. arch/powerpc/kvm/booke.c (+1, -0)
  45. arch/powerpc/kvm/e500_mmu.c (+1, -1)
  46. arch/powerpc/kvm/powerpc.c (+1, -1)
  47. arch/x86/kvm/emulate.c (+1, -1)
  48. arch/x86/kvm/mmu.c (+4, -3)
  49. arch/x86/kvm/x86.c (+2, -0)
  50. include/kvm/arm_arch_timer.h (+5, -2)
  51. include/kvm/arm_vgic.h (+36, -3)
  52. include/linux/irqchip/arm-gic-v3.h (+3, -0)
  53. include/linux/irqchip/arm-gic.h (+2, -1)
  54. include/linux/kvm_host.h (+1, -0)
  55. include/trace/events/kvm.h (+30, -0)
  56. include/uapi/linux/kvm.h (+5, -0)
  57. virt/kvm/arm/arch_timer.c (+22, -7)
  58. virt/kvm/arm/vgic-v2.c (+15, -1)
  59. virt/kvm/arm/vgic-v3.c (+18, -3)
  60. virt/kvm/arm/vgic.c (+389, -38)
  61. virt/kvm/irqchip.c (+6, -2)
  62. virt/kvm/kvm_main.c (+58, -4)

+ 11 - 4
Documentation/virtual/kvm/api.txt

@@ -2671,7 +2671,7 @@ handled.
 4.87 KVM_SET_GUEST_DEBUG
 
 Capability: KVM_CAP_SET_GUEST_DEBUG
-Architectures: x86, s390, ppc
+Architectures: x86, s390, ppc, arm64
 Type: vcpu ioctl
 Parameters: struct kvm_guest_debug (in)
 Returns: 0 on success; -1 on error
@@ -2693,8 +2693,8 @@ when running. Common control bits are:
 The top 16 bits of the control field are architecture specific control
 flags which can include the following:
 
-  - KVM_GUESTDBG_USE_SW_BP:     using software breakpoints [x86]
-  - KVM_GUESTDBG_USE_HW_BP:     using hardware breakpoints [x86, s390]
+  - KVM_GUESTDBG_USE_SW_BP:     using software breakpoints [x86, arm64]
+  - KVM_GUESTDBG_USE_HW_BP:     using hardware breakpoints [x86, s390, arm64]
   - KVM_GUESTDBG_INJECT_DB:     inject DB type exception [x86]
   - KVM_GUESTDBG_INJECT_BP:     inject BP type exception [x86]
   - KVM_GUESTDBG_EXIT_PENDING:  trigger an immediate guest exit [s390]
@@ -2709,6 +2709,11 @@ updated to the correct (supplied) values.
 The second part of the structure is architecture specific and
 typically contains a set of debug registers.
 
+For arm64 the number of debug registers is implementation defined and
+can be determined by querying the KVM_CAP_GUEST_DEBUG_HW_BPS and
+KVM_CAP_GUEST_DEBUG_HW_WPS capabilities which return a positive number
+indicating the number of supported registers.
+
 When debug events exit the main run loop with the reason
 KVM_EXIT_DEBUG with the kvm_debug_exit_arch part of the kvm_run
 structure containing architecture specific debug information.
@@ -3111,11 +3116,13 @@ data_offset describes where the data is located (KVM_EXIT_IO_OUT) or
 where kvm expects application code to place the data for the next
 KVM_RUN invocation (KVM_EXIT_IO_IN).  Data format is a packed array.
 
+		/* KVM_EXIT_DEBUG */
 		struct {
 			struct kvm_debug_exit_arch arch;
 		} debug;
 
-Unused.
+If the exit_reason is KVM_EXIT_DEBUG, then a vcpu is processing a debug event
+for which architecture specific information is returned.
 
 		/* KVM_EXIT_MMIO */
 		struct {

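Note: a minimal userspace sketch of the capability query described above, using the generic KVM_CHECK_EXTENSION ioctl on /dev/kvm; the open/error handling is pared down and the printed output is illustrative only.

	#include <fcntl.h>
	#include <stdio.h>
	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	int main(void)
	{
		int kvm = open("/dev/kvm", O_RDWR);

		/* Each capability returns the number of supported registers */
		int bps = ioctl(kvm, KVM_CHECK_EXTENSION, KVM_CAP_GUEST_DEBUG_HW_BPS);
		int wps = ioctl(kvm, KVM_CHECK_EXTENSION, KVM_CAP_GUEST_DEBUG_HW_WPS);

		printf("hw breakpoints: %d, hw watchpoints: %d\n", bps, wps);
		return 0;
	}
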
+ 5 - 0
arch/arm/include/asm/kvm_host.h

@@ -231,4 +231,9 @@ static inline void kvm_arch_sync_events(struct kvm *kvm) {}
 static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
 static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
 
+static inline void kvm_arm_init_debug(void) {}
+static inline void kvm_arm_setup_debug(struct kvm_vcpu *vcpu) {}
+static inline void kvm_arm_clear_debug(struct kvm_vcpu *vcpu) {}
+static inline void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu) {}
+
 #endif /* __ARM_KVM_HOST_H__ */

+ 25 - 11
arch/arm/kvm/arm.c

@@ -125,6 +125,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 	if (ret)
 		goto out_free_stage2_pgd;
 
+	kvm_vgic_early_init(kvm);
 	kvm_timer_init(kvm);
 
 	/* Mark the initial VMID generation invalid */
@@ -249,6 +250,7 @@ out:
 
 
 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
 {
+	kvm_vgic_vcpu_early_init(vcpu);
 }
 
 void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
@@ -278,6 +280,8 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 	/* Set up the timer */
 	kvm_timer_vcpu_init(vcpu);
 
+	kvm_arm_reset_debug_ptr(vcpu);
+
 	return 0;
 }
 
@@ -301,13 +305,6 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
 	kvm_arm_set_running_vcpu(NULL);
 }
 
-int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
-					struct kvm_guest_debug *dbg)
-{
-	return -EINVAL;
-}
-
-
 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
 				    struct kvm_mp_state *mp_state)
 {
@@ -528,10 +525,20 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
 		if (vcpu->arch.pause)
 			vcpu_pause(vcpu);
 
-		kvm_vgic_flush_hwstate(vcpu);
+		/*
+		 * Disarming the background timer must be done in a
+		 * preemptible context, as this call may sleep.
+		 */
 		kvm_timer_flush_hwstate(vcpu);
 
+		/*
+		 * Preparing the interrupts to be injected also
+		 * involves poking the GIC, which must be done in a
+		 * non-preemptible context.
+		 */
 		preempt_disable();
+		kvm_vgic_flush_hwstate(vcpu);
+
 		local_irq_disable();
 
 		/*
@@ -544,12 +551,14 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
 
 
 		if (ret <= 0 || need_new_vmid_gen(vcpu->kvm)) {
 			local_irq_enable();
+			kvm_vgic_sync_hwstate(vcpu);
 			preempt_enable();
 			kvm_timer_sync_hwstate(vcpu);
-			kvm_vgic_sync_hwstate(vcpu);
 			continue;
 		}
 
+		kvm_arm_setup_debug(vcpu);
+
 		/**************************************************************
 		 * Enter the guest
 		 */
@@ -564,6 +573,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
 		 * Back from guest
 		 *************************************************************/
 
+		kvm_arm_clear_debug(vcpu);
+
 		/*
 		 * We may have taken a host interrupt in HYP mode (ie
 		 * while executing the guest). This interrupt is still
@@ -586,11 +597,12 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
 		 */
 		kvm_guest_exit();
 		trace_kvm_exit(kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu));
-		preempt_enable();
 
+		kvm_vgic_sync_hwstate(vcpu);
+
+		preempt_enable();
 
 		kvm_timer_sync_hwstate(vcpu);
-		kvm_vgic_sync_hwstate(vcpu);
 
 		ret = handle_exit(vcpu, run, ret);
 	}
@@ -921,6 +933,8 @@ static void cpu_init_hyp_mode(void *dummy)
 	vector_ptr = (unsigned long)__kvm_hyp_vector;
 
 	__cpu_init_hyp_mode(boot_pgd_ptr, pgd_ptr, hyp_stack_ptr, vector_ptr);
+
+	kvm_arm_init_debug();
 }
 
 static int hyp_init_cpu_notify(struct notifier_block *self,

+ 6 - 0
arch/arm/kvm/guest.c

@@ -290,3 +290,9 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
 {
 	return -EINVAL;
 }
+
+int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
+					struct kvm_guest_debug *dbg)
+{
+	return -EINVAL;
+}

+ 8 - 6
arch/arm/kvm/interrupts.S

@@ -361,10 +361,6 @@ hyp_hvc:
 	@ Check syndrome register
 	mrc	p15, 4, r1, c5, c2, 0	@ HSR
 	lsr	r0, r1, #HSR_EC_SHIFT
-#ifdef CONFIG_VFPv3
-	cmp	r0, #HSR_EC_CP_0_13
-	beq	switch_to_guest_vfp
-#endif
 	cmp	r0, #HSR_EC_HVC
 	bne	guest_trap		@ Not HVC instr.
 
@@ -378,7 +374,10 @@ hyp_hvc:
 	cmp     r2, #0
 	bne	guest_trap		@ Guest called HVC
 
-host_switch_to_hyp:
+	/*
+	 * Getting here means host called HVC, we shift parameters and branch
+	 * to Hyp function.
+	 */
 	pop	{r0, r1, r2}
 
 	/* Check for __hyp_get_vectors */
@@ -409,6 +408,10 @@ guest_trap:
 
 
 	@ Check if we need the fault information
 	lsr	r1, r1, #HSR_EC_SHIFT
+#ifdef CONFIG_VFPv3
+	cmp	r1, #HSR_EC_CP_0_13
+	beq	switch_to_guest_vfp
+#endif
 	cmp	r1, #HSR_EC_IABT
 	mrceq	p15, 4, r2, c6, c0, 2	@ HIFAR
 	beq	2f
@@ -477,7 +480,6 @@ guest_trap:
  */
 #ifdef CONFIG_VFPv3
 switch_to_guest_vfp:
-	load_vcpu			@ Load VCPU pointer to r0
 	push	{r3-r7}
 
 	@ NEON/VFP used.  Turn on VFP access.

+ 1 - 3
arch/arm/kvm/reset.c

@@ -77,7 +77,5 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
 	kvm_reset_coprocs(vcpu);
 
 	/* Reset arch_timer context */
-	kvm_timer_vcpu_reset(vcpu, cpu_vtimer_irq);
-
-	return 0;
+	return kvm_timer_vcpu_reset(vcpu, cpu_vtimer_irq);
 }

+ 14 - 0
arch/arm64/include/asm/hw_breakpoint.h

@@ -16,6 +16,8 @@
 #ifndef __ASM_HW_BREAKPOINT_H
 #define __ASM_HW_BREAKPOINT_H
 
+#include <asm/cputype.h>
+
 #ifdef __KERNEL__
 
 struct arch_hw_breakpoint_ctrl {
@@ -132,5 +134,17 @@ static inline void ptrace_hw_copy_thread(struct task_struct *task)
 
 
 extern struct pmu perf_ops_bp;
 
+/* Determine number of BRP registers available. */
+static inline int get_num_brps(void)
+{
+	return ((read_cpuid(ID_AA64DFR0_EL1) >> 12) & 0xf) + 1;
+}
+
+/* Determine number of WRP registers available. */
+static inline int get_num_wrps(void)
+{
+	return ((read_cpuid(ID_AA64DFR0_EL1) >> 20) & 0xf) + 1;
+}
+
 #endif	/* __KERNEL__ */
 #endif	/* __ASM_BREAKPOINT_H */

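Note: the two helpers above decode ID_AA64DFR0_EL1, whose BRPs field (bits [15:12]) and WRPs field (bits [23:20]) hold one less than the number of implemented breakpoint/watchpoint register pairs. A standalone sketch of the same arithmetic, using an assumed sample register value:

	#include <stdio.h>
	#include <stdint.h>

	int main(void)
	{
		uint64_t dfr0 = 0x0000000000305106ULL; /* hypothetical ID_AA64DFR0_EL1 */
		int brps = ((dfr0 >> 12) & 0xf) + 1;   /* field 0x5 -> 6 breakpoints */
		int wrps = ((dfr0 >> 20) & 0xf) + 1;   /* field 0x3 -> 4 watchpoints */

		printf("BRPs=%d WRPs=%d\n", brps, wrps);
		return 0;
	}
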
+ 4 - 1
arch/arm64/include/asm/kvm_arm.h

@@ -171,10 +171,13 @@
 #define HSTR_EL2_TTEE	(1 << 16)
 #define HSTR_EL2_T(x)	(1 << x)
 
+/* Hyp Coprocessor Trap Register Shifts */
+#define CPTR_EL2_TFP_SHIFT 10
+
 /* Hyp Coprocessor Trap Register */
 #define CPTR_EL2_TCPAC	(1 << 31)
 #define CPTR_EL2_TTA	(1 << 20)
-#define CPTR_EL2_TFP	(1 << 10)
+#define CPTR_EL2_TFP	(1 << CPTR_EL2_TFP_SHIFT)
 
 /* Hyp Debug Configuration Register bits */
 #define MDCR_EL2_TDRA		(1 << 11)

+ 10 - 16
arch/arm64/include/asm/kvm_asm.h

@@ -46,24 +46,16 @@
 #define	CNTKCTL_EL1	20	/* Timer Control Register (EL1) */
 #define	PAR_EL1		21	/* Physical Address Register */
 #define MDSCR_EL1	22	/* Monitor Debug System Control Register */
-#define DBGBCR0_EL1	23	/* Debug Breakpoint Control Registers (0-15) */
-#define DBGBCR15_EL1	38
-#define DBGBVR0_EL1	39	/* Debug Breakpoint Value Registers (0-15) */
-#define DBGBVR15_EL1	54
-#define DBGWCR0_EL1	55	/* Debug Watchpoint Control Registers (0-15) */
-#define DBGWCR15_EL1	70
-#define DBGWVR0_EL1	71	/* Debug Watchpoint Value Registers (0-15) */
-#define DBGWVR15_EL1	86
-#define MDCCINT_EL1	87	/* Monitor Debug Comms Channel Interrupt Enable Reg */
+#define MDCCINT_EL1	23	/* Monitor Debug Comms Channel Interrupt Enable Reg */
 
 /* 32bit specific registers. Keep them at the end of the range */
-#define	DACR32_EL2	88	/* Domain Access Control Register */
-#define	IFSR32_EL2	89	/* Instruction Fault Status Register */
-#define	FPEXC32_EL2	90	/* Floating-Point Exception Control Register */
-#define	DBGVCR32_EL2	91	/* Debug Vector Catch Register */
-#define	TEECR32_EL1	92	/* ThumbEE Configuration Register */
-#define	TEEHBR32_EL1	93	/* ThumbEE Handler Base Register */
-#define	NR_SYS_REGS	94
+#define	DACR32_EL2	24	/* Domain Access Control Register */
+#define	IFSR32_EL2	25	/* Instruction Fault Status Register */
+#define	FPEXC32_EL2	26	/* Floating-Point Exception Control Register */
+#define	DBGVCR32_EL2	27	/* Debug Vector Catch Register */
+#define	TEECR32_EL1	28	/* ThumbEE Configuration Register */
+#define	TEEHBR32_EL1	29	/* ThumbEE Handler Base Register */
+#define	NR_SYS_REGS	30
 
 /* 32bit mapping */
 #define c0_MPIDR	(MPIDR_EL1 * 2)	/* MultiProcessor ID Register */
@@ -132,6 +124,8 @@ extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu);
 
 
 extern u64 __vgic_v3_get_ich_vtr_el2(void);
 
+extern u32 __kvm_get_mdcr_el2(void);
+
 #endif
 
 #endif /* __ARM_KVM_ASM_H__ */

+ 36 - 6
arch/arm64/include/asm/kvm_host.h

@@ -103,15 +103,34 @@ struct kvm_vcpu_arch {
 
 
 	/* HYP configuration */
 	u64 hcr_el2;
+	u32 mdcr_el2;
 
 	/* Exception Information */
 	struct kvm_vcpu_fault_info fault;
 
-	/* Debug state */
+	/* Guest debug state */
 	u64 debug_flags;
 
+	/*
+	 * We maintain more than a single set of debug registers to support
+	 * debugging the guest from the host and to maintain separate host and
+	 * guest state during world switches. vcpu_debug_state are the debug
+	 * registers of the vcpu as the guest sees them.  host_debug_state are
+	 * the host registers which are saved and restored during
+	 * world switches. external_debug_state contains the debug
+	 * values we want to debug the guest. This is set via the
+	 * KVM_SET_GUEST_DEBUG ioctl.
+	 *
+	 * debug_ptr points to the set of debug registers that should be loaded
+	 * onto the hardware when running the guest.
+	 */
+	struct kvm_guest_debug_arch *debug_ptr;
+	struct kvm_guest_debug_arch vcpu_debug_state;
+	struct kvm_guest_debug_arch external_debug_state;
+
 	/* Pointer to host CPU context */
 	kvm_cpu_context_t *host_cpu_context;
+	struct kvm_guest_debug_arch host_debug_state;
 
 	/* VGIC state */
 	struct vgic_cpu vgic_cpu;
@@ -122,6 +141,17 @@ struct kvm_vcpu_arch {
 	 * here.
 	 */
 
+	/*
+	 * Guest registers we preserve during guest debugging.
+	 *
+	 * These shadow registers are updated by the kvm_handle_sys_reg
+	 * trap handler if the guest accesses or updates them while we
+	 * are using guest debug.
+	 */
+	struct {
+		u32	mdscr_el1;
+	} guest_debug_preserved;
+
 	/* Don't run the guest */
 	bool pause;
 
@@ -216,15 +246,15 @@ static inline void __cpu_init_hyp_mode(phys_addr_t boot_pgd_ptr,
 		     hyp_stack_ptr, vector_ptr);
 }
 
-struct vgic_sr_vectors {
-	void	*save_vgic;
-	void	*restore_vgic;
-};
-
 static inline void kvm_arch_hardware_disable(void) {}
 static inline void kvm_arch_hardware_unsetup(void) {}
 static inline void kvm_arch_sync_events(struct kvm *kvm) {}
 static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
 static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
 
+void kvm_arm_init_debug(void);
+void kvm_arm_setup_debug(struct kvm_vcpu *vcpu);
+void kvm_arm_clear_debug(struct kvm_vcpu *vcpu);
+void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu);
+
 #endif /* __ARM64_KVM_HOST_H__ */

+ 35 - 2
arch/arm64/include/uapi/asm/kvm.h

@@ -53,14 +53,20 @@ struct kvm_regs {
 	struct user_fpsimd_state fp_regs;
 };
 
-/* Supported Processor Types */
+/*
+ * Supported CPU Targets - Adding a new target type is not recommended,
+ * unless there are some special registers not supported by the
+ * genericv8 syreg table.
+ */
 #define KVM_ARM_TARGET_AEM_V8		0
 #define KVM_ARM_TARGET_FOUNDATION_V8	1
 #define KVM_ARM_TARGET_CORTEX_A57	2
 #define KVM_ARM_TARGET_XGENE_POTENZA	3
 #define KVM_ARM_TARGET_CORTEX_A53	4
+/* Generic ARM v8 target */
+#define KVM_ARM_TARGET_GENERIC_V8	5
 
-#define KVM_ARM_NUM_TARGETS		5
+#define KVM_ARM_NUM_TARGETS		6
 
 /* KVM_ARM_SET_DEVICE_ADDR ioctl id encoding */
 #define KVM_ARM_DEVICE_TYPE_SHIFT	0
@@ -100,12 +106,39 @@ struct kvm_sregs {
 struct kvm_fpu {
 };
 
+/*
+ * See v8 ARM ARM D7.3: Debug Registers
+ *
+ * The architectural limit is 16 debug registers of each type although
+ * in practice there are usually less (see ID_AA64DFR0_EL1).
+ *
+ * Although the control registers are architecturally defined as 32
+ * bits wide we use a 64 bit structure here to keep parity with
+ * KVM_GET/SET_ONE_REG behaviour which treats all system registers as
+ * 64 bit values. It also allows for the possibility of the
+ * architecture expanding the control registers without having to
+ * change the userspace ABI.
+ */
+#define KVM_ARM_MAX_DBG_REGS 16
 struct kvm_guest_debug_arch {
+	__u64 dbg_bcr[KVM_ARM_MAX_DBG_REGS];
+	__u64 dbg_bvr[KVM_ARM_MAX_DBG_REGS];
+	__u64 dbg_wcr[KVM_ARM_MAX_DBG_REGS];
+	__u64 dbg_wvr[KVM_ARM_MAX_DBG_REGS];
 };
 
 struct kvm_debug_exit_arch {
+	__u32 hsr;
+	__u64 far;	/* used for watchpoints */
 };
 
+/*
+ * Architecture specific defines for kvm_guest_debug->control
+ */
+
+#define KVM_GUESTDBG_USE_SW_BP		(1 << 16)
+#define KVM_GUESTDBG_USE_HW		(1 << 17)
+
 struct kvm_sync_regs {
 };

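Note: a rough sketch of populating the new structure for one hardware breakpoint and handing it to KVM_SET_GUEST_DEBUG. The dbg_bcr encoding (enable bit, privilege and byte-address-select fields) follows the ARM ARM and is simplified here; vcpu_fd and the breakpoint address are assumptions.

	#include <string.h>
	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	void set_hw_breakpoint(int vcpu_fd, __u64 addr)
	{
		struct kvm_guest_debug dbg;

		memset(&dbg, 0, sizeof(dbg));
		dbg.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW;
		dbg.arch.dbg_bvr[0] = addr;	/* breakpoint value (address) */
		/* E=1, PMC=0b11 (EL1&EL0), BAS=0b1111: simplified control word */
		dbg.arch.dbg_bcr[0] = (0xf << 5) | (0x3 << 1) | 0x1;
		ioctl(vcpu_fd, KVM_SET_GUEST_DEBUG, &dbg);
	}
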
+ 7 - 2
arch/arm64/kernel/asm-offsets.c

@@ -116,17 +116,22 @@ int main(void)
   DEFINE(VCPU_FAR_EL2,		offsetof(struct kvm_vcpu, arch.fault.far_el2));
   DEFINE(VCPU_HPFAR_EL2,	offsetof(struct kvm_vcpu, arch.fault.hpfar_el2));
   DEFINE(VCPU_DEBUG_FLAGS,	offsetof(struct kvm_vcpu, arch.debug_flags));
+  DEFINE(VCPU_DEBUG_PTR,	offsetof(struct kvm_vcpu, arch.debug_ptr));
+  DEFINE(DEBUG_BCR, 		offsetof(struct kvm_guest_debug_arch, dbg_bcr));
+  DEFINE(DEBUG_BVR, 		offsetof(struct kvm_guest_debug_arch, dbg_bvr));
+  DEFINE(DEBUG_WCR, 		offsetof(struct kvm_guest_debug_arch, dbg_wcr));
+  DEFINE(DEBUG_WVR, 		offsetof(struct kvm_guest_debug_arch, dbg_wvr));
   DEFINE(VCPU_HCR_EL2,		offsetof(struct kvm_vcpu, arch.hcr_el2));
+  DEFINE(VCPU_MDCR_EL2,	offsetof(struct kvm_vcpu, arch.mdcr_el2));
   DEFINE(VCPU_IRQ_LINES,	offsetof(struct kvm_vcpu, arch.irq_lines));
   DEFINE(VCPU_HOST_CONTEXT,	offsetof(struct kvm_vcpu, arch.host_cpu_context));
+  DEFINE(VCPU_HOST_DEBUG_STATE, offsetof(struct kvm_vcpu, arch.host_debug_state));
   DEFINE(VCPU_TIMER_CNTV_CTL,	offsetof(struct kvm_vcpu, arch.timer_cpu.cntv_ctl));
   DEFINE(VCPU_TIMER_CNTV_CVAL,	offsetof(struct kvm_vcpu, arch.timer_cpu.cntv_cval));
   DEFINE(KVM_TIMER_CNTVOFF,	offsetof(struct kvm, arch.timer.cntvoff));
   DEFINE(KVM_TIMER_ENABLED,	offsetof(struct kvm, arch.timer.enabled));
   DEFINE(VCPU_KVM,		offsetof(struct kvm_vcpu, kvm));
   DEFINE(VCPU_VGIC_CPU,		offsetof(struct kvm_vcpu, arch.vgic_cpu));
-  DEFINE(VGIC_SAVE_FN,		offsetof(struct vgic_sr_vectors, save_vgic));
-  DEFINE(VGIC_RESTORE_FN,	offsetof(struct vgic_sr_vectors, restore_vgic));
   DEFINE(VGIC_V2_CPU_HCR,	offsetof(struct vgic_cpu, vgic_v2.vgic_hcr));
   DEFINE(VGIC_V2_CPU_VMCR,	offsetof(struct vgic_cpu, vgic_v2.vgic_vmcr));
   DEFINE(VGIC_V2_CPU_MISR,	offsetof(struct vgic_cpu, vgic_v2.vgic_misr));

+ 0 - 12
arch/arm64/kernel/hw_breakpoint.c

@@ -48,18 +48,6 @@ static DEFINE_PER_CPU(int, stepping_kernel_bp);
 static int core_num_brps;
 static int core_num_wrps;
 
-/* Determine number of BRP registers available. */
-static int get_num_brps(void)
-{
-	return ((read_cpuid(ID_AA64DFR0_EL1) >> 12) & 0xf) + 1;
-}
-
-/* Determine number of WRP registers available. */
-static int get_num_wrps(void)
-{
-	return ((read_cpuid(ID_AA64DFR0_EL1) >> 20) & 0xf) + 1;
-}
-
 int hw_breakpoint_slots(int type)
 {
 	/*

+ 1 - 1
arch/arm64/kvm/Makefile

@@ -17,7 +17,7 @@ kvm-$(CONFIG_KVM_ARM_HOST) += $(ARM)/psci.o $(ARM)/perf.o
 
 
 kvm-$(CONFIG_KVM_ARM_HOST) += emulate.o inject_fault.o regmap.o
 kvm-$(CONFIG_KVM_ARM_HOST) += hyp.o hyp-init.o handle_exit.o
-kvm-$(CONFIG_KVM_ARM_HOST) += guest.o reset.o sys_regs.o sys_regs_generic_v8.o
+kvm-$(CONFIG_KVM_ARM_HOST) += guest.o debug.o reset.o sys_regs.o sys_regs_generic_v8.o
 
 kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic.o
 kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v2.o

+ 217 - 0
arch/arm64/kvm/debug.c

@@ -0,0 +1,217 @@
+/*
+ * Debug and Guest Debug support
+ *
+ * Copyright (C) 2015 - Linaro Ltd
+ * Author: Alex Bennée <alex.bennee@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/kvm_host.h>
+#include <linux/hw_breakpoint.h>
+
+#include <asm/debug-monitors.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_arm.h>
+#include <asm/kvm_emulate.h>
+
+#include "trace.h"
+
+/* These are the bits of MDSCR_EL1 we may manipulate */
+#define MDSCR_EL1_DEBUG_MASK	(DBG_MDSCR_SS | \
+				DBG_MDSCR_KDE | \
+				DBG_MDSCR_MDE)
+
+static DEFINE_PER_CPU(u32, mdcr_el2);
+
+/**
+ * save/restore_guest_debug_regs
+ *
+ * For some debug operations we need to tweak some guest registers. As
+ * a result we need to save the state of those registers before we
+ * make those modifications.
+ *
+ * Guest access to MDSCR_EL1 is trapped by the hypervisor and handled
+ * after we have restored the preserved value to the main context.
+ */
+static void save_guest_debug_regs(struct kvm_vcpu *vcpu)
+{
+	vcpu->arch.guest_debug_preserved.mdscr_el1 = vcpu_sys_reg(vcpu, MDSCR_EL1);
+
+	trace_kvm_arm_set_dreg32("Saved MDSCR_EL1",
+				vcpu->arch.guest_debug_preserved.mdscr_el1);
+}
+
+static void restore_guest_debug_regs(struct kvm_vcpu *vcpu)
+{
+	vcpu_sys_reg(vcpu, MDSCR_EL1) = vcpu->arch.guest_debug_preserved.mdscr_el1;
+
+	trace_kvm_arm_set_dreg32("Restored MDSCR_EL1",
+				vcpu_sys_reg(vcpu, MDSCR_EL1));
+}
+
+/**
+ * kvm_arm_init_debug - grab what we need for debug
+ *
+ * Currently the sole task of this function is to retrieve the initial
+ * value of mdcr_el2 so we can preserve MDCR_EL2.HPMN which has
+ * presumably been set-up by some knowledgeable bootcode.
+ *
+ * It is called once per-cpu during CPU hyp initialisation.
+ */
+
+void kvm_arm_init_debug(void)
+{
+	__this_cpu_write(mdcr_el2, kvm_call_hyp(__kvm_get_mdcr_el2));
+}
+
+/**
+ * kvm_arm_reset_debug_ptr - reset the debug ptr to point to the vcpu state
+ */
+
+void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu)
+{
+	vcpu->arch.debug_ptr = &vcpu->arch.vcpu_debug_state;
+}
+
+/**
+ * kvm_arm_setup_debug - set up debug related stuff
+ *
+ * @vcpu:	the vcpu pointer
+ *
+ * This is called before each entry into the hypervisor to setup any
+ * debug related registers. Currently this just ensures we will trap
+ * access to:
+ *  - Performance monitors (MDCR_EL2_TPM/MDCR_EL2_TPMCR)
+ *  - Debug ROM Address (MDCR_EL2_TDRA)
+ *  - OS related registers (MDCR_EL2_TDOSA)
+ *
+ * Additionally, KVM only traps guest accesses to the debug registers if
+ * the guest is not actively using them (see the KVM_ARM64_DEBUG_DIRTY
+ * flag on vcpu->arch.debug_flags).  Since the guest must not interfere
+ * with the hardware state when debugging the guest, we must ensure that
+ * trapping is enabled whenever we are debugging the guest using the
+ * debug registers.
+ */
+
+void kvm_arm_setup_debug(struct kvm_vcpu *vcpu)
+{
+	bool trap_debug = !(vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY);
+
+	trace_kvm_arm_setup_debug(vcpu, vcpu->guest_debug);
+
+	vcpu->arch.mdcr_el2 = __this_cpu_read(mdcr_el2) & MDCR_EL2_HPMN_MASK;
+	vcpu->arch.mdcr_el2 |= (MDCR_EL2_TPM |
+				MDCR_EL2_TPMCR |
+				MDCR_EL2_TDRA |
+				MDCR_EL2_TDOSA);
+
+	/* Is Guest debugging in effect? */
+	if (vcpu->guest_debug) {
+		/* Route all software debug exceptions to EL2 */
+		vcpu->arch.mdcr_el2 |= MDCR_EL2_TDE;
+
+		/* Save guest debug state */
+		save_guest_debug_regs(vcpu);
+
+		/*
+		 * Single Step (ARM ARM D2.12.3 The software step state
+		 * machine)
+		 *
+		 * If we are doing Single Step we need to manipulate
+		 * the guest's MDSCR_EL1.SS and PSTATE.SS. Once the
+		 * step has occurred the hypervisor will trap the
+		 * debug exception and we return to userspace.
+		 *
+		 * If the guest attempts to single step its userspace
+		 * we would have to deal with a trapped exception
+		 * while in the guest kernel. Because this would be
+		 * hard to unwind we suppress the guest's ability to
+		 * do so by masking MDSCR_EL1.SS.
+		 *
+		 * This confuses guest debuggers which use
+		 * single-step behind the scenes but everything
+		 * returns to normal once the host is no longer
+		 * debugging the system.
+		 */
+		if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
+			*vcpu_cpsr(vcpu) |=  DBG_SPSR_SS;
+			vcpu_sys_reg(vcpu, MDSCR_EL1) |= DBG_MDSCR_SS;
+		} else {
+			vcpu_sys_reg(vcpu, MDSCR_EL1) &= ~DBG_MDSCR_SS;
+		}
+
+		trace_kvm_arm_set_dreg32("SPSR_EL2", *vcpu_cpsr(vcpu));
+
+		/*
+		 * HW Breakpoints and watchpoints
+		 *
+		 * We simply switch the debug_ptr to point to our new
+		 * external_debug_state which has been populated by the
+		 * debug ioctl. The existing KVM_ARM64_DEBUG_DIRTY
+		 * mechanism ensures the registers are updated on the
+		 * world switch.
+		 */
+		if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW) {
+			/* Enable breakpoints/watchpoints */
+			vcpu_sys_reg(vcpu, MDSCR_EL1) |= DBG_MDSCR_MDE;
+
+			vcpu->arch.debug_ptr = &vcpu->arch.external_debug_state;
+			vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY;
+			trap_debug = true;
+
+			trace_kvm_arm_set_regset("BKPTS", get_num_brps(),
+						&vcpu->arch.debug_ptr->dbg_bcr[0],
+						&vcpu->arch.debug_ptr->dbg_bvr[0]);
+
+			trace_kvm_arm_set_regset("WAPTS", get_num_wrps(),
+						&vcpu->arch.debug_ptr->dbg_wcr[0],
+						&vcpu->arch.debug_ptr->dbg_wvr[0]);
+		}
+	}
+
+	BUG_ON(!vcpu->guest_debug &&
+		vcpu->arch.debug_ptr != &vcpu->arch.vcpu_debug_state);
+
+	/* Trap debug register access */
+	if (trap_debug)
+		vcpu->arch.mdcr_el2 |= MDCR_EL2_TDA;
+
+	trace_kvm_arm_set_dreg32("MDCR_EL2", vcpu->arch.mdcr_el2);
+	trace_kvm_arm_set_dreg32("MDSCR_EL1", vcpu_sys_reg(vcpu, MDSCR_EL1));
+}
+
+void kvm_arm_clear_debug(struct kvm_vcpu *vcpu)
+{
+	trace_kvm_arm_clear_debug(vcpu->guest_debug);
+
+	if (vcpu->guest_debug) {
+		restore_guest_debug_regs(vcpu);
+
+		/*
+		 * If we were using HW debug we need to restore the
+		 * debug_ptr to the guest debug state.
+		 */
+		if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW) {
+			kvm_arm_reset_debug_ptr(vcpu);
+
+			trace_kvm_arm_set_regset("BKPTS", get_num_brps(),
+						&vcpu->arch.debug_ptr->dbg_bcr[0],
+						&vcpu->arch.debug_ptr->dbg_bvr[0]);
+
+			trace_kvm_arm_set_regset("WAPTS", get_num_wrps(),
+						&vcpu->arch.debug_ptr->dbg_wcr[0],
+						&vcpu->arch.debug_ptr->dbg_wvr[0]);
+		}
+	}
+}

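Note: a minimal sketch of the userspace side of the single-step flow described above - enable KVM_GUESTDBG_SINGLESTEP, run, and expect KVM_EXIT_DEBUG after one guest instruction. vcpu_fd, run (the mmap'ed kvm_run structure) and the exit handler are assumptions; error handling is elided.

	#include <string.h>
	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	extern void handle_debug_exit(struct kvm_run *run);	/* hypothetical */

	void single_step(int vcpu_fd, struct kvm_run *run)
	{
		struct kvm_guest_debug dbg;

		memset(&dbg, 0, sizeof(dbg));
		dbg.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP;
		ioctl(vcpu_fd, KVM_SET_GUEST_DEBUG, &dbg);

		ioctl(vcpu_fd, KVM_RUN, 0);
		/* One instruction has stepped; the hypervisor trapped the
		 * debug exception and exited back with KVM_EXIT_DEBUG. */
		if (run->exit_reason == KVM_EXIT_DEBUG)
			handle_debug_exit(run);
	}
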
+ 42 - 1
arch/arm64/kvm/guest.c

@@ -32,6 +32,8 @@
 #include <asm/kvm_emulate.h>
 #include <asm/kvm_coproc.h>
 
+#include "trace.h"
+
 struct kvm_stats_debugfs_item debugfs_entries[] = {
 	{ NULL }
 };
@@ -293,7 +295,8 @@ int __attribute_const__ kvm_target_cpu(void)
 		break;
 	};
 
-	return -EINVAL;
+	/* Return a default generic target */
+	return KVM_ARM_TARGET_GENERIC_V8;
 }
 
 int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init)
@@ -331,3 +334,41 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
 {
 	return -EINVAL;
 }
+
+#define KVM_GUESTDBG_VALID_MASK (KVM_GUESTDBG_ENABLE |    \
+			    KVM_GUESTDBG_USE_SW_BP | \
+			    KVM_GUESTDBG_USE_HW | \
+			    KVM_GUESTDBG_SINGLESTEP)
+
+/**
+ * kvm_arch_vcpu_ioctl_set_guest_debug - set up guest debugging
+ * @vcpu:	the vcpu pointer
+ * @dbg:	the ioctl data buffer
+ *
+ * This sets up and enables the VM for guest debugging. Userspace
+ * passes in a control flag to enable different debug types and
+ * potentially other architecture specific information in the rest of
+ * the structure.
+ */
+int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
+					struct kvm_guest_debug *dbg)
+{
+	trace_kvm_set_guest_debug(vcpu, dbg->control);
+
+	if (dbg->control & ~KVM_GUESTDBG_VALID_MASK)
+		return -EINVAL;
+
+	if (dbg->control & KVM_GUESTDBG_ENABLE) {
+		vcpu->guest_debug = dbg->control;
+
+		/* Hardware assisted Break and Watch points */
+		if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW) {
+			vcpu->arch.external_debug_state = dbg->arch;
+		}
+
+	} else {
+		/* If not enabled clear all flags */
+		vcpu->guest_debug = 0;
+	}
+	return 0;
+}

+ 44 - 0
arch/arm64/kvm/handle_exit.c

@@ -82,6 +82,45 @@ static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run)
 	return 1;
 }
 
+/**
+ * kvm_handle_guest_debug - handle a debug exception instruction
+ *
+ * @vcpu:	the vcpu pointer
+ * @run:	access to the kvm_run structure for results
+ *
+ * We route all debug exceptions through the same handler. If both the
+ * guest and host are using the same debug facilities it will be up to
+ * userspace to re-inject the correct exception for guest delivery.
+ *
+ * @return: 0 (while setting run->exit_reason), -1 for error
+ */
+static int kvm_handle_guest_debug(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+	u32 hsr = kvm_vcpu_get_hsr(vcpu);
+	int ret = 0;
+
+	run->exit_reason = KVM_EXIT_DEBUG;
+	run->debug.arch.hsr = hsr;
+
+	switch (hsr >> ESR_ELx_EC_SHIFT) {
+	case ESR_ELx_EC_WATCHPT_LOW:
+		run->debug.arch.far = vcpu->arch.fault.far_el2;
+		/* fall through */
+	case ESR_ELx_EC_SOFTSTP_LOW:
+	case ESR_ELx_EC_BREAKPT_LOW:
+	case ESR_ELx_EC_BKPT32:
+	case ESR_ELx_EC_BRK64:
+		break;
+	default:
+		kvm_err("%s: un-handled case hsr: %#08x\n",
+			__func__, (unsigned int) hsr);
+		ret = -1;
+		break;
+	}
+
+	return ret;
+}
+
 static exit_handle_fn arm_exit_handlers[] = {
 	[ESR_ELx_EC_WFx]	= kvm_handle_wfx,
 	[ESR_ELx_EC_CP15_32]	= kvm_handle_cp15_32,
@@ -96,6 +135,11 @@ static exit_handle_fn arm_exit_handlers[] = {
 	[ESR_ELx_EC_SYS64]	= kvm_handle_sys_reg,
 	[ESR_ELx_EC_IABT_LOW]	= kvm_handle_guest_abort,
 	[ESR_ELx_EC_DABT_LOW]	= kvm_handle_guest_abort,
+	[ESR_ELx_EC_SOFTSTP_LOW]= kvm_handle_guest_debug,
+	[ESR_ELx_EC_WATCHPT_LOW]= kvm_handle_guest_debug,
+	[ESR_ELx_EC_BREAKPT_LOW]= kvm_handle_guest_debug,
+	[ESR_ELx_EC_BKPT32]	= kvm_handle_guest_debug,
+	[ESR_ELx_EC_BRK64]	= kvm_handle_guest_debug,
 };
 };
 
 static exit_handle_fn kvm_get_exit_handler(struct kvm_vcpu *vcpu)
+ 218 - 399
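Note: a sketch of what a userspace consumer of these exits might do with the kvm_debug_exit_arch fields set by kvm_handle_guest_debug above. The decode mirrors the kernel-side switch; the EC value and shift follow the ARMv8 ESR_ELx encoding, the struct layout is the arm64 one from this series, and the function name is hypothetical.

	#include <stdio.h>
	#include <linux/kvm.h>

	void handle_debug_exit(struct kvm_run *run)
	{
		__u32 hsr = run->debug.arch.hsr;

		/* The exception class lives in the top bits of HSR/ESR */
		switch (hsr >> 26) {
		case 0x34:	/* ESR_ELx_EC_WATCHPT_LOW */
			printf("watchpoint hit, far=0x%llx\n",
			       (unsigned long long)run->debug.arch.far);
			break;
		default:	/* soft step, breakpoint, BKPT32, BRK64, ... */
			printf("debug exit, hsr=%#x\n", hsr);
			break;
		}
	}
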
arch/arm64/kvm/hyp.S

@@ -230,199 +230,52 @@
 	stp	x24, x25, [x3, #160]
 	stp	x24, x25, [x3, #160]
 .endm
 
-	// x2: base address for cpu context
-	// x3: tmp register
-
-	mrs	x26, id_aa64dfr0_el1
-	ubfx	x24, x26, #12, #4	// Extract BRPs
-	ubfx	x25, x26, #20, #4	// Extract WRPs
-	mov	w26, #15
-	sub	w24, w26, w24		// How many BPs to skip
-	sub	w25, w26, w25		// How many WPs to skip
-
-	add	x3, x2, #CPU_SYSREG_OFFSET(DBGBCR0_EL1)
-
-	adr	x26, 1f
-	add	x26, x26, x24, lsl #2
-	br	x26
-1:
-	mrs	x20, dbgbcr15_el1
-	mrs	x19, dbgbcr14_el1
-	mrs	x18, dbgbcr13_el1
-	mrs	x17, dbgbcr12_el1
-	mrs	x16, dbgbcr11_el1
-	mrs	x15, dbgbcr10_el1
-	mrs	x14, dbgbcr9_el1
-	mrs	x13, dbgbcr8_el1
-	mrs	x12, dbgbcr7_el1
-	mrs	x11, dbgbcr6_el1
-	mrs	x10, dbgbcr5_el1
-	mrs	x9, dbgbcr4_el1
-	mrs	x8, dbgbcr3_el1
-	mrs	x7, dbgbcr2_el1
-	mrs	x6, dbgbcr1_el1
-	mrs	x5, dbgbcr0_el1
-
-	adr	x26, 1f
-	add	x26, x26, x24, lsl #2
-	br	x26
-
-1:
-	str	x20, [x3, #(15 * 8)]
-	str	x19, [x3, #(14 * 8)]
-	str	x18, [x3, #(13 * 8)]
-	str	x17, [x3, #(12 * 8)]
-	str	x16, [x3, #(11 * 8)]
-	str	x15, [x3, #(10 * 8)]
-	str	x14, [x3, #(9 * 8)]
-	str	x13, [x3, #(8 * 8)]
-	str	x12, [x3, #(7 * 8)]
-	str	x11, [x3, #(6 * 8)]
-	str	x10, [x3, #(5 * 8)]
-	str	x9, [x3, #(4 * 8)]
-	str	x8, [x3, #(3 * 8)]
-	str	x7, [x3, #(2 * 8)]
-	str	x6, [x3, #(1 * 8)]
-	str	x5, [x3, #(0 * 8)]
-
-	add	x3, x2, #CPU_SYSREG_OFFSET(DBGBVR0_EL1)
-
-	adr	x26, 1f
-	add	x26, x26, x24, lsl #2
-	br	x26
+.macro save_debug type
+	// x4: pointer to register set
+	// x5: number of registers to skip
+	// x6..x22 trashed
+
+	adr	x22, 1f
+	add	x22, x22, x5, lsl #2
+	br	x22
 1:
-	mrs	x20, dbgbvr15_el1
-	mrs	x19, dbgbvr14_el1
-	mrs	x18, dbgbvr13_el1
-	mrs	x17, dbgbvr12_el1
-	mrs	x16, dbgbvr11_el1
-	mrs	x15, dbgbvr10_el1
-	mrs	x14, dbgbvr9_el1
-	mrs	x13, dbgbvr8_el1
-	mrs	x12, dbgbvr7_el1
-	mrs	x11, dbgbvr6_el1
-	mrs	x10, dbgbvr5_el1
-	mrs	x9, dbgbvr4_el1
-	mrs	x8, dbgbvr3_el1
-	mrs	x7, dbgbvr2_el1
-	mrs	x6, dbgbvr1_el1
-	mrs	x5, dbgbvr0_el1
-
-	adr	x26, 1f
-	add	x26, x26, x24, lsl #2
-	br	x26
-
-1:
-	str	x20, [x3, #(15 * 8)]
-	str	x19, [x3, #(14 * 8)]
-	str	x18, [x3, #(13 * 8)]
-	str	x17, [x3, #(12 * 8)]
-	str	x16, [x3, #(11 * 8)]
-	str	x15, [x3, #(10 * 8)]
-	str	x14, [x3, #(9 * 8)]
-	str	x13, [x3, #(8 * 8)]
-	str	x12, [x3, #(7 * 8)]
-	str	x11, [x3, #(6 * 8)]
-	str	x10, [x3, #(5 * 8)]
-	str	x9, [x3, #(4 * 8)]
-	str	x8, [x3, #(3 * 8)]
-	str	x7, [x3, #(2 * 8)]
-	str	x6, [x3, #(1 * 8)]
-	str	x5, [x3, #(0 * 8)]
-
-	add	x3, x2, #CPU_SYSREG_OFFSET(DBGWCR0_EL1)
-
-	adr	x26, 1f
-	add	x26, x26, x25, lsl #2
-	br	x26
-1:
-	mrs	x20, dbgwcr15_el1
-	mrs	x19, dbgwcr14_el1
-	mrs	x18, dbgwcr13_el1
-	mrs	x17, dbgwcr12_el1
-	mrs	x16, dbgwcr11_el1
-	mrs	x15, dbgwcr10_el1
-	mrs	x14, dbgwcr9_el1
-	mrs	x13, dbgwcr8_el1
-	mrs	x12, dbgwcr7_el1
-	mrs	x11, dbgwcr6_el1
-	mrs	x10, dbgwcr5_el1
-	mrs	x9, dbgwcr4_el1
-	mrs	x8, dbgwcr3_el1
-	mrs	x7, dbgwcr2_el1
-	mrs	x6, dbgwcr1_el1
-	mrs	x5, dbgwcr0_el1
-
-	adr	x26, 1f
-	add	x26, x26, x25, lsl #2
-	br	x26
-
-1:
-	str	x20, [x3, #(15 * 8)]
-	str	x19, [x3, #(14 * 8)]
-	str	x18, [x3, #(13 * 8)]
-	str	x17, [x3, #(12 * 8)]
-	str	x16, [x3, #(11 * 8)]
-	str	x15, [x3, #(10 * 8)]
-	str	x14, [x3, #(9 * 8)]
-	str	x13, [x3, #(8 * 8)]
-	str	x12, [x3, #(7 * 8)]
-	str	x11, [x3, #(6 * 8)]
-	str	x10, [x3, #(5 * 8)]
-	str	x9, [x3, #(4 * 8)]
-	str	x8, [x3, #(3 * 8)]
-	str	x7, [x3, #(2 * 8)]
-	str	x6, [x3, #(1 * 8)]
-	str	x5, [x3, #(0 * 8)]
-
-	add	x3, x2, #CPU_SYSREG_OFFSET(DBGWVR0_EL1)
-
-	adr	x26, 1f
-	add	x26, x26, x25, lsl #2
-	br	x26
-1:
-	mrs	x20, dbgwvr15_el1
-	mrs	x19, dbgwvr14_el1
-	mrs	x18, dbgwvr13_el1
-	mrs	x17, dbgwvr12_el1
-	mrs	x16, dbgwvr11_el1
-	mrs	x15, dbgwvr10_el1
-	mrs	x14, dbgwvr9_el1
-	mrs	x13, dbgwvr8_el1
-	mrs	x12, dbgwvr7_el1
-	mrs	x11, dbgwvr6_el1
-	mrs	x10, dbgwvr5_el1
-	mrs	x9, dbgwvr4_el1
-	mrs	x8, dbgwvr3_el1
-	mrs	x7, dbgwvr2_el1
-	mrs	x6, dbgwvr1_el1
-	mrs	x5, dbgwvr0_el1
-
-	adr	x26, 1f
-	add	x26, x26, x25, lsl #2
-	br	x26
-
+	mrs	x21, \type\()15_el1
+	mrs	x20, \type\()14_el1
+	mrs	x19, \type\()13_el1
+	mrs	x18, \type\()12_el1
+	mrs	x17, \type\()11_el1
+	mrs	x16, \type\()10_el1
+	mrs	x15, \type\()9_el1
+	mrs	x14, \type\()8_el1
+	mrs	x13, \type\()7_el1
+	mrs	x12, \type\()6_el1
+	mrs	x11, \type\()5_el1
+	mrs	x10, \type\()4_el1
+	mrs	x9, \type\()3_el1
+	mrs	x8, \type\()2_el1
+	mrs	x7, \type\()1_el1
+	mrs	x6, \type\()0_el1
+
+	adr	x22, 1f
+	add	x22, x22, x5, lsl #2
+	br	x22
 1:
-	str	x20, [x3, #(15 * 8)]
-	str	x19, [x3, #(14 * 8)]
-	str	x18, [x3, #(13 * 8)]
-	str	x17, [x3, #(12 * 8)]
-	str	x16, [x3, #(11 * 8)]
-	str	x15, [x3, #(10 * 8)]
-	str	x14, [x3, #(9 * 8)]
-	str	x13, [x3, #(8 * 8)]
-	str	x12, [x3, #(7 * 8)]
-	str	x11, [x3, #(6 * 8)]
-	str	x10, [x3, #(5 * 8)]
-	str	x9, [x3, #(4 * 8)]
-	str	x8, [x3, #(3 * 8)]
-	str	x7, [x3, #(2 * 8)]
-	str	x6, [x3, #(1 * 8)]
-	str	x5, [x3, #(0 * 8)]
-
-	mrs	x21, mdccint_el1
-	str	x21, [x2, #CPU_SYSREG_OFFSET(MDCCINT_EL1)]
+	str	x21, [x4, #(15 * 8)]
+	str	x20, [x4, #(14 * 8)]
+	str	x19, [x4, #(13 * 8)]
+	str	x18, [x4, #(12 * 8)]
+	str	x17, [x4, #(11 * 8)]
+	str	x16, [x4, #(10 * 8)]
+	str	x15, [x4, #(9 * 8)]
+	str	x14, [x4, #(8 * 8)]
+	str	x13, [x4, #(7 * 8)]
+	str	x12, [x4, #(6 * 8)]
+	str	x11, [x4, #(5 * 8)]
+	str	x10, [x4, #(4 * 8)]
+	str	x9, [x4, #(3 * 8)]
+	str	x8, [x4, #(2 * 8)]
+	str	x7, [x4, #(1 * 8)]
+	str	x6, [x4, #(0 * 8)]
 .endm
 
 .macro restore_sysregs
@@ -467,195 +320,52 @@
 	msr	mdscr_el1,	x25
 .endm
 
-.macro restore_debug
-	// x2: base address for cpu context
-	// x3: tmp register
-
-	mrs	x26, id_aa64dfr0_el1
-	ubfx	x24, x26, #12, #4	// Extract BRPs
-	ubfx	x25, x26, #20, #4	// Extract WRPs
-	mov	w26, #15
-	sub	w24, w26, w24		// How many BPs to skip
-	sub	w25, w26, w25		// How many WPs to skip
-
-	add	x3, x2, #CPU_SYSREG_OFFSET(DBGBCR0_EL1)
+.macro restore_debug type
+	// x4: pointer to register set
+	// x5: number of registers to skip
+	// x6..x22 trashed
 
-	adr	x26, 1f
-	add	x26, x26, x24, lsl #2
-	br	x26
-1:
-	ldr	x20, [x3, #(15 * 8)]
-	ldr	x19, [x3, #(14 * 8)]
-	ldr	x18, [x3, #(13 * 8)]
-	ldr	x17, [x3, #(12 * 8)]
-	ldr	x16, [x3, #(11 * 8)]
-	ldr	x15, [x3, #(10 * 8)]
-	ldr	x14, [x3, #(9 * 8)]
-	ldr	x13, [x3, #(8 * 8)]
-	ldr	x12, [x3, #(7 * 8)]
-	ldr	x11, [x3, #(6 * 8)]
-	ldr	x10, [x3, #(5 * 8)]
-	ldr	x9, [x3, #(4 * 8)]
-	ldr	x8, [x3, #(3 * 8)]
-	ldr	x7, [x3, #(2 * 8)]
-	ldr	x6, [x3, #(1 * 8)]
-	ldr	x5, [x3, #(0 * 8)]
-
-	adr	x26, 1f
-	add	x26, x26, x24, lsl #2
-	br	x26
+	adr	x22, 1f
+	add	x22, x22, x5, lsl #2
+	br	x22
 1:
-	msr	dbgbcr15_el1, x20
-	msr	dbgbcr14_el1, x19
-	msr	dbgbcr13_el1, x18
-	msr	dbgbcr12_el1, x17
-	msr	dbgbcr11_el1, x16
-	msr	dbgbcr10_el1, x15
-	msr	dbgbcr9_el1, x14
-	msr	dbgbcr8_el1, x13
-	msr	dbgbcr7_el1, x12
-	msr	dbgbcr6_el1, x11
-	msr	dbgbcr5_el1, x10
-	msr	dbgbcr4_el1, x9
-	msr	dbgbcr3_el1, x8
-	msr	dbgbcr2_el1, x7
-	msr	dbgbcr1_el1, x6
-	msr	dbgbcr0_el1, x5
-
-	add	x3, x2, #CPU_SYSREG_OFFSET(DBGBVR0_EL1)
-
-	adr	x26, 1f
-	add	x26, x26, x24, lsl #2
-	br	x26
+	ldr	x21, [x4, #(15 * 8)]
+	ldr	x20, [x4, #(14 * 8)]
+	ldr	x19, [x4, #(13 * 8)]
+	ldr	x18, [x4, #(12 * 8)]
+	ldr	x17, [x4, #(11 * 8)]
+	ldr	x16, [x4, #(10 * 8)]
+	ldr	x15, [x4, #(9 * 8)]
+	ldr	x14, [x4, #(8 * 8)]
+	ldr	x13, [x4, #(7 * 8)]
+	ldr	x12, [x4, #(6 * 8)]
+	ldr	x11, [x4, #(5 * 8)]
+	ldr	x10, [x4, #(4 * 8)]
+	ldr	x9, [x4, #(3 * 8)]
+	ldr	x8, [x4, #(2 * 8)]
+	ldr	x7, [x4, #(1 * 8)]
+	ldr	x6, [x4, #(0 * 8)]
+
+	adr	x22, 1f
+	add	x22, x22, x5, lsl #2
+	br	x22
 1:
-	ldr	x20, [x3, #(15 * 8)]
-	ldr	x19, [x3, #(14 * 8)]
-	ldr	x18, [x3, #(13 * 8)]
-	ldr	x17, [x3, #(12 * 8)]
-	ldr	x16, [x3, #(11 * 8)]
-	ldr	x15, [x3, #(10 * 8)]
-	ldr	x14, [x3, #(9 * 8)]
-	ldr	x13, [x3, #(8 * 8)]
-	ldr	x12, [x3, #(7 * 8)]
-	ldr	x11, [x3, #(6 * 8)]
-	ldr	x10, [x3, #(5 * 8)]
-	ldr	x9, [x3, #(4 * 8)]
-	ldr	x8, [x3, #(3 * 8)]
-	ldr	x7, [x3, #(2 * 8)]
-	ldr	x6, [x3, #(1 * 8)]
-	ldr	x5, [x3, #(0 * 8)]
-
-	adr	x26, 1f
-	add	x26, x26, x24, lsl #2
-	br	x26
-1:
-	msr	dbgbvr15_el1, x20
-	msr	dbgbvr14_el1, x19
-	msr	dbgbvr13_el1, x18
-	msr	dbgbvr12_el1, x17
-	msr	dbgbvr11_el1, x16
-	msr	dbgbvr10_el1, x15
-	msr	dbgbvr9_el1, x14
-	msr	dbgbvr8_el1, x13
-	msr	dbgbvr7_el1, x12
-	msr	dbgbvr6_el1, x11
-	msr	dbgbvr5_el1, x10
-	msr	dbgbvr4_el1, x9
-	msr	dbgbvr3_el1, x8
-	msr	dbgbvr2_el1, x7
-	msr	dbgbvr1_el1, x6
-	msr	dbgbvr0_el1, x5
-
-	add	x3, x2, #CPU_SYSREG_OFFSET(DBGWCR0_EL1)
-
-	adr	x26, 1f
-	add	x26, x26, x25, lsl #2
-	br	x26
-1:
-	ldr	x20, [x3, #(15 * 8)]
-	ldr	x19, [x3, #(14 * 8)]
-	ldr	x18, [x3, #(13 * 8)]
-	ldr	x17, [x3, #(12 * 8)]
-	ldr	x16, [x3, #(11 * 8)]
-	ldr	x15, [x3, #(10 * 8)]
-	ldr	x14, [x3, #(9 * 8)]
-	ldr	x13, [x3, #(8 * 8)]
-	ldr	x12, [x3, #(7 * 8)]
-	ldr	x11, [x3, #(6 * 8)]
-	ldr	x10, [x3, #(5 * 8)]
-	ldr	x9, [x3, #(4 * 8)]
-	ldr	x8, [x3, #(3 * 8)]
-	ldr	x7, [x3, #(2 * 8)]
-	ldr	x6, [x3, #(1 * 8)]
-	ldr	x5, [x3, #(0 * 8)]
-
-	adr	x26, 1f
-	add	x26, x26, x25, lsl #2
-	br	x26
-1:
-	msr	dbgwcr15_el1, x20
-	msr	dbgwcr14_el1, x19
-	msr	dbgwcr13_el1, x18
-	msr	dbgwcr12_el1, x17
-	msr	dbgwcr11_el1, x16
-	msr	dbgwcr10_el1, x15
-	msr	dbgwcr9_el1, x14
-	msr	dbgwcr8_el1, x13
-	msr	dbgwcr7_el1, x12
-	msr	dbgwcr6_el1, x11
-	msr	dbgwcr5_el1, x10
-	msr	dbgwcr4_el1, x9
-	msr	dbgwcr3_el1, x8
-	msr	dbgwcr2_el1, x7
-	msr	dbgwcr1_el1, x6
-	msr	dbgwcr0_el1, x5
-
-	add	x3, x2, #CPU_SYSREG_OFFSET(DBGWVR0_EL1)
-
-	adr	x26, 1f
-	add	x26, x26, x25, lsl #2
-	br	x26
-1:
-	ldr	x20, [x3, #(15 * 8)]
-	ldr	x19, [x3, #(14 * 8)]
-	ldr	x18, [x3, #(13 * 8)]
-	ldr	x17, [x3, #(12 * 8)]
-	ldr	x16, [x3, #(11 * 8)]
-	ldr	x15, [x3, #(10 * 8)]
-	ldr	x14, [x3, #(9 * 8)]
-	ldr	x13, [x3, #(8 * 8)]
-	ldr	x12, [x3, #(7 * 8)]
-	ldr	x11, [x3, #(6 * 8)]
-	ldr	x10, [x3, #(5 * 8)]
-	ldr	x9, [x3, #(4 * 8)]
-	ldr	x8, [x3, #(3 * 8)]
-	ldr	x7, [x3, #(2 * 8)]
-	ldr	x6, [x3, #(1 * 8)]
-	ldr	x5, [x3, #(0 * 8)]
-
-	adr	x26, 1f
-	add	x26, x26, x25, lsl #2
-	br	x26
-1:
-	msr	dbgwvr15_el1, x20
-	msr	dbgwvr14_el1, x19
-	msr	dbgwvr13_el1, x18
-	msr	dbgwvr12_el1, x17
-	msr	dbgwvr11_el1, x16
-	msr	dbgwvr10_el1, x15
-	msr	dbgwvr9_el1, x14
-	msr	dbgwvr8_el1, x13
-	msr	dbgwvr7_el1, x12
-	msr	dbgwvr6_el1, x11
-	msr	dbgwvr5_el1, x10
-	msr	dbgwvr4_el1, x9
-	msr	dbgwvr3_el1, x8
-	msr	dbgwvr2_el1, x7
-	msr	dbgwvr1_el1, x6
-	msr	dbgwvr0_el1, x5
-
-	ldr	x21, [x2, #CPU_SYSREG_OFFSET(MDCCINT_EL1)]
-	msr	mdccint_el1, x21
+	msr	\type\()15_el1, x21
+	msr	\type\()14_el1, x20
+	msr	\type\()13_el1, x19
+	msr	\type\()12_el1, x18
+	msr	\type\()11_el1, x17
+	msr	\type\()10_el1, x16
+	msr	\type\()9_el1, x15
+	msr	\type\()8_el1, x14
+	msr	\type\()7_el1, x13
+	msr	\type\()6_el1, x12
+	msr	\type\()5_el1, x11
+	msr	\type\()4_el1, x10
+	msr	\type\()3_el1, x9
+	msr	\type\()2_el1, x8
+	msr	\type\()1_el1, x7
+	msr	\type\()0_el1, x6
 .endm
 
 .macro skip_32bit_state tmp, target
@@ -675,6 +385,14 @@
 	tbz	\tmp, #KVM_ARM64_DEBUG_DIRTY_SHIFT, \target
 .endm
 
+/*
+ * Branch to target if CPTR_EL2.TFP bit is set (VFP/SIMD trapping enabled)
+ */
+.macro skip_fpsimd_state tmp, target
+	mrs	\tmp, cptr_el2
+	tbnz	\tmp, #CPTR_EL2_TFP_SHIFT, \target
+.endm
+
 .macro compute_debug_state target
 	// Compute debug state: If any of KDE, MDE or KVM_ARM64_DEBUG_DIRTY
 	// is set, we do a full save/restore cycle and disable trapping.
@@ -713,10 +431,12 @@
 	add	x3, x2, #CPU_SYSREG_OFFSET(DACR32_EL2)
 	mrs	x4, dacr32_el2
 	mrs	x5, ifsr32_el2
-	mrs	x6, fpexc32_el2
 	stp	x4, x5, [x3]
-	str	x6, [x3, #16]
 
+	skip_fpsimd_state x8, 3f
+	mrs	x6, fpexc32_el2
+	str	x6, [x3, #16]
+3:
 	skip_debug_state x8, 2f
 	mrs	x7, dbgvcr32_el2
 	str	x7, [x3, #24]
@@ -743,10 +463,8 @@
 
 
 	add	x3, x2, #CPU_SYSREG_OFFSET(DACR32_EL2)
 	ldp	x4, x5, [x3]
-	ldr	x6, [x3, #16]
 	msr	dacr32_el2, x4
 	msr	ifsr32_el2, x5
-	msr	fpexc32_el2, x6
 
 	skip_debug_state x8, 2f
 	ldr	x7, [x3, #24]
@@ -763,31 +481,35 @@
 
 
 .macro activate_traps
 	ldr     x2, [x0, #VCPU_HCR_EL2]
+
+	/*
+	 * We are about to set CPTR_EL2.TFP to trap all floating point
+	 * register accesses to EL2, however, the ARM ARM clearly states that
+	 * traps are only taken to EL2 if the operation would not otherwise
+	 * trap to EL1.  Therefore, always make sure that for 32-bit guests,
+	 * we set FPEXC.EN to prevent traps to EL1, when setting the TFP bit.
+	 */
+	tbnz	x2, #HCR_RW_SHIFT, 99f // open code skip_32bit_state
+	mov	x3, #(1 << 30)
+	msr	fpexc32_el2, x3
+	isb
+99:
 	msr     hcr_el2, x2
 	mov	x2, #CPTR_EL2_TTA
+	orr     x2, x2, #CPTR_EL2_TFP
 	msr	cptr_el2, x2
 
 	mov	x2, #(1 << 15)	// Trap CP15 Cr=15
 	msr	hstr_el2, x2
 
-	mrs	x2, mdcr_el2
-	and	x2, x2, #MDCR_EL2_HPMN_MASK
-	orr	x2, x2, #(MDCR_EL2_TPM | MDCR_EL2_TPMCR)
-	orr	x2, x2, #(MDCR_EL2_TDRA | MDCR_EL2_TDOSA)
-
-	// Check for KVM_ARM64_DEBUG_DIRTY, and set debug to trap
-	// if not dirty.
-	ldr	x3, [x0, #VCPU_DEBUG_FLAGS]
-	tbnz	x3, #KVM_ARM64_DEBUG_DIRTY_SHIFT, 1f
-	orr	x2, x2,  #MDCR_EL2_TDA
-1:
+	// Monitor Debug Config - see kvm_arm_setup_debug()
+	ldr	x2, [x0, #VCPU_MDCR_EL2]
 	msr	mdcr_el2, x2
 .endm
 
 .macro deactivate_traps
 	mov	x2, #HCR_RW
 	msr	hcr_el2, x2
-	msr	cptr_el2, xzr
 	msr	hstr_el2, xzr
 
 	mrs	x2, mdcr_el2
@@ -900,21 +622,101 @@ __restore_sysregs:
 	restore_sysregs
 	ret
 
+/* Save debug state */
 __save_debug:
-	save_debug
+	// x2: ptr to CPU context
+	// x3: ptr to debug reg struct
+	// x4/x5/x6-22/x24-26: trashed
+
+	mrs	x26, id_aa64dfr0_el1
+	ubfx	x24, x26, #12, #4	// Extract BRPs
+	ubfx	x25, x26, #20, #4	// Extract WRPs
+	mov	w26, #15
+	sub	w24, w26, w24		// How many BPs to skip
+	sub	w25, w26, w25		// How many WPs to skip
+
+	mov	x5, x24
+	add	x4, x3, #DEBUG_BCR
+	save_debug dbgbcr
+	add	x4, x3, #DEBUG_BVR
+	save_debug dbgbvr
+
+	mov	x5, x25
+	add	x4, x3, #DEBUG_WCR
+	save_debug dbgwcr
+	add	x4, x3, #DEBUG_WVR
+	save_debug dbgwvr
+
+	mrs	x21, mdccint_el1
+	str	x21, [x2, #CPU_SYSREG_OFFSET(MDCCINT_EL1)]
 	ret
 
+/* Restore debug state */
 __restore_debug:
-	restore_debug
+	// x2: ptr to CPU context
+	// x3: ptr to debug reg struct
+	// x4/x5/x6-22/x24-26: trashed
+
+	mrs	x26, id_aa64dfr0_el1
+	ubfx	x24, x26, #12, #4	// Extract BRPs
+	ubfx	x25, x26, #20, #4	// Extract WRPs
+	mov	w26, #15
+	sub	w24, w26, w24		// How many BPs to skip
+	sub	w25, w26, w25		// How many WPs to skip
+
+	mov	x5, x24
+	add	x4, x3, #DEBUG_BCR
+	restore_debug dbgbcr
+	add	x4, x3, #DEBUG_BVR
+	restore_debug dbgbvr
+
+	mov	x5, x25
+	add	x4, x3, #DEBUG_WCR
+	restore_debug dbgwcr
+	add	x4, x3, #DEBUG_WVR
+	restore_debug dbgwvr
+
+	ldr	x21, [x2, #CPU_SYSREG_OFFSET(MDCCINT_EL1)]
+	msr	mdccint_el1, x21
+
 	ret
 
 __save_fpsimd:
+	skip_fpsimd_state x3, 1f
 	save_fpsimd
-	ret
+1:	ret
 
 __restore_fpsimd:
+	skip_fpsimd_state x3, 1f
 	restore_fpsimd
-	ret
+1:	ret
+
+switch_to_guest_fpsimd:
+	push	x4, lr
+
+	mrs	x2, cptr_el2
+	bic	x2, x2, #CPTR_EL2_TFP
+	msr	cptr_el2, x2
+	isb
+
+	mrs	x0, tpidr_el2
+
+	ldr	x2, [x0, #VCPU_HOST_CONTEXT]
+	kern_hyp_va x2
+	bl __save_fpsimd
+
+	add	x2, x0, #VCPU_CONTEXT
+	bl __restore_fpsimd
+
+	skip_32bit_state x3, 1f
+	ldr	x4, [x2, #CPU_SYSREG_OFFSET(FPEXC32_EL2)]
+	msr	fpexc32_el2, x4
+1:
+	pop	x4, lr
+	pop	x2, x3
+	pop	x0, x1
+
+	eret
 
 
 /*
 /*
  * u64 __kvm_vcpu_run(struct kvm_vcpu *vcpu);
  * u64 __kvm_vcpu_run(struct kvm_vcpu *vcpu);
@@ -936,10 +738,10 @@ ENTRY(__kvm_vcpu_run)
 	kern_hyp_va x2
 
 	save_host_regs
-	bl __save_fpsimd
 	bl __save_sysregs
 
 	compute_debug_state 1f
+	add	x3, x0, #VCPU_HOST_DEBUG_STATE
 	bl	__save_debug
 1:
 	activate_traps
@@ -952,9 +754,10 @@ ENTRY(__kvm_vcpu_run)
 	add	x2, x0, #VCPU_CONTEXT
 
 	bl __restore_sysregs
-	bl __restore_fpsimd
 
 	skip_debug_state x3, 1f
+	ldr	x3, [x0, #VCPU_DEBUG_PTR]
+	kern_hyp_va x3
 	bl	__restore_debug
 1:
 	restore_guest_32bit_state
@@ -975,6 +778,8 @@ __kvm_vcpu_return:
 	bl __save_sysregs
 
 	skip_debug_state x3, 1f
+	ldr	x3, [x0, #VCPU_DEBUG_PTR]
+	kern_hyp_va x3
 	bl	__save_debug
 1:
 	save_guest_32bit_state
@@ -991,12 +796,15 @@ __kvm_vcpu_return:
 
 
 	bl __restore_sysregs
 	bl __restore_fpsimd
+	/* Clear FPSIMD and Trace trapping */
+	msr     cptr_el2, xzr
 
 	skip_debug_state x3, 1f
 	// Clear the dirty flag for the next run, as all the state has
 	// already been saved. Note that we nuke the whole 64bit word.
 	// If we ever add more flags, we'll have to be more careful...
 	str	xzr, [x0, #VCPU_DEBUG_FLAGS]
+	add	x3, x0, #VCPU_HOST_DEBUG_STATE
 	bl	__restore_debug
 1:
 	restore_host_regs
@@ -1199,6 +1007,11 @@ el1_trap:
 	 * x1: ESR
 	 * x2: ESR_EC
 	 */
+
+	/* Guest accessed VFP/SIMD registers, save host, restore Guest */
+	cmp	x2, #ESR_ELx_EC_FP_ASIMD
+	b.eq	switch_to_guest_fpsimd
+
 	cmp	x2, #ESR_ELx_EC_DABT_LOW
 	mov	x0, #ESR_ELx_EC_IABT_LOW
 	ccmp	x2, x0, #4, ne
@@ -1293,4 +1106,10 @@ ENTRY(__kvm_hyp_vector)
 	ventry	el1_error_invalid		// Error 32-bit EL1
ENDPROC(__kvm_hyp_vector)
 
+
+ENTRY(__kvm_get_mdcr_el2)
+	mrs	x0, mdcr_el2
+	ret
+ENDPROC(__kvm_get_mdcr_el2)
+
 	.popsection

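The hyp.S rework above is the heart of the lazy FP/SIMD switch: guest entry leaves the CPTR_EL2.TFP trap armed, and only the first guest VFP/SIMD access faults into switch_to_guest_fpsimd, which saves the host registers and installs the guest's before resuming with eret. A rough C sketch of that control flow (helper names here are illustrative, not the real hyp symbols):

	/* Sketch only: lazy FP/SIMD switch on first guest use. */
	static void handle_fp_asimd_trap(struct kvm_vcpu *vcpu)
	{
		disable_fpsimd_trap();			/* clear CPTR_EL2.TFP */
		save_fpsimd(host_context(vcpu));	/* save host state once */
		load_fpsimd(guest_context(vcpu));	/* install guest state */
		/* On exit, __kvm_vcpu_return restores the host copy; if the
		 * trap never fired, skip_fpsimd_state makes that a no-op. */
	}

If the guest never touches FP/SIMD during a run, neither save nor restore happens, which is the point of the change.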
+ 17 - 3
arch/arm64/kvm/reset.c

@@ -22,6 +22,7 @@
 #include <linux/errno.h>
 #include <linux/kvm_host.h>
 #include <linux/kvm.h>
+#include <linux/hw_breakpoint.h>
 
 #include <kvm/arm_arch_timer.h>
 
@@ -56,6 +57,12 @@ static bool cpu_has_32bit_el1(void)
 	return !!(pfr0 & 0x20);
 }
 
+/**
+ * kvm_arch_dev_ioctl_check_extension
+ *
+ * We currently assume that the number of HW registers is uniform
+ * across all CPUs (see cpuinfo_sanity_check).
+ */
 int kvm_arch_dev_ioctl_check_extension(long ext)
 {
 	int r;
@@ -64,6 +71,15 @@ int kvm_arch_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_ARM_EL1_32BIT:
 		r = cpu_has_32bit_el1();
 		break;
+	case KVM_CAP_GUEST_DEBUG_HW_BPS:
+		r = get_num_brps();
+		break;
+	case KVM_CAP_GUEST_DEBUG_HW_WPS:
+		r = get_num_wrps();
+		break;
+	case KVM_CAP_SET_GUEST_DEBUG:
+		r = 1;
+		break;
 	default:
 		r = 0;
 	}
@@ -105,7 +121,5 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
 	kvm_reset_sys_regs(vcpu);
 
 	/* Reset timer */
-	kvm_timer_vcpu_reset(vcpu, cpu_vtimer_irq);
-
-	return 0;
+	return kvm_timer_vcpu_reset(vcpu, cpu_vtimer_irq);
 }

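The three new capability cases are queried from userspace through the standard KVM_CHECK_EXTENSION ioctl; for the two debug-register capabilities the return value is the resource count rather than a plain boolean. A minimal probe, error handling elided:

	#include <fcntl.h>
	#include <stdio.h>
	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	int main(void)
	{
		int kvm = open("/dev/kvm", O_RDWR);
		/* these resolve to get_num_brps()/get_num_wrps() above */
		int bps = ioctl(kvm, KVM_CHECK_EXTENSION, KVM_CAP_GUEST_DEBUG_HW_BPS);
		int wps = ioctl(kvm, KVM_CHECK_EXTENSION, KVM_CAP_GUEST_DEBUG_HW_WPS);
		printf("hw breakpoints: %d, hw watchpoints: %d\n", bps, wps);
		return 0;
	}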
+ 269 - 22
arch/arm64/kvm/sys_regs.c

@@ -38,6 +38,8 @@
 
 #include "sys_regs.h"
 
+#include "trace.h"
+
 /*
  * All of this file is extremly similar to the ARM coproc.c, but the
  * types are different. My gut feeling is that it should be pretty
@@ -208,9 +210,217 @@ static bool trap_debug_regs(struct kvm_vcpu *vcpu,
 		*vcpu_reg(vcpu, p->Rt) = vcpu_sys_reg(vcpu, r->reg);
 	}
 
+	trace_trap_reg(__func__, r->reg, p->is_write, *vcpu_reg(vcpu, p->Rt));
+
+	return true;
+}
+
+/*
+ * reg_to_dbg/dbg_to_reg
+ *
+ * A 32 bit write to a debug register leave top bits alone
+ * A 32 bit read from a debug register only returns the bottom bits
+ *
+ * All writes will set the KVM_ARM64_DEBUG_DIRTY flag to ensure the
+ * hyp.S code switches between host and guest values in future.
+ */
+static inline void reg_to_dbg(struct kvm_vcpu *vcpu,
+			      const struct sys_reg_params *p,
+			      u64 *dbg_reg)
+{
+	u64 val = *vcpu_reg(vcpu, p->Rt);
+
+	if (p->is_32bit) {
+		val &= 0xffffffffUL;
+		val |= ((*dbg_reg >> 32) << 32);
+	}
+
+	*dbg_reg = val;
+	vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY;
+}
+
+static inline void dbg_to_reg(struct kvm_vcpu *vcpu,
+			      const struct sys_reg_params *p,
+			      u64 *dbg_reg)
+{
+	u64 val = *dbg_reg;
+
+	if (p->is_32bit)
+		val &= 0xffffffffUL;
+
+	*vcpu_reg(vcpu, p->Rt) = val;
+}
+
+static inline bool trap_bvr(struct kvm_vcpu *vcpu,
+			    const struct sys_reg_params *p,
+			    const struct sys_reg_desc *rd)
+{
+	u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg];
+
+	if (p->is_write)
+		reg_to_dbg(vcpu, p, dbg_reg);
+	else
+		dbg_to_reg(vcpu, p, dbg_reg);
+
+	trace_trap_reg(__func__, rd->reg, p->is_write, *dbg_reg);
+
+	return true;
+}
+
+static int set_bvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
+		const struct kvm_one_reg *reg, void __user *uaddr)
+{
+	__u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg];
+
+	if (copy_from_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0)
+		return -EFAULT;
+	return 0;
+}
+
+static int get_bvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
+	const struct kvm_one_reg *reg, void __user *uaddr)
+{
+	__u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg];
+
+	if (copy_to_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0)
+		return -EFAULT;
+	return 0;
+}
+
+static inline void reset_bvr(struct kvm_vcpu *vcpu,
+			     const struct sys_reg_desc *rd)
+{
+	vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg] = rd->val;
+}
+
+static inline bool trap_bcr(struct kvm_vcpu *vcpu,
+			    const struct sys_reg_params *p,
+			    const struct sys_reg_desc *rd)
+{
+	u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bcr[rd->reg];
+
+	if (p->is_write)
+		reg_to_dbg(vcpu, p, dbg_reg);
+	else
+		dbg_to_reg(vcpu, p, dbg_reg);
+
+	trace_trap_reg(__func__, rd->reg, p->is_write, *dbg_reg);
+
+	return true;
+}
+
+static int set_bcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
+		const struct kvm_one_reg *reg, void __user *uaddr)
+{
+	__u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bcr[rd->reg];
+
+	if (copy_from_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0)
+		return -EFAULT;
+
+	return 0;
+}
+
+static int get_bcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
+	const struct kvm_one_reg *reg, void __user *uaddr)
+{
+	__u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bcr[rd->reg];
+
+	if (copy_to_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0)
+		return -EFAULT;
+	return 0;
+}
+
+static inline void reset_bcr(struct kvm_vcpu *vcpu,
+			     const struct sys_reg_desc *rd)
+{
+	vcpu->arch.vcpu_debug_state.dbg_bcr[rd->reg] = rd->val;
+}
+
+static inline bool trap_wvr(struct kvm_vcpu *vcpu,
+			    const struct sys_reg_params *p,
+			    const struct sys_reg_desc *rd)
+{
+	u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_wvr[rd->reg];
+
+	if (p->is_write)
+		reg_to_dbg(vcpu, p, dbg_reg);
+	else
+		dbg_to_reg(vcpu, p, dbg_reg);
+
+	trace_trap_reg(__func__, rd->reg, p->is_write,
+		vcpu->arch.vcpu_debug_state.dbg_wvr[rd->reg]);
+
 	return true;
 }
 
+static int set_wvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
+		const struct kvm_one_reg *reg, void __user *uaddr)
+{
+	__u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wvr[rd->reg];
+
+	if (copy_from_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0)
+		return -EFAULT;
+	return 0;
+}
+
+static int get_wvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
+	const struct kvm_one_reg *reg, void __user *uaddr)
+{
+	__u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wvr[rd->reg];
+
+	if (copy_to_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0)
+		return -EFAULT;
+	return 0;
+}
+
+static inline void reset_wvr(struct kvm_vcpu *vcpu,
+			     const struct sys_reg_desc *rd)
+{
+	vcpu->arch.vcpu_debug_state.dbg_wvr[rd->reg] = rd->val;
+}
+
+static inline bool trap_wcr(struct kvm_vcpu *vcpu,
+			    const struct sys_reg_params *p,
+			    const struct sys_reg_desc *rd)
+{
+	u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_wcr[rd->reg];
+
+	if (p->is_write)
+		reg_to_dbg(vcpu, p, dbg_reg);
+	else
+		dbg_to_reg(vcpu, p, dbg_reg);
+
+	trace_trap_reg(__func__, rd->reg, p->is_write, *dbg_reg);
+
+	return true;
+}
+
+static int set_wcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
+		const struct kvm_one_reg *reg, void __user *uaddr)
+{
+	__u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wcr[rd->reg];
+
+	if (copy_from_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0)
+		return -EFAULT;
+	return 0;
+}
+
+static int get_wcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
+	const struct kvm_one_reg *reg, void __user *uaddr)
+{
+	__u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wcr[rd->reg];
+
+	if (copy_to_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0)
+		return -EFAULT;
+	return 0;
+}
+
+static inline void reset_wcr(struct kvm_vcpu *vcpu,
+			     const struct sys_reg_desc *rd)
+{
+	vcpu->arch.vcpu_debug_state.dbg_wcr[rd->reg] = rd->val;
+}
+
 static void reset_amair_el1(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
 {
 	u64 amair;
@@ -240,16 +450,16 @@ static void reset_mpidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
 #define DBG_BCR_BVR_WCR_WVR_EL1(n)					\
 	/* DBGBVRn_EL1 */						\
 	{ Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b100),	\
-	  trap_debug_regs, reset_val, (DBGBVR0_EL1 + (n)), 0 },		\
+	  trap_bvr, reset_bvr, n, 0, get_bvr, set_bvr },		\
 	/* DBGBCRn_EL1 */						\
 	{ Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b101),	\
-	  trap_debug_regs, reset_val, (DBGBCR0_EL1 + (n)), 0 },		\
+	  trap_bcr, reset_bcr, n, 0, get_bcr, set_bcr },		\
 	/* DBGWVRn_EL1 */						\
 	{ Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b110),	\
-	  trap_debug_regs, reset_val, (DBGWVR0_EL1 + (n)), 0 },		\
+	  trap_wvr, reset_wvr, n, 0,  get_wvr, set_wvr },		\
 	/* DBGWCRn_EL1 */						\
 	{ Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b111),	\
-	  trap_debug_regs, reset_val, (DBGWCR0_EL1 + (n)), 0 }
+	  trap_wcr, reset_wcr, n, 0,  get_wcr, set_wcr }
 
 /*
  * Architected system registers.
@@ -516,28 +726,57 @@ static bool trap_debug32(struct kvm_vcpu *vcpu,
 	return true;
 }
 
-#define DBG_BCR_BVR_WCR_WVR(n)					\
-	/* DBGBVRn */						\
-	{ Op1( 0), CRn( 0), CRm((n)), Op2( 4), trap_debug32,	\
-	  NULL, (cp14_DBGBVR0 + (n) * 2) },			\
-	/* DBGBCRn */						\
-	{ Op1( 0), CRn( 0), CRm((n)), Op2( 5), trap_debug32,	\
-	  NULL, (cp14_DBGBCR0 + (n) * 2) },			\
-	/* DBGWVRn */						\
-	{ Op1( 0), CRn( 0), CRm((n)), Op2( 6), trap_debug32,	\
-	  NULL, (cp14_DBGWVR0 + (n) * 2) },			\
-	/* DBGWCRn */						\
-	{ Op1( 0), CRn( 0), CRm((n)), Op2( 7), trap_debug32,	\
-	  NULL, (cp14_DBGWCR0 + (n) * 2) }
-
-#define DBGBXVR(n)						\
-	{ Op1( 0), CRn( 1), CRm((n)), Op2( 1), trap_debug32,	\
-	  NULL, cp14_DBGBXVR0 + n * 2 }
+/* AArch32 debug register mappings
+ *
+ * AArch32 DBGBVRn is mapped to DBGBVRn_EL1[31:0]
+ * AArch32 DBGBXVRn is mapped to DBGBVRn_EL1[63:32]
+ *
+ * All control registers and watchpoint value registers are mapped to
+ * the lower 32 bits of their AArch64 equivalents. We share the trap
+ * handlers with the above AArch64 code which checks what mode the
+ * system is in.
+ */
+
+static inline bool trap_xvr(struct kvm_vcpu *vcpu,
+			    const struct sys_reg_params *p,
+			    const struct sys_reg_desc *rd)
+{
+	u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg];
+
+	if (p->is_write) {
+		u64 val = *dbg_reg;
+
+		val &= 0xffffffffUL;
+		val |= *vcpu_reg(vcpu, p->Rt) << 32;
+		*dbg_reg = val;
+
+		vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY;
+	} else {
+		*vcpu_reg(vcpu, p->Rt) = *dbg_reg >> 32;
+	}
+
+	trace_trap_reg(__func__, rd->reg, p->is_write, *dbg_reg);
+
+	return true;
+}
+
+#define DBG_BCR_BVR_WCR_WVR(n)						\
+	/* DBGBVRn */							\
+	{ Op1( 0), CRn( 0), CRm((n)), Op2( 4), trap_bvr, NULL, n }, 	\
+	/* DBGBCRn */							\
+	{ Op1( 0), CRn( 0), CRm((n)), Op2( 5), trap_bcr, NULL, n },	\
+	/* DBGWVRn */							\
+	{ Op1( 0), CRn( 0), CRm((n)), Op2( 6), trap_wvr, NULL, n },	\
+	/* DBGWCRn */							\
+	{ Op1( 0), CRn( 0), CRm((n)), Op2( 7), trap_wcr, NULL, n }
+
+#define DBGBXVR(n)							\
+	{ Op1( 0), CRn( 1), CRm((n)), Op2( 1), trap_xvr, NULL, n }
 
 /*
  * Trapped cp14 registers. We generally ignore most of the external
  * debug, on the principle that they don't really make sense to a
- * guest. Revisit this one day, whould this principle change.
+ * guest. Revisit this one day, would this principle change.
  */
 static const struct sys_reg_desc cp14_regs[] = {
 	/* DBGIDR */
@@ -999,6 +1238,8 @@ int kvm_handle_sys_reg(struct kvm_vcpu *vcpu, struct kvm_run *run)
 	struct sys_reg_params params;
 	unsigned long esr = kvm_vcpu_get_hsr(vcpu);
 
+	trace_kvm_handle_sys_reg(esr);
+
 	params.is_aarch32 = false;
 	params.is_32bit = false;
 	params.Op0 = (esr >> 20) & 3;
@@ -1303,6 +1544,9 @@ int kvm_arm_sys_reg_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg
 	if (!r)
 		return get_invariant_sys_reg(reg->id, uaddr);
 
+	if (r->get_user)
+		return (r->get_user)(vcpu, r, reg, uaddr);
+
 	return reg_to_user(uaddr, &vcpu_sys_reg(vcpu, r->reg), reg->id);
 }
 
@@ -1321,6 +1565,9 @@ int kvm_arm_sys_reg_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg
 	if (!r)
 		return set_invariant_sys_reg(reg->id, uaddr);
 
+	if (r->set_user)
+		return (r->set_user)(vcpu, r, reg, uaddr);
+
 	return reg_from_user(&vcpu_sys_reg(vcpu, r->reg), uaddr, reg->id);
 }
 

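The reg_to_dbg/dbg_to_reg pair above pins down the AArch32 view of the 64-bit break/watchpoint registers: a 32-bit write replaces only the low word (the high word is reached separately through DBGBXVRn and trap_xvr), and a 32-bit read returns only the low word. A worked example with illustrative values:

	u64 dbg = 0xdeadbeef00000000ULL;   /* current DBGBVRn_EL1 contents */
	u64 val = 0x12345678;              /* 32-bit guest write */

	val &= 0xffffffffUL;               /* reg_to_dbg, is_32bit case */
	val |= ((dbg >> 32) << 32);        /* top word preserved */
	dbg = val;                         /* dbg == 0xdeadbeef12345678 */

	u32 rd = dbg & 0xffffffffUL;       /* dbg_to_reg read: 0x12345678 */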
+ 6 - 0
arch/arm64/kvm/sys_regs.h

@@ -55,6 +55,12 @@ struct sys_reg_desc {
 
 	/* Value (usually reset value) */
 	u64 val;
+
+	/* Custom get/set_user functions, fallback to generic if NULL */
+	int (*get_user)(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
+			const struct kvm_one_reg *reg, void __user *uaddr);
+	int (*set_user)(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
+			const struct kvm_one_reg *reg, void __user *uaddr);
 };
 };
 
 static inline void print_sys_reg_instr(const struct sys_reg_params *p)

+ 2 - 0
arch/arm64/kvm/sys_regs_generic_v8.c

@@ -94,6 +94,8 @@ static int __init sys_reg_genericv8_init(void)
 					  &genericv8_target_table);
 	kvm_register_target_sys_reg_table(KVM_ARM_TARGET_XGENE_POTENZA,
 					  &genericv8_target_table);
+	kvm_register_target_sys_reg_table(KVM_ARM_TARGET_GENERIC_V8,
+					  &genericv8_target_table);
 
 	return 0;
 }

+ 123 - 0
arch/arm64/kvm/trace.h

@@ -44,6 +44,129 @@ TRACE_EVENT(kvm_hvc_arm64,
 		  __entry->vcpu_pc, __entry->r0, __entry->imm)
 );
 
+TRACE_EVENT(kvm_arm_setup_debug,
+	TP_PROTO(struct kvm_vcpu *vcpu, __u32 guest_debug),
+	TP_ARGS(vcpu, guest_debug),
+
+	TP_STRUCT__entry(
+		__field(struct kvm_vcpu *, vcpu)
+		__field(__u32, guest_debug)
+	),
+
+	TP_fast_assign(
+		__entry->vcpu = vcpu;
+		__entry->guest_debug = guest_debug;
+	),
+
+	TP_printk("vcpu: %p, flags: 0x%08x", __entry->vcpu, __entry->guest_debug)
+);
+
+TRACE_EVENT(kvm_arm_clear_debug,
+	TP_PROTO(__u32 guest_debug),
+	TP_ARGS(guest_debug),
+
+	TP_STRUCT__entry(
+		__field(__u32, guest_debug)
+	),
+
+	TP_fast_assign(
+		__entry->guest_debug = guest_debug;
+	),
+
+	TP_printk("flags: 0x%08x", __entry->guest_debug)
+);
+
+TRACE_EVENT(kvm_arm_set_dreg32,
+	TP_PROTO(const char *name, __u32 value),
+	TP_ARGS(name, value),
+
+	TP_STRUCT__entry(
+		__field(const char *, name)
+		__field(__u32, value)
+	),
+
+	TP_fast_assign(
+		__entry->name = name;
+		__entry->value = value;
+	),
+
+	TP_printk("%s: 0x%08x", __entry->name, __entry->value)
+);
+
+TRACE_EVENT(kvm_arm_set_regset,
+	TP_PROTO(const char *type, int len, __u64 *control, __u64 *value),
+	TP_ARGS(type, len, control, value),
+	TP_STRUCT__entry(
+		__field(const char *, name)
+		__field(int, len)
+		__array(u64, ctrls, 16)
+		__array(u64, values, 16)
+	),
+	TP_fast_assign(
+		__entry->name = type;
+		__entry->len = len;
+		memcpy(__entry->ctrls, control, len << 3);
+		memcpy(__entry->values, value, len << 3);
+	),
+	TP_printk("%d %s CTRL:%s VALUE:%s", __entry->len, __entry->name,
+		__print_array(__entry->ctrls, __entry->len, sizeof(__u64)),
+		__print_array(__entry->values, __entry->len, sizeof(__u64)))
+);
+
+TRACE_EVENT(trap_reg,
+	TP_PROTO(const char *fn, int reg, bool is_write, u64 write_value),
+	TP_ARGS(fn, reg, is_write, write_value),
+
+	TP_STRUCT__entry(
+		__field(const char *, fn)
+		__field(int, reg)
+		__field(bool, is_write)
+		__field(u64, write_value)
+	),
+
+	TP_fast_assign(
+		__entry->fn = fn;
+		__entry->reg = reg;
+		__entry->is_write = is_write;
+		__entry->write_value = write_value;
+	),
+
+	TP_printk("%s %s reg %d (0x%08llx)", __entry->fn,  __entry->is_write?"write to":"read from", __entry->reg, __entry->write_value)
+);
+
+TRACE_EVENT(kvm_handle_sys_reg,
+	TP_PROTO(unsigned long hsr),
+	TP_ARGS(hsr),
+
+	TP_STRUCT__entry(
+		__field(unsigned long,	hsr)
+	),
+
+	TP_fast_assign(
+		__entry->hsr = hsr;
+	),
+
+	TP_printk("HSR 0x%08lx", __entry->hsr)
+);
+
+TRACE_EVENT(kvm_set_guest_debug,
+	TP_PROTO(struct kvm_vcpu *vcpu, __u32 guest_debug),
+	TP_ARGS(vcpu, guest_debug),
+
+	TP_STRUCT__entry(
+		__field(struct kvm_vcpu *, vcpu)
+		__field(__u32, guest_debug)
+	),
+
+	TP_fast_assign(
+		__entry->vcpu = vcpu;
+		__entry->guest_debug = guest_debug;
+	),
+
+	TP_printk("vcpu: %p, flags: 0x%08x", __entry->vcpu, __entry->guest_debug)
+);
+
+
 #endif /* _TRACE_ARM64_KVM_H */
 
 #undef TRACE_INCLUDE_PATH

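One small detail in kvm_arm_set_regset above: the memcpy length is len << 3 because control and value point at u64 arrays, so each copy moves len * sizeof(u64) bytes (bounded by the 16-entry __array fields):

	int len = 6;                 /* e.g. six breakpoint pairs */
	size_t bytes = len << 3;     /* 6 * sizeof(u64) == 48 bytes */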
+ 3 - 2
arch/powerpc/include/asm/kvm_book3s.h

@@ -158,6 +158,7 @@ extern pfn_t kvmppc_gpa_to_pfn(struct kvm_vcpu *vcpu, gpa_t gpa, bool writing,
 			bool *writable);
 extern void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev,
 			unsigned long *rmap, long pte_index, int realmode);
+extern void kvmppc_update_rmap_change(unsigned long *rmap, unsigned long psize);
 extern void kvmppc_invalidate_hpte(struct kvm *kvm, __be64 *hptep,
 			unsigned long pte_index);
 void kvmppc_clear_ref_hpte(struct kvm *kvm, __be64 *hptep,
@@ -225,12 +226,12 @@ static inline u32 kvmppc_get_cr(struct kvm_vcpu *vcpu)
 	return vcpu->arch.cr;
 }
 
-static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, u32 val)
+static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, ulong val)
 {
 	vcpu->arch.xer = val;
 }
 
-static inline u32 kvmppc_get_xer(struct kvm_vcpu *vcpu)
+static inline ulong kvmppc_get_xer(struct kvm_vcpu *vcpu)
 {
 	return vcpu->arch.xer;
 }

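Widening kvmppc_set_xer()/kvmppc_get_xer() from u32 to ulong matters because XER is a 64-bit SPR on 64-bit implementations; round-tripping it through a 32-bit accessor silently drops the upper word. A one-line illustration of the truncation being removed (value is illustrative):

	ulong xer = 0x100000000UL | 0x20000000UL;  /* a bit above 31, plus CA */
	u32 old = (u32)xer;                        /* 0x20000000: upper bit lost */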
+ 21 - 1
arch/powerpc/include/asm/kvm_book3s_asm.h

@@ -25,6 +25,12 @@
 #define XICS_MFRR		0xc
 #define XICS_IPI		2	/* interrupt source # for IPIs */
 
+/* Maximum number of threads per physical core */
+#define MAX_SMT_THREADS		8
+
+/* Maximum number of subcores per physical core */
+#define MAX_SUBCORES		4
+
 #ifdef __ASSEMBLY__
 
 #ifdef CONFIG_KVM_BOOK3S_HANDLER
@@ -65,6 +71,19 @@ kvmppc_resume_\intno:
 
 #else  /*__ASSEMBLY__ */
 
+struct kvmppc_vcore;
+
+/* Struct used for coordinating micro-threading (split-core) mode changes */
+struct kvm_split_mode {
+	unsigned long	rpr;
+	unsigned long	pmmar;
+	unsigned long	ldbar;
+	u8		subcore_size;
+	u8		do_nap;
+	u8		napped[MAX_SMT_THREADS];
+	struct kvmppc_vcore *master_vcs[MAX_SUBCORES];
+};
+
 /*
  * This struct goes in the PACA on 64-bit processors.  It is used
  * to store host state that needs to be saved when we enter a guest
@@ -100,6 +119,7 @@ struct kvmppc_host_state {
 	u64 host_spurr;
 	u64 host_dscr;
 	u64 dec_expires;
+	struct kvm_split_mode *kvm_split_mode;
 #endif
 #ifdef CONFIG_PPC_BOOK3S_64
 	u64 cfar;
@@ -112,7 +132,7 @@ struct kvmppc_book3s_shadow_vcpu {
 	bool in_use;
 	ulong gpr[14];
 	u32 cr;
-	u32 xer;
+	ulong xer;
 	ulong ctr;
 	ulong lr;
 	ulong pc;

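kvm_split_mode is the scratch area through which the primary thread drives a dynamic split: it publishes the per-subcore SPR values (rpr, pmmar, ldbar) and the subcore size, and collects the napped[] handshakes from the secondary threads. The geometry is fixed by the POWER8 core: MAX_SMT_THREADS (8) hardware threads split 2- or 4-way, so:

	int split = 2;                               /* or 4 */
	int subcore_size = MAX_SMT_THREADS / split;  /* 4 (or 2) threads each */
	/* book3s_hv.c starts subcore n at HW thread {0, 4, 2, 6}[n] */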
+ 2 - 2
arch/powerpc/include/asm/kvm_booke.h

@@ -54,12 +54,12 @@ static inline u32 kvmppc_get_cr(struct kvm_vcpu *vcpu)
 	return vcpu->arch.cr;
 }
 
-static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, u32 val)
+static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, ulong val)
 {
 	vcpu->arch.xer = val;
 }
 
-static inline u32 kvmppc_get_xer(struct kvm_vcpu *vcpu)
+static inline ulong kvmppc_get_xer(struct kvm_vcpu *vcpu)
 {
 	return vcpu->arch.xer;
 }

+ 20 - 6
arch/powerpc/include/asm/kvm_host.h

@@ -205,8 +205,10 @@ struct revmap_entry {
  */
 #define KVMPPC_RMAP_LOCK_BIT	63
 #define KVMPPC_RMAP_RC_SHIFT	32
+#define KVMPPC_RMAP_CHG_SHIFT	48
 #define KVMPPC_RMAP_REFERENCED	(HPTE_R_R << KVMPPC_RMAP_RC_SHIFT)
 #define KVMPPC_RMAP_CHANGED	(HPTE_R_C << KVMPPC_RMAP_RC_SHIFT)
+#define KVMPPC_RMAP_CHG_ORDER	(0x3ful << KVMPPC_RMAP_CHG_SHIFT)
 #define KVMPPC_RMAP_PRESENT	0x100000000ul
 #define KVMPPC_RMAP_INDEX	0xfffffffful
 
@@ -278,7 +280,9 @@ struct kvmppc_vcore {
 	u16 last_cpu;
 	u8 vcore_state;
 	u8 in_guest;
+	struct kvmppc_vcore *master_vcore;
 	struct list_head runnable_threads;
+	struct list_head preempt_list;
 	spinlock_t lock;
 	wait_queue_head_t wq;
 	spinlock_t stoltb_lock;	/* protects stolen_tb and preempt_tb */
@@ -300,12 +304,21 @@ struct kvmppc_vcore {
 #define VCORE_EXIT_MAP(vc)	((vc)->entry_exit_map >> 8)
 #define VCORE_IS_EXITING(vc)	(VCORE_EXIT_MAP(vc) != 0)
 
-/* Values for vcore_state */
+/* This bit is used when a vcore exit is triggered from outside the vcore */
+#define VCORE_EXIT_REQ		0x10000
+
+/*
+ * Values for vcore_state.
+ * Note that these are arranged such that lower values
+ * (< VCORE_SLEEPING) don't require stolen time accounting
+ * on load/unload, and higher values do.
+ */
 #define VCORE_INACTIVE	0
-#define VCORE_SLEEPING	1
-#define VCORE_PREEMPT	2
-#define VCORE_RUNNING	3
-#define VCORE_EXITING	4
+#define VCORE_PREEMPT	1
+#define VCORE_PIGGYBACK	2
+#define VCORE_SLEEPING	3
+#define VCORE_RUNNING	4
+#define VCORE_EXITING	5
 
 /*
  * Struct used to manage memory for a virtual processor area
@@ -473,7 +486,7 @@ struct kvm_vcpu_arch {
 	ulong ciabr;
 	ulong cfar;
 	ulong ppr;
-	ulong pspb;
+	u32 pspb;
 	ulong fscr;
 	ulong shadow_fscr;
 	ulong ebbhr;
@@ -619,6 +632,7 @@ struct kvm_vcpu_arch {
 	int trap;
 	int state;
 	int ptid;
+	int thread_cpu;
 	bool timer_running;
 	wait_queue_head_t cpu_run;
 

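Note that the VCORE_* renumbering is deliberate, as the new comment says: every state that needs stolen-time accounting on vcpu load/unload now sorts at or above VCORE_SLEEPING, so the old '!= VCORE_INACTIVE' tests in book3s_hv.c collapse into one range check:

	/* states >= VCORE_SLEEPING account stolen time on load/unload */
	if (vc->runner == vcpu && vc->vcore_state >= VCORE_SLEEPING)
		kvmppc_core_end_stolen(vc);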
+ 1 - 1
arch/powerpc/include/asm/ppc-opcode.h

@@ -287,7 +287,7 @@
 
 /* POWER8 Micro Partition Prefetch (MPP) parameters */
 /* Address mask is common for LOGMPP instruction and MPPR SPR */
-#define PPC_MPPE_ADDRESS_MASK 0xffffffffc000
+#define PPC_MPPE_ADDRESS_MASK 0xffffffffc000ULL
 
 /* Bits 60 and 61 of MPP SPR should be set to one of the following */
 /* Aborting the fetch is indeed setting 00 in the table size bits */

+ 9 - 0
arch/powerpc/kernel/asm-offsets.c

@@ -511,6 +511,8 @@ int main(void)
 	DEFINE(VCPU_VPA, offsetof(struct kvm_vcpu, arch.vpa.pinned_addr));
 	DEFINE(VCPU_VPA_DIRTY, offsetof(struct kvm_vcpu, arch.vpa.dirty));
 	DEFINE(VCPU_HEIR, offsetof(struct kvm_vcpu, arch.emul_inst));
+	DEFINE(VCPU_CPU, offsetof(struct kvm_vcpu, cpu));
+	DEFINE(VCPU_THREAD_CPU, offsetof(struct kvm_vcpu, arch.thread_cpu));
 #endif
 #ifdef CONFIG_PPC_BOOK3S
 	DEFINE(VCPU_VCPUID, offsetof(struct kvm_vcpu, vcpu_id));
@@ -673,7 +675,14 @@ int main(void)
 	HSTATE_FIELD(HSTATE_DSCR, host_dscr);
 	HSTATE_FIELD(HSTATE_DABR, dabr);
 	HSTATE_FIELD(HSTATE_DECEXP, dec_expires);
+	HSTATE_FIELD(HSTATE_SPLIT_MODE, kvm_split_mode);
 	DEFINE(IPI_PRIORITY, IPI_PRIORITY);
+	DEFINE(KVM_SPLIT_RPR, offsetof(struct kvm_split_mode, rpr));
+	DEFINE(KVM_SPLIT_PMMAR, offsetof(struct kvm_split_mode, pmmar));
+	DEFINE(KVM_SPLIT_LDBAR, offsetof(struct kvm_split_mode, ldbar));
+	DEFINE(KVM_SPLIT_SIZE, offsetof(struct kvm_split_mode, subcore_size));
+	DEFINE(KVM_SPLIT_DO_NAP, offsetof(struct kvm_split_mode, do_nap));
+	DEFINE(KVM_SPLIT_NAPPED, offsetof(struct kvm_split_mode, napped));
 #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
 
 #ifdef CONFIG_PPC_BOOK3S_64

+ 4 - 4
arch/powerpc/kvm/Kconfig

@@ -74,14 +74,14 @@ config KVM_BOOK3S_64
 	  If unsure, say N.
 
 config KVM_BOOK3S_64_HV
-	tristate "KVM support for POWER7 and PPC970 using hypervisor mode in host"
+	tristate "KVM for POWER7 and later using hypervisor mode in host"
 	depends on KVM_BOOK3S_64 && PPC_POWERNV
 	select KVM_BOOK3S_HV_POSSIBLE
 	select MMU_NOTIFIER
 	select CMA
 	---help---
 	  Support running unmodified book3s_64 guest kernels in
-	  virtual machines on POWER7 and PPC970 processors that have
+	  virtual machines on POWER7 and newer processors that have
 	  hypervisor mode available to the host.
 
 	  If you say Y here, KVM will use the hardware virtualization
@@ -89,8 +89,8 @@ config KVM_BOOK3S_64_HV
 	  guest operating systems will run at full hardware speed
 	  using supervisor and user modes.  However, this also means
 	  that KVM is not usable under PowerVM (pHyp), is only usable
-	  on POWER7 (or later) processors and PPC970-family processors,
-	  and cannot emulate a different processor from the host processor.
+	  on POWER7 or later processors, and cannot emulate a
+	  different processor from the host processor.
 
 	  If unsure, say N.
 

+ 2 - 1
arch/powerpc/kvm/book3s.c

@@ -240,7 +240,8 @@ void kvmppc_core_queue_inst_storage(struct kvm_vcpu *vcpu, ulong flags)
 	kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_INST_STORAGE);
 }
 
-int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu, unsigned int priority)
+static int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu,
+					 unsigned int priority)
 {
 	int deliver = 1;
 	int vec = 0;

+ 1 - 0
arch/powerpc/kvm/book3s_32_mmu_host.c

@@ -26,6 +26,7 @@
 #include <asm/machdep.h>
 #include <asm/mmu_context.h>
 #include <asm/hw_irq.h>
+#include "book3s.h"
 
 /* #define DEBUG_MMU */
 /* #define DEBUG_SR */

+ 1 - 0
arch/powerpc/kvm/book3s_64_mmu_host.c

@@ -28,6 +28,7 @@
 #include <asm/mmu_context.h>
 #include <asm/hw_irq.h>
 #include "trace_pr.h"
+#include "book3s.h"
 
 #define PTE_SIZE 12
 

+ 7 - 1
arch/powerpc/kvm/book3s_64_mmu_hv.c

@@ -761,6 +761,8 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
 			/* Harvest R and C */
 			rcbits = be64_to_cpu(hptep[1]) & (HPTE_R_R | HPTE_R_C);
 			*rmapp |= rcbits << KVMPPC_RMAP_RC_SHIFT;
+			if (rcbits & HPTE_R_C)
+				kvmppc_update_rmap_change(rmapp, psize);
 			if (rcbits & ~rev[i].guest_rpte) {
 				rev[i].guest_rpte = ptel | rcbits;
 				note_hpte_modification(kvm, &rev[i]);
@@ -927,8 +929,12 @@ static int kvm_test_clear_dirty_npages(struct kvm *kvm, unsigned long *rmapp)
  retry:
 	lock_rmap(rmapp);
 	if (*rmapp & KVMPPC_RMAP_CHANGED) {
-		*rmapp &= ~KVMPPC_RMAP_CHANGED;
+		long change_order = (*rmapp & KVMPPC_RMAP_CHG_ORDER)
+			>> KVMPPC_RMAP_CHG_SHIFT;
+		*rmapp &= ~(KVMPPC_RMAP_CHANGED | KVMPPC_RMAP_CHG_ORDER);
 		npages_dirty = 1;
+		if (change_order > PAGE_SHIFT)
+			npages_dirty = 1ul << (change_order - PAGE_SHIFT);
 	}
 	if (!(*rmapp & KVMPPC_RMAP_PRESENT)) {
 		unlock_rmap(rmapp);

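The CHG_ORDER bits let a single rmap entry remember the page-size order of the mapping that dirtied it, so kvm_test_clear_dirty_npages() can report every base page backing a large page. For example, with 4k base pages (PAGE_SHIFT == 12) and a dirty 16MB page:

	long change_order = 24;              /* log2(16MB), read from the rmap */
	unsigned long npages_dirty = 1;
	if (change_order > PAGE_SHIFT)
		npages_dirty = 1ul << (change_order - PAGE_SHIFT);
	/* npages_dirty == 4096: the whole 16MB range is reported dirty */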
+ 1 - 0
arch/powerpc/kvm/book3s_emulate.c

@@ -23,6 +23,7 @@
 #include <asm/reg.h>
 #include <asm/switch_to.h>
 #include <asm/time.h>
+#include "book3s.h"
 
 #define OP_19_XOP_RFID		18
 #define OP_19_XOP_RFI		50

+ 582 - 82
arch/powerpc/kvm/book3s_hv.c

@@ -81,6 +81,12 @@ static DECLARE_BITMAP(default_enabled_hcalls, MAX_HCALL_OPCODE/4 + 1);
 #define MPP_BUFFER_ORDER	3
 #endif
 
+static int dynamic_mt_modes = 6;
+module_param(dynamic_mt_modes, int, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(dynamic_mt_modes, "Set of allowed dynamic micro-threading modes: 0 (= none), 2, 4, or 6 (= 2 or 4)");
+static int target_smt_mode;
+module_param(target_smt_mode, int, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(target_smt_mode, "Target threads per core (0 = max)");
 
 static void kvmppc_end_cede(struct kvm_vcpu *vcpu);
 static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);
@@ -114,7 +120,7 @@ static bool kvmppc_ipi_thread(int cpu)
 
 static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu)
 {
-	int cpu = vcpu->cpu;
+	int cpu;
 	wait_queue_head_t *wqp;
 
 	wqp = kvm_arch_vcpu_wq(vcpu);
@@ -123,10 +129,11 @@ static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu)
 		++vcpu->stat.halt_wakeup;
 	}
 
-	if (kvmppc_ipi_thread(cpu + vcpu->arch.ptid))
+	if (kvmppc_ipi_thread(vcpu->arch.thread_cpu))
 		return;
 
 	/* CPU points to the first thread of the core */
+	cpu = vcpu->cpu;
 	if (cpu >= 0 && cpu < nr_cpu_ids && cpu_online(cpu))
 		smp_send_reschedule(cpu);
 }
@@ -164,6 +171,27 @@ static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu)
  * they should never fail.)
  */
 
+static void kvmppc_core_start_stolen(struct kvmppc_vcore *vc)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&vc->stoltb_lock, flags);
+	vc->preempt_tb = mftb();
+	spin_unlock_irqrestore(&vc->stoltb_lock, flags);
+}
+
+static void kvmppc_core_end_stolen(struct kvmppc_vcore *vc)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&vc->stoltb_lock, flags);
+	if (vc->preempt_tb != TB_NIL) {
+		vc->stolen_tb += mftb() - vc->preempt_tb;
+		vc->preempt_tb = TB_NIL;
+	}
+	spin_unlock_irqrestore(&vc->stoltb_lock, flags);
+}
+
 static void kvmppc_core_vcpu_load_hv(struct kvm_vcpu *vcpu, int cpu)
 {
 	struct kvmppc_vcore *vc = vcpu->arch.vcore;
@@ -175,14 +203,9 @@ static void kvmppc_core_vcpu_load_hv(struct kvm_vcpu *vcpu, int cpu)
 	 * vcpu, and once it is set to this vcpu, only this task
 	 * ever sets it to NULL.
 	 */
-	if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE) {
-		spin_lock_irqsave(&vc->stoltb_lock, flags);
-		if (vc->preempt_tb != TB_NIL) {
-			vc->stolen_tb += mftb() - vc->preempt_tb;
-			vc->preempt_tb = TB_NIL;
-		}
-		spin_unlock_irqrestore(&vc->stoltb_lock, flags);
-	}
+	if (vc->runner == vcpu && vc->vcore_state >= VCORE_SLEEPING)
+		kvmppc_core_end_stolen(vc);
+
 	spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags);
 	if (vcpu->arch.state == KVMPPC_VCPU_BUSY_IN_HOST &&
 	    vcpu->arch.busy_preempt != TB_NIL) {
@@ -197,11 +220,9 @@ static void kvmppc_core_vcpu_put_hv(struct kvm_vcpu *vcpu)
 	struct kvmppc_vcore *vc = vcpu->arch.vcore;
 	unsigned long flags;
 
-	if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE) {
-		spin_lock_irqsave(&vc->stoltb_lock, flags);
-		vc->preempt_tb = mftb();
-		spin_unlock_irqrestore(&vc->stoltb_lock, flags);
-	}
+	if (vc->runner == vcpu && vc->vcore_state >= VCORE_SLEEPING)
+		kvmppc_core_start_stolen(vc);
+
 	spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags);
 	if (vcpu->arch.state == KVMPPC_VCPU_BUSY_IN_HOST)
 		vcpu->arch.busy_preempt = mftb();
@@ -214,12 +235,12 @@ static void kvmppc_set_msr_hv(struct kvm_vcpu *vcpu, u64 msr)
 	kvmppc_end_cede(vcpu);
 }
 
-void kvmppc_set_pvr_hv(struct kvm_vcpu *vcpu, u32 pvr)
+static void kvmppc_set_pvr_hv(struct kvm_vcpu *vcpu, u32 pvr)
 {
 	vcpu->arch.pvr = pvr;
 }
 
-int kvmppc_set_arch_compat(struct kvm_vcpu *vcpu, u32 arch_compat)
+static int kvmppc_set_arch_compat(struct kvm_vcpu *vcpu, u32 arch_compat)
 {
 	unsigned long pcr = 0;
 	struct kvmppc_vcore *vc = vcpu->arch.vcore;
@@ -259,7 +280,7 @@ int kvmppc_set_arch_compat(struct kvm_vcpu *vcpu, u32 arch_compat)
 	return 0;
 }
 
-void kvmppc_dump_regs(struct kvm_vcpu *vcpu)
+static void kvmppc_dump_regs(struct kvm_vcpu *vcpu)
 {
 	int r;
 
@@ -292,7 +313,7 @@ void kvmppc_dump_regs(struct kvm_vcpu *vcpu)
 	       vcpu->arch.last_inst);
 }
 
-struct kvm_vcpu *kvmppc_find_vcpu(struct kvm *kvm, int id)
+static struct kvm_vcpu *kvmppc_find_vcpu(struct kvm *kvm, int id)
 {
 	int r;
 	struct kvm_vcpu *v, *ret = NULL;
@@ -641,7 +662,8 @@ static int kvm_arch_vcpu_yield_to(struct kvm_vcpu *target)
 
 	spin_lock(&vcore->lock);
 	if (target->arch.state == KVMPPC_VCPU_RUNNABLE &&
-	    vcore->vcore_state != VCORE_INACTIVE)
+	    vcore->vcore_state != VCORE_INACTIVE &&
+	    vcore->runner)
 		target = vcore->runner;
 	spin_unlock(&vcore->lock);
 
@@ -1431,6 +1453,7 @@ static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int core)
 	vcore->lpcr = kvm->arch.lpcr;
 	vcore->first_vcpuid = core * threads_per_subcore;
 	vcore->kvm = kvm;
+	INIT_LIST_HEAD(&vcore->preempt_list);
 
 	vcore->mpp_buffer_is_valid = false;
 
@@ -1655,6 +1678,7 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm,
 	spin_unlock(&vcore->lock);
 	vcpu->arch.vcore = vcore;
 	vcpu->arch.ptid = vcpu->vcpu_id - vcore->first_vcpuid;
+	vcpu->arch.thread_cpu = -1;
 
 	vcpu->arch.cpu_type = KVM_CPU_3S_64;
 	kvmppc_sanity_check(vcpu);
@@ -1749,6 +1773,7 @@ static int kvmppc_grab_hwthread(int cpu)
 
 	/* Ensure the thread won't go into the kernel if it wakes */
 	tpaca->kvm_hstate.kvm_vcpu = NULL;
+	tpaca->kvm_hstate.kvm_vcore = NULL;
 	tpaca->kvm_hstate.napping = 0;
 	smp_wmb();
 	tpaca->kvm_hstate.hwthread_req = 1;
@@ -1780,26 +1805,32 @@ static void kvmppc_release_hwthread(int cpu)
 	tpaca = &paca[cpu];
 	tpaca->kvm_hstate.hwthread_req = 0;
 	tpaca->kvm_hstate.kvm_vcpu = NULL;
+	tpaca->kvm_hstate.kvm_vcore = NULL;
+	tpaca->kvm_hstate.kvm_split_mode = NULL;
 }
 
-static void kvmppc_start_thread(struct kvm_vcpu *vcpu)
+static void kvmppc_start_thread(struct kvm_vcpu *vcpu, struct kvmppc_vcore *vc)
 {
 	int cpu;
 	struct paca_struct *tpaca;
-	struct kvmppc_vcore *vc = vcpu->arch.vcore;
+	struct kvmppc_vcore *mvc = vc->master_vcore;
 
-	if (vcpu->arch.timer_running) {
-		hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
-		vcpu->arch.timer_running = 0;
+	cpu = vc->pcpu;
+	if (vcpu) {
+		if (vcpu->arch.timer_running) {
+			hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
+			vcpu->arch.timer_running = 0;
+		}
+		cpu += vcpu->arch.ptid;
+		vcpu->cpu = mvc->pcpu;
+		vcpu->arch.thread_cpu = cpu;
 	}
-	cpu = vc->pcpu + vcpu->arch.ptid;
 	tpaca = &paca[cpu];
-	tpaca->kvm_hstate.kvm_vcore = vc;
-	tpaca->kvm_hstate.ptid = vcpu->arch.ptid;
-	vcpu->cpu = vc->pcpu;
-	/* Order stores to hstate.kvm_vcore etc. before store to kvm_vcpu */
-	smp_wmb();
 	tpaca->kvm_hstate.kvm_vcpu = vcpu;
+	tpaca->kvm_hstate.ptid = cpu - mvc->pcpu;
+	/* Order stores to hstate.kvm_vcpu etc. before store to kvm_vcore */
+	smp_wmb();
+	tpaca->kvm_hstate.kvm_vcore = mvc;
 	if (cpu != smp_processor_id())
 		kvmppc_ipi_thread(cpu);
 }
@@ -1812,12 +1843,12 @@ static void kvmppc_wait_for_nap(void)
 	for (loops = 0; loops < 1000000; ++loops) {
 		/*
 		 * Check if all threads are finished.
-		 * We set the vcpu pointer when starting a thread
+		 * We set the vcore pointer when starting a thread
 		 * and the thread clears it when finished, so we look
-		 * for any threads that still have a non-NULL vcpu ptr.
+		 * for any threads that still have a non-NULL vcore ptr.
 		 */
 		for (i = 1; i < threads_per_subcore; ++i)
-			if (paca[cpu + i].kvm_hstate.kvm_vcpu)
+			if (paca[cpu + i].kvm_hstate.kvm_vcore)
 				break;
 		if (i == threads_per_subcore) {
 			HMT_medium();
@@ -1827,7 +1858,7 @@ static void kvmppc_wait_for_nap(void)
 	}
 	HMT_medium();
 	for (i = 1; i < threads_per_subcore; ++i)
-		if (paca[cpu + i].kvm_hstate.kvm_vcpu)
+		if (paca[cpu + i].kvm_hstate.kvm_vcore)
 			pr_err("KVM: CPU %d seems to be stuck\n", cpu + i);
 }
 
@@ -1890,6 +1921,278 @@ static void kvmppc_start_restoring_l2_cache(const struct kvmppc_vcore *vc)
 	mtspr(SPRN_MPPR, mpp_addr | PPC_MPPR_FETCH_WHOLE_TABLE);
 }
 
+/*
+ * A list of virtual cores for each physical CPU.
+ * These are vcores that could run but their runner VCPU tasks are
+ * (or may be) preempted.
+ */
+struct preempted_vcore_list {
+	struct list_head	list;
+	spinlock_t		lock;
+};
+
+static DEFINE_PER_CPU(struct preempted_vcore_list, preempted_vcores);
+
+static void init_vcore_lists(void)
+{
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		struct preempted_vcore_list *lp = &per_cpu(preempted_vcores, cpu);
+		spin_lock_init(&lp->lock);
+		INIT_LIST_HEAD(&lp->list);
+	}
+}
+
+static void kvmppc_vcore_preempt(struct kvmppc_vcore *vc)
+{
+	struct preempted_vcore_list *lp = this_cpu_ptr(&preempted_vcores);
+
+	vc->vcore_state = VCORE_PREEMPT;
+	vc->pcpu = smp_processor_id();
+	if (vc->num_threads < threads_per_subcore) {
+		spin_lock(&lp->lock);
+		list_add_tail(&vc->preempt_list, &lp->list);
+		spin_unlock(&lp->lock);
+	}
+
+	/* Start accumulating stolen time */
+	kvmppc_core_start_stolen(vc);
+}
+
+static void kvmppc_vcore_end_preempt(struct kvmppc_vcore *vc)
+{
+	struct preempted_vcore_list *lp;
+
+	kvmppc_core_end_stolen(vc);
+	if (!list_empty(&vc->preempt_list)) {
+		lp = &per_cpu(preempted_vcores, vc->pcpu);
+		spin_lock(&lp->lock);
+		list_del_init(&vc->preempt_list);
+		spin_unlock(&lp->lock);
+	}
+	vc->vcore_state = VCORE_INACTIVE;
+}
+
+/*
+ * This stores information about the virtual cores currently
+ * assigned to a physical core.
+ */
+struct core_info {
+	int		n_subcores;
+	int		max_subcore_threads;
+	int		total_threads;
+	int		subcore_threads[MAX_SUBCORES];
+	struct kvm	*subcore_vm[MAX_SUBCORES];
+	struct list_head vcs[MAX_SUBCORES];
+};
+
+/*
+ * This mapping means subcores 0 and 1 can use threads 0-3 and 4-7
+ * respectively in 2-way micro-threading (split-core) mode.
+ */
+static int subcore_thread_map[MAX_SUBCORES] = { 0, 4, 2, 6 };
+
+static void init_core_info(struct core_info *cip, struct kvmppc_vcore *vc)
+{
+	int sub;
+
+	memset(cip, 0, sizeof(*cip));
+	cip->n_subcores = 1;
+	cip->max_subcore_threads = vc->num_threads;
+	cip->total_threads = vc->num_threads;
+	cip->subcore_threads[0] = vc->num_threads;
+	cip->subcore_vm[0] = vc->kvm;
+	for (sub = 0; sub < MAX_SUBCORES; ++sub)
+		INIT_LIST_HEAD(&cip->vcs[sub]);
+	list_add_tail(&vc->preempt_list, &cip->vcs[0]);
+}
+
+static bool subcore_config_ok(int n_subcores, int n_threads)
+{
+	/* Can only dynamically split if unsplit to begin with */
+	if (n_subcores > 1 && threads_per_subcore < MAX_SMT_THREADS)
+		return false;
+	if (n_subcores > MAX_SUBCORES)
+		return false;
+	if (n_subcores > 1) {
+		if (!(dynamic_mt_modes & 2))
+			n_subcores = 4;
+		if (n_subcores > 2 && !(dynamic_mt_modes & 4))
+			return false;
+	}
+
+	return n_subcores * roundup_pow_of_two(n_threads) <= MAX_SMT_THREADS;
+}
+
+static void init_master_vcore(struct kvmppc_vcore *vc)
+{
+	vc->master_vcore = vc;
+	vc->entry_exit_map = 0;
+	vc->in_guest = 0;
+	vc->napping_threads = 0;
+	vc->conferring_threads = 0;
+}
+
+/*
+ * See if the existing subcores can be split into 3 (or fewer) subcores
+ * of at most two threads each, so we can fit in another vcore.  This
+ * assumes there are at most two subcores and at most 6 threads in total.
+ */
+static bool can_split_piggybacked_subcores(struct core_info *cip)
+{
+	int sub, new_sub;
+	int large_sub = -1;
+	int thr;
+	int n_subcores = cip->n_subcores;
+	struct kvmppc_vcore *vc, *vcnext;
+	struct kvmppc_vcore *master_vc = NULL;
+
+	for (sub = 0; sub < cip->n_subcores; ++sub) {
+		if (cip->subcore_threads[sub] <= 2)
+			continue;
+		if (large_sub >= 0)
+			return false;
+		large_sub = sub;
+		vc = list_first_entry(&cip->vcs[sub], struct kvmppc_vcore,
+				      preempt_list);
+		if (vc->num_threads > 2)
+			return false;
+		n_subcores += (cip->subcore_threads[sub] - 1) >> 1;
+	}
+	if (n_subcores > 3 || large_sub < 0)
+		return false;
+
+	/*
+	 * Seems feasible, so go through and move vcores to new subcores.
+	 * Note that when we have two or more vcores in one subcore,
+	 * all those vcores must have only one thread each.
+	 */
+	new_sub = cip->n_subcores;
+	thr = 0;
+	sub = large_sub;
+	list_for_each_entry_safe(vc, vcnext, &cip->vcs[sub], preempt_list) {
+		if (thr >= 2) {
+			list_del(&vc->preempt_list);
+			list_add_tail(&vc->preempt_list, &cip->vcs[new_sub]);
+			/* vc->num_threads must be 1 */
+			if (++cip->subcore_threads[new_sub] == 1) {
+				cip->subcore_vm[new_sub] = vc->kvm;
+				init_master_vcore(vc);
+				master_vc = vc;
+				++cip->n_subcores;
+			} else {
+				vc->master_vcore = master_vc;
+				++new_sub;
+			}
+		}
+		thr += vc->num_threads;
+	}
+	cip->subcore_threads[large_sub] = 2;
+	cip->max_subcore_threads = 2;
+
+	return true;
+}
+
+static bool can_dynamic_split(struct kvmppc_vcore *vc, struct core_info *cip)
+{
+	int n_threads = vc->num_threads;
+	int sub;
+
+	if (!cpu_has_feature(CPU_FTR_ARCH_207S))
+		return false;
+
+	if (n_threads < cip->max_subcore_threads)
+		n_threads = cip->max_subcore_threads;
+	if (subcore_config_ok(cip->n_subcores + 1, n_threads)) {
+		cip->max_subcore_threads = n_threads;
+	} else if (cip->n_subcores <= 2 && cip->total_threads <= 6 &&
+		   vc->num_threads <= 2) {
+		/*
+		 * We may be able to fit another subcore in by
+		 * splitting an existing subcore with 3 or 4
+		 * threads into two 2-thread subcores, or one
+		 * with 5 or 6 threads into three subcores.
+		 * We can only do this if those subcores have
+		 * piggybacked virtual cores.
+		 */
+		if (!can_split_piggybacked_subcores(cip))
+			return false;
+	} else {
+		return false;
+	}
+
+	sub = cip->n_subcores;
+	++cip->n_subcores;
+	cip->total_threads += vc->num_threads;
+	cip->subcore_threads[sub] = vc->num_threads;
+	cip->subcore_vm[sub] = vc->kvm;
+	init_master_vcore(vc);
+	list_del(&vc->preempt_list);
+	list_add_tail(&vc->preempt_list, &cip->vcs[sub]);
+
+	return true;
+}
+
+static bool can_piggyback_subcore(struct kvmppc_vcore *pvc,
+				  struct core_info *cip, int sub)
+{
+	struct kvmppc_vcore *vc;
+	int n_thr;
+
+	vc = list_first_entry(&cip->vcs[sub], struct kvmppc_vcore,
+			      preempt_list);
+
+	/* require same VM and same per-core reg values */
+	if (pvc->kvm != vc->kvm ||
+	    pvc->tb_offset != vc->tb_offset ||
+	    pvc->pcr != vc->pcr ||
+	    pvc->lpcr != vc->lpcr)
+		return false;
+
+	/* P8 guest with > 1 thread per core would see wrong TIR value */
+	if (cpu_has_feature(CPU_FTR_ARCH_207S) &&
+	    (vc->num_threads > 1 || pvc->num_threads > 1))
+		return false;
+
+	n_thr = cip->subcore_threads[sub] + pvc->num_threads;
+	if (n_thr > cip->max_subcore_threads) {
+		if (!subcore_config_ok(cip->n_subcores, n_thr))
+			return false;
+		cip->max_subcore_threads = n_thr;
+	}
+
+	cip->total_threads += pvc->num_threads;
+	cip->subcore_threads[sub] = n_thr;
+	pvc->master_vcore = vc;
+	list_del(&pvc->preempt_list);
+	list_add_tail(&pvc->preempt_list, &cip->vcs[sub]);
+
+	return true;
+}
+
+/*
+ * Work out whether it is possible to piggyback the execution of
+ * vcore *pvc onto the execution of the other vcores described in *cip.
+ */
+static bool can_piggyback(struct kvmppc_vcore *pvc, struct core_info *cip,
+			  int target_threads)
+{
+	int sub;
+
+	if (cip->total_threads + pvc->num_threads > target_threads)
+		return false;
+	for (sub = 0; sub < cip->n_subcores; ++sub)
+		if (cip->subcore_threads[sub] &&
+		    can_piggyback_subcore(pvc, cip, sub))
+			return true;
+
+	if (can_dynamic_split(pvc, cip))
+		return true;
+
+	return false;
+}
+
 static void prepare_threads(struct kvmppc_vcore *vc)
 {
 	struct kvm_vcpu *vcpu, *vnext;
@@ -1909,12 +2212,45 @@ static void prepare_threads(struct kvmppc_vcore *vc)
 	}
 }
 
-static void post_guest_process(struct kvmppc_vcore *vc)
+static void collect_piggybacks(struct core_info *cip, int target_threads)
+{
+	struct preempted_vcore_list *lp = this_cpu_ptr(&preempted_vcores);
+	struct kvmppc_vcore *pvc, *vcnext;
+
+	spin_lock(&lp->lock);
+	list_for_each_entry_safe(pvc, vcnext, &lp->list, preempt_list) {
+		if (!spin_trylock(&pvc->lock))
+			continue;
+		prepare_threads(pvc);
+		if (!pvc->n_runnable) {
+			list_del_init(&pvc->preempt_list);
+			if (pvc->runner == NULL) {
+				pvc->vcore_state = VCORE_INACTIVE;
+				kvmppc_core_end_stolen(pvc);
+			}
+			spin_unlock(&pvc->lock);
+			continue;
+		}
+		if (!can_piggyback(pvc, cip, target_threads)) {
+			spin_unlock(&pvc->lock);
+			continue;
+		}
+		kvmppc_core_end_stolen(pvc);
+		pvc->vcore_state = VCORE_PIGGYBACK;
+		if (cip->total_threads >= target_threads)
+			break;
+	}
+	spin_unlock(&lp->lock);
+}
+
+static void post_guest_process(struct kvmppc_vcore *vc, bool is_master)
 {
+	int still_running = 0;
 	u64 now;
 	long ret;
 	struct kvm_vcpu *vcpu, *vnext;
 
+	spin_lock(&vc->lock);
 	now = get_tb();
 	list_for_each_entry_safe(vcpu, vnext, &vc->runnable_threads,
 				 arch.run_list) {
@@ -1933,17 +2269,36 @@ static void post_guest_process(struct kvmppc_vcore *vc)
 		vcpu->arch.ret = ret;
 		vcpu->arch.trap = 0;
 
-		if (vcpu->arch.ceded) {
-			if (!is_kvmppc_resume_guest(ret))
-				kvmppc_end_cede(vcpu);
-			else
+		if (is_kvmppc_resume_guest(vcpu->arch.ret)) {
+			if (vcpu->arch.pending_exceptions)
+				kvmppc_core_prepare_to_enter(vcpu);
+			if (vcpu->arch.ceded)
 				kvmppc_set_timer(vcpu);
-		}
-		if (!is_kvmppc_resume_guest(vcpu->arch.ret)) {
+			else
+				++still_running;
+		} else {
 			kvmppc_remove_runnable(vc, vcpu);
 			wake_up(&vcpu->arch.cpu_run);
 		}
 	}
+	list_del_init(&vc->preempt_list);
+	if (!is_master) {
+		if (still_running > 0) {
+			kvmppc_vcore_preempt(vc);
+		} else if (vc->runner) {
+			vc->vcore_state = VCORE_PREEMPT;
+			kvmppc_core_start_stolen(vc);
+		} else {
+			vc->vcore_state = VCORE_INACTIVE;
+		}
+		if (vc->n_runnable > 0 && vc->runner == NULL) {
+			/* make sure there's a candidate runner awake */
+			vcpu = list_first_entry(&vc->runnable_threads,
+						struct kvm_vcpu, arch.run_list);
+			wake_up(&vcpu->arch.cpu_run);
+		}
+	}
+	spin_unlock(&vc->lock);
 }
 
 /*
@@ -1955,6 +2310,15 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 	struct kvm_vcpu *vcpu, *vnext;
 	int i;
 	int srcu_idx;
+	struct core_info core_info;
+	struct kvmppc_vcore *pvc, *vcnext;
+	struct kvm_split_mode split_info, *sip;
+	int split, subcore_size, active;
+	int sub;
+	bool thr0_done;
+	unsigned long cmd_bit, stat_bit;
+	int pcpu, thr;
+	int target_threads;
 
 	/*
 	 * Remove from the list any threads that have a signal pending
@@ -1969,11 +2333,8 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 	/*
 	 * Initialize *vc.
 	 */
-	vc->entry_exit_map = 0;
+	init_master_vcore(vc);
 	vc->preempt_tb = TB_NIL;
-	vc->in_guest = 0;
-	vc->napping_threads = 0;
-	vc->conferring_threads = 0;
 
 	/*
 	 * Make sure we are running on primary threads, and that secondary
@@ -1991,24 +2352,120 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 		goto out;
 	}
 
+	/*
+	 * See if we could run any other vcores on the physical core
+	 * along with this one.
+	 */
+	init_core_info(&core_info, vc);
+	pcpu = smp_processor_id();
+	target_threads = threads_per_subcore;
+	if (target_smt_mode && target_smt_mode < target_threads)
+		target_threads = target_smt_mode;
+	if (vc->num_threads < target_threads)
+		collect_piggybacks(&core_info, target_threads);
+
+	/* Decide on micro-threading (split-core) mode */
+	subcore_size = threads_per_subcore;
+	cmd_bit = stat_bit = 0;
+	split = core_info.n_subcores;
+	sip = NULL;
+	if (split > 1) {
+		/* threads_per_subcore must be MAX_SMT_THREADS (8) here */
+		if (split == 2 && (dynamic_mt_modes & 2)) {
+			cmd_bit = HID0_POWER8_1TO2LPAR;
+			stat_bit = HID0_POWER8_2LPARMODE;
+		} else {
+			split = 4;
+			cmd_bit = HID0_POWER8_1TO4LPAR;
+			stat_bit = HID0_POWER8_4LPARMODE;
+		}
+		subcore_size = MAX_SMT_THREADS / split;
+		sip = &split_info;
+		memset(&split_info, 0, sizeof(split_info));
+		split_info.rpr = mfspr(SPRN_RPR);
+		split_info.pmmar = mfspr(SPRN_PMMAR);
+		split_info.ldbar = mfspr(SPRN_LDBAR);
+		split_info.subcore_size = subcore_size;
+		for (sub = 0; sub < core_info.n_subcores; ++sub)
+			split_info.master_vcs[sub] =
+				list_first_entry(&core_info.vcs[sub],
+					struct kvmppc_vcore, preempt_list);
+		/* order writes to split_info before kvm_split_mode pointer */
+		smp_wmb();
+	}
+	pcpu = smp_processor_id();
+	for (thr = 0; thr < threads_per_subcore; ++thr)
+		paca[pcpu + thr].kvm_hstate.kvm_split_mode = sip;
+
+	/* Initiate micro-threading (split-core) if required */
+	if (cmd_bit) {
+		unsigned long hid0 = mfspr(SPRN_HID0);
+
+		hid0 |= cmd_bit | HID0_POWER8_DYNLPARDIS;
+		mb();
+		mtspr(SPRN_HID0, hid0);
+		isync();
+		for (;;) {
+			hid0 = mfspr(SPRN_HID0);
+			if (hid0 & stat_bit)
+				break;
+			cpu_relax();
+		}
+	}
 
-	vc->pcpu = smp_processor_id();
-	list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
-		kvmppc_start_thread(vcpu);
-		kvmppc_create_dtl_entry(vcpu, vc);
-		trace_kvm_guest_enter(vcpu);
+	/* Start all the threads */
+	active = 0;
+	for (sub = 0; sub < core_info.n_subcores; ++sub) {
+		thr = subcore_thread_map[sub];
+		thr0_done = false;
+		active |= 1 << thr;
+		list_for_each_entry(pvc, &core_info.vcs[sub], preempt_list) {
+			pvc->pcpu = pcpu + thr;
+			list_for_each_entry(vcpu, &pvc->runnable_threads,
+					    arch.run_list) {
+				kvmppc_start_thread(vcpu, pvc);
+				kvmppc_create_dtl_entry(vcpu, pvc);
+				trace_kvm_guest_enter(vcpu);
+				if (!vcpu->arch.ptid)
+					thr0_done = true;
+				active |= 1 << (thr + vcpu->arch.ptid);
+			}
+			/*
+			 * We need to start the first thread of each subcore
+			 * even if it doesn't have a vcpu.
+			 */
+			if (pvc->master_vcore == pvc && !thr0_done)
+				kvmppc_start_thread(NULL, pvc);
+			thr += pvc->num_threads;
+		}
 	}
 	}
 
 
-	/* Set this explicitly in case thread 0 doesn't have a vcpu */
-	get_paca()->kvm_hstate.kvm_vcore = vc;
-	get_paca()->kvm_hstate.ptid = 0;
+	/*
+	 * Ensure that split_info.do_nap is set after setting
+	 * the vcore pointer in the PACA of the secondaries.
+	 */
+	smp_mb();
+	if (cmd_bit)
+		split_info.do_nap = 1;	/* ask secondaries to nap when done */
+
+	/*
+	 * When doing micro-threading, poke the inactive threads as well.
+	 * This gets them to the nap instruction after kvm_do_nap,
+	 * which reduces the time taken to unsplit later.
+	 */
+	if (split > 1)
+		for (thr = 1; thr < threads_per_subcore; ++thr)
+			if (!(active & (1 << thr)))
+				kvmppc_ipi_thread(pcpu + thr);
 
 
 	vc->vcore_state = VCORE_RUNNING;
 	vc->vcore_state = VCORE_RUNNING;
 	preempt_disable();
 	preempt_disable();
 
 
 	trace_kvmppc_run_core(vc, 0);
 	trace_kvmppc_run_core(vc, 0);
 
 
-	spin_unlock(&vc->lock);
+	for (sub = 0; sub < core_info.n_subcores; ++sub)
+		list_for_each_entry(pvc, &core_info.vcs[sub], preempt_list)
+			spin_unlock(&pvc->lock);
 
 
 	kvm_guest_enter();
 	kvm_guest_enter();
 
 
@@ -2019,32 +2476,58 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 
 
 	__kvmppc_vcore_entry();
 	__kvmppc_vcore_entry();
 
 
-	spin_lock(&vc->lock);
-
 	if (vc->mpp_buffer)
 	if (vc->mpp_buffer)
 		kvmppc_start_saving_l2_cache(vc);
 		kvmppc_start_saving_l2_cache(vc);
 
 
-	/* disable sending of IPIs on virtual external irqs */
-	list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
-		vcpu->cpu = -1;
-	/* wait for secondary threads to finish writing their state to memory */
-	kvmppc_wait_for_nap();
-	for (i = 0; i < threads_per_subcore; ++i)
-		kvmppc_release_hwthread(vc->pcpu + i);
+	srcu_read_unlock(&vc->kvm->srcu, srcu_idx);
+
+	spin_lock(&vc->lock);
 	/* prevent other vcpu threads from doing kvmppc_start_thread() now */
 	/* prevent other vcpu threads from doing kvmppc_start_thread() now */
 	vc->vcore_state = VCORE_EXITING;
 	vc->vcore_state = VCORE_EXITING;
-	spin_unlock(&vc->lock);
 
 
-	srcu_read_unlock(&vc->kvm->srcu, srcu_idx);
+	/* wait for secondary threads to finish writing their state to memory */
+	kvmppc_wait_for_nap();
+
+	/* Return to whole-core mode if we split the core earlier */
+	if (split > 1) {
+		unsigned long hid0 = mfspr(SPRN_HID0);
+		unsigned long loops = 0;
+
+		hid0 &= ~HID0_POWER8_DYNLPARDIS;
+		stat_bit = HID0_POWER8_2LPARMODE | HID0_POWER8_4LPARMODE;
+		mb();
+		mtspr(SPRN_HID0, hid0);
+		isync();
+		for (;;) {
+			hid0 = mfspr(SPRN_HID0);
+			if (!(hid0 & stat_bit))
+				break;
+			cpu_relax();
+			++loops;
+		}
+		split_info.do_nap = 0;
+	}
+
+	/* Let secondaries go back to the offline loop */
+	for (i = 0; i < threads_per_subcore; ++i) {
+		kvmppc_release_hwthread(pcpu + i);
+		if (sip && sip->napped[i])
+			kvmppc_ipi_thread(pcpu + i);
+	}
+
+	spin_unlock(&vc->lock);
 
 
 	/* make sure updates to secondary vcpu structs are visible now */
 	/* make sure updates to secondary vcpu structs are visible now */
 	smp_mb();
 	smp_mb();
 	kvm_guest_exit();
 	kvm_guest_exit();
 
 
-	preempt_enable();
+	for (sub = 0; sub < core_info.n_subcores; ++sub)
+		list_for_each_entry_safe(pvc, vcnext, &core_info.vcs[sub],
+					 preempt_list)
+			post_guest_process(pvc, pvc == vc);
 
 
 	spin_lock(&vc->lock);
 	spin_lock(&vc->lock);
-	post_guest_process(vc);
+	preempt_enable();
 
 
  out:
  out:
 	vc->vcore_state = VCORE_INACTIVE;
 	vc->vcore_state = VCORE_INACTIVE;
@@ -2055,13 +2538,17 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
  * Wait for some other vcpu thread to execute us, and
  * Wait for some other vcpu thread to execute us, and
  * wake us up when we need to handle something in the host.
  * wake us up when we need to handle something in the host.
  */
  */
-static void kvmppc_wait_for_exec(struct kvm_vcpu *vcpu, int wait_state)
+static void kvmppc_wait_for_exec(struct kvmppc_vcore *vc,
+				 struct kvm_vcpu *vcpu, int wait_state)
 {
 {
 	DEFINE_WAIT(wait);
 	DEFINE_WAIT(wait);
 
 
 	prepare_to_wait(&vcpu->arch.cpu_run, &wait, wait_state);
 	prepare_to_wait(&vcpu->arch.cpu_run, &wait, wait_state);
-	if (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE)
+	if (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE) {
+		spin_unlock(&vc->lock);
 		schedule();
 		schedule();
+		spin_lock(&vc->lock);
+	}
 	finish_wait(&vcpu->arch.cpu_run, &wait);
 	finish_wait(&vcpu->arch.cpu_run, &wait);
 }
 }
 
 
@@ -2137,9 +2624,21 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 	 * this thread straight away and have it join in.
 	 * this thread straight away and have it join in.
 	 */
 	 */
 	if (!signal_pending(current)) {
 	if (!signal_pending(current)) {
-		if (vc->vcore_state == VCORE_RUNNING && !VCORE_IS_EXITING(vc)) {
+		if (vc->vcore_state == VCORE_PIGGYBACK) {
+			struct kvmppc_vcore *mvc = vc->master_vcore;
+			if (spin_trylock(&mvc->lock)) {
+				if (mvc->vcore_state == VCORE_RUNNING &&
+				    !VCORE_IS_EXITING(mvc)) {
+					kvmppc_create_dtl_entry(vcpu, vc);
+					kvmppc_start_thread(vcpu, vc);
+					trace_kvm_guest_enter(vcpu);
+				}
+				spin_unlock(&mvc->lock);
+			}
+		} else if (vc->vcore_state == VCORE_RUNNING &&
+			   !VCORE_IS_EXITING(vc)) {
 			kvmppc_create_dtl_entry(vcpu, vc);
 			kvmppc_create_dtl_entry(vcpu, vc);
-			kvmppc_start_thread(vcpu);
+			kvmppc_start_thread(vcpu, vc);
 			trace_kvm_guest_enter(vcpu);
 			trace_kvm_guest_enter(vcpu);
 		} else if (vc->vcore_state == VCORE_SLEEPING) {
 		} else if (vc->vcore_state == VCORE_SLEEPING) {
 			wake_up(&vc->wq);
 			wake_up(&vc->wq);
@@ -2149,10 +2648,11 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 
 
 	while (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE &&
 	while (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE &&
 	       !signal_pending(current)) {
 	       !signal_pending(current)) {
+		if (vc->vcore_state == VCORE_PREEMPT && vc->runner == NULL)
+			kvmppc_vcore_end_preempt(vc);
+
 		if (vc->vcore_state != VCORE_INACTIVE) {
 		if (vc->vcore_state != VCORE_INACTIVE) {
-			spin_unlock(&vc->lock);
-			kvmppc_wait_for_exec(vcpu, TASK_INTERRUPTIBLE);
-			spin_lock(&vc->lock);
+			kvmppc_wait_for_exec(vc, vcpu, TASK_INTERRUPTIBLE);
 			continue;
 			continue;
 		}
 		}
 		list_for_each_entry_safe(v, vn, &vc->runnable_threads,
 		list_for_each_entry_safe(v, vn, &vc->runnable_threads,
@@ -2179,10 +2679,11 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 		if (n_ceded == vc->n_runnable) {
 		if (n_ceded == vc->n_runnable) {
 			kvmppc_vcore_blocked(vc);
 			kvmppc_vcore_blocked(vc);
 		} else if (need_resched()) {
 		} else if (need_resched()) {
-			vc->vcore_state = VCORE_PREEMPT;
+			kvmppc_vcore_preempt(vc);
 			/* Let something else run */
 			/* Let something else run */
 			cond_resched_lock(&vc->lock);
 			cond_resched_lock(&vc->lock);
-			vc->vcore_state = VCORE_INACTIVE;
+			if (vc->vcore_state == VCORE_PREEMPT)
+				kvmppc_vcore_end_preempt(vc);
 		} else {
 		} else {
 			kvmppc_run_core(vc);
 			kvmppc_run_core(vc);
 		}
 		}
@@ -2191,11 +2692,8 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 
 
 	while (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE &&
 	while (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE &&
 	       (vc->vcore_state == VCORE_RUNNING ||
 	       (vc->vcore_state == VCORE_RUNNING ||
-		vc->vcore_state == VCORE_EXITING)) {
-		spin_unlock(&vc->lock);
-		kvmppc_wait_for_exec(vcpu, TASK_UNINTERRUPTIBLE);
-		spin_lock(&vc->lock);
-	}
+		vc->vcore_state == VCORE_EXITING))
+		kvmppc_wait_for_exec(vc, vcpu, TASK_UNINTERRUPTIBLE);
 
 
 	if (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE) {
 	if (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE) {
 		kvmppc_remove_runnable(vc, vcpu);
 		kvmppc_remove_runnable(vc, vcpu);
@@ -2755,6 +3253,8 @@ static int kvmppc_book3s_init_hv(void)
 
 
 	init_default_hcalls();
 	init_default_hcalls();
 
 
+	init_vcore_lists();
+
 	r = kvmppc_mmu_hv_init();
 	r = kvmppc_mmu_hv_init();
 	return r;
 	return r;
 }
 }

+ 28 - 4
arch/powerpc/kvm/book3s_hv_builtin.c

@@ -110,14 +110,15 @@ void __init kvm_cma_reserve(void)
 long int kvmppc_rm_h_confer(struct kvm_vcpu *vcpu, int target,
 long int kvmppc_rm_h_confer(struct kvm_vcpu *vcpu, int target,
 			    unsigned int yield_count)
 			    unsigned int yield_count)
 {
 {
-	struct kvmppc_vcore *vc = vcpu->arch.vcore;
+	struct kvmppc_vcore *vc = local_paca->kvm_hstate.kvm_vcore;
+	int ptid = local_paca->kvm_hstate.ptid;
 	int threads_running;
 	int threads_running;
 	int threads_ceded;
 	int threads_ceded;
 	int threads_conferring;
 	int threads_conferring;
 	u64 stop = get_tb() + 10 * tb_ticks_per_usec;
 	u64 stop = get_tb() + 10 * tb_ticks_per_usec;
 	int rv = H_SUCCESS; /* => don't yield */
 	int rv = H_SUCCESS; /* => don't yield */
 
 
-	set_bit(vcpu->arch.ptid, &vc->conferring_threads);
+	set_bit(ptid, &vc->conferring_threads);
 	while ((get_tb() < stop) && !VCORE_IS_EXITING(vc)) {
 	while ((get_tb() < stop) && !VCORE_IS_EXITING(vc)) {
 		threads_running = VCORE_ENTRY_MAP(vc);
 		threads_running = VCORE_ENTRY_MAP(vc);
 		threads_ceded = vc->napping_threads;
 		threads_ceded = vc->napping_threads;
@@ -127,7 +128,7 @@ long int kvmppc_rm_h_confer(struct kvm_vcpu *vcpu, int target,
 			break;
 			break;
 		}
 		}
 	}
 	}
-	clear_bit(vcpu->arch.ptid, &vc->conferring_threads);
+	clear_bit(ptid, &vc->conferring_threads);
 	return rv;
 	return rv;
 }
 }
 
 
@@ -238,7 +239,8 @@ void kvmhv_commence_exit(int trap)
 {
 {
 	struct kvmppc_vcore *vc = local_paca->kvm_hstate.kvm_vcore;
 	struct kvmppc_vcore *vc = local_paca->kvm_hstate.kvm_vcore;
 	int ptid = local_paca->kvm_hstate.ptid;
 	int ptid = local_paca->kvm_hstate.ptid;
-	int me, ee;
+	struct kvm_split_mode *sip = local_paca->kvm_hstate.kvm_split_mode;
+	int me, ee, i;
 
 
 	/* Set our bit in the threads-exiting-guest map in the 0xff00
 	/* Set our bit in the threads-exiting-guest map in the 0xff00
 	   bits of vcore->entry_exit_map */
 	   bits of vcore->entry_exit_map */
@@ -258,4 +260,26 @@ void kvmhv_commence_exit(int trap)
 	 */
 	 */
 	if (trap != BOOK3S_INTERRUPT_HV_DECREMENTER)
 	if (trap != BOOK3S_INTERRUPT_HV_DECREMENTER)
 		kvmhv_interrupt_vcore(vc, ee & ~(1 << ptid));
 		kvmhv_interrupt_vcore(vc, ee & ~(1 << ptid));
+
+	/*
+	 * If we are doing dynamic micro-threading, interrupt the other
+	 * subcores to pull them out of their guests too.
+	 */
+	if (!sip)
+		return;
+
+	for (i = 0; i < MAX_SUBCORES; ++i) {
+		vc = sip->master_vcs[i];
+		if (!vc)
+			break;
+		do {
+			ee = vc->entry_exit_map;
+			/* Already asked to exit? */
+			if ((ee >> 8) != 0)
+				break;
+		} while (cmpxchg(&vc->entry_exit_map, ee,
+				 ee | VCORE_EXIT_REQ) != ee);
+		if ((ee >> 8) == 0)
+			kvmhv_interrupt_vcore(vc, ee);
+	}
 }
 }

+ 148 - 13
arch/powerpc/kvm/book3s_hv_rm_mmu.c

@@ -12,6 +12,7 @@
 #include <linux/kvm_host.h>
 #include <linux/kvm_host.h>
 #include <linux/hugetlb.h>
 #include <linux/hugetlb.h>
 #include <linux/module.h>
 #include <linux/module.h>
+#include <linux/log2.h>
 
 
 #include <asm/tlbflush.h>
 #include <asm/tlbflush.h>
 #include <asm/kvm_ppc.h>
 #include <asm/kvm_ppc.h>
@@ -97,25 +98,52 @@ void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev,
 }
 }
 EXPORT_SYMBOL_GPL(kvmppc_add_revmap_chain);
 EXPORT_SYMBOL_GPL(kvmppc_add_revmap_chain);
 
 
+/* Update the changed page order field of an rmap entry */
+void kvmppc_update_rmap_change(unsigned long *rmap, unsigned long psize)
+{
+	unsigned long order;
+
+	if (!psize)
+		return;
+	order = ilog2(psize);
+	order <<= KVMPPC_RMAP_CHG_SHIFT;
+	if (order > (*rmap & KVMPPC_RMAP_CHG_ORDER))
+		*rmap = (*rmap & ~KVMPPC_RMAP_CHG_ORDER) | order;
+}
+EXPORT_SYMBOL_GPL(kvmppc_update_rmap_change);
+
+/* Returns a pointer to the revmap entry for the page mapped by a HPTE */
+static unsigned long *revmap_for_hpte(struct kvm *kvm, unsigned long hpte_v,
+				      unsigned long hpte_gr)
+{
+	struct kvm_memory_slot *memslot;
+	unsigned long *rmap;
+	unsigned long gfn;
+
+	gfn = hpte_rpn(hpte_gr, hpte_page_size(hpte_v, hpte_gr));
+	memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn);
+	if (!memslot)
+		return NULL;
+
+	rmap = real_vmalloc_addr(&memslot->arch.rmap[gfn - memslot->base_gfn]);
+	return rmap;
+}
+
 /* Remove this HPTE from the chain for a real page */
 /* Remove this HPTE from the chain for a real page */
 static void remove_revmap_chain(struct kvm *kvm, long pte_index,
 static void remove_revmap_chain(struct kvm *kvm, long pte_index,
 				struct revmap_entry *rev,
 				struct revmap_entry *rev,
 				unsigned long hpte_v, unsigned long hpte_r)
 				unsigned long hpte_v, unsigned long hpte_r)
 {
 {
 	struct revmap_entry *next, *prev;
 	struct revmap_entry *next, *prev;
-	unsigned long gfn, ptel, head;
-	struct kvm_memory_slot *memslot;
+	unsigned long ptel, head;
 	unsigned long *rmap;
 	unsigned long *rmap;
 	unsigned long rcbits;
 	unsigned long rcbits;
 
 
 	rcbits = hpte_r & (HPTE_R_R | HPTE_R_C);
 	rcbits = hpte_r & (HPTE_R_R | HPTE_R_C);
 	ptel = rev->guest_rpte |= rcbits;
 	ptel = rev->guest_rpte |= rcbits;
-	gfn = hpte_rpn(ptel, hpte_page_size(hpte_v, ptel));
-	memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn);
-	if (!memslot)
+	rmap = revmap_for_hpte(kvm, hpte_v, ptel);
+	if (!rmap)
 		return;
 		return;
-
-	rmap = real_vmalloc_addr(&memslot->arch.rmap[gfn - memslot->base_gfn]);
 	lock_rmap(rmap);
 	lock_rmap(rmap);
 
 
 	head = *rmap & KVMPPC_RMAP_INDEX;
 	head = *rmap & KVMPPC_RMAP_INDEX;
@@ -131,6 +159,8 @@ static void remove_revmap_chain(struct kvm *kvm, long pte_index,
 			*rmap = (*rmap & ~KVMPPC_RMAP_INDEX) | head;
 			*rmap = (*rmap & ~KVMPPC_RMAP_INDEX) | head;
 	}
 	}
 	*rmap |= rcbits << KVMPPC_RMAP_RC_SHIFT;
 	*rmap |= rcbits << KVMPPC_RMAP_RC_SHIFT;
+	if (rcbits & HPTE_R_C)
+		kvmppc_update_rmap_change(rmap, hpte_page_size(hpte_v, hpte_r));
 	unlock_rmap(rmap);
 	unlock_rmap(rmap);
 }
 }
 
 
@@ -421,14 +451,20 @@ long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags,
 	rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
 	rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
 	v = pte & ~HPTE_V_HVLOCK;
 	v = pte & ~HPTE_V_HVLOCK;
 	if (v & HPTE_V_VALID) {
 	if (v & HPTE_V_VALID) {
-		u64 pte1;
-
-		pte1 = be64_to_cpu(hpte[1]);
 		hpte[0] &= ~cpu_to_be64(HPTE_V_VALID);
 		hpte[0] &= ~cpu_to_be64(HPTE_V_VALID);
-		rb = compute_tlbie_rb(v, pte1, pte_index);
+		rb = compute_tlbie_rb(v, be64_to_cpu(hpte[1]), pte_index);
 		do_tlbies(kvm, &rb, 1, global_invalidates(kvm, flags), true);
 		do_tlbies(kvm, &rb, 1, global_invalidates(kvm, flags), true);
-		/* Read PTE low word after tlbie to get final R/C values */
-		remove_revmap_chain(kvm, pte_index, rev, v, pte1);
+		/*
+		 * The reference (R) and change (C) bits in a HPT
+		 * entry can be set by hardware at any time up until
+		 * the HPTE is invalidated and the TLB invalidation
+		 * sequence has completed.  This means that when
+		 * removing a HPTE, we need to re-read the HPTE after
+		 * the invalidation sequence has completed in order to
+		 * obtain reliable values of R and C.
+		 */
+		remove_revmap_chain(kvm, pte_index, rev, v,
+				    be64_to_cpu(hpte[1]));
 	}
 	}
 	r = rev->guest_rpte & ~HPTE_GR_RESERVED;
 	r = rev->guest_rpte & ~HPTE_GR_RESERVED;
 	note_hpte_modification(kvm, rev);
 	note_hpte_modification(kvm, rev);
@@ -655,6 +691,105 @@ long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags,
 	return H_SUCCESS;
 	return H_SUCCESS;
 }
 }
 
 
+long kvmppc_h_clear_ref(struct kvm_vcpu *vcpu, unsigned long flags,
+			unsigned long pte_index)
+{
+	struct kvm *kvm = vcpu->kvm;
+	__be64 *hpte;
+	unsigned long v, r, gr;
+	struct revmap_entry *rev;
+	unsigned long *rmap;
+	long ret = H_NOT_FOUND;
+
+	if (pte_index >= kvm->arch.hpt_npte)
+		return H_PARAMETER;
+
+	rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
+	hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4));
+	while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
+		cpu_relax();
+	v = be64_to_cpu(hpte[0]);
+	r = be64_to_cpu(hpte[1]);
+	if (!(v & (HPTE_V_VALID | HPTE_V_ABSENT)))
+		goto out;
+
+	gr = rev->guest_rpte;
+	if (rev->guest_rpte & HPTE_R_R) {
+		rev->guest_rpte &= ~HPTE_R_R;
+		note_hpte_modification(kvm, rev);
+	}
+	if (v & HPTE_V_VALID) {
+		gr |= r & (HPTE_R_R | HPTE_R_C);
+		if (r & HPTE_R_R) {
+			kvmppc_clear_ref_hpte(kvm, hpte, pte_index);
+			rmap = revmap_for_hpte(kvm, v, gr);
+			if (rmap) {
+				lock_rmap(rmap);
+				*rmap |= KVMPPC_RMAP_REFERENCED;
+				unlock_rmap(rmap);
+			}
+		}
+	}
+	vcpu->arch.gpr[4] = gr;
+	ret = H_SUCCESS;
+ out:
+	unlock_hpte(hpte, v & ~HPTE_V_HVLOCK);
+	return ret;
+}
+
+long kvmppc_h_clear_mod(struct kvm_vcpu *vcpu, unsigned long flags,
+			unsigned long pte_index)
+{
+	struct kvm *kvm = vcpu->kvm;
+	__be64 *hpte;
+	unsigned long v, r, gr;
+	struct revmap_entry *rev;
+	unsigned long *rmap;
+	long ret = H_NOT_FOUND;
+
+	if (pte_index >= kvm->arch.hpt_npte)
+		return H_PARAMETER;
+
+	rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
+	hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4));
+	while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
+		cpu_relax();
+	v = be64_to_cpu(hpte[0]);
+	r = be64_to_cpu(hpte[1]);
+	if (!(v & (HPTE_V_VALID | HPTE_V_ABSENT)))
+		goto out;
+
+	gr = rev->guest_rpte;
+	if (gr & HPTE_R_C) {
+		rev->guest_rpte &= ~HPTE_R_C;
+		note_hpte_modification(kvm, rev);
+	}
+	if (v & HPTE_V_VALID) {
+		/* need to make it temporarily absent so C is stable */
+		hpte[0] |= cpu_to_be64(HPTE_V_ABSENT);
+		kvmppc_invalidate_hpte(kvm, hpte, pte_index);
+		r = be64_to_cpu(hpte[1]);
+		gr |= r & (HPTE_R_R | HPTE_R_C);
+		if (r & HPTE_R_C) {
+			unsigned long psize = hpte_page_size(v, r);
+			hpte[1] = cpu_to_be64(r & ~HPTE_R_C);
+			eieio();
+			rmap = revmap_for_hpte(kvm, v, gr);
+			if (rmap) {
+				lock_rmap(rmap);
+				*rmap |= KVMPPC_RMAP_CHANGED;
+				kvmppc_update_rmap_change(rmap, psize);
+				unlock_rmap(rmap);
+			}
+		}
+	}
+	vcpu->arch.gpr[4] = gr;
+	ret = H_SUCCESS;
+ out:
+	unlock_hpte(hpte, v & ~HPTE_V_HVLOCK);
+	return ret;
+}
+
 void kvmppc_invalidate_hpte(struct kvm *kvm, __be64 *hptep,
 void kvmppc_invalidate_hpte(struct kvm *kvm, __be64 *hptep,
 			unsigned long pte_index)
 			unsigned long pte_index)
 {
 {

+ 1 - 3
arch/powerpc/kvm/book3s_hv_rm_xics.c

@@ -67,14 +67,12 @@ static void icp_rm_set_vcpu_irq(struct kvm_vcpu *vcpu,
 	}
 	}
 
 
 	/* Check if the core is loaded, if not, too hard */
 	/* Check if the core is loaded, if not, too hard */
-	cpu = vcpu->cpu;
+	cpu = vcpu->arch.thread_cpu;
 	if (cpu < 0 || cpu >= nr_cpu_ids) {
 	if (cpu < 0 || cpu >= nr_cpu_ids) {
 		this_icp->rm_action |= XICS_RM_KICK_VCPU;
 		this_icp->rm_action |= XICS_RM_KICK_VCPU;
 		this_icp->rm_kick_target = vcpu;
 		this_icp->rm_kick_target = vcpu;
 		return;
 		return;
 	}
 	}
-	/* In SMT cpu will always point to thread 0, we adjust it */
-	cpu += vcpu->arch.ptid;
 
 
 	smp_mb();
 	smp_mb();
 	kvmhv_rm_send_ipi(cpu);
 	kvmhv_rm_send_ipi(cpu);

+ 117 - 20
arch/powerpc/kvm/book3s_hv_rmhandlers.S

@@ -128,6 +128,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 	subf	r4, r4, r3
 	subf	r4, r4, r3
 	mtspr	SPRN_DEC, r4
 	mtspr	SPRN_DEC, r4
 
 
+	/* hwthread_req may have got set by cede or no vcpu, so clear it */
+	li	r0, 0
+	stb	r0, HSTATE_HWTHREAD_REQ(r13)
+
 	/*
 	/*
 	 * For external and machine check interrupts, we need
 	 * For external and machine check interrupts, we need
 	 * to call the Linux handler to process the interrupt.
 	 * to call the Linux handler to process the interrupt.
@@ -215,7 +219,6 @@ kvm_novcpu_wakeup:
 	ld	r5, HSTATE_KVM_VCORE(r13)
 	ld	r5, HSTATE_KVM_VCORE(r13)
 	li	r0, 0
 	li	r0, 0
 	stb	r0, HSTATE_NAPPING(r13)
 	stb	r0, HSTATE_NAPPING(r13)
-	stb	r0, HSTATE_HWTHREAD_REQ(r13)
 
 
 	/* check the wake reason */
 	/* check the wake reason */
 	bl	kvmppc_check_wake_reason
 	bl	kvmppc_check_wake_reason
@@ -315,10 +318,10 @@ kvm_start_guest:
 	cmpdi	r3, 0
 	cmpdi	r3, 0
 	bge	kvm_no_guest
 	bge	kvm_no_guest
 
 
-	/* get vcpu pointer, NULL if we have no vcpu to run */
-	ld	r4,HSTATE_KVM_VCPU(r13)
-	cmpdi	r4,0
-	/* if we have no vcpu to run, go back to sleep */
+	/* get vcore pointer, NULL if we have nothing to run */
+	ld	r5,HSTATE_KVM_VCORE(r13)
+	cmpdi	r5,0
+	/* if we have no vcore to run, go back to sleep */
 	beq	kvm_no_guest
 	beq	kvm_no_guest
 
 
 kvm_secondary_got_guest:
 kvm_secondary_got_guest:
@@ -327,21 +330,42 @@ kvm_secondary_got_guest:
 	ld	r6, PACA_DSCR_DEFAULT(r13)
 	ld	r6, PACA_DSCR_DEFAULT(r13)
 	std	r6, HSTATE_DSCR(r13)
 	std	r6, HSTATE_DSCR(r13)
 
 
-	/* Order load of vcore, ptid etc. after load of vcpu */
+	/* On thread 0 of a subcore, set HDEC to max */
+	lbz	r4, HSTATE_PTID(r13)
+	cmpwi	r4, 0
+	bne	63f
+	lis	r6, 0x7fff
+	ori	r6, r6, 0xffff
+	mtspr	SPRN_HDEC, r6
+	/* and set per-LPAR registers, if doing dynamic micro-threading */
+	ld	r6, HSTATE_SPLIT_MODE(r13)
+	cmpdi	r6, 0
+	beq	63f
+	ld	r0, KVM_SPLIT_RPR(r6)
+	mtspr	SPRN_RPR, r0
+	ld	r0, KVM_SPLIT_PMMAR(r6)
+	mtspr	SPRN_PMMAR, r0
+	ld	r0, KVM_SPLIT_LDBAR(r6)
+	mtspr	SPRN_LDBAR, r0
+	isync
+63:
+	/* Order load of vcpu after load of vcore */
 	lwsync
 	lwsync
+	ld	r4, HSTATE_KVM_VCPU(r13)
 	bl	kvmppc_hv_entry
 	bl	kvmppc_hv_entry
 
 
 	/* Back from the guest, go back to nap */
 	/* Back from the guest, go back to nap */
-	/* Clear our vcpu pointer so we don't come back in early */
+	/* Clear our vcpu and vcore pointers so we don't come back in early */
 	li	r0, 0
 	li	r0, 0
+	std	r0, HSTATE_KVM_VCPU(r13)
 	/*
 	/*
-	 * Once we clear HSTATE_KVM_VCPU(r13), the code in
+	 * Once we clear HSTATE_KVM_VCORE(r13), the code in
 	 * kvmppc_run_core() is going to assume that all our vcpu
 	 * kvmppc_run_core() is going to assume that all our vcpu
 	 * state is visible in memory.  This lwsync makes sure
 	 * state is visible in memory.  This lwsync makes sure
 	 * that that is true.
 	 * that that is true.
 	 */
 	 */
 	lwsync
 	lwsync
-	std	r0, HSTATE_KVM_VCPU(r13)
+	std	r0, HSTATE_KVM_VCORE(r13)
 
 
 /*
 /*
  * At this point we have finished executing in the guest.
  * At this point we have finished executing in the guest.
@@ -374,16 +398,71 @@ kvm_no_guest:
 	b	power7_wakeup_loss
 	b	power7_wakeup_loss
 
 
 53:	HMT_LOW
 53:	HMT_LOW
-	ld	r4, HSTATE_KVM_VCPU(r13)
-	cmpdi	r4, 0
+	ld	r5, HSTATE_KVM_VCORE(r13)
+	cmpdi	r5, 0
+	bne	60f
+	ld	r3, HSTATE_SPLIT_MODE(r13)
+	cmpdi	r3, 0
+	beq	kvm_no_guest
+	lbz	r0, KVM_SPLIT_DO_NAP(r3)
+	cmpwi	r0, 0
 	beq	kvm_no_guest
 	beq	kvm_no_guest
 	HMT_MEDIUM
 	HMT_MEDIUM
+	b	kvm_unsplit_nap
+60:	HMT_MEDIUM
 	b	kvm_secondary_got_guest
 	b	kvm_secondary_got_guest
 
 
 54:	li	r0, KVM_HWTHREAD_IN_KVM
 54:	li	r0, KVM_HWTHREAD_IN_KVM
 	stb	r0, HSTATE_HWTHREAD_STATE(r13)
 	stb	r0, HSTATE_HWTHREAD_STATE(r13)
 	b	kvm_no_guest
 	b	kvm_no_guest
 
 
+/*
+ * Here the primary thread is trying to return the core to
+ * whole-core mode, so we need to nap.
+ */
+kvm_unsplit_nap:
+	/*
+	 * Ensure that secondary doesn't nap when it has
+	 * its vcore pointer set.
+	 */
+	sync		/* matches smp_mb() before setting split_info.do_nap */
+	ld	r0, HSTATE_KVM_VCORE(r13)
+	cmpdi	r0, 0
+	bne	kvm_no_guest
+	/* clear any pending message */
+BEGIN_FTR_SECTION
+	lis	r6, (PPC_DBELL_SERVER << (63-36))@h
+	PPC_MSGCLR(6)
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+	/* Set kvm_split_mode.napped[tid] = 1 */
+	ld	r3, HSTATE_SPLIT_MODE(r13)
+	li	r0, 1
+	lhz	r4, PACAPACAINDEX(r13)
+	clrldi	r4, r4, 61	/* micro-threading => P8 => 8 threads/core */
+	addi	r4, r4, KVM_SPLIT_NAPPED
+	stbx	r0, r3, r4
+	/* Check the do_nap flag again after setting napped[] */
+	sync
+	lbz	r0, KVM_SPLIT_DO_NAP(r3)
+	cmpwi	r0, 0
+	beq	57f
+	li	r3, (LPCR_PECEDH | LPCR_PECE0) >> 4
+	mfspr	r4, SPRN_LPCR
+	rlwimi	r4, r3, 4, (LPCR_PECEDP | LPCR_PECEDH | LPCR_PECE0 | LPCR_PECE1)
+	mtspr	SPRN_LPCR, r4
+	isync
+	std	r0, HSTATE_SCRATCH0(r13)
+	ptesync
+	ld	r0, HSTATE_SCRATCH0(r13)
+1:	cmpd	r0, r0
+	bne	1b
+	nap
+	b	.
+
+57:	li	r0, 0
+	stbx	r0, r3, r4
+	b	kvm_no_guest
+
 /******************************************************************************
 /******************************************************************************
  *                                                                            *
  *                                                                            *
  *                               Entry code                                   *
  *                               Entry code                                   *
@@ -854,7 +933,10 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
 	cmpwi	r0, 0
 	cmpwi	r0, 0
 	bne	21f
 	bne	21f
 	HMT_LOW
 	HMT_LOW
-20:	lbz	r0, VCORE_IN_GUEST(r5)
+20:	lwz	r3, VCORE_ENTRY_EXIT(r5)
+	cmpwi	r3, 0x100
+	bge	no_switch_exit
+	lbz	r0, VCORE_IN_GUEST(r5)
 	cmpwi	r0, 0
 	cmpwi	r0, 0
 	beq	20b
 	beq	20b
 	HMT_MEDIUM
 	HMT_MEDIUM
@@ -870,7 +952,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
 	blt	hdec_soon
 	blt	hdec_soon
 
 
 	ld	r6, VCPU_CTR(r4)
 	ld	r6, VCPU_CTR(r4)
-	lwz	r7, VCPU_XER(r4)
+	ld	r7, VCPU_XER(r4)
 
 
 	mtctr	r6
 	mtctr	r6
 	mtxer	r7
 	mtxer	r7
@@ -985,9 +1067,13 @@ secondary_too_late:
 #endif
 #endif
 11:	b	kvmhv_switch_to_host
 11:	b	kvmhv_switch_to_host
 
 
+no_switch_exit:
+	HMT_MEDIUM
+	li	r12, 0
+	b	12f
 hdec_soon:
 hdec_soon:
 	li	r12, BOOK3S_INTERRUPT_HV_DECREMENTER
 	li	r12, BOOK3S_INTERRUPT_HV_DECREMENTER
-	stw	r12, VCPU_TRAP(r4)
+12:	stw	r12, VCPU_TRAP(r4)
 	mr	r9, r4
 	mr	r9, r4
 #ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
 #ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
 	addi	r3, r4, VCPU_TB_RMEXIT
 	addi	r3, r4, VCPU_TB_RMEXIT
@@ -1103,7 +1189,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 	mfctr	r3
 	mfctr	r3
 	mfxer	r4
 	mfxer	r4
 	std	r3, VCPU_CTR(r9)
 	std	r3, VCPU_CTR(r9)
-	stw	r4, VCPU_XER(r9)
+	std	r4, VCPU_XER(r9)
 
 
 	/* If this is a page table miss then see if it's theirs or ours */
 	/* If this is a page table miss then see if it's theirs or ours */
 	cmpwi	r12, BOOK3S_INTERRUPT_H_DATA_STORAGE
 	cmpwi	r12, BOOK3S_INTERRUPT_H_DATA_STORAGE
@@ -1127,6 +1213,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 	cmpwi	r12, BOOK3S_INTERRUPT_H_DOORBELL
 	cmpwi	r12, BOOK3S_INTERRUPT_H_DOORBELL
 	bne	3f
 	bne	3f
 	lbz	r0, HSTATE_HOST_IPI(r13)
 	lbz	r0, HSTATE_HOST_IPI(r13)
+	cmpwi	r0, 0
 	beq	4f
 	beq	4f
 	b	guest_exit_cont
 	b	guest_exit_cont
 3:
 3:
@@ -1176,6 +1263,11 @@ mc_cont:
 	ld	r9, HSTATE_KVM_VCPU(r13)
 	ld	r9, HSTATE_KVM_VCPU(r13)
 	lwz	r12, VCPU_TRAP(r9)
 	lwz	r12, VCPU_TRAP(r9)
 
 
+	/* Stop others sending VCPU interrupts to this physical CPU */
+	li	r0, -1
+	stw	r0, VCPU_CPU(r9)
+	stw	r0, VCPU_THREAD_CPU(r9)
+
 	/* Save guest CTRL register, set runlatch to 1 */
 	/* Save guest CTRL register, set runlatch to 1 */
 	mfspr	r6,SPRN_CTRLF
 	mfspr	r6,SPRN_CTRLF
 	stw	r6,VCPU_CTRL(r9)
 	stw	r6,VCPU_CTRL(r9)
@@ -1540,12 +1632,17 @@ kvmhv_switch_to_host:
 
 
 	/* Primary thread waits for all the secondaries to exit guest */
 	/* Primary thread waits for all the secondaries to exit guest */
 15:	lwz	r3,VCORE_ENTRY_EXIT(r5)
 15:	lwz	r3,VCORE_ENTRY_EXIT(r5)
-	srwi	r0,r3,8
+	rlwinm	r0,r3,32-8,0xff
 	clrldi	r3,r3,56
 	clrldi	r3,r3,56
 	cmpw	r3,r0
 	cmpw	r3,r0
 	bne	15b
 	bne	15b
 	isync
 	isync
 
 
+	/* Did we actually switch to the guest at all? */
+	lbz	r6, VCORE_IN_GUEST(r5)
+	cmpwi	r6, 0
+	beq	19f
+
 	/* Primary thread switches back to host partition */
 	/* Primary thread switches back to host partition */
 	ld	r6,KVM_HOST_SDR1(r4)
 	ld	r6,KVM_HOST_SDR1(r4)
 	lwz	r7,KVM_HOST_LPID(r4)
 	lwz	r7,KVM_HOST_LPID(r4)
@@ -1589,7 +1686,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 18:
 18:
 	/* Signal secondary CPUs to continue */
 	/* Signal secondary CPUs to continue */
 	stb	r0,VCORE_IN_GUEST(r5)
 	stb	r0,VCORE_IN_GUEST(r5)
-	lis	r8,0x7fff		/* MAX_INT@h */
+19:	lis	r8,0x7fff		/* MAX_INT@h */
 	mtspr	SPRN_HDEC,r8
 	mtspr	SPRN_HDEC,r8
 
 
 16:	ld	r8,KVM_HOST_LPCR(r4)
 16:	ld	r8,KVM_HOST_LPCR(r4)
@@ -1675,7 +1772,7 @@ kvmppc_hdsi:
 	bl	kvmppc_msr_interrupt
 	bl	kvmppc_msr_interrupt
 fast_interrupt_c_return:
 fast_interrupt_c_return:
 6:	ld	r7, VCPU_CTR(r9)
 6:	ld	r7, VCPU_CTR(r9)
-	lwz	r8, VCPU_XER(r9)
+	ld	r8, VCPU_XER(r9)
 	mtctr	r7
 	mtctr	r7
 	mtxer	r8
 	mtxer	r8
 	mr	r4, r9
 	mr	r4, r9
@@ -1816,8 +1913,8 @@ hcall_real_table:
 	.long	DOTSYM(kvmppc_h_remove) - hcall_real_table
 	.long	DOTSYM(kvmppc_h_remove) - hcall_real_table
 	.long	DOTSYM(kvmppc_h_enter) - hcall_real_table
 	.long	DOTSYM(kvmppc_h_enter) - hcall_real_table
 	.long	DOTSYM(kvmppc_h_read) - hcall_real_table
 	.long	DOTSYM(kvmppc_h_read) - hcall_real_table
-	.long	0		/* 0x10 - H_CLEAR_MOD */
-	.long	0		/* 0x14 - H_CLEAR_REF */
+	.long	DOTSYM(kvmppc_h_clear_mod) - hcall_real_table
+	.long	DOTSYM(kvmppc_h_clear_ref) - hcall_real_table
 	.long	DOTSYM(kvmppc_h_protect) - hcall_real_table
 	.long	DOTSYM(kvmppc_h_protect) - hcall_real_table
 	.long	DOTSYM(kvmppc_h_get_tce) - hcall_real_table
 	.long	DOTSYM(kvmppc_h_get_tce) - hcall_real_table
 	.long	DOTSYM(kvmppc_h_put_tce) - hcall_real_table
 	.long	DOTSYM(kvmppc_h_put_tce) - hcall_real_table

+ 1 - 1
arch/powerpc/kvm/book3s_paired_singles.c

@@ -352,7 +352,7 @@ static inline u32 inst_get_field(u32 inst, int msb, int lsb)
 	return kvmppc_get_field(inst, msb + 32, lsb + 32);
 	return kvmppc_get_field(inst, msb + 32, lsb + 32);
 }
 }
 
 
-bool kvmppc_inst_is_paired_single(struct kvm_vcpu *vcpu, u32 inst)
+static bool kvmppc_inst_is_paired_single(struct kvm_vcpu *vcpu, u32 inst)
 {
 {
 	if (!(vcpu->arch.hflags & BOOK3S_HFLAG_PAIRED_SINGLE))
 	if (!(vcpu->arch.hflags & BOOK3S_HFLAG_PAIRED_SINGLE))
 		return false;
 		return false;

+ 2 - 2
arch/powerpc/kvm/book3s_segment.S

@@ -123,7 +123,7 @@ no_dcbz32_on:
 	PPC_LL	r8, SVCPU_CTR(r3)
 	PPC_LL	r8, SVCPU_CTR(r3)
 	PPC_LL	r9, SVCPU_LR(r3)
 	PPC_LL	r9, SVCPU_LR(r3)
 	lwz	r10, SVCPU_CR(r3)
 	lwz	r10, SVCPU_CR(r3)
-	lwz	r11, SVCPU_XER(r3)
+	PPC_LL	r11, SVCPU_XER(r3)
 
 
 	mtctr	r8
 	mtctr	r8
 	mtlr	r9
 	mtlr	r9
@@ -237,7 +237,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
 	mfctr	r8
 	mfctr	r8
 	mflr	r9
 	mflr	r9
 
 
-	stw	r5, SVCPU_XER(r13)
+	PPC_STL	r5, SVCPU_XER(r13)
 	PPC_STL	r6, SVCPU_FAULT_DAR(r13)
 	PPC_STL	r6, SVCPU_FAULT_DAR(r13)
 	stw	r7, SVCPU_FAULT_DSISR(r13)
 	stw	r7, SVCPU_FAULT_DSISR(r13)
 	PPC_STL	r8, SVCPU_CTR(r13)
 	PPC_STL	r8, SVCPU_CTR(r13)

+ 1 - 1
arch/powerpc/kvm/book3s_xics.c

@@ -41,7 +41,7 @@
  * =======
  * =======
  *
  *
  * Each ICS has a spin lock protecting the information about the IRQ
  * Each ICS has a spin lock protecting the information about the IRQ
- * sources and avoiding simultaneous deliveries if the same interrupt.
+ * sources and avoiding simultaneous deliveries of the same interrupt.
  *
  *
  * ICP operations are done via a single compare & swap transaction
  * ICP operations are done via a single compare & swap transaction
  * (most ICP state fits in the union kvmppc_icp_state)
  * (most ICP state fits in the union kvmppc_icp_state)

+ 1 - 0
arch/powerpc/kvm/booke.c

@@ -933,6 +933,7 @@ static void kvmppc_restart_interrupt(struct kvm_vcpu *vcpu,
 #endif
 #endif
 		break;
 		break;
 	case BOOKE_INTERRUPT_CRITICAL:
 	case BOOKE_INTERRUPT_CRITICAL:
+		kvmppc_fill_pt_regs(&regs);
 		unknown_exception(&regs);
 		unknown_exception(&regs);
 		break;
 		break;
 	case BOOKE_INTERRUPT_DEBUG:
 	case BOOKE_INTERRUPT_DEBUG:

+ 1 - 1
arch/powerpc/kvm/e500_mmu.c

@@ -377,7 +377,7 @@ int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, gva_t ea)
 			| MAS0_NV(vcpu_e500->gtlb_nv[tlbsel]);
 			| MAS0_NV(vcpu_e500->gtlb_nv[tlbsel]);
 		vcpu->arch.shared->mas1 =
 		vcpu->arch.shared->mas1 =
 			  (vcpu->arch.shared->mas6 & MAS6_SPID0)
 			  (vcpu->arch.shared->mas6 & MAS6_SPID0)
-			| (vcpu->arch.shared->mas6 & (MAS6_SAS ? MAS1_TS : 0))
+			| ((vcpu->arch.shared->mas6 & MAS6_SAS) ? MAS1_TS : 0)
 			| (vcpu->arch.shared->mas4 & MAS4_TSIZED(~0));
 			| (vcpu->arch.shared->mas4 & MAS4_TSIZED(~0));
 		vcpu->arch.shared->mas2 &= MAS2_EPN;
 		vcpu->arch.shared->mas2 &= MAS2_EPN;
 		vcpu->arch.shared->mas2 |= vcpu->arch.shared->mas4 &
 		vcpu->arch.shared->mas2 |= vcpu->arch.shared->mas4 &

+ 1 - 1
arch/powerpc/kvm/powerpc.c

@@ -660,7 +660,7 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
 	return kvmppc_core_pending_dec(vcpu);
 	return kvmppc_core_pending_dec(vcpu);
 }
 }
 
 
-enum hrtimer_restart kvmppc_decrementer_wakeup(struct hrtimer *timer)
+static enum hrtimer_restart kvmppc_decrementer_wakeup(struct hrtimer *timer)
 {
 {
 	struct kvm_vcpu *vcpu;
 	struct kvm_vcpu *vcpu;
 
 

+ 1 - 1
arch/x86/kvm/emulate.c

@@ -650,6 +650,7 @@ static __always_inline int __linearize(struct x86_emulate_ctxt *ctxt,
 	u16 sel;
 	u16 sel;
 
 
 	la = seg_base(ctxt, addr.seg) + addr.ea;
 	la = seg_base(ctxt, addr.seg) + addr.ea;
+	*linear = la;
 	*max_size = 0;
 	*max_size = 0;
 	switch (mode) {
 	switch (mode) {
 	case X86EMUL_MODE_PROT64:
 	case X86EMUL_MODE_PROT64:
@@ -693,7 +694,6 @@ static __always_inline int __linearize(struct x86_emulate_ctxt *ctxt,
 	}
 	}
 	if (insn_aligned(ctxt, size) && ((la & (size - 1)) != 0))
 	if (insn_aligned(ctxt, size) && ((la & (size - 1)) != 0))
 		return emulate_gp(ctxt, 0);
 		return emulate_gp(ctxt, 0);
-	*linear = la;
 	return X86EMUL_CONTINUE;
 	return X86EMUL_CONTINUE;
 bad:
 bad:
 	if (addr.seg == VCPU_SREG_SS)
 	if (addr.seg == VCPU_SREG_SS)

+ 4 - 3
arch/x86/kvm/mmu.c

@@ -3309,13 +3309,14 @@ walk_shadow_page_get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep)
 
 
 	walk_shadow_page_lockless_begin(vcpu);
 	walk_shadow_page_lockless_begin(vcpu);
 
 
-	for (shadow_walk_init(&iterator, vcpu, addr), root = iterator.level;
+	for (shadow_walk_init(&iterator, vcpu, addr),
+		 leaf = root = iterator.level;
 	     shadow_walk_okay(&iterator);
 	     shadow_walk_okay(&iterator);
 	     __shadow_walk_next(&iterator, spte)) {
 	     __shadow_walk_next(&iterator, spte)) {
-		leaf = iterator.level;
 		spte = mmu_spte_get_lockless(iterator.sptep);
 		spte = mmu_spte_get_lockless(iterator.sptep);
 
 
 		sptes[leaf - 1] = spte;
 		sptes[leaf - 1] = spte;
+		leaf--;
 
 
 		if (!is_shadow_present_pte(spte))
 		if (!is_shadow_present_pte(spte))
 			break;
 			break;
@@ -3329,7 +3330,7 @@ walk_shadow_page_get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep)
 	if (reserved) {
 	if (reserved) {
 		pr_err("%s: detect reserved bits on spte, addr 0x%llx, dump hierarchy:\n",
 		pr_err("%s: detect reserved bits on spte, addr 0x%llx, dump hierarchy:\n",
 		       __func__, addr);
 		       __func__, addr);
-		while (root >= leaf) {
+		while (root > leaf) {
 			pr_err("------ spte 0x%llx level %d.\n",
 			pr_err("------ spte 0x%llx level %d.\n",
 			       sptes[root - 1], root);
 			       sptes[root - 1], root);
 			root--;
 			root--;

+ 2 - 0
arch/x86/kvm/x86.c

@@ -5943,6 +5943,7 @@ static void process_smi_save_seg_32(struct kvm_vcpu *vcpu, char *buf, int n)
 	put_smstate(u32, buf, offset, process_smi_get_segment_flags(&seg));
 	put_smstate(u32, buf, offset, process_smi_get_segment_flags(&seg));
 }
 }
 
 
+#ifdef CONFIG_X86_64
 static void process_smi_save_seg_64(struct kvm_vcpu *vcpu, char *buf, int n)
 static void process_smi_save_seg_64(struct kvm_vcpu *vcpu, char *buf, int n)
 {
 {
 	struct kvm_segment seg;
 	struct kvm_segment seg;
@@ -5958,6 +5959,7 @@ static void process_smi_save_seg_64(struct kvm_vcpu *vcpu, char *buf, int n)
 	put_smstate(u32, buf, offset + 4, seg.limit);
 	put_smstate(u32, buf, offset + 4, seg.limit);
 	put_smstate(u64, buf, offset + 8, seg.base);
 	put_smstate(u64, buf, offset + 8, seg.base);
 }
 }
+#endif
 
 
 static void process_smi_save_state_32(struct kvm_vcpu *vcpu, char *buf)
 static void process_smi_save_state_32(struct kvm_vcpu *vcpu, char *buf)
 {
 {

+ 5 - 2
include/kvm/arm_arch_timer.h

@@ -52,13 +52,16 @@ struct arch_timer_cpu {
 
 
 	/* Timer IRQ */
 	/* Timer IRQ */
 	const struct kvm_irq_level	*irq;
 	const struct kvm_irq_level	*irq;
+
+	/* VGIC mapping */
+	struct irq_phys_map		*map;
 };
 };
 
 
 int kvm_timer_hyp_init(void);
 int kvm_timer_hyp_init(void);
 void kvm_timer_enable(struct kvm *kvm);
 void kvm_timer_enable(struct kvm *kvm);
 void kvm_timer_init(struct kvm *kvm);
 void kvm_timer_init(struct kvm *kvm);
-void kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu,
-			  const struct kvm_irq_level *irq);
+int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu,
+			 const struct kvm_irq_level *irq);
 void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu);
 void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu);
 void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu);
 void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu);
 void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu);
 void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu);

+ 36 - 3
include/kvm/arm_vgic.h

@@ -95,11 +95,15 @@ enum vgic_type {
 #define LR_STATE_ACTIVE		(1 << 1)
 #define LR_STATE_ACTIVE		(1 << 1)
 #define LR_STATE_MASK		(3 << 0)
 #define LR_STATE_MASK		(3 << 0)
 #define LR_EOI_INT		(1 << 2)
 #define LR_EOI_INT		(1 << 2)
+#define LR_HW			(1 << 3)
 
 
 struct vgic_lr {
 struct vgic_lr {
-	u16	irq;
-	u8	source;
-	u8	state;
+	unsigned irq:10;
+	union {
+		unsigned hwirq:10;
+		unsigned source:3;
+	};
+	unsigned state:4;
 };
 };
 
 
 struct vgic_vmcr {
 struct vgic_vmcr {
@@ -155,6 +159,19 @@ struct vgic_io_device {
 	struct kvm_io_device dev;
 	struct kvm_io_device dev;
 };
 };
 
 
+struct irq_phys_map {
+	u32			virt_irq;
+	u32			phys_irq;
+	u32			irq;
+	bool			active;
+};
+
+struct irq_phys_map_entry {
+	struct list_head	entry;
+	struct rcu_head		rcu;
+	struct irq_phys_map	map;
+};
+
 struct vgic_dist {
 struct vgic_dist {
 	spinlock_t		lock;
 	spinlock_t		lock;
 	bool			in_kernel;
 	bool			in_kernel;
@@ -252,6 +269,10 @@ struct vgic_dist {
 	struct vgic_vm_ops	vm_ops;
 	struct vgic_vm_ops	vm_ops;
 	struct vgic_io_device	dist_iodev;
 	struct vgic_io_device	dist_iodev;
 	struct vgic_io_device	*redist_iodevs;
 	struct vgic_io_device	*redist_iodevs;
+
+	/* Virtual irq to hwirq mapping */
+	spinlock_t		irq_phys_map_lock;
+	struct list_head	irq_phys_map_list;
 };
 };
 
 
 struct vgic_v2_cpu_if {
 struct vgic_v2_cpu_if {
@@ -303,6 +324,9 @@ struct vgic_cpu {
 		struct vgic_v2_cpu_if	vgic_v2;
 		struct vgic_v2_cpu_if	vgic_v2;
 		struct vgic_v3_cpu_if	vgic_v3;
 		struct vgic_v3_cpu_if	vgic_v3;
 	};
 	};
+
+	/* Protected by the distributor's irq_phys_map_lock */
+	struct list_head	irq_phys_map_list;
 };
 };
 
 
 #define LR_EMPTY	0xff
 #define LR_EMPTY	0xff
@@ -317,16 +341,25 @@ int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write);
 int kvm_vgic_hyp_init(void);
 int kvm_vgic_hyp_init(void);
 int kvm_vgic_map_resources(struct kvm *kvm);
 int kvm_vgic_map_resources(struct kvm *kvm);
 int kvm_vgic_get_max_vcpus(void);
 int kvm_vgic_get_max_vcpus(void);
+void kvm_vgic_early_init(struct kvm *kvm);
 int kvm_vgic_create(struct kvm *kvm, u32 type);
 int kvm_vgic_create(struct kvm *kvm, u32 type);
 void kvm_vgic_destroy(struct kvm *kvm);
 void kvm_vgic_destroy(struct kvm *kvm);
+void kvm_vgic_vcpu_early_init(struct kvm_vcpu *vcpu);
 void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu);
 void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu);
 void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu);
 void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu);
 void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu);
 void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu);
 int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num,
 int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num,
 			bool level);
 			bool level);
+int kvm_vgic_inject_mapped_irq(struct kvm *kvm, int cpuid,
+			       struct irq_phys_map *map, bool level);
 void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg);
 void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg);
 int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu);
 int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu);
 int kvm_vgic_vcpu_active_irq(struct kvm_vcpu *vcpu);
 int kvm_vgic_vcpu_active_irq(struct kvm_vcpu *vcpu);
+struct irq_phys_map *kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu,
+					   int virt_irq, int irq);
+int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, struct irq_phys_map *map);
+bool kvm_vgic_get_phys_irq_active(struct irq_phys_map *map);
+void kvm_vgic_set_phys_irq_active(struct irq_phys_map *map, bool active);
 
 
 #define irqchip_in_kernel(k)	(!!((k)->arch.vgic.in_kernel))
 #define irqchip_in_kernel(k)	(!!((k)->arch.vgic.in_kernel))
 #define vgic_initialized(k)	(!!((k)->arch.vgic.nr_cpus))
 #define vgic_initialized(k)	(!!((k)->arch.vgic.nr_cpus))

+ 3 - 0
include/linux/irqchip/arm-gic-v3.h

@@ -270,9 +270,12 @@
 
 
 #define ICH_LR_EOI			(1UL << 41)
 #define ICH_LR_EOI			(1UL << 41)
 #define ICH_LR_GROUP			(1UL << 60)
 #define ICH_LR_GROUP			(1UL << 60)
+#define ICH_LR_HW			(1UL << 61)
 #define ICH_LR_STATE			(3UL << 62)
 #define ICH_LR_STATE			(3UL << 62)
 #define ICH_LR_PENDING_BIT		(1UL << 62)
 #define ICH_LR_PENDING_BIT		(1UL << 62)
 #define ICH_LR_ACTIVE_BIT		(1UL << 63)
 #define ICH_LR_ACTIVE_BIT		(1UL << 63)
+#define ICH_LR_PHYS_ID_SHIFT		32
+#define ICH_LR_PHYS_ID_MASK		(0x3ffUL << ICH_LR_PHYS_ID_SHIFT)
 
 
 #define ICH_MISR_EOI			(1 << 0)
 #define ICH_MISR_EOI			(1 << 0)
 #define ICH_MISR_U			(1 << 1)
 #define ICH_MISR_U			(1 << 1)

+ 2 - 1
include/linux/irqchip/arm-gic.h

@@ -75,11 +75,12 @@
 
 
 #define GICH_LR_VIRTUALID		(0x3ff << 0)
 #define GICH_LR_VIRTUALID		(0x3ff << 0)
 #define GICH_LR_PHYSID_CPUID_SHIFT	(10)
 #define GICH_LR_PHYSID_CPUID_SHIFT	(10)
-#define GICH_LR_PHYSID_CPUID		(7 << GICH_LR_PHYSID_CPUID_SHIFT)
+#define GICH_LR_PHYSID_CPUID		(0x3ff << GICH_LR_PHYSID_CPUID_SHIFT)
 #define GICH_LR_STATE			(3 << 28)
 #define GICH_LR_STATE			(3 << 28)
 #define GICH_LR_PENDING_BIT		(1 << 28)
 #define GICH_LR_PENDING_BIT		(1 << 28)
 #define GICH_LR_ACTIVE_BIT		(1 << 29)
 #define GICH_LR_ACTIVE_BIT		(1 << 29)
 #define GICH_LR_EOI			(1 << 19)
 #define GICH_LR_EOI			(1 << 19)
+#define GICH_LR_HW			(1 << 31)
 
 
 #define GICH_VMCR_CTRL_SHIFT		0
 #define GICH_VMCR_CTRL_SHIFT		0
 #define GICH_VMCR_CTRL_MASK		(0x21f << GICH_VMCR_CTRL_SHIFT)
 #define GICH_VMCR_CTRL_MASK		(0x21f << GICH_VMCR_CTRL_SHIFT)

+ 1 - 0
include/linux/kvm_host.h

@@ -242,6 +242,7 @@ struct kvm_vcpu {
 	int sigset_active;
 	int sigset_active;
 	sigset_t sigset;
 	sigset_t sigset;
 	struct kvm_vcpu_stat stat;
 	struct kvm_vcpu_stat stat;
+	unsigned int halt_poll_ns;
 
 
 #ifdef CONFIG_HAS_IOMEM
 #ifdef CONFIG_HAS_IOMEM
 	int mmio_needed;
 	int mmio_needed;

+ 30 - 0
include/trace/events/kvm.h

@@ -358,6 +358,36 @@ TRACE_EVENT(
 
 
 #endif
 #endif
 
 
+TRACE_EVENT(kvm_halt_poll_ns,
+	TP_PROTO(bool grow, unsigned int vcpu_id, int new, int old),
+	TP_ARGS(grow, vcpu_id, new, old),
+
+	TP_STRUCT__entry(
+		__field(bool, grow)
+		__field(unsigned int, vcpu_id)
+		__field(int, new)
+		__field(int, old)
+	),
+
+	TP_fast_assign(
+		__entry->grow           = grow;
+		__entry->vcpu_id        = vcpu_id;
+		__entry->new            = new;
+		__entry->old            = old;
+	),
+
+	TP_printk("vcpu %u: halt_poll_ns %d (%s %d)",
+			__entry->vcpu_id,
+			__entry->new,
+			__entry->grow ? "grow" : "shrink",
+			__entry->old)
+);
+
+#define trace_kvm_halt_poll_ns_grow(vcpu_id, new, old) \
+	trace_kvm_halt_poll_ns(true, vcpu_id, new, old)
+#define trace_kvm_halt_poll_ns_shrink(vcpu_id, new, old) \
+	trace_kvm_halt_poll_ns(false, vcpu_id, new, old)
+
 #endif /* _TRACE_KVM_MAIN_H */
 #endif /* _TRACE_KVM_MAIN_H */
 
 
 /* This part must be outside protection */
 /* This part must be outside protection */

+ 5 - 0
include/uapi/linux/kvm.h

@@ -237,6 +237,7 @@ struct kvm_run {
 			__u32 count;
 			__u32 count;
 			__u64 data_offset; /* relative to kvm_run start */
 			__u64 data_offset; /* relative to kvm_run start */
 		} io;
 		} io;
+		/* KVM_EXIT_DEBUG */
 		struct {
 		struct {
 			struct kvm_debug_exit_arch arch;
 			struct kvm_debug_exit_arch arch;
 		} debug;
 		} debug;
@@ -285,6 +286,7 @@ struct kvm_run {
 			__u32 data;
 			__u32 data;
 			__u8  is_write;
 			__u8  is_write;
 		} dcr;
 		} dcr;
+		/* KVM_EXIT_INTERNAL_ERROR */
 		struct {
 		struct {
 			__u32 suberror;
 			__u32 suberror;
 			/* Available with KVM_CAP_INTERNAL_ERROR_DATA: */
 			/* Available with KVM_CAP_INTERNAL_ERROR_DATA: */
@@ -295,6 +297,7 @@ struct kvm_run {
 		struct {
 		struct {
 			__u64 gprs[32];
 			__u64 gprs[32];
 		} osi;
 		} osi;
+		/* KVM_EXIT_PAPR_HCALL */
 		struct {
 		struct {
 			__u64 nr;
 			__u64 nr;
 			__u64 ret;
 			__u64 ret;
@@ -819,6 +822,8 @@ struct kvm_ppc_smmu_info {
 #define KVM_CAP_DISABLE_QUIRKS 116
 #define KVM_CAP_DISABLE_QUIRKS 116
 #define KVM_CAP_X86_SMM 117
 #define KVM_CAP_X86_SMM 117
 #define KVM_CAP_MULTI_ADDRESS_SPACE 118
 #define KVM_CAP_MULTI_ADDRESS_SPACE 118
+#define KVM_CAP_GUEST_DEBUG_HW_BPS 119
+#define KVM_CAP_GUEST_DEBUG_HW_WPS 120
 
 
 #ifdef KVM_CAP_IRQ_ROUTING
 #ifdef KVM_CAP_IRQ_ROUTING
 
 

+ 22 - 7
virt/kvm/arm/arch_timer.c

@@ -64,10 +64,10 @@ static void kvm_timer_inject_irq(struct kvm_vcpu *vcpu)
 	int ret;
 	int ret;
 	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
 	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
 
 
-	timer->cntv_ctl |= ARCH_TIMER_CTRL_IT_MASK;
-	ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id,
-				  timer->irq->irq,
-				  timer->irq->level);
+	kvm_vgic_set_phys_irq_active(timer->map, true);
+	ret = kvm_vgic_inject_mapped_irq(vcpu->kvm, vcpu->vcpu_id,
+					 timer->map,
+					 timer->irq->level);
 	WARN_ON(ret);
 	WARN_ON(ret);
 }
 }
 
 
@@ -117,7 +117,8 @@ bool kvm_timer_should_fire(struct kvm_vcpu *vcpu)
 	cycle_t cval, now;
 	cycle_t cval, now;
 
 
 	if ((timer->cntv_ctl & ARCH_TIMER_CTRL_IT_MASK) ||
 	if ((timer->cntv_ctl & ARCH_TIMER_CTRL_IT_MASK) ||
-		!(timer->cntv_ctl & ARCH_TIMER_CTRL_ENABLE))
+	    !(timer->cntv_ctl & ARCH_TIMER_CTRL_ENABLE) ||
+	    kvm_vgic_get_phys_irq_active(timer->map))
 		return false;
 		return false;
 
 
 	cval = timer->cntv_cval;
 	cval = timer->cntv_cval;
@@ -184,10 +185,11 @@ void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu)
 	timer_arm(timer, ns);
 	timer_arm(timer, ns);
 }
 }
 
 
-void kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu,
-			  const struct kvm_irq_level *irq)
+int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu,
+			 const struct kvm_irq_level *irq)
 {
 {
 	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
 	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+	struct irq_phys_map *map;
 
 
 	/*
 	/*
 	 * The vcpu timer irq number cannot be determined in
 	 * The vcpu timer irq number cannot be determined in
@@ -196,6 +198,17 @@ void kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu,
 	 * vcpu timer irq number when the vcpu is reset.
 	 * vcpu timer irq number when the vcpu is reset.
 	 */
 	 */
 	timer->irq = irq;
 	timer->irq = irq;
+
+	/*
+	 * Tell the VGIC that the virtual interrupt is tied to a
+	 * physical interrupt. We do that once per VCPU.
+	 */
+	map = kvm_vgic_map_phys_irq(vcpu, irq->irq, host_vtimer_irq);
+	if (WARN_ON(IS_ERR(map)))
+		return PTR_ERR(map);
+
+	timer->map = map;
+	return 0;
 }
 }
 
 
 void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu)
 void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu)
@@ -335,6 +348,8 @@ void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu)
 	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
 	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
 
 
 	timer_disarm(timer);
 	timer_disarm(timer);
+	if (timer->map)
+		kvm_vgic_unmap_phys_irq(vcpu, timer->map);
 }
 }
 
 
 void kvm_timer_enable(struct kvm *kvm)
 void kvm_timer_enable(struct kvm *kvm)

+ 15 - 1
virt/kvm/arm/vgic-v2.c

@@ -48,6 +48,10 @@ static struct vgic_lr vgic_v2_get_lr(const struct kvm_vcpu *vcpu, int lr)
 		lr_desc.state |= LR_STATE_ACTIVE;
 		lr_desc.state |= LR_STATE_ACTIVE;
 	if (val & GICH_LR_EOI)
 	if (val & GICH_LR_EOI)
 		lr_desc.state |= LR_EOI_INT;
 		lr_desc.state |= LR_EOI_INT;
+	if (val & GICH_LR_HW) {
+		lr_desc.state |= LR_HW;
+		lr_desc.hwirq = (val & GICH_LR_PHYSID_CPUID) >> GICH_LR_PHYSID_CPUID_SHIFT;
+	}
 
 
 	return lr_desc;
 	return lr_desc;
 }
 }
@@ -55,7 +59,9 @@ static struct vgic_lr vgic_v2_get_lr(const struct kvm_vcpu *vcpu, int lr)
 static void vgic_v2_set_lr(struct kvm_vcpu *vcpu, int lr,
 static void vgic_v2_set_lr(struct kvm_vcpu *vcpu, int lr,
 			   struct vgic_lr lr_desc)
 			   struct vgic_lr lr_desc)
 {
 {
-	u32 lr_val = (lr_desc.source << GICH_LR_PHYSID_CPUID_SHIFT) | lr_desc.irq;
+	u32 lr_val;
+
+	lr_val = lr_desc.irq;
 
 
 	if (lr_desc.state & LR_STATE_PENDING)
 	if (lr_desc.state & LR_STATE_PENDING)
 		lr_val |= GICH_LR_PENDING_BIT;
 		lr_val |= GICH_LR_PENDING_BIT;
@@ -64,6 +70,14 @@ static void vgic_v2_set_lr(struct kvm_vcpu *vcpu, int lr,
 	if (lr_desc.state & LR_EOI_INT)
 	if (lr_desc.state & LR_EOI_INT)
 		lr_val |= GICH_LR_EOI;
 		lr_val |= GICH_LR_EOI;
 
 
+	if (lr_desc.state & LR_HW) {
+		lr_val |= GICH_LR_HW;
+		lr_val |= (u32)lr_desc.hwirq << GICH_LR_PHYSID_CPUID_SHIFT;
+	}
+
+	if (lr_desc.irq < VGIC_NR_SGIS)
+		lr_val |= (lr_desc.source << GICH_LR_PHYSID_CPUID_SHIFT);
+
 	vcpu->arch.vgic_cpu.vgic_v2.vgic_lr[lr] = lr_val;
 	vcpu->arch.vgic_cpu.vgic_v2.vgic_lr[lr] = lr_val;
 }
 }
 
 

+ 18 - 3
virt/kvm/arm/vgic-v3.c

@@ -67,6 +67,10 @@ static struct vgic_lr vgic_v3_get_lr(const struct kvm_vcpu *vcpu, int lr)
 		lr_desc.state |= LR_STATE_ACTIVE;
 		lr_desc.state |= LR_STATE_ACTIVE;
 	if (val & ICH_LR_EOI)
 	if (val & ICH_LR_EOI)
 		lr_desc.state |= LR_EOI_INT;
 		lr_desc.state |= LR_EOI_INT;
+	if (val & ICH_LR_HW) {
+		lr_desc.state |= LR_HW;
+		lr_desc.hwirq = (val >> ICH_LR_PHYS_ID_SHIFT) & GENMASK(9, 0);
+	}
 
 
 	return lr_desc;
 	return lr_desc;
 }
 }
@@ -84,10 +88,17 @@ static void vgic_v3_set_lr(struct kvm_vcpu *vcpu, int lr,
 	 * Eventually we want to make this configurable, so we may revisit
 	 * Eventually we want to make this configurable, so we may revisit
 	 * this in the future.
 	 * this in the future.
 	 */
 	 */
-	if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3)
+	switch (vcpu->kvm->arch.vgic.vgic_model) {
+	case KVM_DEV_TYPE_ARM_VGIC_V3:
 		lr_val |= ICH_LR_GROUP;
 		lr_val |= ICH_LR_GROUP;
-	else
-		lr_val |= (u32)lr_desc.source << GICH_LR_PHYSID_CPUID_SHIFT;
+		break;
+	case  KVM_DEV_TYPE_ARM_VGIC_V2:
+		if (lr_desc.irq < VGIC_NR_SGIS)
+			lr_val |= (u32)lr_desc.source << GICH_LR_PHYSID_CPUID_SHIFT;
+		break;
+	default:
+		BUG();
+	}
 
 
 	if (lr_desc.state & LR_STATE_PENDING)
 	if (lr_desc.state & LR_STATE_PENDING)
 		lr_val |= ICH_LR_PENDING_BIT;
 		lr_val |= ICH_LR_PENDING_BIT;
@@ -95,6 +106,10 @@ static void vgic_v3_set_lr(struct kvm_vcpu *vcpu, int lr,
 		lr_val |= ICH_LR_ACTIVE_BIT;
 		lr_val |= ICH_LR_ACTIVE_BIT;
 	if (lr_desc.state & LR_EOI_INT)
 	if (lr_desc.state & LR_EOI_INT)
 		lr_val |= ICH_LR_EOI;
 		lr_val |= ICH_LR_EOI;
+	if (lr_desc.state & LR_HW) {
+		lr_val |= ICH_LR_HW;
+		lr_val |= ((u64)lr_desc.hwirq) << ICH_LR_PHYS_ID_SHIFT;
+	}
 
 
 	vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[LR_INDEX(lr)] = lr_val;
 	vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[LR_INDEX(lr)] = lr_val;
 }
 }

+ 389 - 38
virt/kvm/arm/vgic.c

@@ -24,6 +24,7 @@
 #include <linux/of.h>
 #include <linux/of.h>
 #include <linux/of_address.h>
 #include <linux/of_address.h>
 #include <linux/of_irq.h>
 #include <linux/of_irq.h>
+#include <linux/rculist.h>
 #include <linux/uaccess.h>
 #include <linux/uaccess.h>
 
 
 #include <asm/kvm_emulate.h>
 #include <asm/kvm_emulate.h>
@@ -74,6 +75,28 @@
  *   cause the interrupt to become inactive in such a situation.
  *   cause the interrupt to become inactive in such a situation.
  *   Conversely, writes to GICD_ICPENDRn do not cause the interrupt to become
  *   Conversely, writes to GICD_ICPENDRn do not cause the interrupt to become
  *   inactive as long as the external input line is held high.
  *   inactive as long as the external input line is held high.
+ *
+ *
+ * Initialization rules: there are multiple stages to the vgic
+ * initialization, both for the distributor and the CPU interfaces.
+ *
+ * Distributor:
+ *
+ * - kvm_vgic_early_init(): initialization of static data that doesn't
+ *   depend on any sizing information or emulation type. No allocation
+ *   is allowed there.
+ *
+ * - vgic_init(): allocation and initialization of the generic data
+ *   structures that depend on sizing information (number of CPUs,
+ *   number of interrupts). Also initializes the vcpu specific data
+ *   structures. Can be executed lazily for GICv2.
+ *   [to be renamed to kvm_vgic_init??]
+ *
+ * CPU Interface:
+ *
+ * - kvm_vgic_cpu_early_init(): initialization of static data that
+ *   doesn't depend on any sizing information or emulation type. No
+ *   allocation is allowed there.
  */
  */
 
 
 #include "vgic.h"
 #include "vgic.h"
@@ -82,6 +105,8 @@ static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu);
 static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu);
 static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu);
 static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr);
 static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr);
 static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr, struct vgic_lr lr_desc);
 static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr, struct vgic_lr lr_desc);
+static struct irq_phys_map *vgic_irq_map_search(struct kvm_vcpu *vcpu,
+						int virt_irq);
 
 
 static const struct vgic_ops *vgic_ops;
 static const struct vgic_ops *vgic_ops;
 static const struct vgic_params *vgic;
 static const struct vgic_params *vgic;
@@ -375,7 +400,7 @@ void vgic_cpu_irq_clear(struct kvm_vcpu *vcpu, int irq)
 
 
 static bool vgic_can_sample_irq(struct kvm_vcpu *vcpu, int irq)
 static bool vgic_can_sample_irq(struct kvm_vcpu *vcpu, int irq)
 {
 {
-	return vgic_irq_is_edge(vcpu, irq) || !vgic_irq_is_queued(vcpu, irq);
+	return !vgic_irq_is_queued(vcpu, irq);
 }
 }
 
 
 /**
 /**
@@ -1115,6 +1140,39 @@ static void vgic_queue_irq_to_lr(struct kvm_vcpu *vcpu, int irq,
 	if (!vgic_irq_is_edge(vcpu, irq))
 	if (!vgic_irq_is_edge(vcpu, irq))
 		vlr.state |= LR_EOI_INT;
 		vlr.state |= LR_EOI_INT;
 
 
+	if (vlr.irq >= VGIC_NR_SGIS) {
+		struct irq_phys_map *map;
+		map = vgic_irq_map_search(vcpu, irq);
+
+		/*
+		 * If we have a mapping, and the virtual interrupt is
+		 * being injected, then we must set the state to
+		 * active in the physical world. Otherwise the
+		 * physical interrupt will fire and the guest will
+		 * exit before processing the virtual interrupt.
+		 */
+		if (map) {
+			int ret;
+
+			BUG_ON(!map->active);
+			vlr.hwirq = map->phys_irq;
+			vlr.state |= LR_HW;
+			vlr.state &= ~LR_EOI_INT;
+
+			ret = irq_set_irqchip_state(map->irq,
+						    IRQCHIP_STATE_ACTIVE,
+						    true);
+			WARN_ON(ret);
+
+			/*
+			 * Make sure we're not going to sample this
+			 * again, as a HW-backed interrupt cannot be
+			 * in the PENDING_ACTIVE stage.
+			 */
+			vgic_irq_set_queued(vcpu, irq);
+		}
+	}
+
 	vgic_set_lr(vcpu, lr_nr, vlr);
 	vgic_set_lr(vcpu, lr_nr, vlr);
 	vgic_sync_lr_elrsr(vcpu, lr_nr, vlr);
 	vgic_sync_lr_elrsr(vcpu, lr_nr, vlr);
 }
 }
@@ -1339,6 +1397,39 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu)
 	return level_pending;
 	return level_pending;
 }
 }
 
 
+/*
+ * Save the physical active state, and reset it to inactive.
+ *
+ * Return 1 if HW interrupt went from active to inactive, and 0 otherwise.
+ */
+static int vgic_sync_hwirq(struct kvm_vcpu *vcpu, struct vgic_lr vlr)
+{
+	struct irq_phys_map *map;
+	int ret;
+
+	if (!(vlr.state & LR_HW))
+		return 0;
+
+	map = vgic_irq_map_search(vcpu, vlr.irq);
+	BUG_ON(!map || !map->active);
+
+	ret = irq_get_irqchip_state(map->irq,
+				    IRQCHIP_STATE_ACTIVE,
+				    &map->active);
+
+	WARN_ON(ret);
+
+	if (map->active) {
+		ret = irq_set_irqchip_state(map->irq,
+					    IRQCHIP_STATE_ACTIVE,
+					    false);
+		WARN_ON(ret);
+		return 0;
+	}
+
+	return 1;
+}
+
 /* Sync back the VGIC state after a guest run */
 /* Sync back the VGIC state after a guest run */
 static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
 static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
 {
 {
@@ -1353,14 +1444,31 @@ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
 	elrsr = vgic_get_elrsr(vcpu);
 	elrsr = vgic_get_elrsr(vcpu);
 	elrsr_ptr = u64_to_bitmask(&elrsr);
 	elrsr_ptr = u64_to_bitmask(&elrsr);
 
 
-	/* Clear mappings for empty LRs */
-	for_each_set_bit(lr, elrsr_ptr, vgic->nr_lr) {
+	/* Deal with HW interrupts, and clear mappings for empty LRs */
+	for (lr = 0; lr < vgic->nr_lr; lr++) {
 		struct vgic_lr vlr;
 		struct vgic_lr vlr;
 
 
-		if (!test_and_clear_bit(lr, vgic_cpu->lr_used))
+		if (!test_bit(lr, vgic_cpu->lr_used))
 			continue;
 			continue;
 
 
 		vlr = vgic_get_lr(vcpu, lr);
 		vlr = vgic_get_lr(vcpu, lr);
+		if (vgic_sync_hwirq(vcpu, vlr)) {
+			/*
+			 * So this is a HW interrupt that the guest
+			 * EOI-ed. Clean the LR state and allow the
+			 * interrupt to be sampled again.
+			 */
+			vlr.state = 0;
+			vlr.hwirq = 0;
+			vgic_set_lr(vcpu, lr, vlr);
+			vgic_irq_clear_queued(vcpu, vlr.irq);
+			set_bit(lr, elrsr_ptr);
+		}
+
+		if (!test_bit(lr, elrsr_ptr))
+			continue;
+
+		clear_bit(lr, vgic_cpu->lr_used);
 
 		BUG_ON(vlr.irq >= dist->nr_irqs);
 		vgic_cpu->vgic_irq_lr_map[vlr.irq] = LR_EMPTY;
@@ -1447,7 +1555,8 @@ static int vgic_validate_injection(struct kvm_vcpu *vcpu, int irq, int level)
 }
 
 static int vgic_update_irq_pending(struct kvm *kvm, int cpuid,
-				  unsigned int irq_num, bool level)
+				   struct irq_phys_map *map,
+				   unsigned int irq_num, bool level)
 {
 	struct vgic_dist *dist = &kvm->arch.vgic;
 	struct kvm_vcpu *vcpu;
@@ -1455,6 +1564,9 @@ static int vgic_update_irq_pending(struct kvm *kvm, int cpuid,
 	int enabled;
 	bool ret = true, can_inject = true;
 
+	if (irq_num >= min(kvm->arch.vgic.nr_irqs, 1020))
+		return -EINVAL;
+
 	spin_lock(&dist->lock);
 
 	vcpu = kvm_get_vcpu(kvm, cpuid);
@@ -1517,18 +1629,46 @@ static int vgic_update_irq_pending(struct kvm *kvm, int cpuid,
 out:
 	spin_unlock(&dist->lock);
 
-	return ret ? cpuid : -EINVAL;
+	if (ret) {
+		/* kick the specified vcpu */
+		kvm_vcpu_kick(kvm_get_vcpu(kvm, cpuid));
+	}
+
+	return 0;
+}
+
+static int vgic_lazy_init(struct kvm *kvm)
+{
+	int ret = 0;
+
+	if (unlikely(!vgic_initialized(kvm))) {
+		/*
+		 * We only provide the automatic initialization of the VGIC
+		 * for the legacy case of a GICv2. Any other type must
+		 * be explicitly initialized once setup with the respective
+		 * KVM device call.
+		 */
+		if (kvm->arch.vgic.vgic_model != KVM_DEV_TYPE_ARM_VGIC_V2)
+			return -EBUSY;
+
+		mutex_lock(&kvm->lock);
+		ret = vgic_init(kvm);
+		mutex_unlock(&kvm->lock);
+	}
+
+	return ret;
 }
 
 /**
  * kvm_vgic_inject_irq - Inject an IRQ from a device to the vgic
  * @kvm:     The VM structure pointer
  * @cpuid:   The CPU for PPIs
- * @irq_num: The IRQ number that is assigned to the device
+ * @irq_num: The IRQ number that is assigned to the device. This IRQ
+ *           must not be mapped to a HW interrupt.
  * @level:   Edge-triggered:  true:  to trigger the interrupt
  *			      false: to ignore the call
- *	     Level-sensitive  true:  activates an interrupt
- *			      false: deactivates an interrupt
+ *	     Level-sensitive  true:  raise the input signal
+ *			      false: lower the input signal
  *
  * The GIC is not concerned with devices being active-LOW or active-HIGH for
  * level-sensitive interrupts.  You can think of the level parameter as 1
@@ -1537,39 +1677,44 @@ out:
 int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num,
 			bool level)
 {
-	int ret = 0;
-	int vcpu_id;
-
-	if (unlikely(!vgic_initialized(kvm))) {
-		/*
-		 * We only provide the automatic initialization of the VGIC
-		 * for the legacy case of a GICv2. Any other type must
-		 * be explicitly initialized once setup with the respective
-		 * KVM device call.
-		 */
-		if (kvm->arch.vgic.vgic_model != KVM_DEV_TYPE_ARM_VGIC_V2) {
-			ret = -EBUSY;
-			goto out;
-		}
-		mutex_lock(&kvm->lock);
-		ret = vgic_init(kvm);
-		mutex_unlock(&kvm->lock);
+	struct irq_phys_map *map;
+	int ret;
 
-		if (ret)
-			goto out;
-	}
+	ret = vgic_lazy_init(kvm);
+	if (ret)
+		return ret;
 
-	if (irq_num >= min(kvm->arch.vgic.nr_irqs, 1020))
+	map = vgic_irq_map_search(kvm_get_vcpu(kvm, cpuid), irq_num);
+	if (map)
 		return -EINVAL;
 
-	vcpu_id = vgic_update_irq_pending(kvm, cpuid, irq_num, level);
-	if (vcpu_id >= 0) {
-		/* kick the specified vcpu */
-		kvm_vcpu_kick(kvm_get_vcpu(kvm, vcpu_id));
-	}
+	return vgic_update_irq_pending(kvm, cpuid, NULL, irq_num, level);
+}
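As a usage illustration only, not part of this patch, a device model asserting a level-triggered SPI would raise and later lower the input line. The IRQ number and the cpuid of 0 are assumptions:

/*
 * Sketch only: "dev_irq" is a hypothetical level-triggered SPI. For
 * SPIs the cpuid argument is not meaningful, so 0 is passed.
 */
static void example_assert_and_clear(struct kvm *kvm, unsigned int dev_irq)
{
	kvm_vgic_inject_irq(kvm, 0, dev_irq, true);	/* raise the line */
	/* ... the guest services the device ... */
	kvm_vgic_inject_irq(kvm, 0, dev_irq, false);	/* lower the line */
}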
 
-out:
-	return ret;
+/**
+ * kvm_vgic_inject_mapped_irq - Inject a physically mapped IRQ to the vgic
+ * @kvm:     The VM structure pointer
+ * @cpuid:   The CPU for PPIs
+ * @map:     Pointer to an irq_phys_map structure describing the mapping
+ * @level:   Edge-triggered:  true:  to trigger the interrupt
+ *			      false: to ignore the call
+ *	     Level-sensitive  true:  raise the input signal
+ *			      false: lower the input signal
+ *
+ * The GIC is not concerned with devices being active-LOW or active-HIGH for
+ * level-sensitive interrupts.  You can think of the level parameter as 1
+ * being HIGH and 0 being LOW and all devices being active-HIGH.
+ */
+int kvm_vgic_inject_mapped_irq(struct kvm *kvm, int cpuid,
+			       struct irq_phys_map *map, bool level)
+{
+	int ret;
+
+	ret = vgic_lazy_init(kvm);
+	if (ret)
+		return ret;
+
+	return vgic_update_irq_pending(kvm, cpuid, map, map->virt_irq, level);
 }
 
 static irqreturn_t vgic_maintenance_handler(int irq, void *data)
@@ -1583,6 +1728,188 @@ static irqreturn_t vgic_maintenance_handler(int irq, void *data)
 	return IRQ_HANDLED;
 }
 
+static struct list_head *vgic_get_irq_phys_map_list(struct kvm_vcpu *vcpu,
+						    int virt_irq)
+{
+	if (virt_irq < VGIC_NR_PRIVATE_IRQS)
+		return &vcpu->arch.vgic_cpu.irq_phys_map_list;
+	else
+		return &vcpu->kvm->arch.vgic.irq_phys_map_list;
+}
+
+/**
+ * kvm_vgic_map_phys_irq - map a virtual IRQ to a physical IRQ
+ * @vcpu: The VCPU pointer
+ * @virt_irq: The virtual irq number
+ * @irq: The Linux IRQ number
+ *
+ * Establish a mapping between a guest visible irq (@virt_irq) and a
+ * Linux irq (@irq). On injection, @virt_irq will be associated with
+ * the physical interrupt represented by @irq. This mapping can be
+ * established multiple times as long as the parameters are the same.
+ *
+ * Returns a valid pointer on success, and an error pointer otherwise
+ */
+struct irq_phys_map *kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu,
+					   int virt_irq, int irq)
+{
+	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+	struct list_head *root = vgic_get_irq_phys_map_list(vcpu, virt_irq);
+	struct irq_phys_map *map;
+	struct irq_phys_map_entry *entry;
+	struct irq_desc *desc;
+	struct irq_data *data;
+	int phys_irq;
+
+	desc = irq_to_desc(irq);
+	if (!desc) {
+		kvm_err("%s: no interrupt descriptor\n", __func__);
+		return ERR_PTR(-EINVAL);
+	}
+
+	data = irq_desc_get_irq_data(desc);
+	while (data->parent_data)
+		data = data->parent_data;
+
+	phys_irq = data->hwirq;
+
+	/* Create a new mapping */
+	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+	if (!entry)
+		return ERR_PTR(-ENOMEM);
+
+	spin_lock(&dist->irq_phys_map_lock);
+
+	/* Try to match an existing mapping */
+	map = vgic_irq_map_search(vcpu, virt_irq);
+	if (map) {
+		/* Make sure this mapping matches */
+		if (map->phys_irq != phys_irq	||
+		    map->irq      != irq)
+			map = ERR_PTR(-EINVAL);
+
+		/* Found an existing, valid mapping */
+		goto out;
+	}
+
+	map           = &entry->map;
+	map->virt_irq = virt_irq;
+	map->phys_irq = phys_irq;
+	map->irq      = irq;
+
+	list_add_tail_rcu(&entry->entry, root);
+
+out:
+	spin_unlock(&dist->irq_phys_map_lock);
+	/*
+	 * If we've found a hit in the existing list, free the
+	 * useless entry.
+	 */
+	if (IS_ERR(map) || map != &entry->map)
+		kfree(entry);
+	return map;
+}
+
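For illustration, a caller such as the arch timer code could combine this with kvm_vgic_inject_mapped_irq() roughly as follows. GUEST_TIMER_PPI and host_irq are assumed values, not taken from this patch:

#define GUEST_TIMER_PPI	27	/* hypothetical virtual PPI number */

/*
 * Sketch only: map the guest-visible PPI to the host Linux IRQ once,
 * then inject through the mapping so the LR is created with LR_HW set.
 */
static int example_map_and_fire(struct kvm_vcpu *vcpu, int host_irq)
{
	struct irq_phys_map *map;

	map = kvm_vgic_map_phys_irq(vcpu, GUEST_TIMER_PPI, host_irq);
	if (IS_ERR(map))
		return PTR_ERR(map);

	return kvm_vgic_inject_mapped_irq(vcpu->kvm, vcpu->vcpu_id,
					  map, true);
}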
+static struct irq_phys_map *vgic_irq_map_search(struct kvm_vcpu *vcpu,
+						int virt_irq)
+{
+	struct list_head *root = vgic_get_irq_phys_map_list(vcpu, virt_irq);
+	struct irq_phys_map_entry *entry;
+	struct irq_phys_map *map;
+
+	rcu_read_lock();
+
+	list_for_each_entry_rcu(entry, root, entry) {
+		map = &entry->map;
+		if (map->virt_irq == virt_irq) {
+			rcu_read_unlock();
+			return map;
+		}
+	}
+
+	rcu_read_unlock();
+
+	return NULL;
+}
+
+static void vgic_free_phys_irq_map_rcu(struct rcu_head *rcu)
+{
+	struct irq_phys_map_entry *entry;
+
+	entry = container_of(rcu, struct irq_phys_map_entry, rcu);
+	kfree(entry);
+}
+
+/**
+ * kvm_vgic_get_phys_irq_active - Return the active state of a mapped IRQ
+ *
+ * Return the logical active state of a mapped interrupt. This doesn't
+ * necessarily reflect the current HW state.
+ */
+bool kvm_vgic_get_phys_irq_active(struct irq_phys_map *map)
+{
+	BUG_ON(!map);
+	return map->active;
+}
+
+/**
+ * kvm_vgic_set_phys_irq_active - Set the active state of a mapped IRQ
+ *
+ * Set the logical active state of a mapped interrupt. This doesn't
+ * immediately affect the HW state.
+ */
+void kvm_vgic_set_phys_irq_active(struct irq_phys_map *map, bool active)
+{
+	BUG_ON(!map);
+	map->active = active;
+}
+
+/**
+ * kvm_vgic_unmap_phys_irq - Remove a virtual to physical IRQ mapping
+ * @vcpu: The VCPU pointer
+ * @map: The pointer to a mapping obtained through kvm_vgic_map_phys_irq
+ *
+ * Remove an existing mapping between virtual and physical interrupts.
+ */
+int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, struct irq_phys_map *map)
+{
+	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+	struct irq_phys_map_entry *entry;
+	struct list_head *root;
+
+	if (!map)
+		return -EINVAL;
+
+	root = vgic_get_irq_phys_map_list(vcpu, map->virt_irq);
+
+	spin_lock(&dist->irq_phys_map_lock);
+
+	list_for_each_entry(entry, root, entry) {
+		if (&entry->map == map) {
+			list_del_rcu(&entry->entry);
+			call_rcu(&entry->rcu, vgic_free_phys_irq_map_rcu);
+			break;
+		}
+	}
+
+	spin_unlock(&dist->irq_phys_map_lock);
+
+	return 0;
+}
+
+static void vgic_destroy_irq_phys_map(struct kvm *kvm, struct list_head *root)
+{
+	struct vgic_dist *dist = &kvm->arch.vgic;
+	struct irq_phys_map_entry *entry;
+
+	spin_lock(&dist->irq_phys_map_lock);
+
+	list_for_each_entry(entry, root, entry) {
+		list_del_rcu(&entry->entry);
+		call_rcu(&entry->rcu, vgic_free_phys_irq_map_rcu);
+	}
+
+	spin_unlock(&dist->irq_phys_map_lock);
+}
+
 void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu)
 {
 	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
@@ -1591,6 +1918,7 @@ void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu)
 	kfree(vgic_cpu->active_shared);
 	kfree(vgic_cpu->pend_act_shared);
 	kfree(vgic_cpu->vgic_irq_lr_map);
+	vgic_destroy_irq_phys_map(vcpu->kvm, &vgic_cpu->irq_phys_map_list);
 	vgic_cpu->pending_shared = NULL;
 	vgic_cpu->active_shared = NULL;
 	vgic_cpu->pend_act_shared = NULL;
@@ -1627,6 +1955,17 @@ static int vgic_vcpu_init_maps(struct kvm_vcpu *vcpu, int nr_irqs)
 	return 0;
 }
 
+/**
+ * kvm_vgic_vcpu_early_init - Earliest possible per-vcpu vgic init stage
+ *
+ * No memory allocation should be performed here, only static init.
+ */
+void kvm_vgic_vcpu_early_init(struct kvm_vcpu *vcpu)
+{
+	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
+
+	INIT_LIST_HEAD(&vgic_cpu->irq_phys_map_list);
+}
+
 /**
  * kvm_vgic_get_max_vcpus - Get the maximum number of VCPUs allowed by HW
  *
@@ -1664,6 +2003,7 @@ void kvm_vgic_destroy(struct kvm *kvm)
 	kfree(dist->irq_spi_target);
 	kfree(dist->irq_pending_on_cpu);
 	kfree(dist->irq_active_on_cpu);
+	vgic_destroy_irq_phys_map(kvm, &dist->irq_phys_map_list);
 	dist->irq_sgi_sources = NULL;
 	dist->irq_spi_cpu = NULL;
 	dist->irq_spi_target = NULL;
@@ -1787,6 +2127,18 @@ static int init_vgic_model(struct kvm *kvm, int type)
 	return 0;
 }
 
+/**
+ * kvm_vgic_early_init - Earliest possible vgic initialization stage
+ *
+ * No memory allocation should be performed here, only static init.
+ */
+void kvm_vgic_early_init(struct kvm *kvm)
+{
+	spin_lock_init(&kvm->arch.vgic.lock);
+	spin_lock_init(&kvm->arch.vgic.irq_phys_map_lock);
+	INIT_LIST_HEAD(&kvm->arch.vgic.irq_phys_map_list);
+}
+
 int kvm_vgic_create(struct kvm *kvm, u32 type)
 {
 	int i, vcpu_lock_idx = -1, ret;
@@ -1832,7 +2184,6 @@ int kvm_vgic_create(struct kvm *kvm, u32 type)
 	if (ret)
 		goto out_unlock;
 
-	spin_lock_init(&kvm->arch.vgic.lock);
 	kvm->arch.vgic.in_kernel = true;
 	kvm->arch.vgic.vgic_model = type;
 	kvm->arch.vgic.vctrl_base = vgic->vctrl_base;

+ 6 - 2
virt/kvm/irqchip.c

@@ -213,11 +213,15 @@ int kvm_set_irq_routing(struct kvm *kvm,
 			goto out;
 
 		r = -EINVAL;
-		if (ue->flags)
+		if (ue->flags) {
+			kfree(e);
 			goto out;
+		}
 		r = setup_routing_entry(new, e, ue);
-		if (r)
+		if (r) {
+			kfree(e);
 			goto out;
+		}
 		++ue;
 	}
 

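The two hunks above fix a memory leak: setup_routing_entry() only takes ownership of "e" on success, so both early exits from the loop must free it. Reduced to its shape, a sketch of the loop body with the error-code bookkeeping elided:

	e = kzalloc(sizeof(*e), GFP_KERNEL);
	if (!e)
		goto out;
	if (ue->flags || setup_routing_entry(new, e, ue)) {
		kfree(e);	/* previously leaked on these paths */
		goto out;
	}
	++ue;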
+ 58 - 4
virt/kvm/kvm_main.c

@@ -66,9 +66,18 @@
 MODULE_AUTHOR("Qumranet");
 MODULE_LICENSE("GPL");
 
-static unsigned int halt_poll_ns;
+/* halt polling only reduces halt latency by 5-7 us, 500us is enough */
+static unsigned int halt_poll_ns = 500000;
 module_param(halt_poll_ns, uint, S_IRUGO | S_IWUSR);
 
+/* Default doubles per-vcpu halt_poll_ns. */
+static unsigned int halt_poll_ns_grow = 2;
+module_param(halt_poll_ns_grow, int, S_IRUGO);
+
+/* Default resets per-vcpu halt_poll_ns. */
+static unsigned int halt_poll_ns_shrink;
+module_param(halt_poll_ns_shrink, int, S_IRUGO);
+
 /*
  * Ordering of locks:
  *
@@ -217,6 +226,7 @@ int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
 	vcpu->kvm = kvm;
 	vcpu->vcpu_id = id;
 	vcpu->pid = NULL;
+	vcpu->halt_poll_ns = 0;
 	init_waitqueue_head(&vcpu->wq);
 	kvm_async_pf_vcpu_init(vcpu);
 
@@ -1906,6 +1916,35 @@ void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn)
 }
 EXPORT_SYMBOL_GPL(kvm_vcpu_mark_page_dirty);
 
+static void grow_halt_poll_ns(struct kvm_vcpu *vcpu)
+{
+	int old, val;
+
+	old = val = vcpu->halt_poll_ns;
+	/* 10us base */
+	if (val == 0 && halt_poll_ns_grow)
+		val = 10000;
+	else
+		val *= halt_poll_ns_grow;
+
+	vcpu->halt_poll_ns = val;
+	trace_kvm_halt_poll_ns_grow(vcpu->vcpu_id, val, old);
+}
+
+static void shrink_halt_poll_ns(struct kvm_vcpu *vcpu)
+{
+	int old, val;
+
+	old = val = vcpu->halt_poll_ns;
+	if (halt_poll_ns_shrink == 0)
+		val = 0;
+	else
+		val /= halt_poll_ns_shrink;
+
+	vcpu->halt_poll_ns = val;
+	trace_kvm_halt_poll_ns_shrink(vcpu->vcpu_id, val, old);
+}
+
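With the defaults above (halt_poll_ns_grow = 2, halt_poll_ns_shrink = 0), the arithmetic works out as follows; a worked example, not additional kernel code:

/*
 * grow_halt_poll_ns():   0 -> 10000 -> 20000 -> 40000 -> ... (ns)
 *                        a zero window starts at the 10us base and
 *                        doubles on each subsequent grow
 * shrink_halt_poll_ns(): any value -> 0 with the default shrink of 0;
 *                        a shrink of 2 would halve the window instead
 */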
 static int kvm_vcpu_check_block(struct kvm_vcpu *vcpu)
 {
 	if (kvm_arch_vcpu_runnable(vcpu)) {
@@ -1928,10 +1967,11 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
 	ktime_t start, cur;
 	DEFINE_WAIT(wait);
 	bool waited = false;
+	u64 block_ns;
 
 	start = cur = ktime_get();
-	if (halt_poll_ns) {
-		ktime_t stop = ktime_add_ns(ktime_get(), halt_poll_ns);
+	if (vcpu->halt_poll_ns) {
+		ktime_t stop = ktime_add_ns(ktime_get(), vcpu->halt_poll_ns);
 
 		do {
 			/*
@@ -1960,7 +2000,21 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
 	cur = ktime_get();
 
 out:
-	trace_kvm_vcpu_wakeup(ktime_to_ns(cur) - ktime_to_ns(start), waited);
+	block_ns = ktime_to_ns(cur) - ktime_to_ns(start);
+
+	if (halt_poll_ns) {
+		if (block_ns <= vcpu->halt_poll_ns)
+			;
+		/* we had a long block, shrink polling */
+		else if (vcpu->halt_poll_ns && block_ns > halt_poll_ns)
+			shrink_halt_poll_ns(vcpu);
+		/* we had a short halt and our poll time is too small */
+		else if (vcpu->halt_poll_ns < halt_poll_ns &&
+			block_ns < halt_poll_ns)
+			grow_halt_poll_ns(vcpu);
+	}
+
+	trace_kvm_vcpu_wakeup(block_ns, waited);
 }
 EXPORT_SYMBOL_GPL(kvm_vcpu_block);
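A worked example of the tuning rule above, assuming the default halt_poll_ns of 500000 ns and a current per-vcpu window of 10000 ns:

/*
 * block_ns =   8000: within the window, polling paid off, keep 10000
 * block_ns = 100000: woke after polling but still under 500000, the
 *                    window was too small, grow it to 20000
 * block_ns = 600000: longer than halt_poll_ns, polling was wasted
 *                    effort, shrink (reset to 0 by default)
 */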