@@ -46,10 +46,9 @@ static const struct kvm_irq_level default_vtimer_irq = {
 	.level	= 1,
 };
 
-void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
-{
-	vcpu_vtimer(vcpu)->active_cleared_last = false;
-}
+static bool kvm_timer_irq_can_fire(struct arch_timer_context *timer_ctx);
+static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level,
+				 struct arch_timer_context *timer_ctx);
 
 u64 kvm_phys_timer_read(void)
 {
@@ -69,17 +68,45 @@ static void soft_timer_cancel(struct hrtimer *hrt, struct work_struct *work)
 		cancel_work_sync(work);
 }
 
-static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id)
+static void kvm_vtimer_update_mask_user(struct kvm_vcpu *vcpu)
 {
-	struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)dev_id;
+	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
 
 	/*
-	 * We disable the timer in the world switch and let it be
-	 * handled by kvm_timer_sync_hwstate(). Getting a timer
-	 * interrupt at this point is a sure sign of some major
-	 * breakage.
+	 * When using a userspace irqchip with the architected timers, we must
+	 * prevent continuously exiting from the guest, and therefore mask the
+	 * physical interrupt by disabling it on the host interrupt controller
+	 * when the virtual level is high, such that the guest can make
+	 * forward progress. Once we detect the output level being
+	 * de-asserted, we unmask the interrupt again so that we exit from the
+	 * guest when the timer fires.
 	 */
-	pr_warn("Unexpected interrupt %d on vcpu %p\n", irq, vcpu);
+	if (vtimer->irq.level)
+		disable_percpu_irq(host_vtimer_irq);
+	else
+		enable_percpu_irq(host_vtimer_irq, 0);
+}
+
+static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id)
+{
+	struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)dev_id;
+	struct arch_timer_context *vtimer;
+
+	if (!vcpu) {
+		pr_warn_once("Spurious arch timer IRQ on non-VCPU thread\n");
+		return IRQ_NONE;
+	}
+	vtimer = vcpu_vtimer(vcpu);
+
+	if (!vtimer->irq.level) {
+		vtimer->cnt_ctl = read_sysreg_el0(cntv_ctl);
+		if (kvm_timer_irq_can_fire(vtimer))
+			kvm_timer_update_irq(vcpu, true, vtimer);
+	}
+
+	if (unlikely(!irqchip_in_kernel(vcpu->kvm)))
+		kvm_vtimer_update_mask_user(vcpu);
+
 	return IRQ_HANDLED;
 }
 
@@ -215,7 +242,6 @@ static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level,
 {
 	int ret;
 
-	timer_ctx->active_cleared_last = false;
 	timer_ctx->irq.level = new_level;
 	trace_kvm_timer_update_irq(vcpu->vcpu_id, timer_ctx->irq.irq,
 				   timer_ctx->irq.level);
@@ -271,10 +297,16 @@ static void phys_timer_emulate(struct kvm_vcpu *vcpu,
 	soft_timer_start(&timer->phys_timer, kvm_timer_compute_delta(timer_ctx));
 }
 
-static void timer_save_state(struct kvm_vcpu *vcpu)
+static void vtimer_save_state(struct kvm_vcpu *vcpu)
 {
 	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
 	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
+	unsigned long flags;
+
+	local_irq_save(flags);
+
+	if (!vtimer->loaded)
+		goto out;
 
 	if (timer->enabled) {
 		vtimer->cnt_ctl = read_sysreg_el0(cntv_ctl);
@@ -283,6 +315,10 @@ static void timer_save_state(struct kvm_vcpu *vcpu)
 
 	/* Disable the virtual timer */
 	write_sysreg_el0(0, cntv_ctl);
+
+	vtimer->loaded = false;
+out:
+	local_irq_restore(flags);
 }
 
 /*
@@ -296,6 +332,8 @@ void kvm_timer_schedule(struct kvm_vcpu *vcpu)
 	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
 	struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
 
+	vtimer_save_state(vcpu);
+
 	/*
 	 * No need to schedule a background timer if any guest timer has
 	 * already expired, because kvm_vcpu_block will return before putting
@@ -318,22 +356,34 @@ void kvm_timer_schedule(struct kvm_vcpu *vcpu)
 	soft_timer_start(&timer->bg_timer, kvm_timer_earliest_exp(vcpu));
 }
 
-static void timer_restore_state(struct kvm_vcpu *vcpu)
+static void vtimer_restore_state(struct kvm_vcpu *vcpu)
 {
 	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
 	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
+	unsigned long flags;
+
+	local_irq_save(flags);
+
+	if (vtimer->loaded)
+		goto out;
 
 	if (timer->enabled) {
 		write_sysreg_el0(vtimer->cnt_cval, cntv_cval);
 		isb();
 		write_sysreg_el0(vtimer->cnt_ctl, cntv_ctl);
 	}
+
+	vtimer->loaded = true;
+out:
+	local_irq_restore(flags);
 }
 
 void kvm_timer_unschedule(struct kvm_vcpu *vcpu)
 {
 	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
 
+	vtimer_restore_state(vcpu);
+
 	soft_timer_cancel(&timer->bg_timer, &timer->expired);
 }
 
@@ -352,61 +402,45 @@ static void set_cntvoff(u64 cntvoff)
 	kvm_call_hyp(__kvm_timer_set_cntvoff, low, high);
 }
 
-static void kvm_timer_flush_hwstate_vgic(struct kvm_vcpu *vcpu)
+static void kvm_timer_vcpu_load_vgic(struct kvm_vcpu *vcpu)
 {
 	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
 	bool phys_active;
 	int ret;
 
-	/*
-	 * If we enter the guest with the virtual input level to the VGIC
-	 * asserted, then we have already told the VGIC what we need to, and
-	 * we don't need to exit from the guest until the guest deactivates
-	 * the already injected interrupt, so therefore we should set the
-	 * hardware active state to prevent unnecessary exits from the guest.
-	 *
-	 * Also, if we enter the guest with the virtual timer interrupt active,
-	 * then it must be active on the physical distributor, because we set
-	 * the HW bit and the guest must be able to deactivate the virtual and
-	 * physical interrupt at the same time.
-	 *
-	 * Conversely, if the virtual input level is deasserted and the virtual
-	 * interrupt is not active, then always clear the hardware active state
-	 * to ensure that hardware interrupts from the timer triggers a guest
-	 * exit.
-	 */
 	phys_active = vtimer->irq.level ||
-			kvm_vgic_map_is_active(vcpu, vtimer->irq.irq);
-
-	/*
-	 * We want to avoid hitting the (re)distributor as much as
-	 * possible, as this is a potentially expensive MMIO access
-	 * (not to mention locks in the irq layer), and a solution for
-	 * this is to cache the "active" state in memory.
-	 *
-	 * Things to consider: we cannot cache an "active set" state,
-	 * because the HW can change this behind our back (it becomes
-	 * "clear" in the HW). We must then restrict the caching to
-	 * the "clear" state.
-	 *
-	 * The cache is invalidated on:
-	 * - vcpu put, indicating that the HW cannot be trusted to be
-	 *   in a sane state on the next vcpu load,
-	 * - any change in the interrupt state
-	 *
-	 * Usage conditions:
-	 * - cached value is "active clear"
-	 * - value to be programmed is "active clear"
-	 */
-	if (vtimer->active_cleared_last && !phys_active)
-		return;
+		      kvm_vgic_map_is_active(vcpu, vtimer->irq.irq);
 
 	ret = irq_set_irqchip_state(host_vtimer_irq,
 				    IRQCHIP_STATE_ACTIVE,
 				    phys_active);
 	WARN_ON(ret);
+}
 
-	vtimer->active_cleared_last = !phys_active;
+static void kvm_timer_vcpu_load_user(struct kvm_vcpu *vcpu)
+{
+	kvm_vtimer_update_mask_user(vcpu);
+}
+
+void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu)
+{
+	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
+
+	if (unlikely(!timer->enabled))
+		return;
+
+	if (unlikely(!irqchip_in_kernel(vcpu->kvm)))
+		kvm_timer_vcpu_load_user(vcpu);
+	else
+		kvm_timer_vcpu_load_vgic(vcpu);
+
+	set_cntvoff(vtimer->cntvoff);
+
+	vtimer_restore_state(vcpu);
+
+	if (has_vhe())
+		disable_el1_phys_timer_access();
 }
 
 bool kvm_timer_should_notify_user(struct kvm_vcpu *vcpu)
@@ -426,23 +460,6 @@ bool kvm_timer_should_notify_user(struct kvm_vcpu *vcpu)
 	       ptimer->irq.level != plevel;
 }
 
-static void kvm_timer_flush_hwstate_user(struct kvm_vcpu *vcpu)
-{
-	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
-
-	/*
-	 * To prevent continuously exiting from the guest, we mask the
-	 * physical interrupt such that the guest can make forward progress.
-	 * Once we detect the output level being deasserted, we unmask the
-	 * interrupt again so that we exit from the guest when the timer
-	 * fires.
-	 */
-	if (vtimer->irq.level)
-		disable_percpu_irq(host_vtimer_irq);
-	else
-		enable_percpu_irq(host_vtimer_irq, 0);
-}
-
 /**
  * kvm_timer_flush_hwstate - prepare timers before running the vcpu
  * @vcpu: The vcpu pointer
@@ -455,23 +472,61 @@ static void kvm_timer_flush_hwstate_user(struct kvm_vcpu *vcpu)
 void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu)
 {
 	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
-	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
+	struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
 
 	if (unlikely(!timer->enabled))
 		return;
 
-	kvm_timer_update_state(vcpu);
+	if (kvm_timer_should_fire(ptimer) != ptimer->irq.level)
+		kvm_timer_update_irq(vcpu, !ptimer->irq.level, ptimer);
 
 	/* Set the background timer for the physical timer emulation. */
 	phys_timer_emulate(vcpu, vcpu_ptimer(vcpu));
+}
 
-	if (unlikely(!irqchip_in_kernel(vcpu->kvm)))
-		kvm_timer_flush_hwstate_user(vcpu);
-	else
-		kvm_timer_flush_hwstate_vgic(vcpu);
+void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
+{
+	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
 
-	set_cntvoff(vtimer->cntvoff);
-	timer_restore_state(vcpu);
+	if (unlikely(!timer->enabled))
+		return;
+
+	if (has_vhe())
+		enable_el1_phys_timer_access();
+
+	vtimer_save_state(vcpu);
+
+	/*
+	 * The kernel may decide to run userspace after calling vcpu_put, so
+	 * we reset cntvoff to 0 to ensure a consistent read between user
+	 * accesses to the virtual counter and kernel access to the physical
+	 * counter.
+	 */
+	set_cntvoff(0);
+}
+
+static void unmask_vtimer_irq(struct kvm_vcpu *vcpu)
+{
+	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
+
+	if (unlikely(!irqchip_in_kernel(vcpu->kvm))) {
+		kvm_vtimer_update_mask_user(vcpu);
+		return;
+	}
+
+	/*
+	 * If the guest disabled the timer without acking the interrupt, then
+	 * we must make sure the physical and virtual active states are in
+	 * sync by deactivating the physical interrupt, because otherwise we
+	 * wouldn't see the next timer interrupt in the host.
+	 */
+	if (!kvm_vgic_map_is_active(vcpu, vtimer->irq.irq)) {
+		int ret;
+		ret = irq_set_irqchip_state(host_vtimer_irq,
+					    IRQCHIP_STATE_ACTIVE,
+					    false);
+		WARN_ON(ret);
+	}
 }
 
 /**
@@ -484,6 +539,7 @@ void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu)
 void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu)
 {
 	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
 
 	/*
 	 * This is to cancel the background timer for the physical timer
@@ -491,14 +547,19 @@ void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu)
 	 */
 	soft_timer_cancel(&timer->phys_timer, NULL);
 
-	timer_save_state(vcpu);
-	set_cntvoff(0);
-
 	/*
-	 * The guest could have modified the timer registers or the timer
-	 * could have expired, update the timer state.
+	 * If we entered the guest with the vtimer output asserted we have to
+	 * check if the guest has modified the timer so that we should lower
+	 * the line at this point.
 	 */
-	kvm_timer_update_state(vcpu);
+	if (vtimer->irq.level) {
+		vtimer->cnt_ctl = read_sysreg_el0(cntv_ctl);
+		vtimer->cnt_cval = read_sysreg_el0(cntv_cval);
+		if (!kvm_timer_should_fire(vtimer)) {
+			kvm_timer_update_irq(vcpu, false, vtimer);
+			unmask_vtimer_irq(vcpu);
+		}
+	}
 }
 
 int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu)