@@ -46,10 +46,9 @@ static const struct kvm_irq_level default_vtimer_irq = {
 	.level	= 1,
 };
 
-void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
-{
-	vcpu_vtimer(vcpu)->active_cleared_last = false;
-}
+static bool kvm_timer_irq_can_fire(struct arch_timer_context *timer_ctx);
+static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level,
+				 struct arch_timer_context *timer_ctx);
 
 u64 kvm_phys_timer_read(void)
 {
@@ -69,17 +68,45 @@ static void soft_timer_cancel(struct hrtimer *hrt, struct work_struct *work)
 		cancel_work_sync(work);
 }
 
-static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id)
+static void kvm_vtimer_update_mask_user(struct kvm_vcpu *vcpu)
 {
-	struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)dev_id;
+	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
 
 	/*
-	 * We disable the timer in the world switch and let it be
-	 * handled by kvm_timer_sync_hwstate(). Getting a timer
-	 * interrupt at this point is a sure sign of some major
-	 * breakage.
+	 * When using a userspace irqchip with the architected timers, we must
+	 * prevent continuously exiting from the guest, and therefore mask the
+	 * physical interrupt by disabling it on the host interrupt controller
+	 * when the virtual level is high, such that the guest can make
+	 * forward progress. Once we detect the output level being
+	 * de-asserted, we unmask the interrupt again so that we exit from the
+	 * guest when the timer fires.
 	 */
-	pr_warn("Unexpected interrupt %d on vcpu %p\n", irq, vcpu);
+	if (vtimer->irq.level)
+		disable_percpu_irq(host_vtimer_irq);
+	else
+		enable_percpu_irq(host_vtimer_irq, 0);
+}
+
+static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id)
+{
+	struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)dev_id;
+	struct arch_timer_context *vtimer;
+
+	if (!vcpu) {
+		pr_warn_once("Spurious arch timer IRQ on non-VCPU thread\n");
+		return IRQ_NONE;
+	}
+	vtimer = vcpu_vtimer(vcpu);
+
+	if (!vtimer->irq.level) {
+		vtimer->cnt_ctl = read_sysreg_el0(cntv_ctl);
+		if (kvm_timer_irq_can_fire(vtimer))
+			kvm_timer_update_irq(vcpu, true, vtimer);
+	}
+
+	if (unlikely(!irqchip_in_kernel(vcpu->kvm)))
+		kvm_vtimer_update_mask_user(vcpu);
+
 	return IRQ_HANDLED;
 }
 
@@ -215,7 +242,6 @@ static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level,
 {
 	int ret;
 
-	timer_ctx->active_cleared_last = false;
 	timer_ctx->irq.level = new_level;
 	trace_kvm_timer_update_irq(vcpu->vcpu_id, timer_ctx->irq.irq,
 				   timer_ctx->irq.level);
@@ -271,10 +297,16 @@ static void phys_timer_emulate(struct kvm_vcpu *vcpu,
 	soft_timer_start(&timer->phys_timer, kvm_timer_compute_delta(timer_ctx));
 }
 
-static void timer_save_state(struct kvm_vcpu *vcpu)
+static void vtimer_save_state(struct kvm_vcpu *vcpu)
 {
 	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
 	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
+	unsigned long flags;
+
+	local_irq_save(flags);
+
+	if (!vtimer->loaded)
+		goto out;
 
 	if (timer->enabled) {
 		vtimer->cnt_ctl = read_sysreg_el0(cntv_ctl);
@@ -283,6 +315,10 @@ static void timer_save_state(struct kvm_vcpu *vcpu)
 
 	/* Disable the virtual timer */
 	write_sysreg_el0(0, cntv_ctl);
+
+	vtimer->loaded = false;
+out:
+	local_irq_restore(flags);
 }
 
 /*
@@ -296,6 +332,8 @@ void kvm_timer_schedule(struct kvm_vcpu *vcpu)
 	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
 	struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
 
+	vtimer_save_state(vcpu);
+
 	/*
 	 * No need to schedule a background timer if any guest timer has
 	 * already expired, because kvm_vcpu_block will return before putting
@@ -318,22 +356,34 @@ void kvm_timer_schedule(struct kvm_vcpu *vcpu)
 	soft_timer_start(&timer->bg_timer, kvm_timer_earliest_exp(vcpu));
 }
 
-static void timer_restore_state(struct kvm_vcpu *vcpu)
+static void vtimer_restore_state(struct kvm_vcpu *vcpu)
 {
 	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
 	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
+	unsigned long flags;
+
+	local_irq_save(flags);
+
+	if (vtimer->loaded)
+		goto out;
 
 	if (timer->enabled) {
 		write_sysreg_el0(vtimer->cnt_cval, cntv_cval);
 		isb();
 		write_sysreg_el0(vtimer->cnt_ctl, cntv_ctl);
 	}
+
+	vtimer->loaded = true;
+out:
+	local_irq_restore(flags);
 }
 
 void kvm_timer_unschedule(struct kvm_vcpu *vcpu)
 {
 	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
 
+	vtimer_restore_state(vcpu);
+
 	soft_timer_cancel(&timer->bg_timer, &timer->expired);
 }
 
@@ -352,61 +402,45 @@ static void set_cntvoff(u64 cntvoff)
 	kvm_call_hyp(__kvm_timer_set_cntvoff, low, high);
 }
 
-static void kvm_timer_flush_hwstate_vgic(struct kvm_vcpu *vcpu)
+static void kvm_timer_vcpu_load_vgic(struct kvm_vcpu *vcpu)
 {
 	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
 	bool phys_active;
 	int ret;
 
-	/*
-	 * If we enter the guest with the virtual input level to the VGIC
-	 * asserted, then we have already told the VGIC what we need to, and
-	 * we don't need to exit from the guest until the guest deactivates
-	 * the already injected interrupt, so therefore we should set the
-	 * hardware active state to prevent unnecessary exits from the guest.
-	 *
-	 * Also, if we enter the guest with the virtual timer interrupt active,
-	 * then it must be active on the physical distributor, because we set
-	 * the HW bit and the guest must be able to deactivate the virtual and
-	 * physical interrupt at the same time.
-	 *
-	 * Conversely, if the virtual input level is deasserted and the virtual
-	 * interrupt is not active, then always clear the hardware active state
-	 * to ensure that hardware interrupts from the timer triggers a guest
-	 * exit.
-	 */
 	phys_active = vtimer->irq.level ||
-			kvm_vgic_map_is_active(vcpu, vtimer->irq.irq);
-
-	/*
-	 * We want to avoid hitting the (re)distributor as much as
-	 * possible, as this is a potentially expensive MMIO access
-	 * (not to mention locks in the irq layer), and a solution for
-	 * this is to cache the "active" state in memory.
-	 *
-	 * Things to consider: we cannot cache an "active set" state,
-	 * because the HW can change this behind our back (it becomes
-	 * "clear" in the HW). We must then restrict the caching to
-	 * the "clear" state.
-	 *
-	 * The cache is invalidated on:
-	 * - vcpu put, indicating that the HW cannot be trusted to be
-	 *   in a sane state on the next vcpu load,
-	 * - any change in the interrupt state
-	 *
-	 * Usage conditions:
-	 * - cached value is "active clear"
-	 * - value to be programmed is "active clear"
-	 */
-	if (vtimer->active_cleared_last && !phys_active)
-		return;
+		      kvm_vgic_map_is_active(vcpu, vtimer->irq.irq);
 
 	ret = irq_set_irqchip_state(host_vtimer_irq,
 				    IRQCHIP_STATE_ACTIVE,
 				    phys_active);
 	WARN_ON(ret);
+}
 
-	vtimer->active_cleared_last = !phys_active;
+static void kvm_timer_vcpu_load_user(struct kvm_vcpu *vcpu)
+{
+	kvm_vtimer_update_mask_user(vcpu);
+}
+
+void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu)
+{
+	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
+
+	if (unlikely(!timer->enabled))
+		return;
+
+	if (unlikely(!irqchip_in_kernel(vcpu->kvm)))
+		kvm_timer_vcpu_load_user(vcpu);
+	else
+		kvm_timer_vcpu_load_vgic(vcpu);
+
+	set_cntvoff(vtimer->cntvoff);
+
+	vtimer_restore_state(vcpu);
+
+	if (has_vhe())
+		disable_el1_phys_timer_access();
 }
 
 bool kvm_timer_should_notify_user(struct kvm_vcpu *vcpu)
@@ -426,23 +460,6 @@ bool kvm_timer_should_notify_user(struct kvm_vcpu *vcpu)
 	       ptimer->irq.level != plevel;
 }
 
-static void kvm_timer_flush_hwstate_user(struct kvm_vcpu *vcpu)
-{
-	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
-
-	/*
-	 * To prevent continuously exiting from the guest, we mask the
-	 * physical interrupt such that the guest can make forward progress.
-	 * Once we detect the output level being deasserted, we unmask the
-	 * interrupt again so that we exit from the guest when the timer
-	 * fires.
-	 */
-	if (vtimer->irq.level)
-		disable_percpu_irq(host_vtimer_irq);
-	else
-		enable_percpu_irq(host_vtimer_irq, 0);
-}
-
 /**
  * kvm_timer_flush_hwstate - prepare timers before running the vcpu
  * @vcpu: The vcpu pointer
@@ -455,23 +472,61 @@ static void kvm_timer_flush_hwstate_user(struct kvm_vcpu *vcpu)
 void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu)
 {
 	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
-	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
+	struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
 
 	if (unlikely(!timer->enabled))
 		return;
 
-	kvm_timer_update_state(vcpu);
+	if (kvm_timer_should_fire(ptimer) != ptimer->irq.level)
+		kvm_timer_update_irq(vcpu, !ptimer->irq.level, ptimer);
 
 	/* Set the background timer for the physical timer emulation. */
 	phys_timer_emulate(vcpu, vcpu_ptimer(vcpu));
+}
 
-	if (unlikely(!irqchip_in_kernel(vcpu->kvm)))
-		kvm_timer_flush_hwstate_user(vcpu);
-	else
-		kvm_timer_flush_hwstate_vgic(vcpu);
+void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
+{
+	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
 
-	set_cntvoff(vtimer->cntvoff);
-	timer_restore_state(vcpu);
+	if (unlikely(!timer->enabled))
+		return;
+
+	if (has_vhe())
+		enable_el1_phys_timer_access();
+
+	vtimer_save_state(vcpu);
+
+	/*
+	 * The kernel may decide to run userspace after calling vcpu_put, so
+	 * we reset cntvoff to 0 to ensure a consistent read between user
+	 * accesses to the virtual counter and kernel access to the physical
+	 * counter.
+	 */
+	set_cntvoff(0);
+}
+
+static void unmask_vtimer_irq(struct kvm_vcpu *vcpu)
+{
+	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
+
+	if (unlikely(!irqchip_in_kernel(vcpu->kvm))) {
+		kvm_vtimer_update_mask_user(vcpu);
+		return;
+	}
+
+	/*
+	 * If the guest disabled the timer without acking the interrupt, then
+	 * we must make sure the physical and virtual active states are in
+	 * sync by deactivating the physical interrupt, because otherwise we
+	 * wouldn't see the next timer interrupt in the host.
+	 */
+	if (!kvm_vgic_map_is_active(vcpu, vtimer->irq.irq)) {
+		int ret;
+		ret = irq_set_irqchip_state(host_vtimer_irq,
+					    IRQCHIP_STATE_ACTIVE,
+					    false);
+		WARN_ON(ret);
+	}
 }
 
 /**
@@ -484,6 +539,7 @@ void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu)
 void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu)
 {
 	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
 
 	/*
 	 * This is to cancel the background timer for the physical timer
@@ -491,14 +547,19 @@ void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu)
 	 */
 	soft_timer_cancel(&timer->phys_timer, NULL);
 
-	timer_save_state(vcpu);
-	set_cntvoff(0);
-
 	/*
-	 * The guest could have modified the timer registers or the timer
-	 * could have expired, update the timer state.
+	 * If we entered the guest with the vtimer output asserted we have to
+	 * check if the guest has modified the timer so that we should lower
+	 * the line at this point.
 	 */
-	kvm_timer_update_state(vcpu);
+	if (vtimer->irq.level) {
+		vtimer->cnt_ctl = read_sysreg_el0(cntv_ctl);
+		vtimer->cnt_cval = read_sysreg_el0(cntv_cval);
+		if (!kvm_timer_should_fire(vtimer)) {
+			kvm_timer_update_irq(vcpu, false, vtimer);
+			unmask_vtimer_irq(vcpu);
+		}
+	}
 }
 
 int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu)