@@ -53,11 +53,15 @@
 #include <asm/smp.h>
 #include <asm/dbell.h>
 #include <asm/hmi.h>
+#include <asm/pnv-pci.h>
 #include <linux/gfp.h>
 #include <linux/vmalloc.h>
 #include <linux/highmem.h>
 #include <linux/hugetlb.h>
+#include <linux/kvm_irqfd.h>
+#include <linux/irqbypass.h>
 #include <linux/module.h>
+#include <linux/compiler.h>
 
 #include "book3s.h"
 
@@ -70,6 +74,8 @@
 
 /* Used to indicate that a guest page fault needs to be handled */
 #define RESUME_PAGE_FAULT	(RESUME_GUEST | RESUME_FLAG_ARCH1)
+/* Used to indicate that a guest passthrough interrupt needs to be handled */
+#define RESUME_PASSTHROUGH	(RESUME_GUEST | RESUME_FLAG_ARCH2)
 
 /* Used as a "null" value for timebase values */
 #define TB_NIL	(~(u64)0)
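
RESUME_PASSTHROUGH reuses the RESUME_GUEST flag scheme: RESUME_GUEST is 0 and the RESUME_FLAG_* values are individual bits, so the new code is simply RESUME_FLAG_ARCH2 with RESUME_FLAG_HOST clear. For reference, the encoding in arch/powerpc/include/asm/kvm_ppc.h looks like this at the time of this series (shown for context, not part of the patch):

	#define RESUME_FLAG_NV		(1<<0)	/* Reload guest nonvolatile state? */
	#define RESUME_FLAG_HOST	(1<<1)	/* Resume host? */
	#define RESUME_FLAG_ARCH1	(1<<2)
	#define RESUME_FLAG_ARCH2	(1<<3)

	#define RESUME_GUEST		0
	#define RESUME_HOST		RESUME_FLAG_HOST

Because RESUME_FLAG_HOST stays clear, the run loop treats a passthrough exit as "stay on the guest-entry path" and handles it before deciding whether to return to userspace.
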
@@ -89,14 +95,55 @@ static struct kernel_param_ops module_param_ops = {
 	.get = param_get_int,
 };
 
+module_param_cb(kvm_irq_bypass, &module_param_ops, &kvm_irq_bypass,
+							S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(kvm_irq_bypass, "Bypass passthrough interrupt optimization");
+
 module_param_cb(h_ipi_redirect, &module_param_ops, &h_ipi_redirect,
 							S_IRUGO | S_IWUSR);
 MODULE_PARM_DESC(h_ipi_redirect, "Redirect H_IPI wakeup to a free host core");
 #endif
 
+/* Maximum halt poll interval defaults to KVM_HALT_POLL_NS_DEFAULT */
+static unsigned int halt_poll_max_ns = KVM_HALT_POLL_NS_DEFAULT;
+module_param(halt_poll_max_ns, uint, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(halt_poll_max_ns, "Maximum halt poll time in ns");
+
+/* Factor by which the vcore halt poll interval is grown, default is to double
+ */
+static unsigned int halt_poll_ns_grow = 2;
+module_param(halt_poll_ns_grow, int, S_IRUGO);
+MODULE_PARM_DESC(halt_poll_ns_grow, "Factor halt poll time is grown by");
+
+/* Factor by which the vcore halt poll interval is shrunk, default is to reset
+ */
+static unsigned int halt_poll_ns_shrink;
+module_param(halt_poll_ns_shrink, int, S_IRUGO);
+MODULE_PARM_DESC(halt_poll_ns_shrink, "Factor halt poll time is shrunk by");
+
 static void kvmppc_end_cede(struct kvm_vcpu *vcpu);
 static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);
 
+static inline struct kvm_vcpu *next_runnable_thread(struct kvmppc_vcore *vc,
+						    int *ip)
+{
+	int i = *ip;
+	struct kvm_vcpu *vcpu;
+
+	while (++i < MAX_SMT_THREADS) {
+		vcpu = READ_ONCE(vc->runnable_threads[i]);
+		if (vcpu) {
+			*ip = i;
+			return vcpu;
+		}
+	}
+	return NULL;
+}
+
+/* Used to traverse the list of runnable threads for a given vcore */
+#define for_each_runnable_thread(i, vcpu, vc)	\
+	for (i = -1; (vcpu = next_runnable_thread(vc, &i)); )
+
 static bool kvmppc_ipi_thread(int cpu)
 {
 	/* On POWER8 for IPIs to threads in the same core, use msgsnd */
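
The runnable-thread list becomes a fixed array indexed by the vcpu's thread ID (ptid). Slots are claimed and cleared with WRITE_ONCE() (see the kvmppc_run_vcpu and kvmppc_remove_runnable hunks below) and the iterator skips NULL slots, so unlike list_for_each_entry_safe() the current entry can be removed mid-walk with no special bookkeeping; it also lets real-mode code scan runnable threads without taking the vcore lock. A minimal userspace sketch of the pattern, with mock types and names (vcpu_demo, vcore_demo) and the kernel-only READ_ONCE() omitted:

	#include <stdio.h>
	#include <stddef.h>

	#define MAX_THREADS_DEMO 8

	struct vcpu_demo { int id; };

	struct vcore_demo {
		struct vcpu_demo *runnable_threads[MAX_THREADS_DEMO];
	};

	static struct vcpu_demo *next_runnable(struct vcore_demo *vc, int *ip)
	{
		int i = *ip;

		/* The array may be sparse: only slots holding a vcpu are visited */
		while (++i < MAX_THREADS_DEMO) {
			if (vc->runnable_threads[i]) {
				*ip = i;
				return vc->runnable_threads[i];
			}
		}
		return NULL;
	}

	#define for_each_runnable_demo(i, vcpu, vc) \
		for (i = -1; (vcpu = next_runnable(vc, &i)); )

	int main(void)
	{
		struct vcpu_demo a = { 0 }, b = { 5 };
		struct vcore_demo vc = { { NULL } };
		struct vcpu_demo *v;
		int i;

		vc.runnable_threads[0] = &a;	/* ptid 0 */
		vc.runnable_threads[5] = &b;	/* ptid 5; slots 1-4 stay empty */

		for_each_runnable_demo(i, v, &vc) {
			/* Removing v here would be safe: just NULL its slot */
			printf("slot %d -> vcpu %d\n", i, v->id);
		}
		return 0;
	}
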
@@ -991,6 +1038,9 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
 		r = RESUME_GUEST;
 		break;
+	case BOOK3S_INTERRUPT_HV_RM_HARD:
+		r = RESUME_PASSTHROUGH;
+		break;
 	default:
 		kvmppc_dump_regs(vcpu);
 		printk(KERN_EMERG "trap=0x%x | pc=0x%lx | msr=0x%llx\n",
@@ -1493,7 +1543,6 @@ static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int core)
 	if (vcore == NULL)
 		return NULL;
 
-	INIT_LIST_HEAD(&vcore->runnable_threads);
 	spin_lock_init(&vcore->lock);
 	spin_lock_init(&vcore->stoltb_lock);
 	init_swait_queue_head(&vcore->wq);
@@ -1802,7 +1851,7 @@ static void kvmppc_remove_runnable(struct kvmppc_vcore *vc,
 	vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
 	spin_unlock_irq(&vcpu->arch.tbacct_lock);
 	--vc->n_runnable;
-	list_del(&vcpu->arch.run_list);
+	WRITE_ONCE(vc->runnable_threads[vcpu->arch.ptid], NULL);
 }
 
 static int kvmppc_grab_hwthread(int cpu)
@@ -2209,10 +2258,10 @@ static bool can_piggyback(struct kvmppc_vcore *pvc, struct core_info *cip,
 
 static void prepare_threads(struct kvmppc_vcore *vc)
 {
-	struct kvm_vcpu *vcpu, *vnext;
+	int i;
+	struct kvm_vcpu *vcpu;
 
-	list_for_each_entry_safe(vcpu, vnext, &vc->runnable_threads,
-				 arch.run_list) {
+	for_each_runnable_thread(i, vcpu, vc) {
 		if (signal_pending(vcpu->arch.run_task))
 			vcpu->arch.ret = -EINTR;
 		else if (vcpu->arch.vpa.update_pending ||
@@ -2259,15 +2308,14 @@ static void collect_piggybacks(struct core_info *cip, int target_threads)
 
 static void post_guest_process(struct kvmppc_vcore *vc, bool is_master)
 {
-	int still_running = 0;
+	int still_running = 0, i;
 	u64 now;
 	long ret;
-	struct kvm_vcpu *vcpu, *vnext;
+	struct kvm_vcpu *vcpu;
 
 	spin_lock(&vc->lock);
 	now = get_tb();
-	list_for_each_entry_safe(vcpu, vnext, &vc->runnable_threads,
-				 arch.run_list) {
+	for_each_runnable_thread(i, vcpu, vc) {
 		/* cancel pending dec exception if dec is positive */
 		if (now < vcpu->arch.dec_expires &&
 		    kvmppc_core_pending_dec(vcpu))
@@ -2307,8 +2355,8 @@ static void post_guest_process(struct kvmppc_vcore *vc, bool is_master)
 	}
 	if (vc->n_runnable > 0 && vc->runner == NULL) {
 		/* make sure there's a candidate runner awake */
-		vcpu = list_first_entry(&vc->runnable_threads,
-					struct kvm_vcpu, arch.run_list);
+		i = -1;
+		vcpu = next_runnable_thread(vc, &i);
 		wake_up(&vcpu->arch.cpu_run);
 	}
 }
@@ -2361,7 +2409,7 @@ static inline void kvmppc_set_host_core(int cpu)
  */
 static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 {
-	struct kvm_vcpu *vcpu, *vnext;
+	struct kvm_vcpu *vcpu;
 	int i;
 	int srcu_idx;
 	struct core_info core_info;
@@ -2397,8 +2445,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 	 */
 	if ((threads_per_core > 1) &&
 	    ((vc->num_threads > threads_per_subcore) || !on_primary_thread())) {
-		list_for_each_entry_safe(vcpu, vnext, &vc->runnable_threads,
-					 arch.run_list) {
+		for_each_runnable_thread(i, vcpu, vc) {
 			vcpu->arch.ret = -EBUSY;
 			kvmppc_remove_runnable(vc, vcpu);
 			wake_up(&vcpu->arch.cpu_run);
@@ -2477,8 +2524,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 		active |= 1 << thr;
 		list_for_each_entry(pvc, &core_info.vcs[sub], preempt_list) {
 			pvc->pcpu = pcpu + thr;
-			list_for_each_entry(vcpu, &pvc->runnable_threads,
-					    arch.run_list) {
+			for_each_runnable_thread(i, vcpu, pvc) {
 				kvmppc_start_thread(vcpu, pvc);
 				kvmppc_create_dtl_entry(vcpu, pvc);
 				trace_kvm_guest_enter(vcpu);
@@ -2604,34 +2650,92 @@ static void kvmppc_wait_for_exec(struct kvmppc_vcore *vc,
 	finish_wait(&vcpu->arch.cpu_run, &wait);
 }
 
+static void grow_halt_poll_ns(struct kvmppc_vcore *vc)
+{
+	/* 10us base */
+	if (vc->halt_poll_ns == 0 && halt_poll_ns_grow)
+		vc->halt_poll_ns = 10000;
+	else
+		vc->halt_poll_ns *= halt_poll_ns_grow;
+
+	if (vc->halt_poll_ns > halt_poll_max_ns)
+		vc->halt_poll_ns = halt_poll_max_ns;
+}
+
+static void shrink_halt_poll_ns(struct kvmppc_vcore *vc)
+{
+	if (halt_poll_ns_shrink == 0)
+		vc->halt_poll_ns = 0;
+	else
+		vc->halt_poll_ns /= halt_poll_ns_shrink;
+}
+
+/* Check to see if any of the runnable vcpus on the vcore have pending
+ * exceptions or are no longer ceded
+ */
+static int kvmppc_vcore_check_block(struct kvmppc_vcore *vc)
+{
+	struct kvm_vcpu *vcpu;
+	int i;
+
+	for_each_runnable_thread(i, vcpu, vc) {
+		if (vcpu->arch.pending_exceptions || !vcpu->arch.ceded)
+			return 1;
+	}
+
+	return 0;
+}
+
 /*
  * All the vcpus in this vcore are idle, so wait for a decrementer
  * or external interrupt to one of the vcpus. vc->lock is held.
  */
 static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
 {
-	struct kvm_vcpu *vcpu;
+	ktime_t cur, start_poll, start_wait;
 	int do_sleep = 1;
+	u64 block_ns;
 	DECLARE_SWAITQUEUE(wait);
 
-	prepare_to_swait(&vc->wq, &wait, TASK_INTERRUPTIBLE);
+	/* Poll for pending exceptions and ceded state */
+	cur = start_poll = ktime_get();
+	if (vc->halt_poll_ns) {
+		ktime_t stop = ktime_add_ns(start_poll, vc->halt_poll_ns);
+		++vc->runner->stat.halt_attempted_poll;
 
-	/*
-	 * Check one last time for pending exceptions and ceded state after
-	 * we put ourselves on the wait queue
-	 */
-	list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
-		if (vcpu->arch.pending_exceptions || !vcpu->arch.ceded) {
-			do_sleep = 0;
-			break;
+		vc->vcore_state = VCORE_POLLING;
+		spin_unlock(&vc->lock);
+
+		do {
+			if (kvmppc_vcore_check_block(vc)) {
+				do_sleep = 0;
+				break;
+			}
+			cur = ktime_get();
+		} while (single_task_running() && ktime_before(cur, stop));
+
+		spin_lock(&vc->lock);
+		vc->vcore_state = VCORE_INACTIVE;
+
+		if (!do_sleep) {
+			++vc->runner->stat.halt_successful_poll;
+			goto out;
 		}
 	}
 
-	if (!do_sleep) {
+	prepare_to_swait(&vc->wq, &wait, TASK_INTERRUPTIBLE);
+
+	if (kvmppc_vcore_check_block(vc)) {
 		finish_swait(&vc->wq, &wait);
-		return;
+		do_sleep = 0;
+		/* If we polled, count this as a successful poll */
+		if (vc->halt_poll_ns)
+			++vc->runner->stat.halt_successful_poll;
+		goto out;
 	}
 
+	start_wait = ktime_get();
+
 	vc->vcore_state = VCORE_SLEEPING;
 	trace_kvmppc_vcore_blocked(vc, 0);
 	spin_unlock(&vc->lock);
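
Two details of the polling phase above are worth noting. First, vc->lock is dropped for the duration of the poll (with the vcore parked in the new VCORE_POLLING state), which is safe because kvmppc_vcore_check_block() only performs lockless reads of per-vcpu flags through the array-based iterator. Second, the single_task_running() test ends the poll early whenever another task becomes runnable on this CPU, so busy-waiting never comes at the expense of runnable host work.
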
@@ -2640,13 +2744,52 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
 	spin_lock(&vc->lock);
 	vc->vcore_state = VCORE_INACTIVE;
 	trace_kvmppc_vcore_blocked(vc, 1);
+	++vc->runner->stat.halt_successful_wait;
+
+	cur = ktime_get();
+
+out:
+	block_ns = ktime_to_ns(cur) - ktime_to_ns(start_poll);
+
+	/* Attribute wait time */
+	if (do_sleep) {
+		vc->runner->stat.halt_wait_ns +=
+			ktime_to_ns(cur) - ktime_to_ns(start_wait);
+		/* Attribute failed poll time */
+		if (vc->halt_poll_ns)
+			vc->runner->stat.halt_poll_fail_ns +=
+				ktime_to_ns(start_wait) -
+				ktime_to_ns(start_poll);
+	} else {
+		/* Attribute successful poll time */
+		if (vc->halt_poll_ns)
+			vc->runner->stat.halt_poll_success_ns +=
+				ktime_to_ns(cur) -
+				ktime_to_ns(start_poll);
+	}
+
+	/* Adjust poll time */
+	if (halt_poll_max_ns) {
+		if (block_ns <= vc->halt_poll_ns)
+			;
+		/* We slept and blocked for longer than the max halt time */
+		else if (vc->halt_poll_ns && block_ns > halt_poll_max_ns)
+			shrink_halt_poll_ns(vc);
+		/* We slept and our poll time is too small */
+		else if (vc->halt_poll_ns < halt_poll_max_ns &&
+			 block_ns < halt_poll_max_ns)
+			grow_halt_poll_ns(vc);
+	} else
+		vc->halt_poll_ns = 0;
+
+	trace_kvmppc_vcore_wakeup(do_sleep, block_ns);
 }
 
 static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 {
-	int n_ceded;
+	int n_ceded, i;
 	struct kvmppc_vcore *vc;
-	struct kvm_vcpu *v, *vn;
+	struct kvm_vcpu *v;
 
 	trace_kvmppc_run_vcpu_enter(vcpu);
 
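
The adjustment policy at the out: label is easiest to see with concrete numbers: consecutive short blocks grow the window geometrically from a 10us base, and a single long sleep resets it (with the default halt_poll_ns_shrink of 0). The following self-contained sketch mirrors that branch; the 500000 ns maximum is a stand-in, since the real default comes from KVM_HALT_POLL_NS_DEFAULT and varies by kernel version:

	#include <stdio.h>

	static unsigned long long halt_poll_max = 500000;	/* stand-in */
	static unsigned int grow_factor = 2;			/* default */
	static unsigned int shrink_factor;			/* 0 => reset */

	static void grow(unsigned long long *poll_ns)
	{
		if (*poll_ns == 0 && grow_factor)
			*poll_ns = 10000;		/* 10us base */
		else
			*poll_ns *= grow_factor;
		if (*poll_ns > halt_poll_max)
			*poll_ns = halt_poll_max;
	}

	static void shrink(unsigned long long *poll_ns)
	{
		if (shrink_factor == 0)
			*poll_ns = 0;
		else
			*poll_ns /= shrink_factor;
	}

	/* Mirrors the "Adjust poll time" branch in kvmppc_vcore_blocked() */
	static void adjust(unsigned long long *poll_ns,
			   unsigned long long block_ns)
	{
		if (block_ns <= *poll_ns)
			;			/* poll window was enough */
		else if (*poll_ns && block_ns > halt_poll_max)
			shrink(poll_ns);	/* blocked far too long */
		else if (*poll_ns < halt_poll_max && block_ns < halt_poll_max)
			grow(poll_ns);		/* woke soon after polling */
	}

	int main(void)
	{
		unsigned long long blocks[] =
			{ 5000, 20000, 40000, 80000, 9000000 };
		unsigned long long poll_ns = 0;
		unsigned int i;

		for (i = 0; i < sizeof(blocks) / sizeof(blocks[0]); i++) {
			adjust(&poll_ns, blocks[i]);
			printf("block %llu ns -> poll window %llu ns\n",
			       blocks[i], poll_ns);
		}
		return 0;
	}

With these inputs the window grows 10000 -> 20000 -> 40000 -> 80000, and the final 9 ms sleep resets it to 0.
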
@@ -2666,7 +2809,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 	vcpu->arch.stolen_logged = vcore_stolen_time(vc, mftb());
 	vcpu->arch.state = KVMPPC_VCPU_RUNNABLE;
 	vcpu->arch.busy_preempt = TB_NIL;
-	list_add_tail(&vcpu->arch.run_list, &vc->runnable_threads);
+	WRITE_ONCE(vc->runnable_threads[vcpu->arch.ptid], vcpu);
 	++vc->n_runnable;
 
 	/*
@@ -2706,8 +2849,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 			kvmppc_wait_for_exec(vc, vcpu, TASK_INTERRUPTIBLE);
 			continue;
 		}
-		list_for_each_entry_safe(v, vn, &vc->runnable_threads,
-					 arch.run_list) {
+		for_each_runnable_thread(i, v, vc) {
 			kvmppc_core_prepare_to_enter(v);
 			if (signal_pending(v->arch.run_task)) {
 				kvmppc_remove_runnable(vc, v);
@@ -2720,7 +2862,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 		if (!vc->n_runnable || vcpu->arch.state != KVMPPC_VCPU_RUNNABLE)
 			break;
 		n_ceded = 0;
-		list_for_each_entry(v, &vc->runnable_threads, arch.run_list) {
+		for_each_runnable_thread(i, v, vc) {
 			if (!v->arch.pending_exceptions)
 				n_ceded += v->arch.ceded;
 			else
@@ -2759,8 +2901,8 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 
 	if (vc->n_runnable && vc->vcore_state == VCORE_INACTIVE) {
 		/* Wake up some vcpu to run the core */
-		v = list_first_entry(&vc->runnable_threads,
-				     struct kvm_vcpu, arch.run_list);
+		i = -1;
+		v = next_runnable_thread(vc, &i);
 		wake_up(&v->arch.cpu_run);
 	}
 
@@ -2818,7 +2960,8 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
 			r = kvmppc_book3s_hv_page_fault(run, vcpu,
 				vcpu->arch.fault_dar, vcpu->arch.fault_dsisr);
 			srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
-		}
+		} else if (r == RESUME_PASSTHROUGH)
+			r = kvmppc_xics_rm_complete(vcpu, 0);
 	} while (is_kvmppc_resume_guest(r));
 
 out:
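
This is where a passthrough interrupt that could not be completed in real mode gets finished: kvmppc_xics_rm_complete() runs the virtual-mode half of the XICS work and normally yields RESUME_GUEST again, so the loop re-enters the guest. Note that both RESUME_PAGE_FAULT and RESUME_PASSTHROUGH must be converted before the loop condition runs, because is_kvmppc_resume_guest() matches only the exact guest-resume codes; at the time of this series it reads roughly as follows (from kvm_book3s.h, shown for context):

	static inline int is_kvmppc_resume_guest(int r)
	{
		return (r == RESUME_GUEST || r == RESUME_GUEST_NV);
	}
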
@@ -3247,6 +3390,8 @@ static void kvmppc_core_destroy_vm_hv(struct kvm *kvm)
 	kvmppc_free_vcores(kvm);
 
 	kvmppc_free_hpt(kvm);
+
+	kvmppc_free_pimap(kvm);
 }
 
 /* We don't need to emulate any privileged instructions or dcbz */
@@ -3282,6 +3427,184 @@ static int kvmppc_core_check_processor_compat_hv(void)
 	return 0;
 }
 
+#ifdef CONFIG_KVM_XICS
+
+void kvmppc_free_pimap(struct kvm *kvm)
+{
+	kfree(kvm->arch.pimap);
+}
+
+static struct kvmppc_passthru_irqmap *kvmppc_alloc_pimap(void)
+{
+	return kzalloc(sizeof(struct kvmppc_passthru_irqmap), GFP_KERNEL);
+}
+
+static int kvmppc_set_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi)
+{
+	struct irq_desc *desc;
+	struct kvmppc_irq_map *irq_map;
+	struct kvmppc_passthru_irqmap *pimap;
+	struct irq_chip *chip;
+	int i;
+
+	if (!kvm_irq_bypass)
+		return 1;
+
+	desc = irq_to_desc(host_irq);
+	if (!desc)
+		return -EIO;
+
+	mutex_lock(&kvm->lock);
+
+	pimap = kvm->arch.pimap;
+	if (pimap == NULL) {
+		/* First call, allocate structure to hold IRQ map */
+		pimap = kvmppc_alloc_pimap();
+		if (pimap == NULL) {
+			mutex_unlock(&kvm->lock);
+			return -ENOMEM;
+		}
+		kvm->arch.pimap = pimap;
+	}
+
+	/*
+	 * For now, we only support interrupts for which the EOI operation
+	 * is an OPAL call followed by a write to XIRR, since that's
+	 * what our real-mode EOI code does.
+	 */
+	chip = irq_data_get_irq_chip(&desc->irq_data);
+	if (!chip || !is_pnv_opal_msi(chip)) {
+		pr_warn("kvmppc_set_passthru_irq_hv: Could not assign IRQ map for (%d,%d)\n",
+			host_irq, guest_gsi);
+		mutex_unlock(&kvm->lock);
+		return -ENOENT;
+	}
+
+	/*
+	 * See if we already have an entry for this guest IRQ number.
+	 * If it's mapped to a hardware IRQ number, that's an error,
+	 * otherwise re-use this entry.
+	 */
+	for (i = 0; i < pimap->n_mapped; i++) {
+		if (guest_gsi == pimap->mapped[i].v_hwirq) {
+			if (pimap->mapped[i].r_hwirq) {
+				mutex_unlock(&kvm->lock);
+				return -EINVAL;
+			}
+			break;
+		}
+	}
+
+	if (i == KVMPPC_PIRQ_MAPPED) {
+		mutex_unlock(&kvm->lock);
+		return -EAGAIN;		/* table is full */
+	}
+
+	irq_map = &pimap->mapped[i];
+
+	irq_map->v_hwirq = guest_gsi;
+	irq_map->desc = desc;
+
+	/*
+	 * Order the above two stores before the next to serialize with
+	 * the KVM real mode handler.
+	 */
+	smp_wmb();
+	irq_map->r_hwirq = desc->irq_data.hwirq;
+
+	if (i == pimap->n_mapped)
+		pimap->n_mapped++;
+
+	kvmppc_xics_set_mapped(kvm, guest_gsi, desc->irq_data.hwirq);
+
+	mutex_unlock(&kvm->lock);
+
+	return 0;
+}
+
+static int kvmppc_clr_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi)
+{
+	struct irq_desc *desc;
+	struct kvmppc_passthru_irqmap *pimap;
+	int i;
+
+	if (!kvm_irq_bypass)
+		return 0;
+
+	desc = irq_to_desc(host_irq);
+	if (!desc)
+		return -EIO;
+
+	mutex_lock(&kvm->lock);
+
+	if (kvm->arch.pimap == NULL) {
+		mutex_unlock(&kvm->lock);
+		return 0;
+	}
+	pimap = kvm->arch.pimap;
+
+	for (i = 0; i < pimap->n_mapped; i++) {
+		if (guest_gsi == pimap->mapped[i].v_hwirq)
+			break;
+	}
+
+	if (i == pimap->n_mapped) {
+		mutex_unlock(&kvm->lock);
+		return -ENODEV;
+	}
+
+	kvmppc_xics_clr_mapped(kvm, guest_gsi, pimap->mapped[i].r_hwirq);
+
+	/* invalidate the entry */
+	pimap->mapped[i].r_hwirq = 0;
+
+	/*
+	 * We don't free this structure even when the count goes to
+	 * zero. The structure is freed when we destroy the VM.
+	 */
+
+	mutex_unlock(&kvm->lock);
+	return 0;
+}
+
+static int kvmppc_irq_bypass_add_producer_hv(struct irq_bypass_consumer *cons,
+					     struct irq_bypass_producer *prod)
+{
+	int ret = 0;
+	struct kvm_kernel_irqfd *irqfd =
+		container_of(cons, struct kvm_kernel_irqfd, consumer);
+
+	irqfd->producer = prod;
+
+	ret = kvmppc_set_passthru_irq(irqfd->kvm, prod->irq, irqfd->gsi);
+	if (ret)
+		pr_info("kvmppc_set_passthru_irq (irq %d, gsi %d) fails: %d\n",
+			prod->irq, irqfd->gsi, ret);
+
+	return ret;
+}
+
+static void kvmppc_irq_bypass_del_producer_hv(struct irq_bypass_consumer *cons,
+					      struct irq_bypass_producer *prod)
+{
+	int ret;
+	struct kvm_kernel_irqfd *irqfd =
+		container_of(cons, struct kvm_kernel_irqfd, consumer);
+
+	irqfd->producer = NULL;
+
+	/*
+	 * When producer of consumer is unregistered, we change back to
+	 * default external interrupt handling mode - KVM real mode
+	 * will switch back to host.
+	 */
+	ret = kvmppc_clr_passthru_irq(irqfd->kvm, prod->irq, irqfd->gsi);
+	if (ret)
+		pr_warn("kvmppc_clr_passthru_irq (irq %d, gsi %d) fails: %d\n",
+			prod->irq, irqfd->gsi, ret);
+}
+#endif
+
 static long kvm_arch_vm_ioctl_hv(struct file *filp,
 				 unsigned int ioctl, unsigned long arg)
 {
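
kvm->lock serializes writers of the map, but its main consumer is the KVM real-mode interrupt handler, which searches it with no locks held. That is why r_hwirq doubles as the validity flag: on insert it is written last, behind the smp_wmb(), and on removal it is cleared first, so a lockless reader that matches a non-zero r_hwirq is guaranteed to also see the v_hwirq and desc stores that preceded it (n_mapped only ever grows, which keeps the scan bounded). A standalone sketch of the publish/lookup pairing, with mock types and GCC builtins standing in for the kernel barrier primitives:

	#include <stddef.h>

	struct irq_map_demo {
		unsigned int v_hwirq;	/* guest irq number */
		void *desc;		/* host irq descriptor */
		unsigned int r_hwirq;	/* host hw irq; non-zero == valid */
	};

	/* Writer (serialized by a lock): fill the payload, then publish */
	static void publish(struct irq_map_demo *m, unsigned int v,
			    void *d, unsigned int r)
	{
		m->v_hwirq = v;
		m->desc = d;
		__atomic_thread_fence(__ATOMIC_RELEASE);	/* smp_wmb() */
		m->r_hwirq = r;
	}

	/* Lockless reader: match r_hwirq first, then trust the payload */
	static struct irq_map_demo *lookup(struct irq_map_demo *map, int n,
					   unsigned int r_hwirq)
	{
		int i;

		for (i = 0; i < n; i++) {
			if (map[i].r_hwirq == r_hwirq) {
				/* pairs with the release fence above */
				__atomic_thread_fence(__ATOMIC_ACQUIRE);
				return &map[i];
			}
		}
		return NULL;
	}
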
@@ -3400,6 +3723,10 @@ static struct kvmppc_ops kvm_ops_hv = {
 	.fast_vcpu_kick = kvmppc_fast_vcpu_kick_hv,
 	.arch_vm_ioctl  = kvm_arch_vm_ioctl_hv,
 	.hcall_implemented = kvmppc_hcall_impl_hv,
+#ifdef CONFIG_KVM_XICS
+	.irq_bypass_add_producer = kvmppc_irq_bypass_add_producer_hv,
+	.irq_bypass_del_producer = kvmppc_irq_bypass_del_producer_hv,
+#endif
 };
 
 static int kvm_init_subcore_bitmap(void)
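
These two ops are the HV half of the irqbypass plumbing: the generic irqbypass manager pairs a producer (typically VFIO, for a passed-through device interrupt) with KVM's irqfd consumer and calls into the architecture, and the powerpc glue forwards to whichever kvm_ops are registered. A simplified sketch of that glue (the real version lives in arch/powerpc/kvm/powerpc.c in the companion patch; details may differ):

	bool kvm_arch_has_irq_bypass(void)
	{
		return ((kvmppc_hv_ops && kvmppc_hv_ops->irq_bypass_add_producer) ||
			(kvmppc_pr_ops && kvmppc_pr_ops->irq_bypass_add_producer));
	}

	int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons,
					     struct irq_bypass_producer *prod)
	{
		struct kvm_kernel_irqfd *irqfd =
			container_of(cons, struct kvm_kernel_irqfd, consumer);
		struct kvm *kvm = irqfd->kvm;

		if (kvm->arch.kvm_ops->irq_bypass_add_producer)
			return kvm->arch.kvm_ops->irq_bypass_add_producer(cons, prod);

		return 0;
	}
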