|
@@ -37,11 +37,16 @@
|
|
|
#include <linux/iommu.h>
|
|
|
#include <linux/intel-iommu.h>
|
|
|
#include <linux/cpufreq.h>
|
|
|
+#include <trace/events/kvm.h>
|
|
|
+#undef TRACE_INCLUDE_FILE
|
|
|
+#define CREATE_TRACE_POINTS
|
|
|
+#include "trace.h"
|
|
|
|
|
|
#include <asm/uaccess.h>
|
|
|
#include <asm/msr.h>
|
|
|
#include <asm/desc.h>
|
|
|
#include <asm/mtrr.h>
|
|
|
+#include <asm/mce.h>
|
|
|
|
|
|
#define MAX_IO_MSRS 256
|
|
|
#define CR0_RESERVED_BITS \
|
|
@@ -55,6 +60,10 @@
|
|
|
| X86_CR4_OSXMMEXCPT | X86_CR4_VMXE))
|
|
|
|
|
|
#define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR)
|
|
|
+
|
|
|
+#define KVM_MAX_MCE_BANKS 32
|
|
|
+#define KVM_MCE_CAP_SUPPORTED MCG_CTL_P
|
|
|
+
|
|
|
/* EFER defaults:
|
|
|
* - enable syscall per default because its emulated by KVM
|
|
|
* - enable LME and LMA per default on 64 bit KVM
|
|
@@ -68,14 +77,16 @@ static u64 __read_mostly efer_reserved_bits = 0xfffffffffffffffeULL;
|
|
|
#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
|
|
|
#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
|
|
|
|
|
|
+static void update_cr8_intercept(struct kvm_vcpu *vcpu);
|
|
|
static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
|
|
|
struct kvm_cpuid_entry2 __user *entries);
|
|
|
-struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
|
|
|
- u32 function, u32 index);
|
|
|
|
|
|
struct kvm_x86_ops *kvm_x86_ops;
|
|
|
EXPORT_SYMBOL_GPL(kvm_x86_ops);
|
|
|
|
|
|
+int ignore_msrs = 0;
|
|
|
+module_param_named(ignore_msrs, ignore_msrs, bool, S_IRUGO | S_IWUSR);
|
|
|
+
|
|
|
struct kvm_stats_debugfs_item debugfs_entries[] = {
|
|
|
{ "pf_fixed", VCPU_STAT(pf_fixed) },
|
|
|
{ "pf_guest", VCPU_STAT(pf_guest) },
|
|
@@ -122,18 +133,16 @@ unsigned long segment_base(u16 selector)
|
|
|
if (selector == 0)
|
|
|
return 0;
|
|
|
|
|
|
- asm("sgdt %0" : "=m"(gdt));
|
|
|
+ kvm_get_gdt(&gdt);
|
|
|
table_base = gdt.base;
|
|
|
|
|
|
if (selector & 4) { /* from ldt */
|
|
|
- u16 ldt_selector;
|
|
|
+ u16 ldt_selector = kvm_read_ldt();
|
|
|
|
|
|
- asm("sldt %0" : "=g"(ldt_selector));
|
|
|
table_base = segment_base(ldt_selector);
|
|
|
}
|
|
|
d = (struct desc_struct *)(table_base + (selector & ~7));
|
|
|
- v = d->base0 | ((unsigned long)d->base1 << 16) |
|
|
|
- ((unsigned long)d->base2 << 24);
|
|
|
+ v = get_desc_base(d);
|
|
|
#ifdef CONFIG_X86_64
|
|
|
if (d->s == 0 && (d->type == 2 || d->type == 9 || d->type == 11))
|
|
|
v |= ((unsigned long)((struct ldttss_desc64 *)d)->base3) << 32;
|
|
@@ -176,16 +185,22 @@ void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long addr,
|
|
|
++vcpu->stat.pf_guest;
|
|
|
|
|
|
if (vcpu->arch.exception.pending) {
|
|
|
- if (vcpu->arch.exception.nr == PF_VECTOR) {
|
|
|
- printk(KERN_DEBUG "kvm: inject_page_fault:"
|
|
|
- " double fault 0x%lx\n", addr);
|
|
|
- vcpu->arch.exception.nr = DF_VECTOR;
|
|
|
- vcpu->arch.exception.error_code = 0;
|
|
|
- } else if (vcpu->arch.exception.nr == DF_VECTOR) {
|
|
|
+ switch(vcpu->arch.exception.nr) {
|
|
|
+ case DF_VECTOR:
|
|
|
/* triple fault -> shutdown */
|
|
|
set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests);
|
|
|
+ return;
|
|
|
+ case PF_VECTOR:
|
|
|
+ vcpu->arch.exception.nr = DF_VECTOR;
|
|
|
+ vcpu->arch.exception.error_code = 0;
|
|
|
+ return;
|
|
|
+ default:
|
|
|
+ /* replace the previous exception with a new one in the hope
|
|
|
+ that re-executing the instruction will regenerate the lost
|
|
|
+ exception */
|
|
|
+ vcpu->arch.exception.pending = false;
|
|
|
+ break;
|
|
|
}
|
|
|
- return;
|
|
|
}
|
|
|
vcpu->arch.cr2 = addr;
|
|
|
kvm_queue_exception_e(vcpu, PF_VECTOR, error_code);
|
|
@@ -207,12 +222,18 @@ void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code)
|
|
|
}
|
|
|
EXPORT_SYMBOL_GPL(kvm_queue_exception_e);
|
|
|
|
|
|
-static void __queue_exception(struct kvm_vcpu *vcpu)
|
|
|
+/*
|
|
|
+ * Checks if cpl <= required_cpl; if true, return true. Otherwise queue
|
|
|
+ * a #GP and return false.
|
|
|
+ */
|
|
|
+bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl)
|
|
|
{
|
|
|
- kvm_x86_ops->queue_exception(vcpu, vcpu->arch.exception.nr,
|
|
|
- vcpu->arch.exception.has_error_code,
|
|
|
- vcpu->arch.exception.error_code);
|
|
|
+ if (kvm_x86_ops->get_cpl(vcpu) <= required_cpl)
|
|
|
+ return true;
|
|
|
+ kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
|
|
|
+ return false;
|
|
|
}
|
|
|
+EXPORT_SYMBOL_GPL(kvm_require_cpl);
|
|
|
|
|
|
/*
|
|
|
* Load the pae pdptrs. Return true is they are all valid.
|
|
@@ -232,7 +253,7 @@ int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3)
|
|
|
goto out;
|
|
|
}
|
|
|
for (i = 0; i < ARRAY_SIZE(pdpte); ++i) {
|
|
|
- if (is_present_pte(pdpte[i]) &&
|
|
|
+ if (is_present_gpte(pdpte[i]) &&
|
|
|
(pdpte[i] & vcpu->arch.mmu.rsvd_bits_mask[0][2])) {
|
|
|
ret = 0;
|
|
|
goto out;
|
|
@@ -241,6 +262,10 @@ int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3)
|
|
|
ret = 1;
|
|
|
|
|
|
memcpy(vcpu->arch.pdptrs, pdpte, sizeof(vcpu->arch.pdptrs));
|
|
|
+ __set_bit(VCPU_EXREG_PDPTR,
|
|
|
+ (unsigned long *)&vcpu->arch.regs_avail);
|
|
|
+ __set_bit(VCPU_EXREG_PDPTR,
|
|
|
+ (unsigned long *)&vcpu->arch.regs_dirty);
|
|
|
out:
|
|
|
|
|
|
return ret;
|
|
@@ -256,6 +281,10 @@ static bool pdptrs_changed(struct kvm_vcpu *vcpu)
|
|
|
if (is_long_mode(vcpu) || !is_pae(vcpu))
|
|
|
return false;
|
|
|
|
|
|
+ if (!test_bit(VCPU_EXREG_PDPTR,
|
|
|
+ (unsigned long *)&vcpu->arch.regs_avail))
|
|
|
+ return true;
|
|
|
+
|
|
|
r = kvm_read_guest(vcpu->kvm, vcpu->arch.cr3 & ~31u, pdpte, sizeof(pdpte));
|
|
|
if (r < 0)
|
|
|
goto out;
|
|
@@ -328,9 +357,6 @@ EXPORT_SYMBOL_GPL(kvm_set_cr0);
|
|
|
void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw)
|
|
|
{
|
|
|
kvm_set_cr0(vcpu, (vcpu->arch.cr0 & ~0x0ful) | (msw & 0x0f));
|
|
|
- KVMTRACE_1D(LMSW, vcpu,
|
|
|
- (u32)((vcpu->arch.cr0 & ~0x0ful) | (msw & 0x0f)),
|
|
|
- handler);
|
|
|
}
|
|
|
EXPORT_SYMBOL_GPL(kvm_lmsw);
|
|
|
|
|
@@ -466,7 +492,7 @@ static u32 msrs_to_save[] = {
|
|
|
#ifdef CONFIG_X86_64
|
|
|
MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
|
|
|
#endif
|
|
|
- MSR_IA32_TIME_STAMP_COUNTER, MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
|
|
|
+ MSR_IA32_TSC, MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
|
|
|
MSR_IA32_PERF_STATUS, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA
|
|
|
};
|
|
|
|
|
@@ -644,8 +670,7 @@ static void kvm_write_guest_time(struct kvm_vcpu *v)
|
|
|
|
|
|
/* Keep irq disabled to prevent changes to the clock */
|
|
|
local_irq_save(flags);
|
|
|
- kvm_get_msr(v, MSR_IA32_TIME_STAMP_COUNTER,
|
|
|
- &vcpu->hv_clock.tsc_timestamp);
|
|
|
+ kvm_get_msr(v, MSR_IA32_TSC, &vcpu->hv_clock.tsc_timestamp);
|
|
|
ktime_get_ts(&ts);
|
|
|
local_irq_restore(flags);
|
|
|
|
|
@@ -778,23 +803,60 @@ static int set_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
|
|
|
return 0;
|
|
|
}
|
|
|
|
|
|
+/*
+ * Handle a guest write to one of the machine-check MSRs.
+ *
+ * Returns 0 when the value was stored, and non-zero when the write is
+ * rejected: 1 if the MSR is not implemented for this vcpu's MCG_CAP
+ * (MCG_CTL without MCG_CTL_P, or a bank register outside the configured
+ * bank count), -1 if the value is not legal for a control register.
+ */
+static int set_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 data)
+{
+	const u64 all_ones = ~(u64)0;
+	u64 mcg_cap = vcpu->arch.mcg_cap;
+	/* low byte of MCG_CAP is the number of banks configured */
+	unsigned bank_num = mcg_cap & 0xff;
+	u32 offset;
+
+	if (msr == MSR_IA32_MCG_STATUS) {
+		vcpu->arch.mcg_status = data;
+		return 0;
+	}
+
+	if (msr == MSR_IA32_MCG_CTL) {
+		if (!(mcg_cap & MCG_CTL_P))
+			return 1;
+		if (data != 0 && data != all_ones)
+			return -1;
+		vcpu->arch.mcg_ctl = data;
+		return 0;
+	}
+
+	/* everything else must be a per-bank register of a configured bank */
+	if (msr < MSR_IA32_MC0_CTL ||
+	    msr >= MSR_IA32_MC0_CTL + 4 * bank_num)
+		return 1;
+
+	offset = msr - MSR_IA32_MC0_CTL;
+	/* only 0 or all 1s can be written to IA32_MCi_CTL */
+	if ((offset & 0x3) == 0 && data != 0 && data != all_ones)
+		return -1;
+	vcpu->arch.mce_banks[offset] = data;
+	return 0;
+}
|
|
|
+
|
|
|
int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
|
|
|
{
|
|
|
switch (msr) {
|
|
|
case MSR_EFER:
|
|
|
set_efer(vcpu, data);
|
|
|
break;
|
|
|
- case MSR_IA32_MC0_STATUS:
|
|
|
- pr_unimpl(vcpu, "%s: MSR_IA32_MC0_STATUS 0x%llx, nop\n",
|
|
|
- __func__, data);
|
|
|
+ case MSR_K7_HWCR:
|
|
|
+ data &= ~(u64)0x40; /* ignore flush filter disable */
|
|
|
+ if (data != 0) {
|
|
|
+ pr_unimpl(vcpu, "unimplemented HWCR wrmsr: 0x%llx\n",
|
|
|
+ data);
|
|
|
+ return 1;
|
|
|
+ }
|
|
|
break;
|
|
|
- case MSR_IA32_MCG_STATUS:
|
|
|
- pr_unimpl(vcpu, "%s: MSR_IA32_MCG_STATUS 0x%llx, nop\n",
|
|
|
- __func__, data);
|
|
|
+ case MSR_FAM10H_MMIO_CONF_BASE:
|
|
|
+ if (data != 0) {
|
|
|
+ pr_unimpl(vcpu, "unimplemented MMIO_CONF_BASE wrmsr: "
|
|
|
+ "0x%llx\n", data);
|
|
|
+ return 1;
|
|
|
+ }
|
|
|
break;
|
|
|
- case MSR_IA32_MCG_CTL:
|
|
|
- pr_unimpl(vcpu, "%s: MSR_IA32_MCG_CTL 0x%llx, nop\n",
|
|
|
- __func__, data);
|
|
|
+ case MSR_AMD64_NB_CFG:
|
|
|
break;
|
|
|
case MSR_IA32_DEBUGCTLMSR:
|
|
|
if (!data) {
|
|
@@ -811,12 +873,15 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
|
|
|
case MSR_IA32_UCODE_REV:
|
|
|
case MSR_IA32_UCODE_WRITE:
|
|
|
case MSR_VM_HSAVE_PA:
|
|
|
+ case MSR_AMD64_PATCH_LOADER:
|
|
|
break;
|
|
|
case 0x200 ... 0x2ff:
|
|
|
return set_msr_mtrr(vcpu, msr, data);
|
|
|
case MSR_IA32_APICBASE:
|
|
|
kvm_set_apic_base(vcpu, data);
|
|
|
break;
|
|
|
+ case APIC_BASE_MSR ... APIC_BASE_MSR + 0x3ff:
|
|
|
+ return kvm_x2apic_msr_write(vcpu, msr, data);
|
|
|
case MSR_IA32_MISC_ENABLE:
|
|
|
vcpu->arch.ia32_misc_enable_msr = data;
|
|
|
break;
|
|
@@ -850,9 +915,50 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
|
|
|
kvm_request_guest_time_update(vcpu);
|
|
|
break;
|
|
|
}
|
|
|
+ case MSR_IA32_MCG_CTL:
|
|
|
+ case MSR_IA32_MCG_STATUS:
|
|
|
+ case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1:
|
|
|
+ return set_msr_mce(vcpu, msr, data);
|
|
|
+
|
|
|
+ /* Performance counters are not protected by a CPUID bit,
|
|
|
+ * so we should check all of them in the generic path for the sake of
|
|
|
+ * cross vendor migration.
|
|
|
+ * Writing a zero into the event select MSRs disables them,
|
|
|
+ * which we perfectly emulate ;-). Any other value should be at least
|
|
|
+ * reported, some guests depend on them.
|
|
|
+ */
|
|
|
+ case MSR_P6_EVNTSEL0:
|
|
|
+ case MSR_P6_EVNTSEL1:
|
|
|
+ case MSR_K7_EVNTSEL0:
|
|
|
+ case MSR_K7_EVNTSEL1:
|
|
|
+ case MSR_K7_EVNTSEL2:
|
|
|
+ case MSR_K7_EVNTSEL3:
|
|
|
+ if (data != 0)
|
|
|
+ pr_unimpl(vcpu, "unimplemented perfctr wrmsr: "
|
|
|
+ "0x%x data 0x%llx\n", msr, data);
|
|
|
+ break;
|
|
|
+ /* at least RHEL 4 unconditionally writes to the perfctr registers,
|
|
|
+ * so we ignore writes to make it happy.
|
|
|
+ */
|
|
|
+ case MSR_P6_PERFCTR0:
|
|
|
+ case MSR_P6_PERFCTR1:
|
|
|
+ case MSR_K7_PERFCTR0:
|
|
|
+ case MSR_K7_PERFCTR1:
|
|
|
+ case MSR_K7_PERFCTR2:
|
|
|
+ case MSR_K7_PERFCTR3:
|
|
|
+ pr_unimpl(vcpu, "unimplemented perfctr wrmsr: "
|
|
|
+ "0x%x data 0x%llx\n", msr, data);
|
|
|
+ break;
|
|
|
default:
|
|
|
- pr_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n", msr, data);
|
|
|
- return 1;
|
|
|
+ if (!ignore_msrs) {
|
|
|
+ pr_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n",
|
|
|
+ msr, data);
|
|
|
+ return 1;
|
|
|
+ } else {
|
|
|
+ pr_unimpl(vcpu, "ignored wrmsr: 0x%x data %llx\n",
|
|
|
+ msr, data);
|
|
|
+ break;
|
|
|
+ }
|
|
|
}
|
|
|
return 0;
|
|
|
}
|
|
@@ -905,26 +1011,47 @@ static int get_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
|
|
|
return 0;
|
|
|
}
|
|
|
|
|
|
-int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
|
|
|
+static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
|
|
|
{
|
|
|
u64 data;
|
|
|
+ u64 mcg_cap = vcpu->arch.mcg_cap;
|
|
|
+ unsigned bank_num = mcg_cap & 0xff;
|
|
|
|
|
|
switch (msr) {
|
|
|
- case 0xc0010010: /* SYSCFG */
|
|
|
- case 0xc0010015: /* HWCR */
|
|
|
- case MSR_IA32_PLATFORM_ID:
|
|
|
case MSR_IA32_P5_MC_ADDR:
|
|
|
case MSR_IA32_P5_MC_TYPE:
|
|
|
- case MSR_IA32_MC0_CTL:
|
|
|
- case MSR_IA32_MCG_STATUS:
|
|
|
+ data = 0;
|
|
|
+ break;
|
|
|
case MSR_IA32_MCG_CAP:
|
|
|
+ data = vcpu->arch.mcg_cap;
|
|
|
+ break;
|
|
|
case MSR_IA32_MCG_CTL:
|
|
|
- case MSR_IA32_MC0_MISC:
|
|
|
- case MSR_IA32_MC0_MISC+4:
|
|
|
- case MSR_IA32_MC0_MISC+8:
|
|
|
- case MSR_IA32_MC0_MISC+12:
|
|
|
- case MSR_IA32_MC0_MISC+16:
|
|
|
- case MSR_IA32_MC0_MISC+20:
|
|
|
+ if (!(mcg_cap & MCG_CTL_P))
|
|
|
+ return 1;
|
|
|
+ data = vcpu->arch.mcg_ctl;
|
|
|
+ break;
|
|
|
+ case MSR_IA32_MCG_STATUS:
|
|
|
+ data = vcpu->arch.mcg_status;
|
|
|
+ break;
|
|
|
+ default:
|
|
|
+ if (msr >= MSR_IA32_MC0_CTL &&
|
|
|
+ msr < MSR_IA32_MC0_CTL + 4 * bank_num) {
|
|
|
+ u32 offset = msr - MSR_IA32_MC0_CTL;
|
|
|
+ data = vcpu->arch.mce_banks[offset];
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ return 1;
|
|
|
+ }
|
|
|
+ *pdata = data;
|
|
|
+ return 0;
|
|
|
+}
|
|
|
+
|
|
|
+int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
|
|
|
+{
|
|
|
+ u64 data;
|
|
|
+
|
|
|
+ switch (msr) {
|
|
|
+ case MSR_IA32_PLATFORM_ID:
|
|
|
case MSR_IA32_UCODE_REV:
|
|
|
case MSR_IA32_EBL_CR_POWERON:
|
|
|
case MSR_IA32_DEBUGCTLMSR:
|
|
@@ -932,10 +1059,18 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
|
|
|
case MSR_IA32_LASTBRANCHTOIP:
|
|
|
case MSR_IA32_LASTINTFROMIP:
|
|
|
case MSR_IA32_LASTINTTOIP:
|
|
|
+ case MSR_K8_SYSCFG:
|
|
|
+ case MSR_K7_HWCR:
|
|
|
case MSR_VM_HSAVE_PA:
|
|
|
+ case MSR_P6_PERFCTR0:
|
|
|
+ case MSR_P6_PERFCTR1:
|
|
|
case MSR_P6_EVNTSEL0:
|
|
|
case MSR_P6_EVNTSEL1:
|
|
|
case MSR_K7_EVNTSEL0:
|
|
|
+ case MSR_K7_PERFCTR0:
|
|
|
+ case MSR_K8_INT_PENDING_MSG:
|
|
|
+ case MSR_AMD64_NB_CFG:
|
|
|
+ case MSR_FAM10H_MMIO_CONF_BASE:
|
|
|
data = 0;
|
|
|
break;
|
|
|
case MSR_MTRRcap:
|
|
@@ -949,6 +1084,9 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
|
|
|
case MSR_IA32_APICBASE:
|
|
|
data = kvm_get_apic_base(vcpu);
|
|
|
break;
|
|
|
+ case APIC_BASE_MSR ... APIC_BASE_MSR + 0x3ff:
|
|
|
+ return kvm_x2apic_msr_read(vcpu, msr, pdata);
|
|
|
+ break;
|
|
|
case MSR_IA32_MISC_ENABLE:
|
|
|
data = vcpu->arch.ia32_misc_enable_msr;
|
|
|
break;
|
|
@@ -967,9 +1105,22 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
|
|
|
case MSR_KVM_SYSTEM_TIME:
|
|
|
data = vcpu->arch.time;
|
|
|
break;
|
|
|
+ case MSR_IA32_P5_MC_ADDR:
|
|
|
+ case MSR_IA32_P5_MC_TYPE:
|
|
|
+ case MSR_IA32_MCG_CAP:
|
|
|
+ case MSR_IA32_MCG_CTL:
|
|
|
+ case MSR_IA32_MCG_STATUS:
|
|
|
+ case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1:
|
|
|
+ return get_msr_mce(vcpu, msr, pdata);
|
|
|
default:
|
|
|
- pr_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr);
|
|
|
- return 1;
|
|
|
+ if (!ignore_msrs) {
|
|
|
+ pr_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr);
|
|
|
+ return 1;
|
|
|
+ } else {
|
|
|
+ pr_unimpl(vcpu, "ignored rdmsr: 0x%x\n", msr);
|
|
|
+ data = 0;
|
|
|
+ }
|
|
|
+ break;
|
|
|
}
|
|
|
*pdata = data;
|
|
|
return 0;
|
|
@@ -1068,6 +1219,11 @@ int kvm_dev_ioctl_check_extension(long ext)
|
|
|
case KVM_CAP_REINJECT_CONTROL:
|
|
|
case KVM_CAP_IRQ_INJECT_STATUS:
|
|
|
case KVM_CAP_ASSIGN_DEV_IRQ:
|
|
|
+ case KVM_CAP_IRQFD:
|
|
|
+ case KVM_CAP_IOEVENTFD:
|
|
|
+ case KVM_CAP_PIT2:
|
|
|
+ case KVM_CAP_PIT_STATE2:
|
|
|
+ case KVM_CAP_SET_IDENTITY_MAP_ADDR:
|
|
|
r = 1;
|
|
|
break;
|
|
|
case KVM_CAP_COALESCED_MMIO:
|
|
@@ -1088,6 +1244,9 @@ int kvm_dev_ioctl_check_extension(long ext)
|
|
|
case KVM_CAP_IOMMU:
|
|
|
r = iommu_found();
|
|
|
break;
|
|
|
+ case KVM_CAP_MCE:
|
|
|
+ r = KVM_MAX_MCE_BANKS;
|
|
|
+ break;
|
|
|
default:
|
|
|
r = 0;
|
|
|
break;
|
|
@@ -1147,6 +1306,16 @@ long kvm_arch_dev_ioctl(struct file *filp,
|
|
|
r = 0;
|
|
|
break;
|
|
|
}
|
|
|
+ case KVM_X86_GET_MCE_CAP_SUPPORTED: {
|
|
|
+ u64 mce_cap;
|
|
|
+
|
|
|
+ mce_cap = KVM_MCE_CAP_SUPPORTED;
|
|
|
+ r = -EFAULT;
|
|
|
+ if (copy_to_user(argp, &mce_cap, sizeof mce_cap))
|
|
|
+ goto out;
|
|
|
+ r = 0;
|
|
|
+ break;
|
|
|
+ }
|
|
|
default:
|
|
|
r = -EINVAL;
|
|
|
}
|
|
@@ -1227,6 +1396,7 @@ static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
|
|
|
vcpu->arch.cpuid_nent = cpuid->nent;
|
|
|
cpuid_fix_nx_cap(vcpu);
|
|
|
r = 0;
|
|
|
+ kvm_apic_set_version(vcpu);
|
|
|
|
|
|
out_free:
|
|
|
vfree(cpuid_entries);
|
|
@@ -1248,6 +1418,7 @@ static int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu,
|
|
|
cpuid->nent * sizeof(struct kvm_cpuid_entry2)))
|
|
|
goto out;
|
|
|
vcpu->arch.cpuid_nent = cpuid->nent;
|
|
|
+ kvm_apic_set_version(vcpu);
|
|
|
return 0;
|
|
|
|
|
|
out:
|
|
@@ -1290,6 +1461,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
|
|
|
u32 index, int *nent, int maxnent)
|
|
|
{
|
|
|
unsigned f_nx = is_efer_nx() ? F(NX) : 0;
|
|
|
+ unsigned f_gbpages = kvm_x86_ops->gb_page_enable() ? F(GBPAGES) : 0;
|
|
|
#ifdef CONFIG_X86_64
|
|
|
unsigned f_lm = F(LM);
|
|
|
#else
|
|
@@ -1314,7 +1486,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
|
|
|
F(MTRR) | F(PGE) | F(MCA) | F(CMOV) |
|
|
|
F(PAT) | F(PSE36) | 0 /* Reserved */ |
|
|
|
f_nx | 0 /* Reserved */ | F(MMXEXT) | F(MMX) |
|
|
|
- F(FXSR) | F(FXSR_OPT) | 0 /* GBPAGES */ | 0 /* RDTSCP */ |
|
|
|
+ F(FXSR) | F(FXSR_OPT) | f_gbpages | 0 /* RDTSCP */ |
|
|
|
0 /* Reserved */ | f_lm | F(3DNOWEXT) | F(3DNOW);
|
|
|
/* cpuid 1.ecx */
|
|
|
const u32 kvm_supported_word4_x86_features =
|
|
@@ -1323,7 +1495,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
|
|
|
0 /* TM2 */ | F(SSSE3) | 0 /* CNXT-ID */ | 0 /* Reserved */ |
|
|
|
0 /* Reserved */ | F(CX16) | 0 /* xTPR Update, PDCM */ |
|
|
|
0 /* Reserved, DCA */ | F(XMM4_1) |
|
|
|
- F(XMM4_2) | 0 /* x2APIC */ | F(MOVBE) | F(POPCNT) |
|
|
|
+ F(XMM4_2) | F(X2APIC) | F(MOVBE) | F(POPCNT) |
|
|
|
0 /* Reserved, XSAVE, OSXSAVE */;
|
|
|
/* cpuid 0x80000001.ecx */
|
|
|
const u32 kvm_supported_word6_x86_features =
|
|
@@ -1344,6 +1516,9 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
|
|
|
case 1:
|
|
|
entry->edx &= kvm_supported_word0_x86_features;
|
|
|
entry->ecx &= kvm_supported_word4_x86_features;
|
|
|
+ /* we support x2apic emulation even if host does not support
|
|
|
+ * it since we emulate x2apic in software */
|
|
|
+ entry->ecx |= F(X2APIC);
|
|
|
break;
|
|
|
/* function 2 entries are STATEFUL. That is, repeated cpuid commands
|
|
|
* may return different values. This forces us to get_cpu() before
|
|
@@ -1435,6 +1610,10 @@ static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
|
|
|
for (func = 0x80000001; func <= limit && nent < cpuid->nent; ++func)
|
|
|
do_cpuid_ent(&cpuid_entries[nent], func, 0,
|
|
|
&nent, cpuid->nent);
|
|
|
+ r = -E2BIG;
|
|
|
+ if (nent >= cpuid->nent)
|
|
|
+ goto out_free;
|
|
|
+
|
|
|
r = -EFAULT;
|
|
|
if (copy_to_user(entries, cpuid_entries,
|
|
|
nent * sizeof(struct kvm_cpuid_entry2)))
|
|
@@ -1464,6 +1643,7 @@ static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu,
|
|
|
vcpu_load(vcpu);
|
|
|
memcpy(vcpu->arch.apic->regs, s->regs, sizeof *s);
|
|
|
kvm_apic_post_state_restore(vcpu);
|
|
|
+ update_cr8_intercept(vcpu);
|
|
|
vcpu_put(vcpu);
|
|
|
|
|
|
return 0;
|
|
@@ -1503,6 +1683,80 @@ static int vcpu_ioctl_tpr_access_reporting(struct kvm_vcpu *vcpu,
|
|
|
return 0;
|
|
|
}
|
|
|
|
|
|
+/*
+ * KVM_X86_SETUP_MCE: install the machine-check capabilities requested by
+ * userspace for this vcpu.
+ *
+ * @mcg_cap: requested IA32_MCG_CAP value; the low byte is the bank count.
+ *
+ * Returns 0 on success, or -EINVAL if the bank count is zero or exceeds
+ * KVM_MAX_MCE_BANKS, or if a capability bit outside
+ * KVM_MCE_CAP_SUPPORTED | 0xff | 0xff0000 is set.
+ */
+static int kvm_vcpu_ioctl_x86_setup_mce(struct kvm_vcpu *vcpu,
+					u64 mcg_cap)
+{
+	int r;
+	unsigned bank_num = mcg_cap & 0xff, bank;
+
+	r = -EINVAL;
+	/*
+	 * The bank count comes straight from userspace and can be as large
+	 * as 255.  Refuse anything above KVM_MAX_MCE_BANKS (the value
+	 * advertised through KVM_CAP_MCE), otherwise the init loop below
+	 * indexes mce_banks[] beyond the supported banks.
+	 * NOTE(review): assumes mce_banks holds 4 registers for each of
+	 * KVM_MAX_MCE_BANKS banks - confirm against its allocation.
+	 */
+	if (!bank_num || bank_num > KVM_MAX_MCE_BANKS)
+		goto out;
+	if (mcg_cap & ~(KVM_MCE_CAP_SUPPORTED | 0xff | 0xff0000))
+		goto out;
+	r = 0;
+	vcpu->arch.mcg_cap = mcg_cap;
+	/* Init IA32_MCG_CTL to all 1s */
+	if (mcg_cap & MCG_CTL_P)
+		vcpu->arch.mcg_ctl = ~(u64)0;
+	/* Init IA32_MCi_CTL to all 1s (first of the 4 registers per bank) */
+	for (bank = 0; bank < bank_num; bank++)
+		vcpu->arch.mce_banks[bank*4] = ~(u64)0;
+out:
+	return r;
+}
|
|
|
+
|
|
|
+/*
+ * KVM_X86_SET_MCE: record a machine-check event supplied by userspace in
+ * the vcpu's bank registers and, for an uncorrected error, queue #MC.
+ *
+ * Each bank occupies four consecutive slots of mce_banks, used here as
+ * [0] control, [1] status, [2] address, [3] misc.
+ *
+ * Returns -EINVAL if the bank number is not below the configured bank
+ * count or the status lacks MCI_STATUS_VAL; otherwise 0, including the
+ * cases where the event is dropped because reporting is disabled.
+ */
+static int kvm_vcpu_ioctl_x86_set_mce(struct kvm_vcpu *vcpu,
+				      struct kvm_x86_mce *mce)
+{
+	const u64 cap = vcpu->arch.mcg_cap;
+	const unsigned nbanks = cap & 0xff;
+	const bool uncorrected = mce->status & MCI_STATUS_UC;
+	u64 *regs;
+
+	if (mce->bank >= nbanks || !(mce->status & MCI_STATUS_VAL))
+		return -EINVAL;
+
+	regs = vcpu->arch.mce_banks + 4 * mce->bank;
+
+	if (uncorrected) {
+		/*
+		 * if IA32_MCG_CTL is not all 1s, the uncorrected error
+		 * reporting is disabled
+		 */
+		if ((cap & MCG_CTL_P) && vcpu->arch.mcg_ctl != ~(u64)0)
+			return 0;
+		/*
+		 * if IA32_MCi_CTL is not all 1s, the uncorrected error
+		 * reporting is disabled for the bank
+		 */
+		if (regs[0] != ~(u64)0)
+			return 0;
+		/* a second #MC in flight, or CR4.MCE clear: request triple fault */
+		if ((vcpu->arch.mcg_status & MCG_STATUS_MCIP) ||
+		    !(vcpu->arch.cr4 & X86_CR4_MCE)) {
+			printk(KERN_DEBUG "kvm: set_mce: "
+			       "injects mce exception while "
+			       "previous one is in progress!\n");
+			set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests);
+			return 0;
+		}
+		/* bank already holds a valid record: flag the overwrite */
+		if (regs[1] & MCI_STATUS_VAL)
+			mce->status |= MCI_STATUS_OVER;
+		regs[2] = mce->addr;
+		regs[3] = mce->misc;
+		vcpu->arch.mcg_status = mce->mcg_status;
+		regs[1] = mce->status;
+		kvm_queue_exception(vcpu, MC_VECTOR);
+		return 0;
+	}
+
+	/*
+	 * Corrected error: a valid uncorrected record already in the bank is
+	 * kept and only marked as overflowed.
+	 */
+	if ((regs[1] & MCI_STATUS_VAL) && (regs[1] & MCI_STATUS_UC)) {
+		regs[1] |= MCI_STATUS_OVER;
+		return 0;
+	}
+
+	if (regs[1] & MCI_STATUS_VAL)
+		mce->status |= MCI_STATUS_OVER;
+	regs[2] = mce->addr;
+	regs[3] = mce->misc;
+	regs[1] = mce->status;
+	return 0;
+}
|
|
|
+
|
|
|
long kvm_arch_vcpu_ioctl(struct file *filp,
|
|
|
unsigned int ioctl, unsigned long arg)
|
|
|
{
|
|
@@ -1636,6 +1890,24 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
|
|
|
kvm_lapic_set_vapic_addr(vcpu, va.vapic_addr);
|
|
|
break;
|
|
|
}
|
|
|
+ case KVM_X86_SETUP_MCE: {
|
|
|
+ u64 mcg_cap;
|
|
|
+
|
|
|
+ r = -EFAULT;
|
|
|
+ if (copy_from_user(&mcg_cap, argp, sizeof mcg_cap))
|
|
|
+ goto out;
|
|
|
+ r = kvm_vcpu_ioctl_x86_setup_mce(vcpu, mcg_cap);
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ case KVM_X86_SET_MCE: {
|
|
|
+ struct kvm_x86_mce mce;
|
|
|
+
|
|
|
+ r = -EFAULT;
|
|
|
+ if (copy_from_user(&mce, argp, sizeof mce))
|
|
|
+ goto out;
|
|
|
+ r = kvm_vcpu_ioctl_x86_set_mce(vcpu, &mce);
|
|
|
+ break;
|
|
|
+ }
|
|
|
default:
|
|
|
r = -EINVAL;
|
|
|
}
|
|
@@ -1654,6 +1926,13 @@ static int kvm_vm_ioctl_set_tss_addr(struct kvm *kvm, unsigned long addr)
|
|
|
return ret;
|
|
|
}
|
|
|
|
|
|
+/*
+ * KVM_SET_IDENTITY_MAP_ADDR: remember the address userspace supplied for
+ * the EPT identity map.  The value is stored as given; always returns 0.
+ */
+static int kvm_vm_ioctl_set_identity_map_addr(struct kvm *kvm,
+					      u64 ident_addr)
+{
+	kvm->arch.ept_identity_map_addr = ident_addr;
+
+	return 0;
+}
|
|
|
+
|
|
|
static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm,
|
|
|
u32 kvm_nr_mmu_pages)
|
|
|
{
|
|
@@ -1775,19 +2054,25 @@ static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
|
|
|
r = 0;
|
|
|
switch (chip->chip_id) {
|
|
|
case KVM_IRQCHIP_PIC_MASTER:
|
|
|
+ spin_lock(&pic_irqchip(kvm)->lock);
|
|
|
memcpy(&pic_irqchip(kvm)->pics[0],
|
|
|
&chip->chip.pic,
|
|
|
sizeof(struct kvm_pic_state));
|
|
|
+ spin_unlock(&pic_irqchip(kvm)->lock);
|
|
|
break;
|
|
|
case KVM_IRQCHIP_PIC_SLAVE:
|
|
|
+ spin_lock(&pic_irqchip(kvm)->lock);
|
|
|
memcpy(&pic_irqchip(kvm)->pics[1],
|
|
|
&chip->chip.pic,
|
|
|
sizeof(struct kvm_pic_state));
|
|
|
+ spin_unlock(&pic_irqchip(kvm)->lock);
|
|
|
break;
|
|
|
case KVM_IRQCHIP_IOAPIC:
|
|
|
+ mutex_lock(&kvm->irq_lock);
|
|
|
memcpy(ioapic_irqchip(kvm),
|
|
|
&chip->chip.ioapic,
|
|
|
sizeof(struct kvm_ioapic_state));
|
|
|
+ mutex_unlock(&kvm->irq_lock);
|
|
|
break;
|
|
|
default:
|
|
|
r = -EINVAL;
|
|
@@ -1801,7 +2086,9 @@ static int kvm_vm_ioctl_get_pit(struct kvm *kvm, struct kvm_pit_state *ps)
|
|
|
{
|
|
|
int r = 0;
|
|
|
|
|
|
+ mutex_lock(&kvm->arch.vpit->pit_state.lock);
|
|
|
memcpy(ps, &kvm->arch.vpit->pit_state, sizeof(struct kvm_pit_state));
|
|
|
+ mutex_unlock(&kvm->arch.vpit->pit_state.lock);
|
|
|
return r;
|
|
|
}
|
|
|
|
|
@@ -1809,8 +2096,39 @@ static int kvm_vm_ioctl_set_pit(struct kvm *kvm, struct kvm_pit_state *ps)
|
|
|
{
|
|
|
int r = 0;
|
|
|
|
|
|
+ mutex_lock(&kvm->arch.vpit->pit_state.lock);
|
|
|
memcpy(&kvm->arch.vpit->pit_state, ps, sizeof(struct kvm_pit_state));
|
|
|
- kvm_pit_load_count(kvm, 0, ps->channels[0].count);
|
|
|
+ kvm_pit_load_count(kvm, 0, ps->channels[0].count, 0);
|
|
|
+ mutex_unlock(&kvm->arch.vpit->pit_state.lock);
|
|
|
+ return r;
|
|
|
+}
|
|
|
+
|
|
|
+/*
+ * KVM_GET_PIT2: copy the PIT channel state and flags into @ps while
+ * holding the PIT state lock.  Always returns 0; the caller has already
+ * checked that the in-kernel PIT exists.
+ */
+static int kvm_vm_ioctl_get_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps)
+{
+	mutex_lock(&kvm->arch.vpit->pit_state.lock);
+
+	memcpy(ps->channels, &kvm->arch.vpit->pit_state.channels,
+	       sizeof(ps->channels));
+	ps->flags = kvm->arch.vpit->pit_state.flags;
+
+	mutex_unlock(&kvm->arch.vpit->pit_state.lock);
+
+	return 0;
+}
|
|
|
+
|
|
|
+/*
+ * KVM_SET_PIT2: load PIT channel state and flags from @ps under the PIT
+ * state lock, then reload channel 0's count.  The last argument passed to
+ * kvm_pit_load_count() is 1 only when the HPET-legacy flag goes from clear
+ * to set with this call.  Always returns 0.
+ */
+static int kvm_vm_ioctl_set_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps)
+{
+	int r = 0, start;
+	u32 was_legacy, now_legacy;
+
+	mutex_lock(&kvm->arch.vpit->pit_state.lock);
+
+	was_legacy = kvm->arch.vpit->pit_state.flags & KVM_PIT_FLAGS_HPET_LEGACY;
+	now_legacy = ps->flags & KVM_PIT_FLAGS_HPET_LEGACY;
+	start = (!was_legacy && now_legacy) ? 1 : 0;
+
+	memcpy(&kvm->arch.vpit->pit_state.channels, &ps->channels,
+	       sizeof(kvm->arch.vpit->pit_state.channels));
+	kvm->arch.vpit->pit_state.flags = ps->flags;
+	kvm_pit_load_count(kvm, 0, kvm->arch.vpit->pit_state.channels[0].count, start);
+
+	mutex_unlock(&kvm->arch.vpit->pit_state.lock);
 	return r;
 }
|
|
|
|
|
@@ -1819,7 +2137,9 @@ static int kvm_vm_ioctl_reinject(struct kvm *kvm,
|
|
|
{
|
|
|
if (!kvm->arch.vpit)
|
|
|
return -ENXIO;
|
|
|
+ mutex_lock(&kvm->arch.vpit->pit_state.lock);
|
|
|
kvm->arch.vpit->pit_state.pit_timer.reinject = control->pit_reinject;
|
|
|
+ mutex_unlock(&kvm->arch.vpit->pit_state.lock);
|
|
|
return 0;
|
|
|
}
|
|
|
|
|
@@ -1845,7 +2165,6 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
|
|
|
spin_lock(&kvm->mmu_lock);
|
|
|
kvm_mmu_slot_remove_write_access(kvm, log->slot);
|
|
|
spin_unlock(&kvm->mmu_lock);
|
|
|
- kvm_flush_remote_tlbs(kvm);
|
|
|
memslot = &kvm->memslots[log->slot];
|
|
|
n = ALIGN(memslot->npages, BITS_PER_LONG) / 8;
|
|
|
memset(memslot->dirty_bitmap, 0, n);
|
|
@@ -1869,7 +2188,9 @@ long kvm_arch_vm_ioctl(struct file *filp,
|
|
|
*/
|
|
|
union {
|
|
|
struct kvm_pit_state ps;
|
|
|
+ struct kvm_pit_state2 ps2;
|
|
|
struct kvm_memory_alias alias;
|
|
|
+ struct kvm_pit_config pit_config;
|
|
|
} u;
|
|
|
|
|
|
switch (ioctl) {
|
|
@@ -1878,6 +2199,17 @@ long kvm_arch_vm_ioctl(struct file *filp,
|
|
|
if (r < 0)
|
|
|
goto out;
|
|
|
break;
|
|
|
+ case KVM_SET_IDENTITY_MAP_ADDR: {
|
|
|
+ u64 ident_addr;
|
|
|
+
|
|
|
+ r = -EFAULT;
|
|
|
+ if (copy_from_user(&ident_addr, argp, sizeof ident_addr))
|
|
|
+ goto out;
|
|
|
+ r = kvm_vm_ioctl_set_identity_map_addr(kvm, ident_addr);
|
|
|
+ if (r < 0)
|
|
|
+ goto out;
|
|
|
+ break;
|
|
|
+ }
|
|
|
case KVM_SET_MEMORY_REGION: {
|
|
|
struct kvm_memory_region kvm_mem;
|
|
|
struct kvm_userspace_memory_region kvm_userspace_mem;
|
|
@@ -1930,16 +2262,24 @@ long kvm_arch_vm_ioctl(struct file *filp,
|
|
|
}
|
|
|
break;
|
|
|
case KVM_CREATE_PIT:
|
|
|
- mutex_lock(&kvm->lock);
|
|
|
+ u.pit_config.flags = KVM_PIT_SPEAKER_DUMMY;
|
|
|
+ goto create_pit;
|
|
|
+ case KVM_CREATE_PIT2:
|
|
|
+ r = -EFAULT;
|
|
|
+ if (copy_from_user(&u.pit_config, argp,
|
|
|
+ sizeof(struct kvm_pit_config)))
|
|
|
+ goto out;
|
|
|
+ create_pit:
|
|
|
+ down_write(&kvm->slots_lock);
|
|
|
r = -EEXIST;
|
|
|
if (kvm->arch.vpit)
|
|
|
goto create_pit_unlock;
|
|
|
r = -ENOMEM;
|
|
|
- kvm->arch.vpit = kvm_create_pit(kvm);
|
|
|
+ kvm->arch.vpit = kvm_create_pit(kvm, u.pit_config.flags);
|
|
|
if (kvm->arch.vpit)
|
|
|
r = 0;
|
|
|
create_pit_unlock:
|
|
|
- mutex_unlock(&kvm->lock);
|
|
|
+ up_write(&kvm->slots_lock);
|
|
|
break;
|
|
|
case KVM_IRQ_LINE_STATUS:
|
|
|
case KVM_IRQ_LINE: {
|
|
@@ -1950,10 +2290,10 @@ long kvm_arch_vm_ioctl(struct file *filp,
|
|
|
goto out;
|
|
|
if (irqchip_in_kernel(kvm)) {
|
|
|
__s32 status;
|
|
|
- mutex_lock(&kvm->lock);
|
|
|
+ mutex_lock(&kvm->irq_lock);
|
|
|
status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
|
|
|
irq_event.irq, irq_event.level);
|
|
|
- mutex_unlock(&kvm->lock);
|
|
|
+ mutex_unlock(&kvm->irq_lock);
|
|
|
if (ioctl == KVM_IRQ_LINE_STATUS) {
|
|
|
irq_event.status = status;
|
|
|
if (copy_to_user(argp, &irq_event,
|
|
@@ -2042,6 +2382,32 @@ long kvm_arch_vm_ioctl(struct file *filp,
|
|
|
r = 0;
|
|
|
break;
|
|
|
}
|
|
|
+ case KVM_GET_PIT2: {
|
|
|
+ r = -ENXIO;
|
|
|
+ if (!kvm->arch.vpit)
|
|
|
+ goto out;
|
|
|
+ r = kvm_vm_ioctl_get_pit2(kvm, &u.ps2);
|
|
|
+ if (r)
|
|
|
+ goto out;
|
|
|
+ r = -EFAULT;
|
|
|
+ if (copy_to_user(argp, &u.ps2, sizeof(u.ps2)))
|
|
|
+ goto out;
|
|
|
+ r = 0;
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ case KVM_SET_PIT2: {
|
|
|
+ r = -EFAULT;
|
|
|
+ if (copy_from_user(&u.ps2, argp, sizeof(u.ps2)))
|
|
|
+ goto out;
|
|
|
+ r = -ENXIO;
|
|
|
+ if (!kvm->arch.vpit)
|
|
|
+ goto out;
|
|
|
+ r = kvm_vm_ioctl_set_pit2(kvm, &u.ps2);
|
|
|
+ if (r)
|
|
|
+ goto out;
|
|
|
+ r = 0;
|
|
|
+ break;
|
|
|
+ }
|
|
|
case KVM_REINJECT_CONTROL: {
|
|
|
struct kvm_reinject_control control;
|
|
|
r = -EFAULT;
|
|
@@ -2075,35 +2441,23 @@ static void kvm_init_msr_list(void)
|
|
|
num_msrs_to_save = j;
|
|
|
}
|
|
|
|
|
|
-/*
|
|
|
- * Only apic need an MMIO device hook, so shortcut now..
|
|
|
- */
|
|
|
-static struct kvm_io_device *vcpu_find_pervcpu_dev(struct kvm_vcpu *vcpu,
|
|
|
- gpa_t addr, int len,
|
|
|
- int is_write)
|
|
|
+static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len,
|
|
|
+ const void *v)
|
|
|
{
|
|
|
- struct kvm_io_device *dev;
|
|
|
+ if (vcpu->arch.apic &&
|
|
|
+ !kvm_iodevice_write(&vcpu->arch.apic->dev, addr, len, v))
|
|
|
+ return 0;
|
|
|
|
|
|
- if (vcpu->arch.apic) {
|
|
|
- dev = &vcpu->arch.apic->dev;
|
|
|
- if (dev->in_range(dev, addr, len, is_write))
|
|
|
- return dev;
|
|
|
- }
|
|
|
- return NULL;
|
|
|
+ return kvm_io_bus_write(&vcpu->kvm->mmio_bus, addr, len, v);
|
|
|
}
|
|
|
|
|
|
-
|
|
|
-static struct kvm_io_device *vcpu_find_mmio_dev(struct kvm_vcpu *vcpu,
|
|
|
- gpa_t addr, int len,
|
|
|
- int is_write)
|
|
|
+static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v)
|
|
|
{
|
|
|
- struct kvm_io_device *dev;
|
|
|
+ if (vcpu->arch.apic &&
|
|
|
+ !kvm_iodevice_read(&vcpu->arch.apic->dev, addr, len, v))
|
|
|
+ return 0;
|
|
|
|
|
|
- dev = vcpu_find_pervcpu_dev(vcpu, addr, len, is_write);
|
|
|
- if (dev == NULL)
|
|
|
- dev = kvm_io_bus_find_dev(&vcpu->kvm->mmio_bus, addr, len,
|
|
|
- is_write);
|
|
|
- return dev;
|
|
|
+ return kvm_io_bus_read(&vcpu->kvm->mmio_bus, addr, len, v);
|
|
|
}
|
|
|
|
|
|
static int kvm_read_guest_virt(gva_t addr, void *val, unsigned int bytes,
|
|
@@ -2172,11 +2526,12 @@ static int emulator_read_emulated(unsigned long addr,
|
|
|
unsigned int bytes,
|
|
|
struct kvm_vcpu *vcpu)
|
|
|
{
|
|
|
- struct kvm_io_device *mmio_dev;
|
|
|
gpa_t gpa;
|
|
|
|
|
|
if (vcpu->mmio_read_completed) {
|
|
|
memcpy(val, vcpu->mmio_data, bytes);
|
|
|
+ trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes,
|
|
|
+ vcpu->mmio_phys_addr, *(u64 *)val);
|
|
|
vcpu->mmio_read_completed = 0;
|
|
|
return X86EMUL_CONTINUE;
|
|
|
}
|
|
@@ -2197,14 +2552,12 @@ mmio:
|
|
|
/*
|
|
|
* Is this MMIO handled locally?
|
|
|
*/
|
|
|
- mutex_lock(&vcpu->kvm->lock);
|
|
|
- mmio_dev = vcpu_find_mmio_dev(vcpu, gpa, bytes, 0);
|
|
|
- if (mmio_dev) {
|
|
|
- kvm_iodevice_read(mmio_dev, gpa, bytes, val);
|
|
|
- mutex_unlock(&vcpu->kvm->lock);
|
|
|
+ if (!vcpu_mmio_read(vcpu, gpa, bytes, val)) {
|
|
|
+ trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes, gpa, *(u64 *)val);
|
|
|
return X86EMUL_CONTINUE;
|
|
|
}
|
|
|
- mutex_unlock(&vcpu->kvm->lock);
|
|
|
+
|
|
|
+ trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, 0);
|
|
|
|
|
|
vcpu->mmio_needed = 1;
|
|
|
vcpu->mmio_phys_addr = gpa;
|
|
@@ -2231,7 +2584,6 @@ static int emulator_write_emulated_onepage(unsigned long addr,
|
|
|
unsigned int bytes,
|
|
|
struct kvm_vcpu *vcpu)
|
|
|
{
|
|
|
- struct kvm_io_device *mmio_dev;
|
|
|
gpa_t gpa;
|
|
|
|
|
|
gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);
|
|
@@ -2249,17 +2601,12 @@ static int emulator_write_emulated_onepage(unsigned long addr,
|
|
|
return X86EMUL_CONTINUE;
|
|
|
|
|
|
mmio:
|
|
|
+ trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, *(u64 *)val);
|
|
|
/*
|
|
|
* Is this MMIO handled locally?
|
|
|
*/
|
|
|
- mutex_lock(&vcpu->kvm->lock);
|
|
|
- mmio_dev = vcpu_find_mmio_dev(vcpu, gpa, bytes, 1);
|
|
|
- if (mmio_dev) {
|
|
|
- kvm_iodevice_write(mmio_dev, gpa, bytes, val);
|
|
|
- mutex_unlock(&vcpu->kvm->lock);
|
|
|
+ if (!vcpu_mmio_write(vcpu, gpa, bytes, val))
|
|
|
return X86EMUL_CONTINUE;
|
|
|
- }
|
|
|
- mutex_unlock(&vcpu->kvm->lock);
|
|
|
|
|
|
vcpu->mmio_needed = 1;
|
|
|
vcpu->mmio_phys_addr = gpa;
|
|
@@ -2343,7 +2690,6 @@ int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address)
|
|
|
|
|
|
int emulate_clts(struct kvm_vcpu *vcpu)
|
|
|
{
|
|
|
- KVMTRACE_0D(CLTS, vcpu, handler);
|
|
|
kvm_x86_ops->set_cr0(vcpu, vcpu->arch.cr0 & ~X86_CR0_TS);
|
|
|
return X86EMUL_CONTINUE;
|
|
|
}
|
|
@@ -2420,7 +2766,7 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
|
|
|
kvm_clear_exception_queue(vcpu);
|
|
|
vcpu->arch.mmio_fault_cr2 = cr2;
|
|
|
/*
|
|
|
- * TODO: fix x86_emulate.c to use guest_read/write_register
|
|
|
+ * TODO: fix emulate.c to use guest_read/write_register
|
|
|
* instead of direct ->regs accesses, can save hundred cycles
|
|
|
* on Intel for instructions that don't read/change RSP, for
|
|
|
* for example.
|
|
@@ -2444,14 +2790,33 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
|
|
|
|
|
|
r = x86_decode_insn(&vcpu->arch.emulate_ctxt, &emulate_ops);
|
|
|
|
|
|
- /* Reject the instructions other than VMCALL/VMMCALL when
|
|
|
- * try to emulate invalid opcode */
|
|
|
+ /* Only allow emulation of specific instructions on #UD
|
|
|
+ * (namely VMMCALL, sysenter, sysexit, syscall)*/
|
|
|
c = &vcpu->arch.emulate_ctxt.decode;
|
|
|
- if ((emulation_type & EMULTYPE_TRAP_UD) &&
|
|
|
- (!(c->twobyte && c->b == 0x01 &&
|
|
|
- (c->modrm_reg == 0 || c->modrm_reg == 3) &&
|
|
|
- c->modrm_mod == 3 && c->modrm_rm == 1)))
|
|
|
- return EMULATE_FAIL;
|
|
|
+ if (emulation_type & EMULTYPE_TRAP_UD) {
|
|
|
+ if (!c->twobyte)
|
|
|
+ return EMULATE_FAIL;
|
|
|
+ switch (c->b) {
|
|
|
+ case 0x01: /* VMMCALL */
|
|
|
+ if (c->modrm_mod != 3 || c->modrm_rm != 1)
|
|
|
+ return EMULATE_FAIL;
|
|
|
+ break;
|
|
|
+ case 0x34: /* sysenter */
|
|
|
+ case 0x35: /* sysexit */
|
|
|
+ if (c->modrm_mod != 0 || c->modrm_rm != 0)
|
|
|
+ return EMULATE_FAIL;
|
|
|
+ break;
|
|
|
+ case 0x05: /* syscall */
|
|
|
+ if (c->modrm_mod != 0 || c->modrm_rm != 0)
|
|
|
+ return EMULATE_FAIL;
|
|
|
+ break;
|
|
|
+ default:
|
|
|
+ return EMULATE_FAIL;
|
|
|
+ }
|
|
|
+
|
|
|
+ if (!(c->modrm_reg == 0 || c->modrm_reg == 3))
|
|
|
+ return EMULATE_FAIL;
|
|
|
+ }
|
|
|
|
|
|
++vcpu->stat.insn_emulation;
|
|
|
if (r) {
|
|
@@ -2571,52 +2936,40 @@ int complete_pio(struct kvm_vcpu *vcpu)
|
|
|
return 0;
|
|
|
}
|
|
|
|
|
|
-static void kernel_pio(struct kvm_io_device *pio_dev,
|
|
|
- struct kvm_vcpu *vcpu,
|
|
|
- void *pd)
|
|
|
+static int kernel_pio(struct kvm_vcpu *vcpu, void *pd)
|
|
|
{
|
|
|
/* TODO: String I/O for in kernel device */
|
|
|
+ int r;
|
|
|
|
|
|
- mutex_lock(&vcpu->kvm->lock);
|
|
|
if (vcpu->arch.pio.in)
|
|
|
- kvm_iodevice_read(pio_dev, vcpu->arch.pio.port,
|
|
|
- vcpu->arch.pio.size,
|
|
|
- pd);
|
|
|
+ r = kvm_io_bus_read(&vcpu->kvm->pio_bus, vcpu->arch.pio.port,
|
|
|
+ vcpu->arch.pio.size, pd);
|
|
|
else
|
|
|
- kvm_iodevice_write(pio_dev, vcpu->arch.pio.port,
|
|
|
- vcpu->arch.pio.size,
|
|
|
- pd);
|
|
|
- mutex_unlock(&vcpu->kvm->lock);
|
|
|
+ r = kvm_io_bus_write(&vcpu->kvm->pio_bus, vcpu->arch.pio.port,
|
|
|
+ vcpu->arch.pio.size, pd);
|
|
|
+ return r;
|
|
|
}
|
|
|
|
|
|
-static void pio_string_write(struct kvm_io_device *pio_dev,
|
|
|
- struct kvm_vcpu *vcpu)
|
|
|
+static int pio_string_write(struct kvm_vcpu *vcpu)
|
|
|
{
|
|
|
struct kvm_pio_request *io = &vcpu->arch.pio;
|
|
|
void *pd = vcpu->arch.pio_data;
|
|
|
- int i;
|
|
|
+ int i, r = 0;
|
|
|
|
|
|
- mutex_lock(&vcpu->kvm->lock);
|
|
|
for (i = 0; i < io->cur_count; i++) {
|
|
|
- kvm_iodevice_write(pio_dev, io->port,
|
|
|
- io->size,
|
|
|
- pd);
|
|
|
+ if (kvm_io_bus_write(&vcpu->kvm->pio_bus,
|
|
|
+ io->port, io->size, pd)) {
|
|
|
+ r = -EOPNOTSUPP;
|
|
|
+ break;
|
|
|
+ }
|
|
|
pd += io->size;
|
|
|
}
|
|
|
- mutex_unlock(&vcpu->kvm->lock);
|
|
|
-}
|
|
|
-
|
|
|
-static struct kvm_io_device *vcpu_find_pio_dev(struct kvm_vcpu *vcpu,
|
|
|
- gpa_t addr, int len,
|
|
|
- int is_write)
|
|
|
-{
|
|
|
- return kvm_io_bus_find_dev(&vcpu->kvm->pio_bus, addr, len, is_write);
|
|
|
+ return r;
|
|
|
}
|
|
|
|
|
|
int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
|
|
|
int size, unsigned port)
|
|
|
{
|
|
|
- struct kvm_io_device *pio_dev;
|
|
|
unsigned long val;
|
|
|
|
|
|
vcpu->run->exit_reason = KVM_EXIT_IO;
|
|
@@ -2630,19 +2983,13 @@ int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
|
|
|
vcpu->arch.pio.down = 0;
|
|
|
vcpu->arch.pio.rep = 0;
|
|
|
|
|
|
- if (vcpu->run->io.direction == KVM_EXIT_IO_IN)
|
|
|
- KVMTRACE_2D(IO_READ, vcpu, vcpu->run->io.port, (u32)size,
|
|
|
- handler);
|
|
|
- else
|
|
|
- KVMTRACE_2D(IO_WRITE, vcpu, vcpu->run->io.port, (u32)size,
|
|
|
- handler);
|
|
|
+ trace_kvm_pio(vcpu->run->io.direction == KVM_EXIT_IO_OUT, port,
|
|
|
+ size, 1);
|
|
|
|
|
|
val = kvm_register_read(vcpu, VCPU_REGS_RAX);
|
|
|
memcpy(vcpu->arch.pio_data, &val, 4);
|
|
|
|
|
|
- pio_dev = vcpu_find_pio_dev(vcpu, port, size, !in);
|
|
|
- if (pio_dev) {
|
|
|
- kernel_pio(pio_dev, vcpu, vcpu->arch.pio_data);
|
|
|
+ if (!kernel_pio(vcpu, vcpu->arch.pio_data)) {
|
|
|
complete_pio(vcpu);
|
|
|
return 1;
|
|
|
}
|
|
@@ -2656,7 +3003,6 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
|
|
|
{
|
|
|
unsigned now, in_page;
|
|
|
int ret = 0;
|
|
|
- struct kvm_io_device *pio_dev;
|
|
|
|
|
|
vcpu->run->exit_reason = KVM_EXIT_IO;
|
|
|
vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT;
|
|
@@ -2669,12 +3015,8 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
|
|
|
vcpu->arch.pio.down = down;
|
|
|
vcpu->arch.pio.rep = rep;
|
|
|
|
|
|
- if (vcpu->run->io.direction == KVM_EXIT_IO_IN)
|
|
|
- KVMTRACE_2D(IO_READ, vcpu, vcpu->run->io.port, (u32)size,
|
|
|
- handler);
|
|
|
- else
|
|
|
- KVMTRACE_2D(IO_WRITE, vcpu, vcpu->run->io.port, (u32)size,
|
|
|
- handler);
|
|
|
+ trace_kvm_pio(vcpu->run->io.direction == KVM_EXIT_IO_OUT, port,
|
|
|
+ size, count);
|
|
|
|
|
|
if (!count) {
|
|
|
kvm_x86_ops->skip_emulated_instruction(vcpu);
|
|
@@ -2704,9 +3046,6 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
|
|
|
|
|
|
vcpu->arch.pio.guest_gva = address;
|
|
|
|
|
|
- pio_dev = vcpu_find_pio_dev(vcpu, port,
|
|
|
- vcpu->arch.pio.cur_count,
|
|
|
- !vcpu->arch.pio.in);
|
|
|
if (!vcpu->arch.pio.in) {
|
|
|
/* string PIO write */
|
|
|
ret = pio_copy_data(vcpu);
|
|
@@ -2714,16 +3053,13 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
|
|
|
kvm_inject_gp(vcpu, 0);
|
|
|
return 1;
|
|
|
}
|
|
|
- if (ret == 0 && pio_dev) {
|
|
|
- pio_string_write(pio_dev, vcpu);
|
|
|
+ if (ret == 0 && !pio_string_write(vcpu)) {
|
|
|
complete_pio(vcpu);
|
|
|
if (vcpu->arch.pio.count == 0)
|
|
|
ret = 1;
|
|
|
}
|
|
|
- } else if (pio_dev)
|
|
|
- pr_unimpl(vcpu, "no string pio read support yet, "
|
|
|
- "port %x size %d count %ld\n",
|
|
|
- port, size, count);
|
|
|
+ }
|
|
|
+ /* no string PIO read support yet */
|
|
|
|
|
|
return ret;
|
|
|
}
|
|
@@ -2756,10 +3092,7 @@ static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long va
|
|
|
|
|
|
spin_lock(&kvm_lock);
|
|
|
list_for_each_entry(kvm, &vm_list, vm_list) {
|
|
|
- for (i = 0; i < KVM_MAX_VCPUS; ++i) {
|
|
|
- vcpu = kvm->vcpus[i];
|
|
|
- if (!vcpu)
|
|
|
- continue;
|
|
|
+ kvm_for_each_vcpu(i, vcpu, kvm) {
|
|
|
if (vcpu->cpu != freq->cpu)
|
|
|
continue;
|
|
|
if (!kvm_request_guest_time_update(vcpu))
|
|
@@ -2852,7 +3185,6 @@ void kvm_arch_exit(void)
|
|
|
int kvm_emulate_halt(struct kvm_vcpu *vcpu)
|
|
|
{
|
|
|
++vcpu->stat.halt_exits;
|
|
|
- KVMTRACE_0D(HLT, vcpu, handler);
|
|
|
if (irqchip_in_kernel(vcpu->kvm)) {
|
|
|
vcpu->arch.mp_state = KVM_MP_STATE_HALTED;
|
|
|
return 1;
|
|
@@ -2883,7 +3215,7 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
|
|
|
a2 = kvm_register_read(vcpu, VCPU_REGS_RDX);
|
|
|
a3 = kvm_register_read(vcpu, VCPU_REGS_RSI);
|
|
|
|
|
|
- KVMTRACE_1D(VMMCALL, vcpu, (u32)nr, handler);
|
|
|
+ trace_kvm_hypercall(nr, a0, a1, a2, a3);
|
|
|
|
|
|
if (!is_long_mode(vcpu)) {
|
|
|
nr &= 0xFFFFFFFF;
|
|
@@ -2893,6 +3225,11 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
|
|
|
a3 &= 0xFFFFFFFF;
|
|
|
}
|
|
|
|
|
|
+ if (kvm_x86_ops->get_cpl(vcpu) != 0) {
|
|
|
+ ret = -KVM_EPERM;
|
|
|
+ goto out;
|
|
|
+ }
|
|
|
+
|
|
|
switch (nr) {
|
|
|
case KVM_HC_VAPIC_POLL_IRQ:
|
|
|
ret = 0;
|
|
@@ -2904,6 +3241,7 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
|
|
|
ret = -KVM_ENOSYS;
|
|
|
break;
|
|
|
}
|
|
|
+out:
|
|
|
kvm_register_write(vcpu, VCPU_REGS_RAX, ret);
|
|
|
++vcpu->stat.hypercalls;
|
|
|
return r;
|
|
@@ -2983,8 +3321,6 @@ unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr)
|
|
|
vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr);
|
|
|
return 0;
|
|
|
}
|
|
|
- KVMTRACE_3D(CR_READ, vcpu, (u32)cr, (u32)value,
|
|
|
- (u32)((u64)value >> 32), handler);
|
|
|
|
|
|
return value;
|
|
|
}
|
|
@@ -2992,9 +3328,6 @@ unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr)
|
|
|
void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long val,
|
|
|
unsigned long *rflags)
|
|
|
{
|
|
|
- KVMTRACE_3D(CR_WRITE, vcpu, (u32)cr, (u32)val,
|
|
|
- (u32)((u64)val >> 32), handler);
|
|
|
-
|
|
|
switch (cr) {
|
|
|
case 0:
|
|
|
kvm_set_cr0(vcpu, mk_cr_64(vcpu->arch.cr0, val));
|
|
@@ -3104,11 +3437,11 @@ void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
|
|
|
kvm_register_write(vcpu, VCPU_REGS_RDX, best->edx);
|
|
|
}
|
|
|
kvm_x86_ops->skip_emulated_instruction(vcpu);
|
|
|
- KVMTRACE_5D(CPUID, vcpu, function,
|
|
|
- (u32)kvm_register_read(vcpu, VCPU_REGS_RAX),
|
|
|
- (u32)kvm_register_read(vcpu, VCPU_REGS_RBX),
|
|
|
- (u32)kvm_register_read(vcpu, VCPU_REGS_RCX),
|
|
|
- (u32)kvm_register_read(vcpu, VCPU_REGS_RDX), handler);
|
|
|
+ trace_kvm_cpuid(function,
|
|
|
+ kvm_register_read(vcpu, VCPU_REGS_RAX),
|
|
|
+ kvm_register_read(vcpu, VCPU_REGS_RBX),
|
|
|
+ kvm_register_read(vcpu, VCPU_REGS_RCX),
|
|
|
+ kvm_register_read(vcpu, VCPU_REGS_RDX));
|
|
|
}
|
|
|
EXPORT_SYMBOL_GPL(kvm_emulate_cpuid);
|
|
|
|
|
@@ -3174,6 +3507,9 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu)
|
|
|
if (!kvm_x86_ops->update_cr8_intercept)
|
|
|
return;
|
|
|
|
|
|
+ if (!vcpu->arch.apic)
|
|
|
+ return;
|
|
|
+
|
|
|
if (!vcpu->arch.apic->vapic_addr)
|
|
|
max_irr = kvm_lapic_find_highest_irr(vcpu);
|
|
|
else
|
|
@@ -3187,12 +3523,16 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu)
|
|
|
kvm_x86_ops->update_cr8_intercept(vcpu, tpr, max_irr);
|
|
|
}
|
|
|
|
|
|
-static void inject_pending_irq(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
|
|
|
+static void inject_pending_event(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
|
|
|
{
|
|
|
- if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
|
|
|
- kvm_x86_ops->set_interrupt_shadow(vcpu, 0);
|
|
|
-
|
|
|
/* try to reinject previous events if any */
|
|
|
+ if (vcpu->arch.exception.pending) {
|
|
|
+ kvm_x86_ops->queue_exception(vcpu, vcpu->arch.exception.nr,
|
|
|
+ vcpu->arch.exception.has_error_code,
|
|
|
+ vcpu->arch.exception.error_code);
|
|
|
+ return;
|
|
|
+ }
|
|
|
+
|
|
|
if (vcpu->arch.nmi_injected) {
|
|
|
kvm_x86_ops->set_nmi(vcpu);
|
|
|
return;
|
|
@@ -3266,16 +3606,14 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
|
|
|
smp_mb__after_clear_bit();
|
|
|
|
|
|
if (vcpu->requests || need_resched() || signal_pending(current)) {
|
|
|
+ set_bit(KVM_REQ_KICK, &vcpu->requests);
|
|
|
local_irq_enable();
|
|
|
preempt_enable();
|
|
|
r = 1;
|
|
|
goto out;
|
|
|
}
|
|
|
|
|
|
- if (vcpu->arch.exception.pending)
|
|
|
- __queue_exception(vcpu);
|
|
|
- else
|
|
|
- inject_pending_irq(vcpu, kvm_run);
|
|
|
+ inject_pending_event(vcpu, kvm_run);
|
|
|
|
|
|
/* enable NMI/IRQ window open exits if needed */
|
|
|
if (vcpu->arch.nmi_pending)
|
|
@@ -3292,14 +3630,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
|
|
|
|
|
|
kvm_guest_enter();
|
|
|
|
|
|
- get_debugreg(vcpu->arch.host_dr6, 6);
|
|
|
- get_debugreg(vcpu->arch.host_dr7, 7);
|
|
|
if (unlikely(vcpu->arch.switch_db_regs)) {
|
|
|
- get_debugreg(vcpu->arch.host_db[0], 0);
|
|
|
- get_debugreg(vcpu->arch.host_db[1], 1);
|
|
|
- get_debugreg(vcpu->arch.host_db[2], 2);
|
|
|
- get_debugreg(vcpu->arch.host_db[3], 3);
|
|
|
-
|
|
|
set_debugreg(0, 7);
|
|
|
set_debugreg(vcpu->arch.eff_db[0], 0);
|
|
|
set_debugreg(vcpu->arch.eff_db[1], 1);
|
|
@@ -3307,18 +3638,17 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
|
|
|
set_debugreg(vcpu->arch.eff_db[3], 3);
|
|
|
}
|
|
|
|
|
|
- KVMTRACE_0D(VMENTRY, vcpu, entryexit);
|
|
|
+ trace_kvm_entry(vcpu->vcpu_id);
|
|
|
kvm_x86_ops->run(vcpu, kvm_run);
|
|
|
|
|
|
- if (unlikely(vcpu->arch.switch_db_regs)) {
|
|
|
- set_debugreg(0, 7);
|
|
|
- set_debugreg(vcpu->arch.host_db[0], 0);
|
|
|
- set_debugreg(vcpu->arch.host_db[1], 1);
|
|
|
- set_debugreg(vcpu->arch.host_db[2], 2);
|
|
|
- set_debugreg(vcpu->arch.host_db[3], 3);
|
|
|
+ if (unlikely(vcpu->arch.switch_db_regs || test_thread_flag(TIF_DEBUG))) {
|
|
|
+ set_debugreg(current->thread.debugreg0, 0);
|
|
|
+ set_debugreg(current->thread.debugreg1, 1);
|
|
|
+ set_debugreg(current->thread.debugreg2, 2);
|
|
|
+ set_debugreg(current->thread.debugreg3, 3);
|
|
|
+ set_debugreg(current->thread.debugreg6, 6);
|
|
|
+ set_debugreg(current->thread.debugreg7, 7);
|
|
|
}
|
|
|
- set_debugreg(vcpu->arch.host_dr6, 6);
|
|
|
- set_debugreg(vcpu->arch.host_dr7, 7);
|
|
|
|
|
|
set_bit(KVM_REQ_KICK, &vcpu->requests);
|
|
|
local_irq_enable();
|
|
@@ -3648,11 +3978,8 @@ static void kvm_set_segment(struct kvm_vcpu *vcpu,
|
|
|
static void seg_desct_to_kvm_desct(struct desc_struct *seg_desc, u16 selector,
|
|
|
struct kvm_segment *kvm_desct)
|
|
|
{
|
|
|
- kvm_desct->base = seg_desc->base0;
|
|
|
- kvm_desct->base |= seg_desc->base1 << 16;
|
|
|
- kvm_desct->base |= seg_desc->base2 << 24;
|
|
|
- kvm_desct->limit = seg_desc->limit0;
|
|
|
- kvm_desct->limit |= seg_desc->limit << 16;
|
|
|
+ kvm_desct->base = get_desc_base(seg_desc);
|
|
|
+ kvm_desct->limit = get_desc_limit(seg_desc);
|
|
|
if (seg_desc->g) {
|
|
|
kvm_desct->limit <<= 12;
|
|
|
kvm_desct->limit |= 0xfff;
|
|
@@ -3696,7 +4023,6 @@ static void get_segment_descriptor_dtable(struct kvm_vcpu *vcpu,
|
|
|
static int load_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
|
|
|
struct desc_struct *seg_desc)
|
|
|
{
|
|
|
- gpa_t gpa;
|
|
|
struct descriptor_table dtable;
|
|
|
u16 index = selector >> 3;
|
|
|
|
|
@@ -3706,16 +4032,13 @@ static int load_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
|
|
|
kvm_queue_exception_e(vcpu, GP_VECTOR, selector & 0xfffc);
|
|
|
return 1;
|
|
|
}
|
|
|
- gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, dtable.base);
|
|
|
- gpa += index * 8;
|
|
|
- return kvm_read_guest(vcpu->kvm, gpa, seg_desc, 8);
|
|
|
+ return kvm_read_guest_virt(dtable.base + index*8, seg_desc, sizeof(*seg_desc), vcpu);
|
|
|
}
|
|
|
|
|
|
/* allowed just for 8 bytes segments */
|
|
|
static int save_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
|
|
|
struct desc_struct *seg_desc)
|
|
|
{
|
|
|
- gpa_t gpa;
|
|
|
struct descriptor_table dtable;
|
|
|
u16 index = selector >> 3;
|
|
|
|
|
@@ -3723,19 +4046,13 @@ static int save_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
|
|
|
|
|
|
if (dtable.limit < index * 8 + 7)
|
|
|
return 1;
|
|
|
- gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, dtable.base);
|
|
|
- gpa += index * 8;
|
|
|
- return kvm_write_guest(vcpu->kvm, gpa, seg_desc, 8);
|
|
|
+ return kvm_write_guest_virt(dtable.base + index*8, seg_desc, sizeof(*seg_desc), vcpu);
|
|
|
}
|
|
|
|
|
|
static u32 get_tss_base_addr(struct kvm_vcpu *vcpu,
|
|
|
struct desc_struct *seg_desc)
|
|
|
{
|
|
|
- u32 base_addr;
|
|
|
-
|
|
|
- base_addr = seg_desc->base0;
|
|
|
- base_addr |= (seg_desc->base1 << 16);
|
|
|
- base_addr |= (seg_desc->base2 << 24);
|
|
|
+ u32 base_addr = get_desc_base(seg_desc);
|
|
|
|
|
|
return vcpu->arch.mmu.gva_to_gpa(vcpu, base_addr);
|
|
|
}
|
|
@@ -3780,12 +4097,19 @@ static int kvm_load_realmode_segment(struct kvm_vcpu *vcpu, u16 selector, int se
|
|
|
return 0;
|
|
|
}
|
|
|
|
|
|
+static int is_vm86_segment(struct kvm_vcpu *vcpu, int seg)
|
|
|
+{
|
|
|
+ return (seg != VCPU_SREG_LDTR) &&
|
|
|
+ (seg != VCPU_SREG_TR) &&
|
|
|
+ (kvm_x86_ops->get_rflags(vcpu) & X86_EFLAGS_VM);
|
|
|
+}
|
|
|
+
|
|
|
int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
|
|
|
int type_bits, int seg)
|
|
|
{
|
|
|
struct kvm_segment kvm_seg;
|
|
|
|
|
|
- if (!(vcpu->arch.cr0 & X86_CR0_PE))
|
|
|
+ if (is_vm86_segment(vcpu, seg) || !(vcpu->arch.cr0 & X86_CR0_PE))
|
|
|
return kvm_load_realmode_segment(vcpu, selector, seg);
|
|
|
if (load_segment_descriptor_to_kvm_desct(vcpu, selector, &kvm_seg))
|
|
|
return 1;
|
|
@@ -4024,7 +4348,7 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason)
|
|
|
}
|
|
|
}
|
|
|
|
|
|
- if (!nseg_desc.p || (nseg_desc.limit0 | nseg_desc.limit << 16) < 0x67) {
|
|
|
+ if (!nseg_desc.p || get_desc_limit(&nseg_desc) < 0x67) {
|
|
|
kvm_queue_exception_e(vcpu, TS_VECTOR, tss_selector & 0xfffc);
|
|
|
return 1;
|
|
|
}
|
|
@@ -4094,13 +4418,7 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
|
|
|
|
|
|
vcpu->arch.cr2 = sregs->cr2;
|
|
|
mmu_reset_needed |= vcpu->arch.cr3 != sregs->cr3;
|
|
|
-
|
|
|
- down_read(&vcpu->kvm->slots_lock);
|
|
|
- if (gfn_to_memslot(vcpu->kvm, sregs->cr3 >> PAGE_SHIFT))
|
|
|
- vcpu->arch.cr3 = sregs->cr3;
|
|
|
- else
|
|
|
- set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests);
|
|
|
- up_read(&vcpu->kvm->slots_lock);
|
|
|
+ vcpu->arch.cr3 = sregs->cr3;
|
|
|
|
|
|
kvm_set_cr8(vcpu, sregs->cr8);
|
|
|
|
|
@@ -4142,8 +4460,10 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
|
|
|
kvm_set_segment(vcpu, &sregs->tr, VCPU_SREG_TR);
|
|
|
kvm_set_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR);
|
|
|
|
|
|
+ update_cr8_intercept(vcpu);
|
|
|
+
|
|
|
/* Older userspace won't unhalt the vcpu on reset. */
|
|
|
- if (vcpu->vcpu_id == 0 && kvm_rip_read(vcpu) == 0xfff0 &&
|
|
|
+ if (kvm_vcpu_is_bsp(vcpu) && kvm_rip_read(vcpu) == 0xfff0 &&
|
|
|
sregs->cs.selector == 0xf000 && sregs->cs.base == 0xffff0000 &&
|
|
|
!(vcpu->arch.cr0 & X86_CR0_PE))
|
|
|
vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
|
|
@@ -4414,7 +4734,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
|
|
|
kvm = vcpu->kvm;
|
|
|
|
|
|
vcpu->arch.mmu.root_hpa = INVALID_PAGE;
|
|
|
- if (!irqchip_in_kernel(kvm) || vcpu->vcpu_id == 0)
|
|
|
+ if (!irqchip_in_kernel(kvm) || kvm_vcpu_is_bsp(vcpu))
|
|
|
vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
|
|
|
else
|
|
|
vcpu->arch.mp_state = KVM_MP_STATE_UNINITIALIZED;
|
|
@@ -4436,6 +4756,14 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
|
|
|
goto fail_mmu_destroy;
|
|
|
}
|
|
|
|
|
|
+ vcpu->arch.mce_banks = kzalloc(KVM_MAX_MCE_BANKS * sizeof(u64) * 4,
|
|
|
+ GFP_KERNEL);
|
|
|
+ if (!vcpu->arch.mce_banks) {
|
|
|
+ r = -ENOMEM;
|
|
|
+ goto fail_mmu_destroy;
|
|
|
+ }
|
|
|
+ vcpu->arch.mcg_cap = KVM_MAX_MCE_BANKS;
|
|
|
+
|
|
|
return 0;
|
|
|
|
|
|
fail_mmu_destroy:
|
|
@@ -4483,20 +4811,22 @@ static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu)
|
|
|
static void kvm_free_vcpus(struct kvm *kvm)
|
|
|
{
|
|
|
unsigned int i;
|
|
|
+ struct kvm_vcpu *vcpu;
|
|
|
|
|
|
/*
|
|
|
* Unpin any mmu pages first.
|
|
|
*/
|
|
|
- for (i = 0; i < KVM_MAX_VCPUS; ++i)
|
|
|
- if (kvm->vcpus[i])
|
|
|
- kvm_unload_vcpu_mmu(kvm->vcpus[i]);
|
|
|
- for (i = 0; i < KVM_MAX_VCPUS; ++i) {
|
|
|
- if (kvm->vcpus[i]) {
|
|
|
- kvm_arch_vcpu_free(kvm->vcpus[i]);
|
|
|
- kvm->vcpus[i] = NULL;
|
|
|
- }
|
|
|
- }
|
|
|
+ kvm_for_each_vcpu(i, vcpu, kvm)
|
|
|
+ kvm_unload_vcpu_mmu(vcpu);
|
|
|
+ kvm_for_each_vcpu(i, vcpu, kvm)
|
|
|
+ kvm_arch_vcpu_free(vcpu);
|
|
|
+
|
|
|
+ mutex_lock(&kvm->lock);
|
|
|
+ for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
|
|
|
+ kvm->vcpus[i] = NULL;
|
|
|
|
|
|
+ atomic_set(&kvm->online_vcpus, 0);
|
|
|
+ mutex_unlock(&kvm->lock);
|
|
|
}
|
|
|
|
|
|
void kvm_arch_sync_events(struct kvm *kvm)
|
|
@@ -4573,7 +4903,6 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
|
|
|
|
|
|
kvm_mmu_slot_remove_write_access(kvm, mem->slot);
|
|
|
spin_unlock(&kvm->mmu_lock);
|
|
|
- kvm_flush_remote_tlbs(kvm);
|
|
|
|
|
|
return 0;
|
|
|
}
|
|
@@ -4587,8 +4916,10 @@ void kvm_arch_flush_shadow(struct kvm *kvm)
|
|
|
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
|
|
|
{
|
|
|
return vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE
|
|
|
- || vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED
|
|
|
- || vcpu->arch.nmi_pending;
|
|
|
+ || vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED
|
|
|
+ || vcpu->arch.nmi_pending ||
|
|
|
+ (kvm_arch_interrupt_allowed(vcpu) &&
|
|
|
+ kvm_cpu_has_interrupt(vcpu));
|
|
|
}
|
|
|
|
|
|
void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
|
|
@@ -4612,3 +4943,9 @@ int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu)
|
|
|
{
|
|
|
return kvm_x86_ops->interrupt_allowed(vcpu);
|
|
|
}
|
|
|
+
|
|
|
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit);
|
|
|
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq);
|
|
|
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault);
|
|
|
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_msr);
|
|
|
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_cr);
|