@@ -84,8 +84,11 @@ module_param(vmm_exclusive, bool, S_IRUGO);
 static bool __read_mostly fasteoi = 1;
 module_param(fasteoi, bool, S_IRUGO);
 
-static bool __read_mostly enable_apicv_reg_vid;
+static bool __read_mostly enable_apicv = 1;
+module_param(enable_apicv, bool, S_IRUGO);
+
+static bool __read_mostly enable_shadow_vmcs = 1;
+module_param_named(enable_shadow_vmcs, enable_shadow_vmcs, bool, S_IRUGO);
 /*
  * If nested=1, nested virtualization is supported, i.e., guests may use
  * VMX and be a hypervisor for its own guests. If nested=0, guests may not
@@ -298,7 +301,8 @@ struct __packed vmcs12 {
 	u32 guest_activity_state;
 	u32 guest_sysenter_cs;
 	u32 host_ia32_sysenter_cs;
-	u32 padding32[8]; /* room for future expansion */
+	u32 vmx_preemption_timer_value;
+	u32 padding32[7]; /* room for future expansion */
 	u16 virtual_processor_id;
 	u16 guest_es_selector;
 	u16 guest_cs_selector;
@@ -351,6 +355,12 @@ struct nested_vmx {
 	/* The host-usable pointer to the above */
 	struct page *current_vmcs12_page;
 	struct vmcs12 *current_vmcs12;
+	struct vmcs *current_shadow_vmcs;
+	/*
+	 * Indicates if the shadow vmcs must be updated with the
+	 * data held by vmcs12
+	 */
+	bool sync_shadow_vmcs;
 
 	/* vmcs02_list cache of VMCSs recently used to run L2 guests */
 	struct list_head vmcs02_pool;
@@ -365,6 +375,31 @@ struct nested_vmx {
 	struct page *apic_access_page;
 };
 
+#define POSTED_INTR_ON 0
+/* Posted-Interrupt Descriptor */
+struct pi_desc {
+	u32 pir[8];	/* Posted interrupt requested */
+	u32 control;	/* bit 0 of control is outstanding notification bit */
+	u32 rsvd[7];
+} __aligned(64);
+
+static bool pi_test_and_set_on(struct pi_desc *pi_desc)
+{
+	return test_and_set_bit(POSTED_INTR_ON,
+			(unsigned long *)&pi_desc->control);
+}
+
+static bool pi_test_and_clear_on(struct pi_desc *pi_desc)
+{
+	return test_and_clear_bit(POSTED_INTR_ON,
+			(unsigned long *)&pi_desc->control);
+}
+
+static int pi_test_and_set_pir(int vector, struct pi_desc *pi_desc)
+{
+	return test_and_set_bit(vector, (unsigned long *)pi_desc->pir);
+}
+
 struct vcpu_vmx {
 	struct kvm_vcpu vcpu;
 	unsigned long host_rsp;
@@ -377,6 +412,7 @@ struct vcpu_vmx {
 	struct shared_msr_entry *guest_msrs;
 	int nmsrs;
 	int save_nmsrs;
+	unsigned long host_idt_base;
 #ifdef CONFIG_X86_64
 	u64 msr_host_kernel_gs_base;
 	u64 msr_guest_kernel_gs_base;
@@ -428,6 +464,9 @@ struct vcpu_vmx {
 
 	bool rdtscp_enabled;
 
+	/* Posted interrupt descriptor */
+	struct pi_desc pi_desc;
+
 	/* Support for a guest hypervisor (nested VMX) */
 	struct nested_vmx nested;
 };
@@ -451,6 +490,64 @@ static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
|
|
|
#define FIELD64(number, name) [number] = VMCS12_OFFSET(name), \
|
|
|
[number##_HIGH] = VMCS12_OFFSET(name)+4
|
|
|
|
|
|
+
|
|
|
+static const unsigned long shadow_read_only_fields[] = {
|
|
|
+ /*
|
|
|
+ * We do NOT shadow fields that are modified when L0
|
|
|
+ * traps and emulates any vmx instruction (e.g. VMPTRLD,
|
|
|
+ * VMXON...) executed by L1.
|
|
|
+ * For example, VM_INSTRUCTION_ERROR is read
|
|
|
+ * by L1 if a vmx instruction fails (part of the error path).
|
|
|
+ * Note the code assumes this logic. If for some reason
|
|
|
+ * we start shadowing these fields then we need to
|
|
|
+ * force a shadow sync when L0 emulates vmx instructions
|
|
|
+ * (e.g. force a sync if VM_INSTRUCTION_ERROR is modified
|
|
|
+ * by nested_vmx_failValid)
|
|
|
+ */
|
|
|
+ VM_EXIT_REASON,
|
|
|
+ VM_EXIT_INTR_INFO,
|
|
|
+ VM_EXIT_INSTRUCTION_LEN,
|
|
|
+ IDT_VECTORING_INFO_FIELD,
|
|
|
+ IDT_VECTORING_ERROR_CODE,
|
|
|
+ VM_EXIT_INTR_ERROR_CODE,
|
|
|
+ EXIT_QUALIFICATION,
|
|
|
+ GUEST_LINEAR_ADDRESS,
|
|
|
+ GUEST_PHYSICAL_ADDRESS
|
|
|
+};
|
|
|
+static const int max_shadow_read_only_fields =
|
|
|
+ ARRAY_SIZE(shadow_read_only_fields);
|
|
|
+
|
|
|
+static const unsigned long shadow_read_write_fields[] = {
|
|
|
+ GUEST_RIP,
|
|
|
+ GUEST_RSP,
|
|
|
+ GUEST_CR0,
|
|
|
+ GUEST_CR3,
|
|
|
+ GUEST_CR4,
|
|
|
+ GUEST_INTERRUPTIBILITY_INFO,
|
|
|
+ GUEST_RFLAGS,
|
|
|
+ GUEST_CS_SELECTOR,
|
|
|
+ GUEST_CS_AR_BYTES,
|
|
|
+ GUEST_CS_LIMIT,
|
|
|
+ GUEST_CS_BASE,
|
|
|
+ GUEST_ES_BASE,
|
|
|
+ CR0_GUEST_HOST_MASK,
|
|
|
+ CR0_READ_SHADOW,
|
|
|
+ CR4_READ_SHADOW,
|
|
|
+ TSC_OFFSET,
|
|
|
+ EXCEPTION_BITMAP,
|
|
|
+ CPU_BASED_VM_EXEC_CONTROL,
|
|
|
+ VM_ENTRY_EXCEPTION_ERROR_CODE,
|
|
|
+ VM_ENTRY_INTR_INFO_FIELD,
|
|
|
+ VM_ENTRY_INSTRUCTION_LEN,
|
|
|
+ VM_ENTRY_EXCEPTION_ERROR_CODE,
|
|
|
+ HOST_FS_BASE,
|
|
|
+ HOST_GS_BASE,
|
|
|
+ HOST_FS_SELECTOR,
|
|
|
+ HOST_GS_SELECTOR
|
|
|
+};
|
|
|
+static const int max_shadow_read_write_fields =
|
|
|
+ ARRAY_SIZE(shadow_read_write_fields);
|
|
|
+
|
|
|
static const unsigned short vmcs_field_to_offset_table[] = {
|
|
|
FIELD(VIRTUAL_PROCESSOR_ID, virtual_processor_id),
|
|
|
FIELD(GUEST_ES_SELECTOR, guest_es_selector),
|
|
@@ -537,6 +634,7 @@ static const unsigned short vmcs_field_to_offset_table[] = {
|
|
|
FIELD(GUEST_ACTIVITY_STATE, guest_activity_state),
|
|
|
FIELD(GUEST_SYSENTER_CS, guest_sysenter_cs),
|
|
|
FIELD(HOST_IA32_SYSENTER_CS, host_ia32_sysenter_cs),
|
|
|
+ FIELD(VMX_PREEMPTION_TIMER_VALUE, vmx_preemption_timer_value),
|
|
|
FIELD(CR0_GUEST_HOST_MASK, cr0_guest_host_mask),
|
|
|
FIELD(CR4_GUEST_HOST_MASK, cr4_guest_host_mask),
|
|
|
FIELD(CR0_READ_SHADOW, cr0_read_shadow),
|
|
@@ -624,6 +722,9 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu,
|
|
|
struct kvm_segment *var, int seg);
|
|
|
static bool guest_state_valid(struct kvm_vcpu *vcpu);
|
|
|
static u32 vmx_segment_access_rights(struct kvm_segment *var);
|
|
|
+static void vmx_sync_pir_to_irr_dummy(struct kvm_vcpu *vcpu);
|
|
|
+static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx);
|
|
|
+static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx);
|
|
|
|
|
|
static DEFINE_PER_CPU(struct vmcs *, vmxarea);
|
|
|
static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
|
|
@@ -640,6 +741,8 @@ static unsigned long *vmx_msr_bitmap_legacy;
|
|
|
static unsigned long *vmx_msr_bitmap_longmode;
|
|
|
static unsigned long *vmx_msr_bitmap_legacy_x2apic;
|
|
|
static unsigned long *vmx_msr_bitmap_longmode_x2apic;
|
|
|
+static unsigned long *vmx_vmread_bitmap;
|
|
|
+static unsigned long *vmx_vmwrite_bitmap;
|
|
|
|
|
|
static bool cpu_has_load_ia32_efer;
|
|
|
static bool cpu_has_load_perf_global_ctrl;
|
|
@@ -782,6 +885,18 @@ static inline bool cpu_has_vmx_virtual_intr_delivery(void)
|
|
|
SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY;
|
|
|
}
|
|
|
|
|
|
+static inline bool cpu_has_vmx_posted_intr(void)
|
|
|
+{
|
|
|
+ return vmcs_config.pin_based_exec_ctrl & PIN_BASED_POSTED_INTR;
|
|
|
+}
|
|
|
+
|
|
|
+static inline bool cpu_has_vmx_apicv(void)
|
|
|
+{
|
|
|
+ return cpu_has_vmx_apic_register_virt() &&
|
|
|
+ cpu_has_vmx_virtual_intr_delivery() &&
|
|
|
+ cpu_has_vmx_posted_intr();
|
|
|
+}
|
|
|
+
|
|
|
static inline bool cpu_has_vmx_flexpriority(void)
|
|
|
{
|
|
|
return cpu_has_vmx_tpr_shadow() &&
|
|
@@ -895,6 +1010,18 @@ static inline bool cpu_has_vmx_wbinvd_exit(void)
|
|
|
SECONDARY_EXEC_WBINVD_EXITING;
|
|
|
}
|
|
|
|
|
|
+static inline bool cpu_has_vmx_shadow_vmcs(void)
|
|
|
+{
|
|
|
+ u64 vmx_msr;
|
|
|
+ rdmsrl(MSR_IA32_VMX_MISC, vmx_msr);
|
|
|
+ /* check if the cpu supports writing r/o exit information fields */
|
|
|
+ if (!(vmx_msr & MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS))
|
|
|
+ return false;
|
|
|
+
|
|
|
+ return vmcs_config.cpu_based_2nd_exec_ctrl &
|
|
|
+ SECONDARY_EXEC_SHADOW_VMCS;
|
|
|
+}
|
|
|
+
|
|
|
static inline bool report_flexpriority(void)
|
|
|
{
|
|
|
return flexpriority_enabled;
|
|
@@ -1790,7 +1917,7 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
|
|
|
u32 intr_info = nr | INTR_INFO_VALID_MASK;
|
|
|
|
|
|
if (nr == PF_VECTOR && is_guest_mode(vcpu) &&
|
|
|
- nested_pf_handled(vcpu))
|
|
|
+ !vmx->nested.nested_run_pending && nested_pf_handled(vcpu))
|
|
|
return;
|
|
|
|
|
|
if (has_error_code) {
|
|
@@ -2022,6 +2149,7 @@ static u32 nested_vmx_secondary_ctls_low, nested_vmx_secondary_ctls_high;
|
|
|
static u32 nested_vmx_pinbased_ctls_low, nested_vmx_pinbased_ctls_high;
|
|
|
static u32 nested_vmx_exit_ctls_low, nested_vmx_exit_ctls_high;
|
|
|
static u32 nested_vmx_entry_ctls_low, nested_vmx_entry_ctls_high;
|
|
|
+static u32 nested_vmx_misc_low, nested_vmx_misc_high;
|
|
|
static __init void nested_vmx_setup_ctls_msrs(void)
|
|
|
{
|
|
|
/*
|
|
@@ -2040,30 +2168,40 @@ static __init void nested_vmx_setup_ctls_msrs(void)
|
|
|
*/
|
|
|
|
|
|
/* pin-based controls */
|
|
|
+ rdmsr(MSR_IA32_VMX_PINBASED_CTLS,
|
|
|
+ nested_vmx_pinbased_ctls_low, nested_vmx_pinbased_ctls_high);
|
|
|
/*
|
|
|
* According to the Intel spec, if bit 55 of VMX_BASIC is off (as it is
|
|
|
* in our case), bits 1, 2 and 4 (i.e., 0x16) must be 1 in this MSR.
|
|
|
*/
|
|
|
- nested_vmx_pinbased_ctls_low = 0x16 ;
|
|
|
- nested_vmx_pinbased_ctls_high = 0x16 |
|
|
|
- PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING |
|
|
|
- PIN_BASED_VIRTUAL_NMIS;
|
|
|
+ nested_vmx_pinbased_ctls_low |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
|
|
|
+ nested_vmx_pinbased_ctls_high &= PIN_BASED_EXT_INTR_MASK |
|
|
|
+ PIN_BASED_NMI_EXITING | PIN_BASED_VIRTUAL_NMIS |
|
|
|
+ PIN_BASED_VMX_PREEMPTION_TIMER;
|
|
|
+ nested_vmx_pinbased_ctls_high |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
|
|
|
|
|
|
- /* exit controls */
|
|
|
- nested_vmx_exit_ctls_low = 0;
|
|
|
+ /*
|
|
|
+ * Exit controls
|
|
|
+ * If bit 55 of VMX_BASIC is off, bits 0-8 and 10, 11, 13, 14, 16 and
|
|
|
+ * 17 must be 1.
|
|
|
+ */
|
|
|
+ nested_vmx_exit_ctls_low = VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR;
|
|
|
/* Note that guest use of VM_EXIT_ACK_INTR_ON_EXIT is not supported. */
|
|
|
#ifdef CONFIG_X86_64
|
|
|
nested_vmx_exit_ctls_high = VM_EXIT_HOST_ADDR_SPACE_SIZE;
|
|
|
#else
|
|
|
nested_vmx_exit_ctls_high = 0;
|
|
|
#endif
|
|
|
+ nested_vmx_exit_ctls_high |= VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR;
|
|
|
|
|
|
/* entry controls */
|
|
|
rdmsr(MSR_IA32_VMX_ENTRY_CTLS,
|
|
|
nested_vmx_entry_ctls_low, nested_vmx_entry_ctls_high);
|
|
|
- nested_vmx_entry_ctls_low = 0;
|
|
|
+ /* If bit 55 of VMX_BASIC is off, bits 0-8 and 12 must be 1. */
|
|
|
+ nested_vmx_entry_ctls_low = VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR;
|
|
|
nested_vmx_entry_ctls_high &=
|
|
|
VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_IA32E_MODE;
|
|
|
+ nested_vmx_entry_ctls_high |= VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR;
|
|
|
|
|
|
/* cpu-based controls */
|
|
|
rdmsr(MSR_IA32_VMX_PROCBASED_CTLS,
|
|
@@ -2080,6 +2218,7 @@ static __init void nested_vmx_setup_ctls_msrs(void)
|
|
|
CPU_BASED_MOV_DR_EXITING | CPU_BASED_UNCOND_IO_EXITING |
|
|
|
CPU_BASED_USE_IO_BITMAPS | CPU_BASED_MONITOR_EXITING |
|
|
|
CPU_BASED_RDPMC_EXITING | CPU_BASED_RDTSC_EXITING |
|
|
|
+ CPU_BASED_PAUSE_EXITING |
|
|
|
CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
|
|
|
/*
|
|
|
* We can allow some features even when not supported by the
|
|
@@ -2094,7 +2233,14 @@ static __init void nested_vmx_setup_ctls_msrs(void)
|
|
|
nested_vmx_secondary_ctls_low, nested_vmx_secondary_ctls_high);
|
|
|
nested_vmx_secondary_ctls_low = 0;
|
|
|
nested_vmx_secondary_ctls_high &=
|
|
|
- SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
|
|
|
+ SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
|
|
|
+ SECONDARY_EXEC_WBINVD_EXITING;
|
|
|
+
|
|
|
+ /* miscellaneous data */
|
|
|
+ rdmsr(MSR_IA32_VMX_MISC, nested_vmx_misc_low, nested_vmx_misc_high);
|
|
|
+ nested_vmx_misc_low &= VMX_MISC_PREEMPTION_TIMER_RATE_MASK |
|
|
|
+ VMX_MISC_SAVE_EFER_LMA;
|
|
|
+ nested_vmx_misc_high = 0;
|
|
|
}
|
|
|
|
|
|
static inline bool vmx_control_verify(u32 control, u32 low, u32 high)
|
|
@@ -2165,7 +2311,8 @@ static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
|
|
|
nested_vmx_entry_ctls_high);
|
|
|
break;
|
|
|
case MSR_IA32_VMX_MISC:
|
|
|
- *pdata = 0;
|
|
|
+ *pdata = vmx_control_msr(nested_vmx_misc_low,
|
|
|
+ nested_vmx_misc_high);
|
|
|
break;
|
|
|
/*
|
|
|
* These MSRs specify bits which the guest must keep fixed (on or off)
|
|
@@ -2529,12 +2676,6 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
|
|
|
u32 _vmexit_control = 0;
|
|
|
u32 _vmentry_control = 0;
|
|
|
|
|
|
- min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING;
|
|
|
- opt = PIN_BASED_VIRTUAL_NMIS;
|
|
|
- if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS,
|
|
|
- &_pin_based_exec_control) < 0)
|
|
|
- return -EIO;
|
|
|
-
|
|
|
min = CPU_BASED_HLT_EXITING |
|
|
|
#ifdef CONFIG_X86_64
|
|
|
CPU_BASED_CR8_LOAD_EXITING |
|
|
@@ -2573,7 +2714,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
|
|
|
SECONDARY_EXEC_RDTSCP |
|
|
|
SECONDARY_EXEC_ENABLE_INVPCID |
|
|
|
SECONDARY_EXEC_APIC_REGISTER_VIRT |
|
|
|
- SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY;
|
|
|
+ SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
|
|
|
+ SECONDARY_EXEC_SHADOW_VMCS;
|
|
|
if (adjust_vmx_controls(min2, opt2,
|
|
|
MSR_IA32_VMX_PROCBASED_CTLS2,
|
|
|
&_cpu_based_2nd_exec_control) < 0)
|
|
@@ -2605,11 +2747,23 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
|
|
|
#ifdef CONFIG_X86_64
|
|
|
min |= VM_EXIT_HOST_ADDR_SPACE_SIZE;
|
|
|
#endif
|
|
|
- opt = VM_EXIT_SAVE_IA32_PAT | VM_EXIT_LOAD_IA32_PAT;
|
|
|
+ opt = VM_EXIT_SAVE_IA32_PAT | VM_EXIT_LOAD_IA32_PAT |
|
|
|
+ VM_EXIT_ACK_INTR_ON_EXIT;
|
|
|
if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_EXIT_CTLS,
|
|
|
&_vmexit_control) < 0)
|
|
|
return -EIO;
|
|
|
|
|
|
+ min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING;
|
|
|
+ opt = PIN_BASED_VIRTUAL_NMIS | PIN_BASED_POSTED_INTR;
|
|
|
+ if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS,
|
|
|
+ &_pin_based_exec_control) < 0)
|
|
|
+ return -EIO;
|
|
|
+
|
|
|
+ if (!(_cpu_based_2nd_exec_control &
|
|
|
+ SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY) ||
|
|
|
+ !(_vmexit_control & VM_EXIT_ACK_INTR_ON_EXIT))
|
|
|
+ _pin_based_exec_control &= ~PIN_BASED_POSTED_INTR;
|
|
|
+
|
|
|
min = 0;
|
|
|
opt = VM_ENTRY_LOAD_IA32_PAT;
|
|
|
if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_ENTRY_CTLS,
|
|
@@ -2762,6 +2916,8 @@ static __init int hardware_setup(void)
|
|
|
|
|
|
if (!cpu_has_vmx_vpid())
|
|
|
enable_vpid = 0;
|
|
|
+ if (!cpu_has_vmx_shadow_vmcs())
|
|
|
+ enable_shadow_vmcs = 0;
|
|
|
|
|
|
if (!cpu_has_vmx_ept() ||
|
|
|
!cpu_has_vmx_ept_4levels()) {
|
|
@@ -2788,14 +2944,16 @@ static __init int hardware_setup(void)
|
|
|
if (!cpu_has_vmx_ple())
|
|
|
ple_gap = 0;
|
|
|
|
|
|
- if (!cpu_has_vmx_apic_register_virt() ||
|
|
|
- !cpu_has_vmx_virtual_intr_delivery())
|
|
|
- enable_apicv_reg_vid = 0;
|
|
|
+ if (!cpu_has_vmx_apicv())
|
|
|
+ enable_apicv = 0;
|
|
|
|
|
|
- if (enable_apicv_reg_vid)
|
|
|
+ if (enable_apicv)
|
|
|
kvm_x86_ops->update_cr8_intercept = NULL;
|
|
|
- else
|
|
|
+ else {
|
|
|
kvm_x86_ops->hwapic_irr_update = NULL;
|
|
|
+ kvm_x86_ops->deliver_posted_interrupt = NULL;
|
|
|
+ kvm_x86_ops->sync_pir_to_irr = vmx_sync_pir_to_irr_dummy;
|
|
|
+ }
|
|
|
|
|
|
if (nested)
|
|
|
nested_vmx_setup_ctls_msrs();
|
|
@@ -2876,22 +3034,6 @@ static void enter_pmode(struct kvm_vcpu *vcpu)
|
|
|
vmx->cpl = 0;
|
|
|
}
|
|
|
|
|
|
-static gva_t rmode_tss_base(struct kvm *kvm)
|
|
|
-{
|
|
|
- if (!kvm->arch.tss_addr) {
|
|
|
- struct kvm_memslots *slots;
|
|
|
- struct kvm_memory_slot *slot;
|
|
|
- gfn_t base_gfn;
|
|
|
-
|
|
|
- slots = kvm_memslots(kvm);
|
|
|
- slot = id_to_memslot(slots, 0);
|
|
|
- base_gfn = slot->base_gfn + slot->npages - 3;
|
|
|
-
|
|
|
- return base_gfn << PAGE_SHIFT;
|
|
|
- }
|
|
|
- return kvm->arch.tss_addr;
|
|
|
-}
|
|
|
-
|
|
|
static void fix_rmode_seg(int seg, struct kvm_segment *save)
|
|
|
{
|
|
|
const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
|
|
@@ -2942,19 +3084,15 @@ static void enter_rmode(struct kvm_vcpu *vcpu)
|
|
|
|
|
|
/*
|
|
|
* Very old userspace does not call KVM_SET_TSS_ADDR before entering
|
|
|
- * vcpu. Call it here with phys address pointing 16M below 4G.
|
|
|
+ * vcpu. Warn the user that an update is overdue.
|
|
|
*/
|
|
|
- if (!vcpu->kvm->arch.tss_addr) {
|
|
|
+ if (!vcpu->kvm->arch.tss_addr)
|
|
|
printk_once(KERN_WARNING "kvm: KVM_SET_TSS_ADDR need to be "
|
|
|
"called before entering vcpu\n");
|
|
|
- srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
|
|
|
- vmx_set_tss_addr(vcpu->kvm, 0xfeffd000);
|
|
|
- vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
|
|
|
- }
|
|
|
|
|
|
vmx_segment_cache_clear(vmx);
|
|
|
|
|
|
- vmcs_writel(GUEST_TR_BASE, rmode_tss_base(vcpu->kvm));
|
|
|
+ vmcs_writel(GUEST_TR_BASE, vcpu->kvm->arch.tss_addr);
|
|
|
vmcs_write32(GUEST_TR_LIMIT, RMODE_TSS_SIZE - 1);
|
|
|
vmcs_write32(GUEST_TR_AR_BYTES, 0x008b);
|
|
|
|
|
@@ -3214,7 +3352,9 @@ static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
|
|
|
*/
|
|
|
if (!nested_vmx_allowed(vcpu))
|
|
|
return 1;
|
|
|
- } else if (to_vmx(vcpu)->nested.vmxon)
|
|
|
+ }
|
|
|
+ if (to_vmx(vcpu)->nested.vmxon &&
|
|
|
+ ((cr4 & VMXON_CR4_ALWAYSON) != VMXON_CR4_ALWAYSON))
|
|
|
return 1;
|
|
|
|
|
|
vcpu->arch.cr4 = cr4;
|
|
@@ -3550,7 +3690,7 @@ static bool guest_state_valid(struct kvm_vcpu *vcpu)
|
|
|
return true;
|
|
|
|
|
|
/* real mode guest state checks */
|
|
|
- if (!is_protmode(vcpu)) {
|
|
|
+ if (!is_protmode(vcpu) || (vmx_get_rflags(vcpu) & X86_EFLAGS_VM)) {
|
|
|
if (!rmode_segment_valid(vcpu, VCPU_SREG_CS))
|
|
|
return false;
|
|
|
if (!rmode_segment_valid(vcpu, VCPU_SREG_SS))
|
|
@@ -3599,7 +3739,7 @@ static int init_rmode_tss(struct kvm *kvm)
|
|
|
int r, idx, ret = 0;
|
|
|
|
|
|
idx = srcu_read_lock(&kvm->srcu);
|
|
|
- fn = rmode_tss_base(kvm) >> PAGE_SHIFT;
|
|
|
+ fn = kvm->arch.tss_addr >> PAGE_SHIFT;
|
|
|
r = kvm_clear_guest_page(kvm, fn, 0, PAGE_SIZE);
|
|
|
if (r < 0)
|
|
|
goto out;
|
|
@@ -3692,7 +3832,7 @@ static int alloc_apic_access_page(struct kvm *kvm)
|
|
|
kvm_userspace_mem.flags = 0;
|
|
|
kvm_userspace_mem.guest_phys_addr = 0xfee00000ULL;
|
|
|
kvm_userspace_mem.memory_size = PAGE_SIZE;
|
|
|
- r = __kvm_set_memory_region(kvm, &kvm_userspace_mem, false);
|
|
|
+ r = __kvm_set_memory_region(kvm, &kvm_userspace_mem);
|
|
|
if (r)
|
|
|
goto out;
|
|
|
|
|
@@ -3722,7 +3862,7 @@ static int alloc_identity_pagetable(struct kvm *kvm)
|
|
|
kvm_userspace_mem.guest_phys_addr =
|
|
|
kvm->arch.ept_identity_map_addr;
|
|
|
kvm_userspace_mem.memory_size = PAGE_SIZE;
|
|
|
- r = __kvm_set_memory_region(kvm, &kvm_userspace_mem, false);
|
|
|
+ r = __kvm_set_memory_region(kvm, &kvm_userspace_mem);
|
|
|
if (r)
|
|
|
goto out;
|
|
|
|
|
@@ -3869,13 +4009,59 @@ static void vmx_disable_intercept_msr_write_x2apic(u32 msr)
|
|
|
msr, MSR_TYPE_W);
|
|
|
}
|
|
|
|
|
|
+static int vmx_vm_has_apicv(struct kvm *kvm)
|
|
|
+{
|
|
|
+ return enable_apicv && irqchip_in_kernel(kvm);
|
|
|
+}
|
|
|
+
+/*
+ * Send an interrupt to a vcpu via the posted-interrupt mechanism.
+ * 1. If the target vcpu is running (non-root mode), send the posted-interrupt
+ * notification to the vcpu and hardware will sync PIR to vIRR atomically.
+ * 2. If the target vcpu isn't running (root mode), kick it to pick up the
+ * interrupt from PIR in the next vmentry.
+ */
+static void vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector)
|
|
|
+{
|
|
|
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
|
|
|
+ int r;
|
|
|
+
|
|
|
+ if (pi_test_and_set_pir(vector, &vmx->pi_desc))
|
|
|
+ return;
|
|
|
+
|
|
|
+ r = pi_test_and_set_on(&vmx->pi_desc);
|
|
|
+ kvm_make_request(KVM_REQ_EVENT, vcpu);
|
|
|
+#ifdef CONFIG_SMP
|
|
|
+ if (!r && (vcpu->mode == IN_GUEST_MODE))
|
|
|
+ apic->send_IPI_mask(get_cpu_mask(vcpu->cpu),
|
|
|
+ POSTED_INTR_VECTOR);
|
|
|
+ else
|
|
|
+#endif
|
|
|
+ kvm_vcpu_kick(vcpu);
|
|
|
+}
|
|
|
+
|
|
|
+static void vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu)
|
|
|
+{
|
|
|
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
|
|
|
+
|
|
|
+ if (!pi_test_and_clear_on(&vmx->pi_desc))
|
|
|
+ return;
|
|
|
+
|
|
|
+ kvm_apic_update_irr(vcpu, vmx->pi_desc.pir);
|
|
|
+}
|
|
|
+
|
|
|
+static void vmx_sync_pir_to_irr_dummy(struct kvm_vcpu *vcpu)
|
|
|
+{
|
|
|
+ return;
|
|
|
+}
|
|
|
+
|
|
|
/*
|
|
|
* Set up the vmcs's constant host-state fields, i.e., host-state fields that
|
|
|
* will not change in the lifetime of the guest.
|
|
|
* Note that host-state that does change is set elsewhere. E.g., host-state
|
|
|
* that is set differently for each CPU is set in vmx_vcpu_load(), not here.
|
|
|
*/
|
|
|
-static void vmx_set_constant_host_state(void)
|
|
|
+static void vmx_set_constant_host_state(struct vcpu_vmx *vmx)
|
|
|
{
|
|
|
u32 low32, high32;
|
|
|
unsigned long tmpl;
|
|
@@ -3903,6 +4089,7 @@ static void vmx_set_constant_host_state(void)
|
|
|
|
|
|
native_store_idt(&dt);
|
|
|
vmcs_writel(HOST_IDTR_BASE, dt.address); /* 22.2.4 */
|
|
|
+ vmx->host_idt_base = dt.address;
|
|
|
|
|
|
vmcs_writel(HOST_RIP, vmx_return); /* 22.2.5 */
|
|
|
|
|
@@ -3928,6 +4115,15 @@ static void set_cr4_guest_host_mask(struct vcpu_vmx *vmx)
|
|
|
vmcs_writel(CR4_GUEST_HOST_MASK, ~vmx->vcpu.arch.cr4_guest_owned_bits);
|
|
|
}
|
|
|
|
|
|
+static u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx)
|
|
|
+{
|
|
|
+ u32 pin_based_exec_ctrl = vmcs_config.pin_based_exec_ctrl;
|
|
|
+
|
|
|
+ if (!vmx_vm_has_apicv(vmx->vcpu.kvm))
|
|
|
+ pin_based_exec_ctrl &= ~PIN_BASED_POSTED_INTR;
|
|
|
+ return pin_based_exec_ctrl;
|
|
|
+}
|
|
|
+
|
|
|
static u32 vmx_exec_control(struct vcpu_vmx *vmx)
|
|
|
{
|
|
|
u32 exec_control = vmcs_config.cpu_based_exec_ctrl;
|
|
@@ -3945,11 +4141,6 @@ static u32 vmx_exec_control(struct vcpu_vmx *vmx)
|
|
|
return exec_control;
|
|
|
}
|
|
|
|
|
|
-static int vmx_vm_has_apicv(struct kvm *kvm)
|
|
|
-{
|
|
|
- return enable_apicv_reg_vid && irqchip_in_kernel(kvm);
|
|
|
-}
|
|
|
-
|
|
|
static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx)
|
|
|
{
|
|
|
u32 exec_control = vmcs_config.cpu_based_2nd_exec_ctrl;
|
|
@@ -3971,6 +4162,12 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx)
|
|
|
exec_control &= ~(SECONDARY_EXEC_APIC_REGISTER_VIRT |
|
|
|
SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
|
|
|
exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
+	/*
+	 * SECONDARY_EXEC_SHADOW_VMCS is enabled when L1 executes VMPTRLD
+	 * (handle_vmptrld). We can NOT enable shadow_vmcs here because we
+	 * don't yet have a current VMCS12.
+	 */
+ exec_control &= ~SECONDARY_EXEC_SHADOW_VMCS;
|
|
|
return exec_control;
|
|
|
}
|
|
|
|
|
@@ -3999,14 +4196,17 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
|
|
|
vmcs_write64(IO_BITMAP_A, __pa(vmx_io_bitmap_a));
|
|
|
vmcs_write64(IO_BITMAP_B, __pa(vmx_io_bitmap_b));
|
|
|
|
|
|
+ if (enable_shadow_vmcs) {
|
|
|
+ vmcs_write64(VMREAD_BITMAP, __pa(vmx_vmread_bitmap));
|
|
|
+ vmcs_write64(VMWRITE_BITMAP, __pa(vmx_vmwrite_bitmap));
|
|
|
+ }
|
|
|
if (cpu_has_vmx_msr_bitmap())
|
|
|
vmcs_write64(MSR_BITMAP, __pa(vmx_msr_bitmap_legacy));
|
|
|
|
|
|
vmcs_write64(VMCS_LINK_POINTER, -1ull); /* 22.3.1.5 */
|
|
|
|
|
|
/* Control */
|
|
|
- vmcs_write32(PIN_BASED_VM_EXEC_CONTROL,
|
|
|
- vmcs_config.pin_based_exec_ctrl);
|
|
|
+ vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, vmx_pin_based_exec_ctrl(vmx));
|
|
|
|
|
|
vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, vmx_exec_control(vmx));
|
|
|
|
|
@@ -4015,13 +4215,16 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
|
|
|
vmx_secondary_exec_control(vmx));
|
|
|
}
|
|
|
|
|
|
- if (enable_apicv_reg_vid) {
|
|
|
+ if (vmx_vm_has_apicv(vmx->vcpu.kvm)) {
|
|
|
vmcs_write64(EOI_EXIT_BITMAP0, 0);
|
|
|
vmcs_write64(EOI_EXIT_BITMAP1, 0);
|
|
|
vmcs_write64(EOI_EXIT_BITMAP2, 0);
|
|
|
vmcs_write64(EOI_EXIT_BITMAP3, 0);
|
|
|
|
|
|
vmcs_write16(GUEST_INTR_STATUS, 0);
|
|
|
+
|
|
|
+ vmcs_write64(POSTED_INTR_NV, POSTED_INTR_VECTOR);
|
|
|
+ vmcs_write64(POSTED_INTR_DESC_ADDR, __pa((&vmx->pi_desc)));
|
|
|
}
|
|
|
|
|
|
if (ple_gap) {
|
|
@@ -4035,7 +4238,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
|
|
|
|
|
|
vmcs_write16(HOST_FS_SELECTOR, 0); /* 22.2.4 */
|
|
|
vmcs_write16(HOST_GS_SELECTOR, 0); /* 22.2.4 */
|
|
|
- vmx_set_constant_host_state();
|
|
|
+ vmx_set_constant_host_state(vmx);
|
|
|
#ifdef CONFIG_X86_64
|
|
|
rdmsrl(MSR_FS_BASE, a);
|
|
|
vmcs_writel(HOST_FS_BASE, a); /* 22.2.4 */
|
|
@@ -4089,11 +4292,10 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
|
|
|
return 0;
|
|
|
}
|
|
|
|
|
|
-static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
|
|
|
+static void vmx_vcpu_reset(struct kvm_vcpu *vcpu)
|
|
|
{
|
|
|
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
|
|
u64 msr;
|
|
|
- int ret;
|
|
|
|
|
|
vmx->rmode.vm86_active = 0;
|
|
|
|
|
@@ -4109,12 +4311,8 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
|
|
|
vmx_segment_cache_clear(vmx);
|
|
|
|
|
|
seg_setup(VCPU_SREG_CS);
|
|
|
- if (kvm_vcpu_is_bsp(&vmx->vcpu))
|
|
|
- vmcs_write16(GUEST_CS_SELECTOR, 0xf000);
|
|
|
- else {
|
|
|
- vmcs_write16(GUEST_CS_SELECTOR, vmx->vcpu.arch.sipi_vector << 8);
|
|
|
- vmcs_writel(GUEST_CS_BASE, vmx->vcpu.arch.sipi_vector << 12);
|
|
|
- }
|
|
|
+ vmcs_write16(GUEST_CS_SELECTOR, 0xf000);
|
|
|
+ vmcs_write32(GUEST_CS_BASE, 0xffff0000);
|
|
|
|
|
|
seg_setup(VCPU_SREG_DS);
|
|
|
seg_setup(VCPU_SREG_ES);
|
|
@@ -4137,10 +4335,7 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
|
|
|
vmcs_writel(GUEST_SYSENTER_EIP, 0);
|
|
|
|
|
|
vmcs_writel(GUEST_RFLAGS, 0x02);
|
|
|
- if (kvm_vcpu_is_bsp(&vmx->vcpu))
|
|
|
- kvm_rip_write(vcpu, 0xfff0);
|
|
|
- else
|
|
|
- kvm_rip_write(vcpu, 0);
|
|
|
+ kvm_rip_write(vcpu, 0xfff0);
|
|
|
|
|
|
vmcs_writel(GUEST_GDTR_BASE, 0);
|
|
|
vmcs_write32(GUEST_GDTR_LIMIT, 0xffff);
|
|
@@ -4171,23 +4366,20 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
|
|
|
vmcs_write64(APIC_ACCESS_ADDR,
|
|
|
page_to_phys(vmx->vcpu.kvm->arch.apic_access_page));
|
|
|
|
|
|
+ if (vmx_vm_has_apicv(vcpu->kvm))
|
|
|
+ memset(&vmx->pi_desc, 0, sizeof(struct pi_desc));
|
|
|
+
|
|
|
if (vmx->vpid != 0)
|
|
|
vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid);
|
|
|
|
|
|
vmx->vcpu.arch.cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET;
|
|
|
- vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
|
|
|
vmx_set_cr0(&vmx->vcpu, kvm_read_cr0(vcpu)); /* enter rmode */
|
|
|
- srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
|
|
|
vmx_set_cr4(&vmx->vcpu, 0);
|
|
|
vmx_set_efer(&vmx->vcpu, 0);
|
|
|
vmx_fpu_activate(&vmx->vcpu);
|
|
|
update_exception_bitmap(&vmx->vcpu);
|
|
|
|
|
|
vpid_sync_context(vmx);
|
|
|
-
|
|
|
- ret = 0;
|
|
|
-
|
|
|
- return ret;
|
|
|
}
|
|
|
|
|
|
/*
|
|
@@ -4200,40 +4392,45 @@ static bool nested_exit_on_intr(struct kvm_vcpu *vcpu)
|
|
|
PIN_BASED_EXT_INTR_MASK;
|
|
|
}
|
|
|
|
|
|
-static void enable_irq_window(struct kvm_vcpu *vcpu)
|
|
|
+static bool nested_exit_on_nmi(struct kvm_vcpu *vcpu)
|
|
|
+{
|
|
|
+ return get_vmcs12(vcpu)->pin_based_vm_exec_control &
|
|
|
+ PIN_BASED_NMI_EXITING;
|
|
|
+}
|
|
|
+
|
|
|
+static int enable_irq_window(struct kvm_vcpu *vcpu)
|
|
|
{
|
|
|
u32 cpu_based_vm_exec_control;
|
|
|
- if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu)) {
|
|
|
+
|
|
|
+ if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu))
|
|
|
/*
|
|
|
* We get here if vmx_interrupt_allowed() said we can't
|
|
|
- * inject to L1 now because L2 must run. Ask L2 to exit
|
|
|
- * right after entry, so we can inject to L1 more promptly.
|
|
|
+ * inject to L1 now because L2 must run. The caller will have
|
|
|
+ * to make L2 exit right after entry, so we can inject to L1
|
|
|
+ * more promptly.
|
|
|
*/
|
|
|
- kvm_make_request(KVM_REQ_IMMEDIATE_EXIT, vcpu);
|
|
|
- return;
|
|
|
- }
|
|
|
+ return -EBUSY;
|
|
|
|
|
|
cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
|
|
|
cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING;
|
|
|
vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
|
|
|
+ return 0;
|
|
|
}
|
|
|
|
|
|
-static void enable_nmi_window(struct kvm_vcpu *vcpu)
|
|
|
+static int enable_nmi_window(struct kvm_vcpu *vcpu)
|
|
|
{
|
|
|
u32 cpu_based_vm_exec_control;
|
|
|
|
|
|
- if (!cpu_has_virtual_nmis()) {
|
|
|
- enable_irq_window(vcpu);
|
|
|
- return;
|
|
|
- }
|
|
|
+ if (!cpu_has_virtual_nmis())
|
|
|
+ return enable_irq_window(vcpu);
|
|
|
+
|
|
|
+ if (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI)
|
|
|
+ return enable_irq_window(vcpu);
|
|
|
|
|
|
- if (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI) {
|
|
|
- enable_irq_window(vcpu);
|
|
|
- return;
|
|
|
- }
|
|
|
cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
|
|
|
cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_NMI_PENDING;
|
|
|
vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
|
|
|
+ return 0;
|
|
|
}
|
|
|
|
|
|
static void vmx_inject_irq(struct kvm_vcpu *vcpu)
|
|
@@ -4294,16 +4491,6 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
|
|
|
INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR);
|
|
|
}
|
|
|
|
|
|
-static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
|
|
|
-{
|
|
|
- if (!cpu_has_virtual_nmis() && to_vmx(vcpu)->soft_vnmi_blocked)
|
|
|
- return 0;
|
|
|
-
|
|
|
- return !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
|
|
|
- (GUEST_INTR_STATE_MOV_SS | GUEST_INTR_STATE_STI
|
|
|
- | GUEST_INTR_STATE_NMI));
|
|
|
-}
|
|
|
-
|
|
|
static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu)
|
|
|
{
|
|
|
if (!cpu_has_virtual_nmis())
|
|
@@ -4333,18 +4520,52 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
|
|
|
}
|
|
|
}
|
|
|
|
|
|
+static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
|
|
|
+{
|
|
|
+ if (is_guest_mode(vcpu)) {
|
|
|
+ struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
|
|
|
+
|
|
|
+ if (to_vmx(vcpu)->nested.nested_run_pending)
|
|
|
+ return 0;
|
|
|
+ if (nested_exit_on_nmi(vcpu)) {
|
|
|
+ nested_vmx_vmexit(vcpu);
|
|
|
+ vmcs12->vm_exit_reason = EXIT_REASON_EXCEPTION_NMI;
|
|
|
+ vmcs12->vm_exit_intr_info = NMI_VECTOR |
|
|
|
+ INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK;
|
|
|
+ /*
|
|
|
+ * The NMI-triggered VM exit counts as injection:
|
|
|
+ * clear this one and block further NMIs.
|
|
|
+ */
|
|
|
+ vcpu->arch.nmi_pending = 0;
|
|
|
+ vmx_set_nmi_mask(vcpu, true);
|
|
|
+ return 0;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ if (!cpu_has_virtual_nmis() && to_vmx(vcpu)->soft_vnmi_blocked)
|
|
|
+ return 0;
|
|
|
+
|
|
|
+ return !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
|
|
|
+ (GUEST_INTR_STATE_MOV_SS | GUEST_INTR_STATE_STI
|
|
|
+ | GUEST_INTR_STATE_NMI));
|
|
|
+}
|
|
|
+
|
|
|
static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu)
|
|
|
{
|
|
|
- if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu)) {
|
|
|
+ if (is_guest_mode(vcpu)) {
|
|
|
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
|
|
|
- if (to_vmx(vcpu)->nested.nested_run_pending ||
|
|
|
- (vmcs12->idt_vectoring_info_field &
|
|
|
- VECTORING_INFO_VALID_MASK))
|
|
|
+
|
|
|
+ if (to_vmx(vcpu)->nested.nested_run_pending)
|
|
|
return 0;
|
|
|
- nested_vmx_vmexit(vcpu);
|
|
|
- vmcs12->vm_exit_reason = EXIT_REASON_EXTERNAL_INTERRUPT;
|
|
|
- vmcs12->vm_exit_intr_info = 0;
|
|
|
- /* fall through to normal code, but now in L1, not L2 */
|
|
|
+ if (nested_exit_on_intr(vcpu)) {
|
|
|
+ nested_vmx_vmexit(vcpu);
|
|
|
+ vmcs12->vm_exit_reason =
|
|
|
+ EXIT_REASON_EXTERNAL_INTERRUPT;
|
|
|
+ vmcs12->vm_exit_intr_info = 0;
|
|
|
+ /*
|
|
|
+ * fall through to normal code, but now in L1, not L2
|
|
|
+ */
|
|
|
+ }
|
|
|
}
|
|
|
|
|
|
return (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
|
|
@@ -4362,7 +4583,7 @@ static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr)
|
|
|
.flags = 0,
|
|
|
};
|
|
|
|
|
|
- ret = kvm_set_memory_region(kvm, &tss_mem, false);
|
|
|
+ ret = kvm_set_memory_region(kvm, &tss_mem);
|
|
|
if (ret)
|
|
|
return ret;
|
|
|
kvm->arch.tss_addr = addr;
|
|
@@ -4603,34 +4824,50 @@ vmx_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall)
|
|
|
/* called to set cr0 as appropriate for a mov-to-cr0 exit. */
|
|
|
static int handle_set_cr0(struct kvm_vcpu *vcpu, unsigned long val)
|
|
|
{
|
|
|
- if (to_vmx(vcpu)->nested.vmxon &&
|
|
|
- ((val & VMXON_CR0_ALWAYSON) != VMXON_CR0_ALWAYSON))
|
|
|
- return 1;
|
|
|
-
|
|
|
if (is_guest_mode(vcpu)) {
|
|
|
+ struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
|
|
|
+ unsigned long orig_val = val;
|
|
|
+
|
|
|
/*
|
|
|
* We get here when L2 changed cr0 in a way that did not change
|
|
|
* any of L1's shadowed bits (see nested_vmx_exit_handled_cr),
|
|
|
- * but did change L0 shadowed bits. This can currently happen
|
|
|
- * with the TS bit: L0 may want to leave TS on (for lazy fpu
|
|
|
- * loading) while pretending to allow the guest to change it.
|
|
|
+ * but did change L0 shadowed bits. So we first calculate the
|
|
|
+ * effective cr0 value that L1 would like to write into the
|
|
|
+ * hardware. It consists of the L2-owned bits from the new
|
|
|
+ * value combined with the L1-owned bits from L1's guest_cr0.
|
|
|
*/
|
|
|
- if (kvm_set_cr0(vcpu, (val & vcpu->arch.cr0_guest_owned_bits) |
|
|
|
- (vcpu->arch.cr0 & ~vcpu->arch.cr0_guest_owned_bits)))
|
|
|
+ val = (val & ~vmcs12->cr0_guest_host_mask) |
|
|
|
+ (vmcs12->guest_cr0 & vmcs12->cr0_guest_host_mask);
|
|
|
+
|
|
|
+ /* TODO: will have to take unrestricted guest mode into
|
|
|
+ * account */
|
|
|
+ if ((val & VMXON_CR0_ALWAYSON) != VMXON_CR0_ALWAYSON)
|
|
|
return 1;
|
|
|
- vmcs_writel(CR0_READ_SHADOW, val);
|
|
|
+
|
|
|
+ if (kvm_set_cr0(vcpu, val))
|
|
|
+ return 1;
|
|
|
+ vmcs_writel(CR0_READ_SHADOW, orig_val);
|
|
|
return 0;
|
|
|
- } else
|
|
|
+ } else {
|
|
|
+ if (to_vmx(vcpu)->nested.vmxon &&
|
|
|
+ ((val & VMXON_CR0_ALWAYSON) != VMXON_CR0_ALWAYSON))
|
|
|
+ return 1;
|
|
|
return kvm_set_cr0(vcpu, val);
|
|
|
+ }
|
|
|
}
|
|
|
|
|
|
static int handle_set_cr4(struct kvm_vcpu *vcpu, unsigned long val)
|
|
|
{
|
|
|
if (is_guest_mode(vcpu)) {
|
|
|
- if (kvm_set_cr4(vcpu, (val & vcpu->arch.cr4_guest_owned_bits) |
|
|
|
- (vcpu->arch.cr4 & ~vcpu->arch.cr4_guest_owned_bits)))
|
|
|
+ struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
|
|
|
+ unsigned long orig_val = val;
|
|
|
+
|
|
|
+ /* analogously to handle_set_cr0 */
|
|
|
+ val = (val & ~vmcs12->cr4_guest_host_mask) |
|
|
|
+ (vmcs12->guest_cr4 & vmcs12->cr4_guest_host_mask);
|
|
|
+ if (kvm_set_cr4(vcpu, val))
|
|
|
return 1;
|
|
|
- vmcs_writel(CR4_READ_SHADOW, val);
|
|
|
+ vmcs_writel(CR4_READ_SHADOW, orig_val);
|
|
|
return 0;
|
|
|
} else
|
|
|
return kvm_set_cr4(vcpu, val);
|
|
@@ -5183,7 +5420,7 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
|
|
|
if (test_bit(KVM_REQ_EVENT, &vcpu->requests))
|
|
|
return 1;
|
|
|
|
|
|
- err = emulate_instruction(vcpu, 0);
|
|
|
+ err = emulate_instruction(vcpu, EMULTYPE_NO_REEXECUTE);
|
|
|
|
|
|
if (err == EMULATE_DO_MMIO) {
|
|
|
ret = 0;
|
|
@@ -5259,8 +5496,7 @@ static struct loaded_vmcs *nested_get_current_vmcs02(struct vcpu_vmx *vmx)
|
|
|
}
|
|
|
|
|
|
/* Create a new VMCS */
|
|
|
- item = (struct vmcs02_list *)
|
|
|
- kmalloc(sizeof(struct vmcs02_list), GFP_KERNEL);
|
|
|
+ item = kmalloc(sizeof(struct vmcs02_list), GFP_KERNEL);
|
|
|
if (!item)
|
|
|
return NULL;
|
|
|
item->vmcs02.vmcs = alloc_vmcs();
|
|
@@ -5309,6 +5545,9 @@ static void nested_free_all_saved_vmcss(struct vcpu_vmx *vmx)
|
|
|
free_loaded_vmcs(&vmx->vmcs01);
|
|
|
}
|
|
|
|
|
|
+static void nested_vmx_failValid(struct kvm_vcpu *vcpu,
|
|
|
+ u32 vm_instruction_error);
|
|
|
+
|
|
|
/*
|
|
|
* Emulate the VMXON instruction.
|
|
|
* Currently, we just remember that VMX is active, and do not save or even
|
|
@@ -5321,6 +5560,7 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
|
|
|
{
|
|
|
struct kvm_segment cs;
|
|
|
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
|
|
+ struct vmcs *shadow_vmcs;
|
|
|
|
|
|
/* The Intel VMX Instruction Reference lists a bunch of bits that
|
|
|
* are prerequisite to running VMXON, most notably cr4.VMXE must be
|
|
@@ -5344,6 +5584,21 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
|
|
|
kvm_inject_gp(vcpu, 0);
|
|
|
return 1;
|
|
|
}
|
|
|
+ if (vmx->nested.vmxon) {
|
|
|
+ nested_vmx_failValid(vcpu, VMXERR_VMXON_IN_VMX_ROOT_OPERATION);
|
|
|
+ skip_emulated_instruction(vcpu);
|
|
|
+ return 1;
|
|
|
+ }
|
|
|
+ if (enable_shadow_vmcs) {
|
|
|
+ shadow_vmcs = alloc_vmcs();
|
|
|
+ if (!shadow_vmcs)
|
|
|
+ return -ENOMEM;
|
|
|
+ /* mark vmcs as shadow */
|
|
|
+ shadow_vmcs->revision_id |= (1u << 31);
|
|
|
+ /* init shadow vmcs */
|
|
|
+ vmcs_clear(shadow_vmcs);
|
|
|
+ vmx->nested.current_shadow_vmcs = shadow_vmcs;
|
|
|
+ }
|
|
|
|
|
|
INIT_LIST_HEAD(&(vmx->nested.vmcs02_pool));
|
|
|
vmx->nested.vmcs02_num = 0;
|
|
@@ -5384,6 +5639,25 @@ static int nested_vmx_check_permission(struct kvm_vcpu *vcpu)
|
|
|
return 1;
|
|
|
}
|
|
|
|
|
|
+static inline void nested_release_vmcs12(struct vcpu_vmx *vmx)
|
|
|
+{
|
|
|
+ u32 exec_control;
|
|
|
+ if (enable_shadow_vmcs) {
|
|
|
+ if (vmx->nested.current_vmcs12 != NULL) {
|
|
|
+ /* copy to memory all shadowed fields in case
|
|
|
+ they were modified */
|
|
|
+ copy_shadow_to_vmcs12(vmx);
|
|
|
+ vmx->nested.sync_shadow_vmcs = false;
|
|
|
+ exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
|
|
|
+ exec_control &= ~SECONDARY_EXEC_SHADOW_VMCS;
|
|
|
+ vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control);
|
|
|
+ vmcs_write64(VMCS_LINK_POINTER, -1ull);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ kunmap(vmx->nested.current_vmcs12_page);
|
|
|
+ nested_release_page(vmx->nested.current_vmcs12_page);
|
|
|
+}
|
|
|
+
|
|
|
/*
|
|
|
* Free whatever needs to be freed from vmx->nested when L1 goes down, or
|
|
|
* just stops using VMX.
|
|
@@ -5394,11 +5668,12 @@ static void free_nested(struct vcpu_vmx *vmx)
|
|
|
return;
|
|
|
vmx->nested.vmxon = false;
|
|
|
if (vmx->nested.current_vmptr != -1ull) {
|
|
|
- kunmap(vmx->nested.current_vmcs12_page);
|
|
|
- nested_release_page(vmx->nested.current_vmcs12_page);
|
|
|
+ nested_release_vmcs12(vmx);
|
|
|
vmx->nested.current_vmptr = -1ull;
|
|
|
vmx->nested.current_vmcs12 = NULL;
|
|
|
}
|
|
|
+ if (enable_shadow_vmcs)
|
|
|
+ free_vmcs(vmx->nested.current_shadow_vmcs);
|
|
|
/* Unpin physical memory we referred to in current vmcs02 */
|
|
|
if (vmx->nested.apic_access_page) {
|
|
|
nested_release_page(vmx->nested.apic_access_page);
|
|
@@ -5507,6 +5782,10 @@ static void nested_vmx_failValid(struct kvm_vcpu *vcpu,
|
|
|
X86_EFLAGS_SF | X86_EFLAGS_OF))
|
|
|
| X86_EFLAGS_ZF);
|
|
|
get_vmcs12(vcpu)->vm_instruction_error = vm_instruction_error;
|
|
|
+ /*
|
|
|
+ * We don't need to force a shadow sync because
|
|
|
+ * VM_INSTRUCTION_ERROR is not shadowed
|
|
|
+ */
|
|
|
}
|
|
|
|
|
|
/* Emulate the VMCLEAR instruction */
|
|
@@ -5539,8 +5818,7 @@ static int handle_vmclear(struct kvm_vcpu *vcpu)
|
|
|
}
|
|
|
|
|
|
if (vmptr == vmx->nested.current_vmptr) {
|
|
|
- kunmap(vmx->nested.current_vmcs12_page);
|
|
|
- nested_release_page(vmx->nested.current_vmcs12_page);
|
|
|
+ nested_release_vmcs12(vmx);
|
|
|
vmx->nested.current_vmptr = -1ull;
|
|
|
vmx->nested.current_vmcs12 = NULL;
|
|
|
}
|
|
@@ -5639,6 +5917,111 @@ static inline bool vmcs12_read_any(struct kvm_vcpu *vcpu,
|
|
|
}
|
|
|
}
|
|
|
|
|
|
+
|
|
|
+static inline bool vmcs12_write_any(struct kvm_vcpu *vcpu,
|
|
|
+ unsigned long field, u64 field_value){
|
|
|
+ short offset = vmcs_field_to_offset(field);
|
|
|
+ char *p = ((char *) get_vmcs12(vcpu)) + offset;
|
|
|
+ if (offset < 0)
|
|
|
+ return false;
|
|
|
+
|
|
|
+ switch (vmcs_field_type(field)) {
|
|
|
+ case VMCS_FIELD_TYPE_U16:
|
|
|
+ *(u16 *)p = field_value;
|
|
|
+ return true;
|
|
|
+ case VMCS_FIELD_TYPE_U32:
|
|
|
+ *(u32 *)p = field_value;
|
|
|
+ return true;
|
|
|
+ case VMCS_FIELD_TYPE_U64:
|
|
|
+ *(u64 *)p = field_value;
|
|
|
+ return true;
|
|
|
+ case VMCS_FIELD_TYPE_NATURAL_WIDTH:
|
|
|
+ *(natural_width *)p = field_value;
|
|
|
+ return true;
|
|
|
+ default:
|
|
|
+ return false; /* can never happen. */
|
|
|
+ }
|
|
|
+
|
|
|
+}
|
|
|
+
|
|
|
+static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx)
|
|
|
+{
|
|
|
+ int i;
|
|
|
+ unsigned long field;
|
|
|
+ u64 field_value;
|
|
|
+ struct vmcs *shadow_vmcs = vmx->nested.current_shadow_vmcs;
|
|
|
+ unsigned long *fields = (unsigned long *)shadow_read_write_fields;
|
|
|
+ int num_fields = max_shadow_read_write_fields;
|
|
|
+
|
|
|
+ vmcs_load(shadow_vmcs);
|
|
|
+
|
|
|
+ for (i = 0; i < num_fields; i++) {
|
|
|
+ field = fields[i];
|
|
|
+ switch (vmcs_field_type(field)) {
|
|
|
+ case VMCS_FIELD_TYPE_U16:
|
|
|
+ field_value = vmcs_read16(field);
|
|
|
+ break;
|
|
|
+ case VMCS_FIELD_TYPE_U32:
|
|
|
+ field_value = vmcs_read32(field);
|
|
|
+ break;
|
|
|
+ case VMCS_FIELD_TYPE_U64:
|
|
|
+ field_value = vmcs_read64(field);
|
|
|
+ break;
|
|
|
+ case VMCS_FIELD_TYPE_NATURAL_WIDTH:
|
|
|
+ field_value = vmcs_readl(field);
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ vmcs12_write_any(&vmx->vcpu, field, field_value);
|
|
|
+ }
|
|
|
+
|
|
|
+ vmcs_clear(shadow_vmcs);
|
|
|
+ vmcs_load(vmx->loaded_vmcs->vmcs);
|
|
|
+}
|
|
|
+
|
|
|
+static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx)
|
|
|
+{
|
|
|
+ unsigned long *fields[] = {
|
|
|
+ (unsigned long *)shadow_read_write_fields,
|
|
|
+ (unsigned long *)shadow_read_only_fields
|
|
|
+ };
|
|
|
+ int num_lists = ARRAY_SIZE(fields);
|
|
|
+ int max_fields[] = {
|
|
|
+ max_shadow_read_write_fields,
|
|
|
+ max_shadow_read_only_fields
|
|
|
+ };
|
|
|
+ int i, q;
|
|
|
+ unsigned long field;
|
|
|
+ u64 field_value = 0;
|
|
|
+ struct vmcs *shadow_vmcs = vmx->nested.current_shadow_vmcs;
|
|
|
+
|
|
|
+ vmcs_load(shadow_vmcs);
|
|
|
+
|
|
|
+ for (q = 0; q < num_lists; q++) {
|
|
|
+ for (i = 0; i < max_fields[q]; i++) {
|
|
|
+ field = fields[q][i];
|
|
|
+ vmcs12_read_any(&vmx->vcpu, field, &field_value);
|
|
|
+
|
|
|
+ switch (vmcs_field_type(field)) {
|
|
|
+ case VMCS_FIELD_TYPE_U16:
|
|
|
+ vmcs_write16(field, (u16)field_value);
|
|
|
+ break;
|
|
|
+ case VMCS_FIELD_TYPE_U32:
|
|
|
+ vmcs_write32(field, (u32)field_value);
|
|
|
+ break;
|
|
|
+ case VMCS_FIELD_TYPE_U64:
|
|
|
+ vmcs_write64(field, (u64)field_value);
|
|
|
+ break;
|
|
|
+ case VMCS_FIELD_TYPE_NATURAL_WIDTH:
|
|
|
+ vmcs_writel(field, (long)field_value);
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ vmcs_clear(shadow_vmcs);
|
|
|
+ vmcs_load(vmx->loaded_vmcs->vmcs);
|
|
|
+}
|
|
|
+
|
|
|
/*
|
|
|
* VMX instructions which assume a current vmcs12 (i.e., that VMPTRLD was
|
|
|
* used before) all generate the same failure when it is missing.
|
|
@@ -5703,8 +6086,6 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu)
|
|
|
gva_t gva;
|
|
|
unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
|
|
|
u32 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
|
|
|
- char *p;
|
|
|
- short offset;
|
|
|
/* The value to write might be 32 or 64 bits, depending on L1's long
|
|
|
* mode, and eventually we need to write that into a field of several
|
|
|
* possible lengths. The code below first zero-extends the value to 64
|
|
@@ -5741,28 +6122,7 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu)
|
|
|
return 1;
|
|
|
}
|
|
|
|
|
|
- offset = vmcs_field_to_offset(field);
|
|
|
- if (offset < 0) {
|
|
|
- nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
|
|
|
- skip_emulated_instruction(vcpu);
|
|
|
- return 1;
|
|
|
- }
|
|
|
- p = ((char *) get_vmcs12(vcpu)) + offset;
|
|
|
-
|
|
|
- switch (vmcs_field_type(field)) {
|
|
|
- case VMCS_FIELD_TYPE_U16:
|
|
|
- *(u16 *)p = field_value;
|
|
|
- break;
|
|
|
- case VMCS_FIELD_TYPE_U32:
|
|
|
- *(u32 *)p = field_value;
|
|
|
- break;
|
|
|
- case VMCS_FIELD_TYPE_U64:
|
|
|
- *(u64 *)p = field_value;
|
|
|
- break;
|
|
|
- case VMCS_FIELD_TYPE_NATURAL_WIDTH:
|
|
|
- *(natural_width *)p = field_value;
|
|
|
- break;
|
|
|
- default:
|
|
|
+ if (!vmcs12_write_any(vcpu, field, field_value)) {
|
|
|
nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
|
|
|
skip_emulated_instruction(vcpu);
|
|
|
return 1;
|
|
@@ -5780,6 +6140,7 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu)
|
|
|
gva_t gva;
|
|
|
gpa_t vmptr;
|
|
|
struct x86_exception e;
|
|
|
+ u32 exec_control;
|
|
|
|
|
|
if (!nested_vmx_check_permission(vcpu))
|
|
|
return 1;
|
|
@@ -5818,14 +6179,20 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu)
|
|
|
skip_emulated_instruction(vcpu);
|
|
|
return 1;
|
|
|
}
|
|
|
- if (vmx->nested.current_vmptr != -1ull) {
|
|
|
- kunmap(vmx->nested.current_vmcs12_page);
|
|
|
- nested_release_page(vmx->nested.current_vmcs12_page);
|
|
|
- }
|
|
|
+ if (vmx->nested.current_vmptr != -1ull)
|
|
|
+ nested_release_vmcs12(vmx);
|
|
|
|
|
|
vmx->nested.current_vmptr = vmptr;
|
|
|
vmx->nested.current_vmcs12 = new_vmcs12;
|
|
|
vmx->nested.current_vmcs12_page = page;
|
|
|
+ if (enable_shadow_vmcs) {
|
|
|
+ exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
|
|
|
+ exec_control |= SECONDARY_EXEC_SHADOW_VMCS;
|
|
|
+ vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control);
|
|
|
+ vmcs_write64(VMCS_LINK_POINTER,
|
|
|
+ __pa(vmx->nested.current_shadow_vmcs));
|
|
|
+ vmx->nested.sync_shadow_vmcs = true;
|
|
|
+ }
|
|
|
}
|
|
|
|
|
|
nested_vmx_succeed(vcpu);
|
|
@@ -5908,6 +6275,52 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
|
|
|
static const int kvm_vmx_max_exit_handlers =
|
|
|
ARRAY_SIZE(kvm_vmx_exit_handlers);
|
|
|
|
|
|
+static bool nested_vmx_exit_handled_io(struct kvm_vcpu *vcpu,
|
|
|
+ struct vmcs12 *vmcs12)
|
|
|
+{
|
|
|
+ unsigned long exit_qualification;
|
|
|
+ gpa_t bitmap, last_bitmap;
|
|
|
+ unsigned int port;
|
|
|
+ int size;
|
|
|
+ u8 b;
|
|
|
+
|
|
|
+ if (nested_cpu_has(vmcs12, CPU_BASED_UNCOND_IO_EXITING))
|
|
|
+ return 1;
|
|
|
+
|
|
|
+ if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS))
|
|
|
+ return 0;
|
|
|
+
|
|
|
+ exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
|
|
|
+
|
|
|
+ port = exit_qualification >> 16;
|
|
|
+ size = (exit_qualification & 7) + 1;
|
|
|
+
|
|
|
+ last_bitmap = (gpa_t)-1;
|
|
|
+ b = -1;
|
|
|
+
|
|
|
+ while (size > 0) {
|
|
|
+ if (port < 0x8000)
|
|
|
+ bitmap = vmcs12->io_bitmap_a;
|
|
|
+ else if (port < 0x10000)
|
|
|
+ bitmap = vmcs12->io_bitmap_b;
|
|
|
+ else
|
|
|
+ return 1;
|
|
|
+ bitmap += (port & 0x7fff) / 8;
|
|
|
+
|
|
|
+ if (last_bitmap != bitmap)
|
|
|
+ if (kvm_read_guest(vcpu->kvm, bitmap, &b, 1))
|
|
|
+ return 1;
|
|
|
+ if (b & (1 << (port & 7)))
|
|
|
+ return 1;
|
|
|
+
|
|
|
+ port++;
|
|
|
+ size--;
|
|
|
+ last_bitmap = bitmap;
|
|
|
+ }
|
|
|
+
|
|
|
+ return 0;
|
|
|
+}
|
|
|
+
|
|
|
/*
|
|
|
* Return 1 if we should exit from L2 to L1 to handle an MSR access access,
|
|
|
* rather than handle it ourselves in L0. I.e., check whether L1 expressed
|
|
@@ -5939,7 +6352,8 @@ static bool nested_vmx_exit_handled_msr(struct kvm_vcpu *vcpu,
|
|
|
/* Then read the msr_index'th bit from this bitmap: */
|
|
|
if (msr_index < 1024*8) {
|
|
|
unsigned char b;
|
|
|
- kvm_read_guest(vcpu->kvm, bitmap + msr_index/8, &b, 1);
|
|
|
+ if (kvm_read_guest(vcpu->kvm, bitmap + msr_index/8, &b, 1))
|
|
|
+ return 1;
|
|
|
return 1 & (b >> (msr_index & 7));
|
|
|
} else
|
|
|
return 1; /* let L1 handle the wrong parameter */
|
|
@@ -6033,10 +6447,10 @@ static bool nested_vmx_exit_handled_cr(struct kvm_vcpu *vcpu,
|
|
|
*/
|
|
|
static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
|
|
|
{
|
|
|
- u32 exit_reason = vmcs_read32(VM_EXIT_REASON);
|
|
|
u32 intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
|
|
|
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
|
|
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
|
|
|
+ u32 exit_reason = vmx->exit_reason;
|
|
|
|
|
|
if (vmx->nested.nested_run_pending)
|
|
|
return 0;
|
|
@@ -6060,14 +6474,9 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
|
|
|
case EXIT_REASON_TRIPLE_FAULT:
|
|
|
return 1;
|
|
|
case EXIT_REASON_PENDING_INTERRUPT:
|
|
|
+ return nested_cpu_has(vmcs12, CPU_BASED_VIRTUAL_INTR_PENDING);
|
|
|
case EXIT_REASON_NMI_WINDOW:
|
|
|
- /*
|
|
|
- * prepare_vmcs02() set the CPU_BASED_VIRTUAL_INTR_PENDING bit
|
|
|
- * (aka Interrupt Window Exiting) only when L1 turned it on,
|
|
|
- * so if we got a PENDING_INTERRUPT exit, this must be for L1.
|
|
|
- * Same for NMI Window Exiting.
|
|
|
- */
|
|
|
- return 1;
|
|
|
+ return nested_cpu_has(vmcs12, CPU_BASED_VIRTUAL_NMI_PENDING);
|
|
|
case EXIT_REASON_TASK_SWITCH:
|
|
|
return 1;
|
|
|
case EXIT_REASON_CPUID:
|
|
@@ -6097,8 +6506,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
|
|
|
case EXIT_REASON_DR_ACCESS:
|
|
|
return nested_cpu_has(vmcs12, CPU_BASED_MOV_DR_EXITING);
|
|
|
case EXIT_REASON_IO_INSTRUCTION:
|
|
|
- /* TODO: support IO bitmaps */
|
|
|
- return 1;
|
|
|
+ return nested_vmx_exit_handled_io(vcpu, vmcs12);
|
|
|
case EXIT_REASON_MSR_READ:
|
|
|
case EXIT_REASON_MSR_WRITE:
|
|
|
return nested_vmx_exit_handled_msr(vcpu, vmcs12, exit_reason);
|
|
@@ -6122,6 +6530,9 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
|
|
|
case EXIT_REASON_EPT_VIOLATION:
|
|
|
case EXIT_REASON_EPT_MISCONFIG:
|
|
|
return 0;
|
|
|
+ case EXIT_REASON_PREEMPTION_TIMER:
|
|
|
+ return vmcs12->pin_based_vm_exec_control &
|
|
|
+ PIN_BASED_VMX_PREEMPTION_TIMER;
|
|
|
case EXIT_REASON_WBINVD:
|
|
|
return nested_cpu_has2(vmcs12, SECONDARY_EXEC_WBINVD_EXITING);
|
|
|
case EXIT_REASON_XSETBV:
|
|
@@ -6316,6 +6727,9 @@ static void vmx_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr)
|
|
|
|
|
|
static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
|
|
|
{
|
|
|
+ if (!vmx_vm_has_apicv(vcpu->kvm))
|
|
|
+ return;
|
|
|
+
|
|
|
vmcs_write64(EOI_EXIT_BITMAP0, eoi_exit_bitmap[0]);
|
|
|
vmcs_write64(EOI_EXIT_BITMAP1, eoi_exit_bitmap[1]);
|
|
|
vmcs_write64(EOI_EXIT_BITMAP2, eoi_exit_bitmap[2]);
|
|
@@ -6346,6 +6760,52 @@ static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx)
|
|
|
}
|
|
|
}
|
|
|
|
|
|
+static void vmx_handle_external_intr(struct kvm_vcpu *vcpu)
|
|
|
+{
|
|
|
+ u32 exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
|
|
|
+
|
|
|
+ /*
|
|
|
+ * If external interrupt exists, IF bit is set in rflags/eflags on the
|
|
|
+ * interrupt stack frame, and interrupt will be enabled on a return
|
|
|
+ * from interrupt handler.
|
|
|
+ */
|
|
|
+ if ((exit_intr_info & (INTR_INFO_VALID_MASK | INTR_INFO_INTR_TYPE_MASK))
|
|
|
+ == (INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR)) {
|
|
|
+ unsigned int vector;
|
|
|
+ unsigned long entry;
|
|
|
+ gate_desc *desc;
|
|
|
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
|
|
|
+#ifdef CONFIG_X86_64
|
|
|
+ unsigned long tmp;
|
|
|
+#endif
|
|
|
+
|
|
|
+ vector = exit_intr_info & INTR_INFO_VECTOR_MASK;
|
|
|
+ desc = (gate_desc *)vmx->host_idt_base + vector;
|
|
|
+ entry = gate_offset(*desc);
|
|
|
+ asm volatile(
|
|
|
+#ifdef CONFIG_X86_64
|
|
|
+ "mov %%" _ASM_SP ", %[sp]\n\t"
|
|
|
+ "and $0xfffffffffffffff0, %%" _ASM_SP "\n\t"
|
|
|
+ "push $%c[ss]\n\t"
|
|
|
+ "push %[sp]\n\t"
|
|
|
+#endif
|
|
|
+ "pushf\n\t"
|
|
|
+ "orl $0x200, (%%" _ASM_SP ")\n\t"
|
|
|
+ __ASM_SIZE(push) " $%c[cs]\n\t"
|
|
|
+ "call *%[entry]\n\t"
|
|
|
+ :
|
|
|
+#ifdef CONFIG_X86_64
|
|
|
+ [sp]"=&r"(tmp)
|
|
|
+#endif
|
|
|
+ :
|
|
|
+ [entry]"r"(entry),
|
|
|
+ [ss]"i"(__KERNEL_DS),
|
|
|
+ [cs]"i"(__KERNEL_CS)
|
|
|
+ );
|
|
|
+ } else
|
|
|
+ local_irq_enable();
|
|
|
+}
|
|
|
+
|
|
|
static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx)
|
|
|
{
|
|
|
u32 exit_intr_info;
|
|
@@ -6388,7 +6848,7 @@ static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx)
|
|
|
ktime_to_ns(ktime_sub(ktime_get(), vmx->entry_time));
|
|
|
}
|
|
|
|
|
|
-static void __vmx_complete_interrupts(struct vcpu_vmx *vmx,
|
|
|
+static void __vmx_complete_interrupts(struct kvm_vcpu *vcpu,
|
|
|
u32 idt_vectoring_info,
|
|
|
int instr_len_field,
|
|
|
int error_code_field)
|
|
@@ -6399,46 +6859,43 @@ static void __vmx_complete_interrupts(struct vcpu_vmx *vmx,
|
|
|
|
|
|
idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK;
|
|
|
|
|
|
- vmx->vcpu.arch.nmi_injected = false;
|
|
|
- kvm_clear_exception_queue(&vmx->vcpu);
|
|
|
- kvm_clear_interrupt_queue(&vmx->vcpu);
|
|
|
+ vcpu->arch.nmi_injected = false;
|
|
|
+ kvm_clear_exception_queue(vcpu);
|
|
|
+ kvm_clear_interrupt_queue(vcpu);
|
|
|
|
|
|
if (!idtv_info_valid)
|
|
|
return;
|
|
|
|
|
|
- kvm_make_request(KVM_REQ_EVENT, &vmx->vcpu);
|
|
|
+ kvm_make_request(KVM_REQ_EVENT, vcpu);
|
|
|
|
|
|
vector = idt_vectoring_info & VECTORING_INFO_VECTOR_MASK;
|
|
|
type = idt_vectoring_info & VECTORING_INFO_TYPE_MASK;
|
|
|
|
|
|
switch (type) {
|
|
|
case INTR_TYPE_NMI_INTR:
|
|
|
- vmx->vcpu.arch.nmi_injected = true;
|
|
|
+ vcpu->arch.nmi_injected = true;
|
|
|
/*
|
|
|
* SDM 3: 27.7.1.2 (September 2008)
|
|
|
* Clear bit "block by NMI" before VM entry if a NMI
|
|
|
* delivery faulted.
|
|
|
*/
|
|
|
- vmx_set_nmi_mask(&vmx->vcpu, false);
|
|
|
+ vmx_set_nmi_mask(vcpu, false);
|
|
|
break;
|
|
|
case INTR_TYPE_SOFT_EXCEPTION:
|
|
|
- vmx->vcpu.arch.event_exit_inst_len =
|
|
|
- vmcs_read32(instr_len_field);
|
|
|
+ vcpu->arch.event_exit_inst_len = vmcs_read32(instr_len_field);
|
|
|
/* fall through */
|
|
|
case INTR_TYPE_HARD_EXCEPTION:
|
|
|
if (idt_vectoring_info & VECTORING_INFO_DELIVER_CODE_MASK) {
|
|
|
u32 err = vmcs_read32(error_code_field);
|
|
|
- kvm_queue_exception_e(&vmx->vcpu, vector, err);
|
|
|
+ kvm_queue_exception_e(vcpu, vector, err);
|
|
|
} else
|
|
|
- kvm_queue_exception(&vmx->vcpu, vector);
|
|
|
+ kvm_queue_exception(vcpu, vector);
|
|
|
break;
|
|
|
case INTR_TYPE_SOFT_INTR:
|
|
|
- vmx->vcpu.arch.event_exit_inst_len =
|
|
|
- vmcs_read32(instr_len_field);
|
|
|
+ vcpu->arch.event_exit_inst_len = vmcs_read32(instr_len_field);
|
|
|
/* fall through */
|
|
|
case INTR_TYPE_EXT_INTR:
|
|
|
- kvm_queue_interrupt(&vmx->vcpu, vector,
|
|
|
- type == INTR_TYPE_SOFT_INTR);
|
|
|
+ kvm_queue_interrupt(vcpu, vector, type == INTR_TYPE_SOFT_INTR);
|
|
|
break;
|
|
|
default:
|
|
|
break;
|
|
@@ -6447,18 +6904,14 @@ static void __vmx_complete_interrupts(struct vcpu_vmx *vmx,
|
|
|
|
|
|
static void vmx_complete_interrupts(struct vcpu_vmx *vmx)
|
|
|
{
|
|
|
- if (is_guest_mode(&vmx->vcpu))
|
|
|
- return;
|
|
|
- __vmx_complete_interrupts(vmx, vmx->idt_vectoring_info,
|
|
|
+ __vmx_complete_interrupts(&vmx->vcpu, vmx->idt_vectoring_info,
|
|
|
VM_EXIT_INSTRUCTION_LEN,
|
|
|
IDT_VECTORING_ERROR_CODE);
|
|
|
}
|
|
|
|
|
|
static void vmx_cancel_injection(struct kvm_vcpu *vcpu)
|
|
|
{
|
|
|
- if (is_guest_mode(vcpu))
|
|
|
- return;
|
|
|
- __vmx_complete_interrupts(to_vmx(vcpu),
|
|
|
+ __vmx_complete_interrupts(vcpu,
|
|
|
vmcs_read32(VM_ENTRY_INTR_INFO_FIELD),
|
|
|
VM_ENTRY_INSTRUCTION_LEN,
|
|
|
VM_ENTRY_EXCEPTION_ERROR_CODE);
|
|
@@ -6489,21 +6942,6 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
|
|
|
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
|
|
unsigned long debugctlmsr;
|
|
|
|
|
|
- if (is_guest_mode(vcpu) && !vmx->nested.nested_run_pending) {
|
|
|
- struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
|
|
|
- if (vmcs12->idt_vectoring_info_field &
|
|
|
- VECTORING_INFO_VALID_MASK) {
|
|
|
- vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
|
|
|
- vmcs12->idt_vectoring_info_field);
|
|
|
- vmcs_write32(VM_ENTRY_INSTRUCTION_LEN,
|
|
|
- vmcs12->vm_exit_instruction_len);
|
|
|
- if (vmcs12->idt_vectoring_info_field &
|
|
|
- VECTORING_INFO_DELIVER_CODE_MASK)
|
|
|
- vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE,
|
|
|
- vmcs12->idt_vectoring_error_code);
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
/* Record the guest's net vcpu time for enforced NMI injections. */
|
|
|
if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked))
|
|
|
vmx->entry_time = ktime_get();
|
|
@@ -6513,6 +6951,11 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
|
|
|
if (vmx->emulation_required)
|
|
|
return;
|
|
|
|
|
|
+ if (vmx->nested.sync_shadow_vmcs) {
|
|
|
+ copy_vmcs12_to_shadow(vmx);
|
|
|
+ vmx->nested.sync_shadow_vmcs = false;
|
|
|
+ }
|
|
|
+
|
|
|
if (test_bit(VCPU_REGS_RSP, (unsigned long *)&vcpu->arch.regs_dirty))
|
|
|
vmcs_writel(GUEST_RSP, vcpu->arch.regs[VCPU_REGS_RSP]);
|
|
|
if (test_bit(VCPU_REGS_RIP, (unsigned long *)&vcpu->arch.regs_dirty))
|
|
@@ -6662,17 +7105,6 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
|
|
|
|
|
|
vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
|
|
|
|
|
|
- if (is_guest_mode(vcpu)) {
|
|
|
- struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
|
|
|
- vmcs12->idt_vectoring_info_field = vmx->idt_vectoring_info;
|
|
|
- if (vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK) {
|
|
|
- vmcs12->idt_vectoring_error_code =
|
|
|
- vmcs_read32(IDT_VECTORING_ERROR_CODE);
|
|
|
- vmcs12->vm_exit_instruction_len =
|
|
|
- vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
vmx->loaded_vmcs->launched = 1;
|
|
|
|
|
|
vmx->exit_reason = vmcs_read32(VM_EXIT_REASON);
|
|
@@ -6734,10 +7166,11 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
put_cpu();
if (err)
goto free_vmcs;
- if (vm_need_virtualize_apic_accesses(kvm))
+ if (vm_need_virtualize_apic_accesses(kvm)) {
err = alloc_apic_access_page(kvm);
if (err)
goto free_vmcs;
+ }

if (enable_ept) {
if (!kvm->arch.ept_identity_map_addr)
@@ -6931,9 +7364,8 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
vmcs12->vm_entry_instruction_len);
vmcs_write32(GUEST_INTERRUPTIBILITY_INFO,
vmcs12->guest_interruptibility_info);
- vmcs_write32(GUEST_ACTIVITY_STATE, vmcs12->guest_activity_state);
vmcs_write32(GUEST_SYSENTER_CS, vmcs12->guest_sysenter_cs);
- vmcs_writel(GUEST_DR7, vmcs12->guest_dr7);
+ kvm_set_dr(vcpu, 7, vmcs12->guest_dr7);
vmcs_writel(GUEST_RFLAGS, vmcs12->guest_rflags);
vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS,
vmcs12->guest_pending_dbg_exceptions);
@@ -6946,6 +7378,10 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
(vmcs_config.pin_based_exec_ctrl |
vmcs12->pin_based_vm_exec_control));

+ if (vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER)
+ vmcs_write32(VMX_PREEMPTION_TIMER_VALUE,
+ vmcs12->vmx_preemption_timer_value);
+
/*
* Whether page-faults are trapped is determined by a combination of
* 3 settings: PFEC_MASK, PFEC_MATCH and EXCEPTION_BITMAP.PF.
@@ -7016,7 +7452,7 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
* Other fields are different per CPU, and will be set later when
* vmx_vcpu_load() is called, and when vmx_save_host_state() is called.
*/
- vmx_set_constant_host_state();
+ vmx_set_constant_host_state(vmx);

/*
* HOST_RSP is normally set correctly in vmx_vcpu_run() just before
@@ -7082,7 +7518,7 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)

if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER)
vcpu->arch.efer = vmcs12->guest_ia32_efer;
- if (vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE)
+ else if (vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE)
vcpu->arch.efer |= (EFER_LMA | EFER_LME);
else
vcpu->arch.efer &= ~(EFER_LMA | EFER_LME);
@@ -7121,6 +7557,7 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
struct vcpu_vmx *vmx = to_vmx(vcpu);
int cpu;
struct loaded_vmcs *vmcs02;
+ bool ia32e;

if (!nested_vmx_check_permission(vcpu) ||
!nested_vmx_check_vmcs12(vcpu))
@@ -7129,6 +7566,9 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
skip_emulated_instruction(vcpu);
vmcs12 = get_vmcs12(vcpu);

+ if (enable_shadow_vmcs)
+ copy_shadow_to_vmcs12(vmx);
+
/*
* The nested entry process starts with enforcing various prerequisites
* on vmcs12 as required by the Intel SDM, and act appropriately when
@@ -7146,6 +7586,11 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
return 1;
}

+ if (vmcs12->guest_activity_state != GUEST_ACTIVITY_ACTIVE) {
+ nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
+ return 1;
+ }
+
if ((vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_MSR_BITMAPS) &&
!IS_ALIGNED(vmcs12->msr_bitmap, PAGE_SIZE)) {
/*TODO: Also verify bits beyond physical address width are 0*/
@@ -7203,6 +7648,45 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
return 1;
}

+ /*
+ * If the load IA32_EFER VM-entry control is 1, the following checks
+ * are performed on the field for the IA32_EFER MSR:
+ * - Bits reserved in the IA32_EFER MSR must be 0.
+ * - Bit 10 (corresponding to IA32_EFER.LMA) must equal the value of
+ * the IA-32e mode guest VM-exit control. It must also be identical
+ * to bit 8 (LME) if bit 31 in the CR0 field (corresponding to
+ * CR0.PG) is 1.
+ */
+ if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER) {
+ ia32e = (vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) != 0;
+ if (!kvm_valid_efer(vcpu, vmcs12->guest_ia32_efer) ||
+ ia32e != !!(vmcs12->guest_ia32_efer & EFER_LMA) ||
+ ((vmcs12->guest_cr0 & X86_CR0_PG) &&
+ ia32e != !!(vmcs12->guest_ia32_efer & EFER_LME))) {
+ nested_vmx_entry_failure(vcpu, vmcs12,
+ EXIT_REASON_INVALID_STATE, ENTRY_FAIL_DEFAULT);
+ return 1;
+ }
+ }
+
+ /*
+ * If the load IA32_EFER VM-exit control is 1, bits reserved in the
+ * IA32_EFER MSR must be 0 in the field for that register. In addition,
+ * the values of the LMA and LME bits in the field must each be that of
+ * the host address-space size VM-exit control.
+ */
+ if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_EFER) {
+ ia32e = (vmcs12->vm_exit_controls &
+ VM_EXIT_HOST_ADDR_SPACE_SIZE) != 0;
+ if (!kvm_valid_efer(vcpu, vmcs12->host_ia32_efer) ||
+ ia32e != !!(vmcs12->host_ia32_efer & EFER_LMA) ||
+ ia32e != !!(vmcs12->host_ia32_efer & EFER_LME)) {
+ nested_vmx_entry_failure(vcpu, vmcs12,
+ EXIT_REASON_INVALID_STATE, ENTRY_FAIL_DEFAULT);
+ return 1;
+ }
+ }
+
/*
* We're finally done with prerequisite checking, and can start with
* the nested entry.
@@ -7223,6 +7707,8 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
vcpu->cpu = cpu;
put_cpu();

+ vmx_segment_cache_clear(vmx);
+
vmcs12->launch_state = 1;

prepare_vmcs02(vcpu, vmcs12);
@@ -7273,6 +7759,48 @@ vmcs12_guest_cr4(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
vcpu->arch.cr4_guest_owned_bits));
}

+static void vmcs12_save_pending_event(struct kvm_vcpu *vcpu,
+ struct vmcs12 *vmcs12)
+{
+ u32 idt_vectoring;
+ unsigned int nr;
+
+ if (vcpu->arch.exception.pending) {
+ nr = vcpu->arch.exception.nr;
+ idt_vectoring = nr | VECTORING_INFO_VALID_MASK;
+
+ if (kvm_exception_is_soft(nr)) {
+ vmcs12->vm_exit_instruction_len =
+ vcpu->arch.event_exit_inst_len;
+ idt_vectoring |= INTR_TYPE_SOFT_EXCEPTION;
+ } else
+ idt_vectoring |= INTR_TYPE_HARD_EXCEPTION;
+
+ if (vcpu->arch.exception.has_error_code) {
+ idt_vectoring |= VECTORING_INFO_DELIVER_CODE_MASK;
+ vmcs12->idt_vectoring_error_code =
+ vcpu->arch.exception.error_code;
+ }
+
+ vmcs12->idt_vectoring_info_field = idt_vectoring;
+ } else if (vcpu->arch.nmi_pending) {
+ vmcs12->idt_vectoring_info_field =
+ INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR;
+ } else if (vcpu->arch.interrupt.pending) {
+ nr = vcpu->arch.interrupt.nr;
+ idt_vectoring = nr | VECTORING_INFO_VALID_MASK;
+
+ if (vcpu->arch.interrupt.soft) {
+ idt_vectoring |= INTR_TYPE_SOFT_INTR;
+ vmcs12->vm_entry_instruction_len =
+ vcpu->arch.event_exit_inst_len;
+ } else
+ idt_vectoring |= INTR_TYPE_EXT_INTR;
+
+ vmcs12->idt_vectoring_info_field = idt_vectoring;
+ }
+}
+
/*
* prepare_vmcs12 is part of what we need to do when the nested L2 guest exits
* and we want to prepare to run its L1 parent. L1 keeps a vmcs for L2 (vmcs12),
@@ -7284,7 +7812,7 @@ vmcs12_guest_cr4(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
* exit-information fields only. Other fields are modified by L1 with VMWRITE,
* which already writes to vmcs12 directly.
*/
-void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
+static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
{
/* update guest state fields: */
vmcs12->guest_cr0 = vmcs12_guest_cr0(vcpu, vmcs12);
@@ -7332,16 +7860,19 @@ void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
vmcs12->guest_gdtr_base = vmcs_readl(GUEST_GDTR_BASE);
vmcs12->guest_idtr_base = vmcs_readl(GUEST_IDTR_BASE);

- vmcs12->guest_activity_state = vmcs_read32(GUEST_ACTIVITY_STATE);
vmcs12->guest_interruptibility_info =
vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
vmcs12->guest_pending_dbg_exceptions =
vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS);

+ vmcs12->vm_entry_controls =
+ (vmcs12->vm_entry_controls & ~VM_ENTRY_IA32E_MODE) |
+ (vmcs_read32(VM_ENTRY_CONTROLS) & VM_ENTRY_IA32E_MODE);
+
/* TODO: These cannot have changed unless we have MSR bitmaps and
* the relevant bit asks not to trap the change */
vmcs12->guest_ia32_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL);
- if (vmcs12->vm_entry_controls & VM_EXIT_SAVE_IA32_PAT)
+ if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_IA32_PAT)
vmcs12->guest_ia32_pat = vmcs_read64(GUEST_IA32_PAT);
vmcs12->guest_sysenter_cs = vmcs_read32(GUEST_SYSENTER_CS);
vmcs12->guest_sysenter_esp = vmcs_readl(GUEST_SYSENTER_ESP);
@@ -7349,21 +7880,38 @@ void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)

/* update exit information fields: */

- vmcs12->vm_exit_reason = vmcs_read32(VM_EXIT_REASON);
+ vmcs12->vm_exit_reason = to_vmx(vcpu)->exit_reason;
vmcs12->exit_qualification = vmcs_readl(EXIT_QUALIFICATION);

vmcs12->vm_exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
- vmcs12->vm_exit_intr_error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE);
- vmcs12->idt_vectoring_info_field =
- vmcs_read32(IDT_VECTORING_INFO_FIELD);
- vmcs12->idt_vectoring_error_code =
- vmcs_read32(IDT_VECTORING_ERROR_CODE);
+ if ((vmcs12->vm_exit_intr_info &
+ (INTR_INFO_VALID_MASK | INTR_INFO_DELIVER_CODE_MASK)) ==
+ (INTR_INFO_VALID_MASK | INTR_INFO_DELIVER_CODE_MASK))
+ vmcs12->vm_exit_intr_error_code =
+ vmcs_read32(VM_EXIT_INTR_ERROR_CODE);
+ vmcs12->idt_vectoring_info_field = 0;
vmcs12->vm_exit_instruction_len = vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
vmcs12->vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);

- /* clear vm-entry fields which are to be cleared on exit */
- if (!(vmcs12->vm_exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY))
+ if (!(vmcs12->vm_exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY)) {
+ /* vm_entry_intr_info_field is cleared on exit. Emulate this
+ * instead of reading the real value. */
vmcs12->vm_entry_intr_info_field &= ~INTR_INFO_VALID_MASK;
+
+ /*
+ * Transfer the event that L0 or L1 may have wanted to inject into
+ * L2 to IDT_VECTORING_INFO_FIELD.
+ */
+ vmcs12_save_pending_event(vcpu, vmcs12);
+ }
+
+ /*
+ * Drop what we picked up for L2 via vmx_complete_interrupts. It is
+ * preserved above and would only end up incorrectly in L1.
+ */
+ vcpu->arch.nmi_injected = false;
+ kvm_clear_exception_queue(vcpu);
+ kvm_clear_interrupt_queue(vcpu);
}

/*
@@ -7375,11 +7923,12 @@ void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
* Failures During or After Loading Guest State").
* This function should be called when the active VMCS is L1's (vmcs01).
*/
-void load_vmcs12_host_state(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
+static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
+ struct vmcs12 *vmcs12)
{
if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_EFER)
vcpu->arch.efer = vmcs12->host_ia32_efer;
- if (vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE)
+ else if (vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE)
vcpu->arch.efer |= (EFER_LMA | EFER_LME);
else
vcpu->arch.efer &= ~(EFER_LMA | EFER_LME);
@@ -7387,6 +7936,7 @@ void load_vmcs12_host_state(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)

kvm_register_write(vcpu, VCPU_REGS_RSP, vmcs12->host_rsp);
kvm_register_write(vcpu, VCPU_REGS_RIP, vmcs12->host_rip);
+ vmx_set_rflags(vcpu, X86_EFLAGS_BIT1);
/*
* Note that calling vmx_set_cr0 is important, even if cr0 hasn't
* actually changed, because it depends on the current state of
@@ -7445,6 +7995,9 @@ void load_vmcs12_host_state(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL)
vmcs_write64(GUEST_IA32_PERF_GLOBAL_CTRL,
vmcs12->host_ia32_perf_global_ctrl);
+
+ kvm_set_dr(vcpu, 7, 0x400);
+ vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
}

/*
@@ -7458,6 +8011,9 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu)
int cpu;
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);

+ /* trying to cancel vmlaunch/vmresume is a bug */
+ WARN_ON_ONCE(vmx->nested.nested_run_pending);
+
leave_guest_mode(vcpu);
prepare_vmcs12(vcpu, vmcs12);
@@ -7468,6 +8024,8 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu)
vcpu->cpu = cpu;
put_cpu();

+ vmx_segment_cache_clear(vmx);
+
/* if no vmcs02 cache requested, remove the one we used */
if (VMCS02_POOL_SIZE == 0)
nested_free_vmcs02(vmx, vmx->nested.current_vmptr);
@@ -7496,6 +8054,8 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu)
nested_vmx_failValid(vcpu, vmcs_read32(VM_INSTRUCTION_ERROR));
} else
nested_vmx_succeed(vcpu);
+ if (enable_shadow_vmcs)
+ vmx->nested.sync_shadow_vmcs = true;
}

/*
@@ -7513,6 +8073,8 @@ static void nested_vmx_entry_failure(struct kvm_vcpu *vcpu,
vmcs12->vm_exit_reason = reason | VMX_EXIT_REASONS_FAILED_VMENTRY;
vmcs12->exit_qualification = qualification;
nested_vmx_succeed(vcpu);
+ if (enable_shadow_vmcs)
+ to_vmx(vcpu)->nested.sync_shadow_vmcs = true;
}

static int vmx_check_intercept(struct kvm_vcpu *vcpu,
@@ -7590,6 +8152,8 @@ static struct kvm_x86_ops vmx_x86_ops = {
.load_eoi_exitmap = vmx_load_eoi_exitmap,
.hwapic_irr_update = vmx_hwapic_irr_update,
.hwapic_isr_update = vmx_hwapic_isr_update,
+ .sync_pir_to_irr = vmx_sync_pir_to_irr,
+ .deliver_posted_interrupt = vmx_deliver_posted_interrupt,

.set_tss_addr = vmx_set_tss_addr,
.get_tdp_level = get_ept_level,
@@ -7618,6 +8182,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
.set_tdp_cr3 = vmx_set_cr3,

.check_intercept = vmx_check_intercept,
+ .handle_external_intr = vmx_handle_external_intr,
};

static int __init vmx_init(void)
@@ -7656,6 +8221,24 @@ static int __init vmx_init(void)
(unsigned long *)__get_free_page(GFP_KERNEL);
if (!vmx_msr_bitmap_longmode_x2apic)
goto out4;
+ vmx_vmread_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
+ if (!vmx_vmread_bitmap)
+ goto out5;
+
+ vmx_vmwrite_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
+ if (!vmx_vmwrite_bitmap)
+ goto out6;
+
+ memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE);
+ memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE);
+ /* shadowed read/write fields */
+ for (i = 0; i < max_shadow_read_write_fields; i++) {
+ clear_bit(shadow_read_write_fields[i], vmx_vmwrite_bitmap);
+ clear_bit(shadow_read_write_fields[i], vmx_vmread_bitmap);
+ }
+ /* shadowed read only fields */
+ for (i = 0; i < max_shadow_read_only_fields; i++)
+ clear_bit(shadow_read_only_fields[i], vmx_vmread_bitmap);

/*
* Allow direct access to the PC debug port (it is often used for I/O
@@ -7674,7 +8257,7 @@ static int __init vmx_init(void)
r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx),
__alignof__(struct vcpu_vmx), THIS_MODULE);
if (r)
- goto out3;
+ goto out7;

#ifdef CONFIG_KEXEC
rcu_assign_pointer(crash_vmclear_loaded_vmcss,
@@ -7692,7 +8275,7 @@ static int __init vmx_init(void)
memcpy(vmx_msr_bitmap_longmode_x2apic,
vmx_msr_bitmap_longmode, PAGE_SIZE);

- if (enable_apicv_reg_vid) {
+ if (enable_apicv) {
for (msr = 0x800; msr <= 0x8ff; msr++)
vmx_disable_intercept_msr_read_x2apic(msr);
@@ -7722,6 +8305,12 @@ static int __init vmx_init(void)

return 0;

+out7:
+ free_page((unsigned long)vmx_vmwrite_bitmap);
+out6:
+ free_page((unsigned long)vmx_vmread_bitmap);
+out5:
+ free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
out4:
free_page((unsigned long)vmx_msr_bitmap_longmode);
out3:
@@ -7743,6 +8332,8 @@ static void __exit vmx_exit(void)
free_page((unsigned long)vmx_msr_bitmap_longmode);
free_page((unsigned long)vmx_io_bitmap_b);
free_page((unsigned long)vmx_io_bitmap_a);
+ free_page((unsigned long)vmx_vmwrite_bitmap);
+ free_page((unsigned long)vmx_vmread_bitmap);

#ifdef CONFIG_KEXEC
rcu_assign_pointer(crash_vmclear_loaded_vmcss, NULL);