@@ -45,6 +45,7 @@
#include <asm/perf_event.h>
#include <asm/debugreg.h>
#include <asm/kexec.h>
+#include <asm/apic.h>

#include "trace.h"

@@ -101,6 +102,9 @@ module_param(nested, bool, S_IRUGO);

static u64 __read_mostly host_xss;

+static bool __read_mostly enable_pml = 1;
+module_param_named(pml, enable_pml, bool, S_IRUGO);
+
#define KVM_GUEST_CR0_MASK (X86_CR0_NW | X86_CR0_CD)
#define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST (X86_CR0_WP | X86_CR0_NE)
#define KVM_VM_CR0_ALWAYS_ON \
@@ -215,7 +219,12 @@ struct __packed vmcs12 {
	u64 tsc_offset;
	u64 virtual_apic_page_addr;
	u64 apic_access_addr;
+	u64 posted_intr_desc_addr;
	u64 ept_pointer;
+	u64 eoi_exit_bitmap0;
+	u64 eoi_exit_bitmap1;
+	u64 eoi_exit_bitmap2;
+	u64 eoi_exit_bitmap3;
	u64 xss_exit_bitmap;
	u64 guest_physical_address;
	u64 vmcs_link_pointer;
@@ -330,6 +339,7 @@ struct __packed vmcs12 {
	u32 vmx_preemption_timer_value;
	u32 padding32[7]; /* room for future expansion */
	u16 virtual_processor_id;
+	u16 posted_intr_nv;
	u16 guest_es_selector;
	u16 guest_cs_selector;
	u16 guest_ss_selector;
@@ -338,6 +348,7 @@ struct __packed vmcs12 {
	u16 guest_gs_selector;
	u16 guest_ldtr_selector;
	u16 guest_tr_selector;
+	u16 guest_intr_status;
	u16 host_es_selector;
	u16 host_cs_selector;
	u16 host_ss_selector;
@@ -401,6 +412,10 @@ struct nested_vmx {
	 */
	struct page *apic_access_page;
	struct page *virtual_apic_page;
+	struct page *pi_desc_page;
+	struct pi_desc *pi_desc;
+	bool pi_pending;
+	u16 posted_intr_nv;
	u64 msr_ia32_feature_control;

	struct hrtimer preemption_timer;
@@ -408,6 +423,23 @@ struct nested_vmx {

	/* to migrate it to L2 if VM_ENTRY_LOAD_DEBUG_CONTROLS is off */
	u64 vmcs01_debugctl;
+
+	u32 nested_vmx_procbased_ctls_low;
+	u32 nested_vmx_procbased_ctls_high;
+	u32 nested_vmx_true_procbased_ctls_low;
+	u32 nested_vmx_secondary_ctls_low;
+	u32 nested_vmx_secondary_ctls_high;
+	u32 nested_vmx_pinbased_ctls_low;
+	u32 nested_vmx_pinbased_ctls_high;
+	u32 nested_vmx_exit_ctls_low;
+	u32 nested_vmx_exit_ctls_high;
+	u32 nested_vmx_true_exit_ctls_low;
+	u32 nested_vmx_entry_ctls_low;
+	u32 nested_vmx_entry_ctls_high;
+	u32 nested_vmx_true_entry_ctls_low;
+	u32 nested_vmx_misc_low;
+	u32 nested_vmx_misc_high;
+	u32 nested_vmx_ept_caps;
};

#define POSTED_INTR_ON 0
@@ -511,6 +543,10 @@ struct vcpu_vmx {
	/* Dynamic PLE window. */
	int ple_window;
	bool ple_window_dirty;
+
+	/* Support for PML */
+#define PML_ENTITY_NUM 512
+	struct page *pml_pg;
};

enum segment_cache_field {
@@ -594,6 +630,7 @@ static int max_shadow_read_write_fields =

static const unsigned short vmcs_field_to_offset_table[] = {
	FIELD(VIRTUAL_PROCESSOR_ID, virtual_processor_id),
+	FIELD(POSTED_INTR_NV, posted_intr_nv),
	FIELD(GUEST_ES_SELECTOR, guest_es_selector),
	FIELD(GUEST_CS_SELECTOR, guest_cs_selector),
	FIELD(GUEST_SS_SELECTOR, guest_ss_selector),
@@ -602,6 +639,7 @@ static const unsigned short vmcs_field_to_offset_table[] = {
	FIELD(GUEST_GS_SELECTOR, guest_gs_selector),
	FIELD(GUEST_LDTR_SELECTOR, guest_ldtr_selector),
	FIELD(GUEST_TR_SELECTOR, guest_tr_selector),
+	FIELD(GUEST_INTR_STATUS, guest_intr_status),
	FIELD(HOST_ES_SELECTOR, host_es_selector),
	FIELD(HOST_CS_SELECTOR, host_cs_selector),
	FIELD(HOST_SS_SELECTOR, host_ss_selector),
@@ -618,7 +656,12 @@ static const unsigned short vmcs_field_to_offset_table[] = {
	FIELD64(TSC_OFFSET, tsc_offset),
	FIELD64(VIRTUAL_APIC_PAGE_ADDR, virtual_apic_page_addr),
	FIELD64(APIC_ACCESS_ADDR, apic_access_addr),
+	FIELD64(POSTED_INTR_DESC_ADDR, posted_intr_desc_addr),
	FIELD64(EPT_POINTER, ept_pointer),
+	FIELD64(EOI_EXIT_BITMAP0, eoi_exit_bitmap0),
+	FIELD64(EOI_EXIT_BITMAP1, eoi_exit_bitmap1),
+	FIELD64(EOI_EXIT_BITMAP2, eoi_exit_bitmap2),
+	FIELD64(EOI_EXIT_BITMAP3, eoi_exit_bitmap3),
	FIELD64(XSS_EXIT_BITMAP, xss_exit_bitmap),
	FIELD64(GUEST_PHYSICAL_ADDRESS, guest_physical_address),
	FIELD64(VMCS_LINK_POINTER, vmcs_link_pointer),
@@ -766,6 +809,7 @@ static void kvm_cpu_vmxon(u64 addr);
static void kvm_cpu_vmxoff(void);
static bool vmx_mpx_supported(void);
static bool vmx_xsaves_supported(void);
+static int vmx_vm_has_apicv(struct kvm *kvm);
static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr);
static void vmx_set_segment(struct kvm_vcpu *vcpu,
			    struct kvm_segment *var, int seg);
@@ -793,6 +837,7 @@ static unsigned long *vmx_msr_bitmap_legacy;
static unsigned long *vmx_msr_bitmap_longmode;
static unsigned long *vmx_msr_bitmap_legacy_x2apic;
static unsigned long *vmx_msr_bitmap_longmode_x2apic;
+static unsigned long *vmx_msr_bitmap_nested;
static unsigned long *vmx_vmread_bitmap;
static unsigned long *vmx_vmwrite_bitmap;

@@ -959,16 +1004,6 @@ static inline bool cpu_has_vmx_ept_execute_only(void)
	return vmx_capability.ept & VMX_EPT_EXECUTE_ONLY_BIT;
}

-static inline bool cpu_has_vmx_eptp_uncacheable(void)
-{
-	return vmx_capability.ept & VMX_EPTP_UC_BIT;
-}
-
-static inline bool cpu_has_vmx_eptp_writeback(void)
-{
-	return vmx_capability.ept & VMX_EPTP_WB_BIT;
-}
-
static inline bool cpu_has_vmx_ept_2m_page(void)
{
	return vmx_capability.ept & VMX_EPT_2MB_PAGE_BIT;
@@ -1073,6 +1108,11 @@ static inline bool cpu_has_vmx_shadow_vmcs(void)
		SECONDARY_EXEC_SHADOW_VMCS;
}

+static inline bool cpu_has_vmx_pml(void)
+{
+	return vmcs_config.cpu_based_2nd_exec_ctrl & SECONDARY_EXEC_ENABLE_PML;
+}
+
static inline bool report_flexpriority(void)
{
	return flexpriority_enabled;
@@ -1112,6 +1152,26 @@ static inline bool nested_cpu_has_xsaves(struct vmcs12 *vmcs12)
		vmx_xsaves_supported();
}

+static inline bool nested_cpu_has_virt_x2apic_mode(struct vmcs12 *vmcs12)
+{
+	return nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE);
+}
+
+static inline bool nested_cpu_has_apic_reg_virt(struct vmcs12 *vmcs12)
+{
+	return nested_cpu_has2(vmcs12, SECONDARY_EXEC_APIC_REGISTER_VIRT);
+}
+
+static inline bool nested_cpu_has_vid(struct vmcs12 *vmcs12)
+{
+	return nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
+}
+
+static inline bool nested_cpu_has_posted_intr(struct vmcs12 *vmcs12)
+{
+	return vmcs12->pin_based_vm_exec_control & PIN_BASED_POSTED_INTR;
+}
+
static inline bool is_exception(u32 intr_info)
{
	return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK))
@@ -2284,20 +2344,8 @@ static inline bool nested_vmx_allowed(struct kvm_vcpu *vcpu)
 * if the corresponding bit in the (32-bit) control field *must* be on, and a
 * bit in the high half is on if the corresponding bit in the control field
 * may be on. See also vmx_control_verify().
- * TODO: allow these variables to be modified (downgraded) by module options
- * or other means.
 */
-static u32 nested_vmx_procbased_ctls_low, nested_vmx_procbased_ctls_high;
-static u32 nested_vmx_true_procbased_ctls_low;
-static u32 nested_vmx_secondary_ctls_low, nested_vmx_secondary_ctls_high;
-static u32 nested_vmx_pinbased_ctls_low, nested_vmx_pinbased_ctls_high;
-static u32 nested_vmx_exit_ctls_low, nested_vmx_exit_ctls_high;
-static u32 nested_vmx_true_exit_ctls_low;
-static u32 nested_vmx_entry_ctls_low, nested_vmx_entry_ctls_high;
-static u32 nested_vmx_true_entry_ctls_low;
-static u32 nested_vmx_misc_low, nested_vmx_misc_high;
-static u32 nested_vmx_ept_caps;
-static __init void nested_vmx_setup_ctls_msrs(void)
+static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
{
	/*
	 * Note that as a general rule, the high half of the MSRs (bits in
@@ -2316,57 +2364,74 @@ static __init void nested_vmx_setup_ctls_msrs(void)

	/* pin-based controls */
	rdmsr(MSR_IA32_VMX_PINBASED_CTLS,
-		nested_vmx_pinbased_ctls_low, nested_vmx_pinbased_ctls_high);
-	nested_vmx_pinbased_ctls_low |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
-	nested_vmx_pinbased_ctls_high &= PIN_BASED_EXT_INTR_MASK |
-		PIN_BASED_NMI_EXITING | PIN_BASED_VIRTUAL_NMIS;
-	nested_vmx_pinbased_ctls_high |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR |
+		vmx->nested.nested_vmx_pinbased_ctls_low,
+		vmx->nested.nested_vmx_pinbased_ctls_high);
+	vmx->nested.nested_vmx_pinbased_ctls_low |=
+		PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
+	vmx->nested.nested_vmx_pinbased_ctls_high &=
+		PIN_BASED_EXT_INTR_MASK |
+		PIN_BASED_NMI_EXITING |
+		PIN_BASED_VIRTUAL_NMIS;
+	vmx->nested.nested_vmx_pinbased_ctls_high |=
+		PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR |
		PIN_BASED_VMX_PREEMPTION_TIMER;
+	if (vmx_vm_has_apicv(vmx->vcpu.kvm))
+		vmx->nested.nested_vmx_pinbased_ctls_high |=
+			PIN_BASED_POSTED_INTR;

	/* exit controls */
	rdmsr(MSR_IA32_VMX_EXIT_CTLS,
-		nested_vmx_exit_ctls_low, nested_vmx_exit_ctls_high);
-	nested_vmx_exit_ctls_low = VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR;
+		vmx->nested.nested_vmx_exit_ctls_low,
+		vmx->nested.nested_vmx_exit_ctls_high);
+	vmx->nested.nested_vmx_exit_ctls_low =
+		VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR;

-	nested_vmx_exit_ctls_high &=
+	vmx->nested.nested_vmx_exit_ctls_high &=
#ifdef CONFIG_X86_64
		VM_EXIT_HOST_ADDR_SPACE_SIZE |
#endif
		VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT;
-	nested_vmx_exit_ctls_high |= VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR |
+	vmx->nested.nested_vmx_exit_ctls_high |=
+		VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR |
		VM_EXIT_LOAD_IA32_EFER | VM_EXIT_SAVE_IA32_EFER |
		VM_EXIT_SAVE_VMX_PREEMPTION_TIMER | VM_EXIT_ACK_INTR_ON_EXIT;

	if (vmx_mpx_supported())
-		nested_vmx_exit_ctls_high |= VM_EXIT_CLEAR_BNDCFGS;
+		vmx->nested.nested_vmx_exit_ctls_high |= VM_EXIT_CLEAR_BNDCFGS;

	/* We support free control of debug control saving. */
-	nested_vmx_true_exit_ctls_low = nested_vmx_exit_ctls_low &
+	vmx->nested.nested_vmx_true_exit_ctls_low =
+		vmx->nested.nested_vmx_exit_ctls_low &
		~VM_EXIT_SAVE_DEBUG_CONTROLS;

	/* entry controls */
	rdmsr(MSR_IA32_VMX_ENTRY_CTLS,
-		nested_vmx_entry_ctls_low, nested_vmx_entry_ctls_high);
-	nested_vmx_entry_ctls_low = VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR;
-	nested_vmx_entry_ctls_high &=
+		vmx->nested.nested_vmx_entry_ctls_low,
+		vmx->nested.nested_vmx_entry_ctls_high);
+	vmx->nested.nested_vmx_entry_ctls_low =
+		VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR;
+	vmx->nested.nested_vmx_entry_ctls_high &=
#ifdef CONFIG_X86_64
		VM_ENTRY_IA32E_MODE |
#endif
		VM_ENTRY_LOAD_IA32_PAT;
-	nested_vmx_entry_ctls_high |= (VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR |
-		VM_ENTRY_LOAD_IA32_EFER);
+	vmx->nested.nested_vmx_entry_ctls_high |=
+		(VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR | VM_ENTRY_LOAD_IA32_EFER);
	if (vmx_mpx_supported())
-		nested_vmx_entry_ctls_high |= VM_ENTRY_LOAD_BNDCFGS;
+		vmx->nested.nested_vmx_entry_ctls_high |= VM_ENTRY_LOAD_BNDCFGS;

	/* We support free control of debug control loading. */
-	nested_vmx_true_entry_ctls_low = nested_vmx_entry_ctls_low &
+	vmx->nested.nested_vmx_true_entry_ctls_low =
+		vmx->nested.nested_vmx_entry_ctls_low &
		~VM_ENTRY_LOAD_DEBUG_CONTROLS;

	/* cpu-based controls */
	rdmsr(MSR_IA32_VMX_PROCBASED_CTLS,
-		nested_vmx_procbased_ctls_low, nested_vmx_procbased_ctls_high);
-	nested_vmx_procbased_ctls_low = CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
-	nested_vmx_procbased_ctls_high &=
+		vmx->nested.nested_vmx_procbased_ctls_low,
+		vmx->nested.nested_vmx_procbased_ctls_high);
+	vmx->nested.nested_vmx_procbased_ctls_low =
+		CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
+	vmx->nested.nested_vmx_procbased_ctls_high &=
		CPU_BASED_VIRTUAL_INTR_PENDING |
		CPU_BASED_VIRTUAL_NMI_PENDING | CPU_BASED_USE_TSC_OFFSETING |
		CPU_BASED_HLT_EXITING | CPU_BASED_INVLPG_EXITING |
@@ -2386,45 +2451,55 @@ static __init void nested_vmx_setup_ctls_msrs(void)
	 * can use it to avoid exits to L1 - even when L0 runs L2
	 * without MSR bitmaps.
	 */
-	nested_vmx_procbased_ctls_high |= CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR |
+	vmx->nested.nested_vmx_procbased_ctls_high |=
+		CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR |
		CPU_BASED_USE_MSR_BITMAPS;

	/* We support free control of CR3 access interception. */
-	nested_vmx_true_procbased_ctls_low = nested_vmx_procbased_ctls_low &
+	vmx->nested.nested_vmx_true_procbased_ctls_low =
+		vmx->nested.nested_vmx_procbased_ctls_low &
		~(CPU_BASED_CR3_LOAD_EXITING | CPU_BASED_CR3_STORE_EXITING);

	/* secondary cpu-based controls */
	rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2,
-		nested_vmx_secondary_ctls_low, nested_vmx_secondary_ctls_high);
-	nested_vmx_secondary_ctls_low = 0;
-	nested_vmx_secondary_ctls_high &=
+		vmx->nested.nested_vmx_secondary_ctls_low,
+		vmx->nested.nested_vmx_secondary_ctls_high);
+	vmx->nested.nested_vmx_secondary_ctls_low = 0;
+	vmx->nested.nested_vmx_secondary_ctls_high &=
		SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
+		SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
+		SECONDARY_EXEC_APIC_REGISTER_VIRT |
+		SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
		SECONDARY_EXEC_WBINVD_EXITING |
		SECONDARY_EXEC_XSAVES;

	if (enable_ept) {
		/* nested EPT: emulate EPT also to L1 */
-		nested_vmx_secondary_ctls_high |= SECONDARY_EXEC_ENABLE_EPT |
+		vmx->nested.nested_vmx_secondary_ctls_high |=
+			SECONDARY_EXEC_ENABLE_EPT |
			SECONDARY_EXEC_UNRESTRICTED_GUEST;
-		nested_vmx_ept_caps = VMX_EPT_PAGE_WALK_4_BIT |
+		vmx->nested.nested_vmx_ept_caps = VMX_EPT_PAGE_WALK_4_BIT |
			VMX_EPTP_WB_BIT | VMX_EPT_2MB_PAGE_BIT |
			VMX_EPT_INVEPT_BIT;
-		nested_vmx_ept_caps &= vmx_capability.ept;
+		vmx->nested.nested_vmx_ept_caps &= vmx_capability.ept;
		/*
		 * For nested guests, we don't do anything specific
		 * for single context invalidation. Hence, only advertise
		 * support for global context invalidation.
		 */
-		nested_vmx_ept_caps |= VMX_EPT_EXTENT_GLOBAL_BIT;
+		vmx->nested.nested_vmx_ept_caps |= VMX_EPT_EXTENT_GLOBAL_BIT;
	} else
-		nested_vmx_ept_caps = 0;
+		vmx->nested.nested_vmx_ept_caps = 0;

	/* miscellaneous data */
-	rdmsr(MSR_IA32_VMX_MISC, nested_vmx_misc_low, nested_vmx_misc_high);
-	nested_vmx_misc_low &= VMX_MISC_SAVE_EFER_LMA;
-	nested_vmx_misc_low |= VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE |
+	rdmsr(MSR_IA32_VMX_MISC,
+		vmx->nested.nested_vmx_misc_low,
+		vmx->nested.nested_vmx_misc_high);
+	vmx->nested.nested_vmx_misc_low &= VMX_MISC_SAVE_EFER_LMA;
+	vmx->nested.nested_vmx_misc_low |=
+		VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE |
		VMX_MISC_ACTIVITY_HLT;
-	nested_vmx_misc_high = 0;
+	vmx->nested.nested_vmx_misc_high = 0;
}

static inline bool vmx_control_verify(u32 control, u32 low, u32 high)
@@ -2443,6 +2518,8 @@ static inline u64 vmx_control_msr(u32 low, u32 high)
/* Returns 0 on success, non-0 otherwise. */
static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
{
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+
	switch (msr_index) {
	case MSR_IA32_VMX_BASIC:
		/*
@@ -2457,36 +2534,44 @@ static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
		break;
	case MSR_IA32_VMX_TRUE_PINBASED_CTLS:
	case MSR_IA32_VMX_PINBASED_CTLS:
-		*pdata = vmx_control_msr(nested_vmx_pinbased_ctls_low,
-					nested_vmx_pinbased_ctls_high);
+		*pdata = vmx_control_msr(
+			vmx->nested.nested_vmx_pinbased_ctls_low,
+			vmx->nested.nested_vmx_pinbased_ctls_high);
		break;
	case MSR_IA32_VMX_TRUE_PROCBASED_CTLS:
-		*pdata = vmx_control_msr(nested_vmx_true_procbased_ctls_low,
-					nested_vmx_procbased_ctls_high);
+		*pdata = vmx_control_msr(
+			vmx->nested.nested_vmx_true_procbased_ctls_low,
+			vmx->nested.nested_vmx_procbased_ctls_high);
		break;
	case MSR_IA32_VMX_PROCBASED_CTLS:
-		*pdata = vmx_control_msr(nested_vmx_procbased_ctls_low,
-					nested_vmx_procbased_ctls_high);
+		*pdata = vmx_control_msr(
+			vmx->nested.nested_vmx_procbased_ctls_low,
+			vmx->nested.nested_vmx_procbased_ctls_high);
		break;
	case MSR_IA32_VMX_TRUE_EXIT_CTLS:
-		*pdata = vmx_control_msr(nested_vmx_true_exit_ctls_low,
-					nested_vmx_exit_ctls_high);
+		*pdata = vmx_control_msr(
+			vmx->nested.nested_vmx_true_exit_ctls_low,
+			vmx->nested.nested_vmx_exit_ctls_high);
		break;
	case MSR_IA32_VMX_EXIT_CTLS:
-		*pdata = vmx_control_msr(nested_vmx_exit_ctls_low,
-					nested_vmx_exit_ctls_high);
+		*pdata = vmx_control_msr(
+			vmx->nested.nested_vmx_exit_ctls_low,
+			vmx->nested.nested_vmx_exit_ctls_high);
		break;
	case MSR_IA32_VMX_TRUE_ENTRY_CTLS:
-		*pdata = vmx_control_msr(nested_vmx_true_entry_ctls_low,
-					nested_vmx_entry_ctls_high);
+		*pdata = vmx_control_msr(
+			vmx->nested.nested_vmx_true_entry_ctls_low,
+			vmx->nested.nested_vmx_entry_ctls_high);
		break;
	case MSR_IA32_VMX_ENTRY_CTLS:
-		*pdata = vmx_control_msr(nested_vmx_entry_ctls_low,
-					nested_vmx_entry_ctls_high);
+		*pdata = vmx_control_msr(
+			vmx->nested.nested_vmx_entry_ctls_low,
+			vmx->nested.nested_vmx_entry_ctls_high);
		break;
	case MSR_IA32_VMX_MISC:
-		*pdata = vmx_control_msr(nested_vmx_misc_low,
-					nested_vmx_misc_high);
+		*pdata = vmx_control_msr(
+			vmx->nested.nested_vmx_misc_low,
+			vmx->nested.nested_vmx_misc_high);
		break;
	/*
	 * These MSRs specify bits which the guest must keep fixed (on or off)
@@ -2511,12 +2596,13 @@ static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
		*pdata = 0x2e; /* highest index: VMX_PREEMPTION_TIMER_VALUE */
		break;
	case MSR_IA32_VMX_PROCBASED_CTLS2:
-		*pdata = vmx_control_msr(nested_vmx_secondary_ctls_low,
-					nested_vmx_secondary_ctls_high);
+		*pdata = vmx_control_msr(
+			vmx->nested.nested_vmx_secondary_ctls_low,
+			vmx->nested.nested_vmx_secondary_ctls_high);
		break;
	case MSR_IA32_VMX_EPT_VPID_CAP:
		/* Currently, no nested vpid support */
-		*pdata = nested_vmx_ept_caps;
+		*pdata = vmx->nested.nested_vmx_ept_caps;
		break;
	default:
		return 1;
@@ -2929,7 +3015,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
			SECONDARY_EXEC_APIC_REGISTER_VIRT |
			SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
			SECONDARY_EXEC_SHADOW_VMCS |
-			SECONDARY_EXEC_XSAVES;
+			SECONDARY_EXEC_XSAVES |
+			SECONDARY_EXEC_ENABLE_PML;
		if (adjust_vmx_controls(min2, opt2,
					MSR_IA32_VMX_PROCBASED_CTLS2,
					&_cpu_based_2nd_exec_control) < 0)
@@ -4159,6 +4246,52 @@ static void __vmx_enable_intercept_for_msr(unsigned long *msr_bitmap,
	}
}

+/*
+ * If an MSR is allowed by L0, we should check whether it is allowed by L1.
+ * The corresponding bit will be cleared unless both L0 and L1 allow it.
+ */
+static void nested_vmx_disable_intercept_for_msr(unsigned long *msr_bitmap_l1,
+					unsigned long *msr_bitmap_nested,
+					u32 msr, int type)
+{
+	int f = sizeof(unsigned long);
+
+	if (!cpu_has_vmx_msr_bitmap()) {
+		WARN_ON(1);
+		return;
+	}
+
+	/*
+	 * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
+	 * have the write-low and read-high bitmap offsets the wrong way round.
+	 * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
+	 */
+	if (msr <= 0x1fff) {
+		if (type & MSR_TYPE_R &&
+		   !test_bit(msr, msr_bitmap_l1 + 0x000 / f))
+			/* read-low */
+			__clear_bit(msr, msr_bitmap_nested + 0x000 / f);
+
+		if (type & MSR_TYPE_W &&
+		   !test_bit(msr, msr_bitmap_l1 + 0x800 / f))
+			/* write-low */
+			__clear_bit(msr, msr_bitmap_nested + 0x800 / f);
+
+	} else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
+		msr &= 0x1fff;
+		if (type & MSR_TYPE_R &&
+		   !test_bit(msr, msr_bitmap_l1 + 0x400 / f))
+			/* read-high */
+			__clear_bit(msr, msr_bitmap_nested + 0x400 / f);
+
+		if (type & MSR_TYPE_W &&
+		   !test_bit(msr, msr_bitmap_l1 + 0xc00 / f))
+			/* write-high */
+			__clear_bit(msr, msr_bitmap_nested + 0xc00 / f);
+
+	}
+}
+
static void vmx_disable_intercept_for_msr(u32 msr, bool longmode_only)
{
	if (!longmode_only)
@@ -4197,6 +4330,64 @@ static int vmx_vm_has_apicv(struct kvm *kvm)
	return enable_apicv && irqchip_in_kernel(kvm);
}

+static int vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu)
+{
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	int max_irr;
+	void *vapic_page;
+	u16 status;
+
+	if (vmx->nested.pi_desc &&
+	    vmx->nested.pi_pending) {
+		vmx->nested.pi_pending = false;
+		if (!pi_test_and_clear_on(vmx->nested.pi_desc))
+			return 0;
+
+		max_irr = find_last_bit(
+			(unsigned long *)vmx->nested.pi_desc->pir, 256);
+
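+		/*
+		 * find_last_bit() returns the bitmap size (256) when no
+		 * bit is set, i.e. the PIR is empty.
+		 */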
+		if (max_irr == 256)
+			return 0;
+
+		vapic_page = kmap(vmx->nested.virtual_apic_page);
+		if (!vapic_page) {
+			WARN_ON(1);
+			return -ENOMEM;
+		}
+		__kvm_apic_update_irr(vmx->nested.pi_desc->pir, vapic_page);
+		kunmap(vmx->nested.virtual_apic_page);
+
+		status = vmcs_read16(GUEST_INTR_STATUS);
+		if ((u8)max_irr > ((u8)status & 0xff)) {
+			status &= ~0xff;
+			status |= (u8)max_irr;
+			vmcs_write16(GUEST_INTR_STATUS, status);
+		}
+	}
+	return 0;
+}
+
+static int vmx_deliver_nested_posted_interrupt(struct kvm_vcpu *vcpu,
+						int vector)
+{
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+	if (is_guest_mode(vcpu) &&
+	    vector == vmx->nested.posted_intr_nv) {
+		/* the PIR and ON have been set by L1. */
+		if (vcpu->mode == IN_GUEST_MODE)
+			apic->send_IPI_mask(get_cpu_mask(vcpu->cpu),
+				POSTED_INTR_VECTOR);
+		/*
+		 * If a posted intr is not recognized by hardware,
+		 * we will accomplish it in the next vmentry.
+		 */
+		vmx->nested.pi_pending = true;
+		kvm_make_request(KVM_REQ_EVENT, vcpu);
+		return 0;
+	}
+	return -1;
+}
/*
 * Send interrupt to vcpu via posted interrupt way.
 * 1. If target vcpu is running(non-root mode), send posted interrupt
@@ -4209,6 +4400,10 @@ static void vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector)
	struct vcpu_vmx *vmx = to_vmx(vcpu);
	int r;

+	r = vmx_deliver_nested_posted_interrupt(vcpu, vector);
+	if (!r)
+		return;
+
	if (pi_test_and_set_pir(vector, &vmx->pi_desc))
		return;

@@ -4360,6 +4555,9 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx)
		   a current VMCS12
		*/
		exec_control &= ~SECONDARY_EXEC_SHADOW_VMCS;
+	/* PML is enabled/disabled in creating/destroying vcpu */
+	exec_control &= ~SECONDARY_EXEC_ENABLE_PML;
+
	return exec_control;
}

@@ -4986,11 +5184,12 @@ vmx_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall)
	hypercall[2] = 0xc1;
}

-static bool nested_cr0_valid(struct vmcs12 *vmcs12, unsigned long val)
+static bool nested_cr0_valid(struct kvm_vcpu *vcpu, unsigned long val)
{
	unsigned long always_on = VMXON_CR0_ALWAYSON;
+	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);

-	if (nested_vmx_secondary_ctls_high &
+	if (to_vmx(vcpu)->nested.nested_vmx_secondary_ctls_high &
		SECONDARY_EXEC_UNRESTRICTED_GUEST &&
	    nested_cpu_has2(vmcs12, SECONDARY_EXEC_UNRESTRICTED_GUEST))
		always_on &= ~(X86_CR0_PE | X86_CR0_PG);
@@ -5015,7 +5214,7 @@ static int handle_set_cr0(struct kvm_vcpu *vcpu, unsigned long val)
		val = (val & ~vmcs12->cr0_guest_host_mask) |
			(vmcs12->guest_cr0 & vmcs12->cr0_guest_host_mask);

-		if (!nested_cr0_valid(vmcs12, val))
+		if (!nested_cr0_valid(vcpu, val))
			return 1;

		if (kvm_set_cr0(vcpu, val))
@@ -5817,13 +6016,21 @@ static __init int hardware_setup(void)
			(unsigned long *)__get_free_page(GFP_KERNEL);
	if (!vmx_msr_bitmap_longmode_x2apic)
		goto out4;
+
+	if (nested) {
+		vmx_msr_bitmap_nested =
+			(unsigned long *)__get_free_page(GFP_KERNEL);
+		if (!vmx_msr_bitmap_nested)
+			goto out5;
+	}
+
	vmx_vmread_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
	if (!vmx_vmread_bitmap)
-		goto out5;
+		goto out6;

	vmx_vmwrite_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
	if (!vmx_vmwrite_bitmap)
-		goto out6;
+		goto out7;

	memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE);
	memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE);
@@ -5839,10 +6046,12 @@ static __init int hardware_setup(void)

	memset(vmx_msr_bitmap_legacy, 0xff, PAGE_SIZE);
	memset(vmx_msr_bitmap_longmode, 0xff, PAGE_SIZE);
+	if (nested)
+		memset(vmx_msr_bitmap_nested, 0xff, PAGE_SIZE);

	if (setup_vmcs_config(&vmcs_config) < 0) {
		r = -EIO;
-		goto out7;
+		goto out8;
	}

	if (boot_cpu_has(X86_FEATURE_NX))
@@ -5868,16 +6077,16 @@ static __init int hardware_setup(void)
	if (!cpu_has_vmx_unrestricted_guest())
		enable_unrestricted_guest = 0;

-	if (!cpu_has_vmx_flexpriority()) {
+	if (!cpu_has_vmx_flexpriority())
		flexpriority_enabled = 0;

-	/*
-	 * set_apic_access_page_addr() is used to reload apic access
-	 * page upon invalidation. No need to do anything if the
-	 * processor does not have the APIC_ACCESS_ADDR VMCS field.
-	 */
+	/*
+	 * set_apic_access_page_addr() is used to reload apic access
+	 * page upon invalidation. No need to do anything if not
+	 * using the APIC_ACCESS_ADDR VMCS field.
+	 */
+	if (!flexpriority_enabled)
		kvm_x86_ops->set_apic_access_page_addr = NULL;
-	}

	if (!cpu_has_vmx_tpr_shadow())
		kvm_x86_ops->update_cr8_intercept = NULL;
@@ -5895,13 +6104,11 @@ static __init int hardware_setup(void)
		kvm_x86_ops->update_cr8_intercept = NULL;
	else {
		kvm_x86_ops->hwapic_irr_update = NULL;
+		kvm_x86_ops->hwapic_isr_update = NULL;
		kvm_x86_ops->deliver_posted_interrupt = NULL;
		kvm_x86_ops->sync_pir_to_irr = vmx_sync_pir_to_irr_dummy;
	}

-	if (nested)
-		nested_vmx_setup_ctls_msrs();
-
	vmx_disable_intercept_for_msr(MSR_FS_BASE, false);
	vmx_disable_intercept_for_msr(MSR_GS_BASE, false);
	vmx_disable_intercept_for_msr(MSR_KERNEL_GS_BASE, true);
@@ -5945,12 +6152,29 @@ static __init int hardware_setup(void)

	update_ple_window_actual_max();

+	/*
+	 * Only enable PML when hardware supports PML feature, and both EPT
+	 * and EPT A/D bit features are enabled -- PML depends on them to work.
+	 */
+	if (!enable_ept || !enable_ept_ad_bits || !cpu_has_vmx_pml())
+		enable_pml = 0;
+
+	if (!enable_pml) {
+		kvm_x86_ops->slot_enable_log_dirty = NULL;
+		kvm_x86_ops->slot_disable_log_dirty = NULL;
+		kvm_x86_ops->flush_log_dirty = NULL;
+		kvm_x86_ops->enable_log_dirty_pt_masked = NULL;
+	}
+
	return alloc_kvm_area();

-out7:
+out8:
	free_page((unsigned long)vmx_vmwrite_bitmap);
-out6:
+out7:
	free_page((unsigned long)vmx_vmread_bitmap);
+out6:
+	if (nested)
+		free_page((unsigned long)vmx_msr_bitmap_nested);
out5:
	free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
out4:
@@ -5977,6 +6201,8 @@ static __exit void hardware_unsetup(void)
	free_page((unsigned long)vmx_io_bitmap_a);
	free_page((unsigned long)vmx_vmwrite_bitmap);
	free_page((unsigned long)vmx_vmread_bitmap);
+	if (nested)
+		free_page((unsigned long)vmx_msr_bitmap_nested);

	free_kvm_area();
}
@@ -6143,6 +6369,13 @@ static void nested_vmx_failValid(struct kvm_vcpu *vcpu,
	 */
}

+static void nested_vmx_abort(struct kvm_vcpu *vcpu, u32 indicator)
+{
+	/* TODO: avoid simply resetting the guest here. */
+	kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
+	pr_warn("kvm: nested vmx abort, indicator %d\n", indicator);
+}
+
static enum hrtimer_restart vmx_preemption_timer_fn(struct hrtimer *timer)
{
	struct vcpu_vmx *vmx =
@@ -6432,6 +6665,7 @@ static inline void nested_release_vmcs12(struct vcpu_vmx *vmx)
		vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control);
		vmcs_write64(VMCS_LINK_POINTER, -1ull);
	}
+	vmx->nested.posted_intr_nv = -1;
	kunmap(vmx->nested.current_vmcs12_page);
	nested_release_page(vmx->nested.current_vmcs12_page);
	vmx->nested.current_vmptr = -1ull;
@@ -6460,6 +6694,12 @@ static void free_nested(struct vcpu_vmx *vmx)
		nested_release_page(vmx->nested.virtual_apic_page);
		vmx->nested.virtual_apic_page = NULL;
	}
+	if (vmx->nested.pi_desc_page) {
+		kunmap(vmx->nested.pi_desc_page);
+		nested_release_page(vmx->nested.pi_desc_page);
+		vmx->nested.pi_desc_page = NULL;
+		vmx->nested.pi_desc = NULL;
+	}

	nested_free_all_saved_vmcss(vmx);
}
@@ -6893,6 +7133,7 @@ static int handle_vmptrst(struct kvm_vcpu *vcpu)
/* Emulate the INVEPT instruction */
static int handle_invept(struct kvm_vcpu *vcpu)
{
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
	u32 vmx_instruction_info, types;
	unsigned long type;
	gva_t gva;
@@ -6901,8 +7142,9 @@ static int handle_invept(struct kvm_vcpu *vcpu)
		u64 eptp, gpa;
	} operand;

-	if (!(nested_vmx_secondary_ctls_high & SECONDARY_EXEC_ENABLE_EPT) ||
-	    !(nested_vmx_ept_caps & VMX_EPT_INVEPT_BIT)) {
+	if (!(vmx->nested.nested_vmx_secondary_ctls_high &
+	      SECONDARY_EXEC_ENABLE_EPT) ||
+	    !(vmx->nested.nested_vmx_ept_caps & VMX_EPT_INVEPT_BIT)) {
		kvm_queue_exception(vcpu, UD_VECTOR);
		return 1;
	}
@@ -6918,7 +7160,7 @@ static int handle_invept(struct kvm_vcpu *vcpu)
	vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
	type = kvm_register_readl(vcpu, (vmx_instruction_info >> 28) & 0xf);

-	types = (nested_vmx_ept_caps >> VMX_EPT_EXTENT_SHIFT) & 6;
+	types = (vmx->nested.nested_vmx_ept_caps >> VMX_EPT_EXTENT_SHIFT) & 6;

	if (!(types & (1UL << type))) {
		nested_vmx_failValid(vcpu,
@@ -6960,6 +7202,31 @@ static int handle_invvpid(struct kvm_vcpu *vcpu)
	return 1;
}

+static int handle_pml_full(struct kvm_vcpu *vcpu)
+{
+	unsigned long exit_qualification;
+
+	trace_kvm_pml_full(vcpu->vcpu_id);
+
+	exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
+
+	/*
+	 * PML buffer FULL happened while executing iret from NMI,
+	 * "blocked by NMI" bit has to be set before next VM entry.
+	 */
+	if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) &&
+			cpu_has_virtual_nmis() &&
+			(exit_qualification & INTR_INFO_UNBLOCK_NMI))
+		vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
+				GUEST_INTR_STATE_NMI);
+
+	/*
+	 * PML buffer already flushed at beginning of VMEXIT. Nothing to do
+	 * here, and there's no userspace involvement needed for PML.
+	 */
+	return 1;
+}
+
/*
 * The exit handlers return 1 if the exit was handled fully and guest execution
 * may resume. Otherwise they set the kvm_run parameter to indicate what needs
@@ -7008,6 +7275,7 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
	[EXIT_REASON_INVVPID]                 = handle_invvpid,
	[EXIT_REASON_XSAVES]                  = handle_xsaves,
	[EXIT_REASON_XRSTORS]                 = handle_xrstors,
+	[EXIT_REASON_PML_FULL]                = handle_pml_full,
};

static const int kvm_vmx_max_exit_handlers =
@@ -7275,6 +7543,10 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
	case EXIT_REASON_APIC_ACCESS:
		return nested_cpu_has2(vmcs12,
			SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES);
+	case EXIT_REASON_APIC_WRITE:
+	case EXIT_REASON_EOI_INDUCED:
+		/* apic_write and eoi_induced should exit unconditionally. */
+		return 1;
	case EXIT_REASON_EPT_VIOLATION:
		/*
		 * L0 always deals with the EPT violation. If nested EPT is
@@ -7314,6 +7586,89 @@ static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2)
	*info2 = vmcs_read32(VM_EXIT_INTR_INFO);
}

+static int vmx_enable_pml(struct vcpu_vmx *vmx)
+{
+	struct page *pml_pg;
+	u32 exec_control;
+
+	pml_pg = alloc_page(GFP_KERNEL | __GFP_ZERO);
+	if (!pml_pg)
+		return -ENOMEM;
+
+	vmx->pml_pg = pml_pg;
+
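+	/*
+	 * Hardware writes each logged GPA at the current PML index and
+	 * then decrements the index, so start at the last entry.
+	 */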
+	vmcs_write64(PML_ADDRESS, page_to_phys(vmx->pml_pg));
+	vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1);
+
+	exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
+	exec_control |= SECONDARY_EXEC_ENABLE_PML;
+	vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control);
+
+	return 0;
+}
+
+static void vmx_disable_pml(struct vcpu_vmx *vmx)
+{
+	u32 exec_control;
+
+	ASSERT(vmx->pml_pg);
+	__free_page(vmx->pml_pg);
+	vmx->pml_pg = NULL;
+
+	exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
+	exec_control &= ~SECONDARY_EXEC_ENABLE_PML;
+	vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control);
+}
+
+static void vmx_flush_pml_buffer(struct vcpu_vmx *vmx)
+{
+	struct kvm *kvm = vmx->vcpu.kvm;
+	u64 *pml_buf;
+	u16 pml_idx;
+
+	pml_idx = vmcs_read16(GUEST_PML_INDEX);
+
+	/* Do nothing if PML buffer is empty */
+	if (pml_idx == (PML_ENTITY_NUM - 1))
+		return;
+
+	/* PML index always points to next available PML buffer entity */
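+	/*
+	 * Entries are written from index 511 downward; an index >=
+	 * PML_ENTITY_NUM means it wrapped, i.e. the buffer is full and
+	 * every entry from 0 on is valid. Otherwise the first valid
+	 * entry is at pml_idx + 1.
+	 */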
+	if (pml_idx >= PML_ENTITY_NUM)
+		pml_idx = 0;
+	else
+		pml_idx++;
+
+	pml_buf = page_address(vmx->pml_pg);
+	for (; pml_idx < PML_ENTITY_NUM; pml_idx++) {
+		u64 gpa;
+
+		gpa = pml_buf[pml_idx];
+		WARN_ON(gpa & (PAGE_SIZE - 1));
+		mark_page_dirty(kvm, gpa >> PAGE_SHIFT);
+	}
+
+	/* reset PML index */
+	vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1);
+}
+
+/*
+ * Flush all vcpus' PML buffer and update logged GPAs to dirty_bitmap.
+ * Called before reporting dirty_bitmap to userspace.
+ */
+static void kvm_flush_pml_buffers(struct kvm *kvm)
+{
+	int i;
+	struct kvm_vcpu *vcpu;
+	/*
+	 * We only need to kick the vcpu out of guest mode here, as the PML
+	 * buffer is flushed at the beginning of every VMEXIT; only vcpus
+	 * running in guest mode can have unflushed GPAs in their PML buffer.
+	 */
+	kvm_for_each_vcpu(i, vcpu, kvm)
+		kvm_vcpu_kick(vcpu);
+}
+
/*
 * The guest has exited. See if we can fix it or if we need userspace
 * assistance.
@@ -7324,6 +7679,16 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
	u32 exit_reason = vmx->exit_reason;
	u32 vectoring_info = vmx->idt_vectoring_info;

+	/*
+	 * Flush logged GPAs out of the PML buffer so that dirty_bitmap stays
+	 * up to date. A further benefit: in kvm_vm_ioctl_get_dirty_log, before
+	 * querying dirty_bitmap, we only need to kick all vcpus out of guest
+	 * mode, as a vcpu in root mode must already have flushed its buffer.
+	 */
+	if (enable_pml)
+		vmx_flush_pml_buffer(vmx);
+
	/* If guest state is invalid, start emulating */
	if (vmx->emulation_required)
		return handle_invalid_guest_state(vcpu);
@@ -7471,9 +7836,6 @@ static void vmx_hwapic_isr_update(struct kvm *kvm, int isr)
	u16 status;
	u8 old;

-	if (!vmx_vm_has_apicv(kvm))
-		return;
-
	if (isr == -1)
		isr = 0;

@@ -7973,6 +8335,8 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);

+	if (enable_pml)
+		vmx_disable_pml(vmx);
	free_vpid(vmx);
	leave_guest_mode(vcpu);
	vmx_load_vmcs01(vcpu);
@@ -8040,9 +8404,25 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
		goto free_vmcs;
	}

+	if (nested)
+		nested_vmx_setup_ctls_msrs(vmx);
+
+	vmx->nested.posted_intr_nv = -1;
	vmx->nested.current_vmptr = -1ull;
	vmx->nested.current_vmcs12 = NULL;

+	/*
+	 * If PML is turned on, failure on enabling PML just results in failure
+	 * of creating the vcpu, therefore we can simplify PML logic (by
+	 * avoiding dealing with cases, such as enabling PML partially on vcpus
+	 * for the guest, etc.).
+	 */
+	if (enable_pml) {
+		err = vmx_enable_pml(vmx);
+		if (err)
+			goto free_vmcs;
+	}
+
	return &vmx->vcpu;

free_vmcs:
@@ -8184,9 +8564,10 @@ static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu)

static void nested_ept_init_mmu_context(struct kvm_vcpu *vcpu)
{
-	kvm_init_shadow_ept_mmu(vcpu, &vcpu->arch.mmu,
-			nested_vmx_ept_caps & VMX_EPT_EXECUTE_ONLY_BIT);
-
+	WARN_ON(mmu_is_nested(vcpu));
+	kvm_init_shadow_ept_mmu(vcpu,
+			to_vmx(vcpu)->nested.nested_vmx_ept_caps &
+			VMX_EPT_EXECUTE_ONLY_BIT);
	vcpu->arch.mmu.set_cr3 = vmx_set_cr3;
	vcpu->arch.mmu.get_cr3 = nested_ept_get_cr3;
	vcpu->arch.mmu.inject_page_fault = nested_ept_inject_page_fault;
@@ -8199,6 +8580,18 @@ static void nested_ept_uninit_mmu_context(struct kvm_vcpu *vcpu)
	vcpu->arch.walk_mmu = &vcpu->arch.mmu;
}

+static bool nested_vmx_is_page_fault_vmexit(struct vmcs12 *vmcs12,
+					u16 error_code)
+{
+	bool inequality, bit;
+
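+	/*
+	 * Per the SDM, a #PF triggers a VM exit iff
+	 * ((error_code & mask) == match) equals the value of the PF bit
+	 * in the exception bitmap.
+	 */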
+	bit = (vmcs12->exception_bitmap & (1u << PF_VECTOR)) != 0;
+	inequality =
+		(error_code & vmcs12->page_fault_error_code_mask) !=
+		 vmcs12->page_fault_error_code_match;
+	return inequality ^ bit;
+}
+
static void vmx_inject_page_fault_nested(struct kvm_vcpu *vcpu,
		struct x86_exception *fault)
{
@@ -8206,8 +8599,7 @@ static void vmx_inject_page_fault_nested(struct kvm_vcpu *vcpu,

	WARN_ON(!is_guest_mode(vcpu));

-	/* TODO: also check PFEC_MATCH/MASK, not just EB.PF. */
-	if (vmcs12->exception_bitmap & (1u << PF_VECTOR))
+	if (nested_vmx_is_page_fault_vmexit(vmcs12, fault->error_code))
		nested_vmx_vmexit(vcpu, to_vmx(vcpu)->exit_reason,
				  vmcs_read32(VM_EXIT_INTR_INFO),
				  vmcs_readl(EXIT_QUALIFICATION));
@@ -8261,6 +8653,31 @@ static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu,
		return false;
	}

+	if (nested_cpu_has_posted_intr(vmcs12)) {
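+		/*
+		 * The posted-interrupt descriptor is 64 bytes, so 64-byte
+		 * alignment also guarantees it fits within a single page.
+		 */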
+		if (!IS_ALIGNED(vmcs12->posted_intr_desc_addr, 64))
+			return false;
+
+		if (vmx->nested.pi_desc_page) { /* shouldn't happen */
+			kunmap(vmx->nested.pi_desc_page);
+			nested_release_page(vmx->nested.pi_desc_page);
+		}
+		vmx->nested.pi_desc_page =
+			nested_get_page(vcpu, vmcs12->posted_intr_desc_addr);
+		if (!vmx->nested.pi_desc_page)
+			return false;
+
+		vmx->nested.pi_desc =
+			(struct pi_desc *)kmap(vmx->nested.pi_desc_page);
+		if (!vmx->nested.pi_desc) {
+			nested_release_page_clean(vmx->nested.pi_desc_page);
+			return false;
+		}
+		vmx->nested.pi_desc =
+			(struct pi_desc *)((void *)vmx->nested.pi_desc +
+			(unsigned long)(vmcs12->posted_intr_desc_addr &
+			(PAGE_SIZE - 1)));
+	}
+
	return true;
}

@@ -8286,6 +8703,310 @@ static void vmx_start_preemption_timer(struct kvm_vcpu *vcpu)
		      ns_to_ktime(preemption_timeout), HRTIMER_MODE_REL);
}

+static int nested_vmx_check_msr_bitmap_controls(struct kvm_vcpu *vcpu,
+						struct vmcs12 *vmcs12)
+{
+	int maxphyaddr;
+	u64 addr;
+
+	if (!nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS))
+		return 0;
+
+	if (vmcs12_read_any(vcpu, MSR_BITMAP, &addr)) {
+		WARN_ON(1);
+		return -EINVAL;
+	}
+	maxphyaddr = cpuid_maxphyaddr(vcpu);
+
+	if (!PAGE_ALIGNED(vmcs12->msr_bitmap) ||
+	   ((addr + PAGE_SIZE) >> maxphyaddr))
+		return -EINVAL;
+
+	return 0;
+}
+
+/*
+ * Merge L0's and L1's MSR bitmap, return false to indicate that
+ * we do not use the hardware.
+ */
+static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu,
+					       struct vmcs12 *vmcs12)
+{
+	int msr;
+	struct page *page;
+	unsigned long *msr_bitmap;
+
+	if (!nested_cpu_has_virt_x2apic_mode(vmcs12))
+		return false;
+
+	page = nested_get_page(vcpu, vmcs12->msr_bitmap);
+	if (!page) {
+		WARN_ON(1);
+		return false;
+	}
+	msr_bitmap = (unsigned long *)kmap(page);
+	if (!msr_bitmap) {
+		nested_release_page_clean(page);
+		WARN_ON(1);
+		return false;
+	}
+
+	if (nested_cpu_has_virt_x2apic_mode(vmcs12)) {
+		if (nested_cpu_has_apic_reg_virt(vmcs12))
+			for (msr = 0x800; msr <= 0x8ff; msr++)
+				nested_vmx_disable_intercept_for_msr(
+					msr_bitmap,
+					vmx_msr_bitmap_nested,
+					msr, MSR_TYPE_R);
+		/* TPR is allowed */
+		nested_vmx_disable_intercept_for_msr(msr_bitmap,
+				vmx_msr_bitmap_nested,
+				APIC_BASE_MSR + (APIC_TASKPRI >> 4),
+				MSR_TYPE_R | MSR_TYPE_W);
+		if (nested_cpu_has_vid(vmcs12)) {
+			/* EOI and self-IPI are allowed */
+			nested_vmx_disable_intercept_for_msr(
+				msr_bitmap,
+				vmx_msr_bitmap_nested,
+				APIC_BASE_MSR + (APIC_EOI >> 4),
+				MSR_TYPE_W);
+			nested_vmx_disable_intercept_for_msr(
+				msr_bitmap,
+				vmx_msr_bitmap_nested,
+				APIC_BASE_MSR + (APIC_SELF_IPI >> 4),
+				MSR_TYPE_W);
+		}
+	} else {
+		/*
+		 * Enable reading intercept of all the x2apic
+		 * MSRs. We should not rely on vmcs12 to do any
+		 * optimizations here, it may have been modified
+		 * by L1.
+		 */
+		for (msr = 0x800; msr <= 0x8ff; msr++)
+			__vmx_enable_intercept_for_msr(
+				vmx_msr_bitmap_nested,
+				msr,
+				MSR_TYPE_R);
+
+		__vmx_enable_intercept_for_msr(
+				vmx_msr_bitmap_nested,
+				APIC_BASE_MSR + (APIC_TASKPRI >> 4),
+				MSR_TYPE_W);
+		__vmx_enable_intercept_for_msr(
+				vmx_msr_bitmap_nested,
+				APIC_BASE_MSR + (APIC_EOI >> 4),
+				MSR_TYPE_W);
+		__vmx_enable_intercept_for_msr(
+				vmx_msr_bitmap_nested,
+				APIC_BASE_MSR + (APIC_SELF_IPI >> 4),
+				MSR_TYPE_W);
+	}
+	kunmap(page);
+	nested_release_page_clean(page);
+
+	return true;
+}
+
+static int nested_vmx_check_apicv_controls(struct kvm_vcpu *vcpu,
+					   struct vmcs12 *vmcs12)
+{
+	if (!nested_cpu_has_virt_x2apic_mode(vmcs12) &&
+	    !nested_cpu_has_apic_reg_virt(vmcs12) &&
+	    !nested_cpu_has_vid(vmcs12) &&
+	    !nested_cpu_has_posted_intr(vmcs12))
+		return 0;
+
+	/*
+	 * If virtualize x2apic mode is enabled,
+	 * virtualize apic access must be disabled.
+	 */
+	if (nested_cpu_has_virt_x2apic_mode(vmcs12) &&
+	    nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES))
+		return -EINVAL;
+
+	/*
+	 * If virtual interrupt delivery is enabled,
+	 * we must exit on external interrupts.
+	 */
+	if (nested_cpu_has_vid(vmcs12) &&
+	   !nested_exit_on_intr(vcpu))
+		return -EINVAL;
+
+	/*
+	 * bits 15:8 should be zero in posted_intr_nv,
+	 * the descriptor address has been already checked
+	 * in nested_get_vmcs12_pages.
+	 */
+	if (nested_cpu_has_posted_intr(vmcs12) &&
+	   (!nested_cpu_has_vid(vmcs12) ||
+	    !nested_exit_intr_ack_set(vcpu) ||
+	    vmcs12->posted_intr_nv & 0xff00))
+		return -EINVAL;
+
+	/* tpr shadow is needed by all apicv features. */
+	if (!nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW))
+		return -EINVAL;
+
+	return 0;
+}
+
+static int nested_vmx_check_msr_switch(struct kvm_vcpu *vcpu,
+				       unsigned long count_field,
+				       unsigned long addr_field,
+				       int maxphyaddr)
+{
+	u64 count, addr;
+
+	if (vmcs12_read_any(vcpu, count_field, &count) ||
+	    vmcs12_read_any(vcpu, addr_field, &addr)) {
+		WARN_ON(1);
+		return -EINVAL;
+	}
+	if (count == 0)
+		return 0;
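+	/* The SDM requires the MSR switch area address to be 16-byte aligned. */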
+	if (!IS_ALIGNED(addr, 16) || addr >> maxphyaddr ||
+	    (addr + count * sizeof(struct vmx_msr_entry) - 1) >> maxphyaddr) {
+		pr_warn_ratelimited(
+			"nVMX: invalid MSR switch (0x%lx, %d, %llu, 0x%08llx)",
+			addr_field, maxphyaddr, count, addr);
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static int nested_vmx_check_msr_switch_controls(struct kvm_vcpu *vcpu,
+						struct vmcs12 *vmcs12)
+{
+	int maxphyaddr;
+
+	if (vmcs12->vm_exit_msr_load_count == 0 &&
+	    vmcs12->vm_exit_msr_store_count == 0 &&
+	    vmcs12->vm_entry_msr_load_count == 0)
+		return 0; /* Fast path */
+	maxphyaddr = cpuid_maxphyaddr(vcpu);
+	if (nested_vmx_check_msr_switch(vcpu, VM_EXIT_MSR_LOAD_COUNT,
+					VM_EXIT_MSR_LOAD_ADDR, maxphyaddr) ||
+	    nested_vmx_check_msr_switch(vcpu, VM_EXIT_MSR_STORE_COUNT,
+					VM_EXIT_MSR_STORE_ADDR, maxphyaddr) ||
+	    nested_vmx_check_msr_switch(vcpu, VM_ENTRY_MSR_LOAD_COUNT,
+					VM_ENTRY_MSR_LOAD_ADDR, maxphyaddr))
+		return -EINVAL;
+	return 0;
+}
+
+static int nested_vmx_msr_check_common(struct kvm_vcpu *vcpu,
+				       struct vmx_msr_entry *e)
+{
+	/* x2APIC MSR accesses are not allowed */
+	if (apic_x2apic_mode(vcpu->arch.apic) && e->index >> 8 == 0x8)
+		return -EINVAL;
+	if (e->index == MSR_IA32_UCODE_WRITE || /* SDM Table 35-2 */
+	    e->index == MSR_IA32_UCODE_REV)
+		return -EINVAL;
+	if (e->reserved != 0)
+		return -EINVAL;
+	return 0;
+}
+
+static int nested_vmx_load_msr_check(struct kvm_vcpu *vcpu,
+				     struct vmx_msr_entry *e)
+{
+	if (e->index == MSR_FS_BASE ||
+	    e->index == MSR_GS_BASE ||
+	    e->index == MSR_IA32_SMM_MONITOR_CTL || /* SMM is not supported */
+	    nested_vmx_msr_check_common(vcpu, e))
+		return -EINVAL;
+	return 0;
+}
+
+static int nested_vmx_store_msr_check(struct kvm_vcpu *vcpu,
+				      struct vmx_msr_entry *e)
+{
+	if (e->index == MSR_IA32_SMBASE || /* SMM is not supported */
+	    nested_vmx_msr_check_common(vcpu, e))
+		return -EINVAL;
+	return 0;
+}
+
+/*
+ * Load guest's/host's msr at nested entry/exit.
+ * return 0 for success, entry index for failure.
+ */
+static u32 nested_vmx_load_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count)
+{
+	u32 i;
+	struct vmx_msr_entry e;
+	struct msr_data msr;
+
+	msr.host_initiated = false;
+	for (i = 0; i < count; i++) {
+		if (kvm_read_guest(vcpu->kvm, gpa + i * sizeof(e),
+				   &e, sizeof(e))) {
+			pr_warn_ratelimited(
+				"%s cannot read MSR entry (%u, 0x%08llx)\n",
+				__func__, i, gpa + i * sizeof(e));
+			goto fail;
+		}
+		if (nested_vmx_load_msr_check(vcpu, &e)) {
+			pr_warn_ratelimited(
+				"%s check failed (%u, 0x%x, 0x%x)\n",
+				__func__, i, e.index, e.reserved);
+			goto fail;
+		}
+		msr.index = e.index;
+		msr.data = e.value;
+		if (kvm_set_msr(vcpu, &msr)) {
+			pr_warn_ratelimited(
+				"%s cannot write MSR (%u, 0x%x, 0x%llx)\n",
+				__func__, i, e.index, e.value);
+			goto fail;
+		}
+	}
+	return 0;
+fail:
+	return i + 1;
+}
+
+static int nested_vmx_store_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count)
+{
+	u32 i;
+	struct vmx_msr_entry e;
+
+	for (i = 0; i < count; i++) {
+		if (kvm_read_guest(vcpu->kvm,
+				   gpa + i * sizeof(e),
+				   &e, 2 * sizeof(u32))) {
+			pr_warn_ratelimited(
+				"%s cannot read MSR entry (%u, 0x%08llx)\n",
+				__func__, i, gpa + i * sizeof(e));
+			return -EINVAL;
+		}
+		if (nested_vmx_store_msr_check(vcpu, &e)) {
+			pr_warn_ratelimited(
+				"%s check failed (%u, 0x%x, 0x%x)\n",
+				__func__, i, e.index, e.reserved);
+			return -EINVAL;
+		}
+		if (kvm_get_msr(vcpu, e.index, &e.value)) {
+			pr_warn_ratelimited(
+				"%s cannot read MSR (%u, 0x%x)\n",
+				__func__, i, e.index);
+			return -EINVAL;
+		}
+		if (kvm_write_guest(vcpu->kvm,
+				    gpa + i * sizeof(e) +
+					offsetof(struct vmx_msr_entry, value),
+				    &e.value, sizeof(e.value))) {
+			pr_warn_ratelimited(
+				"%s cannot write MSR (%u, 0x%x, 0x%llx)\n",
+				__func__, i, e.index, e.value);
+			return -EINVAL;
+		}
+	}
+	return 0;
+}
+
/*
 * prepare_vmcs02 is called when the L1 guest hypervisor runs its nested
 * L2 guest. L1 has a vmcs for L2 (vmcs12), and this function "merges" it
@@ -8365,8 +9086,23 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)

	exec_control = vmcs12->pin_based_vm_exec_control;
	exec_control |= vmcs_config.pin_based_exec_ctrl;
-	exec_control &= ~(PIN_BASED_VMX_PREEMPTION_TIMER |
-			  PIN_BASED_POSTED_INTR);
+	exec_control &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
+
+	if (nested_cpu_has_posted_intr(vmcs12)) {
+		/*
+		 * Note that we use L0's vector here and in
+		 * vmx_deliver_nested_posted_interrupt.
+		 */
+		vmx->nested.posted_intr_nv = vmcs12->posted_intr_nv;
+		vmx->nested.pi_pending = false;
+		vmcs_write64(POSTED_INTR_NV, POSTED_INTR_VECTOR);
+		vmcs_write64(POSTED_INTR_DESC_ADDR,
+			page_to_phys(vmx->nested.pi_desc_page) +
+			(unsigned long)(vmcs12->posted_intr_desc_addr &
+			(PAGE_SIZE - 1)));
+	} else
+		exec_control &= ~PIN_BASED_POSTED_INTR;
+
	vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, exec_control);

	vmx->nested.preemption_timer_expired = false;
@@ -8423,12 +9159,26 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
		else
			vmcs_write64(APIC_ACCESS_ADDR,
				page_to_phys(vmx->nested.apic_access_page));
-	} else if (vm_need_virtualize_apic_accesses(vmx->vcpu.kvm)) {
+	} else if (!(nested_cpu_has_virt_x2apic_mode(vmcs12)) &&
+		   (vm_need_virtualize_apic_accesses(vmx->vcpu.kvm))) {
		exec_control |=
			SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
		kvm_vcpu_reload_apic_access_page(vcpu);
	}

+	if (exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY) {
+		vmcs_write64(EOI_EXIT_BITMAP0,
+			vmcs12->eoi_exit_bitmap0);
+		vmcs_write64(EOI_EXIT_BITMAP1,
+			vmcs12->eoi_exit_bitmap1);
+		vmcs_write64(EOI_EXIT_BITMAP2,
+			vmcs12->eoi_exit_bitmap2);
+		vmcs_write64(EOI_EXIT_BITMAP3,
+			vmcs12->eoi_exit_bitmap3);
+		vmcs_write16(GUEST_INTR_STATUS,
+			vmcs12->guest_intr_status);
+	}
+
	vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control);
}

@@ -8462,11 +9212,17 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
		vmcs_write32(TPR_THRESHOLD, vmcs12->tpr_threshold);
	}

+	if (cpu_has_vmx_msr_bitmap() &&
+	    exec_control & CPU_BASED_USE_MSR_BITMAPS &&
+	    nested_vmx_merge_msr_bitmap(vcpu, vmcs12)) {
+		vmcs_write64(MSR_BITMAP, __pa(vmx_msr_bitmap_nested));
+	} else
+		exec_control &= ~CPU_BASED_USE_MSR_BITMAPS;
+
	/*
-	 * Merging of IO and MSR bitmaps not currently supported.
+	 * Merging of IO bitmap not currently supported.
	 * Rather, exit every time.
	 */
-	exec_control &= ~CPU_BASED_USE_MSR_BITMAPS;
	exec_control &= ~CPU_BASED_USE_IO_BITMAPS;
	exec_control |= CPU_BASED_UNCOND_IO_EXITING;

@@ -8582,6 +9338,7 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
	int cpu;
	struct loaded_vmcs *vmcs02;
	bool ia32e;
+	u32 msr_entry_idx;

	if (!nested_vmx_check_permission(vcpu) ||
	    !nested_vmx_check_vmcs12(vcpu))
@@ -8616,41 +9373,42 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
		return 1;
	}

-	if ((vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_MSR_BITMAPS) &&
-			!PAGE_ALIGNED(vmcs12->msr_bitmap)) {
+	if (!nested_get_vmcs12_pages(vcpu, vmcs12)) {
		/*TODO: Also verify bits beyond physical address width are 0*/
		nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
		return 1;
	}

-	if (!nested_get_vmcs12_pages(vcpu, vmcs12)) {
-		/*TODO: Also verify bits beyond physical address width are 0*/
+	if (nested_vmx_check_msr_bitmap_controls(vcpu, vmcs12)) {
		nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
		return 1;
	}

-	if (vmcs12->vm_entry_msr_load_count > 0 ||
-	    vmcs12->vm_exit_msr_load_count > 0 ||
-	    vmcs12->vm_exit_msr_store_count > 0) {
-		pr_warn_ratelimited("%s: VMCS MSR_{LOAD,STORE} unsupported\n",
-				    __func__);
+	if (nested_vmx_check_apicv_controls(vcpu, vmcs12)) {
+		nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
+		return 1;
+	}
+
+	if (nested_vmx_check_msr_switch_controls(vcpu, vmcs12)) {
		nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
		return 1;
	}

	if (!vmx_control_verify(vmcs12->cpu_based_vm_exec_control,
-	      nested_vmx_true_procbased_ctls_low,
-	      nested_vmx_procbased_ctls_high) ||
+				vmx->nested.nested_vmx_true_procbased_ctls_low,
+				vmx->nested.nested_vmx_procbased_ctls_high) ||
	    !vmx_control_verify(vmcs12->secondary_vm_exec_control,
-	      nested_vmx_secondary_ctls_low, nested_vmx_secondary_ctls_high) ||
+				vmx->nested.nested_vmx_secondary_ctls_low,
+				vmx->nested.nested_vmx_secondary_ctls_high) ||
	    !vmx_control_verify(vmcs12->pin_based_vm_exec_control,
-	      nested_vmx_pinbased_ctls_low, nested_vmx_pinbased_ctls_high) ||
+				vmx->nested.nested_vmx_pinbased_ctls_low,
+				vmx->nested.nested_vmx_pinbased_ctls_high) ||
	    !vmx_control_verify(vmcs12->vm_exit_controls,
-	      nested_vmx_true_exit_ctls_low,
-	      nested_vmx_exit_ctls_high) ||
+				vmx->nested.nested_vmx_true_exit_ctls_low,
+				vmx->nested.nested_vmx_exit_ctls_high) ||
	    !vmx_control_verify(vmcs12->vm_entry_controls,
-	      nested_vmx_true_entry_ctls_low,
-	      nested_vmx_entry_ctls_high))
+				vmx->nested.nested_vmx_true_entry_ctls_low,
+				vmx->nested.nested_vmx_entry_ctls_high))
	{
		nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
		return 1;
@@ -8663,7 +9421,7 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
		return 1;
	}

-	if (!nested_cr0_valid(vmcs12, vmcs12->guest_cr0) ||
+	if (!nested_cr0_valid(vcpu, vmcs12->guest_cr0) ||
	    ((vmcs12->guest_cr4 & VMXON_CR4_ALWAYSON) != VMXON_CR4_ALWAYSON)) {
		nested_vmx_entry_failure(vcpu, vmcs12,
			EXIT_REASON_INVALID_STATE, ENTRY_FAIL_DEFAULT);
@@ -8739,10 +9497,21 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)

	vmx_segment_cache_clear(vmx);

-	vmcs12->launch_state = 1;
-
	prepare_vmcs02(vcpu, vmcs12);
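
+	/*
+	 * A VM entry that fails while loading the VM-entry MSR list takes
+	 * the "entry failure" path below, reporting the 1-based index of
+	 * the failing MSR entry as the exit qualification.
+	 */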
+	msr_entry_idx = nested_vmx_load_msr(vcpu,
+					    vmcs12->vm_entry_msr_load_addr,
+					    vmcs12->vm_entry_msr_load_count);
+	if (msr_entry_idx) {
+		leave_guest_mode(vcpu);
+		vmx_load_vmcs01(vcpu);
+		nested_vmx_entry_failure(vcpu, vmcs12,
+				EXIT_REASON_MSR_LOAD_FAIL, msr_entry_idx);
+		return 1;
+	}
+
+	vmcs12->launch_state = 1;
+
	if (vmcs12->guest_activity_state == GUEST_ACTIVITY_HLT)
		return kvm_emulate_halt(vcpu);

@@ -8869,9 +9638,10 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr)
		if (vmx->nested.nested_run_pending)
			return -EBUSY;
		nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT, 0, 0);
+		return 0;
	}

-	return 0;
+	return vmx_complete_nested_posted_interrupt(vcpu);
}

static u32 vmx_get_preemption_timer_value(struct kvm_vcpu *vcpu)
@@ -8981,6 +9751,9 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
		vmcs12->guest_pdptr3 = vmcs_read64(GUEST_PDPTR3);
	}

+	if (nested_cpu_has_vid(vmcs12))
+		vmcs12->guest_intr_status = vmcs_read16(GUEST_INTR_STATUS);
+
	vmcs12->vm_entry_controls =
		(vmcs12->vm_entry_controls & ~VM_ENTRY_IA32E_MODE) |
		(vm_entry_controls_get(to_vmx(vcpu)) & VM_ENTRY_IA32E_MODE);
@@ -9172,6 +9945,13 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,

	kvm_set_dr(vcpu, 7, 0x400);
	vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
+
+	if (cpu_has_vmx_msr_bitmap())
+		vmx_set_msr_bitmap(vcpu);
+
+	if (nested_vmx_load_msr(vcpu, vmcs12->vm_exit_msr_load_addr,
+				vmcs12->vm_exit_msr_load_count))
+		nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_MSR_FAIL);
}

/*
@@ -9193,6 +9973,10 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
	prepare_vmcs12(vcpu, vmcs12, exit_reason, exit_intr_info,
		       exit_qualification);

+	if (nested_vmx_store_msr(vcpu, vmcs12->vm_exit_msr_store_addr,
+				 vmcs12->vm_exit_msr_store_count))
+		nested_vmx_abort(vcpu, VMX_ABORT_SAVE_GUEST_MSR_FAIL);
+
	vmx_load_vmcs01(vcpu);

	if ((exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT)
@@ -9235,6 +10019,12 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
		nested_release_page(vmx->nested.virtual_apic_page);
		vmx->nested.virtual_apic_page = NULL;
	}
+	if (vmx->nested.pi_desc_page) {
+		kunmap(vmx->nested.pi_desc_page);
+		nested_release_page(vmx->nested.pi_desc_page);
+		vmx->nested.pi_desc_page = NULL;
+		vmx->nested.pi_desc = NULL;
+	}

	/*
	 * We are now running in L2, mmu_notifier will force to reload the
@@ -9301,6 +10091,31 @@ static void vmx_sched_in(struct kvm_vcpu *vcpu, int cpu)
		shrink_ple_window(vcpu);
}

+static void vmx_slot_enable_log_dirty(struct kvm *kvm,
+				      struct kvm_memory_slot *slot)
+{
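+	/*
+	 * PML logs dirty GPAs at 4K granularity, so clear the D-bits on
+	 * leaf SPTEs (the next write to each page then gets logged) and
+	 * write-protect large pages so they are remapped as 4K pages.
+	 */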
+	kvm_mmu_slot_leaf_clear_dirty(kvm, slot);
+	kvm_mmu_slot_largepage_remove_write_access(kvm, slot);
+}
+
+static void vmx_slot_disable_log_dirty(struct kvm *kvm,
+				       struct kvm_memory_slot *slot)
+{
+	kvm_mmu_slot_set_dirty(kvm, slot);
+}
+
+static void vmx_flush_log_dirty(struct kvm *kvm)
+{
+	kvm_flush_pml_buffers(kvm);
+}
+
+static void vmx_enable_log_dirty_pt_masked(struct kvm *kvm,
+					   struct kvm_memory_slot *memslot,
+					   gfn_t offset, unsigned long mask)
+{
+	kvm_mmu_clear_dirty_pt_masked(kvm, memslot, offset, mask);
+}
+
static struct kvm_x86_ops vmx_x86_ops = {
	.cpu_has_kvm_support = cpu_has_kvm_support,
	.disabled_by_bios = vmx_disabled_by_bios,
@@ -9409,6 +10224,11 @@ static struct kvm_x86_ops vmx_x86_ops = {
	.check_nested_events = vmx_check_nested_events,

	.sched_in = vmx_sched_in,
+
+	.slot_enable_log_dirty = vmx_slot_enable_log_dirty,
+	.slot_disable_log_dirty = vmx_slot_disable_log_dirty,
+	.flush_log_dirty = vmx_flush_log_dirty,
+	.enable_log_dirty_pt_masked = vmx_enable_log_dirty_pt_masked,
};

static int __init vmx_init(void)
|