7 년 전 · de3a0021a6
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -185,7 +185,6 @@ module_param(ple_window_max, int, S_IRUGO);
 
				 extern const ulong vmx_return;
			
 
				 
			
 
				 #define NR_AUTOLOAD_MSRS 8
			
 
				-#define VMCS02_POOL_SIZE 1
			
 
				 
			
 
				 struct vmcs {
			
 
				 	u32 revision_id;
			
@@ -226,7 +225,7 @@ struct shared_msr_entry {
 
				  * stored in guest memory specified by VMPTRLD, but is opaque to the guest,
			
 
				  * which must access it using VMREAD/VMWRITE/VMCLEAR instructions.
			
 
				  * More than one of these structures may exist, if L1 runs multiple L2 guests.
			
 
				- * nested_vmx_run() will use the data here to build a vmcs02: a VMCS for the
			
 
				+ * nested_vmx_run() will use the data here to build the vmcs02: a VMCS for the
			
 
				  * underlying hardware which will be used to run L2.
			
 
				  * This structure is packed to ensure that its layout is identical across
			
 
				  * machines (necessary for live migration).
			
@@ -409,13 +408,6 @@ struct __packed vmcs12 {
 
				  */
			
 
				 #define VMCS12_SIZE 0x1000
			
 
				 
			
 
				-/* Used to remember the last vmcs02 used for some recently used vmcs12s */
			
 
				-struct vmcs02_list {
			
 
				-	struct list_head list;
			
 
				-	gpa_t vmptr;
			
 
				-	struct loaded_vmcs vmcs02;
			
 
				-};
			
 
				-
			
 
				 /*
			
 
				  * The nested_vmx structure is part of vcpu_vmx, and holds information we need
			
 
				  * for correct emulation of VMX (i.e., nested VMX) on this vcpu.
			
@@ -440,15 +432,15 @@ struct nested_vmx {
 
				 	 */
			
 
				 	bool sync_shadow_vmcs;
			
 
				 
			
 
				-	/* vmcs02_list cache of VMCSs recently used to run L2 guests */
			
 
				-	struct list_head vmcs02_pool;
			
 
				-	int vmcs02_num;
			
 
				 	bool change_vmcs01_virtual_x2apic_mode;
			
 
				 	/* L2 must run next, and mustn't decide to exit to L1. */
			
 
				 	bool nested_run_pending;
			
 
				+
			
 
				+	struct loaded_vmcs vmcs02;
			
 
				+
			
 
				 	/*
			
 
				-	 * Guest pages referred to in vmcs02 with host-physical pointers, so
			
 
				-	 * we must keep them pinned while L2 runs.
			
 
				+	 * Guest pages referred to in the vmcs02 with host-physical
			
 
				+	 * pointers, so we must keep them pinned while L2 runs.
			
 
				 	 */
			
 
				 	struct page *apic_access_page;
			
 
				 	struct page *virtual_apic_page;
			
@@ -6973,94 +6965,6 @@ static int handle_monitor(struct kvm_vcpu *vcpu)
 
				 	return handle_nop(vcpu);
			
 
				 }
			
 
				 
			
 
				-/*
			
 
				- * To run an L2 guest, we need a vmcs02 based on the L1-specified vmcs12.
			
 
				- * We could reuse a single VMCS for all the L2 guests, but we also want the
			
 
				- * option to allocate a separate vmcs02 for each separate loaded vmcs12 - this
			
 
				- * allows keeping them loaded on the processor, and in the future will allow
			
 
				- * optimizations where prepare_vmcs02 doesn't need to set all the fields on
			
 
				- * every entry if they never change.
			
 
				- * So we keep, in vmx->nested.vmcs02_pool, a cache of size VMCS02_POOL_SIZE
			
 
				- * (>=0) with a vmcs02 for each recently loaded vmcs12s, most recent first.
			
 
				- *
			
 
				- * The following functions allocate and free a vmcs02 in this pool.
			
 
				- */
			
 
				-
			
 
				-/* Get a VMCS from the pool to use as vmcs02 for the current vmcs12. */
			
 
				-static struct loaded_vmcs *nested_get_current_vmcs02(struct vcpu_vmx *vmx)
			
 
				-{
			
 
				-	struct vmcs02_list *item;
			
 
				-	list_for_each_entry(item, &vmx->nested.vmcs02_pool, list)
			
 
				-		if (item->vmptr == vmx->nested.current_vmptr) {
			
 
				-			list_move(&item->list, &vmx->nested.vmcs02_pool);
			
 
				-			return &item->vmcs02;
			
 
				-		}
			
 
				-
			
 
				-	if (vmx->nested.vmcs02_num >= max(VMCS02_POOL_SIZE, 1)) {
			
 
				-		/* Recycle the least recently used VMCS. */
			
 
				-		item = list_last_entry(&vmx->nested.vmcs02_pool,
			
 
				-				       struct vmcs02_list, list);
			
 
				-		item->vmptr = vmx->nested.current_vmptr;
			
 
				-		list_move(&item->list, &vmx->nested.vmcs02_pool);
			
 
				-		return &item->vmcs02;
			
 
				-	}
			
 
				-
			
 
				-	/* Create a new VMCS */
			
 
				-	item = kzalloc(sizeof(struct vmcs02_list), GFP_KERNEL);
			
 
				-	if (!item)
			
 
				-		return NULL;
			
 
				-	item->vmcs02.vmcs = alloc_vmcs();
			
 
				-	item->vmcs02.shadow_vmcs = NULL;
			
 
				-	if (!item->vmcs02.vmcs) {
			
 
				-		kfree(item);
			
 
				-		return NULL;
			
 
				-	}
			
 
				-	loaded_vmcs_init(&item->vmcs02);
			
 
				-	item->vmptr = vmx->nested.current_vmptr;
			
 
				-	list_add(&(item->list), &(vmx->nested.vmcs02_pool));
			
 
				-	vmx->nested.vmcs02_num++;
			
 
				-	return &item->vmcs02;
			
 
				-}
			
 
				-
			
 
				-/* Free and remove from pool a vmcs02 saved for a vmcs12 (if there is one) */
			
 
				-static void nested_free_vmcs02(struct vcpu_vmx *vmx, gpa_t vmptr)
			
 
				-{
			
 
				-	struct vmcs02_list *item;
			
 
				-	list_for_each_entry(item, &vmx->nested.vmcs02_pool, list)
			
 
				-		if (item->vmptr == vmptr) {
			
 
				-			free_loaded_vmcs(&item->vmcs02);
			
 
				-			list_del(&item->list);
			
 
				-			kfree(item);
			
 
				-			vmx->nested.vmcs02_num--;
			
 
				-			return;
			
 
				-		}
			
 
				-}
			
 
				-
			
 
				-/*
			
 
				- * Free all VMCSs saved for this vcpu, except the one pointed by
			
 
				- * vmx->loaded_vmcs. We must be running L1, so vmx->loaded_vmcs
			
 
				- * must be &vmx->vmcs01.
			
 
				- */
			
 
				-static void nested_free_all_saved_vmcss(struct vcpu_vmx *vmx)
			
 
				-{
			
 
				-	struct vmcs02_list *item, *n;
			
 
				-
			
 
				-	WARN_ON(vmx->loaded_vmcs != &vmx->vmcs01);
			
 
				-	list_for_each_entry_safe(item, n, &vmx->nested.vmcs02_pool, list) {
			
 
				-		/*
			
 
				-		 * Something will leak if the above WARN triggers.  Better than
			
 
				-		 * a use-after-free.
			
 
				-		 */
			
 
				-		if (vmx->loaded_vmcs == &item->vmcs02)
			
 
				-			continue;
			
 
				-
			
 
				-		free_loaded_vmcs(&item->vmcs02);
			
 
				-		list_del(&item->list);
			
 
				-		kfree(item);
			
 
				-		vmx->nested.vmcs02_num--;
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				 /*
			
 
				  * The following 3 functions, nested_vmx_succeed()/failValid()/failInvalid(),
			
 
				  * set the success or error code of an emulated VMX instruction, as specified
			
@@ -7242,6 +7146,12 @@ static int enter_vmx_operation(struct kvm_vcpu *vcpu)
 
				 	struct vcpu_vmx *vmx = to_vmx(vcpu);
			
 
				 	struct vmcs *shadow_vmcs;
			
 
				 
			
 
				+	vmx->nested.vmcs02.vmcs = alloc_vmcs();
			
 
				+	vmx->nested.vmcs02.shadow_vmcs = NULL;
			
 
				+	if (!vmx->nested.vmcs02.vmcs)
			
 
				+		goto out_vmcs02;
			
 
				+	loaded_vmcs_init(&vmx->nested.vmcs02);
			
 
				+
			
 
				 	if (cpu_has_vmx_msr_bitmap()) {
			
 
				 		vmx->nested.msr_bitmap =
			
 
				 				(unsigned long *)__get_free_page(GFP_KERNEL);
			
@@ -7264,9 +7174,6 @@ static int enter_vmx_operation(struct kvm_vcpu *vcpu)
 
				 		vmx->vmcs01.shadow_vmcs = shadow_vmcs;
			
 
				 	}
			
 
				 
			
 
				-	INIT_LIST_HEAD(&(vmx->nested.vmcs02_pool));
			
 
				-	vmx->nested.vmcs02_num = 0;
			
 
				-
			
 
				 	hrtimer_init(&vmx->nested.preemption_timer, CLOCK_MONOTONIC,
			
 
				 		     HRTIMER_MODE_REL_PINNED);
			
 
				 	vmx->nested.preemption_timer.function = vmx_preemption_timer_fn;
			
@@ -7281,6 +7188,9 @@ out_cached_vmcs12:
 
				 	free_page((unsigned long)vmx->nested.msr_bitmap);
			
 
				 
			
 
				 out_msr_bitmap:
			
 
				+	free_loaded_vmcs(&vmx->nested.vmcs02);
			
 
				+
			
 
				+out_vmcs02:
			
 
				 	return -ENOMEM;
			
 
				 }
			
 
				 
			
@@ -7434,7 +7344,7 @@ static void free_nested(struct vcpu_vmx *vmx)
 
				 		vmx->vmcs01.shadow_vmcs = NULL;
			
 
				 	}
			
 
				 	kfree(vmx->nested.cached_vmcs12);
			
 
				-	/* Unpin physical memory we referred to in current vmcs02 */
			
 
				+	/* Unpin physical memory we referred to in the vmcs02 */
			
 
				 	if (vmx->nested.apic_access_page) {
			
 
				 		kvm_release_page_dirty(vmx->nested.apic_access_page);
			
 
				 		vmx->nested.apic_access_page = NULL;
			
@@ -7450,7 +7360,7 @@ static void free_nested(struct vcpu_vmx *vmx)
 
				 		vmx->nested.pi_desc = NULL;
			
 
				 	}
			
 
				 
			
 
				-	nested_free_all_saved_vmcss(vmx);
			
 
				+	free_loaded_vmcs(&vmx->nested.vmcs02);
			
 
				 }
			
 
				 
			
 
				 /* Emulate the VMXOFF instruction */
			
@@ -7493,8 +7403,6 @@ static int handle_vmclear(struct kvm_vcpu *vcpu)
 
				 			vmptr + offsetof(struct vmcs12, launch_state),
			
 
				 			&zero, sizeof(zero));
			
 
				 
			
 
				-	nested_free_vmcs02(vmx, vmptr);
			
 
				-
			
 
				 	nested_vmx_succeed(vcpu);
			
 
				 	return kvm_skip_emulated_instruction(vcpu);
			
 
				 }
			
@@ -8406,10 +8314,11 @@ static bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason)
 
				 
			
 
				 	/*
			
 
				 	 * The host physical addresses of some pages of guest memory
			
 
				-	 * are loaded into VMCS02 (e.g. L1's Virtual APIC Page). The CPU
			
 
				-	 * may write to these pages via their host physical address while
			
 
				-	 * L2 is running, bypassing any address-translation-based dirty
			
 
				-	 * tracking (e.g. EPT write protection).
			
 
				+	 * are loaded into the vmcs02 (e.g. vmcs12's Virtual APIC
			
 
				+	 * Page). The CPU may write to these pages via their host
			
 
				+	 * physical address while L2 is running, bypassing any
			
 
				+	 * address-translation-based dirty tracking (e.g. EPT write
			
 
				+	 * protection).
			
 
				 	 *
			
 
				 	 * Mark them dirty on every exit from L2 to prevent them from
			
 
				 	 * getting out of sync with dirty tracking.
			
@@ -10903,20 +10812,15 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry)
 
				 {
			
 
				 	struct vcpu_vmx *vmx = to_vmx(vcpu);
			
 
				 	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
			
 
				-	struct loaded_vmcs *vmcs02;
			
 
				 	u32 msr_entry_idx;
			
 
				 	u32 exit_qual;
			
 
				 
			
 
				-	vmcs02 = nested_get_current_vmcs02(vmx);
			
 
				-	if (!vmcs02)
			
 
				-		return -ENOMEM;
			
 
				-
			
 
				 	enter_guest_mode(vcpu);
			
 
				 
			
 
				 	if (!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS))
			
 
				 		vmx->nested.vmcs01_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL);
			
 
				 
			
 
				-	vmx_switch_vmcs(vcpu, vmcs02);
			
 
				+	vmx_switch_vmcs(vcpu, &vmx->nested.vmcs02);
			
 
				 	vmx_segment_cache_clear(vmx);
			
 
				 
			
 
				 	if (prepare_vmcs02(vcpu, vmcs12, from_vmentry, &exit_qual)) {
			
@@ -11534,10 +11438,6 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
 
				 	vm_exit_controls_reset_shadow(vmx);
			
 
				 	vmx_segment_cache_clear(vmx);
			
 
				 
			
 
				-	/* if no vmcs02 cache requested, remove the one we used */
			
 
				-	if (VMCS02_POOL_SIZE == 0)
			
 
				-		nested_free_vmcs02(vmx, vmx->nested.current_vmptr);
			
 
				-
			
 
				 	/* Update any VMCS fields that might have changed while L2 ran */
			
 
				 	vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.nr);
			
 
				 	vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.nr);