Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull KVM fixes from Paolo Bonzini:
 "Early fixes for x86.

  Instead of the (botched) revert, the lockdep/might_sleep splat has a
  real fix provided by Andrea"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
  kvm: nVMX: Allow L1 to intercept software exceptions (#BP and #OF)
  kvm: take srcu lock around kvm_steal_time_set_preempted()
  kvm: fix schedule in atomic in kvm_steal_time_set_preempted()
  KVM: hyperv: fix locking of struct kvm_hv fields
  KVM: x86: Expose Intel AVX512IFMA/AVX512VBMI/SHA features to guest.
  kvm: nVMX: Correct a VMX instruction error code for VMPTRLD
Linus Torvalds, 8 years ago · commit 45d36906e2

+ 6 - 2
Documentation/virtual/kvm/locking.txt

@@ -13,8 +13,12 @@ The acquisition orders for mutexes are as follows:
 - kvm->slots_lock is taken outside kvm->irq_lock, though acquiring
   them together is quite rare.
 
-For spinlocks, kvm_lock is taken outside kvm->mmu_lock.  Everything
-else is a leaf: no other lock is taken inside the critical sections.
+On x86, vcpu->mutex is taken outside kvm->arch.hyperv.hv_lock.
+
+For spinlocks, kvm_lock is taken outside kvm->mmu_lock.
+
+Everything else is a leaf: no other lock is taken inside the critical
+sections.
 
 2: Exception
 ------------

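The ordering rule added above is the point of the hv_lock fix: any path that needs both locks must take vcpu->mutex first and kvm->arch.hyperv.hv_lock second, on every path. A minimal userspace sketch of the idiom, with pthread mutexes standing in for kernel mutexes (all names here are illustrative, not kernel API):

/*
 * Sketch, not kernel code: two threads that always acquire the locks
 * in the documented order ("outer" before "inner") cannot deadlock
 * against each other; an ABBA ordering on any one path could.
 */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t vcpu_mutex = PTHREAD_MUTEX_INITIALIZER; /* outer */
static pthread_mutex_t hv_lock    = PTHREAD_MUTEX_INITIALIZER; /* inner */

static void *worker(void *arg)
{
	pthread_mutex_lock(&vcpu_mutex);   /* outer lock first ... */
	pthread_mutex_lock(&hv_lock);      /* ... inner lock second */
	printf("thread %ld holds both locks\n", (long)arg);
	pthread_mutex_unlock(&hv_lock);
	pthread_mutex_unlock(&vcpu_mutex);
	return NULL;
}

int main(void)
{
	pthread_t a, b;

	pthread_create(&a, NULL, worker, (void *)1L);
	pthread_create(&b, NULL, worker, (void *)2L);
	pthread_join(a, NULL);
	pthread_join(b, NULL);
	return 0;
}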
+ 1 - 0
arch/x86/include/asm/kvm_host.h

@@ -704,6 +704,7 @@ struct kvm_apic_map {
 
 /* Hyper-V emulation context */
 struct kvm_hv {
+	struct mutex hv_lock;
 	u64 hv_guest_os_id;
 	u64 hv_hypercall;
 	u64 hv_tsc_page;

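Embedding the mutex directly in struct kvm_hv scopes it to exactly the fields it serializes; it is initialized in kvm_arch_init_vm(), in the x86.c hunk further down. A standalone C sketch of the same struct-plus-embedded-lock idiom, with pthreads in place of the kernel mutex and purely illustrative names:

#include <pthread.h>
#include <stdio.h>

struct kvm_hv_like {
	pthread_mutex_t hv_lock;      /* guards every field below it */
	unsigned long hv_guest_os_id;
	unsigned long hv_hypercall;
	unsigned long hv_tsc_page;
};

int main(void)
{
	struct kvm_hv_like hv = { .hv_lock = PTHREAD_MUTEX_INITIALIZER };

	pthread_mutex_lock(&hv.hv_lock);   /* writers serialize here */
	hv.hv_hypercall = 0x1000;
	pthread_mutex_unlock(&hv.hv_lock);
	printf("hv_hypercall = %#lx\n", hv.hv_hypercall);
	return 0;
}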
+ 5 - 4
arch/x86/kvm/cpuid.c

@@ -373,16 +373,17 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 	const u32 kvm_cpuid_7_0_ebx_x86_features =
 		F(FSGSBASE) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) |
 		F(BMI2) | F(ERMS) | f_invpcid | F(RTM) | f_mpx | F(RDSEED) |
-		F(ADX) | F(SMAP) | F(AVX512F) | F(AVX512PF) | F(AVX512ER) |
-		F(AVX512CD) | F(CLFLUSHOPT) | F(CLWB) | F(AVX512DQ) |
-		F(AVX512BW) | F(AVX512VL);
+		F(ADX) | F(SMAP) | F(AVX512IFMA) | F(AVX512F) | F(AVX512PF) |
+		F(AVX512ER) | F(AVX512CD) | F(CLFLUSHOPT) | F(CLWB) | F(AVX512DQ) |
+		F(SHA_NI) | F(AVX512BW) | F(AVX512VL);
 
 	/* cpuid 0xD.1.eax */
 	const u32 kvm_cpuid_D_1_eax_x86_features =
 		F(XSAVEOPT) | F(XSAVEC) | F(XGETBV1) | f_xsaves;
 
 	/* cpuid 7.0.ecx*/
-	const u32 kvm_cpuid_7_0_ecx_x86_features = F(PKU) | 0 /*OSPKE*/;
+	const u32 kvm_cpuid_7_0_ecx_x86_features =
+		F(AVX512VBMI) | F(PKU) | 0 /*OSPKE*/;
 
 	/* cpuid 7.0.edx*/
 	const u32 kvm_cpuid_7_0_edx_x86_features =

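The F() bits above feed the CPUID leaf 7, subleaf 0 values that KVM reports to the guest, which then sees the features through an ordinary CPUID query. A hedged userspace sketch; the bit positions are the architectural ones from the Intel SDM (AVX512IFMA is EBX bit 21, SHA-NI is EBX bit 29, AVX512VBMI is ECX bit 1), and it assumes the CPU supports leaf 7 at all:

#include <cpuid.h>
#include <stdio.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx;

	/* CPUID.(EAX=7, ECX=0): structured extended feature flags. */
	__cpuid_count(7, 0, eax, ebx, ecx, edx);

	printf("AVX512IFMA: %u\n", !!(ebx & (1u << 21)));
	printf("SHA-NI:     %u\n", !!(ebx & (1u << 29)));
	printf("AVX512VBMI: %u\n", !!(ecx & (1u << 1)));
	return 0;
}

Run inside a guest, the three lines flip from 0 to 1 once the VMM passes these bits through.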
+ 15 - 9
arch/x86/kvm/hyperv.c

@@ -852,6 +852,10 @@ void kvm_hv_setup_tsc_page(struct kvm *kvm,
 	if (!(hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE))
 		return;
 
+	mutex_lock(&kvm->arch.hyperv.hv_lock);
+	if (!(hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE))
+		goto out_unlock;
+
 	gfn = hv->hv_tsc_page >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
 	/*
 	 * Because the TSC parameters only vary when there is a
@@ -859,7 +863,7 @@ void kvm_hv_setup_tsc_page(struct kvm *kvm,
 	 */
 	if (unlikely(kvm_read_guest(kvm, gfn_to_gpa(gfn),
 				    &tsc_seq, sizeof(tsc_seq))))
-		return;
+		goto out_unlock;
 
 	/*
 	 * While we're computing and writing the parameters, force the
@@ -868,15 +872,15 @@ void kvm_hv_setup_tsc_page(struct kvm *kvm,
 	hv->tsc_ref.tsc_sequence = 0;
 	if (kvm_write_guest(kvm, gfn_to_gpa(gfn),
 			    &hv->tsc_ref, sizeof(hv->tsc_ref.tsc_sequence)))
-		return;
+		goto out_unlock;
 
 	if (!compute_tsc_page_parameters(hv_clock, &hv->tsc_ref))
-		return;
+		goto out_unlock;
 
 	/* Ensure sequence is zero before writing the rest of the struct.  */
 	smp_wmb();
 	if (kvm_write_guest(kvm, gfn_to_gpa(gfn), &hv->tsc_ref, sizeof(hv->tsc_ref)))
-		return;
+		goto out_unlock;
 
 	/*
 	 * Now switch to the TSC page mechanism by writing the sequence.
@@ -891,6 +895,8 @@ void kvm_hv_setup_tsc_page(struct kvm *kvm,
 	hv->tsc_ref.tsc_sequence = tsc_seq;
 	kvm_write_guest(kvm, gfn_to_gpa(gfn),
 			&hv->tsc_ref, sizeof(hv->tsc_ref.tsc_sequence));
+out_unlock:
+	mutex_unlock(&kvm->arch.hyperv.hv_lock);
 }
 
 static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data,
@@ -1142,9 +1148,9 @@ int kvm_hv_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
 	if (kvm_hv_msr_partition_wide(msr)) {
 		int r;
 
-		mutex_lock(&vcpu->kvm->lock);
+		mutex_lock(&vcpu->kvm->arch.hyperv.hv_lock);
 		r = kvm_hv_set_msr_pw(vcpu, msr, data, host);
-		mutex_unlock(&vcpu->kvm->lock);
+		mutex_unlock(&vcpu->kvm->arch.hyperv.hv_lock);
 		return r;
 	} else
 		return kvm_hv_set_msr(vcpu, msr, data, host);
@@ -1155,9 +1161,9 @@ int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
 	if (kvm_hv_msr_partition_wide(msr)) {
 		int r;
 
-		mutex_lock(&vcpu->kvm->lock);
+		mutex_lock(&vcpu->kvm->arch.hyperv.hv_lock);
 		r = kvm_hv_get_msr_pw(vcpu, msr, pdata);
-		mutex_unlock(&vcpu->kvm->lock);
+		mutex_unlock(&vcpu->kvm->arch.hyperv.hv_lock);
 		return r;
 	} else
 		return kvm_hv_get_msr(vcpu, msr, pdata);
@@ -1165,7 +1171,7 @@ int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
 
 bool kvm_hv_hypercall_enabled(struct kvm *kvm)
 {
-	return kvm->arch.hyperv.hv_hypercall & HV_X64_MSR_HYPERCALL_ENABLE;
+	return READ_ONCE(kvm->arch.hyperv.hv_hypercall) & HV_X64_MSR_HYPERCALL_ENABLE;
 }
 
 static void kvm_hv_hypercall_set_result(struct kvm_vcpu *vcpu, u64 result)

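The kvm_hv_setup_tsc_page() hunk has a check/lock/recheck shape: the first, unlocked test is only a fast-path bail-out, and it is repeated under hv_lock because another vCPU may rewrite the MSR in between; the READ_ONCE() in kvm_hv_hypercall_enabled() is the matching lockless-reader side. A standalone C11 sketch of the pattern, with a pthread mutex for hv_lock and a relaxed atomic load approximating READ_ONCE() (all names illustrative):

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

#define TSC_REFERENCE_ENABLE 1UL

static pthread_mutex_t hv_lock = PTHREAD_MUTEX_INITIALIZER;
static _Atomic unsigned long hv_tsc_page = TSC_REFERENCE_ENABLE;

static void setup_tsc_page(void)
{
	/* Unlocked fast path: cheap bail-out when the feature is off. */
	if (!(atomic_load_explicit(&hv_tsc_page, memory_order_relaxed)
	      & TSC_REFERENCE_ENABLE))
		return;

	pthread_mutex_lock(&hv_lock);
	/* Recheck under the lock: the value may have changed since the
	 * unlocked test, and only this check is authoritative. */
	if (!(atomic_load_explicit(&hv_tsc_page, memory_order_relaxed)
	      & TSC_REFERENCE_ENABLE))
		goto out_unlock;

	puts("publishing TSC page contents under hv_lock");
out_unlock:
	pthread_mutex_unlock(&hv_lock);
}

int main(void)
{
	setup_tsc_page();
	return 0;
}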
+ 6 - 7
arch/x86/kvm/vmx.c

@@ -1389,10 +1389,10 @@ static inline bool nested_cpu_has_posted_intr(struct vmcs12 *vmcs12)
 	return vmcs12->pin_based_vm_exec_control & PIN_BASED_POSTED_INTR;
 }
 
-static inline bool is_exception(u32 intr_info)
+static inline bool is_nmi(u32 intr_info)
 {
 	return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK))
-		== (INTR_TYPE_HARD_EXCEPTION | INTR_INFO_VALID_MASK);
+		== (INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK);
 }
 
 static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
@@ -5728,7 +5728,7 @@ static int handle_exception(struct kvm_vcpu *vcpu)
 	if (is_machine_check(intr_info))
 		return handle_machine_check(vcpu);
 
-	if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR)
+	if (is_nmi(intr_info))
 		return 1;  /* already handled by vmx_vcpu_run() */
 
 	if (is_no_device(intr_info)) {
@@ -7122,7 +7122,7 @@ static int nested_vmx_check_vmptr(struct kvm_vcpu *vcpu, int exit_reason,
 
 		if (vmptr == vmx->nested.vmxon_ptr) {
 			nested_vmx_failValid(vcpu,
-					     VMXERR_VMCLEAR_VMXON_POINTER);
+					     VMXERR_VMPTRLD_VMXON_POINTER);
 			return kvm_skip_emulated_instruction(vcpu);
 		}
 		break;
@@ -8170,7 +8170,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
 
 	switch (exit_reason) {
 	case EXIT_REASON_EXCEPTION_NMI:
-		if (!is_exception(intr_info))
+		if (is_nmi(intr_info))
 			return false;
 		else if (is_page_fault(intr_info))
 			return enable_ept;
@@ -8765,8 +8765,7 @@ static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx)
 		kvm_machine_check();
 
 	/* We need to handle NMIs before interrupts are enabled */
-	if ((exit_intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR &&
-	    (exit_intr_info & INTR_INFO_VALID_MASK)) {
+	if (is_nmi(exit_intr_info)) {
 		kvm_before_handle_nmi(&vmx->vcpu);
 		asm("int $2");
 		kvm_after_handle_nmi(&vmx->vcpu);

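is_nmi() folds two tests into one masked compare: the "valid" bit and the three-bit event type of the VM-exit interruption-information field must both match. A standalone sketch of the decode; the constants follow the Intel SDM layout (bits 0-7 vector, bits 8-10 type, bit 31 valid; type 2 is NMI, type 3 a hardware exception):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define INTR_INFO_INTR_TYPE_MASK 0x700u       /* bits 8-10: event type */
#define INTR_INFO_VALID_MASK     0x80000000u  /* bit 31: field is valid */
#define INTR_TYPE_NMI_INTR       (2u << 8)
#define INTR_TYPE_HARD_EXCEPTION (3u << 8)

static bool is_nmi(uint32_t intr_info)
{
	/* One compare checks "valid" and "type == NMI" together. */
	return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK))
		== (INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK);
}

int main(void)
{
	uint32_t nmi = INTR_INFO_VALID_MASK | INTR_TYPE_NMI_INTR | 2;       /* NMI, vector 2 */
	uint32_t bp  = INTR_INFO_VALID_MASK | INTR_TYPE_HARD_EXCEPTION | 3; /* exception, vector 3 */

	printf("nmi: %d, exception: %d\n", is_nmi(nmi), is_nmi(bp));
	return 0;
}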
+ 18 - 0
arch/x86/kvm/x86.c

@@ -2844,7 +2844,24 @@ static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu)
 
 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
 {
+	int idx;
+	/*
+	 * Disable page faults because we're in atomic context here.
+	 * kvm_write_guest_offset_cached() would call might_fault()
+	 * that relies on pagefault_disable() to tell if there's a
+	 * bug. NOTE: the write to guest memory may not go through if
+	 * during postcopy live migration or if there's heavy guest
+	 * paging.
+	 */
+	pagefault_disable();
+	/*
+	 * kvm_memslots() will be called by
+	 * kvm_write_guest_offset_cached() so take the srcu lock.
+	 */
+	idx = srcu_read_lock(&vcpu->kvm->srcu);
 	kvm_steal_time_set_preempted(vcpu);
+	srcu_read_unlock(&vcpu->kvm->srcu, idx);
+	pagefault_enable();
 	kvm_x86_ops->vcpu_put(vcpu);
 	kvm_put_guest_fpu(vcpu);
 	vcpu->arch.last_host_tsc = rdtsc();
@@ -7881,6 +7898,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 
 	raw_spin_lock_init(&kvm->arch.tsc_write_lock);
 	mutex_init(&kvm->arch.apic_map_lock);
+	mutex_init(&kvm->arch.hyperv.hv_lock);
 	spin_lock_init(&kvm->arch.pvclock_gtod_sync_lock);
 
 	kvm->arch.kvmclock_offset = -ktime_get_boot_ns();
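
The kvm_arch_vcpu_put() hunk pairs two guards: pagefault_disable() turns a would-be sleeping fault into an error return while preemption is off, and the SRCU read-side section keeps the memslot array from being freed while kvm_write_guest_offset_cached() walks it. A rough userspace analogy of the read-side pattern, using a pthread rwlock where the kernel uses SRCU (real SRCU readers never block the updater, so this is only an approximation; names are illustrative):

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct memslots { int generation; };

static pthread_rwlock_t slots_lock = PTHREAD_RWLOCK_INITIALIZER;
static struct memslots *slots;

static void reader(void)
{
	pthread_rwlock_rdlock(&slots_lock);     /* ~ srcu_read_lock() */
	printf("generation %d\n", slots->generation);
	pthread_rwlock_unlock(&slots_lock);     /* ~ srcu_read_unlock() */
}

static void updater(void)
{
	struct memslots *old, *new_slots = malloc(sizeof(*new_slots));

	new_slots->generation = 1;
	pthread_rwlock_wrlock(&slots_lock);     /* ~ update + synchronize_srcu() */
	old = slots;
	slots = new_slots;
	pthread_rwlock_unlock(&slots_lock);
	free(old);                              /* safe: no reader can still see it */
}

int main(void)
{
	slots = calloc(1, sizeof(*slots));
	reader();
	updater();
	reader();
	return 0;
}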