8 年之前 · 628f54cc64
--- a/arch/x86/hyperv/mmu.c
+++ b/arch/x86/hyperv/mmu.c
@@ -18,11 +18,25 @@ struct hv_flush_pcpu {
 
															 	u64 gva_list[];
														
 
															 };
														
 
															+/* HvFlushVirtualAddressSpaceEx, HvFlushVirtualAddressListEx hypercalls */
														
 
															+struct hv_flush_pcpu_ex {
														
 
															+	u64 address_space;
														
 
															+	u64 flags;
														
 
															+	struct {
														
 
															+		u64 format;
														
 
															+		u64 valid_bank_mask;
														
 
															+		u64 bank_contents[];
														
 
															+	} hv_vp_set;
														
 
															+	u64 gva_list[];
														
 
															+};
														
 
															+
														
 
															 /* Each gva in gva_list encodes up to 4096 pages to flush */
														
 
															 #define HV_TLB_FLUSH_UNIT (4096 * PAGE_SIZE)
														
 
															 static struct hv_flush_pcpu __percpu *pcpu_flush;
														
 
															+static struct hv_flush_pcpu_ex __percpu *pcpu_flush_ex;
														
 
															+
														
 
															 /*
														
 
															  * Fills in gva_list starting from offset. Returns the number of items added.
														
 
															  */
														
@@ -53,6 +67,34 @@ static inline int fill_gva_list(u64 gva_list[], int offset,
 
															 	return gva_n - offset;
														
 
															 }
														
 
															+/* Return the number of banks in the resulting vp_set */
														
 
															+static inline int cpumask_to_vp_set(struct hv_flush_pcpu_ex *flush,
														
 
															+				    const struct cpumask *cpus)
														
 
															+{
														
 
															+	int cpu, vcpu, vcpu_bank, vcpu_offset, nr_bank = 1;
														
 
															+
														
 
															+	/*
														
 
															+	 * Some banks may end up being empty but this is acceptable.
														
 
															+	 */
														
 
															+	for_each_cpu(cpu, cpus) {
														
 
															+		vcpu = hv_cpu_number_to_vp_number(cpu);
														
 
															+		vcpu_bank = vcpu / 64;
														
 
															+		vcpu_offset = vcpu % 64;
														
 
															+
														
 
															+		/* valid_bank_mask can represent up to 64 banks */
														
 
															+		if (vcpu_bank >= 64)
														
 
															+			return 0;
														
 
															+
														
 
															+		__set_bit(vcpu_offset, (unsigned long *)
														
 
															+			  &flush->hv_vp_set.bank_contents[vcpu_bank]);
														
 
															+		if (vcpu_bank >= nr_bank)
														
 
															+			nr_bank = vcpu_bank + 1;
														
 
															+	}
														
 
															+	flush->hv_vp_set.valid_bank_mask = GENMASK_ULL(nr_bank - 1, 0);
														
 
															+
														
 
															+	return nr_bank;
														
 
															+}
														
 
															+
														
 
															 static void hyperv_flush_tlb_others(const struct cpumask *cpus,
														
 
															 				    const struct flush_tlb_info *info)
														
 
															 {
														
@@ -122,17 +164,102 @@ do_native:
 
															 	native_flush_tlb_others(cpus, info);
														
 
															 }
														
 
															+static void hyperv_flush_tlb_others_ex(const struct cpumask *cpus,
														
 
															+				       const struct flush_tlb_info *info)
														
 
															+{
														
 
															+	int nr_bank = 0, max_gvas, gva_n;
														
 
															+	struct hv_flush_pcpu_ex *flush;
														
 
															+	u64 status = U64_MAX;
														
 
															+	unsigned long flags;
														
 
															+
														
 
															+	if (!pcpu_flush_ex || !hv_hypercall_pg)
														
 
															+		goto do_native;
														
 
															+
														
 
															+	if (cpumask_empty(cpus))
														
 
															+		return;
														
 
															+
														
 
															+	local_irq_save(flags);
														
 
															+
														
 
															+	flush = this_cpu_ptr(pcpu_flush_ex);
														
 
															+
														
 
															+	if (info->mm) {
														
 
															+		flush->address_space = virt_to_phys(info->mm->pgd);
														
 
															+		flush->flags = 0;
														
 
															+	} else {
														
 
															+		flush->address_space = 0;
														
 
															+		flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
														
 
															+	}
														
 
															+
														
 
															+	flush->hv_vp_set.valid_bank_mask = 0;
														
 
															+
														
 
															+	if (!cpumask_equal(cpus, cpu_present_mask)) {
														
 
															+		flush->hv_vp_set.format = HV_GENERIC_SET_SPARCE_4K;
														
 
															+		nr_bank = cpumask_to_vp_set(flush, cpus);
														
 
															+	}
														
 
															+
														
 
															+	if (!nr_bank) {
														
 
															+		flush->hv_vp_set.format = HV_GENERIC_SET_ALL;
														
 
															+		flush->flags |= HV_FLUSH_ALL_PROCESSORS;
														
 
															+	}
														
 
															+
														
 
															+	/*
														
 
															+	 * We can flush not more than max_gvas with one hypercall. Flush the
														
 
															+	 * whole address space if we were asked to do more.
														
 
															+	 */
														
 
															+	max_gvas =
														
 
															+		(PAGE_SIZE - sizeof(*flush) - nr_bank *
														
 
															+		 sizeof(flush->hv_vp_set.bank_contents[0])) /
														
 
															+		sizeof(flush->gva_list[0]);
														
 
															+
														
 
															+	if (info->end == TLB_FLUSH_ALL) {
														
 
															+		flush->flags |= HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY;
														
 
															+		status = hv_do_rep_hypercall(
														
 
															+			HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX,
														
 
															+			0, nr_bank + 2, flush, NULL);
														
 
															+	} else if (info->end &&
														
 
															+		   ((info->end - info->start)/HV_TLB_FLUSH_UNIT) > max_gvas) {
														
 
															+		status = hv_do_rep_hypercall(
														
 
															+			HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX,
														
 
															+			0, nr_bank + 2, flush, NULL);
														
 
															+	} else {
														
 
															+		gva_n = fill_gva_list(flush->gva_list, nr_bank,
														
 
															+				      info->start, info->end);
														
 
															+		status = hv_do_rep_hypercall(
														
 
															+			HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX,
														
 
															+			gva_n, nr_bank + 2, flush, NULL);
														
 
															+	}
														
 
															+
														
 
															+	local_irq_restore(flags);
														
 
															+
														
 
															+	if (!(status & HV_HYPERCALL_RESULT_MASK))
														
 
															+		return;
														
 
															+do_native:
														
 
															+	native_flush_tlb_others(cpus, info);
														
 
															+}
														
 
															+
														
 
															 void hyperv_setup_mmu_ops(void)
														
 
															 {
														
 
															-	if (ms_hyperv.hints & HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED) {
														
 
															+	if (!(ms_hyperv.hints & HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED))
														
 
															+		return;
														
 
															+
														
 
															+	setup_clear_cpu_cap(X86_FEATURE_PCID);
														
 
															+
														
 
															+	if (!(ms_hyperv.hints & HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED)) {
														
 
															 		pr_info("Using hypercall for remote TLB flush\n");
														
 
															 		pv_mmu_ops.flush_tlb_others = hyperv_flush_tlb_others;
														
 
															-		setup_clear_cpu_cap(X86_FEATURE_PCID);
														
 
															+	} else {
														
 
															+		pr_info("Using ext hypercall for remote TLB flush\n");
														
 
															+		pv_mmu_ops.flush_tlb_others = hyperv_flush_tlb_others_ex;
														
 
															 	}
														
 
															 }
														
 
															 void hyper_alloc_mmu(void)
														
 
															 {
														
 
															-	if (ms_hyperv.hints & HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED)
														
 
															+	if (!(ms_hyperv.hints & HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED))
														
 
															+		return;
														
 
															+
														
 
															+	if (!(ms_hyperv.hints & HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED))
														
 
															 		pcpu_flush = __alloc_percpu(PAGE_SIZE, PAGE_SIZE);
														
 
															+	else
														
 
															+		pcpu_flush_ex = __alloc_percpu(PAGE_SIZE, PAGE_SIZE);
														
 
															 }
														
--- a/arch/x86/include/uapi/asm/hyperv.h
+++ b/arch/x86/include/uapi/asm/hyperv.h
@@ -149,6 +149,9 @@
 
															  */
														
 
															 #define HV_X64_DEPRECATING_AEOI_RECOMMENDED	(1 << 9)
														
 
															+/* Recommend using the newer ExProcessorMasks interface */
														
 
															+#define HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED	(1 << 11)
														
 
															+
														
 
															 /*
														
 
															  * HV_VP_SET available
														
 
															  */
														
@@ -245,6 +248,8 @@
 
															 #define HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE	0x0002
														
 
															 #define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST	0x0003
														
 
															 #define HVCALL_NOTIFY_LONG_SPIN_WAIT		0x0008
														
 
															+#define HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX  0x0013
														
 
															+#define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX   0x0014
														
 
															 #define HVCALL_POST_MESSAGE			0x005c
														
 
															 #define HVCALL_SIGNAL_EVENT			0x005d
														
@@ -266,6 +271,11 @@
 
															 #define HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY	BIT(2)
														
 
															 #define HV_FLUSH_USE_EXTENDED_RANGE_FORMAT	BIT(3)
														
 
															+enum HV_GENERIC_SET_FORMAT {
														
 
															+	HV_GENERIC_SET_SPARCE_4K,
														
 
															+	HV_GENERIC_SET_ALL,
														
 
															+};
														
 
															+
														
 
															 /* hypercall status code */
														
 
															 #define HV_STATUS_SUCCESS			0
														
 
															 #define HV_STATUS_INVALID_HYPERCALL_CODE	2