@@ -66,6 +66,7 @@ struct vfio_domain {
 	struct list_head	next;
 	struct list_head	group_list;
 	int			prot;		/* IOMMU_CACHE */
+	bool			fgsp;		/* Fine-grained super pages */
 };
 
 struct vfio_dma {
@@ -350,8 +351,8 @@ static void vfio_unmap_unpin(struct vfio_iommu *iommu, struct vfio_dma *dma)
 		iommu_unmap(d->domain, dma->iova, dma->size);
 
 	while (iova < end) {
-		size_t unmapped;
-		phys_addr_t phys;
+		size_t unmapped, len;
+		phys_addr_t phys, next;
 
 		phys = iommu_iova_to_phys(domain->domain, iova);
 		if (WARN_ON(!phys)) {
@@ -359,7 +360,19 @@ static void vfio_unmap_unpin(struct vfio_iommu *iommu, struct vfio_dma *dma)
 			continue;
 		}
 
-		unmapped = iommu_unmap(domain->domain, iova, PAGE_SIZE);
+		/*
+		 * To optimize for fewer iommu_unmap() calls, each of which
+		 * may require hardware cache flushing, try to find the
+		 * largest contiguous physical memory chunk to unmap.
+		 */
+		for (len = PAGE_SIZE;
+		     !domain->fgsp && iova + len < end; len += PAGE_SIZE) {
+			next = iommu_iova_to_phys(domain->domain, iova + len);
+			if (next != phys + len)
+				break;
+		}
+
+		unmapped = iommu_unmap(domain->domain, iova, len);
 		if (WARN_ON(!unmapped))
 			break;
 
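To make the new loop above concrete, here is a minimal userspace sketch of the same chunking idea. It is not kernel code: mock_iova_to_phys(), mock_unmap(), mock_phys, PG_SIZE and NR_PAGES are all invented for the illustration, standing in for iommu_iova_to_phys()/iommu_unmap() and the IOMMU page table. It only shows how scanning forward while the backing physical memory stays contiguous collapses many per-page unmap calls into a few larger ones.

/*
 * Userspace sketch only, NOT kernel code: a small array plays the role of
 * the IOMMU page table, and mock_unmap() just reports what it was asked to
 * unmap.
 */
#include <stdio.h>
#include <stdbool.h>
#include <stddef.h>

#define PG_SIZE   4096UL
#define NR_PAGES  8

/* phys address backing each iova page; pages 0-3 and 5-7 form contiguous runs */
static unsigned long mock_phys[NR_PAGES] = {
	0x100000, 0x101000, 0x102000, 0x103000,
	0x900000, 0x200000, 0x201000, 0x202000,
};

static unsigned long mock_iova_to_phys(unsigned long iova)
{
	return mock_phys[iova / PG_SIZE];
}

/* pretend to unmap; return how much was unmapped, like iommu_unmap() */
static size_t mock_unmap(unsigned long iova, size_t len)
{
	printf("unmap iova 0x%lx len 0x%zx\n", iova, len);
	return len;
}

int main(void)
{
	unsigned long iova = 0, end = NR_PAGES * PG_SIZE;
	bool fgsp = false;	/* pretend coarse-grained superpages */
	int calls = 0;

	while (iova < end) {
		unsigned long phys = mock_iova_to_phys(iova);
		size_t len;

		/* grow the chunk while the backing memory stays contiguous */
		for (len = PG_SIZE;
		     !fgsp && iova + len < end; len += PG_SIZE) {
			if (mock_iova_to_phys(iova + len) != phys + len)
				break;
		}

		iova += mock_unmap(iova, len);
		calls++;
	}

	printf("%d unmap calls instead of %d\n", calls, NR_PAGES);
	return 0;
}

With the mock layout above (two contiguous runs split by one stray page) this issues 3 unmap calls instead of 8; setting fgsp = true skips the scan and falls back to page-at-a-time requests, which is what the real loop does on IOMMUs that already treat contiguous ranges as superpages.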
@@ -665,6 +678,39 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu,
 	return 0;
 }
 
+/*
+ * We change our unmap behavior slightly depending on whether the IOMMU
+ * supports fine-grained superpages.  IOMMUs like AMD-Vi will use a superpage
+ * for practically any contiguous power-of-two mapping we give it.  This means
+ * we don't need to look for contiguous chunks ourselves to make unmapping
+ * more efficient.  On IOMMUs with coarse-grained super pages, like Intel VT-d
+ * with discrete 2M/1G/512G/1T superpages, identifying contiguous chunks
+ * significantly boosts non-hugetlbfs mappings and doesn't seem to hurt when
+ * hugetlbfs is in use.
+ */
+static void vfio_test_domain_fgsp(struct vfio_domain *domain)
+{
+	struct page *pages;
+	int ret, order = get_order(PAGE_SIZE * 2);
+
+	pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, order);
+	if (!pages)
+		return;
+
+	ret = iommu_map(domain->domain, 0, page_to_phys(pages), PAGE_SIZE * 2,
+			IOMMU_READ | IOMMU_WRITE | domain->prot);
+	if (!ret) {
+		size_t unmapped = iommu_unmap(domain->domain, 0, PAGE_SIZE);
+
+		if (unmapped == PAGE_SIZE)
+			iommu_unmap(domain->domain, PAGE_SIZE, PAGE_SIZE);
+		else
+			domain->fgsp = true;
+	}
+
+	__free_pages(pages, order);
+}
+
 static int vfio_iommu_type1_attach_group(void *iommu_data,
 					 struct iommu_group *iommu_group)
 {
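The probe's decision rule can also be viewed in isolation. The sketch below is a userspace illustration, not kernel code: unmap_one_page_fgsp(), unmap_one_page_coarse(), probe_fgsp() and PG_SIZE are hypothetical stand-ins for how a given IOMMU driver answers a request to unmap one 4K page out of an 8K physically contiguous mapping, and the classification mirrors the unmapped != PAGE_SIZE test in vfio_test_domain_fgsp() above.

/*
 * Userspace illustration only, NOT kernel code: each "driver" function
 * returns how many bytes an IOMMU would actually unmap when asked to drop
 * one 4K page out of an 8K contiguous mapping.
 */
#include <stdio.h>
#include <stdbool.h>
#include <stddef.h>

#define PG_SIZE 4096UL

/* fine-grained superpages: the whole 8K superpage comes out */
static size_t unmap_one_page_fgsp(void)
{
	return 2 * PG_SIZE;
}

/* coarse-grained superpages: only the requested 4K comes out */
static size_t unmap_one_page_coarse(void)
{
	return PG_SIZE;
}

static bool probe_fgsp(size_t (*unmap_one_page)(void))
{
	/* map 8K contiguous, ask to unmap 4K, see how much really went away */
	size_t unmapped = unmap_one_page();

	return unmapped != PG_SIZE;
}

int main(void)
{
	printf("AMD-Vi-style domain: fgsp=%d\n", probe_fgsp(unmap_one_page_fgsp));
	printf("VT-d-style domain:   fgsp=%d\n", probe_fgsp(unmap_one_page_coarse));
	return 0;
}

An AMD-Vi-style domain backs the 8K test mapping with a single superpage, so the whole thing comes out and the domain is flagged fgsp; a VT-d-style domain uses two 4K PTEs, drops exactly 4K, and keeps fgsp clear so the unmap path does the contiguity scan itself.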
@@ -758,6 +804,8 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
 		}
 	}
 
+	vfio_test_domain_fgsp(domain);
+
 	/* replay mappings on new domains */
 	ret = vfio_iommu_replay(iommu, domain);
 	if (ret)