Эх сурвалжийг харах

Merge branch 'libnvdimm-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm

Pull libnvdimm fixes from Dan Williams:
 "A 4.16 regression fix, three fixes for -stable, and a cleanup fix:

   - During the merge window support for the new ACPI NVDIMM Platform
     Capabilities structure disabled support for "deep flush", a
     force-unit- access like mechanism for persistent memory. Restore
     that mechanism.

   - VFIO like RDMA is yet one more memory registration / pinning
     interface that is incompatible with Filesystem-DAX. Disable long
     term pins of Filesystem-DAX mappings via VFIO.

   - The Filesystem-DAX detection to prevent long terms pins mistakenly
     also disabled Device-DAX pins which are not subject to the same
     block- map collision concerns.

   - Similar to the setup path, softlockup warnings can trigger in the
     shutdown path for large persistent memory namespaces. Teach
     for_each_device_pfn() to perform cond_resched() in all cases.

   - Boaz noticed that the might_sleep() in dax_direct_access() is stale
     as of the v4.15 kernel.

  These have received a build success notification from the 0day robot,
  and the longterm pin fixes have appeared in -next. However, I recently
  rebased the tree to remove some other fixes that need to be reworked
  after review feedback.

* 'libnvdimm-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm:
  memremap: fix softlockup reports at teardown
  libnvdimm: re-enable deep flush for pmem devices via fsync()
  vfio: disable filesystem-dax page pinning
  dax: fix vma_is_fsdax() helper
  dax: ->direct_access does not sleep anymore
Linus Torvalds 7 жил өмнө
parent
commit
20f14172cb

+ 0 - 6
drivers/dax/super.c

@@ -246,12 +246,6 @@ long dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages,
 {
 {
 	long avail;
 	long avail;
 
 
-	/*
-	 * The device driver is allowed to sleep, in order to make the
-	 * memory directly accessible.
-	 */
-	might_sleep();
-
 	if (!dax_dev)
 	if (!dax_dev)
 		return -EOPNOTSUPP;
 		return -EOPNOTSUPP;
 
 

+ 1 - 2
drivers/nvdimm/pmem.c

@@ -335,8 +335,7 @@ static int pmem_attach_disk(struct device *dev,
 		dev_warn(dev, "unable to guarantee persistence of writes\n");
 		dev_warn(dev, "unable to guarantee persistence of writes\n");
 		fua = 0;
 		fua = 0;
 	}
 	}
-	wbc = nvdimm_has_cache(nd_region) &&
-		!test_bit(ND_REGION_PERSIST_CACHE, &nd_region->flags);
+	wbc = nvdimm_has_cache(nd_region);
 
 
 	if (!devm_request_mem_region(dev, res->start, resource_size(res),
 	if (!devm_request_mem_region(dev, res->start, resource_size(res),
 				dev_name(&ndns->dev))) {
 				dev_name(&ndns->dev))) {

+ 15 - 3
drivers/vfio/vfio_iommu_type1.c

@@ -338,11 +338,12 @@ static int vaddr_get_pfn(struct mm_struct *mm, unsigned long vaddr,
 {
 {
 	struct page *page[1];
 	struct page *page[1];
 	struct vm_area_struct *vma;
 	struct vm_area_struct *vma;
+	struct vm_area_struct *vmas[1];
 	int ret;
 	int ret;
 
 
 	if (mm == current->mm) {
 	if (mm == current->mm) {
-		ret = get_user_pages_fast(vaddr, 1, !!(prot & IOMMU_WRITE),
-					  page);
+		ret = get_user_pages_longterm(vaddr, 1, !!(prot & IOMMU_WRITE),
+					      page, vmas);
 	} else {
 	} else {
 		unsigned int flags = 0;
 		unsigned int flags = 0;
 
 
@@ -351,7 +352,18 @@ static int vaddr_get_pfn(struct mm_struct *mm, unsigned long vaddr,
 
 
 		down_read(&mm->mmap_sem);
 		down_read(&mm->mmap_sem);
 		ret = get_user_pages_remote(NULL, mm, vaddr, 1, flags, page,
 		ret = get_user_pages_remote(NULL, mm, vaddr, 1, flags, page,
-					    NULL, NULL);
+					    vmas, NULL);
+		/*
+		 * The lifetime of a vaddr_get_pfn() page pin is
+		 * userspace-controlled. In the fs-dax case this could
+		 * lead to indefinite stalls in filesystem operations.
+		 * Disallow attempts to pin fs-dax pages via this
+		 * interface.
+		 */
+		if (ret > 0 && vma_is_fsdax(vmas[0])) {
+			ret = -EOPNOTSUPP;
+			put_page(page[0]);
+		}
 		up_read(&mm->mmap_sem);
 		up_read(&mm->mmap_sem);
 	}
 	}
 
 

+ 1 - 1
include/linux/fs.h

@@ -3198,7 +3198,7 @@ static inline bool vma_is_fsdax(struct vm_area_struct *vma)
 	if (!vma_is_dax(vma))
 	if (!vma_is_dax(vma))
 		return false;
 		return false;
 	inode = file_inode(vma->vm_file);
 	inode = file_inode(vma->vm_file);
-	if (inode->i_mode == S_IFCHR)
+	if (S_ISCHR(inode->i_mode))
 		return false; /* device-dax */
 		return false; /* device-dax */
 	return true;
 	return true;
 }
 }

+ 10 - 5
kernel/memremap.c

@@ -275,8 +275,15 @@ static unsigned long pfn_end(struct dev_pagemap *pgmap)
 	return (res->start + resource_size(res)) >> PAGE_SHIFT;
 	return (res->start + resource_size(res)) >> PAGE_SHIFT;
 }
 }
 
 
+static unsigned long pfn_next(unsigned long pfn)
+{
+	if (pfn % 1024 == 0)
+		cond_resched();
+	return pfn + 1;
+}
+
 #define for_each_device_pfn(pfn, map) \
 #define for_each_device_pfn(pfn, map) \
-	for (pfn = pfn_first(map); pfn < pfn_end(map); pfn++)
+	for (pfn = pfn_first(map); pfn < pfn_end(map); pfn = pfn_next(pfn))
 
 
 static void devm_memremap_pages_release(void *data)
 static void devm_memremap_pages_release(void *data)
 {
 {
@@ -337,10 +344,10 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
 	resource_size_t align_start, align_size, align_end;
 	resource_size_t align_start, align_size, align_end;
 	struct vmem_altmap *altmap = pgmap->altmap_valid ?
 	struct vmem_altmap *altmap = pgmap->altmap_valid ?
 			&pgmap->altmap : NULL;
 			&pgmap->altmap : NULL;
+	struct resource *res = &pgmap->res;
 	unsigned long pfn, pgoff, order;
 	unsigned long pfn, pgoff, order;
 	pgprot_t pgprot = PAGE_KERNEL;
 	pgprot_t pgprot = PAGE_KERNEL;
-	int error, nid, is_ram, i = 0;
-	struct resource *res = &pgmap->res;
+	int error, nid, is_ram;
 
 
 	align_start = res->start & ~(SECTION_SIZE - 1);
 	align_start = res->start & ~(SECTION_SIZE - 1);
 	align_size = ALIGN(res->start + resource_size(res), SECTION_SIZE)
 	align_size = ALIGN(res->start + resource_size(res), SECTION_SIZE)
@@ -409,8 +416,6 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
 		list_del(&page->lru);
 		list_del(&page->lru);
 		page->pgmap = pgmap;
 		page->pgmap = pgmap;
 		percpu_ref_get(pgmap->ref);
 		percpu_ref_get(pgmap->ref);
-		if (!(++i % 1024))
-			cond_resched();
 	}
 	}
 
 
 	devm_add_action(dev, devm_memremap_pages_release, pgmap);
 	devm_add_action(dev, devm_memremap_pages_release, pgmap);