8 жил өмнө · d5ff0814fd
--- a/arch/x86/include/asm/pmem.h
+++ b/arch/x86/include/asm/pmem.h
@@ -55,7 +55,8 @@ static inline int arch_memcpy_from_pmem(void *dst, const void *src, size_t n)
 
				  * @size:	number of bytes to write back
			
 
				  *
			
 
				  * Write back a cache range using the CLWB (cache line write back)
			
 
				- * instruction.
			
 
				+ * instruction. Note that @size is internally rounded up to be cache
			
 
				+ * line size aligned.
			
 
				  */
			
 
				 static inline void arch_wb_cache_pmem(void *addr, size_t size)
			
 
				 {
			
@@ -69,15 +70,6 @@ static inline void arch_wb_cache_pmem(void *addr, size_t size)
 
				 		clwb(p);
			
 
				 }
			
 
				 
			
 
				-/*
			
 
				- * copy_from_iter_nocache() on x86 only uses non-temporal stores for iovec
			
 
				- * iterators, so for other types (bvec & kvec) we must do a cache write-back.
			
 
				- */
			
 
				-static inline bool __iter_needs_pmem_wb(struct iov_iter *i)
			
 
				-{
			
 
				-	return iter_is_iovec(i) == false;
			
 
				-}
			
 
				-
			
 
				 /**
			
 
				  * arch_copy_from_iter_pmem - copy data from an iterator to PMEM
			
 
				  * @addr:	PMEM destination address
			
@@ -94,7 +86,35 @@ static inline size_t arch_copy_from_iter_pmem(void *addr, size_t bytes,
 
				 	/* TODO: skip the write-back by always using non-temporal stores */
			
 
				 	len = copy_from_iter_nocache(addr, bytes, i);
			
 
				 
			
 
				-	if (__iter_needs_pmem_wb(i))
			
 
				+	/*
			
 
				+	 * In the iovec case on x86_64 copy_from_iter_nocache() uses
			
 
				+	 * non-temporal stores for the bulk of the transfer, but we need
			
 
				+	 * to manually flush if the transfer is unaligned. A cached
			
 
				+	 * memory copy is used when destination or size is not naturally
			
 
				+	 * aligned. That is:
			
 
				+	 *   - Require 8-byte alignment when size is 8 bytes or larger.
			
 
				+	 *   - Require 4-byte alignment when size is 4 bytes.
			
 
				+	 *
			
 
				+	 * In the non-iovec case the entire destination needs to be
			
 
				+	 * flushed.
			
 
				+	 */
			
 
				+	if (iter_is_iovec(i)) {
			
 
				+		unsigned long flushed, dest = (unsigned long) addr;
			
 
				+
			
 
				+		if (bytes < 8) {
			
 
				+			if (!IS_ALIGNED(dest, 4) || (bytes != 4))
			
 
				+				arch_wb_cache_pmem(addr, 1);
			
 
				+		} else {
			
 
				+			if (!IS_ALIGNED(dest, 8)) {
			
 
				+				dest = ALIGN(dest, boot_cpu_data.x86_clflush_size);
			
 
				+				arch_wb_cache_pmem(addr, 1);
			
 
				+			}
			
 
				+
			
 
				+			flushed = dest - (unsigned long) addr;
			
 
				+			if (bytes > flushed && !IS_ALIGNED(bytes - flushed, 8))
			
 
				+				arch_wb_cache_pmem(addr + bytes - 1, 1);
			
 
				+		}
			
 
				+	} else
			
 
				 		arch_wb_cache_pmem(addr, bytes);
			
 
				 
			
 
				 	return len;
			
--- a/drivers/acpi/nfit/core.c
+++ b/drivers/acpi/nfit/core.c
@@ -1617,7 +1617,11 @@ static int cmp_map(const void *m0, const void *m1)
 
				 	const struct nfit_set_info_map *map0 = m0;
			
 
				 	const struct nfit_set_info_map *map1 = m1;
			
 
				 
			
 
				-	return map0->region_offset - map1->region_offset;
			
 
				+	if (map0->region_offset < map1->region_offset)
			
 
				+		return -1;
			
 
				+	else if (map0->region_offset > map1->region_offset)
			
 
				+		return 1;
			
 
				+	return 0;
			
 
				 }
			
 
				 
			
 
				 /* Retrieve the nth entry referencing this spa */
			
--- a/drivers/dax/Kconfig
+++ b/drivers/dax/Kconfig
@@ -2,6 +2,7 @@ menuconfig DEV_DAX
 
				 	tristate "DAX: direct access to differentiated memory"
			
 
				 	default m if NVDIMM_DAX
			
 
				 	depends on TRANSPARENT_HUGEPAGE
			
 
				+	select SRCU
			
 
				 	help
			
 
				 	  Support raw access to differentiated (persistence, bandwidth,
			
 
				 	  latency...) memory via an mmap(2) capable character
			
--- a/drivers/dax/dax.c
+++ b/drivers/dax/dax.c
@@ -25,6 +25,7 @@
 
				 #include "dax.h"
			
 
				 
			
 
				 static dev_t dax_devt;
			
 
				+DEFINE_STATIC_SRCU(dax_srcu);
			
 
				 static struct class *dax_class;
			
 
				 static DEFINE_IDA(dax_minor_ida);
			
 
				 static int nr_dax = CONFIG_NR_DEV_DAX;
			
@@ -60,7 +61,7 @@ struct dax_region {
 
				  * @region - parent region
			
 
				  * @dev - device backing the character device
			
 
				  * @cdev - core chardev data
			
 
				- * @alive - !alive + rcu grace period == no new mappings can be established
			
 
				+ * @alive - !alive + srcu grace period == no new mappings can be established
			
 
				  * @id - child id in the region
			
 
				  * @num_resources - number of physical address extents in this device
			
 
				  * @res - array of physical address ranges
			
@@ -569,7 +570,7 @@ static int __dax_dev_pud_fault(struct dax_dev *dax_dev, struct vm_fault *vmf)
 
				 static int dax_dev_huge_fault(struct vm_fault *vmf,
			
 
				 		enum page_entry_size pe_size)
			
 
				 {
			
 
				-	int rc;
			
 
				+	int rc, id;
			
 
				 	struct file *filp = vmf->vma->vm_file;
			
 
				 	struct dax_dev *dax_dev = filp->private_data;
			
 
				 
			
@@ -578,7 +579,7 @@ static int dax_dev_huge_fault(struct vm_fault *vmf,
 
				 			? "write" : "read",
			
 
				 			vmf->vma->vm_start, vmf->vma->vm_end);
			
 
				 
			
 
				-	rcu_read_lock();
			
 
				+	id = srcu_read_lock(&dax_srcu);
			
 
				 	switch (pe_size) {
			
 
				 	case PE_SIZE_PTE:
			
 
				 		rc = __dax_dev_pte_fault(dax_dev, vmf);
			
@@ -592,7 +593,7 @@ static int dax_dev_huge_fault(struct vm_fault *vmf,
 
				 	default:
			
 
				 		return VM_FAULT_FALLBACK;
			
 
				 	}
			
 
				-	rcu_read_unlock();
			
 
				+	srcu_read_unlock(&dax_srcu, id);
			
 
				 
			
 
				 	return rc;
			
 
				 }
			
@@ -713,11 +714,11 @@ static void unregister_dax_dev(void *dev)
 
				 	 * Note, rcu is not protecting the liveness of dax_dev, rcu is
			
 
				 	 * ensuring that any fault handlers that might have seen
			
 
				 	 * dax_dev->alive == true, have completed.  Any fault handlers
			
 
				-	 * that start after synchronize_rcu() has started will abort
			
 
				+	 * that start after synchronize_srcu() has started will abort
			
 
				 	 * upon seeing dax_dev->alive == false.
			
 
				 	 */
			
 
				 	dax_dev->alive = false;
			
 
				-	synchronize_rcu();
			
 
				+	synchronize_srcu(&dax_srcu);
			
 
				 	unmap_mapping_range(dax_dev->inode->i_mapping, 0, 0, 1);
			
 
				 	cdev_del(cdev);
			
 
				 	device_unregister(dev);
			
--- a/drivers/nvdimm/bus.c
+++ b/drivers/nvdimm/bus.c
@@ -934,8 +934,14 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm,
 
				 	rc = nd_desc->ndctl(nd_desc, nvdimm, cmd, buf, buf_len, NULL);
			
 
				 	if (rc < 0)
			
 
				 		goto out_unlock;
			
 
				+	nvdimm_bus_unlock(&nvdimm_bus->dev);
			
 
				+
			
 
				 	if (copy_to_user(p, buf, buf_len))
			
 
				 		rc = -EFAULT;
			
 
				+
			
 
				+	vfree(buf);
			
 
				+	return rc;
			
 
				+
			
 
				  out_unlock:
			
 
				 	nvdimm_bus_unlock(&nvdimm_bus->dev);
			
 
				  out:
			
--- a/drivers/nvdimm/claim.c
+++ b/drivers/nvdimm/claim.c
@@ -243,7 +243,15 @@ static int nsio_rw_bytes(struct nd_namespace_common *ndns,
 
				 	}
			
 
				 
			
 
				 	if (unlikely(is_bad_pmem(&nsio->bb, sector, sz_align))) {
			
 
				-		if (IS_ALIGNED(offset, 512) && IS_ALIGNED(size, 512)) {
			
 
				+		/*
			
 
				+		 * FIXME: nsio_rw_bytes() may be called from atomic
			
 
				+		 * context in the btt case and nvdimm_clear_poison()
			
 
				+		 * takes a sleeping lock. Until the locking can be
			
 
				+		 * reworked this capability requires that the namespace
			
 
				+		 * is not claimed by btt.
			
 
				+		 */
			
 
				+		if (IS_ALIGNED(offset, 512) && IS_ALIGNED(size, 512)
			
 
				+				&& (!ndns->claim || !is_nd_btt(ndns->claim))) {
			
 
				 			long cleared;
			
 
				 
			
 
				 			cleared = nvdimm_clear_poison(&ndns->dev, offset, size);
			
--- a/drivers/nvdimm/dimm_devs.c
+++ b/drivers/nvdimm/dimm_devs.c
@@ -395,7 +395,7 @@ EXPORT_SYMBOL_GPL(nvdimm_create);
 
				 
			
 
				 int alias_dpa_busy(struct device *dev, void *data)
			
 
				 {
			
 
				-	resource_size_t map_end, blk_start, new, busy;
			
 
				+	resource_size_t map_end, blk_start, new;
			
 
				 	struct blk_alloc_info *info = data;
			
 
				 	struct nd_mapping *nd_mapping;
			
 
				 	struct nd_region *nd_region;
			
@@ -436,29 +436,19 @@ int alias_dpa_busy(struct device *dev, void *data)
 
				  retry:
			
 
				 	/*
			
 
				 	 * Find the free dpa from the end of the last pmem allocation to
			
 
				-	 * the end of the interleave-set mapping that is not already
			
 
				-	 * covered by a blk allocation.
			
 
				+	 * the end of the interleave-set mapping.
			
 
				 	 */
			
 
				-	busy = 0;
			
 
				 	for_each_dpa_resource(ndd, res) {
			
 
				+		if (strncmp(res->name, "pmem", 4) != 0)
			
 
				+			continue;
			
 
				 		if ((res->start >= blk_start && res->start < map_end)
			
 
				 				|| (res->end >= blk_start
			
 
				 					&& res->end <= map_end)) {
			
 
				-			if (strncmp(res->name, "pmem", 4) == 0) {
			
 
				-				new = max(blk_start, min(map_end + 1,
			
 
				-							res->end + 1));
			
 
				-				if (new != blk_start) {
			
 
				-					blk_start = new;
			
 
				-					goto retry;
			
 
				-				}
			
 
				-			} else
			
 
				-				busy += min(map_end, res->end)
			
 
				-					- max(nd_mapping->start, res->start) + 1;
			
 
				-		} else if (nd_mapping->start > res->start
			
 
				-				&& map_end < res->end) {
			
 
				-			/* total eclipse of the PMEM region mapping */
			
 
				-			busy += nd_mapping->size;
			
 
				-			break;
			
 
				+			new = max(blk_start, min(map_end + 1, res->end + 1));
			
 
				+			if (new != blk_start) {
			
 
				+				blk_start = new;
			
 
				+				goto retry;
			
 
				+			}
			
 
				 		}
			
 
				 	}
			
 
				 
			
@@ -470,52 +460,11 @@ int alias_dpa_busy(struct device *dev, void *data)
 
				 		return 1;
			
 
				 	}
			
 
				 
			
 
				-	info->available -= blk_start - nd_mapping->start + busy;
			
 
				+	info->available -= blk_start - nd_mapping->start;
			
 
				 
			
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				-static int blk_dpa_busy(struct device *dev, void *data)
			
 
				-{
			
 
				-	struct blk_alloc_info *info = data;
			
 
				-	struct nd_mapping *nd_mapping;
			
 
				-	struct nd_region *nd_region;
			
 
				-	resource_size_t map_end;
			
 
				-	int i;
			
 
				-
			
 
				-	if (!is_nd_pmem(dev))
			
 
				-		return 0;
			
 
				-
			
 
				-	nd_region = to_nd_region(dev);
			
 
				-	for (i = 0; i < nd_region->ndr_mappings; i++) {
			
 
				-		nd_mapping  = &nd_region->mapping[i];
			
 
				-		if (nd_mapping->nvdimm == info->nd_mapping->nvdimm)
			
 
				-			break;
			
 
				-	}
			
 
				-
			
 
				-	if (i >= nd_region->ndr_mappings)
			
 
				-		return 0;
			
 
				-
			
 
				-	map_end = nd_mapping->start + nd_mapping->size - 1;
			
 
				-	if (info->res->start >= nd_mapping->start
			
 
				-			&& info->res->start < map_end) {
			
 
				-		if (info->res->end <= map_end) {
			
 
				-			info->busy = 0;
			
 
				-			return 1;
			
 
				-		} else {
			
 
				-			info->busy -= info->res->end - map_end;
			
 
				-			return 0;
			
 
				-		}
			
 
				-	} else if (info->res->end >= nd_mapping->start
			
 
				-			&& info->res->end <= map_end) {
			
 
				-		info->busy -= nd_mapping->start - info->res->start;
			
 
				-		return 0;
			
 
				-	} else {
			
 
				-		info->busy -= nd_mapping->size;
			
 
				-		return 0;
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				 /**
			
 
				  * nd_blk_available_dpa - account the unused dpa of BLK region
			
 
				  * @nd_mapping: container of dpa-resource-root + labels
			
@@ -545,11 +494,7 @@ resource_size_t nd_blk_available_dpa(struct nd_region *nd_region)
 
				 	for_each_dpa_resource(ndd, res) {
			
 
				 		if (strncmp(res->name, "blk", 3) != 0)
			
 
				 			continue;
			
 
				-
			
 
				-		info.res = res;
			
 
				-		info.busy = resource_size(res);
			
 
				-		device_for_each_child(&nvdimm_bus->dev, &info, blk_dpa_busy);
			
 
				-		info.available -= info.busy;
			
 
				+		info.available -= resource_size(res);
			
 
				 	}
			
 
				 
			
 
				 	return info.available;