
Merge tag 'libnvdimm-for-4.3' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm

Pull libnvdimm updates from Dan Williams:
 "This update has successfully completed a 0day-kbuild run and has
  appeared in a linux-next release.  The changes outside of the typical
  drivers/nvdimm/ and drivers/acpi/nfit.[ch] paths are related to the
  removal of IORESOURCE_CACHEABLE, the introduction of memremap(), and
  the introduction of ZONE_DEVICE + devm_memremap_pages().

  Summary:

   - Introduce ZONE_DEVICE and devm_memremap_pages() as a generic
     mechanism for adding device-driver-discovered memory regions to the
     kernel's direct map.

     This facility is used by the pmem driver to enable pfn_to_page()
     operations on the page frames returned by DAX ('direct_access' in
     'struct block_device_operations').

     For now, the 'memmap' allocation for these "device" pages comes
     from "System RAM".  Support for allocating the memmap from device
     memory will arrive in a later kernel.

   - Introduce memremap() to replace usages of ioremap_cache() and
     ioremap_wt().  memremap() drops the __iomem annotation for these
     mappings to memory that do not have i/o side effects.  The
     replacement of ioremap_cache() with memremap() is limited to the
     pmem driver to ease merging the api change in v4.3.

     Completion of the conversion is targeted for v4.4.

   - Similar to the usage of memcpy_to_pmem() + wmb_pmem() in the pmem
     driver, update the VFS DAX implementation and PMEM api to provide
     persistence guarantees for kernel operations on a DAX mapping.

   - Convert the ACPI NFIT 'BLK' driver to map the block apertures as
     cacheable to improve performance.

   - Miscellaneous updates and fixes to libnvdimm including support for
     issuing "address range scrub" commands, clarifying the optimal
     'sector size' of pmem devices, a clarification of the usage of the
     ACPI '_STA' (status) property for DIMM devices, and other minor
     fixes"

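The memremap() conversion described above is mechanical in the common case. A hedged before/after sketch, where MEMREMAP_WB is the write-back flag the new interface takes:

    /* before: a cacheable mapping still carried a spurious __iomem */
    void __iomem *addr = ioremap_cache(offset, size);

    /* after: a plain kernel pointer for memory without i/o side effects */
    void *addr = memremap(offset, size, MEMREMAP_WB);
    if (!addr)
    	return -ENOMEM;
    /* ... use addr as ordinary memory ... */
    memunmap(addr);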
* tag 'libnvdimm-for-4.3' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm: (34 commits)
  libnvdimm, pmem: direct map legacy pmem by default
  libnvdimm, pmem: 'struct page' for pmem
  libnvdimm, pfn: 'struct page' provider infrastructure
  x86, pmem: clarify that ARCH_HAS_PMEM_API implies PMEM mapped WB
  add devm_memremap_pages
  mm: ZONE_DEVICE for "device memory"
  mm: move __phys_to_pfn and __pfn_to_phys to asm/generic/memory_model.h
  dax: drop size parameter to ->direct_access()
  nd_blk: change aperture mapping from WC to WB
  nvdimm: change to use generic kvfree()
  pmem, dax: have direct_access use __pmem annotation
  dax: update I/O path to do proper PMEM flushing
  pmem: add copy_from_iter_pmem() and clear_pmem()
  pmem, x86: clean up conditional pmem includes
  pmem: remove layer when calling arch_has_wmb_pmem()
  pmem, x86: move x86 PMEM API to new pmem.h header
  libnvdimm, e820: make CONFIG_X86_PMEM_LEGACY a tristate option
  pmem: switch to devm_ allocations
  devres: add devm_memremap
  libnvdimm, btt: write and validate parent_uuid
  ...
Linus Torvalds, 10 years ago
Current commit: 12f03ee606
92 changed files with 2,142 additions and 745 deletions
 1. +2 -1	Documentation/filesystems/Locking
 2. +1 -0	MAINTAINERS
 3. +0 -6	arch/arm/include/asm/memory.h
 4. +1 -1	arch/arm/mach-clps711x/board-cdb89712.c
 5. +1 -1	arch/arm/mach-shmobile/pm-rcar.c
 6. +0 -6	arch/arm64/include/asm/memory.h
 7. +1 -0	arch/ia64/include/asm/io.h
 8. +1 -1	arch/ia64/kernel/cyclone.c
 9. +2 -2	arch/ia64/mm/init.c
10. +1 -1	arch/powerpc/kernel/pci_of_scan.c
11. +2 -2	arch/powerpc/mm/mem.c
12. +4 -3	arch/powerpc/sysdev/axonram.c
13. +1 -1	arch/s390/mm/init.c
14. +1 -0	arch/sh/include/asm/io.h
15. +3 -2	arch/sh/mm/init.c
16. +1 -2	arch/sparc/kernel/pci.c
17. +1 -1	arch/tile/mm/init.c
18. +0 -6	arch/unicore32/include/asm/memory.h
19. +7 -2	arch/x86/Kconfig
20. +2 -71	arch/x86/include/asm/cacheflush.h
21. +0 -6	arch/x86/include/asm/io.h
22. +153 -0	arch/x86/include/asm/pmem.h
23. +1 -1	arch/x86/include/uapi/asm/e820.h
24. +1 -1	arch/x86/kernel/Makefile
25. +8 -71	arch/x86/kernel/pmem.c
26. +2 -2	arch/x86/mm/init_32.c
27. +2 -2	arch/x86/mm/init_64.c
28. +1 -0	arch/xtensa/include/asm/io.h
29. +1 -0	drivers/acpi/Kconfig
30. +36 -43	drivers/acpi/nfit.c
31. +12 -5	drivers/acpi/nfit.h
32. +2 -6	drivers/block/brd.c
33. +1 -1	drivers/isdn/icn/icn.h
34. +1 -1	drivers/mtd/devices/slram.c
35. +1 -1	drivers/mtd/nand/diskonchip.c
36. +1 -1	drivers/mtd/onenand/generic.c
37. +23 -0	drivers/nvdimm/Kconfig
38. +5 -0	drivers/nvdimm/Makefile
39. +13 -37	drivers/nvdimm/btt.c
40. +3 -0	drivers/nvdimm/btt.h
41. +48 -167	drivers/nvdimm/btt_devs.c
42. +201 -0	drivers/nvdimm/claim.c
43. +1 -4	drivers/nvdimm/dimm_devs.c
44. +87 -0	drivers/nvdimm/e820.c
45. +76 -13	drivers/nvdimm/namespace_devs.c
46. +9 -0	drivers/nvdimm/nd-core.h
47. +64 -3	drivers/nvdimm/nd.h
48. +35 -0	drivers/nvdimm/pfn.h
49. +337 -0	drivers/nvdimm/pfn_devs.c
50. +209 -36	drivers/nvdimm/pmem.c
51. +2 -0	drivers/nvdimm/region.c
52. +20 -0	drivers/nvdimm/region_devs.c
53. +1 -2	drivers/pci/probe.c
54. +0 -2	drivers/pnp/manager.c
55. +6 -4	drivers/s390/block/dcssblk.c
56. +1 -6	drivers/scsi/aic94xx/aic94xx_init.c
57. +1 -4	drivers/scsi/arcmsr/arcmsr_hba.c
58. +4 -11	drivers/scsi/mvsas/mv_init.c
59. +1 -1	drivers/scsi/sun3x_esp.c
60. +1 -0	drivers/staging/comedi/drivers/ii_pci20kc.c
61. +9 -7	drivers/staging/unisys/visorbus/visorchannel.c
62. +9 -8	drivers/staging/unisys/visorbus/visorchipset.c
63. +1 -1	drivers/tty/serial/8250/8250_core.c
64. +0 -1	drivers/video/fbdev/ocfb.c
65. +1 -2	drivers/video/fbdev/s1d13xxxfb.c
66. +1 -0	drivers/video/fbdev/stifb.c
67. +2 -2	fs/block_dev.c
68. +38 -24	fs/dax.c
69. +6 -0	include/asm-generic/memory_model.h
70. +4 -4	include/linux/blkdev.h
71. +1 -1	include/linux/io-mapping.h
72. +33 -0	include/linux/io.h
73. +4 -0	include/linux/libnvdimm.h
74. +3 -2	include/linux/memory_hotplug.h
75. +8 -1	include/linux/mm.h
76. +23 -0	include/linux/mmzone.h
77. +1 -1	include/linux/mtd/map.h
78. +84 -31	include/linux/pmem.h
79. +11 -1	include/uapi/linux/ndctl.h
80. +1 -1	include/video/vga.h
81. +2 -0	kernel/Makefile
82. +190 -0	kernel/memremap.c
83. +36 -25	kernel/resource.c
84. +3 -0	lib/Kconfig
85. +4 -9	lib/devres.c
86. +2 -5	lib/pci_iomap.c
87. +17 -0	mm/Kconfig
88. +11 -3	mm/memory_hotplug.c
89. +3 -0	mm/page_alloc.c
90. +11 -2	tools/testing/nvdimm/Kbuild
91. +73 -12	tools/testing/nvdimm/test/iomap.c
92. +147 -62	tools/testing/nvdimm/test/nfit.c

+ 2 - 1
Documentation/filesystems/Locking

@@ -397,7 +397,8 @@ prototypes:
 	int (*release) (struct gendisk *, fmode_t);
 	int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
 	int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
-	int (*direct_access) (struct block_device *, sector_t, void **, unsigned long *);
+	int (*direct_access) (struct block_device *, sector_t, void __pmem **,
+				unsigned long *);
 	int (*media_changed) (struct gendisk *);
 	void (*unlock_native_capacity) (struct gendisk *);
 	int (*revalidate_disk) (struct gendisk *);
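For reference, a sketch of a block driver implementing the revised prototype; this mirrors the axonram and brd conversions later in this merge, but the example_dev structure and its fields are hypothetical:

    static long example_direct_access(struct block_device *bdev, sector_t sector,
    		void __pmem **kaddr, unsigned long *pfn)
    {
    	struct example_dev *ed = bdev->bd_disk->private_data;
    	resource_size_t offset = (resource_size_t)sector << 9;

    	/* hand back a __pmem-annotated pointer and its page frame */
    	*kaddr = (void __pmem *)(ed->virt_addr + offset);
    	*pfn = (ed->phys_addr + offset) >> PAGE_SHIFT;

    	/* bytes addressable from 'sector' to the end of the device */
    	return ed->size - offset;
    }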

+ 1 - 0
MAINTAINERS

@@ -6229,6 +6229,7 @@ Q:	https://patchwork.kernel.org/project/linux-nvdimm/list/
 S:	Supported
 F:	drivers/nvdimm/pmem.c
 F:	include/linux/pmem.h
+F:	arch/*/include/asm/pmem.h
 
 LINUX FOR IBM pSERIES (RS/6000)
 M:	Paul Mackerras <paulus@au.ibm.com>

+ 0 - 6
arch/arm/include/asm/memory.h

@@ -118,12 +118,6 @@
 #define DTCM_OFFSET	UL(0xfffe8000)
 #endif
 
-/*
- * Convert a physical address to a Page Frame Number and back
- */
-#define	__phys_to_pfn(paddr)	((unsigned long)((paddr) >> PAGE_SHIFT))
-#define	__pfn_to_phys(pfn)	((phys_addr_t)(pfn) << PAGE_SHIFT)
-
 /*
  * Convert a page to/from a physical address
  */

+ 1 - 1
arch/arm/mach-clps711x/board-cdb89712.c

@@ -95,7 +95,7 @@ static struct physmap_flash_data cdb89712_bootrom_pdata __initdata = {
 
 static struct resource cdb89712_bootrom_resources[] __initdata = {
 	DEFINE_RES_NAMED(CS7_PHYS_BASE, SZ_128, "BOOTROM", IORESOURCE_MEM |
-			 IORESOURCE_CACHEABLE | IORESOURCE_READONLY),
+			 IORESOURCE_READONLY),
 };
 
 static struct platform_device cdb89712_bootrom_pdev __initdata = {

+ 1 - 1
arch/arm/mach-shmobile/pm-rcar.c

@@ -12,7 +12,7 @@
 #include <linux/err.h>
 #include <linux/mm.h>
 #include <linux/spinlock.h>
-#include <asm/io.h>
+#include <linux/io.h>
 #include "pm-rcar.h"
 
 /* SYSC Common */

+ 0 - 6
arch/arm64/include/asm/memory.h

@@ -80,12 +80,6 @@
 #define __virt_to_phys(x)	(((phys_addr_t)(x) - PAGE_OFFSET + PHYS_OFFSET))
 #define __phys_to_virt(x)	((unsigned long)((x) - PHYS_OFFSET + PAGE_OFFSET))
 
-/*
- * Convert a physical address to a Page Frame Number and back
- */
-#define	__phys_to_pfn(paddr)	((unsigned long)((paddr) >> PAGE_SHIFT))
-#define	__pfn_to_phys(pfn)	((phys_addr_t)(pfn) << PAGE_SHIFT)
-
 /*
  * Convert a page to/from a physical address
  */

+ 1 - 0
arch/ia64/include/asm/io.h

@@ -435,6 +435,7 @@ static inline void __iomem * ioremap_cache (unsigned long phys_addr, unsigned lo
 {
 	return ioremap(phys_addr, size);
 }
+#define ioremap_cache ioremap_cache
 
 
 /*

+ 1 - 1
arch/ia64/kernel/cyclone.c

@@ -4,7 +4,7 @@
 #include <linux/errno.h>
 #include <linux/timex.h>
 #include <linux/clocksource.h>
-#include <asm/io.h>
+#include <linux/io.h>
 
 /* IBM Summit (EXA) Cyclone counter code*/
 #define CYCLONE_CBAR_ADDR 0xFEB00CD0

+ 2 - 2
arch/ia64/mm/init.c

@@ -645,7 +645,7 @@ mem_init (void)
 }
 
 #ifdef CONFIG_MEMORY_HOTPLUG
-int arch_add_memory(int nid, u64 start, u64 size)
+int arch_add_memory(int nid, u64 start, u64 size, bool for_device)
 {
 	pg_data_t *pgdat;
 	struct zone *zone;
@@ -656,7 +656,7 @@ int arch_add_memory(int nid, u64 start, u64 size)
 	pgdat = NODE_DATA(nid);
 
 	zone = pgdat->node_zones +
-		zone_for_memory(nid, start, size, ZONE_NORMAL);
+		zone_for_memory(nid, start, size, ZONE_NORMAL, for_device);
 	ret = __add_pages(nid, zone, start_pfn, nr_pages);
 
 	if (ret)

+ 1 - 1
arch/powerpc/kernel/pci_of_scan.c

@@ -102,7 +102,7 @@ static void of_pci_parse_addrs(struct device_node *node, struct pci_dev *dev)
 			res = &dev->resource[(i - PCI_BASE_ADDRESS_0) >> 2];
 		} else if (i == dev->rom_base_reg) {
 			res = &dev->resource[PCI_ROM_RESOURCE];
-			flags |= IORESOURCE_READONLY | IORESOURCE_CACHEABLE;
+			flags |= IORESOURCE_READONLY;
 		} else {
 			printk(KERN_ERR "PCI: bad cfg reg num 0x%x\n", i);
 			continue;

+ 2 - 2
arch/powerpc/mm/mem.c

@@ -113,7 +113,7 @@ int memory_add_physaddr_to_nid(u64 start)
 }
 #endif
 
-int arch_add_memory(int nid, u64 start, u64 size)
+int arch_add_memory(int nid, u64 start, u64 size, bool for_device)
 {
 	struct pglist_data *pgdata;
 	struct zone *zone;
@@ -128,7 +128,7 @@ int arch_add_memory(int nid, u64 start, u64 size)
 
 	/* this should work for most non-highmem platforms */
 	zone = pgdata->node_zones +
-		zone_for_memory(nid, start, size, 0);
+		zone_for_memory(nid, start, size, 0, for_device);
 
 	return __add_pages(nid, zone, start_pfn, nr_pages);
 }

+ 4 - 3
arch/powerpc/sysdev/axonram.c

@@ -141,13 +141,14 @@ axon_ram_make_request(struct request_queue *queue, struct bio *bio)
  */
 static long
 axon_ram_direct_access(struct block_device *device, sector_t sector,
-		       void **kaddr, unsigned long *pfn, long size)
+		       void __pmem **kaddr, unsigned long *pfn)
 {
 	struct axon_ram_bank *bank = device->bd_disk->private_data;
 	loff_t offset = (loff_t)sector << AXON_RAM_SECTOR_SHIFT;
+	void *addr = (void *)(bank->ph_addr + offset);
 
-	*kaddr = (void *)(bank->ph_addr + offset);
-	*pfn = virt_to_phys(*kaddr) >> PAGE_SHIFT;
+	*kaddr = (void __pmem *)addr;
+	*pfn = virt_to_phys(addr) >> PAGE_SHIFT;
 
 	return bank->size - offset;
 }

+ 1 - 1
arch/s390/mm/init.c

@@ -169,7 +169,7 @@ void __init free_initrd_mem(unsigned long start, unsigned long end)
 #endif
 
 #ifdef CONFIG_MEMORY_HOTPLUG
-int arch_add_memory(int nid, u64 start, u64 size)
+int arch_add_memory(int nid, u64 start, u64 size, bool for_device)
 {
 	unsigned long normal_end_pfn = PFN_DOWN(memblock_end_of_DRAM());
 	unsigned long dma_end_pfn = PFN_DOWN(MAX_DMA_ADDRESS);

+ 1 - 0
arch/sh/include/asm/io.h

@@ -342,6 +342,7 @@ ioremap_cache(phys_addr_t offset, unsigned long size)
 {
 	return __ioremap_mode(offset, size, PAGE_KERNEL);
 }
+#define ioremap_cache ioremap_cache
 
 #ifdef CONFIG_HAVE_IOREMAP_PROT
 static inline void __iomem *

+ 3 - 2
arch/sh/mm/init.c

@@ -485,7 +485,7 @@ void free_initrd_mem(unsigned long start, unsigned long end)
 #endif
 
 #ifdef CONFIG_MEMORY_HOTPLUG
-int arch_add_memory(int nid, u64 start, u64 size)
+int arch_add_memory(int nid, u64 start, u64 size, bool for_device)
 {
 	pg_data_t *pgdat;
 	unsigned long start_pfn = PFN_DOWN(start);
@@ -496,7 +496,8 @@ int arch_add_memory(int nid, u64 start, u64 size)
 
 	/* We only have ZONE_NORMAL, so this is easy.. */
 	ret = __add_pages(nid, pgdat->node_zones +
-			zone_for_memory(nid, start, size, ZONE_NORMAL),
+			zone_for_memory(nid, start, size, ZONE_NORMAL,
+			for_device),
 			start_pfn, nr_pages);
 	if (unlikely(ret))
 		printk("%s: Failed, __add_pages() == %d\n", __func__, ret);

+ 1 - 2
arch/sparc/kernel/pci.c

@@ -231,8 +231,7 @@ static void pci_parse_of_addrs(struct platform_device *op,
 			res = &dev->resource[(i - PCI_BASE_ADDRESS_0) >> 2];
 		} else if (i == dev->rom_base_reg) {
 			res = &dev->resource[PCI_ROM_RESOURCE];
-			flags |= IORESOURCE_READONLY | IORESOURCE_CACHEABLE
-			      | IORESOURCE_SIZEALIGN;
+			flags |= IORESOURCE_READONLY | IORESOURCE_SIZEALIGN;
 		} else {
 			printk(KERN_ERR "PCI: bad cfg reg num 0x%x\n", i);
 			continue;

+ 1 - 1
arch/tile/mm/init.c

@@ -863,7 +863,7 @@ void __init mem_init(void)
  * memory to the highmem for now.
  */
 #ifndef CONFIG_NEED_MULTIPLE_NODES
-int arch_add_memory(u64 start, u64 size)
+int arch_add_memory(u64 start, u64 size, bool for_device)
 {
 	struct pglist_data *pgdata = &contig_page_data;
 	struct zone *zone = pgdata->node_zones + MAX_NR_ZONES-1;

+ 0 - 6
arch/unicore32/include/asm/memory.h

@@ -60,12 +60,6 @@
 #define __phys_to_virt(x)	((x) - PHYS_OFFSET + PAGE_OFFSET)
 #endif
 
-/*
- * Convert a physical address to a Page Frame Number and back
- */
-#define	__phys_to_pfn(paddr)	((paddr) >> PAGE_SHIFT)
-#define	__pfn_to_phys(pfn)	((pfn) << PAGE_SHIFT)
-
 /*
  * Convert a page to/from a physical address
  */

+ 7 - 2
arch/x86/Kconfig

@@ -27,7 +27,8 @@ config X86
 	select ARCH_HAS_ELF_RANDOMIZE
 	select ARCH_HAS_FAST_MULTIPLIER
 	select ARCH_HAS_GCOV_PROFILE_ALL
-	select ARCH_HAS_PMEM_API
+	select ARCH_HAS_PMEM_API		if X86_64
+	select ARCH_HAS_MMIO_FLUSH
 	select ARCH_HAS_SG_CHAIN
 	select ARCH_HAVE_NMI_SAFE_CMPXCHG
 	select ARCH_MIGHT_HAVE_ACPI_PDC		if ACPI
@@ -1450,10 +1451,14 @@ config ILLEGAL_POINTER_VALUE
 
 source "mm/Kconfig"
 
+config X86_PMEM_LEGACY_DEVICE
+	bool
+
 config X86_PMEM_LEGACY
-	bool "Support non-standard NVDIMMs and ADR protected memory"
+	tristate "Support non-standard NVDIMMs and ADR protected memory"
 	depends on PHYS_ADDR_T_64BIT
 	depends on BLK_DEV
+	select X86_PMEM_LEGACY_DEVICE
 	select LIBNVDIMM
 	help
 	  Treat memory marked using the non-standard e820 type of 12 as used

+ 2 - 71
arch/x86/include/asm/cacheflush.h

@@ -89,6 +89,8 @@ int set_pages_rw(struct page *page, int numpages);
 
 void clflush_cache_range(void *addr, unsigned int size);
 
+#define mmio_flush_range(addr, size) clflush_cache_range(addr, size)
+
 #ifdef CONFIG_DEBUG_RODATA
 void mark_rodata_ro(void);
 extern const int rodata_test_data;
@@ -109,75 +111,4 @@ static inline int rodata_test(void)
 }
 #endif
 
-#ifdef ARCH_HAS_NOCACHE_UACCESS
-
-/**
- * arch_memcpy_to_pmem - copy data to persistent memory
- * @dst: destination buffer for the copy
- * @src: source buffer for the copy
- * @n: length of the copy in bytes
- *
- * Copy data to persistent memory media via non-temporal stores so that
- * a subsequent arch_wmb_pmem() can flush cpu and memory controller
- * write buffers to guarantee durability.
- */
-static inline void arch_memcpy_to_pmem(void __pmem *dst, const void *src,
-		size_t n)
-{
-	int unwritten;
-
-	/*
-	 * We are copying between two kernel buffers, if
-	 * __copy_from_user_inatomic_nocache() returns an error (page
-	 * fault) we would have already reported a general protection fault
-	 * before the WARN+BUG.
-	 */
-	unwritten = __copy_from_user_inatomic_nocache((void __force *) dst,
-			(void __user *) src, n);
-	if (WARN(unwritten, "%s: fault copying %p <- %p unwritten: %d\n",
-				__func__, dst, src, unwritten))
-		BUG();
-}
-
-/**
- * arch_wmb_pmem - synchronize writes to persistent memory
- *
- * After a series of arch_memcpy_to_pmem() operations this drains data
- * from cpu write buffers and any platform (memory controller) buffers
- * to ensure that written data is durable on persistent memory media.
- */
-static inline void arch_wmb_pmem(void)
-{
-	/*
-	 * wmb() to 'sfence' all previous writes such that they are
-	 * architecturally visible to 'pcommit'.  Note, that we've
-	 * already arranged for pmem writes to avoid the cache via
-	 * arch_memcpy_to_pmem().
-	 */
-	wmb();
-	pcommit_sfence();
-}
-
-static inline bool __arch_has_wmb_pmem(void)
-{
-#ifdef CONFIG_X86_64
-	/*
-	 * We require that wmb() be an 'sfence', that is only guaranteed on
-	 * 64-bit builds
-	 */
-	return static_cpu_has(X86_FEATURE_PCOMMIT);
-#else
-	return false;
-#endif
-}
-#else /* ARCH_HAS_NOCACHE_UACCESS i.e. ARCH=um */
-extern void arch_memcpy_to_pmem(void __pmem *dst, const void *src, size_t n);
-extern void arch_wmb_pmem(void);
-
-static inline bool __arch_has_wmb_pmem(void)
-{
-	return false;
-}
-#endif
-
 #endif /* _ASM_X86_CACHEFLUSH_H */

+ 0 - 6
arch/x86/include/asm/io.h

@@ -250,12 +250,6 @@ static inline void flush_write_buffers(void)
 #endif
 }
 
-static inline void __pmem *arch_memremap_pmem(resource_size_t offset,
-	unsigned long size)
-{
-	return (void __force __pmem *) ioremap_cache(offset, size);
-}
-
 #endif /* __KERNEL__ */
 
 extern void native_io_delay(void);

+ 153 - 0
arch/x86/include/asm/pmem.h

@@ -0,0 +1,153 @@
+/*
+ * Copyright(c) 2015 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+#ifndef __ASM_X86_PMEM_H__
+#define __ASM_X86_PMEM_H__
+
+#include <linux/uaccess.h>
+#include <asm/cacheflush.h>
+#include <asm/cpufeature.h>
+#include <asm/special_insns.h>
+
+#ifdef CONFIG_ARCH_HAS_PMEM_API
+/**
+ * arch_memcpy_to_pmem - copy data to persistent memory
+ * @dst: destination buffer for the copy
+ * @src: source buffer for the copy
+ * @n: length of the copy in bytes
+ *
+ * Copy data to persistent memory media via non-temporal stores so that
+ * a subsequent arch_wmb_pmem() can flush cpu and memory controller
+ * write buffers to guarantee durability.
+ */
+static inline void arch_memcpy_to_pmem(void __pmem *dst, const void *src,
+		size_t n)
+{
+	int unwritten;
+
+	/*
+	 * We are copying between two kernel buffers, if
+	 * __copy_from_user_inatomic_nocache() returns an error (page
+	 * fault) we would have already reported a general protection fault
+	 * before the WARN+BUG.
+	 */
+	unwritten = __copy_from_user_inatomic_nocache((void __force *) dst,
+			(void __user *) src, n);
+	if (WARN(unwritten, "%s: fault copying %p <- %p unwritten: %d\n",
+				__func__, dst, src, unwritten))
+		BUG();
+}
+
+/**
+ * arch_wmb_pmem - synchronize writes to persistent memory
+ *
+ * After a series of arch_memcpy_to_pmem() operations this drains data
+ * from cpu write buffers and any platform (memory controller) buffers
+ * to ensure that written data is durable on persistent memory media.
+ */
+static inline void arch_wmb_pmem(void)
+{
+	/*
+	 * wmb() to 'sfence' all previous writes such that they are
+	 * architecturally visible to 'pcommit'.  Note, that we've
+	 * already arranged for pmem writes to avoid the cache via
+	 * arch_memcpy_to_pmem().
+	 */
+	wmb();
+	pcommit_sfence();
+}
+
+/**
+ * __arch_wb_cache_pmem - write back a cache range with CLWB
+ * @vaddr:	virtual start address
+ * @size:	number of bytes to write back
+ *
+ * Write back a cache range using the CLWB (cache line write back)
+ * instruction.  This function requires explicit ordering with an
+ * arch_wmb_pmem() call.  This API is internal to the x86 PMEM implementation.
+ */
+static inline void __arch_wb_cache_pmem(void *vaddr, size_t size)
+{
+	u16 x86_clflush_size = boot_cpu_data.x86_clflush_size;
+	unsigned long clflush_mask = x86_clflush_size - 1;
+	void *vend = vaddr + size;
+	void *p;
+
+	for (p = (void *)((unsigned long)vaddr & ~clflush_mask);
+	     p < vend; p += x86_clflush_size)
+		clwb(p);
+}
+
+/*
+ * copy_from_iter_nocache() on x86 only uses non-temporal stores for iovec
+ * iterators, so for other types (bvec & kvec) we must do a cache write-back.
+ */
+static inline bool __iter_needs_pmem_wb(struct iov_iter *i)
+{
+	return iter_is_iovec(i) == false;
+}
+
+/**
+ * arch_copy_from_iter_pmem - copy data from an iterator to PMEM
+ * @addr:	PMEM destination address
+ * @bytes:	number of bytes to copy
+ * @i:		iterator with source data
+ *
+ * Copy data from the iterator 'i' to the PMEM buffer starting at 'addr'.
+ * This function requires explicit ordering with an arch_wmb_pmem() call.
+ */
+static inline size_t arch_copy_from_iter_pmem(void __pmem *addr, size_t bytes,
+		struct iov_iter *i)
+{
+	void *vaddr = (void __force *)addr;
+	size_t len;
+
+	/* TODO: skip the write-back by always using non-temporal stores */
+	len = copy_from_iter_nocache(vaddr, bytes, i);
+
+	if (__iter_needs_pmem_wb(i))
+		__arch_wb_cache_pmem(vaddr, bytes);
+
+	return len;
+}
+
+/**
+ * arch_clear_pmem - zero a PMEM memory range
+ * @addr:	virtual start address
+ * @size:	number of bytes to zero
+ *
+ * Write zeros into the memory range starting at 'addr' for 'size' bytes.
+ * This function requires explicit ordering with an arch_wmb_pmem() call.
+ */
+static inline void arch_clear_pmem(void __pmem *addr, size_t size)
+{
+	void *vaddr = (void __force *)addr;
+
+	/* TODO: implement the zeroing via non-temporal writes */
+	if (size == PAGE_SIZE && ((unsigned long)vaddr & ~PAGE_MASK) == 0)
+		clear_page(vaddr);
+	else
+		memset(vaddr, 0, size);
+
+	__arch_wb_cache_pmem(vaddr, size);
+}
+
+static inline bool __arch_has_wmb_pmem(void)
+{
+	/*
+	 * We require that wmb() be an 'sfence', that is only guaranteed on
+	 * 64-bit builds
+	 */
+	return static_cpu_has(X86_FEATURE_PCOMMIT);
+}
+#endif /* CONFIG_ARCH_HAS_PMEM_API */
+#endif /* __ASM_X86_PMEM_H__ */
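These arch helpers back the generic wrappers in include/linux/pmem.h. A hedged sketch of the write-then-commit pattern the pmem driver and the DAX path follow — the wrapper names come from this merge, the function around them is hypothetical:

    #include <linux/pmem.h>

    /* hypothetical fragment: make 'len' bytes at 'dst' durable on media */
    static void example_commit(void __pmem *dst, const void *src, size_t len)
    {
    	/* non-temporal copy: the data bypasses the cpu cache */
    	memcpy_to_pmem(dst, src, len);

    	/* drain cpu and memory-controller write buffers */
    	wmb_pmem();
    }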

+ 1 - 1
arch/x86/include/uapi/asm/e820.h

@@ -37,7 +37,7 @@
 /*
  * This is a non-standardized way to represent ADR or NVDIMM regions that
  * persist over a reboot.  The kernel will ignore their special capabilities
- * unless the CONFIG_X86_PMEM_LEGACY=y option is set.
+ * unless the CONFIG_X86_PMEM_LEGACY option is set.
  *
  * ( Note that older platforms also used 6 for the same type of memory,
  *   but newer versions switched to 12 as 6 was assigned differently.  Some

+ 1 - 1
arch/x86/kernel/Makefile

@@ -94,7 +94,7 @@ obj-$(CONFIG_KVM_GUEST)		+= kvm.o kvmclock.o
 obj-$(CONFIG_PARAVIRT)		+= paravirt.o paravirt_patch_$(BITS).o
 obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= paravirt-spinlocks.o
 obj-$(CONFIG_PARAVIRT_CLOCK)	+= pvclock.o
-obj-$(CONFIG_X86_PMEM_LEGACY)	+= pmem.o
+obj-$(CONFIG_X86_PMEM_LEGACY_DEVICE) += pmem.o
 
 obj-$(CONFIG_PCSPKR_PLATFORM)	+= pcspeaker.o
 

+ 8 - 71
arch/x86/kernel/pmem.c

@@ -3,80 +3,17 @@
  * Copyright (c) 2015, Intel Corporation.
  */
 #include <linux/platform_device.h>
-#include <linux/libnvdimm.h>
 #include <linux/module.h>
-#include <asm/e820.h>
-
-static void e820_pmem_release(struct device *dev)
-{
-	struct nvdimm_bus *nvdimm_bus = dev->platform_data;
-
-	if (nvdimm_bus)
-		nvdimm_bus_unregister(nvdimm_bus);
-}
-
-static struct platform_device e820_pmem = {
-	.name = "e820_pmem",
-	.id = -1,
-	.dev = {
-		.release = e820_pmem_release,
-	},
-};
-
-static const struct attribute_group *e820_pmem_attribute_groups[] = {
-	&nvdimm_bus_attribute_group,
-	NULL,
-};
-
-static const struct attribute_group *e820_pmem_region_attribute_groups[] = {
-	&nd_region_attribute_group,
-	&nd_device_attribute_group,
-	NULL,
-};
 
 static __init int register_e820_pmem(void)
 {
-	static struct nvdimm_bus_descriptor nd_desc;
-	struct device *dev = &e820_pmem.dev;
-	struct nvdimm_bus *nvdimm_bus;
-	int rc, i;
-
-	rc = platform_device_register(&e820_pmem);
-	if (rc)
-		return rc;
-
-	nd_desc.attr_groups = e820_pmem_attribute_groups;
-	nd_desc.provider_name = "e820";
-	nvdimm_bus = nvdimm_bus_register(dev, &nd_desc);
-	if (!nvdimm_bus)
-		goto err;
-	dev->platform_data = nvdimm_bus;
-
-	for (i = 0; i < e820.nr_map; i++) {
-		struct e820entry *ei = &e820.map[i];
-		struct resource res = {
-			.flags	= IORESOURCE_MEM,
-			.start	= ei->addr,
-			.end	= ei->addr + ei->size - 1,
-		};
-		struct nd_region_desc ndr_desc;
-
-		if (ei->type != E820_PRAM)
-			continue;
-
-		memset(&ndr_desc, 0, sizeof(ndr_desc));
-		ndr_desc.res = &res;
-		ndr_desc.attr_groups = e820_pmem_region_attribute_groups;
-		ndr_desc.numa_node = NUMA_NO_NODE;
-		if (!nvdimm_pmem_region_create(nvdimm_bus, &ndr_desc))
-			goto err;
-	}
-
-	return 0;
-
- err:
-	dev_err(dev, "failed to register legacy persistent memory ranges\n");
-	platform_device_unregister(&e820_pmem);
-	return -ENXIO;
+	struct platform_device *pdev;
+
+	/*
+	 * See drivers/nvdimm/e820.c for the implementation, this is
+	 * simply here to trigger the module to load on demand.
+	 */
+	pdev = platform_device_alloc("e820_pmem", -1);
+	return platform_device_add(pdev);
 }
 device_initcall(register_e820_pmem);
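The initcall above only registers a device; the driver half now lives in the new drivers/nvdimm/e820.c module and binds by platform-device name. Roughly, as a sketch of the standard platform-driver pattern rather than the verbatim file:

    /* hypothetical module side; e820_pmem_probe walks e820 for E820_PRAM */
    static struct platform_driver e820_pmem_driver = {
    	.probe = e820_pmem_probe,
    	.driver = {
    		.name = "e820_pmem",
    	},
    };
    module_platform_driver(e820_pmem_driver);
    MODULE_ALIAS("platform:e820_pmem*");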

+ 2 - 2
arch/x86/mm/init_32.c

@@ -823,11 +823,11 @@ void __init mem_init(void)
 }
 
 #ifdef CONFIG_MEMORY_HOTPLUG
-int arch_add_memory(int nid, u64 start, u64 size)
+int arch_add_memory(int nid, u64 start, u64 size, bool for_device)
 {
 	struct pglist_data *pgdata = NODE_DATA(nid);
 	struct zone *zone = pgdata->node_zones +
-		zone_for_memory(nid, start, size, ZONE_HIGHMEM);
+		zone_for_memory(nid, start, size, ZONE_HIGHMEM, for_device);
 	unsigned long start_pfn = start >> PAGE_SHIFT;
 	unsigned long nr_pages = size >> PAGE_SHIFT;
 

+ 2 - 2
arch/x86/mm/init_64.c

@@ -687,11 +687,11 @@ static void  update_end_of_memory_vars(u64 start, u64 size)
  * Memory is added always to NORMAL zone. This means you will never get
  * additional DMA/DMA32 memory.
  */
-int arch_add_memory(int nid, u64 start, u64 size)
+int arch_add_memory(int nid, u64 start, u64 size, bool for_device)
 {
 	struct pglist_data *pgdat = NODE_DATA(nid);
 	struct zone *zone = pgdat->node_zones +
-		zone_for_memory(nid, start, size, ZONE_NORMAL);
+		zone_for_memory(nid, start, size, ZONE_NORMAL, for_device);
 	unsigned long start_pfn = start >> PAGE_SHIFT;
 	unsigned long nr_pages = size >> PAGE_SHIFT;
 	int ret;

+ 1 - 0
arch/xtensa/include/asm/io.h

@@ -57,6 +57,7 @@ static inline void __iomem *ioremap_cache(unsigned long offset,
 	else
 		BUG();
 }
+#define ioremap_cache ioremap_cache
 
 #define ioremap_wc ioremap_nocache
 #define ioremap_wt ioremap_nocache
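The self-referential '#define ioremap_cache ioremap_cache' added for ia64, sh, and xtensa looks odd in isolation; it is a detection idiom that lets generic code test at preprocessor time whether the architecture supplies a real cacheable mapping and substitute a fallback only when the symbol is absent, along these lines (illustrative, not the verbatim generic header):

    #ifndef ioremap_cache
    /* arch provided no cached variant; fall back to the default mapping */
    #define ioremap_cache ioremap
    #endif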

+ 1 - 0
drivers/acpi/Kconfig

@@ -417,6 +417,7 @@ config ACPI_NFIT
 	tristate "ACPI NVDIMM Firmware Interface Table (NFIT)"
 	depends on PHYS_ADDR_T_64BIT
 	depends on BLK_DEV
+	depends on ARCH_HAS_MMIO_FLUSH
 	select LIBNVDIMM
 	help
 	  Infrastructure to probe ACPI 6 compliant platforms for

+ 36 - 43
drivers/acpi/nfit.c

@@ -20,6 +20,7 @@
 #include <linux/sort.h>
 #include <linux/pmem.h>
 #include <linux/io.h>
+#include <asm/cacheflush.h>
 #include "nfit.h"
 
 /*
@@ -764,9 +765,7 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc,
 	struct acpi_device *adev, *adev_dimm;
 	struct device *dev = acpi_desc->dev;
 	const u8 *uuid = to_nfit_uuid(NFIT_DEV_DIMM);
-	unsigned long long sta;
-	int i, rc = -ENODEV;
-	acpi_status status;
+	int i;
 
 	nfit_mem->dsm_mask = acpi_desc->dimm_dsm_force_en;
 	adev = to_acpi_dev(acpi_desc);
@@ -781,25 +780,11 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc,
 		return force_enable_dimms ? 0 : -ENODEV;
 	}
 
-	status = acpi_evaluate_integer(adev_dimm->handle, "_STA", NULL, &sta);
-	if (status == AE_NOT_FOUND) {
-		dev_dbg(dev, "%s missing _STA, assuming enabled...\n",
-				dev_name(&adev_dimm->dev));
-		rc = 0;
-	} else if (ACPI_FAILURE(status))
-		dev_err(dev, "%s failed to retrieve_STA, disabling...\n",
-				dev_name(&adev_dimm->dev));
-	else if ((sta & ACPI_STA_DEVICE_ENABLED) == 0)
-		dev_info(dev, "%s disabled by firmware\n",
-				dev_name(&adev_dimm->dev));
-	else
-		rc = 0;
-
 	for (i = ND_CMD_SMART; i <= ND_CMD_VENDOR; i++)
 		if (acpi_check_dsm(adev_dimm->handle, uuid, 1, 1ULL << i))
 			set_bit(i, &nfit_mem->dsm_mask);
 
-	return force_enable_dimms ? 0 : rc;
+	return 0;
 }
 
 static int acpi_nfit_register_dimms(struct acpi_nfit_desc *acpi_desc)
@@ -868,6 +853,7 @@ static void acpi_nfit_init_dsms(struct acpi_nfit_desc *acpi_desc)
 	struct acpi_device *adev;
 	int i;
 
+	nd_desc->dsm_mask = acpi_desc->bus_dsm_force_en;
 	adev = to_acpi_dev(acpi_desc);
 	if (!adev)
 		return;
@@ -1032,7 +1018,7 @@ static u32 read_blk_stat(struct nfit_blk *nfit_blk, unsigned int bw)
 	if (mmio->num_lines)
 		offset = to_interleave_offset(offset, mmio);
 
-	return readl(mmio->base + offset);
+	return readl(mmio->addr.base + offset);
 }
 
 static void write_blk_ctl(struct nfit_blk *nfit_blk, unsigned int bw,
@@ -1057,11 +1043,11 @@ static void write_blk_ctl(struct nfit_blk *nfit_blk, unsigned int bw,
 	if (mmio->num_lines)
 		offset = to_interleave_offset(offset, mmio);
 
-	writeq(cmd, mmio->base + offset);
+	writeq(cmd, mmio->addr.base + offset);
 	wmb_blk(nfit_blk);
 
 	if (nfit_blk->dimm_flags & ND_BLK_DCR_LATCH)
-		readq(mmio->base + offset);
+		readq(mmio->addr.base + offset);
 }
 
 static int acpi_nfit_blk_single_io(struct nfit_blk *nfit_blk,
@@ -1093,11 +1079,16 @@ static int acpi_nfit_blk_single_io(struct nfit_blk *nfit_blk,
 		}
 
 		if (rw)
-			memcpy_to_pmem(mmio->aperture + offset,
+			memcpy_to_pmem(mmio->addr.aperture + offset,
 					iobuf + copied, c);
-		else
+		else {
+			if (nfit_blk->dimm_flags & ND_BLK_READ_FLUSH)
+				mmio_flush_range((void __force *)
+					mmio->addr.aperture + offset, c);
+
 			memcpy_from_pmem(iobuf + copied,
-					mmio->aperture + offset, c);
+					mmio->addr.aperture + offset, c);
+		}
 
 		copied += c;
 		len -= c;
@@ -1144,7 +1135,10 @@ static void nfit_spa_mapping_release(struct kref *kref)
 
 	WARN_ON(!mutex_is_locked(&acpi_desc->spa_map_mutex));
 	dev_dbg(acpi_desc->dev, "%s: SPA%d\n", __func__, spa->range_index);
-	iounmap(spa_map->iomem);
+	if (spa_map->type == SPA_MAP_APERTURE)
+		memunmap((void __force *)spa_map->addr.aperture);
+	else
+		iounmap(spa_map->addr.base);
 	release_mem_region(spa->address, spa->length);
 	list_del(&spa_map->list);
 	kfree(spa_map);
@@ -1190,7 +1184,7 @@ static void __iomem *__nfit_spa_map(struct acpi_nfit_desc *acpi_desc,
 	spa_map = find_spa_mapping(acpi_desc, spa);
 	if (spa_map) {
 		kref_get(&spa_map->kref);
-		return spa_map->iomem;
+		return spa_map->addr.base;
 	}
 
 	spa_map = kzalloc(sizeof(*spa_map), GFP_KERNEL);
@@ -1206,20 +1200,19 @@ static void __iomem *__nfit_spa_map(struct acpi_nfit_desc *acpi_desc,
 	if (!res)
 		goto err_mem;
 
-	if (type == SPA_MAP_APERTURE) {
-		/*
-		 * TODO: memremap_pmem() support, but that requires cache
-		 * flushing when the aperture is moved.
-		 */
-		spa_map->iomem = ioremap_wc(start, n);
-	} else
-		spa_map->iomem = ioremap_nocache(start, n);
+	spa_map->type = type;
+	if (type == SPA_MAP_APERTURE)
+		spa_map->addr.aperture = (void __pmem *)memremap(start, n,
+							ARCH_MEMREMAP_PMEM);
+	else
+		spa_map->addr.base = ioremap_nocache(start, n);
+
 
-	if (!spa_map->iomem)
+	if (!spa_map->addr.base)
 		goto err_map;
 
 	list_add_tail(&spa_map->list, &acpi_desc->spa_maps);
-	return spa_map->iomem;
+	return spa_map->addr.base;
 
  err_map:
 	release_mem_region(start, n);
@@ -1282,7 +1275,7 @@ static int acpi_nfit_blk_get_flags(struct nvdimm_bus_descriptor *nd_desc,
 		nfit_blk->dimm_flags = flags.flags;
 	else if (rc == -ENOTTY) {
 		/* fall back to a conservative default */
-		nfit_blk->dimm_flags = ND_BLK_DCR_LATCH;
+		nfit_blk->dimm_flags = ND_BLK_DCR_LATCH | ND_BLK_READ_FLUSH;
 		rc = 0;
 	} else
 		rc = -ENXIO;
@@ -1322,9 +1315,9 @@ static int acpi_nfit_blk_region_enable(struct nvdimm_bus *nvdimm_bus,
 	/* map block aperture memory */
 	nfit_blk->bdw_offset = nfit_mem->bdw->offset;
 	mmio = &nfit_blk->mmio[BDW];
-	mmio->base = nfit_spa_map(acpi_desc, nfit_mem->spa_bdw,
+	mmio->addr.base = nfit_spa_map(acpi_desc, nfit_mem->spa_bdw,
 			SPA_MAP_APERTURE);
-	if (!mmio->base) {
+	if (!mmio->addr.base) {
 		dev_dbg(dev, "%s: %s failed to map bdw\n", __func__,
 				nvdimm_name(nvdimm));
 		return -ENOMEM;
@@ -1345,9 +1338,9 @@ static int acpi_nfit_blk_region_enable(struct nvdimm_bus *nvdimm_bus,
 	nfit_blk->cmd_offset = nfit_mem->dcr->command_offset;
 	nfit_blk->stat_offset = nfit_mem->dcr->status_offset;
 	mmio = &nfit_blk->mmio[DCR];
-	mmio->base = nfit_spa_map(acpi_desc, nfit_mem->spa_dcr,
+	mmio->addr.base = nfit_spa_map(acpi_desc, nfit_mem->spa_dcr,
 			SPA_MAP_CONTROL);
-	if (!mmio->base) {
+	if (!mmio->addr.base) {
 		dev_dbg(dev, "%s: %s failed to map dcr\n", __func__,
 				nvdimm_name(nvdimm));
 		return -ENOMEM;
@@ -1379,7 +1372,7 @@ static int acpi_nfit_blk_region_enable(struct nvdimm_bus *nvdimm_bus,
 			return -ENOMEM;
 	}
 
-	if (!arch_has_pmem_api() && !nfit_blk->nvdimm_flush)
+	if (!arch_has_wmb_pmem() && !nfit_blk->nvdimm_flush)
 		dev_warn(dev, "unable to guarantee persistence of writes\n");
 
 	if (mmio->line_size == 0)
@@ -1414,7 +1407,7 @@ static void acpi_nfit_blk_region_disable(struct nvdimm_bus *nvdimm_bus,
 	for (i = 0; i < 2; i++) {
 		struct nfit_blk_mmio *mmio = &nfit_blk->mmio[i];
 
-		if (mmio->base)
+		if (mmio->addr.base)
 			nfit_spa_unmap(acpi_desc, mmio->spa);
 	}
 	nd_blk_region_set_provider_data(ndbr, NULL);

+ 12 - 5
drivers/acpi/nfit.h

@@ -41,6 +41,7 @@ enum nfit_uuids {
 };
 
 enum {
+	ND_BLK_READ_FLUSH = 1,
 	ND_BLK_DCR_LATCH = 2,
 };
 
@@ -107,6 +108,7 @@ struct acpi_nfit_desc {
 	struct nvdimm_bus *nvdimm_bus;
 	struct device *dev;
 	unsigned long dimm_dsm_force_en;
+	unsigned long bus_dsm_force_en;
 	int (*blk_do_io)(struct nd_blk_region *ndbr, resource_size_t dpa,
 			void *iobuf, u64 len, int rw);
 };
@@ -116,12 +118,16 @@ enum nd_blk_mmio_selector {
 	DCR,
 };
 
+struct nd_blk_addr {
+	union {
+		void __iomem *base;
+		void __pmem  *aperture;
+	};
+};
+
 struct nfit_blk {
 	struct nfit_blk_mmio {
-		union {
-			void __iomem *base;
-			void __pmem  *aperture;
-		};
+		struct nd_blk_addr addr;
 		u64 size;
 		u64 base_offset;
 		u32 line_size;
@@ -148,7 +154,8 @@ struct nfit_spa_mapping {
 	struct acpi_nfit_system_address *spa;
 	struct list_head list;
 	struct kref kref;
-	void __iomem *iomem;
+	enum spa_map_type type;
+	struct nd_blk_addr addr;
 };
 
 static inline struct nfit_spa_mapping *to_spa_map(struct kref *kref)

+ 2 - 6
drivers/block/brd.c

@@ -374,7 +374,7 @@ static int brd_rw_page(struct block_device *bdev, sector_t sector,
 
 #ifdef CONFIG_BLK_DEV_RAM_DAX
 static long brd_direct_access(struct block_device *bdev, sector_t sector,
-			void **kaddr, unsigned long *pfn, long size)
+			void __pmem **kaddr, unsigned long *pfn)
 {
 	struct brd_device *brd = bdev->bd_disk->private_data;
 	struct page *page;
@@ -384,13 +384,9 @@ static long brd_direct_access(struct block_device *bdev, sector_t sector,
 	page = brd_insert_page(brd, sector);
 	if (!page)
 		return -ENOSPC;
-	*kaddr = page_address(page);
+	*kaddr = (void __pmem *)page_address(page);
 	*pfn = page_to_pfn(page);
 
-	/*
-	 * TODO: If size > PAGE_SIZE, we could look to see if the next page in
-	 * the file happens to be mapped to the next page of physical RAM.
-	 */
 	return PAGE_SIZE;
 }
 #else

+ 1 - 1
drivers/isdn/icn/icn.h

@@ -38,7 +38,7 @@ typedef struct icn_cdef {
 #include <linux/errno.h>
 #include <linux/fs.h>
 #include <linux/major.h>
-#include <asm/io.h>
+#include <linux/io.h>
 #include <linux/kernel.h>
 #include <linux/signal.h>
 #include <linux/slab.h>

+ 1 - 1
drivers/mtd/devices/slram.c

@@ -41,7 +41,7 @@
 #include <linux/fs.h>
 #include <linux/ioctl.h>
 #include <linux/init.h>
-#include <asm/io.h>
+#include <linux/io.h>
 
 #include <linux/mtd/mtd.h>
 

+ 1 - 1
drivers/mtd/nand/diskonchip.c

@@ -24,7 +24,7 @@
 #include <linux/rslib.h>
 #include <linux/moduleparam.h>
 #include <linux/slab.h>
-#include <asm/io.h>
+#include <linux/io.h>
 
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/nand.h>

+ 1 - 1
drivers/mtd/onenand/generic.c

@@ -18,7 +18,7 @@
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/onenand.h>
 #include <linux/mtd/partitions.h>
-#include <asm/io.h>
+#include <linux/io.h>
 
 /*
  * Note: Driver name and platform data format have been updated!

+ 23 - 0
drivers/nvdimm/Kconfig

@@ -21,6 +21,7 @@ config BLK_DEV_PMEM
 	default LIBNVDIMM
 	depends on HAS_IOMEM
 	select ND_BTT if BTT
+	select ND_PFN if NVDIMM_PFN
 	help
 	  Memory ranges for PMEM are described by either an NFIT
 	  (NVDIMM Firmware Interface Table, see CONFIG_NFIT_ACPI), a
@@ -47,12 +48,16 @@ config ND_BLK
 	  (CONFIG_ACPI_NFIT), or otherwise exposes BLK-mode
 	  capabilities.
 
+config ND_CLAIM
+	bool
+
 config ND_BTT
 	tristate
 
 config BTT
 	bool "BTT: Block Translation Table (atomic sector updates)"
 	default y if LIBNVDIMM
+	select ND_CLAIM
 	help
 	  The Block Translation Table (BTT) provides atomic sector
 	  update semantics for persistent memory devices, so that
@@ -65,4 +70,22 @@ config BTT
 
 	  Select Y if unsure
 
+config ND_PFN
+	tristate
+
+config NVDIMM_PFN
+	bool "PFN: Map persistent (device) memory"
+	default LIBNVDIMM
+	depends on ZONE_DEVICE
+	select ND_CLAIM
+	help
+	  Map persistent memory, i.e. advertise it to the memory
+	  management sub-system.  By default persistent memory does
+	  not support direct I/O, RDMA, or any other usage that
+	  requires a 'struct page' to mediate an I/O request.  This
+	  driver allocates and initializes the infrastructure needed
+	  to support those use cases.
+
+	  Select Y if unsure
+
 endif

+ 5 - 0
drivers/nvdimm/Makefile

@@ -2,6 +2,7 @@ obj-$(CONFIG_LIBNVDIMM) += libnvdimm.o
 obj-$(CONFIG_BLK_DEV_PMEM) += nd_pmem.o
 obj-$(CONFIG_ND_BTT) += nd_btt.o
 obj-$(CONFIG_ND_BLK) += nd_blk.o
+obj-$(CONFIG_X86_PMEM_LEGACY) += nd_e820.o
 
 nd_pmem-y := pmem.o
 
@@ -9,6 +10,8 @@ nd_btt-y := btt.o
 
 nd_blk-y := blk.o
 
+nd_e820-y := e820.o
+
 libnvdimm-y := core.o
 libnvdimm-y += bus.o
 libnvdimm-y += dimm_devs.o
@@ -17,4 +20,6 @@ libnvdimm-y += region_devs.o
 libnvdimm-y += region.o
 libnvdimm-y += namespace_devs.o
 libnvdimm-y += label.o
+libnvdimm-$(CONFIG_ND_CLAIM) += claim.o
 libnvdimm-$(CONFIG_BTT) += btt_devs.o
+libnvdimm-$(CONFIG_NVDIMM_PFN) += pfn_devs.o

+ 13 - 37
drivers/nvdimm/btt.c

@@ -582,33 +582,6 @@ static void free_arenas(struct btt *btt)
 	}
 }
 
-/*
- * This function checks if the metadata layout is valid and error free
- */
-static int arena_is_valid(struct arena_info *arena, struct btt_sb *super,
-				u8 *uuid, u32 lbasize)
-{
-	u64 checksum;
-
-	if (memcmp(super->uuid, uuid, 16))
-		return 0;
-
-	checksum = le64_to_cpu(super->checksum);
-	super->checksum = 0;
-	if (checksum != nd_btt_sb_checksum(super))
-		return 0;
-	super->checksum = cpu_to_le64(checksum);
-
-	if (lbasize != le32_to_cpu(super->external_lbasize))
-		return 0;
-
-	/* TODO: figure out action for this */
-	if ((le32_to_cpu(super->flags) & IB_FLAG_ERROR_MASK) != 0)
-		dev_info(to_dev(arena), "Found arena with an error flag\n");
-
-	return 1;
-}
-
 /*
  * This function reads an existing valid btt superblock and
  * populates the corresponding arena_info struct
@@ -632,8 +605,9 @@ static void parse_arena_meta(struct arena_info *arena, struct btt_sb *super,
 	arena->logoff = arena_off + le64_to_cpu(super->logoff);
 	arena->info2off = arena_off + le64_to_cpu(super->info2off);
 
-	arena->size = (super->nextoff > 0) ? (le64_to_cpu(super->nextoff)) :
-			(arena->info2off - arena->infooff + BTT_PG_SIZE);
+	arena->size = (le64_to_cpu(super->nextoff) > 0)
+		? (le64_to_cpu(super->nextoff))
+		: (arena->info2off - arena->infooff + BTT_PG_SIZE);
 
 	arena->flags = le32_to_cpu(super->flags);
 }
@@ -665,8 +639,7 @@ static int discover_arenas(struct btt *btt)
 		if (ret)
 			goto out;
 
-		if (!arena_is_valid(arena, super, btt->nd_btt->uuid,
-				btt->lbasize)) {
+		if (!nd_btt_arena_is_valid(btt->nd_btt, super)) {
 			if (remaining == btt->rawsize) {
 				btt->init_state = INIT_NOTFOUND;
 				dev_info(to_dev(arena), "No existing arenas\n");
@@ -755,10 +728,13 @@ static int create_arenas(struct btt *btt)
  * It is only called for an uninitialized arena when a write
  * to that arena occurs for the first time.
  */
-static int btt_arena_write_layout(struct arena_info *arena, u8 *uuid)
+static int btt_arena_write_layout(struct arena_info *arena)
 {
 	int ret;
+	u64 sum;
 	struct btt_sb *super;
+	struct nd_btt *nd_btt = arena->nd_btt;
+	const u8 *parent_uuid = nd_dev_to_uuid(&nd_btt->ndns->dev);
 
 	ret = btt_map_init(arena);
 	if (ret)
@@ -773,7 +749,8 @@ static int btt_arena_write_layout(struct arena_info *arena, u8 *uuid)
 		return -ENOMEM;
 
 	strncpy(super->signature, BTT_SIG, BTT_SIG_LEN);
-	memcpy(super->uuid, uuid, 16);
+	memcpy(super->uuid, nd_btt->uuid, 16);
+	memcpy(super->parent_uuid, parent_uuid, 16);
 	super->flags = cpu_to_le32(arena->flags);
 	super->version_major = cpu_to_le16(arena->version_major);
 	super->version_minor = cpu_to_le16(arena->version_minor);
@@ -794,7 +771,8 @@ static int btt_arena_write_layout(struct arena_info *arena, u8 *uuid)
 	super->info2off = cpu_to_le64(arena->info2off - arena->infooff);
 
 	super->flags = 0;
-	super->checksum = cpu_to_le64(nd_btt_sb_checksum(super));
+	sum = nd_sb_checksum((struct nd_gen_sb *) super);
+	super->checksum = cpu_to_le64(sum);
 
 	ret = btt_info_write(arena, super);
 
@@ -813,7 +791,7 @@ static int btt_meta_init(struct btt *btt)
 
 	mutex_lock(&btt->init_lock);
 	list_for_each_entry(arena, &btt->arena_list, list) {
-		ret = btt_arena_write_layout(arena, btt->nd_btt->uuid);
+		ret = btt_arena_write_layout(arena);
 		if (ret)
 			goto unlock;
 
@@ -1447,8 +1425,6 @@ static int __init nd_btt_init(void)
 {
 	int rc;
 
-	BUILD_BUG_ON(sizeof(struct btt_sb) != SZ_4K);
-
 	btt_major = register_blkdev(0, "btt");
 	if (btt_major < 0)
 		return btt_major;
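The removed arena_is_valid() and the rewritten btt_arena_write_layout() share one checksum convention, now centralized behind nd_btt_arena_is_valid() and nd_sb_checksum(): the sum is computed over the superblock with the checksum field itself zeroed. A sketch of a validator following that convention — a hypothetical helper, not the in-tree one:

    static bool example_sb_valid(struct btt_sb *super)
    {
    	u64 saved = le64_to_cpu(super->checksum);
    	bool match;

    	/* the checksum covers the superblock with the field zeroed */
    	super->checksum = 0;
    	match = (nd_sb_checksum((struct nd_gen_sb *) super) == saved);
    	super->checksum = cpu_to_le64(saved);

    	return match;
    }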

+ 3 - 0
drivers/nvdimm/btt.h

@@ -182,4 +182,7 @@ struct btt {
 	int init_state;
 	int num_arenas;
 };
+
+bool nd_btt_arena_is_valid(struct nd_btt *nd_btt, struct btt_sb *super);
+
 #endif

+ 48 - 167
drivers/nvdimm/btt_devs.c

@@ -21,63 +21,13 @@
 #include "btt.h"
 #include "nd.h"
 
-static void __nd_btt_detach_ndns(struct nd_btt *nd_btt)
-{
-	struct nd_namespace_common *ndns = nd_btt->ndns;
-
-	dev_WARN_ONCE(&nd_btt->dev, !mutex_is_locked(&ndns->dev.mutex)
-			|| ndns->claim != &nd_btt->dev,
-			"%s: invalid claim\n", __func__);
-	ndns->claim = NULL;
-	nd_btt->ndns = NULL;
-	put_device(&ndns->dev);
-}
-
-static void nd_btt_detach_ndns(struct nd_btt *nd_btt)
-{
-	struct nd_namespace_common *ndns = nd_btt->ndns;
-
-	if (!ndns)
-		return;
-	get_device(&ndns->dev);
-	device_lock(&ndns->dev);
-	__nd_btt_detach_ndns(nd_btt);
-	device_unlock(&ndns->dev);
-	put_device(&ndns->dev);
-}
-
-static bool __nd_btt_attach_ndns(struct nd_btt *nd_btt,
-		struct nd_namespace_common *ndns)
-{
-	if (ndns->claim)
-		return false;
-	dev_WARN_ONCE(&nd_btt->dev, !mutex_is_locked(&ndns->dev.mutex)
-			|| nd_btt->ndns,
-			"%s: invalid claim\n", __func__);
-	ndns->claim = &nd_btt->dev;
-	nd_btt->ndns = ndns;
-	get_device(&ndns->dev);
-	return true;
-}
-
-static bool nd_btt_attach_ndns(struct nd_btt *nd_btt,
-		struct nd_namespace_common *ndns)
-{
-	bool claimed;
-
-	device_lock(&ndns->dev);
-	claimed = __nd_btt_attach_ndns(nd_btt, ndns);
-	device_unlock(&ndns->dev);
-	return claimed;
-}
-
 static void nd_btt_release(struct device *dev)
 {
 	struct nd_region *nd_region = to_nd_region(dev->parent);
 	struct nd_btt *nd_btt = to_nd_btt(dev);
 
 	dev_dbg(dev, "%s\n", __func__);
-	nd_btt_detach_ndns(nd_btt);
+	nd_detach_ndns(&nd_btt->dev, &nd_btt->ndns);
 	ida_simple_remove(&nd_region->btt_ida, nd_btt->id);
 	kfree(nd_btt->uuid);
 	kfree(nd_btt);
@@ -172,104 +122,15 @@ static ssize_t namespace_show(struct device *dev,
 	return rc;
 	return rc;
 }
 }
 
 
-static int namespace_match(struct device *dev, void *data)
-{
-	char *name = data;
-
-	return strcmp(name, dev_name(dev)) == 0;
-}
-
-static bool is_nd_btt_idle(struct device *dev)
-{
-	struct nd_region *nd_region = to_nd_region(dev->parent);
-	struct nd_btt *nd_btt = to_nd_btt(dev);
-
-	if (nd_region->btt_seed == dev || nd_btt->ndns || dev->driver)
-		return false;
-	return true;
-}
-
-static ssize_t __namespace_store(struct device *dev,
-		struct device_attribute *attr, const char *buf, size_t len)
-{
-	struct nd_btt *nd_btt = to_nd_btt(dev);
-	struct nd_namespace_common *ndns;
-	struct device *found;
-	char *name;
-
-	if (dev->driver) {
-		dev_dbg(dev, "%s: -EBUSY\n", __func__);
-		return -EBUSY;
-	}
-
-	name = kstrndup(buf, len, GFP_KERNEL);
-	if (!name)
-		return -ENOMEM;
-	strim(name);
-
-	if (strncmp(name, "namespace", 9) == 0 || strcmp(name, "") == 0)
-		/* pass */;
-	else {
-		len = -EINVAL;
-		goto out;
-	}
-
-	ndns = nd_btt->ndns;
-	if (strcmp(name, "") == 0) {
-		/* detach the namespace and destroy / reset the btt device */
-		nd_btt_detach_ndns(nd_btt);
-		if (is_nd_btt_idle(dev))
-			nd_device_unregister(dev, ND_ASYNC);
-		else {
-			nd_btt->lbasize = 0;
-			kfree(nd_btt->uuid);
-			nd_btt->uuid = NULL;
-		}
-		goto out;
-	} else if (ndns) {
-		dev_dbg(dev, "namespace already set to: %s\n",
-				dev_name(&ndns->dev));
-		len = -EBUSY;
-		goto out;
-	}
-
-	found = device_find_child(dev->parent, name, namespace_match);
-	if (!found) {
-		dev_dbg(dev, "'%s' not found under %s\n", name,
-				dev_name(dev->parent));
-		len = -ENODEV;
-		goto out;
-	}
-
-	ndns = to_ndns(found);
-	if (__nvdimm_namespace_capacity(ndns) < SZ_16M) {
-		dev_dbg(dev, "%s too small to host btt\n", name);
-		len = -ENXIO;
-		goto out_attach;
-	}
-
-	WARN_ON_ONCE(!is_nvdimm_bus_locked(&nd_btt->dev));
-	if (!nd_btt_attach_ndns(nd_btt, ndns)) {
-		dev_dbg(dev, "%s already claimed\n",
-				dev_name(&ndns->dev));
-		len = -EBUSY;
-	}
-
- out_attach:
-	put_device(&ndns->dev); /* from device_find_child */
- out:
-	kfree(name);
-	return len;
-}
-
 static ssize_t namespace_store(struct device *dev,
 		struct device_attribute *attr, const char *buf, size_t len)
 {
+	struct nd_btt *nd_btt = to_nd_btt(dev);
 	ssize_t rc;

 	nvdimm_bus_lock(dev);
 	device_lock(dev);
-	rc = __namespace_store(dev, attr, buf, len);
+	rc = nd_namespace_store(dev, &nd_btt->ndns, buf, len);
 	dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__,
 			rc, buf, buf[len - 1] == '\n' ? "" : "\n");
 	device_unlock(dev);
@@ -324,7 +185,7 @@ static struct device *__nd_btt_create(struct nd_region *nd_region,
 	dev->type = &nd_btt_device_type;
 	dev->groups = nd_btt_attribute_groups;
 	device_initialize(&nd_btt->dev);
-	if (ndns && !__nd_btt_attach_ndns(nd_btt, ndns)) {
+	if (ndns && !__nd_attach_ndns(&nd_btt->dev, ndns, &nd_btt->ndns)) {
 		dev_dbg(&ndns->dev, "%s failed, already claimed by %s\n",
 				__func__, dev_name(ndns->claim));
 		put_device(dev);
@@ -342,30 +203,54 @@ struct device *nd_btt_create(struct nd_region *nd_region)
 	return dev;
 }

-/*
- * nd_btt_sb_checksum: compute checksum for btt info block
+static bool uuid_is_null(u8 *uuid)
+{
+	static const u8 null_uuid[16];
+
+	return (memcmp(uuid, null_uuid, 16) == 0);
+}
+
+/**
+ * nd_btt_arena_is_valid - check if the metadata layout is valid
+ * @nd_btt:	device with BTT geometry and backing device info
+ * @super:	pointer to the arena's info block being tested
+ *
+ * Check consistency of the btt info block with itself by validating
+ * the checksum, and with the parent namespace by comparing the
+ * parent_uuid recorded in the info block against the parent's uuid.
  *
- * Returns a fletcher64 checksum of everything in the given info block
- * except the last field (since that's where the checksum lives).
+ * Returns:
+ * false for an invalid info block, true for a valid one
  */
-u64 nd_btt_sb_checksum(struct btt_sb *btt_sb)
+bool nd_btt_arena_is_valid(struct nd_btt *nd_btt, struct btt_sb *super)
 {
-	u64 sum;
-	__le64 sum_save;
-
-	sum_save = btt_sb->checksum;
-	btt_sb->checksum = 0;
-	sum = nd_fletcher64(btt_sb, sizeof(*btt_sb), 1);
-	btt_sb->checksum = sum_save;
-	return sum;
+	const u8 *parent_uuid = nd_dev_to_uuid(&nd_btt->ndns->dev);
+	u64 checksum;
+
+	if (memcmp(super->signature, BTT_SIG, BTT_SIG_LEN) != 0)
+		return false;
+
+	if (!uuid_is_null(super->parent_uuid))
+		if (memcmp(super->parent_uuid, parent_uuid, 16) != 0)
+			return false;
+
+	checksum = le64_to_cpu(super->checksum);
+	super->checksum = 0;
+	if (checksum != nd_sb_checksum((struct nd_gen_sb *) super))
+		return false;
+	super->checksum = cpu_to_le64(checksum);
+
+	/* TODO: figure out action for this */
+	if ((le32_to_cpu(super->flags) & IB_FLAG_ERROR_MASK) != 0)
+		dev_info(&nd_btt->dev, "Found arena with an error flag\n");
+
+	return true;
 }
-EXPORT_SYMBOL(nd_btt_sb_checksum);
+EXPORT_SYMBOL(nd_btt_arena_is_valid);

 static int __nd_btt_probe(struct nd_btt *nd_btt,
 		struct nd_namespace_common *ndns, struct btt_sb *btt_sb)
 {
-	u64 checksum;
-
 	if (!btt_sb || !ndns || !nd_btt)
 		return -ENODEV;

@@ -375,14 +260,8 @@ static int __nd_btt_probe(struct nd_btt *nd_btt,
 	if (nvdimm_namespace_capacity(ndns) < SZ_16M)
 		return -ENXIO;

-	if (memcmp(btt_sb->signature, BTT_SIG, BTT_SIG_LEN) != 0)
-		return -ENODEV;
-
-	checksum = le64_to_cpu(btt_sb->checksum);
-	btt_sb->checksum = 0;
-	if (checksum != nd_btt_sb_checksum(btt_sb))
+	if (!nd_btt_arena_is_valid(nd_btt, btt_sb))
 		return -ENODEV;
-	btt_sb->checksum = cpu_to_le64(checksum);

 	nd_btt->lbasize = le32_to_cpu(btt_sb->external_lbasize);
 	nd_btt->uuid = kmemdup(btt_sb->uuid, 16, GFP_KERNEL);
@@ -416,7 +295,9 @@ int nd_btt_probe(struct nd_namespace_common *ndns, void *drvdata)
 	dev_dbg(&ndns->dev, "%s: btt: %s\n", __func__,
 			rc == 0 ? dev_name(dev) : "<none>");
 	if (rc < 0) {
-		__nd_btt_detach_ndns(to_nd_btt(dev));
+		struct nd_btt *nd_btt = to_nd_btt(dev);
+
+		__nd_detach_ndns(dev, &nd_btt->ndns);
 		put_device(dev);
 	}


+ 201 - 0
drivers/nvdimm/claim.c

@@ -0,0 +1,201 @@
+/*
+ * Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+#include <linux/device.h>
+#include <linux/sizes.h>
+#include "nd-core.h"
+#include "pfn.h"
+#include "btt.h"
+#include "nd.h"
+
+void __nd_detach_ndns(struct device *dev, struct nd_namespace_common **_ndns)
+{
+	struct nd_namespace_common *ndns = *_ndns;
+
+	dev_WARN_ONCE(dev, !mutex_is_locked(&ndns->dev.mutex)
+			|| ndns->claim != dev,
+			"%s: invalid claim\n", __func__);
+	ndns->claim = NULL;
+	*_ndns = NULL;
+	put_device(&ndns->dev);
+}
+
+void nd_detach_ndns(struct device *dev,
+		struct nd_namespace_common **_ndns)
+{
+	struct nd_namespace_common *ndns = *_ndns;
+
+	if (!ndns)
+		return;
+	get_device(&ndns->dev);
+	device_lock(&ndns->dev);
+	__nd_detach_ndns(dev, _ndns);
+	device_unlock(&ndns->dev);
+	put_device(&ndns->dev);
+}
+
+bool __nd_attach_ndns(struct device *dev, struct nd_namespace_common *attach,
+		struct nd_namespace_common **_ndns)
+{
+	if (attach->claim)
+		return false;
+	dev_WARN_ONCE(dev, !mutex_is_locked(&attach->dev.mutex)
+			|| *_ndns,
+			"%s: invalid claim\n", __func__);
+	attach->claim = dev;
+	*_ndns = attach;
+	get_device(&attach->dev);
+	return true;
+}
+
+bool nd_attach_ndns(struct device *dev, struct nd_namespace_common *attach,
+		struct nd_namespace_common **_ndns)
+{
+	bool claimed;
+
+	device_lock(&attach->dev);
+	claimed = __nd_attach_ndns(dev, attach, _ndns);
+	device_unlock(&attach->dev);
+	return claimed;
+}
+
+static int namespace_match(struct device *dev, void *data)
+{
+	char *name = data;
+
+	return strcmp(name, dev_name(dev)) == 0;
+}
+
+static bool is_idle(struct device *dev, struct nd_namespace_common *ndns)
+{
+	struct nd_region *nd_region = to_nd_region(dev->parent);
+	struct device *seed = NULL;
+
+	if (is_nd_btt(dev))
+		seed = nd_region->btt_seed;
+	else if (is_nd_pfn(dev))
+		seed = nd_region->pfn_seed;
+
+	if (seed == dev || ndns || dev->driver)
+		return false;
+	return true;
+}
+
+static void nd_detach_and_reset(struct device *dev,
+		struct nd_namespace_common **_ndns)
+{
+	/* detach the namespace and destroy / reset the device */
+	nd_detach_ndns(dev, _ndns);
+	if (is_idle(dev, *_ndns)) {
+		nd_device_unregister(dev, ND_ASYNC);
+	} else if (is_nd_btt(dev)) {
+		struct nd_btt *nd_btt = to_nd_btt(dev);
+
+		nd_btt->lbasize = 0;
+		kfree(nd_btt->uuid);
+		nd_btt->uuid = NULL;
+	} else if (is_nd_pfn(dev)) {
+		struct nd_pfn *nd_pfn = to_nd_pfn(dev);
+
+		kfree(nd_pfn->uuid);
+		nd_pfn->uuid = NULL;
+		nd_pfn->mode = PFN_MODE_NONE;
+	}
+}
+
+ssize_t nd_namespace_store(struct device *dev,
+		struct nd_namespace_common **_ndns, const char *buf,
+		size_t len)
+{
+	struct nd_namespace_common *ndns;
+	struct device *found;
+	char *name;
+
+	if (dev->driver) {
+		dev_dbg(dev, "%s: -EBUSY\n", __func__);
+		return -EBUSY;
+	}
+
+	name = kstrndup(buf, len, GFP_KERNEL);
+	if (!name)
+		return -ENOMEM;
+	strim(name);
+
+	if (strncmp(name, "namespace", 9) == 0 || strcmp(name, "") == 0)
+		/* pass */;
+	else {
+		len = -EINVAL;
+		goto out;
+	}
+
+	ndns = *_ndns;
+	if (strcmp(name, "") == 0) {
+		nd_detach_and_reset(dev, _ndns);
+		goto out;
+	} else if (ndns) {
+		dev_dbg(dev, "namespace already set to: %s\n",
+				dev_name(&ndns->dev));
+		len = -EBUSY;
+		goto out;
+	}
+
+	found = device_find_child(dev->parent, name, namespace_match);
+	if (!found) {
+		dev_dbg(dev, "'%s' not found under %s\n", name,
+				dev_name(dev->parent));
+		len = -ENODEV;
+		goto out;
+	}
+
+	ndns = to_ndns(found);
+	if (__nvdimm_namespace_capacity(ndns) < SZ_16M) {
+		dev_dbg(dev, "%s too small to host\n", name);
+		len = -ENXIO;
+		goto out_attach;
+	}
+
+	WARN_ON_ONCE(!is_nvdimm_bus_locked(dev));
+	if (!nd_attach_ndns(dev, ndns, _ndns)) {
+		dev_dbg(dev, "%s already claimed\n",
+				dev_name(&ndns->dev));
+		len = -EBUSY;
+	}
+
+ out_attach:
+	put_device(&ndns->dev); /* from device_find_child */
+ out:
+	kfree(name);
+	return len;
+}
+
+/*
+ * nd_sb_checksum: compute checksum for a generic info block
+ *
+ * Returns a fletcher64 checksum of everything in the given info block
+ * except the last field (since that's where the checksum lives).
+ */
+u64 nd_sb_checksum(struct nd_gen_sb *nd_gen_sb)
+{
+	u64 sum;
+	__le64 sum_save;
+
+	BUILD_BUG_ON(sizeof(struct btt_sb) != SZ_4K);
+	BUILD_BUG_ON(sizeof(struct nd_pfn_sb) != SZ_4K);
+	BUILD_BUG_ON(sizeof(struct nd_gen_sb) != SZ_4K);
+
+	sum_save = nd_gen_sb->checksum;
+	nd_gen_sb->checksum = 0;
+	sum = nd_fletcher64(nd_gen_sb, sizeof(*nd_gen_sb), 1);
+	nd_gen_sb->checksum = sum_save;
+	return sum;
+}
+EXPORT_SYMBOL(nd_sb_checksum);
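
As an aside for readers tracing the on-media format, here is a minimal userspace sketch of the fletcher64 sum that nd_sb_checksum() relies on. It mirrors the kernel's nd_fletcher64() helper as used above; the direct 32-bit loads assume a little-endian host, which is an assumption of this sketch rather than of the kernel code.

#include <stdint.h>
#include <stddef.h>

/* Sum the 4K info block as 32-bit words: the running word sum lands in
 * the low half of the result and its second-order sum in the high half. */
static uint64_t fletcher64(const void *addr, size_t len)
{
	const uint32_t *buf = addr;
	uint32_t lo32 = 0;
	uint64_t hi32 = 0;
	size_t i;

	for (i = 0; i < len / sizeof(uint32_t); i++) {
		lo32 += buf[i];
		hi32 += lo32;
	}
	return hi32 << 32 | lo32;
}

Verification is the round trip visible in nd_btt_arena_is_valid() and nd_pfn_validate(): save the stored checksum, zero the field, recompute over the whole block, compare, then restore the saved value.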

+ 1 - 4
drivers/nvdimm/dimm_devs.c

@@ -241,10 +241,7 @@ void nvdimm_drvdata_release(struct kref *kref)
 		nvdimm_free_dpa(ndd, res);
 	nvdimm_bus_unlock(dev);

-	if (ndd->data && is_vmalloc_addr(ndd->data))
-		vfree(ndd->data);
-	else
-		kfree(ndd->data);
+	kvfree(ndd->data);
 	kfree(ndd);
 	put_device(dev);
 }

+ 87 - 0
drivers/nvdimm/e820.c

@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2015, Christoph Hellwig.
+ * Copyright (c) 2015, Intel Corporation.
+ */
+#include <linux/platform_device.h>
+#include <linux/libnvdimm.h>
+#include <linux/module.h>
+
+static const struct attribute_group *e820_pmem_attribute_groups[] = {
+	&nvdimm_bus_attribute_group,
+	NULL,
+};
+
+static const struct attribute_group *e820_pmem_region_attribute_groups[] = {
+	&nd_region_attribute_group,
+	&nd_device_attribute_group,
+	NULL,
+};
+
+static int e820_pmem_remove(struct platform_device *pdev)
+{
+	struct nvdimm_bus *nvdimm_bus = platform_get_drvdata(pdev);
+
+	nvdimm_bus_unregister(nvdimm_bus);
+	return 0;
+}
+
+static int e820_pmem_probe(struct platform_device *pdev)
+{
+	static struct nvdimm_bus_descriptor nd_desc;
+	struct device *dev = &pdev->dev;
+	struct nvdimm_bus *nvdimm_bus;
+	struct resource *p;
+
+	nd_desc.attr_groups = e820_pmem_attribute_groups;
+	nd_desc.provider_name = "e820";
+	nvdimm_bus = nvdimm_bus_register(dev, &nd_desc);
+	if (!nvdimm_bus)
+		goto err;
+	platform_set_drvdata(pdev, nvdimm_bus);
+
+	for (p = iomem_resource.child; p ; p = p->sibling) {
+		struct nd_region_desc ndr_desc;
+
+		if (strncmp(p->name, "Persistent Memory (legacy)", 26) != 0)
+			continue;
+
+		memset(&ndr_desc, 0, sizeof(ndr_desc));
+		ndr_desc.res = p;
+		ndr_desc.attr_groups = e820_pmem_region_attribute_groups;
+		ndr_desc.numa_node = NUMA_NO_NODE;
+		set_bit(ND_REGION_PAGEMAP, &ndr_desc.flags);
+		if (!nvdimm_pmem_region_create(nvdimm_bus, &ndr_desc))
+			goto err;
+	}
+
+	return 0;
+
+ err:
+	nvdimm_bus_unregister(nvdimm_bus);
+	dev_err(dev, "failed to register legacy persistent memory ranges\n");
+	return -ENXIO;
+}
+
+static struct platform_driver e820_pmem_driver = {
+	.probe = e820_pmem_probe,
+	.remove = e820_pmem_remove,
+	.driver = {
+		.name = "e820_pmem",
+	},
+};
+
+static __init int e820_pmem_init(void)
+{
+	return platform_driver_register(&e820_pmem_driver);
+}
+
+static __exit void e820_pmem_exit(void)
+{
+	platform_driver_unregister(&e820_pmem_driver);
+}
+
+MODULE_ALIAS("platform:e820_pmem*");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Intel Corporation");
+module_init(e820_pmem_init);
+module_exit(e820_pmem_exit);
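
For context, the "Persistent Memory (legacy)" resources this probe walks are typically produced on x86 by non-standard e820 type-12 entries, or carved out of RAM with the memmap=nn!ss kernel parameter (for example memmap=4G!12G, an illustrative value, reserves 4G at the 12G physical offset); the strncmp() against the resource name above is what binds this driver to those ranges.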

+ 76 - 13
drivers/nvdimm/namespace_devs.c

@@ -13,6 +13,7 @@
 #include <linux/module.h>
 #include <linux/device.h>
 #include <linux/slab.h>
+#include <linux/pmem.h>
 #include <linux/nd.h>
 #include "nd-core.h"
 #include "nd.h"
@@ -76,22 +77,54 @@ static bool is_namespace_io(struct device *dev)
 	return dev ? dev->type == &namespace_io_device_type : false;
 }

+bool pmem_should_map_pages(struct device *dev)
+{
+	struct nd_region *nd_region = to_nd_region(dev->parent);
+
+	if (!IS_ENABLED(CONFIG_ZONE_DEVICE))
+		return false;
+
+	if (!test_bit(ND_REGION_PAGEMAP, &nd_region->flags))
+		return false;
+
+	if (is_nd_pfn(dev) || is_nd_btt(dev))
+		return false;
+
+#ifdef ARCH_MEMREMAP_PMEM
+	return ARCH_MEMREMAP_PMEM == MEMREMAP_WB;
+#else
+	return false;
+#endif
+}
+EXPORT_SYMBOL(pmem_should_map_pages);
+
 const char *nvdimm_namespace_disk_name(struct nd_namespace_common *ndns,
 		char *name)
 {
 	struct nd_region *nd_region = to_nd_region(ndns->dev.parent);
-	const char *suffix = "";
+	const char *suffix = NULL;

-	if (ndns->claim && is_nd_btt(ndns->claim))
-		suffix = "s";
+	if (ndns->claim) {
+		if (is_nd_btt(ndns->claim))
+			suffix = "s";
+		else if (is_nd_pfn(ndns->claim))
+			suffix = "m";
+		else
+			dev_WARN_ONCE(&ndns->dev, 1,
+					"unknown claim type by %s\n",
+					dev_name(ndns->claim));
+	}

-	if (is_namespace_pmem(&ndns->dev) || is_namespace_io(&ndns->dev))
-		sprintf(name, "pmem%d%s", nd_region->id, suffix);
-	else if (is_namespace_blk(&ndns->dev)) {
+	if (is_namespace_pmem(&ndns->dev) || is_namespace_io(&ndns->dev)) {
+		if (!suffix && pmem_should_map_pages(&ndns->dev))
+			suffix = "m";
+		sprintf(name, "pmem%d%s", nd_region->id, suffix ? suffix : "");
+	} else if (is_namespace_blk(&ndns->dev)) {
 		struct nd_namespace_blk *nsblk;

 		nsblk = to_nd_namespace_blk(&ndns->dev);
-		sprintf(name, "ndblk%d.%d%s", nd_region->id, nsblk->id, suffix);
+		sprintf(name, "ndblk%d.%d%s", nd_region->id, nsblk->id,
+				suffix ? suffix : "");
 	} else {
 		return NULL;
 	}
@@ -100,6 +133,26 @@ const char *nvdimm_namespace_disk_name(struct nd_namespace_common *ndns,
 }
 EXPORT_SYMBOL(nvdimm_namespace_disk_name);

+const u8 *nd_dev_to_uuid(struct device *dev)
+{
+	static const u8 null_uuid[16];
+
+	if (!dev)
+		return null_uuid;
+
+	if (is_namespace_pmem(dev)) {
+		struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev);
+
+		return nspm->uuid;
+	} else if (is_namespace_blk(dev)) {
+		struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev);
+
+		return nsblk->uuid;
+	} else
+		return null_uuid;
+}
+EXPORT_SYMBOL(nd_dev_to_uuid);

 static ssize_t nstype_show(struct device *dev,
 		struct device_attribute *attr, char *buf)
 {
@@ -1235,12 +1288,22 @@ static const struct attribute_group *nd_namespace_attribute_groups[] = {
 struct nd_namespace_common *nvdimm_namespace_common_probe(struct device *dev)
 {
 	struct nd_btt *nd_btt = is_nd_btt(dev) ? to_nd_btt(dev) : NULL;
+	struct nd_pfn *nd_pfn = is_nd_pfn(dev) ? to_nd_pfn(dev) : NULL;
 	struct nd_namespace_common *ndns;
 	resource_size_t size;

-	if (nd_btt) {
-		ndns = nd_btt->ndns;
-		if (!ndns)
+	if (nd_btt || nd_pfn) {
+		struct device *host = NULL;
+
+		if (nd_btt) {
+			host = &nd_btt->dev;
+			ndns = nd_btt->ndns;
+		} else if (nd_pfn) {
+			host = &nd_pfn->dev;
+			ndns = nd_pfn->ndns;
+		}
+
+		if (!ndns || !host)
 			return ERR_PTR(-ENODEV);

 		/*
@@ -1251,12 +1314,12 @@ struct nd_namespace_common *nvdimm_namespace_common_probe(struct device *dev)
 		device_unlock(&ndns->dev);
 		if (ndns->dev.driver) {
 			dev_dbg(&ndns->dev, "is active, can't bind %s\n",
-					dev_name(&nd_btt->dev));
+					dev_name(host));
 			return ERR_PTR(-EBUSY);
 		}
-		if (dev_WARN_ONCE(&ndns->dev, ndns->claim != &nd_btt->dev,
+		if (dev_WARN_ONCE(&ndns->dev, ndns->claim != host,
 					"host (%s) vs claim (%s) mismatch\n",
 					"host (%s) vs claim (%s) mismatch\n",
-					dev_name(&nd_btt->dev),
+					dev_name(host),
 					dev_name(ndns->claim)))
 			return ERR_PTR(-ENXIO);
 	} else {
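
Putting the suffix logic of nvdimm_namespace_disk_name() above together, and assuming region id 0 for illustration: a raw pmem namespace surfaces as pmem0, one backed by a struct page memmap (direct map or a pfn claim) as pmem0m, one claimed by a BTT as pmem0s, and a blk namespace as ndblk0.0.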

+ 9 - 0
drivers/nvdimm/nd-core.h

@@ -80,4 +80,13 @@ struct resource *nsblk_add_resource(struct nd_region *nd_region,
 int nvdimm_num_label_slots(struct nvdimm_drvdata *ndd);
 void get_ndd(struct nvdimm_drvdata *ndd);
 resource_size_t __nvdimm_namespace_capacity(struct nd_namespace_common *ndns);
+void nd_detach_ndns(struct device *dev, struct nd_namespace_common **_ndns);
+void __nd_detach_ndns(struct device *dev, struct nd_namespace_common **_ndns);
+bool nd_attach_ndns(struct device *dev, struct nd_namespace_common *attach,
+		struct nd_namespace_common **_ndns);
+bool __nd_attach_ndns(struct device *dev, struct nd_namespace_common *attach,
+		struct nd_namespace_common **_ndns);
+ssize_t nd_namespace_store(struct device *dev,
+		struct nd_namespace_common **_ndns, const char *buf,
+		size_t len);
 #endif /* __ND_CORE_H__ */

+ 64 - 3
drivers/nvdimm/nd.h

@@ -29,6 +29,13 @@ enum {
 	ND_MAX_LANES = 256,
 	SECTOR_SHIFT = 9,
 	INT_LBASIZE_ALIGNMENT = 64,
+#if IS_ENABLED(CONFIG_NVDIMM_PFN)
+	ND_PFN_ALIGN = PAGES_PER_SECTION * PAGE_SIZE,
+	ND_PFN_MASK = ND_PFN_ALIGN - 1,
+#else
+	ND_PFN_ALIGN = 0,
+	ND_PFN_MASK = 0,
+#endif
 };

 struct nvdimm_drvdata {
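
To make the ND_PFN_ALIGN constants above concrete, a worked example under assumed x86_64 defaults (SECTION_SIZE_BITS = 27, 4K pages):

/*
 * PAGES_PER_SECTION = 2^27 / 2^12 = 32768
 * ND_PFN_ALIGN      = 32768 * 4096 = 128 MiB
 * ND_PFN_MASK       = ND_PFN_ALIGN - 1 = 0x7ffffff
 *
 * so nd_pfn_validate() and nd_pfn_init() below insist on memory-hotplug
 * section (128 MiB) alignment before a pfn device can claim a namespace.
 */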
@@ -92,8 +99,11 @@ struct nd_region {
 	struct device dev;
 	struct ida ns_ida;
 	struct ida btt_ida;
+	struct ida pfn_ida;
+	unsigned long flags;
 	struct device *ns_seed;
 	struct device *btt_seed;
+	struct device *pfn_seed;
 	u16 ndr_mappings;
 	u64 ndr_size;
 	u64 ndr_start;
@@ -133,6 +143,22 @@ struct nd_btt {
 	int id;
 };

+enum nd_pfn_mode {
+	PFN_MODE_NONE,
+	PFN_MODE_RAM,
+	PFN_MODE_PMEM,
+};
+
+struct nd_pfn {
+	int id;
+	u8 *uuid;
+	struct device dev;
+	unsigned long npfns;
+	enum nd_pfn_mode mode;
+	struct nd_pfn_sb *pfn_sb;
+	struct nd_namespace_common *ndns;
+};
+
 enum nd_async_mode {
 	ND_SYNC,
 	ND_ASYNC,
@@ -159,14 +185,19 @@ int nvdimm_init_config_data(struct nvdimm_drvdata *ndd);
 int nvdimm_set_config_data(struct nvdimm_drvdata *ndd, size_t offset,
 		void *buf, size_t len);
 struct nd_btt *to_nd_btt(struct device *dev);
-struct btt_sb;
-u64 nd_btt_sb_checksum(struct btt_sb *btt_sb);
+
+struct nd_gen_sb {
+	char reserved[SZ_4K - 8];
+	__le64 checksum;
+};
+
+u64 nd_sb_checksum(struct nd_gen_sb *sb);
 #if IS_ENABLED(CONFIG_BTT)
 int nd_btt_probe(struct nd_namespace_common *ndns, void *drvdata);
 bool is_nd_btt(struct device *dev);
 struct device *nd_btt_create(struct nd_region *nd_region);
 #else
-static inline nd_btt_probe(struct nd_namespace_common *ndns, void *drvdata)
+static inline int nd_btt_probe(struct nd_namespace_common *ndns, void *drvdata)
 {
 	return -ENODEV;
 }
@@ -180,8 +211,36 @@ static inline struct device *nd_btt_create(struct nd_region *nd_region)
 {
 	return NULL;
 }
+#endif

+struct nd_pfn *to_nd_pfn(struct device *dev);
+#if IS_ENABLED(CONFIG_NVDIMM_PFN)
+int nd_pfn_probe(struct nd_namespace_common *ndns, void *drvdata);
+bool is_nd_pfn(struct device *dev);
+struct device *nd_pfn_create(struct nd_region *nd_region);
+int nd_pfn_validate(struct nd_pfn *nd_pfn);
+#else
+static inline int nd_pfn_probe(struct nd_namespace_common *ndns, void *drvdata)
+{
+	return -ENODEV;
+}
+
+static inline bool is_nd_pfn(struct device *dev)
+{
+	return false;
+}
+
+static inline struct device *nd_pfn_create(struct nd_region *nd_region)
+{
+	return NULL;
+}
+
+static inline int nd_pfn_validate(struct nd_pfn *nd_pfn)
+{
+	return -ENODEV;
+}
 #endif
+
 struct nd_region *to_nd_region(struct device *dev);
 int nd_region_to_nstype(struct nd_region *nd_region);
 int nd_region_register_namespaces(struct nd_region *nd_region, int *err);
@@ -217,4 +276,6 @@ static inline bool nd_iostat_start(struct bio *bio, unsigned long *start)
 }
 void nd_iostat_end(struct bio *bio, unsigned long start);
 resource_size_t nd_namespace_blk_validate(struct nd_namespace_blk *nsblk);
+const u8 *nd_dev_to_uuid(struct device *dev);
+bool pmem_should_map_pages(struct device *dev);
 #endif /* __ND_H__ */

+ 35 - 0
drivers/nvdimm/pfn.h

@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2014-2015, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef __NVDIMM_PFN_H
+#define __NVDIMM_PFN_H
+
+#include <linux/types.h>
+
+#define PFN_SIG_LEN 16
+#define PFN_SIG "NVDIMM_PFN_INFO\0"
+
+struct nd_pfn_sb {
+	u8 signature[PFN_SIG_LEN];
+	u8 uuid[16];
+	u8 parent_uuid[16];
+	__le32 flags;
+	__le16 version_major;
+	__le16 version_minor;
+	__le64 dataoff;
+	__le64 npfns;
+	__le32 mode;
+	u8 padding[4012];
+	__le64 checksum;
+};
+#endif /* __NVDIMM_PFN_H */
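
The 4012-byte pad is what squares this layout with the 4K info-block contract that nd_sb_checksum() enforces via BUILD_BUG_ON; a standalone restatement of the arithmetic (C11 static_assert, not part of the kernel header):

#include <assert.h>

/* fields before 'padding': signature 16 + uuid 16 + parent_uuid 16 +
 * flags 4 + version 2+2 + dataoff 8 + npfns 8 + mode 4 = 76 bytes;
 * 76 + 4012 + 8 (checksum) = 4096 */
static_assert(76 + 4012 + 8 == 4096, "nd_pfn_sb must be exactly 4K");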

+ 337 - 0
drivers/nvdimm/pfn_devs.c

@@ -0,0 +1,337 @@
+/*
+ * Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+#include <linux/blkdev.h>
+#include <linux/device.h>
+#include <linux/genhd.h>
+#include <linux/sizes.h>
+#include <linux/slab.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include "nd-core.h"
+#include "pfn.h"
+#include "nd.h"
+
+static void nd_pfn_release(struct device *dev)
+{
+	struct nd_region *nd_region = to_nd_region(dev->parent);
+	struct nd_pfn *nd_pfn = to_nd_pfn(dev);
+
+	dev_dbg(dev, "%s\n", __func__);
+	nd_detach_ndns(&nd_pfn->dev, &nd_pfn->ndns);
+	ida_simple_remove(&nd_region->pfn_ida, nd_pfn->id);
+	kfree(nd_pfn->uuid);
+	kfree(nd_pfn);
+}
+
+static struct device_type nd_pfn_device_type = {
+	.name = "nd_pfn",
+	.release = nd_pfn_release,
+};
+
+bool is_nd_pfn(struct device *dev)
+{
+	return dev ? dev->type == &nd_pfn_device_type : false;
+}
+EXPORT_SYMBOL(is_nd_pfn);
+
+struct nd_pfn *to_nd_pfn(struct device *dev)
+{
+	struct nd_pfn *nd_pfn = container_of(dev, struct nd_pfn, dev);
+
+	WARN_ON(!is_nd_pfn(dev));
+	return nd_pfn;
+}
+EXPORT_SYMBOL(to_nd_pfn);
+
+static ssize_t mode_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct nd_pfn *nd_pfn = to_nd_pfn(dev);
+
+	switch (nd_pfn->mode) {
+	case PFN_MODE_RAM:
+		return sprintf(buf, "ram\n");
+	case PFN_MODE_PMEM:
+		return sprintf(buf, "pmem\n");
+	default:
+		return sprintf(buf, "none\n");
+	}
+}
+
+static ssize_t mode_store(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t len)
+{
+	struct nd_pfn *nd_pfn = to_nd_pfn(dev);
+	ssize_t rc = 0;
+
+	device_lock(dev);
+	nvdimm_bus_lock(dev);
+	if (dev->driver)
+		rc = -EBUSY;
+	else {
+		size_t n = len - 1;
+
+		if (strncmp(buf, "pmem\n", n) == 0
+				|| strncmp(buf, "pmem", n) == 0) {
+			/* TODO: allocate from PMEM support */
+			rc = -ENOTTY;
+		} else if (strncmp(buf, "ram\n", n) == 0
+				|| strncmp(buf, "ram", n) == 0)
+			nd_pfn->mode = PFN_MODE_RAM;
+		else if (strncmp(buf, "none\n", n) == 0
+				|| strncmp(buf, "none", n) == 0)
+			nd_pfn->mode = PFN_MODE_NONE;
+		else
+			rc = -EINVAL;
+	}
+	dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__,
+			rc, buf, buf[len - 1] == '\n' ? "" : "\n");
+	nvdimm_bus_unlock(dev);
+	device_unlock(dev);
+
+	return rc ? rc : len;
+}
+static DEVICE_ATTR_RW(mode);
+
+static ssize_t uuid_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct nd_pfn *nd_pfn = to_nd_pfn(dev);
+
+	if (nd_pfn->uuid)
+		return sprintf(buf, "%pUb\n", nd_pfn->uuid);
+	return sprintf(buf, "\n");
+}
+
+static ssize_t uuid_store(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t len)
+{
+	struct nd_pfn *nd_pfn = to_nd_pfn(dev);
+	ssize_t rc;
+
+	device_lock(dev);
+	rc = nd_uuid_store(dev, &nd_pfn->uuid, buf, len);
+	dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__,
+			rc, buf, buf[len - 1] == '\n' ? "" : "\n");
+	device_unlock(dev);
+
+	return rc ? rc : len;
+}
+static DEVICE_ATTR_RW(uuid);
+
+static ssize_t namespace_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct nd_pfn *nd_pfn = to_nd_pfn(dev);
+	ssize_t rc;
+
+	nvdimm_bus_lock(dev);
+	rc = sprintf(buf, "%s\n", nd_pfn->ndns
+			? dev_name(&nd_pfn->ndns->dev) : "");
+	nvdimm_bus_unlock(dev);
+	return rc;
+}
+
+static ssize_t namespace_store(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t len)
+{
+	struct nd_pfn *nd_pfn = to_nd_pfn(dev);
+	ssize_t rc;
+
+	nvdimm_bus_lock(dev);
+	device_lock(dev);
+	rc = nd_namespace_store(dev, &nd_pfn->ndns, buf, len);
+	dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__,
+			rc, buf, buf[len - 1] == '\n' ? "" : "\n");
+	device_unlock(dev);
+	nvdimm_bus_unlock(dev);
+
+	return rc;
+}
+static DEVICE_ATTR_RW(namespace);
+
+static struct attribute *nd_pfn_attributes[] = {
+	&dev_attr_mode.attr,
+	&dev_attr_namespace.attr,
+	&dev_attr_uuid.attr,
+	NULL,
+};
+
+static struct attribute_group nd_pfn_attribute_group = {
+	.attrs = nd_pfn_attributes,
+};
+
+static const struct attribute_group *nd_pfn_attribute_groups[] = {
+	&nd_pfn_attribute_group,
+	&nd_device_attribute_group,
+	&nd_numa_attribute_group,
+	NULL,
+};
+
+static struct device *__nd_pfn_create(struct nd_region *nd_region,
+		u8 *uuid, enum nd_pfn_mode mode,
+		struct nd_namespace_common *ndns)
+{
+	struct nd_pfn *nd_pfn;
+	struct device *dev;
+
+	/* we can only create pages for contiguous ranges of pmem */
+	if (!is_nd_pmem(&nd_region->dev))
+		return NULL;
+
+	nd_pfn = kzalloc(sizeof(*nd_pfn), GFP_KERNEL);
+	if (!nd_pfn)
+		return NULL;
+
+	nd_pfn->id = ida_simple_get(&nd_region->pfn_ida, 0, 0, GFP_KERNEL);
+	if (nd_pfn->id < 0) {
+		kfree(nd_pfn);
+		return NULL;
+	}
+
+	nd_pfn->mode = mode;
+	if (uuid)
+		uuid = kmemdup(uuid, 16, GFP_KERNEL);
+	nd_pfn->uuid = uuid;
+	dev = &nd_pfn->dev;
+	dev_set_name(dev, "pfn%d.%d", nd_region->id, nd_pfn->id);
+	dev->parent = &nd_region->dev;
+	dev->type = &nd_pfn_device_type;
+	dev->groups = nd_pfn_attribute_groups;
+	device_initialize(&nd_pfn->dev);
+	if (ndns && !__nd_attach_ndns(&nd_pfn->dev, ndns, &nd_pfn->ndns)) {
+		dev_dbg(&ndns->dev, "%s failed, already claimed by %s\n",
+				__func__, dev_name(ndns->claim));
+		put_device(dev);
+		return NULL;
+	}
+	return dev;
+}
+
+struct device *nd_pfn_create(struct nd_region *nd_region)
+{
+	struct device *dev = __nd_pfn_create(nd_region, NULL, PFN_MODE_NONE,
+			NULL);
+
+	if (dev)
+		__nd_device_register(dev);
+	return dev;
+}
+
+int nd_pfn_validate(struct nd_pfn *nd_pfn)
+{
+	struct nd_namespace_common *ndns = nd_pfn->ndns;
+	struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb;
+	struct nd_namespace_io *nsio;
+	u64 checksum, offset;
+
+	if (!pfn_sb || !ndns)
+		return -ENODEV;
+
+	if (!is_nd_pmem(nd_pfn->dev.parent))
+		return -ENODEV;
+
+	/* section alignment for simple hotplug */
+	if (nvdimm_namespace_capacity(ndns) < ND_PFN_ALIGN)
+		return -ENODEV;
+
+	if (nvdimm_read_bytes(ndns, SZ_4K, pfn_sb, sizeof(*pfn_sb)))
+		return -ENXIO;
+
+	if (memcmp(pfn_sb->signature, PFN_SIG, PFN_SIG_LEN) != 0)
+		return -ENODEV;
+
+	checksum = le64_to_cpu(pfn_sb->checksum);
+	pfn_sb->checksum = 0;
+	if (checksum != nd_sb_checksum((struct nd_gen_sb *) pfn_sb))
+		return -ENODEV;
+	pfn_sb->checksum = cpu_to_le64(checksum);
+
+	switch (le32_to_cpu(pfn_sb->mode)) {
+	case PFN_MODE_RAM:
+		break;
+	case PFN_MODE_PMEM:
+		/* TODO: allocate from PMEM support */
+		return -ENOTTY;
+	default:
+		return -ENXIO;
+	}
+
+	if (!nd_pfn->uuid) {
+		/* from probe we allocate */
+		nd_pfn->uuid = kmemdup(pfn_sb->uuid, 16, GFP_KERNEL);
+		if (!nd_pfn->uuid)
+			return -ENOMEM;
+	} else {
+		/* from init we validate */
+		if (memcmp(nd_pfn->uuid, pfn_sb->uuid, 16) != 0)
+			return -EINVAL;
+	}
+
+	/*
+	 * These warnings are verbose because they can only trigger in
+	 * the case where the physical address alignment of the
+	 * namespace has changed since the pfn superblock was
+	 * established.
+	 */
+	offset = le64_to_cpu(pfn_sb->dataoff);
+	nsio = to_nd_namespace_io(&ndns->dev);
+	if (nsio->res.start & ND_PFN_MASK) {
+		dev_err(&nd_pfn->dev,
+				"init failed: %s not section aligned\n",
+				dev_name(&ndns->dev));
+		return -EBUSY;
+	} else if (offset >= resource_size(&nsio->res)) {
+		dev_err(&nd_pfn->dev, "pfn array size exceeds capacity of %s\n",
+				dev_name(&ndns->dev));
+		return -EBUSY;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(nd_pfn_validate);
+
+int nd_pfn_probe(struct nd_namespace_common *ndns, void *drvdata)
+{
+	int rc;
+	struct device *dev;
+	struct nd_pfn *nd_pfn;
+	struct nd_pfn_sb *pfn_sb;
+	struct nd_region *nd_region = to_nd_region(ndns->dev.parent);
+
+	if (ndns->force_raw)
+		return -ENODEV;
+
+	nvdimm_bus_lock(&ndns->dev);
+	dev = __nd_pfn_create(nd_region, NULL, PFN_MODE_NONE, ndns);
+	nvdimm_bus_unlock(&ndns->dev);
+	if (!dev)
+		return -ENOMEM;
+	dev_set_drvdata(dev, drvdata);
+	pfn_sb = kzalloc(sizeof(*pfn_sb), GFP_KERNEL);
+	nd_pfn = to_nd_pfn(dev);
+	nd_pfn->pfn_sb = pfn_sb;
+	rc = nd_pfn_validate(nd_pfn);
+	nd_pfn->pfn_sb = NULL;
+	kfree(pfn_sb);
+	dev_dbg(&ndns->dev, "%s: pfn: %s\n", __func__,
+			rc == 0 ? dev_name(dev) : "<none>");
+	if (rc < 0) {
+		__nd_detach_ndns(dev, &nd_pfn->ndns);
+		put_device(dev);
+	} else
+		__nd_device_register(&nd_pfn->dev);
+
+	return rc;
+}
+EXPORT_SYMBOL(nd_pfn_probe);
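
In practice a pfn instance is wired up through the attributes defined above, roughly: write a mode ("ram"), a UUID, and a namespace name such as namespace0.0 into the device's sysfs files, then rebind the claiming driver; the device names here are illustrative, and the ndctl utility automates the sequence.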

+ 209 - 36
drivers/nvdimm/pmem.c

@@ -21,18 +21,24 @@
 #include <linux/init.h>
 #include <linux/platform_device.h>
 #include <linux/module.h>
+#include <linux/memory_hotplug.h>
 #include <linux/moduleparam.h>
+#include <linux/vmalloc.h>
 #include <linux/slab.h>
 #include <linux/pmem.h>
 #include <linux/nd.h>
+#include "pfn.h"
 #include "nd.h"
 #include "nd.h"
 
 
 struct pmem_device {
 struct pmem_device {
 	struct request_queue	*pmem_queue;
 	struct request_queue	*pmem_queue;
 	struct gendisk		*pmem_disk;
 	struct gendisk		*pmem_disk;
+	struct nd_namespace_common *ndns;
 
 
 	/* One contiguous memory region per device */
 	/* One contiguous memory region per device */
 	phys_addr_t		phys_addr;
 	phys_addr_t		phys_addr;
+	/* when non-zero this device is hosting a 'pfn' instance */
+	phys_addr_t		data_offset;
 	void __pmem		*virt_addr;
 	size_t			size;
 };
@@ -44,7 +50,7 @@ static void pmem_do_bvec(struct pmem_device *pmem, struct page *page,
 			sector_t sector)
 {
 	void *mem = kmap_atomic(page);
-	size_t pmem_off = sector << 9;
+	phys_addr_t pmem_off = sector * 512 + pmem->data_offset;
 	void __pmem *pmem_addr = pmem->virt_addr + pmem_off;

 	if (rw == READ) {
@@ -92,19 +98,26 @@ static int pmem_rw_page(struct block_device *bdev, sector_t sector,
 }

 static long pmem_direct_access(struct block_device *bdev, sector_t sector,
-			      void **kaddr, unsigned long *pfn, long size)
+		      void __pmem **kaddr, unsigned long *pfn)
 {
 	struct pmem_device *pmem = bdev->bd_disk->private_data;
-	size_t offset = sector << 9;
-
-	if (!pmem)
-		return -ENODEV;
+	resource_size_t offset = sector * 512 + pmem->data_offset;
+	resource_size_t size;
+
+	if (pmem->data_offset) {
+		/*
+		 * Limit the direct_access() size to what is covered by
+		 * the memmap
+		 */
+		size = (pmem->size - offset) & ~ND_PFN_MASK;
+	} else
+		size = pmem->size - offset;

 	/* FIXME convert DAX to comprehend that this mapping has a lifetime */
-	*kaddr = (void __force *) pmem->virt_addr + offset;
+	*kaddr = pmem->virt_addr + offset;
 	*pfn = (pmem->phys_addr + offset) >> PAGE_SHIFT;

-	return pmem->size - offset;
+	return size;
 }

 static const struct block_device_operations pmem_fops = {
@@ -119,27 +132,33 @@ static struct pmem_device *pmem_alloc(struct device *dev,
 {
 	struct pmem_device *pmem;

-	pmem = kzalloc(sizeof(*pmem), GFP_KERNEL);
+	pmem = devm_kzalloc(dev, sizeof(*pmem), GFP_KERNEL);
 	if (!pmem)
 		return ERR_PTR(-ENOMEM);

 	pmem->phys_addr = res->start;
 	pmem->size = resource_size(res);
-	if (!arch_has_pmem_api())
+	if (!arch_has_wmb_pmem())
 		dev_warn(dev, "unable to guarantee persistence of writes\n");
 		dev_warn(dev, "unable to guarantee persistence of writes\n");
 
 
-	if (!request_mem_region(pmem->phys_addr, pmem->size, dev_name(dev))) {
+	if (!devm_request_mem_region(dev, pmem->phys_addr, pmem->size,
+			dev_name(dev))) {
 		dev_warn(dev, "could not reserve region [0x%pa:0x%zx]\n",
 		dev_warn(dev, "could not reserve region [0x%pa:0x%zx]\n",
 				&pmem->phys_addr, pmem->size);
 				&pmem->phys_addr, pmem->size);
-		kfree(pmem);
 		return ERR_PTR(-EBUSY);
 	}

-	pmem->virt_addr = memremap_pmem(pmem->phys_addr, pmem->size);
-	if (!pmem->virt_addr) {
-		release_mem_region(pmem->phys_addr, pmem->size);
-		kfree(pmem);
-		return ERR_PTR(-ENXIO);
+	if (pmem_should_map_pages(dev)) {
+		void *addr = devm_memremap_pages(dev, res);
+
+		if (IS_ERR(addr))
+			return addr;
+		pmem->virt_addr = (void __pmem *) addr;
+	} else {
+		pmem->virt_addr = memremap_pmem(dev, pmem->phys_addr,
+				pmem->size);
+		if (!pmem->virt_addr)
+			return ERR_PTR(-ENXIO);
 	}

 	return pmem;
@@ -147,13 +166,16 @@ static struct pmem_device *pmem_alloc(struct device *dev,

 static void pmem_detach_disk(struct pmem_device *pmem)
 {
+	if (!pmem->pmem_disk)
+		return;
+
 	del_gendisk(pmem->pmem_disk);
 	put_disk(pmem->pmem_disk);
 	blk_cleanup_queue(pmem->pmem_queue);
 }

-static int pmem_attach_disk(struct nd_namespace_common *ndns,
-		struct pmem_device *pmem)
+static int pmem_attach_disk(struct device *dev,
+		struct nd_namespace_common *ndns, struct pmem_device *pmem)
 {
 	struct gendisk *disk;

@@ -162,6 +184,7 @@ static int pmem_attach_disk(struct nd_namespace_common *ndns,
 		return -ENOMEM;

 	blk_queue_make_request(pmem->pmem_queue, pmem_make_request);
+	blk_queue_physical_block_size(pmem->pmem_queue, PAGE_SIZE);
 	blk_queue_max_hw_sectors(pmem->pmem_queue, UINT_MAX);
 	blk_queue_bounce_limit(pmem->pmem_queue, BLK_BOUNCE_ANY);
 	queue_flag_set_unlocked(QUEUE_FLAG_NONROT, pmem->pmem_queue);
@@ -179,8 +202,8 @@ static int pmem_attach_disk(struct nd_namespace_common *ndns,
 	disk->queue		= pmem->pmem_queue;
 	disk->flags		= GENHD_FL_EXT_DEVT;
 	nvdimm_namespace_disk_name(ndns, disk->disk_name);
-	disk->driverfs_dev = &ndns->dev;
-	set_capacity(disk, pmem->size >> 9);
+	disk->driverfs_dev = dev;
+	set_capacity(disk, (pmem->size - pmem->data_offset) / 512);
 	pmem->pmem_disk = disk;

 	add_disk(disk);
@@ -209,11 +232,152 @@ static int pmem_rw_bytes(struct nd_namespace_common *ndns,
 	return 0;
 }

-static void pmem_free(struct pmem_device *pmem)
+static int nd_pfn_init(struct nd_pfn *nd_pfn)
+{
+	struct nd_pfn_sb *pfn_sb = kzalloc(sizeof(*pfn_sb), GFP_KERNEL);
+	struct pmem_device *pmem = dev_get_drvdata(&nd_pfn->dev);
+	struct nd_namespace_common *ndns = nd_pfn->ndns;
+	struct nd_region *nd_region;
+	unsigned long npfns;
+	phys_addr_t offset;
+	u64 checksum;
+	int rc;
+
+	if (!pfn_sb)
+		return -ENOMEM;
+
+	nd_pfn->pfn_sb = pfn_sb;
+	rc = nd_pfn_validate(nd_pfn);
+	if (rc == 0 || rc == -EBUSY)
+		return rc;
+
+	/* section alignment for simple hotplug */
+	if (nvdimm_namespace_capacity(ndns) < ND_PFN_ALIGN
+			|| pmem->phys_addr & ND_PFN_MASK)
+		return -ENODEV;
+
+	nd_region = to_nd_region(nd_pfn->dev.parent);
+	if (nd_region->ro) {
+		dev_info(&nd_pfn->dev,
+				"%s is read-only, unable to init metadata\n",
+				dev_name(&nd_region->dev));
+		goto err;
+	}
+
+	memset(pfn_sb, 0, sizeof(*pfn_sb));
+	npfns = (pmem->size - SZ_8K) / SZ_4K;
+	/*
+	 * Note, we use 64 here for the standard size of struct page,
+	 * debugging options may cause it to be larger in which case the
+	 * implementation will limit the pfns advertised through
+	 * ->direct_access() to those that are included in the memmap.
+	 */
+	if (nd_pfn->mode == PFN_MODE_PMEM)
+		offset = ALIGN(SZ_8K + 64 * npfns, PMD_SIZE);
+	else if (nd_pfn->mode == PFN_MODE_RAM)
+		offset = SZ_8K;
+	else
+		goto err;
+
+	npfns = (pmem->size - offset) / SZ_4K;
+	pfn_sb->mode = cpu_to_le32(nd_pfn->mode);
+	pfn_sb->dataoff = cpu_to_le64(offset);
+	pfn_sb->npfns = cpu_to_le64(npfns);
+	memcpy(pfn_sb->signature, PFN_SIG, PFN_SIG_LEN);
+	memcpy(pfn_sb->uuid, nd_pfn->uuid, 16);
+	pfn_sb->version_major = cpu_to_le16(1);
+	checksum = nd_sb_checksum((struct nd_gen_sb *) pfn_sb);
+	pfn_sb->checksum = cpu_to_le64(checksum);
+
+	rc = nvdimm_write_bytes(ndns, SZ_4K, pfn_sb, sizeof(*pfn_sb));
+	if (rc)
+		goto err;
+
+	return 0;
+ err:
+	nd_pfn->pfn_sb = NULL;
+	kfree(pfn_sb);
+	return -ENXIO;
+}
+
+static int nvdimm_namespace_detach_pfn(struct nd_namespace_common *ndns)
+{
+	struct nd_pfn *nd_pfn = to_nd_pfn(ndns->claim);
+	struct pmem_device *pmem;
+
+	/* free pmem disk */
+	pmem = dev_get_drvdata(&nd_pfn->dev);
+	pmem_detach_disk(pmem);
+
+	/* release nd_pfn resources */
+	kfree(nd_pfn->pfn_sb);
+	nd_pfn->pfn_sb = NULL;
+
+	return 0;
+}
+
+static int nvdimm_namespace_attach_pfn(struct nd_namespace_common *ndns)
 {
-	memunmap_pmem(pmem->virt_addr);
-	release_mem_region(pmem->phys_addr, pmem->size);
-	kfree(pmem);
+	struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev);
+	struct nd_pfn *nd_pfn = to_nd_pfn(ndns->claim);
+	struct device *dev = &nd_pfn->dev;
+	struct vmem_altmap *altmap;
+	struct nd_region *nd_region;
+	struct nd_pfn_sb *pfn_sb;
+	struct pmem_device *pmem;
+	phys_addr_t offset;
+	int rc;
+
+	if (!nd_pfn->uuid || !nd_pfn->ndns)
+		return -ENODEV;
+
+	nd_region = to_nd_region(dev->parent);
+	rc = nd_pfn_init(nd_pfn);
+	if (rc)
+		return rc;
+
+	if (PAGE_SIZE != SZ_4K) {
+		dev_err(dev, "only supported on systems with 4K PAGE_SIZE\n");
+		return -ENXIO;
+	}
+	if (nsio->res.start & ND_PFN_MASK) {
+		dev_err(dev, "%s not memory hotplug section aligned\n",
+				dev_name(&ndns->dev));
+		return -ENXIO;
+	}
+
+	pfn_sb = nd_pfn->pfn_sb;
+	offset = le64_to_cpu(pfn_sb->dataoff);
+	nd_pfn->mode = le32_to_cpu(nd_pfn->pfn_sb->mode);
+	if (nd_pfn->mode == PFN_MODE_RAM) {
+		if (offset != SZ_8K)
+			return -EINVAL;
+		nd_pfn->npfns = le64_to_cpu(pfn_sb->npfns);
+		altmap = NULL;
+	} else {
+		rc = -ENXIO;
+		goto err;
+	}
+
+	/* establish pfn range for lookup, and switch to direct map */
+	pmem = dev_get_drvdata(dev);
+	memunmap_pmem(dev, pmem->virt_addr);
+	pmem->virt_addr = (void __pmem *)devm_memremap_pages(dev, &nsio->res);
+	if (IS_ERR(pmem->virt_addr)) {
+		rc = PTR_ERR(pmem->virt_addr);
+		goto err;
+	}
+
+	/* attach pmem disk in "pfn-mode" */
+	pmem->data_offset = offset;
+	rc = pmem_attach_disk(dev, ndns, pmem);
+	if (rc)
+		goto err;
+
+	return rc;
+ err:
+	nvdimm_namespace_detach_pfn(ndns);
+	return rc;
 }
 }

 static int nd_pmem_probe(struct device *dev)
 	struct nd_namespace_common *ndns;
 	struct nd_namespace_common *ndns;
 	struct nd_namespace_io *nsio;
 	struct pmem_device *pmem;
 
 
 	ndns = nvdimm_namespace_common_probe(dev);
 	ndns = nvdimm_namespace_common_probe(dev);
 	if (IS_ERR(ndns))
 	if (IS_ERR(ndns))
@@ -233,18 +396,27 @@ static int nd_pmem_probe(struct device *dev)
 	if (IS_ERR(pmem))
 		return PTR_ERR(pmem);

+	pmem->ndns = ndns;
 	dev_set_drvdata(dev, pmem);
 	ndns->rw_bytes = pmem_rw_bytes;
+
 	if (is_nd_btt(dev))
-		rc = nvdimm_namespace_attach_btt(ndns);
-	else if (nd_btt_probe(ndns, pmem) == 0) {
+		return nvdimm_namespace_attach_btt(ndns);
+
+	if (is_nd_pfn(dev))
+		return nvdimm_namespace_attach_pfn(ndns);
+
+	if (nd_btt_probe(ndns, pmem) == 0) {
 		/* we'll come back as btt-pmem */
 		/* we'll come back as btt-pmem */
-		rc = -ENXIO;
-	} else
-		rc = pmem_attach_disk(ndns, pmem);
-	if (rc)
-		pmem_free(pmem);
-	return rc;
+		return -ENXIO;
+	}
+
+	if (nd_pfn_probe(ndns, pmem) == 0) {
+		/* we'll come back as pfn-pmem */
+		return -ENXIO;
+	}
+
+	return pmem_attach_disk(dev, ndns, pmem);
 }

 static int nd_pmem_remove(struct device *dev)
@@ -252,10 +424,11 @@ static int nd_pmem_remove(struct device *dev)
 	struct pmem_device *pmem = dev_get_drvdata(dev);

 	if (is_nd_btt(dev))
-		nvdimm_namespace_detach_btt(to_nd_btt(dev)->ndns);
+		nvdimm_namespace_detach_btt(pmem->ndns);
+	else if (is_nd_pfn(dev))
+		nvdimm_namespace_detach_pfn(pmem->ndns);
 	else
 		pmem_detach_disk(pmem);
-	pmem_free(pmem);

 	return 0;
 }
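
A rough worked example of the layout nd_pfn_init() above computes, for a hypothetical 16 GiB namespace and the 64-byte struct page its comment assumes:

/*
 * npfns  = (16 GiB - 8 KiB) / 4 KiB ~= 4,194,302 pages
 * memmap = 64 bytes * npfns         ~= 256 MiB
 * offset = ALIGN(8 KiB + memmap, PMD_SIZE = 2 MiB) = 258 MiB
 *
 * npfns is then recomputed over (size - offset) and recorded together
 * with dataoff in the superblock, so roughly 1.6% of raw capacity
 * funds the memmap in PFN_MODE_PMEM.
 */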

+ 2 - 0
drivers/nvdimm/region.c

@@ -53,6 +53,7 @@ static int nd_region_probe(struct device *dev)
 		return -ENODEV;

 	nd_region->btt_seed = nd_btt_create(nd_region);
+	nd_region->pfn_seed = nd_pfn_create(nd_region);
 	if (err == 0)
 		return 0;

@@ -84,6 +85,7 @@ static int nd_region_remove(struct device *dev)
 	nvdimm_bus_lock(dev);
 	nd_region->ns_seed = NULL;
 	nd_region->btt_seed = NULL;
+	nd_region->pfn_seed = NULL;
 	dev_set_drvdata(dev, NULL);
 	nvdimm_bus_unlock(dev);


+ 20 - 0
drivers/nvdimm/region_devs.c

@@ -345,6 +345,23 @@ static ssize_t btt_seed_show(struct device *dev,
 }
 static DEVICE_ATTR_RO(btt_seed);

+static ssize_t pfn_seed_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct nd_region *nd_region = to_nd_region(dev);
+	ssize_t rc;
+
+	nvdimm_bus_lock(dev);
+	if (nd_region->pfn_seed)
+		rc = sprintf(buf, "%s\n", dev_name(nd_region->pfn_seed));
+	else
+		rc = sprintf(buf, "\n");
+	nvdimm_bus_unlock(dev);
+
+	return rc;
+}
+static DEVICE_ATTR_RO(pfn_seed);
+
 static ssize_t read_only_show(struct device *dev,
 		struct device_attribute *attr, char *buf)
 {
@@ -373,6 +390,7 @@ static struct attribute *nd_region_attributes[] = {
 	&dev_attr_nstype.attr,
 	&dev_attr_mappings.attr,
 	&dev_attr_btt_seed.attr,
+	&dev_attr_pfn_seed.attr,
 	&dev_attr_read_only.attr,
 	&dev_attr_set_cookie.attr,
 	&dev_attr_available_size.attr,
@@ -740,10 +758,12 @@ static struct nd_region *nd_region_create(struct nvdimm_bus *nvdimm_bus,
 	nd_region->provider_data = ndr_desc->provider_data;
 	nd_region->nd_set = ndr_desc->nd_set;
 	nd_region->num_lanes = ndr_desc->num_lanes;
+	nd_region->flags = ndr_desc->flags;
 	nd_region->ro = ro;
 	nd_region->numa_node = ndr_desc->numa_node;
 	ida_init(&nd_region->ns_ida);
 	ida_init(&nd_region->btt_ida);
+	ida_init(&nd_region->pfn_ida);
 	dev = &nd_region->dev;
 	dev_set_name(dev, "region%d", nd_region->id);
 	dev->parent = &nvdimm_bus->dev;
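
Once a region registers, the new seed device is discoverable alongside btt_seed; reading /sys/bus/nd/devices/region0/pfn_seed, for example, would return a name like pfn0.0 (illustrative) for userspace to configure next.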

+ 1 - 2
drivers/pci/probe.c

@@ -326,8 +326,7 @@ static void pci_read_bases(struct pci_dev *dev, unsigned int howmany, int rom)
 		struct resource *res = &dev->resource[PCI_ROM_RESOURCE];
 		dev->rom_base_reg = rom;
 		res->flags = IORESOURCE_MEM | IORESOURCE_PREFETCH |
-				IORESOURCE_READONLY | IORESOURCE_CACHEABLE |
-				IORESOURCE_SIZEALIGN;
+				IORESOURCE_READONLY | IORESOURCE_SIZEALIGN;
 		__pci_read_base(dev, pci_bar_mem32, res, rom);
 	}
 }

+ 0 - 2
drivers/pnp/manager.c

@@ -97,8 +97,6 @@ static int pnp_assign_mem(struct pnp_dev *dev, struct pnp_mem *rule, int idx)
 	/* ??? rule->flags restricted to 8 bits, all tests bogus ??? */
 	if (!(rule->flags & IORESOURCE_MEM_WRITEABLE))
 		res->flags |= IORESOURCE_READONLY;
-	if (rule->flags & IORESOURCE_MEM_CACHEABLE)
-		res->flags |= IORESOURCE_CACHEABLE;
 	if (rule->flags & IORESOURCE_MEM_RANGELENGTH)
 		res->flags |= IORESOURCE_RANGELENGTH;
 	if (rule->flags & IORESOURCE_MEM_SHADOWABLE)

+ 6 - 4
drivers/s390/block/dcssblk.c

@@ -29,7 +29,7 @@ static int dcssblk_open(struct block_device *bdev, fmode_t mode);
 static void dcssblk_release(struct gendisk *disk, fmode_t mode);
 static void dcssblk_make_request(struct request_queue *q, struct bio *bio);
 static long dcssblk_direct_access(struct block_device *bdev, sector_t secnum,
-				 void **kaddr, unsigned long *pfn, long size);
+			 void __pmem **kaddr, unsigned long *pfn);

 static char dcssblk_segments[DCSSBLK_PARM_LEN] = "\0";

@@ -881,18 +881,20 @@ fail:

 static long
 dcssblk_direct_access (struct block_device *bdev, sector_t secnum,
-			void **kaddr, unsigned long *pfn, long size)
+			void __pmem **kaddr, unsigned long *pfn)
 {
 	struct dcssblk_dev_info *dev_info;
 	unsigned long offset, dev_sz;
+	void *addr;

 	dev_info = bdev->bd_disk->private_data;
 	if (!dev_info)
 		return -ENODEV;
 	dev_sz = dev_info->end - dev_info->start;
 	offset = secnum * 512;
-	*kaddr = (void *) (dev_info->start + offset);
-	*pfn = virt_to_phys(*kaddr) >> PAGE_SHIFT;
+	addr = (void *) (dev_info->start + offset);
+	*pfn = virt_to_phys(addr) >> PAGE_SHIFT;
+	*kaddr = (void __pmem *) addr;

 	return dev_sz - offset;
 }

+ 1 - 6
drivers/scsi/aic94xx/aic94xx_init.c

@@ -100,12 +100,7 @@ static int asd_map_memio(struct asd_ha_struct *asd_ha)
 				   pci_name(asd_ha->pcidev));
 			goto Err;
 		}
-		if (io_handle->flags & IORESOURCE_CACHEABLE)
-			io_handle->addr = ioremap(io_handle->start,
-						  io_handle->len);
-		else
-			io_handle->addr = ioremap_nocache(io_handle->start,
-							  io_handle->len);
+		io_handle->addr = ioremap(io_handle->start, io_handle->len);
 		if (!io_handle->addr) {
 			asd_printk("couldn't map MBAR%d of %s\n", i==0?0:1,
 				   pci_name(asd_ha->pcidev));

+ 1 - 4
drivers/scsi/arcmsr/arcmsr_hba.c

@@ -259,10 +259,7 @@ static bool arcmsr_remap_pciregion(struct AdapterControlBlock *acb)
 		addr = (unsigned long)pci_resource_start(pdev, 0);
 		range = pci_resource_len(pdev, 0);
 		flags = pci_resource_flags(pdev, 0);
-		if (flags & IORESOURCE_CACHEABLE)
-			mem_base0 = ioremap(addr, range);
-		else
-			mem_base0 = ioremap_nocache(addr, range);
+		mem_base0 = ioremap(addr, range);
 		if (!mem_base0) {
 			pr_notice("arcmsr%d: memory mapping region fail\n",
 				acb->host->host_no);

+ 4 - 11
drivers/scsi/mvsas/mv_init.c

@@ -324,13 +324,9 @@ int mvs_ioremap(struct mvs_info *mvi, int bar, int bar_ex)
 			goto err_out;

 		res_flag_ex = pci_resource_flags(pdev, bar_ex);
-		if (res_flag_ex & IORESOURCE_MEM) {
-			if (res_flag_ex & IORESOURCE_CACHEABLE)
-				mvi->regs_ex = ioremap(res_start, res_len);
-			else
-				mvi->regs_ex = ioremap_nocache(res_start,
-						res_len);
-		} else
+		if (res_flag_ex & IORESOURCE_MEM)
+			mvi->regs_ex = ioremap(res_start, res_len);
+		else
 			mvi->regs_ex = (void *)res_start;
 			mvi->regs_ex = (void *)res_start;
 		if (!mvi->regs_ex)
 			goto err_out;
 	}
 	}
 
 
 	res_flag = pci_resource_flags(pdev, bar);
 	res_flag = pci_resource_flags(pdev, bar);
-	if (res_flag & IORESOURCE_CACHEABLE)
-		mvi->regs = ioremap(res_start, res_len);
-	else
-		mvi->regs = ioremap_nocache(res_start, res_len);
+	mvi->regs = ioremap(res_start, res_len);
 
 
 	if (!mvi->regs) {
 	if (!mvi->regs) {
 		if (mvi->regs_ex && (res_flag_ex & IORESOURCE_MEM))
 		if (mvi->regs_ex && (res_flag_ex & IORESOURCE_MEM))

+ 1 - 1
drivers/scsi/sun3x_esp.c

@@ -12,9 +12,9 @@
 #include <linux/platform_device.h>
 #include <linux/dma-mapping.h>
 #include <linux/interrupt.h>
+#include <linux/io.h>
 
 
 #include <asm/sun3x.h>
 #include <asm/sun3x.h>
-#include <asm/io.h>
 #include <asm/dma.h>
 #include <asm/dma.h>
 #include <asm/dvma.h>
 #include <asm/dvma.h>
 
 

+ 1 - 0
drivers/staging/comedi/drivers/ii_pci20kc.c

@@ -28,6 +28,7 @@
  */
 
 #include <linux/module.h>
+#include <linux/io.h>
 #include "../comedidev.h"
 
 /*

+ 9 - 7
drivers/staging/unisys/visorbus/visorchannel.c

@@ -20,6 +20,7 @@
  */
 
 #include <linux/uuid.h>
+#include <linux/io.h>
 
 #include "version.h"
 #include "visorbus.h"
@@ -35,7 +36,7 @@ static const uuid_le spar_video_guid = SPAR_CONSOLEVIDEO_CHANNEL_PROTOCOL_GUID;
 struct visorchannel {
 	u64 physaddr;
 	ulong nbytes;
-	void __iomem *mapped;
+	void *mapped;
 	bool requested;
 	struct channel_header chan_hdr;
 	uuid_le guid;
@@ -92,7 +93,7 @@ visorchannel_create_guts(u64 physaddr, unsigned long channel_bytes,
 		}
 	}
 
-	channel->mapped = ioremap_cache(physaddr, size);
+	channel->mapped = memremap(physaddr, size, MEMREMAP_WB);
 	if (!channel->mapped) {
 		release_mem_region(physaddr, size);
 		goto cleanup;
@@ -112,7 +113,7 @@ visorchannel_create_guts(u64 physaddr, unsigned long channel_bytes,
 	if (uuid_le_cmp(guid, NULL_UUID_LE) == 0)
 		guid = channel->chan_hdr.chtype;
 
-	iounmap(channel->mapped);
+	memunmap(channel->mapped);
 	if (channel->requested)
 		release_mem_region(channel->physaddr, channel->nbytes);
 	channel->mapped = NULL;
@@ -125,7 +126,8 @@ visorchannel_create_guts(u64 physaddr, unsigned long channel_bytes,
 		}
 	}
 
-	channel->mapped = ioremap_cache(channel->physaddr, channel_bytes);
+	channel->mapped = memremap(channel->physaddr, channel_bytes,
+			MEMREMAP_WB);
 	if (!channel->mapped) {
 		release_mem_region(channel->physaddr, channel_bytes);
 		goto cleanup;
@@ -166,7 +168,7 @@ visorchannel_destroy(struct visorchannel *channel)
 	if (!channel)
 		return;
 	if (channel->mapped) {
-		iounmap(channel->mapped);
+		memunmap(channel->mapped);
 		if (channel->requested)
 			release_mem_region(channel->physaddr, channel->nbytes);
 	}
@@ -240,7 +242,7 @@ visorchannel_read(struct visorchannel *channel, ulong offset,
 	if (offset + nbytes > channel->nbytes)
 		return -EIO;
 
-	memcpy_fromio(local, channel->mapped + offset, nbytes);
+	memcpy(local, channel->mapped + offset, nbytes);
 
 	return 0;
 }
@@ -262,7 +264,7 @@ visorchannel_write(struct visorchannel *channel, ulong offset,
 		       local, copy_size);
 	}
 
-	memcpy_toio(channel->mapped + offset, local, nbytes);
+	memcpy(channel->mapped + offset, local, nbytes);
 
 	return 0;
 }

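The conversion recipe visible here (and repeated in visorchipset.c below) is mechanical: drop the __iomem annotation, replace ioremap_cache()/iounmap() with memremap(..., MEMREMAP_WB)/memunmap(), and demote memcpy_toio()/memcpy_fromio() to plain memcpy(). A condensed sketch of the idiom, with hypothetical identifiers and error handling trimmed:

    #include <linux/io.h>
    #include <linux/string.h>
    
    /* before: void __iomem *va = ioremap_cache(phys, len);
     *         memcpy_fromio(buf, va, len); iounmap(va);
     */
    static int read_shared_region(phys_addr_t phys, void *buf, size_t len)
    {
    	void *va = memremap(phys, len, MEMREMAP_WB);	/* plain pointer */
    
    	if (!va)
    		return -ENOMEM;
    	memcpy(buf, va, len);	/* ordinary loads, no mmio accessors */
    	memunmap(va);
    	return 0;
    }
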
+ 9 - 8
drivers/staging/unisys/visorbus/visorchipset.c

@@ -118,7 +118,7 @@ static struct visorchannel *controlvm_channel;
 
 /* Manages the request payload in the controlvm channel */
 struct visor_controlvm_payload_info {
-	u8 __iomem *ptr;	/* pointer to base address of payload pool */
+	u8 *ptr;		/* pointer to base address of payload pool */
 	u64 offset;		/* offset from beginning of controlvm
 				 * channel to beginning of payload * pool */
 	u32 bytes;		/* number of bytes in payload pool */
@@ -400,21 +400,22 @@ parser_init_byte_stream(u64 addr, u32 bytes, bool local, bool *retry)
 		p = __va((unsigned long) (addr));
 		memcpy(ctx->data, p, bytes);
 	} else {
-		void __iomem *mapping;
+		void *mapping;
 
 		if (!request_mem_region(addr, bytes, "visorchipset")) {
 			rc = NULL;
 			goto cleanup;
 		}
 
-		mapping = ioremap_cache(addr, bytes);
+		mapping = memremap(addr, bytes, MEMREMAP_WB);
 		if (!mapping) {
 			release_mem_region(addr, bytes);
 			rc = NULL;
 			goto cleanup;
 		}
-		memcpy_fromio(ctx->data, mapping, bytes);
+		memcpy(ctx->data, mapping, bytes);
 		release_mem_region(addr, bytes);
+		memunmap(mapping);
 	}
 
 	ctx->byte_stream = true;
@@ -1327,7 +1328,7 @@ static int
 initialize_controlvm_payload_info(u64 phys_addr, u64 offset, u32 bytes,
 				  struct visor_controlvm_payload_info *info)
 {
-	u8 __iomem *payload = NULL;
+	u8 *payload = NULL;
 	int rc = CONTROLVM_RESP_SUCCESS;
 
 	if (!info) {
@@ -1339,7 +1340,7 @@ initialize_controlvm_payload_info(u64 phys_addr, u64 offset, u32 bytes,
 		rc = -CONTROLVM_RESP_ERROR_PAYLOAD_INVALID;
 		goto cleanup;
 	}
-	payload = ioremap_cache(phys_addr + offset, bytes);
+	payload = memremap(phys_addr + offset, bytes, MEMREMAP_WB);
 	if (!payload) {
 		rc = -CONTROLVM_RESP_ERROR_IOREMAP_FAILED;
 		goto cleanup;
@@ -1352,7 +1353,7 @@ initialize_controlvm_payload_info(u64 phys_addr, u64 offset, u32 bytes,
 cleanup:
 	if (rc < 0) {
 		if (payload) {
-			iounmap(payload);
+			memunmap(payload);
 			payload = NULL;
 		}
 	}
@@ -1363,7 +1364,7 @@ static void
 destroy_controlvm_payload_info(struct visor_controlvm_payload_info *info)
 {
 	if (info->ptr) {
-		iounmap(info->ptr);
+		memunmap(info->ptr);
 		info->ptr = NULL;
 	}
 	memset(info, 0, sizeof(struct visor_controlvm_payload_info));

+ 1 - 1
drivers/tty/serial/8250/8250_core.c

@@ -36,11 +36,11 @@
 #include <linux/slab.h>
 #include <linux/uaccess.h>
 #include <linux/pm_runtime.h>
+#include <linux/io.h>
 #ifdef CONFIG_SPARC
 #include <linux/sunserialcore.h>
 #endif
 
-#include <asm/io.h>
 #include <asm/irq.h>
 
 #include "8250.h"

+ 0 - 1
drivers/video/fbdev/ocfb.c

@@ -325,7 +325,6 @@ static int ocfb_probe(struct platform_device *pdev)
 		dev_err(&pdev->dev, "I/O resource request failed\n");
 		return -ENXIO;
 	}
-	res->flags &= ~IORESOURCE_CACHEABLE;
 	fbdev->regs = devm_ioremap_resource(&pdev->dev, res);
 	if (IS_ERR(fbdev->regs))
 		return PTR_ERR(fbdev->regs);

+ 1 - 2
drivers/video/fbdev/s1d13xxxfb.c

@@ -32,8 +32,7 @@
 #include <linux/spinlock_types.h>
 #include <linux/spinlock.h>
 #include <linux/slab.h>
-
-#include <asm/io.h>
+#include <linux/io.h>
 
 #include <video/s1d13xxxfb.h>
 

+ 1 - 0
drivers/video/fbdev/stifb.c

@@ -64,6 +64,7 @@
 #include <linux/fb.h>
 #include <linux/init.h>
 #include <linux/ioport.h>
+#include <linux/io.h>
 
 #include <asm/grfioctl.h>	/* for HP-UX compatibility */
 #include <asm/uaccess.h>

+ 2 - 2
fs/block_dev.c

@@ -441,7 +441,7 @@ EXPORT_SYMBOL_GPL(bdev_write_page);
  * accessible at this address.
  */
 long bdev_direct_access(struct block_device *bdev, sector_t sector,
-			void **addr, unsigned long *pfn, long size)
+			void __pmem **addr, unsigned long *pfn, long size)
 {
 	long avail;
 	const struct block_device_operations *ops = bdev->bd_disk->fops;
@@ -462,7 +462,7 @@ long bdev_direct_access(struct block_device *bdev, sector_t sector,
 	sector += get_start_sect(bdev);
 	if (sector % (PAGE_SIZE / 512))
 		return -EINVAL;
-	avail = ops->direct_access(bdev, sector, addr, pfn, size);
+	avail = ops->direct_access(bdev, sector, addr, pfn);
 	if (!avail)
 		return -ERANGE;
 	return min(avail, size);

+ 38 - 24
fs/dax.c

@@ -23,6 +23,7 @@
 #include <linux/memcontrol.h>
 #include <linux/mm.h>
 #include <linux/mutex.h>
+#include <linux/pmem.h>
 #include <linux/sched.h>
 #include <linux/uio.h>
 #include <linux/vmstat.h>
@@ -34,7 +35,7 @@ int dax_clear_blocks(struct inode *inode, sector_t block, long size)
 
 	might_sleep();
 	do {
-		void *addr;
+		void __pmem *addr;
 		unsigned long pfn;
 		long count;
 
@@ -46,10 +47,7 @@ int dax_clear_blocks(struct inode *inode, sector_t block, long size)
 			unsigned pgsz = PAGE_SIZE - offset_in_page(addr);
 			if (pgsz > count)
 				pgsz = count;
-			if (pgsz < PAGE_SIZE)
-				memset(addr, 0, pgsz);
-			else
-				clear_page(addr);
+			clear_pmem(addr, pgsz);
 			addr += pgsz;
 			size -= pgsz;
 			count -= pgsz;
@@ -59,26 +57,29 @@ int dax_clear_blocks(struct inode *inode, sector_t block, long size)
 		}
 	} while (size);
 
+	wmb_pmem();
 	return 0;
 }
 EXPORT_SYMBOL_GPL(dax_clear_blocks);
 
-static long dax_get_addr(struct buffer_head *bh, void **addr, unsigned blkbits)
+static long dax_get_addr(struct buffer_head *bh, void __pmem **addr,
+		unsigned blkbits)
 {
 	unsigned long pfn;
 	sector_t sector = bh->b_blocknr << (blkbits - 9);
 	return bdev_direct_access(bh->b_bdev, sector, addr, &pfn, bh->b_size);
 }
 
-static void dax_new_buf(void *addr, unsigned size, unsigned first, loff_t pos,
-			loff_t end)
+/* the clear_pmem() calls are ordered by a wmb_pmem() in the caller */
+static void dax_new_buf(void __pmem *addr, unsigned size, unsigned first,
+		loff_t pos, loff_t end)
 {
 	loff_t final = end - pos + first; /* The final byte of the buffer */
 
 	if (first > 0)
-		memset(addr, 0, first);
+		clear_pmem(addr, first);
 	if (final < size)
-		memset(addr + final, 0, size - final);
+		clear_pmem(addr + final, size - final);
 }
 
 static bool buffer_written(struct buffer_head *bh)
@@ -106,14 +107,15 @@ static ssize_t dax_io(struct inode *inode, struct iov_iter *iter,
 	loff_t pos = start;
 	loff_t max = start;
 	loff_t bh_max = start;
-	void *addr;
+	void __pmem *addr;
 	bool hole = false;
+	bool need_wmb = false;
 
 	if (iov_iter_rw(iter) != WRITE)
 		end = min(end, i_size_read(inode));
 
 	while (pos < end) {
-		unsigned len;
+		size_t len;
 		if (pos == max) {
 			unsigned blkbits = inode->i_blkbits;
 			sector_t block = pos >> blkbits;
@@ -145,19 +147,23 @@ static ssize_t dax_io(struct inode *inode, struct iov_iter *iter,
 				retval = dax_get_addr(bh, &addr, blkbits);
 				if (retval < 0)
 					break;
-				if (buffer_unwritten(bh) || buffer_new(bh))
+				if (buffer_unwritten(bh) || buffer_new(bh)) {
 					dax_new_buf(addr, retval, first, pos,
 									end);
+					need_wmb = true;
+				}
 				addr += first;
 				size = retval - first;
 			}
 			max = min(pos + size, end);
 		}
 
-		if (iov_iter_rw(iter) == WRITE)
-			len = copy_from_iter_nocache(addr, max - pos, iter);
-		else if (!hole)
-			len = copy_to_iter(addr, max - pos, iter);
+		if (iov_iter_rw(iter) == WRITE) {
+			len = copy_from_iter_pmem(addr, max - pos, iter);
+			need_wmb = true;
+		} else if (!hole)
+			len = copy_to_iter((void __force *)addr, max - pos,
+					iter);
 		else
 			len = iov_iter_zero(max - pos, iter);
 
@@ -168,6 +174,9 @@ static ssize_t dax_io(struct inode *inode, struct iov_iter *iter,
 		addr += len;
 	}
 
+	if (need_wmb)
+		wmb_pmem();
+
 	return (pos == start) ? retval : pos - start;
 }
 
@@ -260,11 +269,13 @@ static int dax_load_hole(struct address_space *mapping, struct page *page,
 static int copy_user_bh(struct page *to, struct buffer_head *bh,
 			unsigned blkbits, unsigned long vaddr)
 {
-	void *vfrom, *vto;
+	void __pmem *vfrom;
+	void *vto;
+
 	if (dax_get_addr(bh, &vfrom, blkbits) < 0)
 		return -EIO;
 	vto = kmap_atomic(to);
-	copy_user_page(vto, vfrom, vaddr, to);
+	copy_user_page(vto, (void __force *)vfrom, vaddr, to);
 	kunmap_atomic(vto);
 	return 0;
 }
@@ -275,7 +286,7 @@ static int dax_insert_mapping(struct inode *inode, struct buffer_head *bh,
 	struct address_space *mapping = inode->i_mapping;
 	sector_t sector = bh->b_blocknr << (inode->i_blkbits - 9);
 	unsigned long vaddr = (unsigned long)vmf->virtual_address;
-	void *addr;
+	void __pmem *addr;
 	unsigned long pfn;
 	pgoff_t size;
 	int error;
@@ -303,8 +314,10 @@ static int dax_insert_mapping(struct inode *inode, struct buffer_head *bh,
 		goto out;
 	}
 
-	if (buffer_unwritten(bh) || buffer_new(bh))
-		clear_page(addr);
+	if (buffer_unwritten(bh) || buffer_new(bh)) {
+		clear_pmem(addr, PAGE_SIZE);
+		wmb_pmem();
+	}
 
 	error = vm_insert_mixed(vma, vaddr, pfn);
 
@@ -548,11 +561,12 @@ int dax_zero_page_range(struct inode *inode, loff_t from, unsigned length,
 	if (err < 0)
 		return err;
 	if (buffer_written(&bh)) {
-		void *addr;
+		void __pmem *addr;
 		err = dax_get_addr(&bh, &addr, inode->i_blkbits);
 		if (err < 0)
 			return err;
-		memset(addr + offset, 0, length);
+		clear_pmem(addr + offset, length);
+		wmb_pmem();
 	}
 
 	return 0;

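Every write path in the rework above follows the same discipline: stores reach a DAX mapping only through copy_from_iter_pmem(), memcpy_to_pmem() or clear_pmem(), and durability is claimed only after a single trailing wmb_pmem(). A minimal sketch of that write-then-fence pattern, using only interfaces from this series:

    #include <linux/pmem.h>
    
    /* zero a prefix, copy a payload, then make both durable with one fence */
    static void pmem_update(void __pmem *dst, const void *src, size_t len,
    		size_t zero_prefix)
    {
    	clear_pmem(dst, zero_prefix);		/* ordered by wmb_pmem() below */
    	memcpy_to_pmem(dst + zero_prefix, src, len);
    	wmb_pmem();				/* one fence covers both stores */
    }
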
+ 6 - 0
include/asm-generic/memory_model.h

@@ -69,6 +69,12 @@
 })
 #endif /* CONFIG_FLATMEM/DISCONTIGMEM/SPARSEMEM */
 
+/*
+ * Convert a physical address to a Page Frame Number and back
+ */
+#define	__phys_to_pfn(paddr)	((unsigned long)((paddr) >> PAGE_SHIFT))
+#define	__pfn_to_phys(pfn)	((pfn) << PAGE_SHIFT)
+
 #define page_to_pfn __page_to_pfn
 #define pfn_to_page __pfn_to_page
 

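Both helpers are plain shifts, so only page-aligned addresses round-trip exactly; __pfn_to_phys(__phys_to_pfn(addr)) yields the base of the containing page. For example, with 4K pages (PAGE_SHIFT == 12):

    unsigned long pfn = __phys_to_pfn(0x12345678);	/* -> 0x12345 */
    phys_addr_t base  = __pfn_to_phys(pfn);		/* -> 0x12345000 */
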
+ 4 - 4
include/linux/blkdev.h

@@ -1569,8 +1569,8 @@ struct block_device_operations {
 	int (*rw_page)(struct block_device *, sector_t, struct page *, int rw);
 	int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
 	int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
-	long (*direct_access)(struct block_device *, sector_t,
-					void **, unsigned long *pfn, long size);
+	long (*direct_access)(struct block_device *, sector_t, void __pmem **,
+			unsigned long *pfn);
 	unsigned int (*check_events) (struct gendisk *disk,
 				      unsigned int clearing);
 	/* ->media_changed() is DEPRECATED, use ->check_events() instead */
@@ -1588,8 +1588,8 @@ extern int __blkdev_driver_ioctl(struct block_device *, fmode_t, unsigned int,
 extern int bdev_read_page(struct block_device *, sector_t, struct page *);
 extern int bdev_write_page(struct block_device *, sector_t, struct page *,
 						struct writeback_control *);
-extern long bdev_direct_access(struct block_device *, sector_t, void **addr,
-						unsigned long *pfn, long size);
+extern long bdev_direct_access(struct block_device *, sector_t,
+		void __pmem **addr, unsigned long *pfn, long size);
 #else /* CONFIG_BLOCK */
 
 struct block_device;

+ 1 - 1
include/linux/io-mapping.h

@@ -21,7 +21,7 @@
 #include <linux/types.h>
 #include <linux/slab.h>
 #include <linux/bug.h>
-#include <asm/io.h>
+#include <linux/io.h>
 #include <asm/page.h>
 
 /*

+ 33 - 0
include/linux/io.h

@@ -20,10 +20,13 @@
 
 #include <linux/types.h>
 #include <linux/init.h>
+#include <linux/bug.h>
+#include <linux/err.h>
 #include <asm/io.h>
 #include <asm/page.h>
 
 struct device;
+struct resource;
 
 __visible void __iowrite32_copy(void __iomem *to, const void *from, size_t count);
 void __iowrite64_copy(void __iomem *to, const void *from, size_t count);
@@ -80,6 +83,27 @@ int check_signature(const volatile void __iomem *io_addr,
 			const unsigned char *signature, int length);
 void devm_ioremap_release(struct device *dev, void *res);
 
+void *devm_memremap(struct device *dev, resource_size_t offset,
+		size_t size, unsigned long flags);
+void devm_memunmap(struct device *dev, void *addr);
+
+void *__devm_memremap_pages(struct device *dev, struct resource *res);
+
+#ifdef CONFIG_ZONE_DEVICE
+void *devm_memremap_pages(struct device *dev, struct resource *res);
+#else
+static inline void *devm_memremap_pages(struct device *dev, struct resource *res)
+{
+	/*
+	 * Fail attempts to call devm_memremap_pages() without
+	 * ZONE_DEVICE support enabled, this requires callers to fall
+	 * back to plain devm_memremap() based on config
+	 */
+	WARN_ON_ONCE(1);
+	return ERR_PTR(-ENXIO);
+}
+#endif
+
 /*
  * Some systems do not have legacy ISA devices.
  * /dev/port is not a valid interface on these systems.
@@ -121,4 +145,13 @@ static inline int arch_phys_wc_index(int handle)
 #endif
 #endif
 
+enum {
+	/* See memremap() kernel-doc for usage description... */
+	MEMREMAP_WB = 1 << 0,
+	MEMREMAP_WT = 1 << 1,
+};
+
+void *memremap(resource_size_t offset, size_t size, unsigned long flags);
+void memunmap(void *addr);
+
 #endif /* _LINUX_IO_H */

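Together these declarations give drivers a __iomem-free mapping interface with devres lifetime management. A hedged usage sketch (the foo_* naming is illustrative, not from this patch set):

    #include <linux/device.h>
    #include <linux/io.h>
    #include <linux/ioport.h>
    
    static int foo_map(struct device *dev, struct resource *res)
    {
    	/* unmapped automatically on driver detach; no __iomem involved */
    	void *base = devm_memremap(dev, res->start, resource_size(res),
    			MEMREMAP_WB);
    
    	if (!base)
    		return -ENOMEM;
    	/* 'base' is a normal pointer: memcpy()/memset() are valid here */
    	return 0;
    }
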
+ 4 - 0
include/linux/libnvdimm.h

@@ -31,6 +31,9 @@ enum {
 	ND_CMD_ARS_STATUS_MAX = SZ_4K,
 	ND_MAX_MAPPINGS = 32,
 
+	/* region flag indicating to direct-map persistent memory by default */
+	ND_REGION_PAGEMAP = 0,
+
 	/* mark newly adjusted resources as requiring a label update */
 	DPA_RESOURCE_ADJUSTED = 1 << 0,
 };
@@ -91,6 +94,7 @@ struct nd_region_desc {
 	void *provider_data;
 	int num_lanes;
 	int numa_node;
+	unsigned long flags;
 };
 
 struct nvdimm_bus;

+ 3 - 2
include/linux/memory_hotplug.h

@@ -266,8 +266,9 @@ static inline void remove_memory(int nid, u64 start, u64 size) {}
 extern int walk_memory_range(unsigned long start_pfn, unsigned long end_pfn,
 		void *arg, int (*func)(struct memory_block *, void *));
 extern int add_memory(int nid, u64 start, u64 size);
-extern int zone_for_memory(int nid, u64 start, u64 size, int zone_default);
-extern int arch_add_memory(int nid, u64 start, u64 size);
+extern int zone_for_memory(int nid, u64 start, u64 size, int zone_default,
+		bool for_device);
+extern int arch_add_memory(int nid, u64 start, u64 size, bool for_device);
 extern int offline_pages(unsigned long start_pfn, unsigned long nr_pages);
 extern bool is_memblock_offlined(struct memory_block *mem);
 extern void remove_memory(int nid, u64 start, u64 size);

+ 8 - 1
include/linux/mm.h

@@ -372,7 +372,14 @@ static inline int put_page_unless_one(struct page *page)
 }
 
 extern int page_is_ram(unsigned long pfn);
-extern int region_is_ram(resource_size_t phys_addr, unsigned long size);
+
+enum {
+	REGION_INTERSECTS,
+	REGION_DISJOINT,
+	REGION_MIXED,
+};
+
+int region_intersects(resource_size_t offset, size_t size, const char *type);
 
 /* Support for virtually mapped pages */
 struct page *vmalloc_to_page(const void *addr);

+ 23 - 0
include/linux/mmzone.h

@@ -319,7 +319,11 @@ enum zone_type {
 	ZONE_HIGHMEM,
 #endif
 	ZONE_MOVABLE,
+#ifdef CONFIG_ZONE_DEVICE
+	ZONE_DEVICE,
+#endif
 	__MAX_NR_ZONES
+
 };
 
 #ifndef __GENERATING_BOUNDS_H
@@ -786,6 +790,25 @@ static inline bool pgdat_is_empty(pg_data_t *pgdat)
 	return !pgdat->node_start_pfn && !pgdat->node_spanned_pages;
 }
 
+static inline int zone_id(const struct zone *zone)
+{
+	struct pglist_data *pgdat = zone->zone_pgdat;
+
+	return zone - pgdat->node_zones;
+}
+
+#ifdef CONFIG_ZONE_DEVICE
+static inline bool is_dev_zone(const struct zone *zone)
+{
+	return zone_id(zone) == ZONE_DEVICE;
+}
+#else
+static inline bool is_dev_zone(const struct zone *zone)
+{
+	return false;
+}
+#endif
+
 #include <linux/memory_hotplug.h>
 
 extern struct mutex zonelists_mutex;

+ 1 - 1
include/linux/mtd/map.h

@@ -27,9 +27,9 @@
 #include <linux/string.h>
 #include <linux/bug.h>
 #include <linux/kernel.h>
+#include <linux/io.h>
 
 #include <asm/unaligned.h>
-#include <asm/io.h>
 #include <asm/barrier.h>
 
 #ifdef CONFIG_MTD_MAP_BANK_WIDTH_1

+ 84 - 31
include/linux/pmem.h

@@ -14,28 +14,42 @@
 #define __PMEM_H__
 
 #include <linux/io.h>
+#include <linux/uio.h>
 
 #ifdef CONFIG_ARCH_HAS_PMEM_API
-#include <asm/cacheflush.h>
+#define ARCH_MEMREMAP_PMEM MEMREMAP_WB
+#include <asm/pmem.h>
 #else
+#define ARCH_MEMREMAP_PMEM MEMREMAP_WT
+/*
+ * These are simply here to enable compilation, all call sites gate
+ * calling these symbols with arch_has_pmem_api() and redirect to the
+ * implementation in asm/pmem.h.
+ */
+static inline bool __arch_has_wmb_pmem(void)
+{
+	return false;
+}
+
 static inline void arch_wmb_pmem(void)
 {
 	BUG();
 }
 
-static inline bool __arch_has_wmb_pmem(void)
+static inline void arch_memcpy_to_pmem(void __pmem *dst, const void *src,
+		size_t n)
 {
-	return false;
+	BUG();
 }
 
-static inline void __pmem *arch_memremap_pmem(resource_size_t offset,
-		unsigned long size)
+static inline size_t arch_copy_from_iter_pmem(void __pmem *addr, size_t bytes,
+		struct iov_iter *i)
 {
-	return NULL;
+	BUG();
+	return 0;
 }
 
-static inline void arch_memcpy_to_pmem(void __pmem *dst, const void *src,
-		size_t n)
+static inline void arch_clear_pmem(void __pmem *addr, size_t size)
 {
 	BUG();
 }
@@ -43,18 +57,22 @@ static inline void arch_memcpy_to_pmem(void __pmem *dst, const void *src,
 
 /*
  * Architectures that define ARCH_HAS_PMEM_API must provide
- * implementations for arch_memremap_pmem(), arch_memcpy_to_pmem(),
- * arch_wmb_pmem(), and __arch_has_wmb_pmem().
+ * implementations for arch_memcpy_to_pmem(), arch_wmb_pmem(),
+ * arch_copy_from_iter_pmem(), arch_clear_pmem() and arch_has_wmb_pmem().
  */
-
 static inline void memcpy_from_pmem(void *dst, void __pmem const *src, size_t size)
 {
 	memcpy(dst, (void __force const *) src, size);
 }
 
-static inline void memunmap_pmem(void __pmem *addr)
+static inline void memunmap_pmem(struct device *dev, void __pmem *addr)
+{
+	devm_memunmap(dev, (void __force *) addr);
+}
+
+static inline bool arch_has_pmem_api(void)
 {
-	iounmap((void __force __iomem *) addr);
+	return IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API);
 }
 
 /**
@@ -68,14 +86,7 @@ static inline void memunmap_pmem(void __pmem *addr)
  */
 static inline bool arch_has_wmb_pmem(void)
 {
-	if (IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API))
-		return __arch_has_wmb_pmem();
-	return false;
-}
-
-static inline bool arch_has_pmem_api(void)
-{
-	return IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API) && arch_has_wmb_pmem();
+	return arch_has_pmem_api() && __arch_has_wmb_pmem();
 }
 
 /*
@@ -85,16 +96,24 @@ static inline bool arch_has_pmem_api(void)
  * default_memremap_pmem + default_memcpy_to_pmem is sufficient for
 * making data durable relative to i/o completion.
 */
-static void default_memcpy_to_pmem(void __pmem *dst, const void *src,
+static inline void default_memcpy_to_pmem(void __pmem *dst, const void *src,
 		size_t size)
 {
 	memcpy((void __force *) dst, src, size);
 }
 
-static void __pmem *default_memremap_pmem(resource_size_t offset,
-		unsigned long size)
+static inline size_t default_copy_from_iter_pmem(void __pmem *addr,
+		size_t bytes, struct iov_iter *i)
+{
+	return copy_from_iter_nocache((void __force *)addr, bytes, i);
+}
+
+static inline void default_clear_pmem(void __pmem *addr, size_t size)
 {
-	return (void __pmem __force *)ioremap_wt(offset, size);
+	if (size == PAGE_SIZE && ((unsigned long)addr & ~PAGE_MASK) == 0)
+		clear_page((void __force *)addr);
+	else
+		memset((void __force *)addr, 0, size);
 }
 
 /**
@@ -109,12 +128,11 @@ static void __pmem *default_memremap_pmem(resource_size_t offset,
 * wmb_pmem() arrange for the data to be written through the
 * cache to persistent media.
 */
-static inline void __pmem *memremap_pmem(resource_size_t offset,
-		unsigned long size)
+static inline void __pmem *memremap_pmem(struct device *dev,
+		resource_size_t offset, unsigned long size)
 {
-	if (arch_has_pmem_api())
-		return arch_memremap_pmem(offset, size);
-	return default_memremap_pmem(offset, size);
+	return (void __pmem *) devm_memremap(dev, offset, size,
+			ARCH_MEMREMAP_PMEM);
 }
 
 /**
@@ -146,7 +164,42 @@ static inline void memcpy_to_pmem(void __pmem *dst, const void *src, size_t n)
 */
 static inline void wmb_pmem(void)
 {
-	if (arch_has_pmem_api())
+	if (arch_has_wmb_pmem())
 		arch_wmb_pmem();
+	else
+		wmb();
+}
+
+/**
+ * copy_from_iter_pmem - copy data from an iterator to PMEM
+ * @addr:	PMEM destination address
+ * @bytes:	number of bytes to copy
+ * @i:		iterator with source data
+ *
+ * Copy data from the iterator 'i' to the PMEM buffer starting at 'addr'.
+ * This function requires explicit ordering with a wmb_pmem() call.
+ */
+static inline size_t copy_from_iter_pmem(void __pmem *addr, size_t bytes,
+		struct iov_iter *i)
+{
+	if (arch_has_pmem_api())
+		return arch_copy_from_iter_pmem(addr, bytes, i);
+	return default_copy_from_iter_pmem(addr, bytes, i);
+}
+
+/**
+ * clear_pmem - zero a PMEM memory range
+ * @addr:	virtual start address
+ * @size:	number of bytes to zero
+ *
+ * Write zeros into the memory range starting at 'addr' for 'size' bytes.
+ * This function requires explicit ordering with a wmb_pmem() call.
+ */
+static inline void clear_pmem(void __pmem *addr, size_t size)
+{
+	if (arch_has_pmem_api())
+		arch_clear_pmem(addr, size);
+	else
+		default_clear_pmem(addr, size);
 }
 #endif /* __PMEM_H__ */
+ 11 - 1
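Note the asymmetry the header encodes: wmb_pmem() always orders prior pmem stores (falling back to a plain wmb()), but it only guarantees durability when the architecture advertises support. Callers that need a hard persistence guarantee can gate on arch_has_wmb_pmem() up front, roughly:

    #include <linux/pmem.h>
    #include <linux/printk.h>
    
    static int require_pmem_durability(void)
    {
    	if (!arch_has_wmb_pmem()) {
    		/* stores are still ordered, but may stop at the CPU cache */
    		pr_warn("no arch support for durable pmem writes\n");
    		return -EOPNOTSUPP;
    	}
    	return 0;
    }
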
include/uapi/linux/ndctl.h

@@ -87,7 +87,7 @@ struct nd_cmd_ars_status {
 		__u32 handle;
 		__u32 flags;
 		__u64 err_address;
-		__u64 mask;
+		__u64 length;
 	} __packed records[0];
 } __packed;
 
@@ -111,6 +111,11 @@ enum {
 	ND_CMD_VENDOR = 9,
 };
 
+enum {
+	ND_ARS_VOLATILE = 1,
+	ND_ARS_PERSISTENT = 2,
+};
+
 static inline const char *nvdimm_bus_cmd_name(unsigned cmd)
 {
 	static const char * const names[] = {
@@ -194,4 +199,9 @@ enum nd_driver_flags {
 enum {
 	ND_MIN_NAMESPACE_SIZE = 0x00400000,
 };
+
+enum ars_masks {
+	ARS_STATUS_MASK = 0x0000FFFF,
+	ARS_EXT_STATUS_SHIFT = 16,
+};
 #endif /* __NDCTL_H__ */

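The new ars_masks values split the 32-bit command status into a 16-bit status code and a 16-bit extended status; judging from the nfit test stub further down, for ND_CMD_ARS_CAP the extended half carries the supported scrub types. A hedged decoding sketch:

    #include <linux/ndctl.h>
    #include <linux/printk.h>
    
    static void decode_ars_cap_status(__u32 status)
    {
    	__u16 code = status & ARS_STATUS_MASK;
    	__u16 ext = status >> ARS_EXT_STATUS_SHIFT;
    
    	if (ext & ND_ARS_VOLATILE)
    		pr_info("ARS supported on volatile ranges\n");
    	if (ext & ND_ARS_PERSISTENT)
    		pr_info("ARS supported on persistent ranges\n");
    	if (code)
    		pr_info("ARS status code: %#x\n", code);
    }
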
+ 1 - 1
include/video/vga.h

@@ -18,7 +18,7 @@
 #define __linux_video_vga_h__
 
 #include <linux/types.h>
-#include <asm/io.h>
+#include <linux/io.h>
 #include <asm/vga.h>
 #include <asm/byteorder.h>
 

+ 2 - 0
kernel/Makefile

@@ -99,6 +99,8 @@ obj-$(CONFIG_JUMP_LABEL) += jump_label.o
 obj-$(CONFIG_CONTEXT_TRACKING) += context_tracking.o
 obj-$(CONFIG_TORTURE_TEST) += torture.o
 
+obj-$(CONFIG_HAS_IOMEM) += memremap.o
+
 $(obj)/configs.o: $(obj)/config_data.h
 
 # config_data.h contains the same information as ikconfig.h but gzipped.

+ 190 - 0
kernel/memremap.c

@@ -0,0 +1,190 @@
+/*
+ * Copyright(c) 2015 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+#include <linux/device.h>
+#include <linux/types.h>
+#include <linux/io.h>
+#include <linux/mm.h>
+#include <linux/memory_hotplug.h>
+
+#ifndef ioremap_cache
+/* temporary while we convert existing ioremap_cache users to memremap */
+__weak void __iomem *ioremap_cache(resource_size_t offset, unsigned long size)
+{
+	return ioremap(offset, size);
+}
+#endif
+
+/**
+ * memremap() - remap an iomem_resource as cacheable memory
+ * @offset: iomem resource start address
+ * @size: size of remap
+ * @flags: either MEMREMAP_WB or MEMREMAP_WT
+ *
+ * memremap() is "ioremap" for cases where it is known that the resource
+ * being mapped does not have i/o side effects and the __iomem
+ * annotation is not applicable.
+ *
+ * MEMREMAP_WB - matches the default mapping for "System RAM" on
+ * the architecture.  This is usually a read-allocate write-back cache.
+ * Moreover, if MEMREMAP_WB is specified and the requested remap region is RAM
+ * memremap() will bypass establishing a new mapping and instead return
+ * a pointer into the direct map.
+ *
+ * MEMREMAP_WT - establish a mapping whereby writes either bypass the
+ * cache or are written through to memory and never exist in a
+ * cache-dirty state with respect to program visibility.  Attempts to
+ * map "System RAM" with this mapping type will fail.
+ */
+void *memremap(resource_size_t offset, size_t size, unsigned long flags)
+{
+	int is_ram = region_intersects(offset, size, "System RAM");
+	void *addr = NULL;
+
+	if (is_ram == REGION_MIXED) {
+		WARN_ONCE(1, "memremap attempted on mixed range %pa size: %#lx\n",
+				&offset, (unsigned long) size);
+		return NULL;
+	}
+
+	/* Try all mapping types requested until one returns non-NULL */
+	if (flags & MEMREMAP_WB) {
+		flags &= ~MEMREMAP_WB;
+		/*
+		 * MEMREMAP_WB is special in that it can be satisfied
+		 * from the direct map.  Some archs depend on the
+		 * capability of memremap() to autodetect cases where
+		 * the requested range is potentially in "System RAM"
+		 */
+		if (is_ram == REGION_INTERSECTS)
+			addr = __va(offset);
+		else
+			addr = ioremap_cache(offset, size);
+	}
+
+	/*
+	 * If we don't have a mapping yet and more request flags are
+	 * pending then we will be attempting to establish a new virtual
+	 * address mapping.  Enforce that this mapping is not aliasing
+	 * "System RAM"
+	 */
+	if (!addr && is_ram == REGION_INTERSECTS && flags) {
+		WARN_ONCE(1, "memremap attempted on ram %pa size: %#lx\n",
+				&offset, (unsigned long) size);
+		return NULL;
+	}
+
+	if (!addr && (flags & MEMREMAP_WT)) {
+		flags &= ~MEMREMAP_WT;
+		addr = ioremap_wt(offset, size);
+	}
+
+	return addr;
+}
+EXPORT_SYMBOL(memremap);
+
+void memunmap(void *addr)
+{
+	if (is_vmalloc_addr(addr))
+		iounmap((void __iomem *) addr);
+}
+EXPORT_SYMBOL(memunmap);
+
+static void devm_memremap_release(struct device *dev, void *res)
+{
+	memunmap(res);
+}
+
+static int devm_memremap_match(struct device *dev, void *res, void *match_data)
+{
+	return *(void **)res == match_data;
+}
+
+void *devm_memremap(struct device *dev, resource_size_t offset,
+		size_t size, unsigned long flags)
+{
+	void **ptr, *addr;
+
+	ptr = devres_alloc(devm_memremap_release, sizeof(*ptr), GFP_KERNEL);
+	if (!ptr)
+		return NULL;
+
+	addr = memremap(offset, size, flags);
+	if (addr) {
+		*ptr = addr;
+		devres_add(dev, ptr);
+	} else
+		devres_free(ptr);
+
+	return addr;
+}
+EXPORT_SYMBOL(devm_memremap);
+
+void devm_memunmap(struct device *dev, void *addr)
+{
+	WARN_ON(devres_destroy(dev, devm_memremap_release, devm_memremap_match,
+			       addr));
+	memunmap(addr);
+}
+EXPORT_SYMBOL(devm_memunmap);
+
+#ifdef CONFIG_ZONE_DEVICE
+struct page_map {
+	struct resource res;
+};
+
+static void devm_memremap_pages_release(struct device *dev, void *res)
+{
+	struct page_map *page_map = res;
+
+	/* pages are dead and unused, undo the arch mapping */
+	arch_remove_memory(page_map->res.start, resource_size(&page_map->res));
+}
+
+void *devm_memremap_pages(struct device *dev, struct resource *res)
+{
+	int is_ram = region_intersects(res->start, resource_size(res),
+			"System RAM");
+	struct page_map *page_map;
+	int error, nid;
+
+	if (is_ram == REGION_MIXED) {
+		WARN_ONCE(1, "%s attempted on mixed region %pr\n",
+				__func__, res);
+		return ERR_PTR(-ENXIO);
+	}
+
+	if (is_ram == REGION_INTERSECTS)
+		return __va(res->start);
+
+	page_map = devres_alloc(devm_memremap_pages_release,
+			sizeof(*page_map), GFP_KERNEL);
+	if (!page_map)
+		return ERR_PTR(-ENOMEM);
+
+	memcpy(&page_map->res, res, sizeof(*res));
+
+	nid = dev_to_node(dev);
+	if (nid < 0)
+		nid = 0;
+
+	error = arch_add_memory(nid, res->start, resource_size(res), true);
+	if (error) {
+		devres_free(page_map);
+		return ERR_PTR(error);
+	}
+
+	devres_add(dev, page_map);
+	return __va(res->start);
+}
+EXPORT_SYMBOL(devm_memremap_pages);
+#endif /* CONFIG_ZONE_DEVICE */

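devm_memremap_pages() is the primitive the pmem 'struct page' support in this pull builds on: given a resource that does not intersect System RAM, it hotplugs the range into ZONE_DEVICE so pfn_to_page() works, and returns a direct-map address. A hedged caller sketch:

    #include <linux/device.h>
    #include <linux/err.h>
    #include <linux/io.h>
    
    static void *map_device_pages(struct device *dev, struct resource *res)
    {
    	void *base = devm_memremap_pages(dev, res);
    
    	/* ERR_PTR(-ENXIO) when CONFIG_ZONE_DEVICE is disabled */
    	if (IS_ERR(base))
    		return base;
    
    	/* every pfn in 'res' now has a struct page behind it */
    	return base;
    }
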
+ 36 - 25
kernel/resource.c

@@ -492,40 +492,51 @@ int __weak page_is_ram(unsigned long pfn)
 }
 EXPORT_SYMBOL_GPL(page_is_ram);
 
-/*
- * Search for a resouce entry that fully contains the specified region.
- * If found, return 1 if it is RAM, 0 if not.
- * If not found, or region is not fully contained, return -1
+/**
+ * region_intersects() - determine intersection of region with known resources
+ * @start: region start address
+ * @size: size of region
+ * @name: name of resource (in iomem_resource)
  *
- * Used by the ioremap functions to ensure the user is not remapping RAM and is
- * a vast speed up over walking through the resource table page by page.
+ * Check if the specified region partially overlaps or fully eclipses a
+ * resource identified by @name.  Return REGION_DISJOINT if the region
+ * does not overlap @name, return REGION_MIXED if the region overlaps
+ * @name and another resource, and return REGION_INTERSECTS if the
+ * region overlaps @name and no other defined resource. Note that
+ * REGION_INTERSECTS is also returned in the case when the specified
+ * region overlaps RAM and undefined memory holes.
+ *
+ * region_intersects() is used by memory remapping functions to ensure
+ * the user is not remapping RAM and is a vast speed up over walking
+ * through the resource table page by page.
 */
-int region_is_ram(resource_size_t start, unsigned long size)
+int region_intersects(resource_size_t start, size_t size, const char *name)
 {
-	struct resource *p;
-	resource_size_t end = start + size - 1;
 	unsigned long flags = IORESOURCE_MEM | IORESOURCE_BUSY;
-	const char *name = "System RAM";
-	int ret = -1;
+	resource_size_t end = start + size - 1;
+	int type = 0; int other = 0;
+	struct resource *p;
 
 	read_lock(&resource_lock);
 	for (p = iomem_resource.child; p ; p = p->sibling) {
-		if (p->end < start)
-			continue;
-
-		if (p->start <= start && end <= p->end) {
-			/* resource fully contains region */
-			if ((p->flags != flags) || strcmp(p->name, name))
-				ret = 0;
-			else
-				ret = 1;
-			break;
-		}
-		if (end < p->start)
-			break;	/* not found */
+		bool is_type = strcmp(p->name, name) == 0 && p->flags == flags;
+
+		if (start >= p->start && start <= p->end)
+			is_type ? type++ : other++;
+		if (end >= p->start && end <= p->end)
+			is_type ? type++ : other++;
+		if (p->start >= start && p->end <= end)
+			is_type ? type++ : other++;
 	}
 	read_unlock(&resource_lock);
-	return ret;
+
+	if (other == 0)
+		return type ? REGION_INTERSECTS : REGION_DISJOINT;
+
+	if (type)
+		return REGION_MIXED;
+
+	return REGION_DISJOINT;
 }
 
 void __weak arch_remove_reservations(struct resource *avail)

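The tri-state result lets callers separate "entirely RAM" (take the direct map), "no RAM" (safe to establish a new mapping), and the dangerous partial overlap, which is exactly how memremap() above consumes it. A sketch:

    #include <linux/mm.h>
    
    static bool can_remap(resource_size_t offset, size_t size)
    {
    	switch (region_intersects(offset, size, "System RAM")) {
    	case REGION_DISJOINT:		/* no RAM: ioremap-style mapping ok */
    	case REGION_INTERSECTS:		/* all RAM: reuse the direct map */
    		return true;
    	case REGION_MIXED:		/* straddles RAM: refuse */
    	default:
    		return false;
    	}
    }
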
+ 3 - 0
lib/Kconfig

@@ -525,4 +525,7 @@ config ARCH_HAS_SG_CHAIN
 config ARCH_HAS_PMEM_API
 	bool
 
+config ARCH_HAS_MMIO_FLUSH
+	bool
+
 endmenu

+ 4 - 9
lib/devres.c

@@ -119,10 +119,9 @@ EXPORT_SYMBOL(devm_iounmap);
  * @dev: generic device to handle the resource for
  * @res: resource to be handled
  *
- * Checks that a resource is a valid memory region, requests the memory region
- * and ioremaps it either as cacheable or as non-cacheable memory depending on
- * the resource's flags. All operations are managed and will be undone on
- * driver detach.
+ * Checks that a resource is a valid memory region, requests the memory
+ * region and ioremaps it. All operations are managed and will be undone
+ * on driver detach.
  *
  * Returns a pointer to the remapped memory or an ERR_PTR() encoded error code
  * on failure. Usage example:
@@ -153,11 +152,7 @@ void __iomem *devm_ioremap_resource(struct device *dev, struct resource *res)
 		return IOMEM_ERR_PTR(-EBUSY);
 	}
 
-	if (res->flags & IORESOURCE_CACHEABLE)
-		dest_ptr = devm_ioremap(dev, res->start, size);
-	else
-		dest_ptr = devm_ioremap_nocache(dev, res->start, size);
-
+	dest_ptr = devm_ioremap(dev, res->start, size);
 	if (!dest_ptr) {
 		dev_err(dev, "ioremap failed for resource %pR\n", res);
 		devm_release_mem_region(dev, res->start, size);

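After this change devm_ioremap_resource() ignores IORESOURCE_CACHEABLE and always goes through plain devm_ioremap(); the direction of this pull is that drivers wanting a cacheable mapping of a memory-like resource move to the memremap() family instead. The usual caller is unchanged (foo_* is hypothetical):

    #include <linux/platform_device.h>
    #include <linux/io.h>
    #include <linux/err.h>
    
    static int foo_probe(struct platform_device *pdev)
    {
    	struct resource *res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
    	void __iomem *base = devm_ioremap_resource(&pdev->dev, res);
    
    	if (IS_ERR(base))
    		return PTR_ERR(base);
    	return 0;
    }
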
+ 2 - 5
lib/pci_iomap.c

@@ -41,11 +41,8 @@ void __iomem *pci_iomap_range(struct pci_dev *dev,
 		len = maxlen;
 	if (flags & IORESOURCE_IO)
 		return __pci_ioport_map(dev, start, len);
-	if (flags & IORESOURCE_MEM) {
-		if (flags & IORESOURCE_CACHEABLE)
-			return ioremap(start, len);
-		return ioremap_nocache(start, len);
-	}
+	if (flags & IORESOURCE_MEM)
+		return ioremap(start, len);
 	/* What? */
 	return NULL;
 }

+ 17 - 0
mm/Kconfig

@@ -648,3 +648,20 @@ config DEFERRED_STRUCT_PAGE_INIT
 	  when kswapd starts. This has a potential performance impact on
 	  processes running early in the lifetime of the system until kswapd
 	  finishes the initialisation.
+
+config ZONE_DEVICE
+	bool "Device memory (pmem, etc...) hotplug support" if EXPERT
+	default !ZONE_DMA
+	depends on !ZONE_DMA
+	depends on MEMORY_HOTPLUG
+	depends on MEMORY_HOTREMOVE
+	depends on X86_64 #arch_add_memory() comprehends device memory
+
+	help
+	  Device memory hotplug support allows for establishing pmem,
+	  or other device driver discovered memory regions, in the
+	  memmap. This allows pfn_to_page() lookups of otherwise
+	  "device-physical" addresses which is needed for using a DAX
+	  mapping in an O_DIRECT operation, among other things.
+
+	  If FS_DAX is enabled, then say Y.

+ 11 - 3
mm/memory_hotplug.c

@@ -778,7 +778,10 @@ int __remove_pages(struct zone *zone, unsigned long phys_start_pfn,
 
 	start = phys_start_pfn << PAGE_SHIFT;
 	size = nr_pages * PAGE_SIZE;
-	ret = release_mem_region_adjustable(&iomem_resource, start, size);
+
+	/* in the ZONE_DEVICE case device driver owns the memory region */
+	if (!is_dev_zone(zone))
+		ret = release_mem_region_adjustable(&iomem_resource, start, size);
 	if (ret) {
 		resource_size_t endres = start + size - 1;
 
@@ -1215,8 +1218,13 @@ static int should_add_memory_movable(int nid, u64 start, u64 size)
 	return 0;
 }
 
-int zone_for_memory(int nid, u64 start, u64 size, int zone_default)
+int zone_for_memory(int nid, u64 start, u64 size, int zone_default,
+		bool for_device)
 {
+#ifdef CONFIG_ZONE_DEVICE
+	if (for_device)
+		return ZONE_DEVICE;
+#endif
 	if (should_add_memory_movable(nid, start, size))
 		return ZONE_MOVABLE;
 
@@ -1265,7 +1273,7 @@ int __ref add_memory(int nid, u64 start, u64 size)
 	}
 
 	/* call arch's memory hotadd */
-	ret = arch_add_memory(nid, start, size);
+	ret = arch_add_memory(nid, start, size, false);
 
 	if (ret < 0)
 		goto error;

+ 3 - 0
mm/page_alloc.c

@@ -206,6 +206,9 @@ static char * const zone_names[MAX_NR_ZONES] = {
 	 "HighMem",
 #endif
 	 "Movable",
+#ifdef CONFIG_ZONE_DEVICE
+	 "Device",
+#endif
 };
 
 int min_free_kbytes = 1024;

+ 11 - 2
tools/testing/nvdimm/Kbuild

@@ -1,9 +1,12 @@
-ldflags-y += --wrap=ioremap_wt
 ldflags-y += --wrap=ioremap_wc
+ldflags-y += --wrap=memremap
 ldflags-y += --wrap=devm_ioremap_nocache
-ldflags-y += --wrap=ioremap_cache
+ldflags-y += --wrap=devm_memremap
+ldflags-y += --wrap=devm_memunmap
 ldflags-y += --wrap=ioremap_nocache
 ldflags-y += --wrap=iounmap
+ldflags-y += --wrap=memunmap
+ldflags-y += --wrap=__devm_request_region
 ldflags-y += --wrap=__request_region
 ldflags-y += --wrap=__release_region
 
@@ -15,6 +18,7 @@ obj-$(CONFIG_LIBNVDIMM) += libnvdimm.o
 obj-$(CONFIG_BLK_DEV_PMEM) += nd_pmem.o
 obj-$(CONFIG_ND_BTT) += nd_btt.o
 obj-$(CONFIG_ND_BLK) += nd_blk.o
+obj-$(CONFIG_X86_PMEM_LEGACY) += nd_e820.o
 obj-$(CONFIG_ACPI_NFIT) += nfit.o
 
 nfit-y := $(ACPI_SRC)/nfit.o
@@ -29,6 +33,9 @@ nd_btt-y += config_check.o
 nd_blk-y := $(NVDIMM_SRC)/blk.o
 nd_blk-y += config_check.o
 
+nd_e820-y := $(NVDIMM_SRC)/e820.o
+nd_e820-y += config_check.o
+
 libnvdimm-y := $(NVDIMM_SRC)/core.o
 libnvdimm-y += $(NVDIMM_SRC)/bus.o
 libnvdimm-y += $(NVDIMM_SRC)/dimm_devs.o
@@ -37,7 +44,9 @@ libnvdimm-y += $(NVDIMM_SRC)/region_devs.o
 libnvdimm-y += $(NVDIMM_SRC)/region.o
 libnvdimm-y += $(NVDIMM_SRC)/namespace_devs.o
 libnvdimm-y += $(NVDIMM_SRC)/label.o
+libnvdimm-$(CONFIG_ND_CLAIM) += $(NVDIMM_SRC)/claim.o
 libnvdimm-$(CONFIG_BTT) += $(NVDIMM_SRC)/btt_devs.o
+libnvdimm-$(CONFIG_NVDIMM_PFN) += $(NVDIMM_SRC)/pfn_devs.o
 libnvdimm-y += config_check.o
 
 obj-m += test/

+ 73 - 12
tools/testing/nvdimm/test/iomap.c

@@ -80,23 +80,52 @@ void __iomem *__wrap_devm_ioremap_nocache(struct device *dev,
 }
 EXPORT_SYMBOL(__wrap_devm_ioremap_nocache);
 
-void __iomem *__wrap_ioremap_cache(resource_size_t offset, unsigned long size)
+void *__wrap_devm_memremap(struct device *dev, resource_size_t offset,
+		size_t size, unsigned long flags)
 {
-	return __nfit_test_ioremap(offset, size, ioremap_cache);
+	struct nfit_test_resource *nfit_res;
+
+	rcu_read_lock();
+	nfit_res = get_nfit_res(offset);
+	rcu_read_unlock();
+	if (nfit_res)
+		return nfit_res->buf + offset - nfit_res->res->start;
+	return devm_memremap(dev, offset, size, flags);
 }
-EXPORT_SYMBOL(__wrap_ioremap_cache);
+EXPORT_SYMBOL(__wrap_devm_memremap);
 
-void __iomem *__wrap_ioremap_nocache(resource_size_t offset, unsigned long size)
+void *__wrap_memremap(resource_size_t offset, size_t size,
+		unsigned long flags)
 {
-	return __nfit_test_ioremap(offset, size, ioremap_nocache);
+	struct nfit_test_resource *nfit_res;
+
+	rcu_read_lock();
+	nfit_res = get_nfit_res(offset);
+	rcu_read_unlock();
+	if (nfit_res)
+		return nfit_res->buf + offset - nfit_res->res->start;
+	return memremap(offset, size, flags);
 }
-EXPORT_SYMBOL(__wrap_ioremap_nocache);
+EXPORT_SYMBOL(__wrap_memremap);
+
+void __wrap_devm_memunmap(struct device *dev, void *addr)
+{
+	struct nfit_test_resource *nfit_res;
+
+	rcu_read_lock();
+	nfit_res = get_nfit_res((unsigned long) addr);
+	rcu_read_unlock();
+	if (nfit_res)
+		return;
+	return devm_memunmap(dev, addr);
+}
+EXPORT_SYMBOL(__wrap_devm_memunmap);
 
-void __iomem *__wrap_ioremap_wt(resource_size_t offset, unsigned long size)
+void __iomem *__wrap_ioremap_nocache(resource_size_t offset, unsigned long size)
 {
-	return __nfit_test_ioremap(offset, size, ioremap_wt);
+	return __nfit_test_ioremap(offset, size, ioremap_nocache);
 }
-EXPORT_SYMBOL(__wrap_ioremap_wt);
+EXPORT_SYMBOL(__wrap_ioremap_nocache);
 
 void __iomem *__wrap_ioremap_wc(resource_size_t offset, unsigned long size)
 {
@@ -117,9 +146,22 @@ void __wrap_iounmap(volatile void __iomem *addr)
 }
 EXPORT_SYMBOL(__wrap_iounmap);
 
-struct resource *__wrap___request_region(struct resource *parent,
-		resource_size_t start, resource_size_t n, const char *name,
-		int flags)
+void __wrap_memunmap(void *addr)
+{
+	struct nfit_test_resource *nfit_res;
+
+	rcu_read_lock();
+	nfit_res = get_nfit_res((unsigned long) addr);
+	rcu_read_unlock();
+	if (nfit_res)
+		return;
+	return memunmap(addr);
+}
+EXPORT_SYMBOL(__wrap_memunmap);
+
+static struct resource *nfit_test_request_region(struct device *dev,
+		struct resource *parent, resource_size_t start,
+		resource_size_t n, const char *name, int flags)
 {
 	struct nfit_test_resource *nfit_res;
 
@@ -147,10 +189,29 @@ struct resource *__wrap___request_region(struct resource *parent,
 			return res;
 		}
 	}
+	if (dev)
+		return __devm_request_region(dev, parent, start, n, name);
 	return __request_region(parent, start, n, name, flags);
 }
+
+struct resource *__wrap___request_region(struct resource *parent,
+		resource_size_t start, resource_size_t n, const char *name,
+		int flags)
+{
+	return nfit_test_request_region(NULL, parent, start, n, name, flags);
+}
 EXPORT_SYMBOL(__wrap___request_region);
 
+struct resource *__wrap___devm_request_region(struct device *dev,
+		struct resource *parent, resource_size_t start,
+		resource_size_t n, const char *name)
+{
+	if (!dev)
+		return NULL;
+	return nfit_test_request_region(dev, parent, start, n, name, 0);
+}
+EXPORT_SYMBOL(__wrap___devm_request_region);
+
 void __wrap___release_region(struct resource *parent, resource_size_t start,
 				resource_size_t n)
 {

+ 147 - 62
tools/testing/nvdimm/test/nfit.c

@@ -147,75 +147,153 @@ static struct nfit_test *to_nfit_test(struct device *dev)
 	return container_of(pdev, struct nfit_test, pdev);
 }
 
+static int nfit_test_cmd_get_config_size(struct nd_cmd_get_config_size *nd_cmd,
+		unsigned int buf_len)
+{
+	if (buf_len < sizeof(*nd_cmd))
+		return -EINVAL;
+
+	nd_cmd->status = 0;
+	nd_cmd->config_size = LABEL_SIZE;
+	nd_cmd->max_xfer = SZ_4K;
+
+	return 0;
+}
+
+static int nfit_test_cmd_get_config_data(struct nd_cmd_get_config_data_hdr
+		*nd_cmd, unsigned int buf_len, void *label)
+{
+	unsigned int len, offset = nd_cmd->in_offset;
+	int rc;
+
+	if (buf_len < sizeof(*nd_cmd))
+		return -EINVAL;
+	if (offset >= LABEL_SIZE)
+		return -EINVAL;
+	if (nd_cmd->in_length + sizeof(*nd_cmd) > buf_len)
+		return -EINVAL;
+
+	nd_cmd->status = 0;
+	len = min(nd_cmd->in_length, LABEL_SIZE - offset);
+	memcpy(nd_cmd->out_buf, label + offset, len);
+	rc = buf_len - sizeof(*nd_cmd) - len;
+
+	return rc;
+}
+
+static int nfit_test_cmd_set_config_data(struct nd_cmd_set_config_hdr *nd_cmd,
+		unsigned int buf_len, void *label)
+{
+	unsigned int len, offset = nd_cmd->in_offset;
+	u32 *status;
+	int rc;
+
+	if (buf_len < sizeof(*nd_cmd))
+		return -EINVAL;
+	if (offset >= LABEL_SIZE)
+		return -EINVAL;
+	if (nd_cmd->in_length + sizeof(*nd_cmd) + 4 > buf_len)
+		return -EINVAL;
+
+	status = (void *)nd_cmd + nd_cmd->in_length + sizeof(*nd_cmd);
+	*status = 0;
+	len = min(nd_cmd->in_length, LABEL_SIZE - offset);
+	memcpy(label + offset, nd_cmd->in_buf, len);
+	rc = buf_len - sizeof(*nd_cmd) - (len + 4);
+
+	return rc;
+}
+
+static int nfit_test_cmd_ars_cap(struct nd_cmd_ars_cap *nd_cmd,
+		unsigned int buf_len)
+{
+	if (buf_len < sizeof(*nd_cmd))
+		return -EINVAL;
+
+	nd_cmd->max_ars_out = 256;
+	nd_cmd->status = (ND_ARS_PERSISTENT | ND_ARS_VOLATILE) << 16;
+
+	return 0;
+}
+
+static int nfit_test_cmd_ars_start(struct nd_cmd_ars_start *nd_cmd,
+		unsigned int buf_len)
+{
+	if (buf_len < sizeof(*nd_cmd))
+		return -EINVAL;
+
+	nd_cmd->status = 0;
+
+	return 0;
+}
+
+static int nfit_test_cmd_ars_status(struct nd_cmd_ars_status *nd_cmd,
+		unsigned int buf_len)
+{
+	if (buf_len < sizeof(*nd_cmd))
+		return -EINVAL;
+
+	nd_cmd->out_length = 256;
+	nd_cmd->num_records = 0;
+	nd_cmd->status = 0;
+
+	return 0;
+}
+
 static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc,
 		struct nvdimm *nvdimm, unsigned int cmd, void *buf,
 		unsigned int buf_len)
 {
 	struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc);
 	struct nfit_test *t = container_of(acpi_desc, typeof(*t), acpi_desc);
-	struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
-	int i, rc;
+	int i, rc = 0;
 
-	if (!nfit_mem || !test_bit(cmd, &nfit_mem->dsm_mask))
-		return -ENOTTY;
+	if (nvdimm) {
+		struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
 
-	/* lookup label space for the given dimm */
-	for (i = 0; i < ARRAY_SIZE(handle); i++)
-		if (__to_nfit_memdev(nfit_mem)->device_handle == handle[i])
+		if (!nfit_mem || !test_bit(cmd, &nfit_mem->dsm_mask))
+			return -ENOTTY;
+
+		/* lookup label space for the given dimm */
+		for (i = 0; i < ARRAY_SIZE(handle); i++)
+			if (__to_nfit_memdev(nfit_mem)->device_handle ==
+					handle[i])
+				break;
+		if (i >= ARRAY_SIZE(handle))
+			return -ENXIO;
+
+		switch (cmd) {
+		case ND_CMD_GET_CONFIG_SIZE:
+			rc = nfit_test_cmd_get_config_size(buf, buf_len);
 			break;
-	if (i >= ARRAY_SIZE(handle))
-		return -ENXIO;
+		case ND_CMD_GET_CONFIG_DATA:
+			rc = nfit_test_cmd_get_config_data(buf, buf_len,
+				t->label[i]);
+			break;
+		case ND_CMD_SET_CONFIG_DATA:
+			rc = nfit_test_cmd_set_config_data(buf, buf_len,
+				t->label[i]);
+			break;
+		default:
+			return -ENOTTY;
+		}
+	} else {
+		if (!nd_desc || !test_bit(cmd, &nd_desc->dsm_mask))
+			return -ENOTTY;
 
-	switch (cmd) {
-	case ND_CMD_GET_CONFIG_SIZE: {
-		struct nd_cmd_get_config_size *nd_cmd = buf;
-
-		if (buf_len < sizeof(*nd_cmd))
-			return -EINVAL;
-		nd_cmd->status = 0;
-		nd_cmd->config_size = LABEL_SIZE;
-		nd_cmd->max_xfer = SZ_4K;
-		rc = 0;
-		break;
-	}
-	case ND_CMD_GET_CONFIG_DATA: {
-		struct nd_cmd_get_config_data_hdr *nd_cmd = buf;
-		unsigned int len, offset = nd_cmd->in_offset;
-
-		if (buf_len < sizeof(*nd_cmd))
-			return -EINVAL;
-		if (offset >= LABEL_SIZE)
-			return -EINVAL;
-		if (nd_cmd->in_length + sizeof(*nd_cmd) > buf_len)
-			return -EINVAL;
-
-		nd_cmd->status = 0;
-		len = min(nd_cmd->in_length, LABEL_SIZE - offset);
-		memcpy(nd_cmd->out_buf, t->label[i] + offset, len);
-		rc = buf_len - sizeof(*nd_cmd) - len;
-		break;
-	}
-	case ND_CMD_SET_CONFIG_DATA: {
-		struct nd_cmd_set_config_hdr *nd_cmd = buf;
-		unsigned int len, offset = nd_cmd->in_offset;
-		u32 *status;
-
-		if (buf_len < sizeof(*nd_cmd))
-			return -EINVAL;
-		if (offset >= LABEL_SIZE)
-			return -EINVAL;
-		if (nd_cmd->in_length + sizeof(*nd_cmd) + 4 > buf_len)
-			return -EINVAL;
-
-		status = buf + nd_cmd->in_length + sizeof(*nd_cmd);
-		*status = 0;
-		len = min(nd_cmd->in_length, LABEL_SIZE - offset);
-		memcpy(t->label[i] + offset, nd_cmd->in_buf, len);
-		rc = buf_len - sizeof(*nd_cmd) - (len + 4);
-		break;
-	}
-	default:
-		return -ENOTTY;
+		switch (cmd) {
+		case ND_CMD_ARS_CAP:
+			rc = nfit_test_cmd_ars_cap(buf, buf_len);
+			break;
+		case ND_CMD_ARS_START:
+			rc = nfit_test_cmd_ars_start(buf, buf_len);
+			break;
+		case ND_CMD_ARS_STATUS:
+			rc = nfit_test_cmd_ars_status(buf, buf_len);
+			break;
+		default:
+			return -ENOTTY;
+		}
 	}
 
 	return rc;
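
The rewritten dispatcher now splits on nvdimm: a non-NULL nvdimm selects the per-DIMM label commands gated by nfit_mem->dsm_mask, while a NULL nvdimm selects the new bus-level path gated by nd_desc->dsm_mask. The three ARS stubs emulate the "address range scrub" sequence end to end; roughly (a sketch of the call order, not code from this patch):

	struct nd_cmd_ars_cap cap = { 0 };	/* caller sets .address/.length */
	struct nd_cmd_ars_start start = { 0 };
	struct nd_cmd_ars_status *status;

	/* 1: query capabilities; the stub reports a 256-byte status buffer
	 * and flags persistent + volatile scrub support in status[31:16] */
	nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_CAP, &cap, sizeof(cap));

	/* 2: kick off the scrub */
	nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_START, &start, sizeof(start));

	/* 3: poll for results; the stub reports zero error records */
	status = calloc(1, cap.max_ars_out);
	nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_STATUS, status,
			cap.max_ars_out);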
@@ -876,6 +954,9 @@ static void nfit_test0_setup(struct nfit_test *t)
 	set_bit(ND_CMD_GET_CONFIG_SIZE, &acpi_desc->dimm_dsm_force_en);
 	set_bit(ND_CMD_GET_CONFIG_DATA, &acpi_desc->dimm_dsm_force_en);
 	set_bit(ND_CMD_SET_CONFIG_DATA, &acpi_desc->dimm_dsm_force_en);
+	set_bit(ND_CMD_ARS_CAP, &acpi_desc->bus_dsm_force_en);
+	set_bit(ND_CMD_ARS_START, &acpi_desc->bus_dsm_force_en);
+	set_bit(ND_CMD_ARS_STATUS, &acpi_desc->bus_dsm_force_en);
 	nd_desc = &acpi_desc->nd_desc;
 	nd_desc->ndctl = nfit_test_ctl;
 }
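
The three new set_bit() calls mirror the existing dimm_dsm_force_en seeding at the bus level: they force the ARS commands on so nfit_test_ctl()'s test_bit(cmd, &nd_desc->dsm_mask) check passes even though the emulated firmware never advertises them via _DSM. Conceptually (a sketch; the actual folding happens in the nfit driver's init path, outside this hunk):

	/* effective bus command mask = bits advertised by firmware,
	 * OR'd with the force-enable bits seeded by the test fixture */
	nd_desc->dsm_mask = advertised_mask | acpi_desc->bus_dsm_force_en;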
@@ -948,9 +1029,13 @@ static int nfit_test_blk_do_io(struct nd_blk_region *ndbr, resource_size_t dpa,
 
 	lane = nd_region_acquire_lane(nd_region);
 	if (rw)
-		memcpy(mmio->base + dpa, iobuf, len);
-	else
-		memcpy(iobuf, mmio->base + dpa, len);
+		memcpy(mmio->addr.base + dpa, iobuf, len);
+	else {
+		memcpy(iobuf, mmio->addr.base + dpa, len);
+
+	/* give us some coverage of the mmio_flush_range() API */
+		mmio_flush_range(mmio->addr.base + dpa, len);
+	}
 	nd_region_release_lane(nd_region, lane);
 
 	return 0;
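
The extra mmio_flush_range() on the read path ties into this series' switch of BLK apertures from write-combining to write-back mappings: with a cacheable mapping a read can be satisfied from a stale cache line after the aperture has been re-pointed, so the real driver must flush the span before trusting the data; the test calls the API here mainly for coverage, as the comment says. On x86 this amounts to a cache-line flush loop bracketed by fences, approximately (a user-space approximation assuming 64-byte lines; the real helper is an arch header detail):

	#include <stdint.h>
	#include <stddef.h>

	static void flush_span(void *addr, size_t size)
	{
		const size_t line = 64;	/* assumed cache-line size */
		char *p = (char *)((uintptr_t)addr & ~(uintptr_t)(line - 1));
		char *end = (char *)addr + size;

		asm volatile("mfence" ::: "memory");
		for (; p < end; p += line)	/* flush each line in the span */
			asm volatile("clflush %0" : "+m" (*p));
		asm volatile("mfence" ::: "memory");
	}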