
Merge tag 'libnvdimm-for-4.20' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm

Pull libnvdimm updates from Dan Williams:

 - Improve the efficiency and performance of reading nvdimm-namespace
   labels. Reduce the amount of label data read at driver load time by a
   few orders of magnitude. Reduce heavyweight call-outs to
   platform-firmware routines.

 - Handle media errors located in the 'struct page' array stored on a
   persistent memory namespace. Let the kernel clear these errors rather
   than relying on an awkward userspace workaround.

 - Fix Address Range Scrub (ARS) completion tracking. Correct occasions
   where the kernel indicates completion of ARS before submission.

 - Fix asynchronous device registration reference counting.

 - Add support for reporting an nvdimm dirty-shutdown-count via sysfs.

 - Fix various small libnvdimm core and uapi issues.

* tag 'libnvdimm-for-4.20' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm: (21 commits)
  acpi, nfit: Further restrict userspace ARS start requests
  acpi, nfit: Fix Address Range Scrub completion tracking
  UAPI: ndctl: Remove use of PAGE_SIZE
  UAPI: ndctl: Fix g++-unsupported initialisation in headers
  tools/testing/nvdimm: Populate dirty shutdown data
  acpi, nfit: Collect shutdown status
  acpi, nfit: Introduce nfit_mem flags
  libnvdimm, label: Fix sparse warning
  nvdimm: Use namespace index data to reduce number of label reads needed
  nvdimm: Split label init out from the logic for getting config data
  nvdimm: Remove empty if statement
  nvdimm: Clarify comment in sizeof_namespace_index
  nvdimm: Sanity check labeloff
  libnvdimm, dimm: Maximize label transfer size
  libnvdimm, pmem: Fix badblocks population for 'raw' namespaces
  libnvdimm, namespace: Drop the repeat assignment for variable dev->parent
  libnvdimm, region: Fail badblocks listing for inactive regions
  libnvdimm, pfn: during init, clear errors in the metadata area
  libnvdimm: Set device node in nd_device_register
  libnvdimm: Hold reference on parent while scheduling async init
  ...
Linus Torvalds, 6 years ago
commit 6078e07dcf
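
The headline user-visible addition is the per-DIMM dirty-shutdown count, exported through the DIMM's "nfit" sysfs group (see the dirty_shutdown_show() hunk in drivers/acpi/nfit/core.c below). A minimal userspace sketch for reading it; the device name "nmem0" and the exact path are assumptions to adjust per system:

#include <stdio.h>

int main(void)
{
	/* path assumes the usual /sys/bus/nd layout and a DIMM named nmem0 */
	FILE *f = fopen("/sys/bus/nd/devices/nmem0/nfit/dirty_shutdown", "r");
	unsigned int count;

	if (!f)
		return 1; /* attribute is hidden when no valid count is reported */
	if (fscanf(f, "%u", &count) == 1)
		printf("dirty shutdown count: %u\n", count);
	fclose(f);
	return 0;
}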

+ 206 - 91
drivers/acpi/nfit/core.c

@@ -25,6 +25,7 @@
 #include <asm/cacheflush.h>
 #include <acpi/nfit.h>
 #include "nfit.h"
+#include "intel.h"
 
 /*
  * For readq() and writeq() on 32-bit builds, the hi-lo, lo-hi order is
@@ -191,18 +192,20 @@ static int xlat_nvdimm_status(struct nvdimm *nvdimm, void *buf, unsigned int cmd
 		 * In the _LSI, _LSR, _LSW case the locked status is
 		 * communicated via the read/write commands
 		 */
-		if (nfit_mem->has_lsr)
+		if (test_bit(NFIT_MEM_LSR, &nfit_mem->flags))
 			break;
 
 		if (status >> 16 & ND_CONFIG_LOCKED)
 			return -EACCES;
 		break;
 	case ND_CMD_GET_CONFIG_DATA:
-		if (nfit_mem->has_lsr && status == ACPI_LABELS_LOCKED)
+		if (test_bit(NFIT_MEM_LSR, &nfit_mem->flags)
+				&& status == ACPI_LABELS_LOCKED)
 			return -EACCES;
 		break;
 	case ND_CMD_SET_CONFIG_DATA:
-		if (nfit_mem->has_lsw && status == ACPI_LABELS_LOCKED)
+		if (test_bit(NFIT_MEM_LSW, &nfit_mem->flags)
+				&& status == ACPI_LABELS_LOCKED)
 			return -EACCES;
 		break;
 	default:
@@ -480,14 +483,16 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm,
 			min_t(u32, 256, in_buf.buffer.length), true);
 
 	/* call the BIOS, prefer the named methods over _DSM if available */
-	if (nvdimm && cmd == ND_CMD_GET_CONFIG_SIZE && nfit_mem->has_lsr)
+	if (nvdimm && cmd == ND_CMD_GET_CONFIG_SIZE
+			&& test_bit(NFIT_MEM_LSR, &nfit_mem->flags))
 		out_obj = acpi_label_info(handle);
-	else if (nvdimm && cmd == ND_CMD_GET_CONFIG_DATA && nfit_mem->has_lsr) {
+	else if (nvdimm && cmd == ND_CMD_GET_CONFIG_DATA
+			&& test_bit(NFIT_MEM_LSR, &nfit_mem->flags)) {
 		struct nd_cmd_get_config_data_hdr *p = buf;
 
 		out_obj = acpi_label_read(handle, p->in_offset, p->in_length);
 	} else if (nvdimm && cmd == ND_CMD_SET_CONFIG_DATA
-			&& nfit_mem->has_lsw) {
+			&& test_bit(NFIT_MEM_LSW, &nfit_mem->flags)) {
 		struct nd_cmd_set_config_hdr *p = buf;
 
 		out_obj = acpi_label_write(handle, p->in_offset, p->in_length,
@@ -1547,7 +1552,12 @@ static DEVICE_ATTR_RO(dsm_mask);
 static ssize_t flags_show(struct device *dev,
 		struct device_attribute *attr, char *buf)
 {
-	u16 flags = to_nfit_memdev(dev)->flags;
+	struct nvdimm *nvdimm = to_nvdimm(dev);
+	struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
+	u16 flags = __to_nfit_memdev(nfit_mem)->flags;
+
+	if (test_bit(NFIT_MEM_DIRTY, &nfit_mem->flags))
+		flags |= ACPI_NFIT_MEM_FLUSH_FAILED;
 
 	return sprintf(buf, "%s%s%s%s%s%s%s\n",
 		flags & ACPI_NFIT_MEM_SAVE_FAILED ? "save_fail " : "",
@@ -1578,6 +1588,16 @@ static ssize_t id_show(struct device *dev,
 }
 static DEVICE_ATTR_RO(id);
 
+static ssize_t dirty_shutdown_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct nvdimm *nvdimm = to_nvdimm(dev);
+	struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
+
+	return sprintf(buf, "%d\n", nfit_mem->dirty_shutdown);
+}
+static DEVICE_ATTR_RO(dirty_shutdown);
+
 static struct attribute *acpi_nfit_dimm_attributes[] = {
 	&dev_attr_handle.attr,
 	&dev_attr_phys_id.attr,
@@ -1595,6 +1615,7 @@ static struct attribute *acpi_nfit_dimm_attributes[] = {
 	&dev_attr_id.attr,
 	&dev_attr_family.attr,
 	&dev_attr_dsm_mask.attr,
+	&dev_attr_dirty_shutdown.attr,
 	NULL,
 };
 
@@ -1603,6 +1624,7 @@ static umode_t acpi_nfit_dimm_attr_visible(struct kobject *kobj,
 {
 	struct device *dev = container_of(kobj, struct device, kobj);
 	struct nvdimm *nvdimm = to_nvdimm(dev);
+	struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
 
 	if (!to_nfit_dcr(dev)) {
 		/* Without a dcr only the memdev attributes can be surfaced */
@@ -1616,6 +1638,11 @@ static umode_t acpi_nfit_dimm_attr_visible(struct kobject *kobj,
 
 	if (a == &dev_attr_format1.attr && num_nvdimm_formats(nvdimm) <= 1)
 		return 0;
+
+	if (!test_bit(NFIT_MEM_DIRTY_COUNT, &nfit_mem->flags)
+			&& a == &dev_attr_dirty_shutdown.attr)
+		return 0;
+
 	return a->mode;
 }
 
@@ -1694,6 +1721,56 @@ static bool acpi_nvdimm_has_method(struct acpi_device *adev, char *method)
 	return false;
 }
 
+__weak void nfit_intel_shutdown_status(struct nfit_mem *nfit_mem)
+{
+	struct nd_intel_smart smart = { 0 };
+	union acpi_object in_buf = {
+		.type = ACPI_TYPE_BUFFER,
+		.buffer.pointer = (char *) &smart,
+		.buffer.length = sizeof(smart),
+	};
+	union acpi_object in_obj = {
+		.type = ACPI_TYPE_PACKAGE,
+		.package.count = 1,
+		.package.elements = &in_buf,
+	};
+	const u8 func = ND_INTEL_SMART;
+	const guid_t *guid = to_nfit_uuid(nfit_mem->family);
+	u8 revid = nfit_dsm_revid(nfit_mem->family, func);
+	struct acpi_device *adev = nfit_mem->adev;
+	acpi_handle handle = adev->handle;
+	union acpi_object *out_obj;
+
+	if ((nfit_mem->dsm_mask & (1 << func)) == 0)
+		return;
+
+	out_obj = acpi_evaluate_dsm(handle, guid, revid, func, &in_obj);
+	if (!out_obj)
+		return;
+
+	if (smart.flags & ND_INTEL_SMART_SHUTDOWN_VALID) {
+		if (smart.shutdown_state)
+			set_bit(NFIT_MEM_DIRTY, &nfit_mem->flags);
+	}
+
+	if (smart.flags & ND_INTEL_SMART_SHUTDOWN_COUNT_VALID) {
+		set_bit(NFIT_MEM_DIRTY_COUNT, &nfit_mem->flags);
+		nfit_mem->dirty_shutdown = smart.shutdown_count;
+	}
+	ACPI_FREE(out_obj);
+}
+
+static void populate_shutdown_status(struct nfit_mem *nfit_mem)
+{
+	/*
+	 * For DIMMs that provide a dynamic facility to retrieve a
+	 * dirty-shutdown status and/or a dirty-shutdown count, cache
+	 * these values in nfit_mem.
+	 */
+	if (nfit_mem->family == NVDIMM_FAMILY_INTEL)
+		nfit_intel_shutdown_status(nfit_mem);
+}
+
 static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc,
 		struct nfit_mem *nfit_mem, u32 device_handle)
 {
@@ -1708,8 +1785,11 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc,
 	nfit_mem->dsm_mask = acpi_desc->dimm_cmd_force_en;
 	nfit_mem->family = NVDIMM_FAMILY_INTEL;
 	adev = to_acpi_dev(acpi_desc);
-	if (!adev)
+	if (!adev) {
+		/* unit test case */
+		populate_shutdown_status(nfit_mem);
 		return 0;
+	}
 
 	adev_dimm = acpi_find_child_device(adev, device_handle, false);
 	nfit_mem->adev = adev_dimm;
@@ -1784,14 +1864,17 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc,
 	if (acpi_nvdimm_has_method(adev_dimm, "_LSI")
 			&& acpi_nvdimm_has_method(adev_dimm, "_LSR")) {
 		dev_dbg(dev, "%s: has _LSR\n", dev_name(&adev_dimm->dev));
-		nfit_mem->has_lsr = true;
+		set_bit(NFIT_MEM_LSR, &nfit_mem->flags);
 	}
 
-	if (nfit_mem->has_lsr && acpi_nvdimm_has_method(adev_dimm, "_LSW")) {
+	if (test_bit(NFIT_MEM_LSR, &nfit_mem->flags)
+			&& acpi_nvdimm_has_method(adev_dimm, "_LSW")) {
 		dev_dbg(dev, "%s: has _LSW\n", dev_name(&adev_dimm->dev));
-		nfit_mem->has_lsw = true;
+		set_bit(NFIT_MEM_LSW, &nfit_mem->flags);
 	}
 
+	populate_shutdown_status(nfit_mem);
+
 	return 0;
 }
 
@@ -1878,11 +1961,11 @@ static int acpi_nfit_register_dimms(struct acpi_nfit_desc *acpi_desc)
 			cmd_mask |= nfit_mem->dsm_mask & NVDIMM_STANDARD_CMDMASK;
 		}
 
-		if (nfit_mem->has_lsr) {
+		if (test_bit(NFIT_MEM_LSR, &nfit_mem->flags)) {
 			set_bit(ND_CMD_GET_CONFIG_SIZE, &cmd_mask);
 			set_bit(ND_CMD_GET_CONFIG_DATA, &cmd_mask);
 		}
-		if (nfit_mem->has_lsw)
+		if (test_bit(NFIT_MEM_LSW, &nfit_mem->flags))
 			set_bit(ND_CMD_SET_CONFIG_DATA, &cmd_mask);
 
 		flush = nfit_mem->nfit_flush ? nfit_mem->nfit_flush->flush
@@ -2466,7 +2549,8 @@ static int ars_get_cap(struct acpi_nfit_desc *acpi_desc,
 	return cmd_rc;
 }
 
-static int ars_start(struct acpi_nfit_desc *acpi_desc, struct nfit_spa *nfit_spa)
+static int ars_start(struct acpi_nfit_desc *acpi_desc,
+		struct nfit_spa *nfit_spa, enum nfit_ars_state req_type)
 {
 	int rc;
 	int cmd_rc;
@@ -2477,7 +2561,7 @@ static int ars_start(struct acpi_nfit_desc *acpi_desc, struct nfit_spa *nfit_spa
 	memset(&ars_start, 0, sizeof(ars_start));
 	ars_start.address = spa->address;
 	ars_start.length = spa->length;
-	if (test_bit(ARS_SHORT, &nfit_spa->ars_state))
+	if (req_type == ARS_REQ_SHORT)
 		ars_start.flags = ND_ARS_RETURN_PREV_DATA;
 	if (nfit_spa_type(spa) == NFIT_SPA_PM)
 		ars_start.type = ND_ARS_PERSISTENT;
@@ -2534,6 +2618,15 @@ static void ars_complete(struct acpi_nfit_desc *acpi_desc,
 	struct nd_region *nd_region = nfit_spa->nd_region;
 	struct device *dev;
 
+	lockdep_assert_held(&acpi_desc->init_mutex);
+	/*
+	 * Only advance the ARS state for ARS runs initiated by the
+	 * kernel, ignore ARS results from BIOS initiated runs for scrub
+	 * completion tracking.
+	 */
+	if (acpi_desc->scrub_spa != nfit_spa)
+		return;
+
 	if ((ars_status->address >= spa->address && ars_status->address
 				< spa->address + spa->length)
 			|| (ars_status->address < spa->address)) {
@@ -2553,28 +2646,13 @@ static void ars_complete(struct acpi_nfit_desc *acpi_desc,
 	} else
 		return;
 
-	if (test_bit(ARS_DONE, &nfit_spa->ars_state))
-		return;
-
-	if (!test_and_clear_bit(ARS_REQ, &nfit_spa->ars_state))
-		return;
-
+	acpi_desc->scrub_spa = NULL;
 	if (nd_region) {
 		dev = nd_region_dev(nd_region);
 		nvdimm_region_notify(nd_region, NVDIMM_REVALIDATE_POISON);
 	} else
 		dev = acpi_desc->dev;
-
-	dev_dbg(dev, "ARS: range %d %s complete\n", spa->range_index,
-			test_bit(ARS_SHORT, &nfit_spa->ars_state)
-			? "short" : "long");
-	clear_bit(ARS_SHORT, &nfit_spa->ars_state);
-	if (test_and_clear_bit(ARS_REQ_REDO, &nfit_spa->ars_state)) {
-		set_bit(ARS_SHORT, &nfit_spa->ars_state);
-		set_bit(ARS_REQ, &nfit_spa->ars_state);
-		dev_dbg(dev, "ARS: processing scrub request received while in progress\n");
-	} else
-		set_bit(ARS_DONE, &nfit_spa->ars_state);
+	dev_dbg(dev, "ARS: range %d complete\n", spa->range_index);
 }
 
 static int ars_status_process_records(struct acpi_nfit_desc *acpi_desc)
@@ -2855,46 +2933,55 @@ static int acpi_nfit_query_poison(struct acpi_nfit_desc *acpi_desc)
 	return 0;
 }
 
-static int ars_register(struct acpi_nfit_desc *acpi_desc, struct nfit_spa *nfit_spa,
-		int *query_rc)
+static int ars_register(struct acpi_nfit_desc *acpi_desc,
+		struct nfit_spa *nfit_spa)
 {
-	int rc = *query_rc;
+	int rc;
 
-	if (no_init_ars)
+	if (no_init_ars || test_bit(ARS_FAILED, &nfit_spa->ars_state))
 		return acpi_nfit_register_region(acpi_desc, nfit_spa);
 
-	set_bit(ARS_REQ, &nfit_spa->ars_state);
-	set_bit(ARS_SHORT, &nfit_spa->ars_state);
+	set_bit(ARS_REQ_SHORT, &nfit_spa->ars_state);
+	set_bit(ARS_REQ_LONG, &nfit_spa->ars_state);
 
-	switch (rc) {
+	switch (acpi_nfit_query_poison(acpi_desc)) {
 	case 0:
 	case -EAGAIN:
-		rc = ars_start(acpi_desc, nfit_spa);
-		if (rc == -EBUSY) {
-			*query_rc = rc;
+		rc = ars_start(acpi_desc, nfit_spa, ARS_REQ_SHORT);
+		/* shouldn't happen, try again later */
+		if (rc == -EBUSY)
 			break;
-		} else if (rc == 0) {
-			rc = acpi_nfit_query_poison(acpi_desc);
-		} else {
+		if (rc) {
 			set_bit(ARS_FAILED, &nfit_spa->ars_state);
 			break;
 		}
-		if (rc == -EAGAIN)
-			clear_bit(ARS_SHORT, &nfit_spa->ars_state);
-		else if (rc == 0)
-			ars_complete(acpi_desc, nfit_spa);
+		clear_bit(ARS_REQ_SHORT, &nfit_spa->ars_state);
+		rc = acpi_nfit_query_poison(acpi_desc);
+		if (rc)
+			break;
+		acpi_desc->scrub_spa = nfit_spa;
+		ars_complete(acpi_desc, nfit_spa);
+		/*
+		 * If ars_complete() says we didn't complete the
+		 * short scrub, we'll try again with a long
+		 * request.
+		 */
+		acpi_desc->scrub_spa = NULL;
 		break;
 	case -EBUSY:
+	case -ENOMEM:
 	case -ENOSPC:
+		/*
+		 * BIOS was using ARS, wait for it to complete (or
+		 * resources to become available) and then perform our
+		 * own scrubs.
+		 */
 		break;
 	default:
 		set_bit(ARS_FAILED, &nfit_spa->ars_state);
 		break;
 	}
 
-	if (test_and_clear_bit(ARS_DONE, &nfit_spa->ars_state))
-		set_bit(ARS_REQ, &nfit_spa->ars_state);
-
 	return acpi_nfit_register_region(acpi_desc, nfit_spa);
 }
 
@@ -2916,6 +3003,8 @@ static unsigned int __acpi_nfit_scrub(struct acpi_nfit_desc *acpi_desc,
 	struct device *dev = acpi_desc->dev;
 	struct nfit_spa *nfit_spa;
 
+	lockdep_assert_held(&acpi_desc->init_mutex);
+
 	if (acpi_desc->cancel)
 		return 0;
 
@@ -2939,21 +3028,49 @@ static unsigned int __acpi_nfit_scrub(struct acpi_nfit_desc *acpi_desc,
 
 	ars_complete_all(acpi_desc);
 	list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
+		enum nfit_ars_state req_type;
+		int rc;
+
 		if (test_bit(ARS_FAILED, &nfit_spa->ars_state))
 			continue;
-		if (test_bit(ARS_REQ, &nfit_spa->ars_state)) {
-			int rc = ars_start(acpi_desc, nfit_spa);
-
-			clear_bit(ARS_DONE, &nfit_spa->ars_state);
-			dev = nd_region_dev(nfit_spa->nd_region);
-			dev_dbg(dev, "ARS: range %d ARS start (%d)\n",
-					nfit_spa->spa->range_index, rc);
-			if (rc == 0 || rc == -EBUSY)
-				return 1;
-			dev_err(dev, "ARS: range %d ARS failed (%d)\n",
-					nfit_spa->spa->range_index, rc);
-			set_bit(ARS_FAILED, &nfit_spa->ars_state);
+
+		/* prefer short ARS requests first */
+		if (test_bit(ARS_REQ_SHORT, &nfit_spa->ars_state))
+			req_type = ARS_REQ_SHORT;
+		else if (test_bit(ARS_REQ_LONG, &nfit_spa->ars_state))
+			req_type = ARS_REQ_LONG;
+		else
+			continue;
+		rc = ars_start(acpi_desc, nfit_spa, req_type);
+
+		dev = nd_region_dev(nfit_spa->nd_region);
+		dev_dbg(dev, "ARS: range %d ARS start %s (%d)\n",
+				nfit_spa->spa->range_index,
+				req_type == ARS_REQ_SHORT ? "short" : "long",
+				rc);
+		/*
+		 * Hmm, we raced someone else starting ARS? Try again in
+		 * a bit.
+		 */
+		if (rc == -EBUSY)
+			return 1;
+		if (rc == 0) {
+			dev_WARN_ONCE(dev, acpi_desc->scrub_spa,
+					"scrub start while range %d active\n",
+					acpi_desc->scrub_spa->spa->range_index);
+			clear_bit(req_type, &nfit_spa->ars_state);
+			acpi_desc->scrub_spa = nfit_spa;
+			/*
+			 * Consider this spa last for future scrub
+			 * requests
+			 */
+			list_move_tail(&nfit_spa->list, &acpi_desc->spas);
+			return 1;
 		}
+
+		dev_err(dev, "ARS: range %d ARS failed (%d)\n",
+				nfit_spa->spa->range_index, rc);
+		set_bit(ARS_FAILED, &nfit_spa->ars_state);
 	}
 	return 0;
 }
@@ -3009,6 +3126,7 @@ static void acpi_nfit_init_ars(struct acpi_nfit_desc *acpi_desc,
 	struct nd_cmd_ars_cap ars_cap;
 	int rc;
 
+	set_bit(ARS_FAILED, &nfit_spa->ars_state);
 	memset(&ars_cap, 0, sizeof(ars_cap));
 	rc = ars_get_cap(acpi_desc, &ars_cap, nfit_spa);
 	if (rc < 0)
@@ -3025,16 +3143,14 @@ static void acpi_nfit_init_ars(struct acpi_nfit_desc *acpi_desc,
 	nfit_spa->clear_err_unit = ars_cap.clear_err_unit;
 	acpi_desc->max_ars = max(nfit_spa->max_ars, acpi_desc->max_ars);
 	clear_bit(ARS_FAILED, &nfit_spa->ars_state);
-	set_bit(ARS_REQ, &nfit_spa->ars_state);
 }
 
 static int acpi_nfit_register_regions(struct acpi_nfit_desc *acpi_desc)
 {
 	struct nfit_spa *nfit_spa;
-	int rc, query_rc;
+	int rc;
 
 	list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
-		set_bit(ARS_FAILED, &nfit_spa->ars_state);
 		switch (nfit_spa_type(nfit_spa->spa)) {
 		case NFIT_SPA_VOLATILE:
 		case NFIT_SPA_PM:
@@ -3043,20 +3159,12 @@ static int acpi_nfit_register_regions(struct acpi_nfit_desc *acpi_desc)
 		}
 	}
 
-	/*
-	 * Reap any results that might be pending before starting new
-	 * short requests.
-	 */
-	query_rc = acpi_nfit_query_poison(acpi_desc);
-	if (query_rc == 0)
-		ars_complete_all(acpi_desc);
-
 	list_for_each_entry(nfit_spa, &acpi_desc->spas, list)
 		switch (nfit_spa_type(nfit_spa->spa)) {
 		case NFIT_SPA_VOLATILE:
 		case NFIT_SPA_PM:
 			/* register regions and kick off initial ARS run */
-			rc = ars_register(acpi_desc, nfit_spa, &query_rc);
+			rc = ars_register(acpi_desc, nfit_spa);
 			if (rc)
 				return rc;
 			break;
@@ -3233,6 +3341,8 @@ static int acpi_nfit_clear_to_send(struct nvdimm_bus_descriptor *nd_desc,
 		struct nvdimm *nvdimm, unsigned int cmd)
 {
 	struct acpi_nfit_desc *acpi_desc = to_acpi_nfit_desc(nd_desc);
+	struct nfit_spa *nfit_spa;
+	int rc = 0;
 
 	if (nvdimm)
 		return 0;
@@ -3242,16 +3352,24 @@ static int acpi_nfit_clear_to_send(struct nvdimm_bus_descriptor *nd_desc,
 	/*
 	 * The kernel and userspace may race to initiate a scrub, but
 	 * the scrub thread is prepared to lose that initial race.  It
-	 * just needs guarantees that any ars it initiates are not
-	 * interrupted by any intervening start reqeusts from userspace.
+	 * just needs guarantees that any ARS it initiates are not
+	 * interrupted by any intervening start requests from userspace.
 	 */
-	if (work_busy(&acpi_desc->dwork.work))
-		return -EBUSY;
+	mutex_lock(&acpi_desc->init_mutex);
+	list_for_each_entry(nfit_spa, &acpi_desc->spas, list)
+		if (acpi_desc->scrub_spa
+				|| test_bit(ARS_REQ_SHORT, &nfit_spa->ars_state)
+				|| test_bit(ARS_REQ_LONG, &nfit_spa->ars_state)) {
+			rc = -EBUSY;
+			break;
+		}
+	mutex_unlock(&acpi_desc->init_mutex);
 
-	return 0;
+	return rc;
 }
 
-int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc, unsigned long flags)
+int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc,
+		enum nfit_ars_state req_type)
 {
 	struct device *dev = acpi_desc->dev;
 	int scheduled = 0, busy = 0;
@@ -3271,14 +3389,10 @@ int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc, unsigned long flags)
 		if (test_bit(ARS_FAILED, &nfit_spa->ars_state))
 			continue;
 
-		if (test_and_set_bit(ARS_REQ, &nfit_spa->ars_state)) {
+		if (test_and_set_bit(req_type, &nfit_spa->ars_state))
 			busy++;
-			set_bit(ARS_REQ_REDO, &nfit_spa->ars_state);
-		} else {
-			if (test_bit(ARS_SHORT, &flags))
-				set_bit(ARS_SHORT, &nfit_spa->ars_state);
+		else
 			scheduled++;
-		}
 	}
 	if (scheduled) {
 		sched_ars(acpi_desc);
@@ -3464,10 +3578,11 @@ static void acpi_nfit_update_notify(struct device *dev, acpi_handle handle)
 static void acpi_nfit_uc_error_notify(struct device *dev, acpi_handle handle)
 {
 	struct acpi_nfit_desc *acpi_desc = dev_get_drvdata(dev);
-	unsigned long flags = (acpi_desc->scrub_mode == HW_ERROR_SCRUB_ON) ?
-			0 : 1 << ARS_SHORT;
 
-	acpi_nfit_ars_rescan(acpi_desc, flags);
+	if (acpi_desc->scrub_mode == HW_ERROR_SCRUB_ON)
+		acpi_nfit_ars_rescan(acpi_desc, ARS_REQ_LONG);
+	else
+		acpi_nfit_ars_rescan(acpi_desc, ARS_REQ_SHORT);
 }
 
 void __acpi_nfit_notify(struct device *dev, acpi_handle handle, u32 event)
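
The reworked scrub logic tracks two independent request bits per range and services short requests before long ones. The selection policy from __acpi_nfit_scrub(), reduced to a standalone sketch (names simplified; the real driver starts one request per workqueue pass rather than draining both in a loop):

#include <stdio.h>

enum { ARS_REQ_SHORT, ARS_REQ_LONG, ARS_FAILED };

/* pick the next request type for a range: short requests win */
static int next_req(unsigned long state, int *req_type)
{
	if (state & (1UL << ARS_FAILED))
		return 0;
	if (state & (1UL << ARS_REQ_SHORT))
		*req_type = ARS_REQ_SHORT;
	else if (state & (1UL << ARS_REQ_LONG))
		*req_type = ARS_REQ_LONG;
	else
		return 0;
	return 1;
}

int main(void)
{
	unsigned long state = (1UL << ARS_REQ_SHORT) | (1UL << ARS_REQ_LONG);
	int req;

	while (next_req(state, &req)) {
		printf("start %s ARS\n",
		       req == ARS_REQ_SHORT ? "short" : "long");
		state &= ~(1UL << req);	/* clear_bit() after a successful start */
	}
	return 0;
}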

+ 38 - 0
drivers/acpi/nfit/intel.h

@@ -0,0 +1,38 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright(c) 2018 Intel Corporation. All rights reserved.
+ * Intel specific definitions for NVDIMM Firmware Interface Table - NFIT
+ */
+#ifndef _NFIT_INTEL_H_
+#define _NFIT_INTEL_H_
+
+#define ND_INTEL_SMART 1
+
+#define ND_INTEL_SMART_SHUTDOWN_COUNT_VALID     (1 << 5)
+#define ND_INTEL_SMART_SHUTDOWN_VALID           (1 << 10)
+
+struct nd_intel_smart {
+	u32 status;
+	union {
+		struct {
+			u32 flags;
+			u8 reserved0[4];
+			u8 health;
+			u8 spares;
+			u8 life_used;
+			u8 alarm_flags;
+			u16 media_temperature;
+			u16 ctrl_temperature;
+			u32 shutdown_count;
+			u8 ait_status;
+			u16 pmic_temperature;
+			u8 reserved1[8];
+			u8 shutdown_state;
+			u32 vendor_size;
+			u8 vendor_data[92];
+		} __packed;
+		u8 data[128];
+	};
+} __packed;
+
+#endif
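
The union above deliberately pins the structured view of the Intel SMART payload to the same 128 bytes as the raw data[] view. A compile-time check of that layout; the struct is restated with stdint types purely to make the check self-contained:

#include <stdint.h>

/* restatement of the payload layout (the kernel header uses u8/u16/u32) */
struct smart_payload {
	uint32_t flags;
	uint8_t reserved0[4];
	uint8_t health;
	uint8_t spares;
	uint8_t life_used;
	uint8_t alarm_flags;
	uint16_t media_temperature;
	uint16_t ctrl_temperature;
	uint32_t shutdown_count;
	uint8_t ait_status;
	uint16_t pmic_temperature;
	uint8_t reserved1[8];
	uint8_t shutdown_state;
	uint32_t vendor_size;
	uint8_t vendor_data[92];
} __attribute__((packed));

_Static_assert(sizeof(struct smart_payload) == 128,
	       "structured view must match the 128-byte data[] view");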

+ 14 - 7
drivers/acpi/nfit/nfit.h

@@ -118,10 +118,8 @@ enum nfit_dimm_notifiers {
 };
 
 enum nfit_ars_state {
-	ARS_REQ,
-	ARS_REQ_REDO,
-	ARS_DONE,
-	ARS_SHORT,
+	ARS_REQ_SHORT,
+	ARS_REQ_LONG,
 	ARS_FAILED,
 };
 
@@ -159,6 +157,13 @@ struct nfit_memdev {
 	struct acpi_nfit_memory_map memdev[0];
 };
 
+enum nfit_mem_flags {
+	NFIT_MEM_LSR,
+	NFIT_MEM_LSW,
+	NFIT_MEM_DIRTY,
+	NFIT_MEM_DIRTY_COUNT,
+};
+
 /* assembled tables for a given dimm/memory-device */
 struct nfit_mem {
 	struct nvdimm *nvdimm;
@@ -178,9 +183,9 @@ struct nfit_mem {
 	struct acpi_nfit_desc *acpi_desc;
 	struct resource *flush_wpq;
 	unsigned long dsm_mask;
+	unsigned long flags;
+	u32 dirty_shutdown;
 	int family;
-	bool has_lsr;
-	bool has_lsw;
 };
 
 struct acpi_nfit_desc {
@@ -198,6 +203,7 @@ struct acpi_nfit_desc {
 	struct device *dev;
 	u8 ars_start_flags;
 	struct nd_cmd_ars_status *ars_status;
+	struct nfit_spa *scrub_spa;
 	struct delayed_work dwork;
 	struct list_head list;
 	struct kernfs_node *scrub_count_state;
@@ -252,7 +258,8 @@ struct nfit_blk {
 
 extern struct list_head acpi_descs;
 extern struct mutex acpi_desc_lock;
-int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc, unsigned long flags);
+int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc,
+		enum nfit_ars_state req_type);
 
 #ifdef CONFIG_X86_MCE
 void nfit_mce_register(void);

+ 14 - 6
drivers/nvdimm/bus.c

@@ -54,12 +54,6 @@ static int to_nd_device_type(struct device *dev)
 
 static int nvdimm_bus_uevent(struct device *dev, struct kobj_uevent_env *env)
 {
-	/*
-	 * Ensure that region devices always have their numa node set as
-	 * early as possible.
-	 */
-	if (is_nd_region(dev))
-		set_dev_node(dev, to_nd_region(dev)->numa_node);
 	return add_uevent_var(env, "MODALIAS=" ND_DEVICE_MODALIAS_FMT,
 			to_nd_device_type(dev));
 }
@@ -488,6 +482,8 @@ static void nd_async_device_register(void *d, async_cookie_t cookie)
 		put_device(dev);
 	}
 	put_device(dev);
+	if (dev->parent)
+		put_device(dev->parent);
 }
 
 static void nd_async_device_unregister(void *d, async_cookie_t cookie)
@@ -506,7 +502,19 @@ void __nd_device_register(struct device *dev)
 {
 	if (!dev)
 		return;
+
+	/*
+	 * Ensure that region devices always have their NUMA node set as
+	 * early as possible. This way we are able to make certain that
+	 * any memory associated with the creation and the creation
+	 * itself of the region is associated with the correct node.
+	 */
+	if (is_nd_region(dev))
+		set_dev_node(dev, to_nd_region(dev)->numa_node);
+
 	dev->bus = &nvdimm_bus_type;
+	if (dev->parent)
+		get_device(dev->parent);
 	get_device(dev);
 	async_schedule_domain(nd_async_device_register, dev,
 			&nd_async_domain);
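
The fix pins the parent with get_device() before scheduling the async registration and drops that reference in the callback, so the parent cannot go away while a child is still registering. The same shape as a userspace analogy, with an atomic refcount and a thread standing in for the async domain (all names here are illustrative):

#include <pthread.h>
#include <stdatomic.h>
#include <stdlib.h>

struct obj { atomic_int refs; };

static struct obj *get_obj(struct obj *o)
{
	atomic_fetch_add(&o->refs, 1);
	return o;
}

static void put_obj(struct obj *o)
{
	if (atomic_fetch_sub(&o->refs, 1) == 1)
		free(o);	/* last reference gone */
}

struct child { struct obj *parent; };

static void *async_register(void *d)
{
	struct child *c = d;

	/* ... registration work that dereferences c->parent ... */
	put_obj(c->parent);	/* drop the reference taken before scheduling */
	free(c);
	return NULL;
}

int main(void)
{
	struct obj *parent = malloc(sizeof(*parent));
	struct child *c = malloc(sizeof(*c));
	pthread_t t;

	atomic_init(&parent->refs, 1);
	c->parent = get_obj(parent);	/* pin parent across the async work */
	pthread_create(&t, NULL, async_register, c);
	put_obj(parent);	/* caller may drop its reference immediately */
	pthread_join(&t, NULL);
	return 0;
}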

+ 1 - 5
drivers/nvdimm/dimm.c

@@ -75,7 +75,7 @@ static int nvdimm_probe(struct device *dev)
 	 * DIMM capacity. We fail the dimm probe to prevent regions from
 	 * attempting to parse the label area.
 	 */
-	rc = nvdimm_init_config_data(ndd);
+	rc = nd_label_data_init(ndd);
 	if (rc == -EACCES)
 		nvdimm_set_locked(dev);
 	if (rc)
@@ -84,10 +84,6 @@ static int nvdimm_probe(struct device *dev)
 	dev_dbg(dev, "config data size: %d\n", ndd->nsarea.config_size);
 
 	nvdimm_bus_lock(dev);
-	ndd->ns_current = nd_label_validate(ndd);
-	ndd->ns_next = nd_label_next_nsindex(ndd->ns_current);
-	nd_label_copy(ndd, to_next_namespace_index(ndd),
-			to_current_namespace_index(ndd));
 	if (ndd->ns_current >= 0) {
 		rc = nd_label_reserve_dpa(ndd);
 		if (rc == 0)

+ 24 - 36
drivers/nvdimm/dimm_devs.c

@@ -85,56 +85,48 @@ int nvdimm_init_nsarea(struct nvdimm_drvdata *ndd)
 	return cmd_rc;
 }
 
-int nvdimm_init_config_data(struct nvdimm_drvdata *ndd)
+int nvdimm_get_config_data(struct nvdimm_drvdata *ndd, void *buf,
+			   size_t offset, size_t len)
 {
 	struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(ndd->dev);
+	struct nvdimm_bus_descriptor *nd_desc = nvdimm_bus->nd_desc;
 	int rc = validate_dimm(ndd), cmd_rc = 0;
 	struct nd_cmd_get_config_data_hdr *cmd;
-	struct nvdimm_bus_descriptor *nd_desc;
-	u32 max_cmd_size, config_size;
-	size_t offset;
+	size_t max_cmd_size, buf_offset;
 
 	if (rc)
 		return rc;
 
-	if (ndd->data)
-		return 0;
-
-	if (ndd->nsarea.status || ndd->nsarea.max_xfer == 0
-			|| ndd->nsarea.config_size < ND_LABEL_MIN_SIZE) {
-		dev_dbg(ndd->dev, "failed to init config data area: (%d:%d)\n",
-				ndd->nsarea.max_xfer, ndd->nsarea.config_size);
+	if (offset + len > ndd->nsarea.config_size)
 		return -ENXIO;
-	}
 
-	ndd->data = kvmalloc(ndd->nsarea.config_size, GFP_KERNEL);
-	if (!ndd->data)
-		return -ENOMEM;
-
-	max_cmd_size = min_t(u32, PAGE_SIZE, ndd->nsarea.max_xfer);
-	cmd = kzalloc(max_cmd_size + sizeof(*cmd), GFP_KERNEL);
+	max_cmd_size = min_t(u32, len, ndd->nsarea.max_xfer);
+	cmd = kvzalloc(max_cmd_size + sizeof(*cmd), GFP_KERNEL);
 	if (!cmd)
 		return -ENOMEM;
 
-	nd_desc = nvdimm_bus->nd_desc;
-	for (config_size = ndd->nsarea.config_size, offset = 0;
-			config_size; config_size -= cmd->in_length,
-			offset += cmd->in_length) {
-		cmd->in_length = min(config_size, max_cmd_size);
-		cmd->in_offset = offset;
+	for (buf_offset = 0; len;
+	     len -= cmd->in_length, buf_offset += cmd->in_length) {
+		size_t cmd_size;
+
+		cmd->in_offset = offset + buf_offset;
+		cmd->in_length = min(max_cmd_size, len);
+
+		cmd_size = sizeof(*cmd) + cmd->in_length;
+
 		rc = nd_desc->ndctl(nd_desc, to_nvdimm(ndd->dev),
-				ND_CMD_GET_CONFIG_DATA, cmd,
-				cmd->in_length + sizeof(*cmd), &cmd_rc);
+				ND_CMD_GET_CONFIG_DATA, cmd, cmd_size, &cmd_rc);
 		if (rc < 0)
 			break;
 		if (cmd_rc < 0) {
 			rc = cmd_rc;
 			break;
 		}
-		memcpy(ndd->data + offset, cmd->out_buf, cmd->in_length);
+
+		/* out_buf should be valid, copy it into our output buffer */
+		memcpy(buf + buf_offset, cmd->out_buf, cmd->in_length);
 	}
-	dev_dbg(ndd->dev, "len: %zu rc: %d\n", offset, rc);
-	kfree(cmd);
+	kvfree(cmd);
 
 	return rc;
 }
@@ -151,15 +143,11 @@ int nvdimm_set_config_data(struct nvdimm_drvdata *ndd, size_t offset,
 	if (rc)
 		return rc;
 
-	if (!ndd->data)
-		return -ENXIO;
-
 	if (offset + len > ndd->nsarea.config_size)
 		return -ENXIO;
 
-	max_cmd_size = min_t(u32, PAGE_SIZE, len);
-	max_cmd_size = min_t(u32, max_cmd_size, ndd->nsarea.max_xfer);
-	cmd = kzalloc(max_cmd_size + sizeof(*cmd) + sizeof(u32), GFP_KERNEL);
+	max_cmd_size = min_t(u32, len, ndd->nsarea.max_xfer);
+	cmd = kvzalloc(max_cmd_size + sizeof(*cmd) + sizeof(u32), GFP_KERNEL);
 	if (!cmd)
 		return -ENOMEM;
 
@@ -183,7 +171,7 @@ int nvdimm_set_config_data(struct nvdimm_drvdata *ndd, size_t offset,
 			break;
 		}
 	}
-	kfree(cmd);
+	kvfree(cmd);
 
 	return rc;
 }
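
nvdimm_get_config_data() now fetches an arbitrary (offset, len) window of the label area in max_xfer-sized pieces rather than slurping the whole area at probe. The loop's bookkeeping, extracted into a self-contained sketch over an in-memory buffer (no command header or firmware call; names are illustrative):

#include <stdio.h>
#include <string.h>

/* chunked window copy mirroring the offset/buf_offset bookkeeping */
static int get_config_data(const char *area, size_t area_size,
			   char *buf, size_t offset, size_t len,
			   size_t max_xfer)
{
	size_t buf_offset;

	if (offset + len > area_size)
		return -1;	/* -ENXIO in the kernel version */

	for (buf_offset = 0; len; ) {
		size_t xfer = len < max_xfer ? len : max_xfer;

		memcpy(buf + buf_offset, area + offset + buf_offset, xfer);
		buf_offset += xfer;
		len -= xfer;
	}
	return 0;
}

int main(void)
{
	char area[64] = "0123456789abcdef0123456789abcdef", out[8] = "";

	/* 6-byte window at offset 10, moved in 4-byte transfers */
	if (get_config_data(area, sizeof(area), out, 10, 6, 4) == 0)
		printf("%.6s\n", out);	/* prints "abcdef" */
	return 0;
}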

+ 137 - 7
drivers/nvdimm/label.c

@@ -75,7 +75,8 @@ size_t sizeof_namespace_index(struct nvdimm_drvdata *ndd)
 	/*
 	 * Per UEFI 2.7, the minimum size of the Label Storage Area is large
 	 * enough to hold 2 index blocks and 2 labels.  The minimum index
-	 * block size is 256 bytes, and the minimum label size is 256 bytes.
+	 * block size is 256 bytes. The label size is 128 for namespaces
+	 * prior to version 1.2 and at minimum 256 for version 1.2 and later.
 	 */
 	nslot = nvdimm_num_label_slots(ndd);
 	space = ndd->nsarea.config_size - nslot * sizeof_namespace_label(ndd);
@@ -183,6 +184,13 @@ static int __nd_label_validate(struct nvdimm_drvdata *ndd)
 					__le64_to_cpu(nsindex[i]->otheroff));
 			continue;
 		}
+		if (__le64_to_cpu(nsindex[i]->labeloff)
+				!= 2 * sizeof_namespace_index(ndd)) {
+			dev_dbg(dev, "nsindex%d labeloff: %#llx invalid\n",
+					i, (unsigned long long)
+					__le64_to_cpu(nsindex[i]->labeloff));
+			continue;
+		}
 
 		size = __le64_to_cpu(nsindex[i]->mysize);
 		if (size > sizeof_namespace_index(ndd)
@@ -227,7 +235,7 @@ static int __nd_label_validate(struct nvdimm_drvdata *ndd)
 	return -1;
 }
 
-int nd_label_validate(struct nvdimm_drvdata *ndd)
+static int nd_label_validate(struct nvdimm_drvdata *ndd)
 {
 	/*
 	 * In order to probe for and validate namespace index blocks we
@@ -250,12 +258,12 @@ int nd_label_validate(struct nvdimm_drvdata *ndd)
 	return -1;
 }
 
-void nd_label_copy(struct nvdimm_drvdata *ndd, struct nd_namespace_index *dst,
-		struct nd_namespace_index *src)
+static void nd_label_copy(struct nvdimm_drvdata *ndd,
+			  struct nd_namespace_index *dst,
+			  struct nd_namespace_index *src)
 {
-	if (dst && src)
-		/* pass */;
-	else
+	/* just exit if either destination or source is NULL */
+	if (!dst || !src)
 		return;
 
 	memcpy(dst, src, sizeof_namespace_index(ndd));
@@ -410,6 +418,128 @@ int nd_label_reserve_dpa(struct nvdimm_drvdata *ndd)
 	return 0;
 }
 
+int nd_label_data_init(struct nvdimm_drvdata *ndd)
+{
+	size_t config_size, read_size, max_xfer, offset;
+	struct nd_namespace_index *nsindex;
+	unsigned int i;
+	int rc = 0;
+	u32 nslot;
+
+	if (ndd->data)
+		return 0;
+
+	if (ndd->nsarea.status || ndd->nsarea.max_xfer == 0) {
+		dev_dbg(ndd->dev, "failed to init config data area: (%u:%u)\n",
+			ndd->nsarea.max_xfer, ndd->nsarea.config_size);
+		return -ENXIO;
+	}
+
+	/*
+	 * We need to determine the maximum index area as this is the section
+	 * we must read and validate before we can start processing labels.
+	 *
+	 * If the area is too small to contain the two indexes and 2 labels
+	 * then we abort.
+	 *
+	 * Start at a label size of 128 as this should result in the largest
+	 * possible namespace index size.
+	 */
+	ndd->nslabel_size = 128;
+	read_size = sizeof_namespace_index(ndd) * 2;
+	if (!read_size)
+		return -ENXIO;
+
+	/* Allocate config data */
+	config_size = ndd->nsarea.config_size;
+	ndd->data = kvzalloc(config_size, GFP_KERNEL);
+	if (!ndd->data)
+		return -ENOMEM;
+
+	/*
+	 * We want to guarantee as few reads as possible while conserving
+	 * memory. To do that we figure out how much unused space will be left
+	 * in the last read, divide that by the total number of reads it is
+	 * going to take given our maximum transfer size, and then reduce our
+	 * maximum transfer size based on that result.
+	 */
+	max_xfer = min_t(size_t, ndd->nsarea.max_xfer, config_size);
+	if (read_size < max_xfer) {
+		/* trim waste */
+		max_xfer -= ((max_xfer - 1) - (config_size - 1) % max_xfer) /
+			    DIV_ROUND_UP(config_size, max_xfer);
+		/* make certain we read indexes in exactly 1 read */
+		if (max_xfer < read_size)
+			max_xfer = read_size;
+	}
+
+	/* Make our initial read size a multiple of max_xfer size */
+	read_size = min(DIV_ROUND_UP(read_size, max_xfer) * max_xfer,
+			config_size);
+
+	/* Read the index data */
+	rc = nvdimm_get_config_data(ndd, ndd->data, 0, read_size);
+	if (rc)
+		goto out_err;
+
+	/* Validate index data, if not valid assume all labels are invalid */
+	ndd->ns_current = nd_label_validate(ndd);
+	if (ndd->ns_current < 0)
+		return 0;
+
+	/* Record our index values */
+	ndd->ns_next = nd_label_next_nsindex(ndd->ns_current);
+
+	/* Copy "current" index on top of the "next" index */
+	nsindex = to_current_namespace_index(ndd);
+	nd_label_copy(ndd, to_next_namespace_index(ndd), nsindex);
+
+	/* Determine starting offset for label data */
+	offset = __le64_to_cpu(nsindex->labeloff);
+	nslot = __le32_to_cpu(nsindex->nslot);
+
+	/* Loop through the free list pulling in any active labels */
+	for (i = 0; i < nslot; i++, offset += ndd->nslabel_size) {
+		size_t label_read_size;
+
+		/* zero out the unused labels */
+		if (test_bit_le(i, nsindex->free)) {
+			memset(ndd->data + offset, 0, ndd->nslabel_size);
+			continue;
+		}
+
+		/* if we already read past here then just continue */
+		if (offset + ndd->nslabel_size <= read_size)
+			continue;
+
+		/* if we haven't read in a while reset our read_size offset */
+		if (read_size < offset)
+			read_size = offset;
+
+		/* determine how much more will be read after this next call. */
+		label_read_size = offset + ndd->nslabel_size - read_size;
+		label_read_size = DIV_ROUND_UP(label_read_size, max_xfer) *
+				  max_xfer;
+
+		/* truncate last read if needed */
+		if (read_size + label_read_size > config_size)
+			label_read_size = config_size - read_size;
+
+		/* Read the label data */
+		rc = nvdimm_get_config_data(ndd, ndd->data + read_size,
+					    read_size, label_read_size);
+		if (rc)
+			goto out_err;
+
+		/* push read_size to next read offset */
+		read_size += label_read_size;
+	}
+
+	dev_dbg(ndd->dev, "len: %zu rc: %d\n", offset, rc);
+out_err:
+	return rc;
+}
+
 int nd_label_active_count(struct nvdimm_drvdata *ndd)
 {
 	struct nd_namespace_index *nsindex;
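
The "trim waste" arithmetic in nd_label_data_init() above shrinks max_xfer so the final transfer carries as little dead space as possible without increasing the number of reads. A toy computation of its effect (numbers invented for illustration):

#include <stdio.h>

#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
	size_t config_size = 1000, max_xfer = 300;	/* toy numbers */
	size_t orig = max_xfer;
	size_t nreads = DIV_ROUND_UP(config_size, max_xfer);
	/* dead space in the last of the nreads transfers */
	size_t waste = (max_xfer - 1) - (config_size - 1) % max_xfer;

	/* spread the slack across all reads, as the kernel hunk does */
	max_xfer -= waste / nreads;

	/* prints: max_xfer 300 -> 250: still 4 reads, last-read waste 0 */
	printf("max_xfer %zu -> %zu: still %zu reads, last-read waste %zu\n",
	       orig, max_xfer, DIV_ROUND_UP(config_size, max_xfer),
	       (max_xfer - 1) - (config_size - 1) % max_xfer);
	return 0;
}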

+ 1 - 3
drivers/nvdimm/label.h

@@ -138,9 +138,7 @@ static inline int nd_label_next_nsindex(int index)
 }
 
 struct nvdimm_drvdata;
-int nd_label_validate(struct nvdimm_drvdata *ndd);
-void nd_label_copy(struct nvdimm_drvdata *ndd, struct nd_namespace_index *dst,
-		struct nd_namespace_index *src);
+int nd_label_data_init(struct nvdimm_drvdata *ndd);
 size_t sizeof_namespace_index(struct nvdimm_drvdata *ndd);
 int nd_label_active_count(struct nvdimm_drvdata *ndd);
 struct nd_namespace_label *nd_label_active(struct nvdimm_drvdata *ndd, int n);

+ 0 - 1
drivers/nvdimm/namespace_devs.c

@@ -2099,7 +2099,6 @@ static struct device *nd_namespace_pmem_create(struct nd_region *nd_region)
 		return NULL;
 	}
 	dev_set_name(dev, "namespace%d.%d", nd_region->id, nspm->id);
-	dev->parent = &nd_region->dev;
 	dev->groups = nd_namespace_attribute_groups;
 	nd_namespace_pmem_set_resource(nd_region, nspm, 0);
 

+ 0 - 1
drivers/nvdimm/nd-core.h

@@ -14,7 +14,6 @@
 #define __ND_CORE_H__
 #include <linux/libnvdimm.h>
 #include <linux/device.h>
-#include <linux/libnvdimm.h>
 #include <linux/sizes.h>
 #include <linux/mutex.h>
 #include <linux/nd.h>

+ 2 - 0
drivers/nvdimm/nd.h

@@ -241,6 +241,8 @@ struct nvdimm_drvdata *to_ndd(struct nd_mapping *nd_mapping);
 int nvdimm_check_config_data(struct device *dev);
 int nvdimm_init_nsarea(struct nvdimm_drvdata *ndd);
 int nvdimm_init_config_data(struct nvdimm_drvdata *ndd);
+int nvdimm_get_config_data(struct nvdimm_drvdata *ndd, void *buf,
+			   size_t offset, size_t len);
 int nvdimm_set_config_data(struct nvdimm_drvdata *ndd, size_t offset,
 		void *buf, size_t len);
 long nvdimm_clear_poison(struct device *dev, phys_addr_t phys,

+ 60 - 1
drivers/nvdimm/pfn_devs.c

@@ -361,6 +361,65 @@ struct device *nd_pfn_create(struct nd_region *nd_region)
 	return dev;
 }
 
+/*
+ * nd_pfn_clear_memmap_errors() clears any errors in the volatile memmap
+ * space associated with the namespace. If the memmap is set to DRAM, then
+ * this is a no-op. Since the memmap area is freshly initialized during
+ * probe, we have an opportunity to clear any badblocks in this area.
+ */
+static int nd_pfn_clear_memmap_errors(struct nd_pfn *nd_pfn)
+{
+	struct nd_region *nd_region = to_nd_region(nd_pfn->dev.parent);
+	struct nd_namespace_common *ndns = nd_pfn->ndns;
+	void *zero_page = page_address(ZERO_PAGE(0));
+	struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb;
+	int num_bad, meta_num, rc, bb_present;
+	sector_t first_bad, meta_start;
+	struct nd_namespace_io *nsio;
+
+	if (nd_pfn->mode != PFN_MODE_PMEM)
+		return 0;
+
+	nsio = to_nd_namespace_io(&ndns->dev);
+	meta_start = (SZ_4K + sizeof(*pfn_sb)) >> 9;
+	meta_num = (le64_to_cpu(pfn_sb->dataoff) >> 9) - meta_start;
+
+	do {
+		unsigned long zero_len;
+		u64 nsoff;
+
+		bb_present = badblocks_check(&nd_region->bb, meta_start,
+				meta_num, &first_bad, &num_bad);
+		if (bb_present) {
+			dev_dbg(&nd_pfn->dev, "meta: %x badblocks at %lx\n",
+					num_bad, first_bad);
+			nsoff = ALIGN_DOWN((nd_region->ndr_start
+					+ (first_bad << 9)) - nsio->res.start,
+					PAGE_SIZE);
+			zero_len = ALIGN(num_bad << 9, PAGE_SIZE);
+			while (zero_len) {
+				unsigned long chunk = min(zero_len, PAGE_SIZE);
+
+				rc = nvdimm_write_bytes(ndns, nsoff, zero_page,
+							chunk, 0);
+				if (rc)
+					break;
+
+				zero_len -= chunk;
+				nsoff += chunk;
+			}
+			if (rc) {
+				dev_err(&nd_pfn->dev,
+					"error clearing %x badblocks at %lx\n",
+					num_bad, first_bad);
+				return rc;
+			}
+		}
+	} while (bb_present);
+
+	return 0;
+}
+
 int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig)
 {
 	u64 checksum, offset;
@@ -477,7 +536,7 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig)
 		return -ENXIO;
 	}
 
-	return 0;
+	return nd_pfn_clear_memmap_errors(nd_pfn);
 }
 EXPORT_SYMBOL(nd_pfn_validate);
 

+ 3 - 1
drivers/nvdimm/pmem.c

@@ -421,9 +421,11 @@ static int pmem_attach_disk(struct device *dev,
 		addr = devm_memremap_pages(dev, &pmem->pgmap);
 		pmem->pfn_flags |= PFN_MAP;
 		memcpy(&bb_res, &pmem->pgmap.res, sizeof(bb_res));
-	} else
+	} else {
 		addr = devm_memremap(dev, pmem->phys_addr,
 				pmem->size, ARCH_MEMREMAP_PMEM);
+		memcpy(&bb_res, &nsio->res, sizeof(bb_res));
+	}
 
 	/*
 	 * At release time the queue must be frozen before

+ 9 - 2
drivers/nvdimm/region_devs.c

@@ -560,10 +560,17 @@ static ssize_t region_badblocks_show(struct device *dev,
 		struct device_attribute *attr, char *buf)
 {
 	struct nd_region *nd_region = to_nd_region(dev);
+	ssize_t rc;
 
-	return badblocks_show(&nd_region->bb, buf, 0);
-}
+	device_lock(dev);
+	if (dev->driver)
+		rc = badblocks_show(&nd_region->bb, buf, 0);
+	else
+		rc = -ENXIO;
+	device_unlock(dev);
 
+	return rc;
+}
 static DEVICE_ATTR(badblocks, 0444, region_badblocks_show, NULL);
 
 static ssize_t resource_show(struct device *dev,

+ 22 - 0
include/linux/ndctl.h

@@ -0,0 +1,22 @@
+/*
+ * Copyright (c) 2014-2016, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU Lesser General Public License,
+ * version 2.1, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for
+ * more details.
+ */
+#ifndef _LINUX_NDCTL_H
+#define _LINUX_NDCTL_H
+
+#include <uapi/linux/ndctl.h>
+
+enum {
+	ND_MIN_NAMESPACE_SIZE = PAGE_SIZE,
+};
+
+#endif /* _LINUX_NDCTL_H */
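
ND_MIN_NAMESPACE_SIZE lands in this kernel-internal header because PAGE_SIZE does not exist for userspace: it is a build-time kernel constant, and an application can only discover the page size at runtime. How a userspace consumer would obtain the equivalent value (illustrative):

#include <stdio.h>
#include <unistd.h>

int main(void)
{
	/* no UAPI PAGE_SIZE exists; userspace must ask at runtime */
	long page_size = sysconf(_SC_PAGESIZE);

	printf("runtime page size: %ld\n", page_size);
	return 0;
}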

+ 21 - 31
include/uapi/linux/ndctl.h

@@ -128,37 +128,31 @@ enum {
 
 static inline const char *nvdimm_bus_cmd_name(unsigned cmd)
 {
-	static const char * const names[] = {
-		[ND_CMD_ARS_CAP] = "ars_cap",
-		[ND_CMD_ARS_START] = "ars_start",
-		[ND_CMD_ARS_STATUS] = "ars_status",
-		[ND_CMD_CLEAR_ERROR] = "clear_error",
-		[ND_CMD_CALL] = "cmd_call",
-	};
-
-	if (cmd < ARRAY_SIZE(names) && names[cmd])
-		return names[cmd];
-	return "unknown";
+	switch (cmd) {
+	case ND_CMD_ARS_CAP:		return "ars_cap";
+	case ND_CMD_ARS_START:		return "ars_start";
+	case ND_CMD_ARS_STATUS:		return "ars_status";
+	case ND_CMD_CLEAR_ERROR:	return "clear_error";
+	case ND_CMD_CALL:		return "cmd_call";
+	default:			return "unknown";
+	}
 }
 
 static inline const char *nvdimm_cmd_name(unsigned cmd)
 {
-	static const char * const names[] = {
-		[ND_CMD_SMART] = "smart",
-		[ND_CMD_SMART_THRESHOLD] = "smart_thresh",
-		[ND_CMD_DIMM_FLAGS] = "flags",
-		[ND_CMD_GET_CONFIG_SIZE] = "get_size",
-		[ND_CMD_GET_CONFIG_DATA] = "get_data",
-		[ND_CMD_SET_CONFIG_DATA] = "set_data",
-		[ND_CMD_VENDOR_EFFECT_LOG_SIZE] = "effect_size",
-		[ND_CMD_VENDOR_EFFECT_LOG] = "effect_log",
-		[ND_CMD_VENDOR] = "vendor",
-		[ND_CMD_CALL] = "cmd_call",
-	};
-
-	if (cmd < ARRAY_SIZE(names) && names[cmd])
-		return names[cmd];
-	return "unknown";
+	switch (cmd) {
+	case ND_CMD_SMART:			return "smart";
+	case ND_CMD_SMART_THRESHOLD:		return "smart_thresh";
+	case ND_CMD_DIMM_FLAGS:			return "flags";
+	case ND_CMD_GET_CONFIG_SIZE:		return "get_size";
+	case ND_CMD_GET_CONFIG_DATA:		return "get_data";
+	case ND_CMD_SET_CONFIG_DATA:		return "set_data";
+	case ND_CMD_VENDOR_EFFECT_LOG_SIZE:	return "effect_size";
+	case ND_CMD_VENDOR_EFFECT_LOG:		return "effect_log";
+	case ND_CMD_VENDOR:			return "vendor";
+	case ND_CMD_CALL:			return "cmd_call";
+	default:				return "unknown";
+	}
 }
 
 #define ND_IOCTL 'N'
@@ -208,10 +202,6 @@ enum nd_driver_flags {
 	ND_DRIVER_DAX_PMEM	  = 1 << ND_DEVICE_DAX_PMEM,
 };
 
-enum {
-	ND_MIN_NAMESPACE_SIZE = PAGE_SIZE,
-};
-
 enum ars_masks {
 	ARS_STATUS_MASK = 0x0000FFFF,
 	ARS_EXT_STATUS_SHIFT = 16,
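
The switch-statement rewrite above is needed because the removed designated-array-initializer form is C99-only: g++ rejects it, and these static inline helpers live in a UAPI header that C++ programs may include. A minimal reproduction of the rejected construct (contrived; compiles as C, fails under g++):

/* gcc accepts the designated initializer below; g++ does not, which is
 * why the UAPI name-lookup helpers were rewritten as switch statements */
static const char * const names[] = {
	[3] = "three",
};

int main(void)
{
	return names[3][0] == 't' ? 0 : 1;
}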

+ 1 - 0
tools/testing/nvdimm/Kbuild

@@ -22,6 +22,7 @@ NVDIMM_SRC := $(DRIVERS)/nvdimm
 ACPI_SRC := $(DRIVERS)/acpi/nfit
 DAX_SRC := $(DRIVERS)/dax
 ccflags-y := -I$(src)/$(NVDIMM_SRC)/
+ccflags-y += -I$(src)/$(ACPI_SRC)/
 
 obj-$(CONFIG_LIBNVDIMM) += libnvdimm.o
 obj-$(CONFIG_BLK_DEV_PMEM) += nd_pmem.o

+ 8 - 0
tools/testing/nvdimm/acpi_nfit_test.c

@@ -4,5 +4,13 @@
 #include <linux/module.h>
 #include <linux/printk.h>
 #include "watermark.h"
+#include <nfit.h>
 
 nfit_test_watermark(acpi_nfit);
+
+/* strong / override definition of nfit_intel_shutdown_status */
+void nfit_intel_shutdown_status(struct nfit_mem *nfit_mem)
+{
+	set_bit(NFIT_MEM_DIRTY_COUNT, &nfit_mem->flags);
+	nfit_mem->dirty_shutdown = 42;
+}
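
This works because the in-kernel nfit_intel_shutdown_status() is declared __weak, so this ordinary ("strong") definition wins at link time when the test module is built. The linkage trick in miniature (two files shown in one listing for brevity; a weak and a strong definition cannot share a translation unit):

/* lib.c: default implementation, marked weak so it can be overridden */
__attribute__((weak)) int get_count(void)
{
	return 0;
}

/* test.c: strong definition; the linker prefers it over the weak one */
#include <stdio.h>

int get_count(void)
{
	return 42;
}

int main(void)
{
	printf("%d\n", get_count());	/* prints 42; drop test.c's
					 * get_count() and the weak
					 * default (0) is used instead */
	return 0;
}

/* build: cc lib.c test.c && ./a.out */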

+ 3 - 1
tools/testing/nvdimm/test/nfit.c

@@ -24,6 +24,7 @@
 #include <linux/list.h>
 #include <linux/slab.h>
 #include <nd-core.h>
+#include <intel.h>
 #include <nfit.h>
 #include <nd.h>
 #include "nfit_test.h"
@@ -148,6 +149,7 @@ static const struct nd_intel_smart smart_def = {
 		| ND_INTEL_SMART_ALARM_VALID
 		| ND_INTEL_SMART_USED_VALID
 		| ND_INTEL_SMART_SHUTDOWN_VALID
+		| ND_INTEL_SMART_SHUTDOWN_COUNT_VALID
 		| ND_INTEL_SMART_MTEMP_VALID
 		| ND_INTEL_SMART_CTEMP_VALID,
 	.health = ND_INTEL_SMART_NON_CRITICAL_HEALTH,
@@ -160,8 +162,8 @@ static const struct nd_intel_smart smart_def = {
 	.ait_status = 1,
 	.life_used = 5,
 	.shutdown_state = 0,
+	.shutdown_count = 42,
 	.vendor_size = 0,
-	.shutdown_count = 100,
 };
 
 struct nfit_test_fw {

+ 0 - 24
tools/testing/nvdimm/test/nfit_test.h

@@ -117,30 +117,6 @@ struct nd_cmd_ars_err_inj_stat {
 #define ND_INTEL_SMART_INJECT_FATAL		(1 << 2)
 #define ND_INTEL_SMART_INJECT_SHUTDOWN		(1 << 3)
 
-struct nd_intel_smart {
-	__u32 status;
-	union {
-		struct {
-			__u32 flags;
-			__u8 reserved0[4];
-			__u8 health;
-			__u8 spares;
-			__u8 life_used;
-			__u8 alarm_flags;
-			__u16 media_temperature;
-			__u16 ctrl_temperature;
-			__u32 shutdown_count;
-			__u8 ait_status;
-			__u16 pmic_temperature;
-			__u8 reserved1[8];
-			__u8 shutdown_state;
-			__u32 vendor_size;
-			__u8 vendor_data[92];
-		} __packed;
-		__u8 data[128];
-	};
-} __packed;
-
 struct nd_intel_smart_threshold {
 	__u32 status;
 	union {