core.c 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623
  1. /*
  2. * Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
  3. *
  4. * This program is free software; you can redistribute it and/or modify
  5. * it under the terms of version 2 of the GNU General Public License as
  6. * published by the Free Software Foundation.
  7. *
  8. * This program is distributed in the hope that it will be useful, but
  9. * WITHOUT ANY WARRANTY; without even the implied warranty of
  10. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  11. * General Public License for more details.
  12. */
  13. #include <linux/libnvdimm.h>
  14. #include <linux/badblocks.h>
  15. #include <linux/export.h>
  16. #include <linux/module.h>
  17. #include <linux/blkdev.h>
  18. #include <linux/device.h>
  19. #include <linux/ctype.h>
  20. #include <linux/ndctl.h>
  21. #include <linux/mutex.h>
  22. #include <linux/slab.h>
  23. #include "nd-core.h"
  24. #include "nd.h"
  /*
   * Global list of registered nvdimm buses. Additions and removals in this
   * file are done with nvdimm_bus_list_mutex held.
   */
  LIST_HEAD(nvdimm_bus_list);
  DEFINE_MUTEX(nvdimm_bus_list_mutex);

  /* Allocator for nvdimm_bus->id, which is used to name "ndbus%d" devices. */
  static DEFINE_IDA(nd_ida);
  28. void nvdimm_bus_lock(struct device *dev)
  29. {
  30. struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
  31. if (!nvdimm_bus)
  32. return;
  33. mutex_lock(&nvdimm_bus->reconfig_mutex);
  34. }
  35. EXPORT_SYMBOL(nvdimm_bus_lock);
  36. void nvdimm_bus_unlock(struct device *dev)
  37. {
  38. struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
  39. if (!nvdimm_bus)
  40. return;
  41. mutex_unlock(&nvdimm_bus->reconfig_mutex);
  42. }
  43. EXPORT_SYMBOL(nvdimm_bus_unlock);
  44. bool is_nvdimm_bus_locked(struct device *dev)
  45. {
  46. struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
  47. if (!nvdimm_bus)
  48. return false;
  49. return mutex_is_locked(&nvdimm_bus->reconfig_mutex);
  50. }
  51. EXPORT_SYMBOL(is_nvdimm_bus_locked);
  52. u64 nd_fletcher64(void *addr, size_t len, bool le)
  53. {
  54. u32 *buf = addr;
  55. u32 lo32 = 0;
  56. u64 hi32 = 0;
  57. int i;
  58. for (i = 0; i < len / sizeof(u32); i++) {
  59. lo32 += le ? le32_to_cpu((__le32) buf[i]) : buf[i];
  60. hi32 += lo32;
  61. }
  62. return hi32 << 32 | lo32;
  63. }
  64. EXPORT_SYMBOL_GPL(nd_fletcher64);
  65. static void nvdimm_bus_release(struct device *dev)
  66. {
  67. struct nvdimm_bus *nvdimm_bus;
  68. nvdimm_bus = container_of(dev, struct nvdimm_bus, dev);
  69. ida_simple_remove(&nd_ida, nvdimm_bus->id);
  70. kfree(nvdimm_bus);
  71. }
  72. struct nvdimm_bus *to_nvdimm_bus(struct device *dev)
  73. {
  74. struct nvdimm_bus *nvdimm_bus;
  75. nvdimm_bus = container_of(dev, struct nvdimm_bus, dev);
  76. WARN_ON(nvdimm_bus->dev.release != nvdimm_bus_release);
  77. return nvdimm_bus;
  78. }
  79. EXPORT_SYMBOL_GPL(to_nvdimm_bus);
  80. struct nvdimm_bus_descriptor *to_nd_desc(struct nvdimm_bus *nvdimm_bus)
  81. {
  82. /* struct nvdimm_bus definition is private to libnvdimm */
  83. return nvdimm_bus->nd_desc;
  84. }
  85. EXPORT_SYMBOL_GPL(to_nd_desc);
  86. struct nvdimm_bus *walk_to_nvdimm_bus(struct device *nd_dev)
  87. {
  88. struct device *dev;
  89. for (dev = nd_dev; dev; dev = dev->parent)
  90. if (dev->release == nvdimm_bus_release)
  91. break;
  92. dev_WARN_ONCE(nd_dev, !dev, "invalid dev, not on nd bus\n");
  93. if (dev)
  94. return to_nvdimm_bus(dev);
  95. return NULL;
  96. }
  /* True for the characters allowed between uuid bytes: newline, '-', ':', NUL. */
  static bool is_uuid_sep(char sep)
  {
      switch (sep) {
      case '\n':
      case '-':
      case ':':
      case '\0':
          return true;
      default:
          return false;
      }
  }
  103. static int nd_uuid_parse(struct device *dev, u8 *uuid_out, const char *buf,
  104. size_t len)
  105. {
  106. const char *str = buf;
  107. u8 uuid[16];
  108. int i;
  109. for (i = 0; i < 16; i++) {
  110. if (!isxdigit(str[0]) || !isxdigit(str[1])) {
  111. dev_dbg(dev, "%s: pos: %d buf[%zd]: %c buf[%zd]: %c\n",
  112. __func__, i, str - buf, str[0],
  113. str + 1 - buf, str[1]);
  114. return -EINVAL;
  115. }
  116. uuid[i] = (hex_to_bin(str[0]) << 4) | hex_to_bin(str[1]);
  117. str += 2;
  118. if (is_uuid_sep(*str))
  119. str++;
  120. }
  121. memcpy(uuid_out, uuid, sizeof(uuid));
  122. return 0;
  123. }
  124. /**
  125. * nd_uuid_store: common implementation for writing 'uuid' sysfs attributes
  126. * @dev: container device for the uuid property
  127. * @uuid_out: uuid buffer to replace
  128. * @buf: raw sysfs buffer to parse
  129. *
  130. * Enforce that uuids can only be changed while the device is disabled
  131. * (driver detached)
  132. * LOCKING: expects device_lock() is held on entry
  133. */
  134. int nd_uuid_store(struct device *dev, u8 **uuid_out, const char *buf,
  135. size_t len)
  136. {
  137. u8 uuid[16];
  138. int rc;
  139. if (dev->driver)
  140. return -EBUSY;
  141. rc = nd_uuid_parse(dev, uuid, buf, len);
  142. if (rc)
  143. return rc;
  144. kfree(*uuid_out);
  145. *uuid_out = kmemdup(uuid, sizeof(uuid), GFP_KERNEL);
  146. if (!(*uuid_out))
  147. return -ENOMEM;
  148. return 0;
  149. }
  /**
   * nd_sector_size_show() - format the list of supported sector sizes
   * @current_lbasize: the currently selected size; printed inside brackets
   * @supported: zero-terminated array of supported sizes
   * @buf: output buffer; the caller must provide enough room for the list
   *
   * Each size is followed by a single space and the list ends with a
   * newline. Returns the number of characters written (excluding the NUL).
   */
  ssize_t nd_sector_size_show(unsigned long current_lbasize,
          const unsigned long *supported, char *buf)
  {
      ssize_t len = 0;
      int i;

      for (i = 0; supported[i]; i++) {
          /* the values are unsigned long, so print them with %lu */
          if (current_lbasize == supported[i])
              len += sprintf(buf + len, "[%lu] ", supported[i]);
          else
              len += sprintf(buf + len, "%lu ", supported[i]);
      }
      len += sprintf(buf + len, "\n");

      return len;
  }
  163. ssize_t nd_sector_size_store(struct device *dev, const char *buf,
  164. unsigned long *current_lbasize, const unsigned long *supported)
  165. {
  166. unsigned long lbasize;
  167. int rc, i;
  168. if (dev->driver)
  169. return -EBUSY;
  170. rc = kstrtoul(buf, 0, &lbasize);
  171. if (rc)
  172. return rc;
  173. for (i = 0; supported[i]; i++)
  174. if (lbasize == supported[i])
  175. break;
  176. if (supported[i]) {
  177. *current_lbasize = lbasize;
  178. return 0;
  179. } else {
  180. return -EINVAL;
  181. }
  182. }
  183. void __nd_iostat_start(struct bio *bio, unsigned long *start)
  184. {
  185. struct gendisk *disk = bio->bi_bdev->bd_disk;
  186. const int rw = bio_data_dir(bio);
  187. int cpu = part_stat_lock();
  188. *start = jiffies;
  189. part_round_stats(cpu, &disk->part0);
  190. part_stat_inc(cpu, &disk->part0, ios[rw]);
  191. part_stat_add(cpu, &disk->part0, sectors[rw], bio_sectors(bio));
  192. part_inc_in_flight(&disk->part0, rw);
  193. part_stat_unlock();
  194. }
  195. EXPORT_SYMBOL(__nd_iostat_start);
  196. void nd_iostat_end(struct bio *bio, unsigned long start)
  197. {
  198. struct gendisk *disk = bio->bi_bdev->bd_disk;
  199. unsigned long duration = jiffies - start;
  200. const int rw = bio_data_dir(bio);
  201. int cpu = part_stat_lock();
  202. part_stat_add(cpu, &disk->part0, ticks[rw], duration);
  203. part_round_stats(cpu, &disk->part0);
  204. part_dec_in_flight(&disk->part0, rw);
  205. part_stat_unlock();
  206. }
  207. EXPORT_SYMBOL(nd_iostat_end);
  208. static ssize_t commands_show(struct device *dev,
  209. struct device_attribute *attr, char *buf)
  210. {
  211. int cmd, len = 0;
  212. struct nvdimm_bus *nvdimm_bus = to_nvdimm_bus(dev);
  213. struct nvdimm_bus_descriptor *nd_desc = nvdimm_bus->nd_desc;
  214. for_each_set_bit(cmd, &nd_desc->dsm_mask, BITS_PER_LONG)
  215. len += sprintf(buf + len, "%s ", nvdimm_bus_cmd_name(cmd));
  216. len += sprintf(buf + len, "\n");
  217. return len;
  218. }
  219. static DEVICE_ATTR_RO(commands);
  220. static const char *nvdimm_bus_provider(struct nvdimm_bus *nvdimm_bus)
  221. {
  222. struct nvdimm_bus_descriptor *nd_desc = nvdimm_bus->nd_desc;
  223. struct device *parent = nvdimm_bus->dev.parent;
  224. if (nd_desc->provider_name)
  225. return nd_desc->provider_name;
  226. else if (parent)
  227. return dev_name(parent);
  228. else
  229. return "unknown";
  230. }
  231. static ssize_t provider_show(struct device *dev,
  232. struct device_attribute *attr, char *buf)
  233. {
  234. struct nvdimm_bus *nvdimm_bus = to_nvdimm_bus(dev);
  235. return sprintf(buf, "%s\n", nvdimm_bus_provider(nvdimm_bus));
  236. }
  237. static DEVICE_ATTR_RO(provider);
  /*
   * device_for_each_child() callback: take and immediately drop the device
   * lock of @dev. @data is unused. Always returns 0 so iteration continues.
   */
  static int flush_namespaces(struct device *dev, void *data)
  {
      /* lock/unlock pair only; nothing is done while the lock is held */
      device_lock(dev);
      device_unlock(dev);

      return 0;
  }
  244. static int flush_regions_dimms(struct device *dev, void *data)
  245. {
  246. device_lock(dev);
  247. device_unlock(dev);
  248. device_for_each_child(dev, NULL, flush_namespaces);
  249. return 0;
  250. }
  251. static ssize_t wait_probe_show(struct device *dev,
  252. struct device_attribute *attr, char *buf)
  253. {
  254. nd_synchronize();
  255. device_for_each_child(dev, NULL, flush_regions_dimms);
  256. return sprintf(buf, "1\n");
  257. }
  258. static DEVICE_ATTR_RO(wait_probe);
  259. static struct attribute *nvdimm_bus_attributes[] = {
  260. &dev_attr_commands.attr,
  261. &dev_attr_wait_probe.attr,
  262. &dev_attr_provider.attr,
  263. NULL,
  264. };
  265. struct attribute_group nvdimm_bus_attribute_group = {
  266. .attrs = nvdimm_bus_attributes,
  267. };
  268. EXPORT_SYMBOL_GPL(nvdimm_bus_attribute_group);
  269. struct nvdimm_bus *__nvdimm_bus_register(struct device *parent,
  270. struct nvdimm_bus_descriptor *nd_desc, struct module *module)
  271. {
  272. struct nvdimm_bus *nvdimm_bus;
  273. int rc;
  274. nvdimm_bus = kzalloc(sizeof(*nvdimm_bus), GFP_KERNEL);
  275. if (!nvdimm_bus)
  276. return NULL;
  277. INIT_LIST_HEAD(&nvdimm_bus->list);
  278. INIT_LIST_HEAD(&nvdimm_bus->poison_list);
  279. init_waitqueue_head(&nvdimm_bus->probe_wait);
  280. nvdimm_bus->id = ida_simple_get(&nd_ida, 0, 0, GFP_KERNEL);
  281. mutex_init(&nvdimm_bus->reconfig_mutex);
  282. if (nvdimm_bus->id < 0) {
  283. kfree(nvdimm_bus);
  284. return NULL;
  285. }
  286. nvdimm_bus->nd_desc = nd_desc;
  287. nvdimm_bus->module = module;
  288. nvdimm_bus->dev.parent = parent;
  289. nvdimm_bus->dev.release = nvdimm_bus_release;
  290. nvdimm_bus->dev.groups = nd_desc->attr_groups;
  291. dev_set_name(&nvdimm_bus->dev, "ndbus%d", nvdimm_bus->id);
  292. rc = device_register(&nvdimm_bus->dev);
  293. if (rc) {
  294. dev_dbg(&nvdimm_bus->dev, "registration failed: %d\n", rc);
  295. goto err;
  296. }
  297. rc = nvdimm_bus_create_ndctl(nvdimm_bus);
  298. if (rc)
  299. goto err;
  300. mutex_lock(&nvdimm_bus_list_mutex);
  301. list_add_tail(&nvdimm_bus->list, &nvdimm_bus_list);
  302. mutex_unlock(&nvdimm_bus_list_mutex);
  303. return nvdimm_bus;
  304. err:
  305. put_device(&nvdimm_bus->dev);
  306. return NULL;
  307. }
  308. EXPORT_SYMBOL_GPL(__nvdimm_bus_register);
  309. static void set_badblock(struct badblocks *bb, sector_t s, int num)
  310. {
  311. dev_dbg(bb->dev, "Found a poison range (0x%llx, 0x%llx)\n",
  312. (u64) s * 512, (u64) num * 512);
  313. /* this isn't an error as the hardware will still throw an exception */
  314. if (badblocks_set(bb, s, num, 1))
  315. dev_info_once(bb->dev, "%s: failed for sector %llx\n",
  316. __func__, (u64) s);
  317. }
  318. /**
  319. * __add_badblock_range() - Convert a physical address range to bad sectors
  320. * @bb: badblocks instance to populate
  321. * @ns_offset: namespace offset where the error range begins (in bytes)
  322. * @len: number of bytes of poison to be added
  323. *
  324. * This assumes that the range provided with (ns_offset, len) is within
  325. * the bounds of physical addresses for this namespace, i.e. lies in the
  326. * interval [ns_start, ns_start + ns_size)
  327. */
  328. static void __add_badblock_range(struct badblocks *bb, u64 ns_offset, u64 len)
  329. {
  330. const unsigned int sector_size = 512;
  331. sector_t start_sector;
  332. u64 num_sectors;
  333. u32 rem;
  334. start_sector = div_u64(ns_offset, sector_size);
  335. num_sectors = div_u64_rem(len, sector_size, &rem);
  336. if (rem)
  337. num_sectors++;
  338. if (unlikely(num_sectors > (u64)INT_MAX)) {
  339. u64 remaining = num_sectors;
  340. sector_t s = start_sector;
  341. while (remaining) {
  342. int done = min_t(u64, remaining, INT_MAX);
  343. set_badblock(bb, s, done);
  344. remaining -= done;
  345. s += done;
  346. }
  347. } else
  348. set_badblock(bb, start_sector, num_sectors);
  349. }
  350. /**
  351. * nvdimm_namespace_add_poison() - Convert a list of poison ranges to badblocks
  352. * @ndns: the namespace containing poison ranges
  353. * @bb: badblocks instance to populate
  354. * @offset: offset at the start of the namespace before 'sector 0'
  355. *
  356. * The poison list generated during NFIT initialization may contain multiple,
  357. * possibly overlapping ranges in the SPA (System Physical Address) space.
  358. * Compare each of these ranges to the namespace currently being initialized,
  359. * and add badblocks to the gendisk for all matching sub-ranges
  360. */
  361. void nvdimm_namespace_add_poison(struct nd_namespace_common *ndns,
  362. struct badblocks *bb, resource_size_t offset)
  363. {
  364. struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev);
  365. struct nd_region *nd_region = to_nd_region(ndns->dev.parent);
  366. struct nvdimm_bus *nvdimm_bus;
  367. struct list_head *poison_list;
  368. u64 ns_start, ns_end, ns_size;
  369. struct nd_poison *pl;
  370. ns_size = nvdimm_namespace_capacity(ndns) - offset;
  371. ns_start = nsio->res.start + offset;
  372. ns_end = nsio->res.end;
  373. nvdimm_bus = to_nvdimm_bus(nd_region->dev.parent);
  374. poison_list = &nvdimm_bus->poison_list;
  375. if (list_empty(poison_list))
  376. return;
  377. list_for_each_entry(pl, poison_list, list) {
  378. u64 pl_end = pl->start + pl->length - 1;
  379. /* Discard intervals with no intersection */
  380. if (pl_end < ns_start)
  381. continue;
  382. if (pl->start > ns_end)
  383. continue;
  384. /* Deal with any overlap after start of the namespace */
  385. if (pl->start >= ns_start) {
  386. u64 start = pl->start;
  387. u64 len;
  388. if (pl_end <= ns_end)
  389. len = pl->length;
  390. else
  391. len = ns_start + ns_size - pl->start;
  392. __add_badblock_range(bb, start - ns_start, len);
  393. continue;
  394. }
  395. /* Deal with overlap for poison starting before the namespace */
  396. if (pl->start < ns_start) {
  397. u64 len;
  398. if (pl_end < ns_end)
  399. len = pl->start + pl->length - ns_start;
  400. else
  401. len = ns_size;
  402. __add_badblock_range(bb, 0, len);
  403. }
  404. }
  405. }
  406. EXPORT_SYMBOL_GPL(nvdimm_namespace_add_poison);
  407. static int __add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length)
  408. {
  409. struct nd_poison *pl;
  410. pl = kzalloc(sizeof(*pl), GFP_KERNEL);
  411. if (!pl)
  412. return -ENOMEM;
  413. pl->start = addr;
  414. pl->length = length;
  415. list_add_tail(&pl->list, &nvdimm_bus->poison_list);
  416. return 0;
  417. }
  418. int nvdimm_bus_add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length)
  419. {
  420. struct nd_poison *pl;
  421. if (list_empty(&nvdimm_bus->poison_list))
  422. return __add_poison(nvdimm_bus, addr, length);
  423. /*
  424. * There is a chance this is a duplicate, check for those first.
  425. * This will be the common case as ARS_STATUS returns all known
  426. * errors in the SPA space, and we can't query it per region
  427. */
  428. list_for_each_entry(pl, &nvdimm_bus->poison_list, list)
  429. if (pl->start == addr) {
  430. /* If length has changed, update this list entry */
  431. if (pl->length != length)
  432. pl->length = length;
  433. return 0;
  434. }
  435. /*
  436. * If not a duplicate or a simple length update, add the entry as is,
  437. * as any overlapping ranges will get resolved when the list is consumed
  438. * and converted to badblocks
  439. */
  440. return __add_poison(nvdimm_bus, addr, length);
  441. }
  442. EXPORT_SYMBOL_GPL(nvdimm_bus_add_poison);
  443. static void free_poison_list(struct list_head *poison_list)
  444. {
  445. struct nd_poison *pl, *next;
  446. list_for_each_entry_safe(pl, next, poison_list, list) {
  447. list_del(&pl->list);
  448. kfree(pl);
  449. }
  450. list_del_init(poison_list);
  451. }
  452. static int child_unregister(struct device *dev, void *data)
  453. {
  454. /*
  455. * the singular ndctl class device per bus needs to be
  456. * "device_destroy"ed, so skip it here
  457. *
  458. * i.e. remove classless children
  459. */
  460. if (dev->class)
  461. /* pass */;
  462. else
  463. nd_device_unregister(dev, ND_SYNC);
  464. return 0;
  465. }
  466. void nvdimm_bus_unregister(struct nvdimm_bus *nvdimm_bus)
  467. {
  468. if (!nvdimm_bus)
  469. return;
  470. mutex_lock(&nvdimm_bus_list_mutex);
  471. list_del_init(&nvdimm_bus->list);
  472. mutex_unlock(&nvdimm_bus_list_mutex);
  473. nd_synchronize();
  474. device_for_each_child(&nvdimm_bus->dev, NULL, child_unregister);
  475. free_poison_list(&nvdimm_bus->poison_list);
  476. nvdimm_bus_destroy_ndctl(nvdimm_bus);
  477. device_unregister(&nvdimm_bus->dev);
  478. }
  479. EXPORT_SYMBOL_GPL(nvdimm_bus_unregister);
  #ifdef CONFIG_BLK_DEV_INTEGRITY
  /**
   * nd_integrity_init() - register per-sector metadata space with the block layer
   * @disk: disk to register the integrity template on
   * @meta_size: metadata bytes per sector; 0 means nothing is registered
   *
   * Registers a template with no profile whose tuple and tag sizes both
   * equal @meta_size, and limits the queue to one integrity segment.
   * Always returns 0.
   */
  int nd_integrity_init(struct gendisk *disk, unsigned long meta_size)
  {
      struct blk_integrity bi;

      if (meta_size == 0)
          return 0;

      /*
       * The template lives on the stack and is handed to
       * blk_integrity_register() as a whole, so clear it first: fields
       * that are not assigned below (and the NULL profile) must not be
       * stack garbage.
       */
      memset(&bi, 0, sizeof(bi));
      bi.tuple_size = meta_size;
      bi.tag_size = meta_size;

      blk_integrity_register(disk, &bi);
      blk_queue_max_integrity_segments(disk->queue, 1);

      return 0;
  }
  EXPORT_SYMBOL(nd_integrity_init);
  #else /* CONFIG_BLK_DEV_INTEGRITY */
  /* Without block integrity support there is nothing to set up. */
  int nd_integrity_init(struct gendisk *disk, unsigned long meta_size)
  {
      return 0;
  }
  EXPORT_SYMBOL(nd_integrity_init);
  #endif
  501. static __init int libnvdimm_init(void)
  502. {
  503. int rc;
  504. rc = nvdimm_bus_init();
  505. if (rc)
  506. return rc;
  507. rc = nvdimm_init();
  508. if (rc)
  509. goto err_dimm;
  510. rc = nd_region_init();
  511. if (rc)
  512. goto err_region;
  513. return 0;
  514. err_region:
  515. nvdimm_exit();
  516. err_dimm:
  517. nvdimm_bus_exit();
  518. return rc;
  519. }
  520. static __exit void libnvdimm_exit(void)
  521. {
  522. WARN_ON(!list_empty(&nvdimm_bus_list));
  523. nd_region_exit();
  524. nvdimm_exit();
  525. nvdimm_bus_exit();
  526. }
  527. MODULE_LICENSE("GPL v2");
  528. MODULE_AUTHOR("Intel Corporation");
  529. subsys_initcall(libnvdimm_init);
  530. module_exit(libnvdimm_exit);