region_devs.c

/*
 * Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 */
#include <linux/scatterlist.h>
#include <linux/highmem.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/hash.h>
#include <linux/pmem.h>
#include <linux/sort.h>
#include <linux/io.h>
#include <linux/nd.h>
#include "nd-core.h"
#include "nd.h"

/*
 * For readq() and writeq() on 32-bit builds, the hi-lo, lo-hi order is
 * irrelevant.
 */
#include <linux/io-64-nonatomic-hi-lo.h>

static DEFINE_IDA(region_ida);
static DEFINE_PER_CPU(int, flush_idx);

static int nvdimm_map_flush(struct device *dev, struct nvdimm *nvdimm, int dimm,
                struct nd_region_data *ndrd)
{
        int i, j;

        dev_dbg(dev, "%s: map %d flush address%s\n", nvdimm_name(nvdimm),
                        nvdimm->num_flush, nvdimm->num_flush == 1 ? "" : "es");
        for (i = 0; i < (1 << ndrd->hints_shift); i++) {
                struct resource *res = &nvdimm->flush_wpq[i];
                unsigned long pfn = PHYS_PFN(res->start);
                void __iomem *flush_page;

                /* check if flush hints share a page */
                for (j = 0; j < i; j++) {
                        struct resource *res_j = &nvdimm->flush_wpq[j];
                        unsigned long pfn_j = PHYS_PFN(res_j->start);

                        if (pfn == pfn_j)
                                break;
                }

                if (j < i)
                        flush_page = (void __iomem *) ((unsigned long)
                                        ndrd_get_flush_wpq(ndrd, dimm, j)
                                        & PAGE_MASK);
                else
                        flush_page = devm_nvdimm_ioremap(dev,
                                        PFN_PHYS(pfn), PAGE_SIZE);
                if (!flush_page)
                        return -ENXIO;
                ndrd_set_flush_wpq(ndrd, dimm, i, flush_page
                                + (res->start & ~PAGE_MASK));
        }

        return 0;
}

int nd_region_activate(struct nd_region *nd_region)
{
        int i, j, num_flush = 0;
        struct nd_region_data *ndrd;
        struct device *dev = &nd_region->dev;
        size_t flush_data_size = sizeof(void *);

        nvdimm_bus_lock(&nd_region->dev);
        for (i = 0; i < nd_region->ndr_mappings; i++) {
                struct nd_mapping *nd_mapping = &nd_region->mapping[i];
                struct nvdimm *nvdimm = nd_mapping->nvdimm;

                /* at least one null hint slot per-dimm for the "no-hint" case */
                flush_data_size += sizeof(void *);
                num_flush = min_not_zero(num_flush, nvdimm->num_flush);
                if (!nvdimm->num_flush)
                        continue;
                flush_data_size += nvdimm->num_flush * sizeof(void *);
        }
        nvdimm_bus_unlock(&nd_region->dev);

        ndrd = devm_kzalloc(dev, sizeof(*ndrd) + flush_data_size, GFP_KERNEL);
        if (!ndrd)
                return -ENOMEM;
        dev_set_drvdata(dev, ndrd);

        if (!num_flush)
                return 0;

        ndrd->hints_shift = ilog2(num_flush);
        for (i = 0; i < nd_region->ndr_mappings; i++) {
                struct nd_mapping *nd_mapping = &nd_region->mapping[i];
                struct nvdimm *nvdimm = nd_mapping->nvdimm;
                int rc = nvdimm_map_flush(&nd_region->dev, nvdimm, i, ndrd);

                if (rc)
                        return rc;
        }

        /*
         * Clear out entries that are duplicates. This should prevent
         * redundant flushes.
         */
        for (i = 0; i < nd_region->ndr_mappings - 1; i++) {
                /* ignore if NULL already */
                if (!ndrd_get_flush_wpq(ndrd, i, 0))
                        continue;

                for (j = i + 1; j < nd_region->ndr_mappings; j++)
                        if (ndrd_get_flush_wpq(ndrd, i, 0) ==
                                        ndrd_get_flush_wpq(ndrd, j, 0))
                                ndrd_set_flush_wpq(ndrd, j, 0, NULL);
        }

        return 0;
}
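
/*
 * Layout sketch (an assumption about the ndrd_{get,set}_flush_wpq()
 * helpers, which live in nd.h rather than this file): the flush hint
 * mappings established above land in one flat per-region table,
 * indexed roughly as
 *
 *      ndrd->flush_wpq[(dimm << ndrd->hints_shift) + hint]
 *
 * which is why hints_shift is sized to ilog2(num_flush) before the
 * per-dimm nvdimm_map_flush() calls, and why a duplicate hint-0 entry
 * can simply be NULLed out for the later dimm.
 */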

static void nd_region_release(struct device *dev)
{
        struct nd_region *nd_region = to_nd_region(dev);
        u16 i;

        for (i = 0; i < nd_region->ndr_mappings; i++) {
                struct nd_mapping *nd_mapping = &nd_region->mapping[i];
                struct nvdimm *nvdimm = nd_mapping->nvdimm;

                put_device(&nvdimm->dev);
        }
        free_percpu(nd_region->lane);
        ida_simple_remove(&region_ida, nd_region->id);
        if (is_nd_blk(dev))
                kfree(to_nd_blk_region(dev));
        else
                kfree(nd_region);
}

static struct device_type nd_blk_device_type = {
        .name = "nd_blk",
        .release = nd_region_release,
};

static struct device_type nd_pmem_device_type = {
        .name = "nd_pmem",
        .release = nd_region_release,
};

static struct device_type nd_volatile_device_type = {
        .name = "nd_volatile",
        .release = nd_region_release,
};

bool is_nd_pmem(struct device *dev)
{
        return dev ? dev->type == &nd_pmem_device_type : false;
}

bool is_nd_blk(struct device *dev)
{
        return dev ? dev->type == &nd_blk_device_type : false;
}

struct nd_region *to_nd_region(struct device *dev)
{
        struct nd_region *nd_region = container_of(dev, struct nd_region, dev);

        WARN_ON(dev->type->release != nd_region_release);
        return nd_region;
}
EXPORT_SYMBOL_GPL(to_nd_region);

struct nd_blk_region *to_nd_blk_region(struct device *dev)
{
        struct nd_region *nd_region = to_nd_region(dev);

        WARN_ON(!is_nd_blk(dev));
        return container_of(nd_region, struct nd_blk_region, nd_region);
}
EXPORT_SYMBOL_GPL(to_nd_blk_region);

void *nd_region_provider_data(struct nd_region *nd_region)
{
        return nd_region->provider_data;
}
EXPORT_SYMBOL_GPL(nd_region_provider_data);

void *nd_blk_region_provider_data(struct nd_blk_region *ndbr)
{
        return ndbr->blk_provider_data;
}
EXPORT_SYMBOL_GPL(nd_blk_region_provider_data);

void nd_blk_region_set_provider_data(struct nd_blk_region *ndbr, void *data)
{
        ndbr->blk_provider_data = data;
}
EXPORT_SYMBOL_GPL(nd_blk_region_set_provider_data);

/**
 * nd_region_to_nstype() - region to an integer namespace type
 * @nd_region: region-device to interrogate
 *
 * This is the 'nstype' attribute of a region as well as an input to the
 * MODALIAS for namespace devices, and the bit number for an nvdimm_bus to
 * match namespace devices with namespace drivers.
 */
int nd_region_to_nstype(struct nd_region *nd_region)
{
        if (is_nd_pmem(&nd_region->dev)) {
                u16 i, alias;

                for (i = 0, alias = 0; i < nd_region->ndr_mappings; i++) {
                        struct nd_mapping *nd_mapping = &nd_region->mapping[i];
                        struct nvdimm *nvdimm = nd_mapping->nvdimm;

                        if (test_bit(NDD_ALIASING, &nvdimm->flags))
                                alias++;
                }
                if (alias)
                        return ND_DEVICE_NAMESPACE_PMEM;
                else
                        return ND_DEVICE_NAMESPACE_IO;
        } else if (is_nd_blk(&nd_region->dev)) {
                return ND_DEVICE_NAMESPACE_BLK;
        }

        return 0;
}
EXPORT_SYMBOL(nd_region_to_nstype);

static ssize_t size_show(struct device *dev,
                struct device_attribute *attr, char *buf)
{
        struct nd_region *nd_region = to_nd_region(dev);
        unsigned long long size = 0;

        if (is_nd_pmem(dev)) {
                size = nd_region->ndr_size;
        } else if (nd_region->ndr_mappings == 1) {
                struct nd_mapping *nd_mapping = &nd_region->mapping[0];

                size = nd_mapping->size;
        }

        return sprintf(buf, "%llu\n", size);
}
static DEVICE_ATTR_RO(size);

static ssize_t deep_flush_show(struct device *dev,
                struct device_attribute *attr, char *buf)
{
        struct nd_region *nd_region = to_nd_region(dev);

        /*
         * NOTE: in the nvdimm_has_flush() error case this attribute is
         * not visible.
         */
        return sprintf(buf, "%d\n", nvdimm_has_flush(nd_region));
}

static ssize_t deep_flush_store(struct device *dev, struct device_attribute *attr,
                const char *buf, size_t len)
{
        bool flush;
        int rc = strtobool(buf, &flush);
        struct nd_region *nd_region = to_nd_region(dev);

        if (rc)
                return rc;
        if (!flush)
                return -EINVAL;
        nvdimm_flush(nd_region);

        return len;
}
static DEVICE_ATTR_RW(deep_flush);
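
/*
 * Usage sketch (illustrative only; 'region0' is a hypothetical device): the
 * attribute above reads back the nvdimm_has_flush() result and only accepts
 * a "true" value on write, which triggers nvdimm_flush() for the region:
 *
 *      # cat /sys/bus/nd/devices/region0/deep_flush
 *      1
 *      # echo 1 > /sys/bus/nd/devices/region0/deep_flush
 */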

static ssize_t mappings_show(struct device *dev,
                struct device_attribute *attr, char *buf)
{
        struct nd_region *nd_region = to_nd_region(dev);

        return sprintf(buf, "%d\n", nd_region->ndr_mappings);
}
static DEVICE_ATTR_RO(mappings);

static ssize_t nstype_show(struct device *dev,
                struct device_attribute *attr, char *buf)
{
        struct nd_region *nd_region = to_nd_region(dev);

        return sprintf(buf, "%d\n", nd_region_to_nstype(nd_region));
}
static DEVICE_ATTR_RO(nstype);

static ssize_t set_cookie_show(struct device *dev,
                struct device_attribute *attr, char *buf)
{
        struct nd_region *nd_region = to_nd_region(dev);
        struct nd_interleave_set *nd_set = nd_region->nd_set;

        if (is_nd_pmem(dev) && nd_set)
                /* pass, should be precluded by region_visible */;
        else
                return -ENXIO;

        return sprintf(buf, "%#llx\n", nd_set->cookie);
}
static DEVICE_ATTR_RO(set_cookie);

resource_size_t nd_region_available_dpa(struct nd_region *nd_region)
{
        resource_size_t blk_max_overlap = 0, available, overlap;
        int i;

        WARN_ON(!is_nvdimm_bus_locked(&nd_region->dev));

 retry:
        available = 0;
        overlap = blk_max_overlap;
        for (i = 0; i < nd_region->ndr_mappings; i++) {
                struct nd_mapping *nd_mapping = &nd_region->mapping[i];
                struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);

                /* if a dimm is disabled the available capacity is zero */
                if (!ndd)
                        return 0;

                if (is_nd_pmem(&nd_region->dev)) {
                        available += nd_pmem_available_dpa(nd_region,
                                        nd_mapping, &overlap);
                        if (overlap > blk_max_overlap) {
                                blk_max_overlap = overlap;
                                goto retry;
                        }
                } else if (is_nd_blk(&nd_region->dev))
                        available += nd_blk_available_dpa(nd_region);
        }

        return available;
}

static ssize_t available_size_show(struct device *dev,
                struct device_attribute *attr, char *buf)
{
        struct nd_region *nd_region = to_nd_region(dev);
        unsigned long long available = 0;

        /*
         * Flush in-flight updates and grab a snapshot of the available
         * size. Of course, this value is potentially invalidated the
         * moment nvdimm_bus_lock() is dropped, but that's userspace's
         * problem to not race itself.
         */
        nvdimm_bus_lock(dev);
        wait_nvdimm_bus_probe_idle(dev);
        available = nd_region_available_dpa(nd_region);
        nvdimm_bus_unlock(dev);

        return sprintf(buf, "%llu\n", available);
}
static DEVICE_ATTR_RO(available_size);

static ssize_t init_namespaces_show(struct device *dev,
                struct device_attribute *attr, char *buf)
{
        struct nd_region_data *ndrd = dev_get_drvdata(dev);
        ssize_t rc;

        nvdimm_bus_lock(dev);
        if (ndrd)
                rc = sprintf(buf, "%d/%d\n", ndrd->ns_active, ndrd->ns_count);
        else
                rc = -ENXIO;
        nvdimm_bus_unlock(dev);

        return rc;
}
static DEVICE_ATTR_RO(init_namespaces);

static ssize_t namespace_seed_show(struct device *dev,
                struct device_attribute *attr, char *buf)
{
        struct nd_region *nd_region = to_nd_region(dev);
        ssize_t rc;

        nvdimm_bus_lock(dev);
        if (nd_region->ns_seed)
                rc = sprintf(buf, "%s\n", dev_name(nd_region->ns_seed));
        else
                rc = sprintf(buf, "\n");
        nvdimm_bus_unlock(dev);

        return rc;
}
static DEVICE_ATTR_RO(namespace_seed);

static ssize_t btt_seed_show(struct device *dev,
                struct device_attribute *attr, char *buf)
{
        struct nd_region *nd_region = to_nd_region(dev);
        ssize_t rc;

        nvdimm_bus_lock(dev);
        if (nd_region->btt_seed)
                rc = sprintf(buf, "%s\n", dev_name(nd_region->btt_seed));
        else
                rc = sprintf(buf, "\n");
        nvdimm_bus_unlock(dev);

        return rc;
}
static DEVICE_ATTR_RO(btt_seed);

static ssize_t pfn_seed_show(struct device *dev,
                struct device_attribute *attr, char *buf)
{
        struct nd_region *nd_region = to_nd_region(dev);
        ssize_t rc;

        nvdimm_bus_lock(dev);
        if (nd_region->pfn_seed)
                rc = sprintf(buf, "%s\n", dev_name(nd_region->pfn_seed));
        else
                rc = sprintf(buf, "\n");
        nvdimm_bus_unlock(dev);

        return rc;
}
static DEVICE_ATTR_RO(pfn_seed);

static ssize_t dax_seed_show(struct device *dev,
                struct device_attribute *attr, char *buf)
{
        struct nd_region *nd_region = to_nd_region(dev);
        ssize_t rc;

        nvdimm_bus_lock(dev);
        if (nd_region->dax_seed)
                rc = sprintf(buf, "%s\n", dev_name(nd_region->dax_seed));
        else
                rc = sprintf(buf, "\n");
        nvdimm_bus_unlock(dev);

        return rc;
}
static DEVICE_ATTR_RO(dax_seed);

static ssize_t read_only_show(struct device *dev,
                struct device_attribute *attr, char *buf)
{
        struct nd_region *nd_region = to_nd_region(dev);

        return sprintf(buf, "%d\n", nd_region->ro);
}

static ssize_t read_only_store(struct device *dev,
                struct device_attribute *attr, const char *buf, size_t len)
{
        bool ro;
        int rc = strtobool(buf, &ro);
        struct nd_region *nd_region = to_nd_region(dev);

        if (rc)
                return rc;

        nd_region->ro = ro;
        return len;
}
static DEVICE_ATTR_RW(read_only);

static ssize_t region_badblocks_show(struct device *dev,
                struct device_attribute *attr, char *buf)
{
        struct nd_region *nd_region = to_nd_region(dev);

        return badblocks_show(&nd_region->bb, buf, 0);
}

static DEVICE_ATTR(badblocks, 0444, region_badblocks_show, NULL);

static ssize_t resource_show(struct device *dev,
                struct device_attribute *attr, char *buf)
{
        struct nd_region *nd_region = to_nd_region(dev);

        return sprintf(buf, "%#llx\n", nd_region->ndr_start);
}
static DEVICE_ATTR_RO(resource);

static struct attribute *nd_region_attributes[] = {
        &dev_attr_size.attr,
        &dev_attr_nstype.attr,
        &dev_attr_mappings.attr,
        &dev_attr_btt_seed.attr,
        &dev_attr_pfn_seed.attr,
        &dev_attr_dax_seed.attr,
        &dev_attr_deep_flush.attr,
        &dev_attr_read_only.attr,
        &dev_attr_set_cookie.attr,
        &dev_attr_available_size.attr,
        &dev_attr_namespace_seed.attr,
        &dev_attr_init_namespaces.attr,
        &dev_attr_badblocks.attr,
        &dev_attr_resource.attr,
        NULL,
};

static umode_t region_visible(struct kobject *kobj, struct attribute *a, int n)
{
        struct device *dev = container_of(kobj, typeof(*dev), kobj);
        struct nd_region *nd_region = to_nd_region(dev);
        struct nd_interleave_set *nd_set = nd_region->nd_set;
        int type = nd_region_to_nstype(nd_region);

        if (!is_nd_pmem(dev) && a == &dev_attr_pfn_seed.attr)
                return 0;

        if (!is_nd_pmem(dev) && a == &dev_attr_dax_seed.attr)
                return 0;

        if (!is_nd_pmem(dev) && a == &dev_attr_badblocks.attr)
                return 0;

        if (!is_nd_pmem(dev) && a == &dev_attr_resource.attr)
                return 0;

        if (a == &dev_attr_deep_flush.attr) {
                int has_flush = nvdimm_has_flush(nd_region);

                if (has_flush == 1)
                        return a->mode;
                else if (has_flush == 0)
                        return 0444;
                else
                        return 0;
        }

        if (a != &dev_attr_set_cookie.attr
                        && a != &dev_attr_available_size.attr)
                return a->mode;

        if ((type == ND_DEVICE_NAMESPACE_PMEM
                                || type == ND_DEVICE_NAMESPACE_BLK)
                        && a == &dev_attr_available_size.attr)
                return a->mode;
        else if (is_nd_pmem(dev) && nd_set)
                return a->mode;

        return 0;
}

struct attribute_group nd_region_attribute_group = {
        .attrs = nd_region_attributes,
        .is_visible = region_visible,
};
EXPORT_SYMBOL_GPL(nd_region_attribute_group);

u64 nd_region_interleave_set_cookie(struct nd_region *nd_region)
{
        struct nd_interleave_set *nd_set = nd_region->nd_set;

        if (nd_set)
                return nd_set->cookie;

        return 0;
}

u64 nd_region_interleave_set_altcookie(struct nd_region *nd_region)
{
        struct nd_interleave_set *nd_set = nd_region->nd_set;

        if (nd_set)
                return nd_set->altcookie;

        return 0;
}

void nd_mapping_free_labels(struct nd_mapping *nd_mapping)
{
        struct nd_label_ent *label_ent, *e;

        lockdep_assert_held(&nd_mapping->lock);
        list_for_each_entry_safe(label_ent, e, &nd_mapping->labels, list) {
                list_del(&label_ent->list);
                kfree(label_ent);
        }
}

/*
 * Upon successful probe/remove, take/release a reference on the
 * associated interleave set (if present), and plant new btt + namespace
 * seeds. Also, on the removal of a BLK region, notify the provider to
 * disable the region.
 */
static void nd_region_notify_driver_action(struct nvdimm_bus *nvdimm_bus,
                struct device *dev, bool probe)
{
        struct nd_region *nd_region;

        if (!probe && (is_nd_pmem(dev) || is_nd_blk(dev))) {
                int i;

                nd_region = to_nd_region(dev);
                for (i = 0; i < nd_region->ndr_mappings; i++) {
                        struct nd_mapping *nd_mapping = &nd_region->mapping[i];
                        struct nvdimm_drvdata *ndd = nd_mapping->ndd;
                        struct nvdimm *nvdimm = nd_mapping->nvdimm;

                        mutex_lock(&nd_mapping->lock);
                        nd_mapping_free_labels(nd_mapping);
                        mutex_unlock(&nd_mapping->lock);

                        put_ndd(ndd);
                        nd_mapping->ndd = NULL;
                        if (ndd)
                                atomic_dec(&nvdimm->busy);
                }

                if (is_nd_pmem(dev))
                        return;
        }
        if (dev->parent && (is_nd_blk(dev->parent) || is_nd_pmem(dev->parent))
                        && probe) {
                nd_region = to_nd_region(dev->parent);
                nvdimm_bus_lock(dev);
                if (nd_region->ns_seed == dev)
                        nd_region_create_ns_seed(nd_region);
                nvdimm_bus_unlock(dev);
        }
        if (is_nd_btt(dev) && probe) {
                struct nd_btt *nd_btt = to_nd_btt(dev);

                nd_region = to_nd_region(dev->parent);
                nvdimm_bus_lock(dev);
                if (nd_region->btt_seed == dev)
                        nd_region_create_btt_seed(nd_region);
                if (nd_region->ns_seed == &nd_btt->ndns->dev)
                        nd_region_create_ns_seed(nd_region);
                nvdimm_bus_unlock(dev);
        }
        if (is_nd_pfn(dev) && probe) {
                struct nd_pfn *nd_pfn = to_nd_pfn(dev);

                nd_region = to_nd_region(dev->parent);
                nvdimm_bus_lock(dev);
                if (nd_region->pfn_seed == dev)
                        nd_region_create_pfn_seed(nd_region);
                if (nd_region->ns_seed == &nd_pfn->ndns->dev)
                        nd_region_create_ns_seed(nd_region);
                nvdimm_bus_unlock(dev);
        }
        if (is_nd_dax(dev) && probe) {
                struct nd_dax *nd_dax = to_nd_dax(dev);

                nd_region = to_nd_region(dev->parent);
                nvdimm_bus_lock(dev);
                if (nd_region->dax_seed == dev)
                        nd_region_create_dax_seed(nd_region);
                if (nd_region->ns_seed == &nd_dax->nd_pfn.ndns->dev)
                        nd_region_create_ns_seed(nd_region);
                nvdimm_bus_unlock(dev);
        }
}

void nd_region_probe_success(struct nvdimm_bus *nvdimm_bus, struct device *dev)
{
        nd_region_notify_driver_action(nvdimm_bus, dev, true);
}

void nd_region_disable(struct nvdimm_bus *nvdimm_bus, struct device *dev)
{
        nd_region_notify_driver_action(nvdimm_bus, dev, false);
}

static ssize_t mappingN(struct device *dev, char *buf, int n)
{
        struct nd_region *nd_region = to_nd_region(dev);
        struct nd_mapping *nd_mapping;
        struct nvdimm *nvdimm;

        if (n >= nd_region->ndr_mappings)
                return -ENXIO;
        nd_mapping = &nd_region->mapping[n];
        nvdimm = nd_mapping->nvdimm;

        return sprintf(buf, "%s,%llu,%llu\n", dev_name(&nvdimm->dev),
                        nd_mapping->start, nd_mapping->size);
}

#define REGION_MAPPING(idx) \
static ssize_t mapping##idx##_show(struct device *dev,         \
                struct device_attribute *attr, char *buf)      \
{                                                               \
        return mappingN(dev, buf, idx);                         \
}                                                               \
static DEVICE_ATTR_RO(mapping##idx)
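
/*
 * Expansion sketch (first level only, relying on the standard
 * DEVICE_ATTR_RO() helper): REGION_MAPPING(0); becomes
 *
 *      static ssize_t mapping0_show(struct device *dev,
 *                      struct device_attribute *attr, char *buf)
 *      {
 *              return mappingN(dev, buf, 0);
 *      }
 *      static DEVICE_ATTR_RO(mapping0);
 *
 * i.e. a read-only 'mapping0' attribute (dev_attr_mapping0) backed by
 * mappingN() above.
 */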

/*
 * 32 should be enough for a while; even in the presence of socket
 * interleave, a 32-way interleave set is a degenerate case.
 */
REGION_MAPPING(0);
REGION_MAPPING(1);
REGION_MAPPING(2);
REGION_MAPPING(3);
REGION_MAPPING(4);
REGION_MAPPING(5);
REGION_MAPPING(6);
REGION_MAPPING(7);
REGION_MAPPING(8);
REGION_MAPPING(9);
REGION_MAPPING(10);
REGION_MAPPING(11);
REGION_MAPPING(12);
REGION_MAPPING(13);
REGION_MAPPING(14);
REGION_MAPPING(15);
REGION_MAPPING(16);
REGION_MAPPING(17);
REGION_MAPPING(18);
REGION_MAPPING(19);
REGION_MAPPING(20);
REGION_MAPPING(21);
REGION_MAPPING(22);
REGION_MAPPING(23);
REGION_MAPPING(24);
REGION_MAPPING(25);
REGION_MAPPING(26);
REGION_MAPPING(27);
REGION_MAPPING(28);
REGION_MAPPING(29);
REGION_MAPPING(30);
REGION_MAPPING(31);

static umode_t mapping_visible(struct kobject *kobj, struct attribute *a, int n)
{
        struct device *dev = container_of(kobj, struct device, kobj);
        struct nd_region *nd_region = to_nd_region(dev);

        if (n < nd_region->ndr_mappings)
                return a->mode;

        return 0;
}

static struct attribute *mapping_attributes[] = {
        &dev_attr_mapping0.attr,
        &dev_attr_mapping1.attr,
        &dev_attr_mapping2.attr,
        &dev_attr_mapping3.attr,
        &dev_attr_mapping4.attr,
        &dev_attr_mapping5.attr,
        &dev_attr_mapping6.attr,
        &dev_attr_mapping7.attr,
        &dev_attr_mapping8.attr,
        &dev_attr_mapping9.attr,
        &dev_attr_mapping10.attr,
        &dev_attr_mapping11.attr,
        &dev_attr_mapping12.attr,
        &dev_attr_mapping13.attr,
        &dev_attr_mapping14.attr,
        &dev_attr_mapping15.attr,
        &dev_attr_mapping16.attr,
        &dev_attr_mapping17.attr,
        &dev_attr_mapping18.attr,
        &dev_attr_mapping19.attr,
        &dev_attr_mapping20.attr,
        &dev_attr_mapping21.attr,
        &dev_attr_mapping22.attr,
        &dev_attr_mapping23.attr,
        &dev_attr_mapping24.attr,
        &dev_attr_mapping25.attr,
        &dev_attr_mapping26.attr,
        &dev_attr_mapping27.attr,
        &dev_attr_mapping28.attr,
        &dev_attr_mapping29.attr,
        &dev_attr_mapping30.attr,
        &dev_attr_mapping31.attr,
        NULL,
};

struct attribute_group nd_mapping_attribute_group = {
        .is_visible = mapping_visible,
        .attrs = mapping_attributes,
};
EXPORT_SYMBOL_GPL(nd_mapping_attribute_group);

int nd_blk_region_init(struct nd_region *nd_region)
{
        struct device *dev = &nd_region->dev;
        struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);

        if (!is_nd_blk(dev))
                return 0;

        if (nd_region->ndr_mappings < 1) {
                dev_err(dev, "invalid BLK region\n");
                return -ENXIO;
        }

        return to_nd_blk_region(dev)->enable(nvdimm_bus, dev);
}

/**
 * nd_region_acquire_lane - allocate and lock a lane
 * @nd_region: region id and number of lanes possible
 *
 * A lane correlates to a BLK-data-window and/or a log slot in the BTT.
 * We optimize for the common case where there are 256 lanes, one
 * per-cpu. For larger systems we need to lock to share lanes. For now
 * this implementation assumes the cost of maintaining an allocator for
 * free lanes is on the order of the lock hold time, so it implements a
 * static lane = cpu % num_lanes mapping.
 *
 * In the case of a BTT instance on top of a BLK namespace a lane may be
 * acquired recursively. We lock on the first instance.
 *
 * In the case of a BTT instance on top of PMEM, we only acquire a lane
 * for the BTT metadata updates.
 */
unsigned int nd_region_acquire_lane(struct nd_region *nd_region)
{
        unsigned int cpu, lane;

        cpu = get_cpu();
        if (nd_region->num_lanes < nr_cpu_ids) {
                struct nd_percpu_lane *ndl_lock, *ndl_count;

                lane = cpu % nd_region->num_lanes;
                ndl_count = per_cpu_ptr(nd_region->lane, cpu);
                ndl_lock = per_cpu_ptr(nd_region->lane, lane);
                if (ndl_count->count++ == 0)
                        spin_lock(&ndl_lock->lock);
        } else
                lane = cpu;

        return lane;
}
EXPORT_SYMBOL(nd_region_acquire_lane);

void nd_region_release_lane(struct nd_region *nd_region, unsigned int lane)
{
        if (nd_region->num_lanes < nr_cpu_ids) {
                unsigned int cpu = get_cpu();
                struct nd_percpu_lane *ndl_lock, *ndl_count;

                ndl_count = per_cpu_ptr(nd_region->lane, cpu);
                ndl_lock = per_cpu_ptr(nd_region->lane, lane);
                if (--ndl_count->count == 0)
                        spin_unlock(&ndl_lock->lock);
                put_cpu();
        }
        put_cpu();
}
EXPORT_SYMBOL(nd_region_release_lane);
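
/*
 * Usage sketch (illustrative only): a BTT or BLK I/O path brackets its
 * per-lane access like so, with preemption held off (get_cpu()) from the
 * acquire until the matching release:
 *
 *      lane = nd_region_acquire_lane(nd_region);
 *      ... access the lane's BLK data window or BTT log slot ...
 *      nd_region_release_lane(nd_region, lane);
 */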

static struct nd_region *nd_region_create(struct nvdimm_bus *nvdimm_bus,
                struct nd_region_desc *ndr_desc, struct device_type *dev_type,
                const char *caller)
{
        struct nd_region *nd_region;
        struct device *dev;
        void *region_buf;
        unsigned int i;
        int ro = 0;

        for (i = 0; i < ndr_desc->num_mappings; i++) {
                struct nd_mapping_desc *mapping = &ndr_desc->mapping[i];
                struct nvdimm *nvdimm = mapping->nvdimm;

                if ((mapping->start | mapping->size) % SZ_4K) {
                        dev_err(&nvdimm_bus->dev, "%s: %s mapping%d is not 4K aligned\n",
                                        caller, dev_name(&nvdimm->dev), i);
                        return NULL;
                }

                if (test_bit(NDD_UNARMED, &nvdimm->flags))
                        ro = 1;
        }

        if (dev_type == &nd_blk_device_type) {
                struct nd_blk_region_desc *ndbr_desc;
                struct nd_blk_region *ndbr;

                ndbr_desc = to_blk_region_desc(ndr_desc);
                ndbr = kzalloc(sizeof(*ndbr) + sizeof(struct nd_mapping)
                                * ndr_desc->num_mappings,
                                GFP_KERNEL);
                if (ndbr) {
                        nd_region = &ndbr->nd_region;
                        ndbr->enable = ndbr_desc->enable;
                        ndbr->do_io = ndbr_desc->do_io;
                }
                region_buf = ndbr;
        } else {
                nd_region = kzalloc(sizeof(struct nd_region)
                                + sizeof(struct nd_mapping)
                                * ndr_desc->num_mappings,
                                GFP_KERNEL);
                region_buf = nd_region;
        }

        if (!region_buf)
                return NULL;
        nd_region->id = ida_simple_get(&region_ida, 0, 0, GFP_KERNEL);
        if (nd_region->id < 0)
                goto err_id;

        nd_region->lane = alloc_percpu(struct nd_percpu_lane);
        if (!nd_region->lane)
                goto err_percpu;

        for (i = 0; i < nr_cpu_ids; i++) {
                struct nd_percpu_lane *ndl;

                ndl = per_cpu_ptr(nd_region->lane, i);
                spin_lock_init(&ndl->lock);
                ndl->count = 0;
        }

        for (i = 0; i < ndr_desc->num_mappings; i++) {
                struct nd_mapping_desc *mapping = &ndr_desc->mapping[i];
                struct nvdimm *nvdimm = mapping->nvdimm;

                nd_region->mapping[i].nvdimm = nvdimm;
                nd_region->mapping[i].start = mapping->start;
                nd_region->mapping[i].size = mapping->size;
                INIT_LIST_HEAD(&nd_region->mapping[i].labels);
                mutex_init(&nd_region->mapping[i].lock);

                get_device(&nvdimm->dev);
        }
        nd_region->ndr_mappings = ndr_desc->num_mappings;
        nd_region->provider_data = ndr_desc->provider_data;
        nd_region->nd_set = ndr_desc->nd_set;
        nd_region->num_lanes = ndr_desc->num_lanes;
        nd_region->flags = ndr_desc->flags;
        nd_region->ro = ro;
        nd_region->numa_node = ndr_desc->numa_node;
        ida_init(&nd_region->ns_ida);
        ida_init(&nd_region->btt_ida);
        ida_init(&nd_region->pfn_ida);
        ida_init(&nd_region->dax_ida);
        dev = &nd_region->dev;
        dev_set_name(dev, "region%d", nd_region->id);
        dev->parent = &nvdimm_bus->dev;
        dev->type = dev_type;
        dev->groups = ndr_desc->attr_groups;
        nd_region->ndr_size = resource_size(ndr_desc->res);
        nd_region->ndr_start = ndr_desc->res->start;
        nd_device_register(dev);

        return nd_region;

 err_percpu:
        ida_simple_remove(&region_ida, nd_region->id);
 err_id:
        kfree(region_buf);
        return NULL;
}

struct nd_region *nvdimm_pmem_region_create(struct nvdimm_bus *nvdimm_bus,
                struct nd_region_desc *ndr_desc)
{
        ndr_desc->num_lanes = ND_MAX_LANES;
        return nd_region_create(nvdimm_bus, ndr_desc, &nd_pmem_device_type,
                        __func__);
}
EXPORT_SYMBOL_GPL(nvdimm_pmem_region_create);

struct nd_region *nvdimm_blk_region_create(struct nvdimm_bus *nvdimm_bus,
                struct nd_region_desc *ndr_desc)
{
        if (ndr_desc->num_mappings > 1)
                return NULL;
        ndr_desc->num_lanes = min(ndr_desc->num_lanes, ND_MAX_LANES);
        return nd_region_create(nvdimm_bus, ndr_desc, &nd_blk_device_type,
                        __func__);
}
EXPORT_SYMBOL_GPL(nvdimm_blk_region_create);

struct nd_region *nvdimm_volatile_region_create(struct nvdimm_bus *nvdimm_bus,
                struct nd_region_desc *ndr_desc)
{
        ndr_desc->num_lanes = ND_MAX_LANES;
        return nd_region_create(nvdimm_bus, ndr_desc, &nd_volatile_device_type,
                        __func__);
}
EXPORT_SYMBOL_GPL(nvdimm_volatile_region_create);
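
/*
 * Creation sketch (illustrative only; error handling is omitted and
 * 'pmem_res', 'my_attr_groups' and 'my_data' are hypothetical): a bus
 * provider fills an nd_region_desc and hands it to one of the
 * constructors above:
 *
 *      struct nd_mapping_desc mappings[1] = {
 *              { .nvdimm = nvdimm, .start = 0, .size = SZ_1G },
 *      };
 *      struct nd_region_desc ndr_desc = {
 *              .res = &pmem_res,
 *              .mapping = mappings,
 *              .num_mappings = 1,
 *              .attr_groups = my_attr_groups,
 *              .provider_data = my_data,
 *              .numa_node = NUMA_NO_NODE,
 *      };
 *
 *      region = nvdimm_pmem_region_create(nvdimm_bus, &ndr_desc);
 */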

/**
 * nvdimm_flush - flush any posted write queues between the cpu and pmem media
 * @nd_region: blk or interleaved pmem region
 */
void nvdimm_flush(struct nd_region *nd_region)
{
        struct nd_region_data *ndrd = dev_get_drvdata(&nd_region->dev);
        int i, idx;

        /*
         * Try to encourage some diversity in flush hint addresses
         * across cpus assuming a limited number of flush hints.
         */
        idx = this_cpu_read(flush_idx);
        idx = this_cpu_add_return(flush_idx, hash_32(current->pid + idx, 8));

        /*
         * The first wmb() is needed to 'sfence' all previous writes
         * such that they are architecturally visible for the platform
         * buffer flush. Note that we've already arranged for pmem
         * writes to avoid the cache via memcpy_flushcache(). The final
         * wmb() ensures ordering for the NVDIMM flush write.
         */
        wmb();
        for (i = 0; i < nd_region->ndr_mappings; i++)
                if (ndrd_get_flush_wpq(ndrd, i, 0))
                        writeq(1, ndrd_get_flush_wpq(ndrd, i, idx));
        wmb();
}
EXPORT_SYMBOL_GPL(nvdimm_flush);

/**
 * nvdimm_has_flush - determine write flushing requirements
 * @nd_region: blk or interleaved pmem region
 *
 * Returns 1 if writes require flushing
 * Returns 0 if writes do not require flushing
 * Returns -ENXIO if flushing capability cannot be determined
 */
int nvdimm_has_flush(struct nd_region *nd_region)
{
        int i;

        /* no nvdimm == flushing capability unknown */
        if (nd_region->ndr_mappings == 0)
                return -ENXIO;

        for (i = 0; i < nd_region->ndr_mappings; i++) {
                struct nd_mapping *nd_mapping = &nd_region->mapping[i];
                struct nvdimm *nvdimm = nd_mapping->nvdimm;

                /* flush hints present / available */
                if (nvdimm->num_flush)
                        return 1;
        }

        /*
         * The platform defines dimm devices without hints; assume a
         * platform persistence mechanism like ADR.
         */
        return 0;
}
EXPORT_SYMBOL_GPL(nvdimm_has_flush);
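
/*
 * Usage sketch (illustrative only; 'dst', 'src' and 'len' are hypothetical):
 * a pmem consumer checks the flushing requirement once and then pairs its
 * cache-bypassing copies with a deep flush of the posted write queues:
 *
 *      if (nvdimm_has_flush(nd_region) < 0)
 *              return -ENXIO;
 *
 *      memcpy_flushcache(dst, src, len);
 *      nvdimm_flush(nd_region);
 */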

void __exit nd_region_devs_exit(void)
{
        ida_destroy(&region_ida);
}