/*
 * Copyright (C) 2015 IT University of Copenhagen (rrpc.c)
 * Copyright (C) 2016 CNEX Labs
 * Initial release: Javier Gonzalez <javier@cnexlabs.com>
 *                  Matias Bjorling <matias@cnexlabs.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version
 * 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * Implementation of a physical block-device target for Open-channel SSDs.
 *
 * pblk-init.c - pblk's initialization.
 */

#include "pblk.h"

static struct kmem_cache *pblk_ws_cache, *pblk_rec_cache, *pblk_g_rq_cache,
                                *pblk_w_rq_cache;
static DECLARE_RWSEM(pblk_lock);
struct bio_set *pblk_bio_set;
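
/*
 * I/O entry point. Reads are split to the device limit and submitted
 * directly; writes are buffered in the write cache and only split when
 * they exceed the sectors the rate limiter guarantees for user I/O.
 */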
static int pblk_rw_io(struct request_queue *q, struct pblk *pblk,
                      struct bio *bio)
{
        int ret;

        /* Read requests must be <= 256kb due to NVMe's 64 bit completion bitmap
         * constraint. Writes can be of arbitrary size.
         */
        if (bio_data_dir(bio) == READ) {
                blk_queue_split(q, &bio);
                ret = pblk_submit_read(pblk, bio);
                if (ret == NVM_IO_DONE && bio_flagged(bio, BIO_CLONED))
                        bio_put(bio);

                return ret;
        }

        /* Prevent deadlock in the case of a modest LUN configuration and large
         * user I/Os. Unless stalled, the rate limiter leaves at least 256KB
         * available for user I/O.
         */
        if (pblk_get_secs(bio) > pblk_rl_max_io(&pblk->rl))
                blk_queue_split(q, &bio);

        return pblk_write_to_cache(pblk, bio, PBLK_IOTYPE_USER);
}

static blk_qc_t pblk_make_rq(struct request_queue *q, struct bio *bio)
{
        struct pblk *pblk = q->queuedata;

        if (bio_op(bio) == REQ_OP_DISCARD) {
                pblk_discard(pblk, bio);
                if (!(bio->bi_opf & REQ_PREFLUSH)) {
                        bio_endio(bio);
                        return BLK_QC_T_NONE;
                }
        }

        switch (pblk_rw_io(q, pblk, bio)) {
        case NVM_IO_ERR:
                bio_io_error(bio);
                break;
        case NVM_IO_DONE:
                bio_endio(bio);
                break;
        }

        return BLK_QC_T_NONE;
}
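
/*
 * Size of the in-memory L2P table: one entry per exposed sector, packed
 * into 32 bits when the device's ppa format fits, 64 bits otherwise.
 */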
static size_t pblk_trans_map_size(struct pblk *pblk)
{
        int entry_size = 8;

        if (pblk->ppaf_bitsize < 32)
                entry_size = 4;

        return entry_size * pblk->rl.nr_secs;
}

#ifdef CONFIG_NVM_DEBUG
static u32 pblk_l2p_crc(struct pblk *pblk)
{
        size_t map_size;
        u32 crc = ~(u32)0;

        map_size = pblk_trans_map_size(pblk);
        crc = crc32_le(crc, pblk->trans_map, map_size);

        return crc;
}
#endif

static void pblk_l2p_free(struct pblk *pblk)
{
        vfree(pblk->trans_map);
}

static int pblk_l2p_recover(struct pblk *pblk, bool factory_init)
{
        struct pblk_line *line = NULL;

        if (factory_init) {
                pblk_setup_uuid(pblk);
        } else {
                line = pblk_recov_l2p(pblk);
                if (IS_ERR(line)) {
                        pr_err("pblk: could not recover l2p table\n");
                        return -EFAULT;
                }
        }

#ifdef CONFIG_NVM_DEBUG
        pr_info("pblk init: L2P CRC: %x\n", pblk_l2p_crc(pblk));
#endif

        /* Free full lines directly as GC has not been started yet */
        pblk_gc_free_full_lines(pblk);

        if (!line) {
                /* Configure next line for user data */
                line = pblk_line_get_first_data(pblk);
                if (!line) {
                        pr_err("pblk: line list corrupted\n");
                        return -EFAULT;
                }
        }

        return 0;
}
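
/*
 * Allocate the L2P table and mark every entry as empty before either
 * stamping a fresh instance uuid (factory init) or recovering the
 * mappings from the device.
 */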
static int pblk_l2p_init(struct pblk *pblk, bool factory_init)
{
        sector_t i;
        struct ppa_addr ppa;
        size_t map_size;

        map_size = pblk_trans_map_size(pblk);
        pblk->trans_map = vmalloc(map_size);
        if (!pblk->trans_map)
                return -ENOMEM;

        pblk_ppa_set_empty(&ppa);

        for (i = 0; i < pblk->rl.nr_secs; i++)
                pblk_trans_map_set(pblk, i, ppa);

        return pblk_l2p_recover(pblk, factory_init);
}

static void pblk_rwb_free(struct pblk *pblk)
{
        if (pblk_rb_tear_down_check(&pblk->rwb))
                pr_err("pblk: write buffer error on tear down\n");

        pblk_rb_data_free(&pblk->rwb);
        vfree(pblk_rb_entries_ref(&pblk->rwb));
}
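
/*
 * The write buffer is sized from the sectors the device requires to be
 * buffered on the host (mw_cunits per LUN); the ring buffer operates on
 * power-of-two sizes so lookups reduce to shifts and masks.
 */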
static int pblk_rwb_init(struct pblk *pblk)
{
        struct nvm_tgt_dev *dev = pblk->dev;
        struct nvm_geo *geo = &dev->geo;
        struct pblk_rb_entry *entries;
        unsigned long nr_entries;
        unsigned int power_size, power_seg_sz;

        nr_entries = pblk_rb_calculate_size(pblk->pgs_in_buffer);

        entries = vzalloc(nr_entries * sizeof(struct pblk_rb_entry));
        if (!entries)
                return -ENOMEM;

        power_size = get_count_order(nr_entries);
        power_seg_sz = get_count_order(geo->csecs);

        return pblk_rb_init(&pblk->rwb, entries, power_size, power_seg_sz);
}

/* Minimum pages needed within a lun */
#define ADDR_POOL_SIZE 64
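
/*
 * Lay out the 1.2 address format used internally: sector, plane, channel,
 * LUN, page and block fields from least to most significant bits. Channel
 * and LUN counts must be powers of two for the masks to be exact.
 */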
static int pblk_set_addrf_12(struct nvm_geo *geo, struct nvm_addrf_12 *dst)
{
        struct nvm_addrf_12 *src = (struct nvm_addrf_12 *)&geo->addrf;
        int power_len;

        /* Re-calculate channel and lun format to adapt to configuration */
        power_len = get_count_order(geo->nr_chnls);
        if (1 << power_len != geo->nr_chnls) {
                pr_err("pblk: supports only power-of-two channel config.\n");
                return -EINVAL;
        }
        dst->ch_len = power_len;

        power_len = get_count_order(geo->nr_luns);
        if (1 << power_len != geo->nr_luns) {
                pr_err("pblk: supports only power-of-two LUN config.\n");
                return -EINVAL;
        }
        dst->lun_len = power_len;

        dst->blk_len = src->blk_len;
        dst->pg_len = src->pg_len;
        dst->pln_len = src->pln_len;
        dst->sect_len = src->sect_len;

        dst->sect_offset = 0;
        dst->pln_offset = dst->sect_len;
        dst->ch_offset = dst->pln_offset + dst->pln_len;
        dst->lun_offset = dst->ch_offset + dst->ch_len;
        dst->pg_offset = dst->lun_offset + dst->lun_len;
        dst->blk_offset = dst->pg_offset + dst->pg_len;

        dst->sec_mask = ((1ULL << dst->sect_len) - 1) << dst->sect_offset;
        dst->pln_mask = ((1ULL << dst->pln_len) - 1) << dst->pln_offset;
        dst->ch_mask = ((1ULL << dst->ch_len) - 1) << dst->ch_offset;
        dst->lun_mask = ((1ULL << dst->lun_len) - 1) << dst->lun_offset;
        dst->pg_mask = ((1ULL << dst->pg_len) - 1) << dst->pg_offset;
        dst->blk_mask = ((1ULL << dst->blk_len) - 1) << dst->blk_offset;

        return dst->blk_offset + src->blk_len;
}

static int pblk_set_ppaf(struct pblk *pblk)
{
        struct nvm_tgt_dev *dev = pblk->dev;
        struct nvm_geo *geo = &dev->geo;
        int mod;

        div_u64_rem(geo->clba, pblk->min_write_pgs, &mod);
        if (mod) {
                pr_err("pblk: bad configuration of sectors/pages\n");
                return -EINVAL;
        }

        pblk->ppaf_bitsize = pblk_set_addrf_12(geo, (void *)&pblk->ppaf);

        return 0;
}
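
/*
 * Module-wide slab caches backing the mempools below. Creation is
 * serialized by pblk_lock; on failure every cache created so far is
 * destroyed before returning.
 */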
static int pblk_init_global_caches(struct pblk *pblk)
{
        down_write(&pblk_lock);
        pblk_ws_cache = kmem_cache_create("pblk_blk_ws",
                                sizeof(struct pblk_line_ws), 0, 0, NULL);
        if (!pblk_ws_cache) {
                up_write(&pblk_lock);
                return -ENOMEM;
        }

        pblk_rec_cache = kmem_cache_create("pblk_rec",
                                sizeof(struct pblk_rec_ctx), 0, 0, NULL);
        if (!pblk_rec_cache) {
                kmem_cache_destroy(pblk_ws_cache);
                up_write(&pblk_lock);
                return -ENOMEM;
        }

        pblk_g_rq_cache = kmem_cache_create("pblk_g_rq", pblk_g_rq_size,
                                0, 0, NULL);
        if (!pblk_g_rq_cache) {
                kmem_cache_destroy(pblk_ws_cache);
                kmem_cache_destroy(pblk_rec_cache);
                up_write(&pblk_lock);
                return -ENOMEM;
        }

        pblk_w_rq_cache = kmem_cache_create("pblk_w_rq", pblk_w_rq_size,
                                0, 0, NULL);
        if (!pblk_w_rq_cache) {
                kmem_cache_destroy(pblk_ws_cache);
                kmem_cache_destroy(pblk_rec_cache);
                kmem_cache_destroy(pblk_g_rq_cache);
                up_write(&pblk_lock);
                return -ENOMEM;
        }
        up_write(&pblk_lock);

        return 0;
}

static void pblk_free_global_caches(struct pblk *pblk)
{
        kmem_cache_destroy(pblk_ws_cache);
        kmem_cache_destroy(pblk_rec_cache);
        kmem_cache_destroy(pblk_g_rq_cache);
        kmem_cache_destroy(pblk_w_rq_cache);
}
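
/*
 * Per-instance core state: write-amplification counters, write sizing
 * derived from the geometry, mempools backed by the global caches and the
 * workqueues used on the completion paths. Unwinds in reverse order on
 * failure.
 */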
static int pblk_core_init(struct pblk *pblk)
{
        struct nvm_tgt_dev *dev = pblk->dev;
        struct nvm_geo *geo = &dev->geo;
        int max_write_ppas;

        atomic64_set(&pblk->user_wa, 0);
        atomic64_set(&pblk->pad_wa, 0);
        atomic64_set(&pblk->gc_wa, 0);
        pblk->user_rst_wa = 0;
        pblk->pad_rst_wa = 0;
        pblk->gc_rst_wa = 0;

        atomic64_set(&pblk->nr_flush, 0);
        pblk->nr_flush_rst = 0;

        pblk->pgs_in_buffer = geo->mw_cunits * geo->all_luns;

        pblk->min_write_pgs = geo->ws_opt * (geo->csecs / PAGE_SIZE);
        max_write_ppas = pblk->min_write_pgs * geo->all_luns;
        pblk->max_write_pgs = min_t(int, max_write_ppas, NVM_MAX_VLBA);
        pblk_set_sec_per_write(pblk, pblk->min_write_pgs);

        if (pblk->max_write_pgs > PBLK_MAX_REQ_ADDRS) {
                pr_err("pblk: vector list too big (%u > %u)\n",
                                pblk->max_write_pgs, PBLK_MAX_REQ_ADDRS);
                return -EINVAL;
        }

        pblk->pad_dist = kzalloc((pblk->min_write_pgs - 1) * sizeof(atomic64_t),
                                                                GFP_KERNEL);
        if (!pblk->pad_dist)
                return -ENOMEM;

        if (pblk_init_global_caches(pblk))
                goto fail_free_pad_dist;

        /* Internal bios can be at most the sectors signaled by the device. */
        pblk->page_bio_pool = mempool_create_page_pool(NVM_MAX_VLBA, 0);
        if (!pblk->page_bio_pool)
                goto free_global_caches;

        pblk->gen_ws_pool = mempool_create_slab_pool(PBLK_GEN_WS_POOL_SIZE,
                                                        pblk_ws_cache);
        if (!pblk->gen_ws_pool)
                goto free_page_bio_pool;

        pblk->rec_pool = mempool_create_slab_pool(geo->all_luns,
                                                        pblk_rec_cache);
        if (!pblk->rec_pool)
                goto free_gen_ws_pool;

        pblk->r_rq_pool = mempool_create_slab_pool(geo->all_luns,
                                                        pblk_g_rq_cache);
        if (!pblk->r_rq_pool)
                goto free_rec_pool;

        pblk->e_rq_pool = mempool_create_slab_pool(geo->all_luns,
                                                        pblk_g_rq_cache);
        if (!pblk->e_rq_pool)
                goto free_r_rq_pool;

        pblk->w_rq_pool = mempool_create_slab_pool(geo->all_luns,
                                                        pblk_w_rq_cache);
        if (!pblk->w_rq_pool)
                goto free_e_rq_pool;

        pblk->close_wq = alloc_workqueue("pblk-close-wq",
                        WQ_MEM_RECLAIM | WQ_UNBOUND, PBLK_NR_CLOSE_JOBS);
        if (!pblk->close_wq)
                goto free_w_rq_pool;

        pblk->bb_wq = alloc_workqueue("pblk-bb-wq",
                        WQ_MEM_RECLAIM | WQ_UNBOUND, 0);
        if (!pblk->bb_wq)
                goto free_close_wq;

        pblk->r_end_wq = alloc_workqueue("pblk-read-end-wq",
                        WQ_MEM_RECLAIM | WQ_UNBOUND, 0);
        if (!pblk->r_end_wq)
                goto free_bb_wq;

        if (pblk_set_ppaf(pblk))
                goto free_r_end_wq;

        INIT_LIST_HEAD(&pblk->compl_list);

        return 0;

free_r_end_wq:
        destroy_workqueue(pblk->r_end_wq);
free_bb_wq:
        destroy_workqueue(pblk->bb_wq);
free_close_wq:
        destroy_workqueue(pblk->close_wq);
free_w_rq_pool:
        mempool_destroy(pblk->w_rq_pool);
free_e_rq_pool:
        mempool_destroy(pblk->e_rq_pool);
free_r_rq_pool:
        mempool_destroy(pblk->r_rq_pool);
free_rec_pool:
        mempool_destroy(pblk->rec_pool);
free_gen_ws_pool:
        mempool_destroy(pblk->gen_ws_pool);
free_page_bio_pool:
        mempool_destroy(pblk->page_bio_pool);
free_global_caches:
        pblk_free_global_caches(pblk);
fail_free_pad_dist:
        kfree(pblk->pad_dist);
        return -ENOMEM;
}

static void pblk_core_free(struct pblk *pblk)
{
        if (pblk->close_wq)
                destroy_workqueue(pblk->close_wq);

        if (pblk->r_end_wq)
                destroy_workqueue(pblk->r_end_wq);

        if (pblk->bb_wq)
                destroy_workqueue(pblk->bb_wq);

        mempool_destroy(pblk->page_bio_pool);
        mempool_destroy(pblk->gen_ws_pool);
        mempool_destroy(pblk->rec_pool);
        mempool_destroy(pblk->r_rq_pool);
        mempool_destroy(pblk->e_rq_pool);
        mempool_destroy(pblk->w_rq_pool);

        pblk_free_global_caches(pblk);
        kfree(pblk->pad_dist);
}

static void pblk_line_mg_free(struct pblk *pblk)
{
        struct pblk_line_mgmt *l_mg = &pblk->l_mg;
        int i;

        kfree(l_mg->bb_template);
        kfree(l_mg->bb_aux);
        kfree(l_mg->vsc_list);

        for (i = 0; i < PBLK_DATA_LINES; i++) {
                kfree(l_mg->sline_meta[i]);
                pblk_mfree(l_mg->eline_meta[i]->buf, l_mg->emeta_alloc_type);
                kfree(l_mg->eline_meta[i]);
        }
}

static void pblk_line_meta_free(struct pblk_line *line)
{
        kfree(line->blk_bitmap);
        kfree(line->erase_bitmap);
}

static void pblk_lines_free(struct pblk *pblk)
{
        struct pblk_line_mgmt *l_mg = &pblk->l_mg;
        struct pblk_line *line;
        int i;

        spin_lock(&l_mg->free_lock);
        for (i = 0; i < l_mg->nr_lines; i++) {
                line = &pblk->lines[i];

                pblk_line_free(pblk, line);
                pblk_line_meta_free(line);
        }
        spin_unlock(&l_mg->free_lock);

        pblk_line_mg_free(pblk);

        kfree(pblk->luns);
        kfree(pblk->lines);
}

static int pblk_bb_get_tbl(struct nvm_tgt_dev *dev, struct pblk_lun *rlun,
                           u8 *blks, int nr_blks)
{
        struct ppa_addr ppa;
        int ret;

        ppa.ppa = 0;
        ppa.g.ch = rlun->bppa.g.ch;
        ppa.g.lun = rlun->bppa.g.lun;

        ret = nvm_get_tgt_bb_tbl(dev, ppa, blks);
        if (ret)
                return ret;

        nr_blks = nvm_bb_tbl_fold(dev->parent, blks, nr_blks);
        if (nr_blks < 0)
                return -EIO;

        return 0;
}
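
/*
 * Gather the bad block table of every LUN into one contiguous log,
 * blk_per_lun entries per LUN, so line setup can look up the state of its
 * block in each LUN with a single index.
 */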
static void *pblk_bb_get_log(struct pblk *pblk)
{
        struct nvm_tgt_dev *dev = pblk->dev;
        struct nvm_geo *geo = &dev->geo;
        u8 *log;
        int i, nr_blks, blk_per_lun;
        int ret;

        blk_per_lun = geo->nr_chks * geo->plane_mode;
        nr_blks = blk_per_lun * geo->all_luns;

        log = kmalloc(nr_blks, GFP_KERNEL);
        if (!log)
                return ERR_PTR(-ENOMEM);

        for (i = 0; i < geo->all_luns; i++) {
                struct pblk_lun *rlun = &pblk->luns[i];
                u8 *log_pos = log + i * blk_per_lun;

                ret = pblk_bb_get_tbl(dev, rlun, log_pos, blk_per_lun);
                if (ret) {
                        kfree(log);
                        return ERR_PTR(-EIO);
                }
        }

        return log;
}

static int pblk_bb_line(struct pblk *pblk, struct pblk_line *line,
                        u8 *bb_log, int blk_per_line)
{
        struct nvm_tgt_dev *dev = pblk->dev;
        struct nvm_geo *geo = &dev->geo;
        int i, bb_cnt = 0;
        int blk_per_lun = geo->nr_chks * geo->plane_mode;

        for (i = 0; i < blk_per_line; i++) {
                struct pblk_lun *rlun = &pblk->luns[i];
                u8 *lun_bb_log = bb_log + i * blk_per_lun;

                if (lun_bb_log[line->id] == NVM_BLK_T_FREE)
                        continue;

                set_bit(pblk_ppa_to_pos(geo, rlun->bppa), line->blk_bitmap);
                bb_cnt++;
        }

        return bb_cnt;
}
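
/*
 * Map the device's LUN list into pblk's ordering: consecutive entries
 * stripe across channels first so sequential writes spread over as many
 * channels as possible.
 */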
static int pblk_luns_init(struct pblk *pblk)
{
        struct nvm_tgt_dev *dev = pblk->dev;
        struct nvm_geo *geo = &dev->geo;
        struct pblk_lun *rlun;
        int i;

        /* TODO: Implement unbalanced LUN support */
        if (geo->nr_luns < 0) {
                pr_err("pblk: unbalanced LUN config.\n");
                return -EINVAL;
        }

        pblk->luns = kcalloc(geo->all_luns, sizeof(struct pblk_lun),
                                                                GFP_KERNEL);
        if (!pblk->luns)
                return -ENOMEM;

        for (i = 0; i < geo->all_luns; i++) {
                /* Stripe across channels */
                int ch = i % geo->nr_chnls;
                int lun_raw = i / geo->nr_chnls;
                int lunid = lun_raw + ch * geo->nr_luns;

                rlun = &pblk->luns[i];
                rlun->bppa = dev->luns[lunid];

                sema_init(&rlun->wr_sem, 1);
        }

        return 0;
}

/* See comment over struct line_emeta definition */
static unsigned int calc_emeta_len(struct pblk *pblk)
{
        struct pblk_line_meta *lm = &pblk->lm;
        struct pblk_line_mgmt *l_mg = &pblk->l_mg;
        struct nvm_tgt_dev *dev = pblk->dev;
        struct nvm_geo *geo = &dev->geo;

        /* Round to sector size so that lba_list starts on its own sector */
        lm->emeta_sec[1] = DIV_ROUND_UP(
                        sizeof(struct line_emeta) + lm->blk_bitmap_len +
                        sizeof(struct wa_counters), geo->csecs);
        lm->emeta_len[1] = lm->emeta_sec[1] * geo->csecs;

        /* Round to sector size so that vsc_list starts on its own sector */
        lm->dsec_per_line = lm->sec_per_line - lm->emeta_sec[0];
        lm->emeta_sec[2] = DIV_ROUND_UP(lm->dsec_per_line * sizeof(u64),
                        geo->csecs);
        lm->emeta_len[2] = lm->emeta_sec[2] * geo->csecs;

        lm->emeta_sec[3] = DIV_ROUND_UP(l_mg->nr_lines * sizeof(u32),
                        geo->csecs);
        lm->emeta_len[3] = lm->emeta_sec[3] * geo->csecs;

        lm->vsc_list_len = l_mg->nr_lines * sizeof(u32);

        return (lm->emeta_len[1] + lm->emeta_len[2] + lm->emeta_len[3]);
}
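
/*
 * Derive over-provisioning and user-visible capacity: op percent of the
 * free blocks is reserved for pblk itself, and the blocks consumed by line
 * metadata (smeta and emeta) are subtracted from what the user sees.
 */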
static void pblk_set_provision(struct pblk *pblk, long nr_free_blks)
{
        struct nvm_tgt_dev *dev = pblk->dev;
        struct pblk_line_mgmt *l_mg = &pblk->l_mg;
        struct pblk_line_meta *lm = &pblk->lm;
        struct nvm_geo *geo = &dev->geo;
        sector_t provisioned;
        int sec_meta, blk_meta;

        if (geo->op == NVM_TARGET_DEFAULT_OP)
                pblk->op = PBLK_DEFAULT_OP;
        else
                pblk->op = geo->op;

        provisioned = nr_free_blks;
        provisioned *= (100 - pblk->op);
        sector_div(provisioned, 100);

        pblk->op_blks = nr_free_blks - provisioned;

        /* Internally pblk manages all free blocks, but all calculations based
         * on user capacity consider only provisioned blocks
         */
        pblk->rl.total_blocks = nr_free_blks;
        pblk->rl.nr_secs = nr_free_blks * geo->clba;

        /* Consider sectors used for metadata */
        sec_meta = (lm->smeta_sec + lm->emeta_sec[0]) * l_mg->nr_free_lines;
        blk_meta = DIV_ROUND_UP(sec_meta, geo->clba);

        pblk->capacity = (provisioned - blk_meta) * geo->clba;

        atomic_set(&pblk->rl.free_blocks, nr_free_blks);
        atomic_set(&pblk->rl.free_user_blocks, nr_free_blks);
}

static int pblk_setup_line_meta(struct pblk *pblk, struct pblk_line *line,
                                void *chunk_log, long *nr_bad_blks)
{
        struct pblk_line_meta *lm = &pblk->lm;

        line->blk_bitmap = kzalloc(lm->blk_bitmap_len, GFP_KERNEL);
        if (!line->blk_bitmap)
                return -ENOMEM;

        line->erase_bitmap = kzalloc(lm->blk_bitmap_len, GFP_KERNEL);
        if (!line->erase_bitmap) {
                kfree(line->blk_bitmap);
                return -ENOMEM;
        }

        *nr_bad_blks = pblk_bb_line(pblk, line, chunk_log, lm->blk_per_line);

        return 0;
}
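
/*
 * Line management state: free/bad/GC lists and their locks, the
 * valid-sector-count list, the bad block bitmap templates, and the
 * smeta/emeta buffers shared by the PBLK_DATA_LINES lines that can be
 * open simultaneously.
 */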
static int pblk_line_mg_init(struct pblk *pblk)
{
        struct nvm_tgt_dev *dev = pblk->dev;
        struct nvm_geo *geo = &dev->geo;
        struct pblk_line_mgmt *l_mg = &pblk->l_mg;
        struct pblk_line_meta *lm = &pblk->lm;
        int i, bb_distance;

        l_mg->nr_lines = geo->nr_chks;
        l_mg->log_line = l_mg->data_line = NULL;
        l_mg->l_seq_nr = l_mg->d_seq_nr = 0;
        l_mg->nr_free_lines = 0;
        bitmap_zero(&l_mg->meta_bitmap, PBLK_DATA_LINES);

        INIT_LIST_HEAD(&l_mg->free_list);
        INIT_LIST_HEAD(&l_mg->corrupt_list);
        INIT_LIST_HEAD(&l_mg->bad_list);
        INIT_LIST_HEAD(&l_mg->gc_full_list);
        INIT_LIST_HEAD(&l_mg->gc_high_list);
        INIT_LIST_HEAD(&l_mg->gc_mid_list);
        INIT_LIST_HEAD(&l_mg->gc_low_list);
        INIT_LIST_HEAD(&l_mg->gc_empty_list);

        INIT_LIST_HEAD(&l_mg->emeta_list);

        l_mg->gc_lists[0] = &l_mg->gc_high_list;
        l_mg->gc_lists[1] = &l_mg->gc_mid_list;
        l_mg->gc_lists[2] = &l_mg->gc_low_list;

        spin_lock_init(&l_mg->free_lock);
        spin_lock_init(&l_mg->close_lock);
        spin_lock_init(&l_mg->gc_lock);

        l_mg->vsc_list = kcalloc(l_mg->nr_lines, sizeof(__le32), GFP_KERNEL);
        if (!l_mg->vsc_list)
                goto fail;

        l_mg->bb_template = kzalloc(lm->sec_bitmap_len, GFP_KERNEL);
        if (!l_mg->bb_template)
                goto fail_free_vsc_list;

        l_mg->bb_aux = kzalloc(lm->sec_bitmap_len, GFP_KERNEL);
        if (!l_mg->bb_aux)
                goto fail_free_bb_template;

        /* smeta is always small enough to fit on a kmalloc memory allocation,
         * emeta depends on the number of LUNs allocated to the pblk instance
         */
        for (i = 0; i < PBLK_DATA_LINES; i++) {
                l_mg->sline_meta[i] = kmalloc(lm->smeta_len, GFP_KERNEL);
                if (!l_mg->sline_meta[i])
                        goto fail_free_smeta;
        }

        /* emeta allocates three different buffers for managing metadata with
         * in-memory and in-media layouts
         */
        for (i = 0; i < PBLK_DATA_LINES; i++) {
                struct pblk_emeta *emeta;

                emeta = kmalloc(sizeof(struct pblk_emeta), GFP_KERNEL);
                if (!emeta)
                        goto fail_free_emeta;

                if (lm->emeta_len[0] > KMALLOC_MAX_CACHE_SIZE) {
                        l_mg->emeta_alloc_type = PBLK_VMALLOC_META;

                        emeta->buf = vmalloc(lm->emeta_len[0]);
                        if (!emeta->buf) {
                                kfree(emeta);
                                goto fail_free_emeta;
                        }

                        emeta->nr_entries = lm->emeta_sec[0];
                        l_mg->eline_meta[i] = emeta;
                } else {
                        l_mg->emeta_alloc_type = PBLK_KMALLOC_META;

                        emeta->buf = kmalloc(lm->emeta_len[0], GFP_KERNEL);
                        if (!emeta->buf) {
                                kfree(emeta);
                                goto fail_free_emeta;
                        }

                        emeta->nr_entries = lm->emeta_sec[0];
                        l_mg->eline_meta[i] = emeta;
                }
        }

        for (i = 0; i < l_mg->nr_lines; i++)
                l_mg->vsc_list[i] = cpu_to_le32(EMPTY_ENTRY);

        bb_distance = (geo->all_luns) * geo->ws_opt;
        for (i = 0; i < lm->sec_per_line; i += bb_distance)
                bitmap_set(l_mg->bb_template, i, geo->ws_opt);

        return 0;

fail_free_emeta:
        while (--i >= 0) {
                if (l_mg->emeta_alloc_type == PBLK_VMALLOC_META)
                        vfree(l_mg->eline_meta[i]->buf);
                else
                        kfree(l_mg->eline_meta[i]->buf);
                kfree(l_mg->eline_meta[i]);
        }
fail_free_smeta:
        for (i = 0; i < PBLK_DATA_LINES; i++)
                kfree(l_mg->sline_meta[i]);
        kfree(l_mg->bb_aux);
fail_free_bb_template:
        kfree(l_mg->bb_template);
fail_free_vsc_list:
        kfree(l_mg->vsc_list);
fail:
        return -ENOMEM;
}

static int pblk_line_meta_init(struct pblk *pblk)
{
        struct nvm_tgt_dev *dev = pblk->dev;
        struct nvm_geo *geo = &dev->geo;
        struct pblk_line_meta *lm = &pblk->lm;
        unsigned int smeta_len, emeta_len;
        int i;

        lm->sec_per_line = geo->clba * geo->all_luns;
        lm->blk_per_line = geo->all_luns;
        lm->blk_bitmap_len = BITS_TO_LONGS(geo->all_luns) * sizeof(long);
        lm->sec_bitmap_len = BITS_TO_LONGS(lm->sec_per_line) * sizeof(long);
        lm->lun_bitmap_len = BITS_TO_LONGS(geo->all_luns) * sizeof(long);
        lm->mid_thrs = lm->sec_per_line / 2;
        lm->high_thrs = lm->sec_per_line / 4;
        lm->meta_distance = (geo->all_luns / 2) * pblk->min_write_pgs;

        /* Calculate necessary pages for smeta. See comment over struct
         * line_smeta definition
         */
        i = 1;
add_smeta_page:
        lm->smeta_sec = i * geo->ws_opt;
        lm->smeta_len = lm->smeta_sec * geo->csecs;

        smeta_len = sizeof(struct line_smeta) + lm->lun_bitmap_len;
        if (smeta_len > lm->smeta_len) {
                i++;
                goto add_smeta_page;
        }

        /* Calculate necessary pages for emeta. See comment over struct
         * line_emeta definition
         */
        i = 1;
add_emeta_page:
        lm->emeta_sec[0] = i * geo->ws_opt;
        lm->emeta_len[0] = lm->emeta_sec[0] * geo->csecs;

        emeta_len = calc_emeta_len(pblk);
        if (emeta_len > lm->emeta_len[0]) {
                i++;
                goto add_emeta_page;
        }

        lm->emeta_bb = geo->all_luns > i ? geo->all_luns - i : 0;

        lm->min_blk_line = 1;
        if (geo->all_luns > 1)
                lm->min_blk_line += DIV_ROUND_UP(lm->smeta_sec +
                                        lm->emeta_sec[0], geo->clba);

        if (lm->min_blk_line > lm->blk_per_line) {
                pr_err("pblk: config. not supported. Min. LUN in line:%d\n",
                                                        lm->blk_per_line);
                return -EINVAL;
        }

        return 0;
}
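
/*
 * Build the line array from the geometry and the bad block log: lines with
 * fewer good blocks than min_blk_line are parked on the bad list, the rest
 * go on the free list and count towards the provisioned capacity.
 */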
static int pblk_lines_init(struct pblk *pblk)
{
        struct pblk_line_mgmt *l_mg = &pblk->l_mg;
        struct pblk_line_meta *lm = &pblk->lm;
        struct pblk_line *line;
        void *chunk_log;
        long nr_bad_blks = 0, nr_free_blks = 0;
        int i, ret;

        ret = pblk_line_meta_init(pblk);
        if (ret)
                return ret;

        ret = pblk_line_mg_init(pblk);
        if (ret)
                return ret;

        ret = pblk_luns_init(pblk);
        if (ret)
                goto fail_free_meta;

        chunk_log = pblk_bb_get_log(pblk);
        if (IS_ERR(chunk_log)) {
                pr_err("pblk: could not get bad block log (%lu)\n",
                                                        PTR_ERR(chunk_log));
                ret = PTR_ERR(chunk_log);
                goto fail_free_luns;
        }

        pblk->lines = kcalloc(l_mg->nr_lines, sizeof(struct pblk_line),
                                                                GFP_KERNEL);
        if (!pblk->lines) {
                ret = -ENOMEM;
                goto fail_free_chunk_log;
        }

        for (i = 0; i < l_mg->nr_lines; i++) {
                int chk_in_line;

                line = &pblk->lines[i];

                line->pblk = pblk;
                line->id = i;
                line->type = PBLK_LINETYPE_FREE;
                line->state = PBLK_LINESTATE_FREE;
                line->gc_group = PBLK_LINEGC_NONE;
                line->vsc = &l_mg->vsc_list[i];
                spin_lock_init(&line->lock);

                ret = pblk_setup_line_meta(pblk, line, chunk_log, &nr_bad_blks);
                if (ret)
                        goto fail_free_lines;

                chk_in_line = lm->blk_per_line - nr_bad_blks;
                if (nr_bad_blks < 0 || nr_bad_blks > lm->blk_per_line ||
                                        chk_in_line < lm->min_blk_line) {
                        line->state = PBLK_LINESTATE_BAD;
                        list_add_tail(&line->list, &l_mg->bad_list);
                        continue;
                }

                nr_free_blks += chk_in_line;
                atomic_set(&line->blk_in_line, chk_in_line);

                l_mg->nr_free_lines++;
                list_add_tail(&line->list, &l_mg->free_list);
        }

        pblk_set_provision(pblk, nr_free_blks);

        kfree(chunk_log);
        return 0;

fail_free_lines:
        while (--i >= 0)
                pblk_line_meta_free(&pblk->lines[i]);
        kfree(pblk->lines);
fail_free_chunk_log:
        kfree(chunk_log);
fail_free_luns:
        kfree(pblk->luns);
fail_free_meta:
        pblk_line_mg_free(pblk);

        return ret;
}

static int pblk_writer_init(struct pblk *pblk)
{
        pblk->writer_ts = kthread_create(pblk_write_ts, pblk, "pblk-writer-t");
        if (IS_ERR(pblk->writer_ts)) {
                int err = PTR_ERR(pblk->writer_ts);

                if (err != -EINTR)
                        pr_err("pblk: could not allocate writer kthread (%d)\n",
                                        err);
                return err;
        }

        timer_setup(&pblk->wtimer, pblk_write_timer_fn, 0);
        mod_timer(&pblk->wtimer, jiffies + msecs_to_jiffies(100));

        return 0;
}

static void pblk_writer_stop(struct pblk *pblk)
{
        /* The pipeline must be stopped and the write buffer emptied before the
         * write thread is stopped
         */
        WARN(pblk_rb_read_count(&pblk->rwb),
                        "Stopping not fully persisted write buffer\n");
        WARN(pblk_rb_sync_count(&pblk->rwb),
                        "Stopping not fully synced write buffer\n");

        del_timer_sync(&pblk->wtimer);
        if (pblk->writer_ts)
                kthread_stop(pblk->writer_ts);
}

static void pblk_free(struct pblk *pblk)
{
        pblk_lines_free(pblk);
        pblk_l2p_free(pblk);
        pblk_rwb_free(pblk);
        pblk_core_free(pblk);

        kfree(pblk);
}

static void pblk_tear_down(struct pblk *pblk)
{
        pblk_pipeline_stop(pblk);
        pblk_writer_stop(pblk);
        pblk_rb_sync_l2p(&pblk->rwb);
        pblk_rl_free(&pblk->rl);

        pr_debug("pblk: consistent tear down\n");
}

static void pblk_exit(void *private)
{
        struct pblk *pblk = private;

        down_write(&pblk_lock);
        pblk_gc_exit(pblk);
        pblk_tear_down(pblk);

#ifdef CONFIG_NVM_DEBUG
        pr_info("pblk exit: L2P CRC: %x\n", pblk_l2p_crc(pblk));
#endif

        pblk_free(pblk);
        up_write(&pblk_lock);
}

static sector_t pblk_capacity(void *private)
{
        struct pblk *pblk = private;

        return pblk->capacity * NR_PHY_IN_LOG;
}
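
/*
 * Target entry point: allocate the instance, then bring up core state,
 * lines, write buffer, L2P table, writer thread and GC in that order,
 * and inherit queue limits from the underlying device. Each step unwinds
 * the previous ones on failure.
 */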
static void *pblk_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk,
                       int flags)
{
        struct nvm_geo *geo = &dev->geo;
        struct request_queue *bqueue = dev->q;
        struct request_queue *tqueue = tdisk->queue;
        struct pblk *pblk;
        int ret;

        if (dev->geo.dom & NVM_RSP_L2P) {
                pr_err("pblk: host-side L2P table not supported. (%x)\n",
                                                        dev->geo.dom);
                return ERR_PTR(-EINVAL);
        }

        pblk = kzalloc(sizeof(struct pblk), GFP_KERNEL);
        if (!pblk)
                return ERR_PTR(-ENOMEM);

        pblk->dev = dev;
        pblk->disk = tdisk;
        pblk->state = PBLK_STATE_RUNNING;
        pblk->gc.gc_enabled = 0;

        spin_lock_init(&pblk->trans_lock);
        spin_lock_init(&pblk->lock);

#ifdef CONFIG_NVM_DEBUG
        atomic_long_set(&pblk->inflight_writes, 0);
        atomic_long_set(&pblk->padded_writes, 0);
        atomic_long_set(&pblk->padded_wb, 0);
        atomic_long_set(&pblk->req_writes, 0);
        atomic_long_set(&pblk->sub_writes, 0);
        atomic_long_set(&pblk->sync_writes, 0);
        atomic_long_set(&pblk->inflight_reads, 0);
        atomic_long_set(&pblk->cache_reads, 0);
        atomic_long_set(&pblk->sync_reads, 0);
        atomic_long_set(&pblk->recov_writes, 0);
        atomic_long_set(&pblk->recov_gc_writes, 0);
        atomic_long_set(&pblk->recov_gc_reads, 0);
#endif

        atomic_long_set(&pblk->read_failed, 0);
        atomic_long_set(&pblk->read_empty, 0);
        atomic_long_set(&pblk->read_high_ecc, 0);
        atomic_long_set(&pblk->read_failed_gc, 0);
        atomic_long_set(&pblk->write_failed, 0);
        atomic_long_set(&pblk->erase_failed, 0);

        ret = pblk_core_init(pblk);
        if (ret) {
                pr_err("pblk: could not initialize core\n");
                goto fail;
        }

        ret = pblk_lines_init(pblk);
        if (ret) {
                pr_err("pblk: could not initialize lines\n");
                goto fail_free_core;
        }

        ret = pblk_rwb_init(pblk);
        if (ret) {
                pr_err("pblk: could not initialize write buffer\n");
                goto fail_free_lines;
        }

        ret = pblk_l2p_init(pblk, flags & NVM_TARGET_FACTORY);
        if (ret) {
                pr_err("pblk: could not initialize maps\n");
                goto fail_free_rwb;
        }

        ret = pblk_writer_init(pblk);
        if (ret) {
                if (ret != -EINTR)
                        pr_err("pblk: could not initialize write thread\n");
                goto fail_free_l2p;
        }

        ret = pblk_gc_init(pblk);
        if (ret) {
                pr_err("pblk: could not initialize gc\n");
                goto fail_stop_writer;
        }

        /* inherit the size from the underlying device */
        blk_queue_logical_block_size(tqueue, queue_physical_block_size(bqueue));
        blk_queue_max_hw_sectors(tqueue, queue_max_hw_sectors(bqueue));

        blk_queue_write_cache(tqueue, true, false);

        tqueue->limits.discard_granularity = geo->clba * geo->csecs;
        tqueue->limits.discard_alignment = 0;
        blk_queue_max_discard_sectors(tqueue, UINT_MAX >> 9);
        blk_queue_flag_set(QUEUE_FLAG_DISCARD, tqueue);

        pr_info("pblk(%s): luns:%u, lines:%d, secs:%llu, buf entries:%u\n",
                        tdisk->disk_name,
                        geo->all_luns, pblk->l_mg.nr_lines,
                        (unsigned long long)pblk->rl.nr_secs,
                        pblk->rwb.nr_entries);

        wake_up_process(pblk->writer_ts);

        /* Check if we need to start GC */
        pblk_gc_should_kick(pblk);

        return pblk;

fail_stop_writer:
        pblk_writer_stop(pblk);
fail_free_l2p:
        pblk_l2p_free(pblk);
fail_free_rwb:
        pblk_rwb_free(pblk);
fail_free_lines:
        pblk_lines_free(pblk);
fail_free_core:
        pblk_core_free(pblk);
fail:
        kfree(pblk);
        return ERR_PTR(ret);
}

/* physical block device target */
static struct nvm_tgt_type tt_pblk = {
        .name = "pblk",
        .version = {1, 0, 0},

        .make_rq = pblk_make_rq,
        .capacity = pblk_capacity,

        .init = pblk_init,
        .exit = pblk_exit,

        .sysfs_init = pblk_sysfs_init,
        .sysfs_exit = pblk_sysfs_exit,
        .owner = THIS_MODULE,
};

static int __init pblk_module_init(void)
{
        int ret;

        pblk_bio_set = bioset_create(BIO_POOL_SIZE, 0, 0);
        if (!pblk_bio_set)
                return -ENOMEM;

        ret = nvm_register_tgt_type(&tt_pblk);
        if (ret)
                bioset_free(pblk_bio_set);

        return ret;
}

static void pblk_module_exit(void)
{
        bioset_free(pblk_bio_set);
        nvm_unregister_tgt_type(&tt_pblk);
}

module_init(pblk_module_init);
module_exit(pblk_module_exit);
MODULE_AUTHOR("Javier Gonzalez <javier@cnexlabs.com>");
MODULE_AUTHOR("Matias Bjorling <matias@cnexlabs.com>");
MODULE_LICENSE("GPL v2");
MODULE_DESCRIPTION("Physical Block-Device for Open-Channel SSDs");