/*
 * Copyright (C) 2016 CNEX Labs
 * Initial: Javier Gonzalez <javier@cnexlabs.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version
 * 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * pblk-recovery.c - pblk's recovery path
 */

#include "pblk.h"
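
/*
 * Workqueue handler that re-submits the sectors of a failed write request.
 * The sectors are still cached in the write buffer (pblk->rwb) and are
 * described by the recovery context prepared in pblk_recov_setup_rq().
 */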
void pblk_submit_rec(struct work_struct *work)
{
	struct pblk_rec_ctx *recovery =
			container_of(work, struct pblk_rec_ctx, ws_rec);
	struct pblk *pblk = recovery->pblk;
	struct nvm_rq *rqd = recovery->rqd;
	struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd);
	struct bio *bio;
	unsigned int nr_rec_secs;
	unsigned int pgs_read;
	int ret;

	nr_rec_secs = bitmap_weight((unsigned long int *)&rqd->ppa_status,
								NVM_MAX_VLBA);

	bio = bio_alloc(GFP_KERNEL, nr_rec_secs);

	bio->bi_iter.bi_sector = 0;
	bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
	rqd->bio = bio;
	rqd->nr_ppas = nr_rec_secs;

	pgs_read = pblk_rb_read_to_bio_list(&pblk->rwb, bio, &recovery->failed,
								nr_rec_secs);
	if (pgs_read != nr_rec_secs) {
		pr_err("pblk: could not read recovery entries\n");
		goto err;
	}

	if (pblk_setup_w_rec_rq(pblk, rqd, c_ctx)) {
		pr_err("pblk: could not setup recovery request\n");
		goto err;
	}

#ifdef CONFIG_NVM_DEBUG
	atomic_long_add(nr_rec_secs, &pblk->recov_writes);
#endif

	ret = pblk_submit_io(pblk, rqd);
	if (ret) {
		pr_err("pblk: I/O submission failed: %d\n", ret);
		goto err;
	}

	mempool_free(recovery, pblk->rec_pool);
	return;

err:
	bio_put(bio);
	pblk_free_rqd(pblk, rqd, PBLK_WRITE);
}
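
/*
 * Split a partially completed write request: the first 'comp' entries stay
 * accounted to the original context (c_ctx), while the remaining entries are
 * moved into a new write request that pblk_submit_rec() will re-issue.
 */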
int pblk_recov_setup_rq(struct pblk *pblk, struct pblk_c_ctx *c_ctx,
			struct pblk_rec_ctx *recovery, u64 *comp_bits,
			unsigned int comp)
{
	struct nvm_rq *rec_rqd;
	struct pblk_c_ctx *rec_ctx;
	int nr_entries = c_ctx->nr_valid + c_ctx->nr_padded;

	rec_rqd = pblk_alloc_rqd(pblk, PBLK_WRITE);
	rec_ctx = nvm_rq_to_pdu(rec_rqd);

	/* Copy completion bitmap, but exclude the first X completed entries */
	bitmap_shift_right((unsigned long int *)&rec_rqd->ppa_status,
				(unsigned long int *)comp_bits,
				comp, NVM_MAX_VLBA);

	/* Save the context for the entries that need to be re-written and
	 * update current context with the completed entries.
	 */
	rec_ctx->sentry = pblk_rb_wrap_pos(&pblk->rwb, c_ctx->sentry + comp);
	if (comp >= c_ctx->nr_valid) {
		rec_ctx->nr_valid = 0;
		rec_ctx->nr_padded = nr_entries - comp;

		c_ctx->nr_padded = comp - c_ctx->nr_valid;
	} else {
		rec_ctx->nr_valid = c_ctx->nr_valid - comp;
		rec_ctx->nr_padded = c_ctx->nr_padded;

		c_ctx->nr_valid = comp;
		c_ctx->nr_padded = 0;
	}

	recovery->rqd = rec_rqd;
	recovery->pblk = pblk;

	return 0;
}
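
/*
 * Sanity-check end-of-line metadata: return 0 when both the CRC and the magic
 * identifier match, non-zero otherwise.
 */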
int pblk_recov_check_emeta(struct pblk *pblk, struct line_emeta *emeta_buf)
{
	u32 crc;

	crc = pblk_calc_emeta_crc(pblk, emeta_buf);
	if (le32_to_cpu(emeta_buf->crc) != crc)
		return 1;

	if (le32_to_cpu(emeta_buf->header.identifier) != PBLK_MAGIC)
		return 1;

	return 0;
}
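
/*
 * Recover the L2P mappings of a fully written line from the LBA list stored
 * in its end-of-line metadata (emeta), skipping bad blocks and invalidating
 * sectors marked as ADDR_EMPTY.
 */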
static int pblk_recov_l2p_from_emeta(struct pblk *pblk, struct pblk_line *line)
{
	struct nvm_tgt_dev *dev = pblk->dev;
	struct nvm_geo *geo = &dev->geo;
	struct pblk_line_meta *lm = &pblk->lm;
	struct pblk_emeta *emeta = line->emeta;
	struct line_emeta *emeta_buf = emeta->buf;
	__le64 *lba_list;
	u64 data_start, data_end;
	u64 nr_valid_lbas, nr_lbas = 0;
	u64 i;

	lba_list = emeta_to_lbas(pblk, emeta_buf);
	if (!lba_list)
		return 1;

	data_start = pblk_line_smeta_start(pblk, line) + lm->smeta_sec;
	data_end = line->emeta_ssec;
	nr_valid_lbas = le64_to_cpu(emeta_buf->nr_valid_lbas);

	for (i = data_start; i < data_end; i++) {
		struct ppa_addr ppa;
		int pos;

		ppa = addr_to_gen_ppa(pblk, i, line->id);
		pos = pblk_ppa_to_pos(geo, ppa);

		/* Do not update bad blocks */
		if (test_bit(pos, line->blk_bitmap))
			continue;

		if (le64_to_cpu(lba_list[i]) == ADDR_EMPTY) {
			spin_lock(&line->lock);
			if (test_and_set_bit(i, line->invalid_bitmap))
				WARN_ONCE(1, "pblk: rec. double invalidate:\n");
			else
				le32_add_cpu(line->vsc, -1);
			spin_unlock(&line->lock);

			continue;
		}

		pblk_update_map(pblk, le64_to_cpu(lba_list[i]), ppa);
		nr_lbas++;
	}

	if (nr_valid_lbas != nr_lbas)
		pr_err("pblk: line %d - inconsistent lba list(%llu/%llu)\n",
				line->id, nr_valid_lbas, nr_lbas);

	line->left_msecs = 0;

	return 0;
}
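
/* Number of sectors in a line available for user data: the line size minus
 * the start/end metadata and the capacity lost to bad blocks.
 */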
static int pblk_calc_sec_in_line(struct pblk *pblk, struct pblk_line *line)
{
	struct nvm_tgt_dev *dev = pblk->dev;
	struct nvm_geo *geo = &dev->geo;
	struct pblk_line_meta *lm = &pblk->lm;
	int nr_bb = bitmap_weight(line->blk_bitmap, lm->blk_per_line);

	return lm->sec_per_line - lm->smeta_sec - lm->emeta_sec[0] -
				nr_bb * geo->clba;
}
struct pblk_recov_alloc {
	struct ppa_addr *ppa_list;
	struct pblk_sec_meta *meta_list;
	struct nvm_rq *rqd;
	void *data;
	dma_addr_t dma_ppa_list;
	dma_addr_t dma_meta_list;
};
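
/*
 * Read back the sectors between r_ptr and the line's current write pointer
 * and recover their L2P mappings from the per-sector out-of-band (OOB)
 * metadata. Used by pblk_recov_scan_all_oob() once the line has been padded.
 */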
static int pblk_recov_read_oob(struct pblk *pblk, struct pblk_line *line,
			       struct pblk_recov_alloc p, u64 r_ptr)
{
	struct nvm_tgt_dev *dev = pblk->dev;
	struct nvm_geo *geo = &dev->geo;
	struct ppa_addr *ppa_list;
	struct pblk_sec_meta *meta_list;
	struct nvm_rq *rqd;
	struct bio *bio;
	void *data;
	dma_addr_t dma_ppa_list, dma_meta_list;
	u64 r_ptr_int;
	int left_ppas;
	int rq_ppas, rq_len;
	int i, j;
	int ret = 0;

	ppa_list = p.ppa_list;
	meta_list = p.meta_list;
	rqd = p.rqd;
	data = p.data;
	dma_ppa_list = p.dma_ppa_list;
	dma_meta_list = p.dma_meta_list;

	left_ppas = line->cur_sec - r_ptr;
	if (!left_ppas)
		return 0;

	r_ptr_int = r_ptr;

next_read_rq:
	memset(rqd, 0, pblk_g_rq_size);

	rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
	if (!rq_ppas)
		rq_ppas = pblk->min_write_pgs;
	rq_len = rq_ppas * geo->csecs;

	bio = bio_map_kern(dev->q, data, rq_len, GFP_KERNEL);
	if (IS_ERR(bio))
		return PTR_ERR(bio);

	bio->bi_iter.bi_sector = 0; /* internal bio */
	bio_set_op_attrs(bio, REQ_OP_READ, 0);

	rqd->bio = bio;
	rqd->opcode = NVM_OP_PREAD;
	rqd->meta_list = meta_list;
	rqd->nr_ppas = rq_ppas;
	rqd->ppa_list = ppa_list;
	rqd->dma_ppa_list = dma_ppa_list;
	rqd->dma_meta_list = dma_meta_list;

	if (pblk_io_aligned(pblk, rq_ppas))
		rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_SEQUENTIAL);
	else
		rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM);

	for (i = 0; i < rqd->nr_ppas; ) {
		struct ppa_addr ppa;
		int pos;

		ppa = addr_to_gen_ppa(pblk, r_ptr_int, line->id);
		pos = pblk_ppa_to_pos(geo, ppa);

		while (test_bit(pos, line->blk_bitmap)) {
			r_ptr_int += pblk->min_write_pgs;
			ppa = addr_to_gen_ppa(pblk, r_ptr_int, line->id);
			pos = pblk_ppa_to_pos(geo, ppa);
		}

		for (j = 0; j < pblk->min_write_pgs; j++, i++, r_ptr_int++)
			rqd->ppa_list[i] =
				addr_to_gen_ppa(pblk, r_ptr_int, line->id);
	}

	/* If read fails, more padding is needed */
	ret = pblk_submit_io_sync(pblk, rqd);
	if (ret) {
		pr_err("pblk: I/O submission failed: %d\n", ret);
		return ret;
	}

	atomic_dec(&pblk->inflight_io);

	/* At this point, the read should not fail. If it does, it is a problem
	 * we cannot recover from here. Need FTL log.
	 */
	if (rqd->error && rqd->error != NVM_RSP_WARN_HIGHECC) {
		pr_err("pblk: L2P recovery failed (%d)\n", rqd->error);
		return -EINTR;
	}

	for (i = 0; i < rqd->nr_ppas; i++) {
		u64 lba = le64_to_cpu(meta_list[i].lba);

		if (lba == ADDR_EMPTY || lba > pblk->rl.nr_secs)
			continue;

		pblk_update_map(pblk, lba, rqd->ppa_list[i]);
	}

	left_ppas -= rq_ppas;
	if (left_ppas > 0)
		goto next_read_rq;

	return 0;
}
static void pblk_recov_complete(struct kref *ref)
{
	struct pblk_pad_rq *pad_rq = container_of(ref, struct pblk_pad_rq, ref);

	complete(&pad_rq->wait);
}

static void pblk_end_io_recov(struct nvm_rq *rqd)
{
	struct pblk_pad_rq *pad_rq = rqd->private;
	struct pblk *pblk = pad_rq->pblk;

	pblk_up_page(pblk, rqd->ppa_list, rqd->nr_ppas);

	pblk_free_rqd(pblk, rqd, PBLK_WRITE_INT);

	atomic_dec(&pblk->inflight_io);
	kref_put(&pad_rq->ref, pblk_recov_complete);
}
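
/*
 * Pad up to left_ppas sectors of an open line with empty (ADDR_EMPTY) data so
 * that partially written flash pages are completed and can be read back
 * safely. Pad requests are submitted asynchronously and reference-counted
 * through pad_rq; the function waits (with a timeout) for all of them to
 * complete before returning.
 */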
static int pblk_recov_pad_oob(struct pblk *pblk, struct pblk_line *line,
			      int left_ppas)
{
	struct nvm_tgt_dev *dev = pblk->dev;
	struct nvm_geo *geo = &dev->geo;
	struct ppa_addr *ppa_list;
	struct pblk_sec_meta *meta_list;
	struct pblk_pad_rq *pad_rq;
	struct nvm_rq *rqd;
	struct bio *bio;
	void *data;
	dma_addr_t dma_ppa_list, dma_meta_list;
	__le64 *lba_list = emeta_to_lbas(pblk, line->emeta->buf);
	u64 w_ptr = line->cur_sec;
	int left_line_ppas, rq_ppas, rq_len;
	int i, j;
	int ret = 0;

	spin_lock(&line->lock);
	left_line_ppas = line->left_msecs;
	spin_unlock(&line->lock);

	pad_rq = kmalloc(sizeof(struct pblk_pad_rq), GFP_KERNEL);
	if (!pad_rq)
		return -ENOMEM;

	data = vzalloc(pblk->max_write_pgs * geo->csecs);
	if (!data) {
		ret = -ENOMEM;
		goto free_rq;
	}

	pad_rq->pblk = pblk;
	init_completion(&pad_rq->wait);
	kref_init(&pad_rq->ref);

next_pad_rq:
	rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
	if (rq_ppas < pblk->min_write_pgs) {
		pr_err("pblk: corrupted pad line %d\n", line->id);
		goto fail_free_pad;
	}

	rq_len = rq_ppas * geo->csecs;

	meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, &dma_meta_list);
	if (!meta_list) {
		ret = -ENOMEM;
		goto fail_free_pad;
	}

	ppa_list = (void *)(meta_list) + pblk_dma_meta_size;
	dma_ppa_list = dma_meta_list + pblk_dma_meta_size;

	bio = pblk_bio_map_addr(pblk, data, rq_ppas, rq_len,
						PBLK_VMALLOC_META, GFP_KERNEL);
	if (IS_ERR(bio)) {
		ret = PTR_ERR(bio);
		goto fail_free_meta;
	}

	bio->bi_iter.bi_sector = 0; /* internal bio */
	bio_set_op_attrs(bio, REQ_OP_WRITE, 0);

	rqd = pblk_alloc_rqd(pblk, PBLK_WRITE_INT);

	rqd->bio = bio;
	rqd->opcode = NVM_OP_PWRITE;
	rqd->flags = pblk_set_progr_mode(pblk, PBLK_WRITE);
	rqd->meta_list = meta_list;
	rqd->nr_ppas = rq_ppas;
	rqd->ppa_list = ppa_list;
	rqd->dma_ppa_list = dma_ppa_list;
	rqd->dma_meta_list = dma_meta_list;
	rqd->end_io = pblk_end_io_recov;
	rqd->private = pad_rq;

	for (i = 0; i < rqd->nr_ppas; ) {
		struct ppa_addr ppa;
		int pos;

		w_ptr = pblk_alloc_page(pblk, line, pblk->min_write_pgs);
		ppa = addr_to_gen_ppa(pblk, w_ptr, line->id);
		pos = pblk_ppa_to_pos(geo, ppa);

		while (test_bit(pos, line->blk_bitmap)) {
			w_ptr += pblk->min_write_pgs;
			ppa = addr_to_gen_ppa(pblk, w_ptr, line->id);
			pos = pblk_ppa_to_pos(geo, ppa);
		}

		for (j = 0; j < pblk->min_write_pgs; j++, i++, w_ptr++) {
			struct ppa_addr dev_ppa;
			__le64 addr_empty = cpu_to_le64(ADDR_EMPTY);

			dev_ppa = addr_to_gen_ppa(pblk, w_ptr, line->id);

			pblk_map_invalidate(pblk, dev_ppa);
			lba_list[w_ptr] = meta_list[i].lba = addr_empty;
			rqd->ppa_list[i] = dev_ppa;
		}
	}

	kref_get(&pad_rq->ref);
	pblk_down_page(pblk, rqd->ppa_list, rqd->nr_ppas);

	ret = pblk_submit_io(pblk, rqd);
	if (ret) {
		pr_err("pblk: I/O submission failed: %d\n", ret);
		pblk_up_page(pblk, rqd->ppa_list, rqd->nr_ppas);
		goto fail_free_bio;
	}

	left_line_ppas -= rq_ppas;
	left_ppas -= rq_ppas;
	if (left_ppas && left_line_ppas)
		goto next_pad_rq;

	kref_put(&pad_rq->ref, pblk_recov_complete);

	if (!wait_for_completion_io_timeout(&pad_rq->wait,
				msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
		pr_err("pblk: pad write timed out\n");
		ret = -ETIME;
	}

	if (!pblk_line_is_full(line))
		pr_err("pblk: corrupted padded line: %d\n", line->id);

	vfree(data);
free_rq:
	kfree(pad_rq);
	return ret;

fail_free_bio:
	bio_put(bio);
fail_free_meta:
	nvm_dev_dma_free(dev->parent, meta_list, dma_meta_list);
fail_free_pad:
	kfree(pad_rq);
	vfree(data);
	return ret;
}
/* When this function is called, it means that not all upper pages have been
 * written in a page that contains valid data. In order to recover this data,
 * we first find the write pointer on the device, then we pad all necessary
 * sectors, and finally attempt to read the valid data.
 */
static int pblk_recov_scan_all_oob(struct pblk *pblk, struct pblk_line *line,
				   struct pblk_recov_alloc p)
{
	struct nvm_tgt_dev *dev = pblk->dev;
	struct nvm_geo *geo = &dev->geo;
	struct ppa_addr *ppa_list;
	struct pblk_sec_meta *meta_list;
	struct nvm_rq *rqd;
	struct bio *bio;
	void *data;
	dma_addr_t dma_ppa_list, dma_meta_list;
	u64 w_ptr = 0, r_ptr;
	int rq_ppas, rq_len;
	int i, j;
	int ret = 0;
	int rec_round;
	int left_ppas = pblk_calc_sec_in_line(pblk, line) - line->cur_sec;

	ppa_list = p.ppa_list;
	meta_list = p.meta_list;
	rqd = p.rqd;
	data = p.data;
	dma_ppa_list = p.dma_ppa_list;
	dma_meta_list = p.dma_meta_list;

	/* we could recover up until the line write pointer */
	r_ptr = line->cur_sec;
	rec_round = 0;

next_rq:
	memset(rqd, 0, pblk_g_rq_size);

	rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
	if (!rq_ppas)
		rq_ppas = pblk->min_write_pgs;
	rq_len = rq_ppas * geo->csecs;

	bio = bio_map_kern(dev->q, data, rq_len, GFP_KERNEL);
	if (IS_ERR(bio))
		return PTR_ERR(bio);

	bio->bi_iter.bi_sector = 0; /* internal bio */
	bio_set_op_attrs(bio, REQ_OP_READ, 0);

	rqd->bio = bio;
	rqd->opcode = NVM_OP_PREAD;
	rqd->meta_list = meta_list;
	rqd->nr_ppas = rq_ppas;
	rqd->ppa_list = ppa_list;
	rqd->dma_ppa_list = dma_ppa_list;
	rqd->dma_meta_list = dma_meta_list;

	if (pblk_io_aligned(pblk, rq_ppas))
		rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_SEQUENTIAL);
	else
		rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM);

	for (i = 0; i < rqd->nr_ppas; ) {
		struct ppa_addr ppa;
		int pos;

		w_ptr = pblk_alloc_page(pblk, line, pblk->min_write_pgs);
		ppa = addr_to_gen_ppa(pblk, w_ptr, line->id);
		pos = pblk_ppa_to_pos(geo, ppa);

		while (test_bit(pos, line->blk_bitmap)) {
			w_ptr += pblk->min_write_pgs;
			ppa = addr_to_gen_ppa(pblk, w_ptr, line->id);
			pos = pblk_ppa_to_pos(geo, ppa);
		}

		for (j = 0; j < pblk->min_write_pgs; j++, i++, w_ptr++)
			rqd->ppa_list[i] =
				addr_to_gen_ppa(pblk, w_ptr, line->id);
	}

	ret = pblk_submit_io_sync(pblk, rqd);
	if (ret) {
		pr_err("pblk: I/O submission failed: %d\n", ret);
		return ret;
	}

	atomic_dec(&pblk->inflight_io);

	/* This should not happen since the read failed during normal recovery,
	 * but the media works funny sometimes...
	 */
	if (!rec_round++ && !rqd->error) {
		rec_round = 0;
		for (i = 0; i < rqd->nr_ppas; i++, r_ptr++) {
			u64 lba = le64_to_cpu(meta_list[i].lba);

			if (lba == ADDR_EMPTY || lba > pblk->rl.nr_secs)
				continue;

			pblk_update_map(pblk, lba, rqd->ppa_list[i]);
		}
	}

	/* Reached the end of the written line */
	if (rqd->error == NVM_RSP_ERR_EMPTYPAGE) {
		int pad_secs, nr_error_bits, bit;
		int ret;

		bit = find_first_bit((void *)&rqd->ppa_status, rqd->nr_ppas);
		nr_error_bits = rqd->nr_ppas - bit;

		/* Roll back failed sectors */
		line->cur_sec -= nr_error_bits;
		line->left_msecs += nr_error_bits;
		bitmap_clear(line->map_bitmap, line->cur_sec, nr_error_bits);

		pad_secs = pblk_pad_distance(pblk);
		if (pad_secs > line->left_msecs)
			pad_secs = line->left_msecs;

		ret = pblk_recov_pad_oob(pblk, line, pad_secs);
		if (ret)
			pr_err("pblk: OOB padding failed (err:%d)\n", ret);

		ret = pblk_recov_read_oob(pblk, line, p, r_ptr);
		if (ret)
			pr_err("pblk: OOB read failed (err:%d)\n", ret);

		left_ppas = 0;
	}

	left_ppas -= rq_ppas;
	if (left_ppas > 0)
		goto next_rq;

	return ret;
}
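
/*
 * First recovery pass: read the line sequentially from the beginning and
 * rebuild its L2P mappings from the OOB metadata. Sets *done to 0 if the read
 * stopped on an error other than an empty page, in which case the caller
 * falls back to the slower pblk_recov_scan_all_oob() path.
 */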
static int pblk_recov_scan_oob(struct pblk *pblk, struct pblk_line *line,
			       struct pblk_recov_alloc p, int *done)
{
	struct nvm_tgt_dev *dev = pblk->dev;
	struct nvm_geo *geo = &dev->geo;
	struct ppa_addr *ppa_list;
	struct pblk_sec_meta *meta_list;
	struct nvm_rq *rqd;
	struct bio *bio;
	void *data;
	dma_addr_t dma_ppa_list, dma_meta_list;
	u64 paddr;
	int rq_ppas, rq_len;
	int i, j;
	int ret = 0;
	int left_ppas = pblk_calc_sec_in_line(pblk, line);

	ppa_list = p.ppa_list;
	meta_list = p.meta_list;
	rqd = p.rqd;
	data = p.data;
	dma_ppa_list = p.dma_ppa_list;
	dma_meta_list = p.dma_meta_list;

	*done = 1;

next_rq:
	memset(rqd, 0, pblk_g_rq_size);

	rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
	if (!rq_ppas)
		rq_ppas = pblk->min_write_pgs;
	rq_len = rq_ppas * geo->csecs;

	bio = bio_map_kern(dev->q, data, rq_len, GFP_KERNEL);
	if (IS_ERR(bio))
		return PTR_ERR(bio);

	bio->bi_iter.bi_sector = 0; /* internal bio */
	bio_set_op_attrs(bio, REQ_OP_READ, 0);

	rqd->bio = bio;
	rqd->opcode = NVM_OP_PREAD;
	rqd->meta_list = meta_list;
	rqd->nr_ppas = rq_ppas;
	rqd->ppa_list = ppa_list;
	rqd->dma_ppa_list = dma_ppa_list;
	rqd->dma_meta_list = dma_meta_list;

	if (pblk_io_aligned(pblk, rq_ppas))
		rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_SEQUENTIAL);
	else
		rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM);

	for (i = 0; i < rqd->nr_ppas; ) {
		struct ppa_addr ppa;
		int pos;

		paddr = pblk_alloc_page(pblk, line, pblk->min_write_pgs);
		ppa = addr_to_gen_ppa(pblk, paddr, line->id);
		pos = pblk_ppa_to_pos(geo, ppa);

		while (test_bit(pos, line->blk_bitmap)) {
			paddr += pblk->min_write_pgs;
			ppa = addr_to_gen_ppa(pblk, paddr, line->id);
			pos = pblk_ppa_to_pos(geo, ppa);
		}

		for (j = 0; j < pblk->min_write_pgs; j++, i++, paddr++)
			rqd->ppa_list[i] =
				addr_to_gen_ppa(pblk, paddr, line->id);
	}

	ret = pblk_submit_io_sync(pblk, rqd);
	if (ret) {
		pr_err("pblk: I/O submission failed: %d\n", ret);
		bio_put(bio);
		return ret;
	}

	atomic_dec(&pblk->inflight_io);

	/* Reached the end of the written line */
	if (rqd->error) {
		int nr_error_bits, bit;

		bit = find_first_bit((void *)&rqd->ppa_status, rqd->nr_ppas);
		nr_error_bits = rqd->nr_ppas - bit;

		/* Roll back failed sectors */
		line->cur_sec -= nr_error_bits;
		line->left_msecs += nr_error_bits;
		bitmap_clear(line->map_bitmap, line->cur_sec, nr_error_bits);

		left_ppas = 0;
		rqd->nr_ppas = bit;

		if (rqd->error != NVM_RSP_ERR_EMPTYPAGE)
			*done = 0;
	}

	for (i = 0; i < rqd->nr_ppas; i++) {
		u64 lba = le64_to_cpu(meta_list[i].lba);

		if (lba == ADDR_EMPTY || lba > pblk->rl.nr_secs)
			continue;

		pblk_update_map(pblk, lba, rqd->ppa_list[i]);
	}

	left_ppas -= rq_ppas;
	if (left_ppas > 0)
		goto next_rq;

	return ret;
}
/* Scan line for lbas in the out-of-band (OOB) area */
static int pblk_recov_l2p_from_oob(struct pblk *pblk, struct pblk_line *line)
{
	struct nvm_tgt_dev *dev = pblk->dev;
	struct nvm_geo *geo = &dev->geo;
	struct nvm_rq *rqd;
	struct ppa_addr *ppa_list;
	struct pblk_sec_meta *meta_list;
	struct pblk_recov_alloc p;
	void *data;
	dma_addr_t dma_ppa_list, dma_meta_list;
	int done, ret = 0;

	meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, &dma_meta_list);
	if (!meta_list)
		return -ENOMEM;

	ppa_list = (void *)(meta_list) + pblk_dma_meta_size;
	dma_ppa_list = dma_meta_list + pblk_dma_meta_size;

	data = kcalloc(pblk->max_write_pgs, geo->csecs, GFP_KERNEL);
	if (!data) {
		ret = -ENOMEM;
		goto free_meta_list;
	}

	rqd = pblk_alloc_rqd(pblk, PBLK_READ);

	p.ppa_list = ppa_list;
	p.meta_list = meta_list;
	p.rqd = rqd;
	p.data = data;
	p.dma_ppa_list = dma_ppa_list;
	p.dma_meta_list = dma_meta_list;

	ret = pblk_recov_scan_oob(pblk, line, p, &done);
	if (ret) {
		pr_err("pblk: could not recover L2P from OOB\n");
		goto out;
	}

	if (!done) {
		ret = pblk_recov_scan_all_oob(pblk, line, p);
		if (ret) {
			pr_err("pblk: could not recover L2P from OOB\n");
			goto out;
		}
	}

	if (pblk_line_is_full(line))
		pblk_line_recov_close(pblk, line);

out:
	kfree(data);
free_meta_list:
	nvm_dev_dma_free(dev->parent, meta_list, dma_meta_list);

	return ret;
}
/* Insert lines ordered by sequence number (seq_nr) on list */
static void pblk_recov_line_add_ordered(struct list_head *head,
					struct pblk_line *line)
{
	struct pblk_line *t = NULL;

	list_for_each_entry(t, head, list)
		if (t->seq_nr > line->seq_nr)
			break;

	__list_add(&line->list, t->list.prev, &t->list);
}
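
/*
 * Walk backwards from the end of the line, skipping bad blocks, to find the
 * first sector occupied by the end-of-line metadata (emeta).
 */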
static u64 pblk_line_emeta_start(struct pblk *pblk, struct pblk_line *line)
{
	struct nvm_tgt_dev *dev = pblk->dev;
	struct nvm_geo *geo = &dev->geo;
	struct pblk_line_meta *lm = &pblk->lm;
	unsigned int emeta_secs;
	u64 emeta_start;
	struct ppa_addr ppa;
	int pos;

	emeta_secs = lm->emeta_sec[0];
	emeta_start = lm->sec_per_line;

	while (emeta_secs) {
		emeta_start--;
		ppa = addr_to_gen_ppa(pblk, emeta_start, line->id);
		pos = pblk_ppa_to_pos(geo, ppa);
		if (!test_bit(pos, line->blk_bitmap))
			emeta_secs--;
	}

	return emeta_start;
}
static int pblk_recov_check_line_version(struct pblk *pblk,
					 struct line_emeta *emeta)
{
	struct line_header *header = &emeta->header;

	if (header->version_major != EMETA_VERSION_MAJOR) {
		pr_err("pblk: line major version mismatch: %d, expected: %d\n",
		       header->version_major, EMETA_VERSION_MAJOR);
		return 1;
	}

#ifdef CONFIG_NVM_DEBUG
	if (header->version_minor > EMETA_VERSION_MINOR)
		pr_info("pblk: newer line minor version found: %d\n",
			header->version_minor);
#endif

	return 0;
}
static void pblk_recov_wa_counters(struct pblk *pblk,
				   struct line_emeta *emeta)
{
	struct pblk_line_meta *lm = &pblk->lm;
	struct line_header *header = &emeta->header;
	struct wa_counters *wa = emeta_to_wa(lm, emeta);

	/* WA counters were introduced in emeta version 0.2 */
	if (header->version_major > 0 || header->version_minor >= 2) {
		u64 user = le64_to_cpu(wa->user);
		u64 pad = le64_to_cpu(wa->pad);
		u64 gc = le64_to_cpu(wa->gc);

		atomic64_set(&pblk->user_wa, user);
		atomic64_set(&pblk->pad_wa, pad);
		atomic64_set(&pblk->gc_wa, gc);

		pblk->user_rst_wa = user;
		pblk->pad_rst_wa = pad;
		pblk->gc_rst_wa = gc;
	}
}
static int pblk_line_was_written(struct pblk_line *line,
				 struct pblk_line_meta *lm)
{
	int i;
	int state_mask = NVM_CHK_ST_OFFLINE | NVM_CHK_ST_FREE;

	for (i = 0; i < lm->blk_per_line; i++) {
		if (!(line->chks[i].state & state_mask))
			return 1;
	}

	return 0;
}
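
/*
 * Entry point for L2P table recovery. Scans all lines, orders the ones that
 * were written by sequence number and recovers each of them from emeta when
 * possible, falling back to an OOB scan otherwise. Returns the open data line
 * to continue writing to, NULL if every recovered line is closed, or an
 * ERR_PTR on incompatible on-media metadata versions.
 */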
struct pblk_line *pblk_recov_l2p(struct pblk *pblk)
{
	struct pblk_line_meta *lm = &pblk->lm;
	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
	struct pblk_line *line, *tline, *data_line = NULL;
	struct pblk_smeta *smeta;
	struct pblk_emeta *emeta;
	struct line_smeta *smeta_buf;
	int found_lines = 0, recovered_lines = 0, open_lines = 0;
	int is_next = 0;
	int meta_line;
	int i, valid_uuid = 0;
	LIST_HEAD(recov_list);

	/* TODO: Implement FTL snapshot */

	/* Scan recovery - takes place when FTL snapshot fails */
	spin_lock(&l_mg->free_lock);
	meta_line = find_first_zero_bit(&l_mg->meta_bitmap, PBLK_DATA_LINES);
	set_bit(meta_line, &l_mg->meta_bitmap);
	smeta = l_mg->sline_meta[meta_line];
	emeta = l_mg->eline_meta[meta_line];
	smeta_buf = (struct line_smeta *)smeta;
	spin_unlock(&l_mg->free_lock);

	/* Order data lines using their sequence number */
	for (i = 0; i < l_mg->nr_lines; i++) {
		u32 crc;

		line = &pblk->lines[i];

		memset(smeta, 0, lm->smeta_len);
		line->smeta = smeta;
		line->lun_bitmap = ((void *)(smeta_buf)) +
						sizeof(struct line_smeta);

		if (!pblk_line_was_written(line, lm))
			continue;

		/* Lines that cannot be read are assumed as not written here */
		if (pblk_line_read_smeta(pblk, line))
			continue;

		crc = pblk_calc_smeta_crc(pblk, smeta_buf);
		if (le32_to_cpu(smeta_buf->crc) != crc)
			continue;

		if (le32_to_cpu(smeta_buf->header.identifier) != PBLK_MAGIC)
			continue;

		if (smeta_buf->header.version_major != SMETA_VERSION_MAJOR) {
			pr_err("pblk: found incompatible line version %u\n",
					smeta_buf->header.version_major);
			return ERR_PTR(-EINVAL);
		}

		/* The first valid instance uuid is used for initialization */
		if (!valid_uuid) {
			memcpy(pblk->instance_uuid, smeta_buf->header.uuid, 16);
			valid_uuid = 1;
		}

		if (memcmp(pblk->instance_uuid, smeta_buf->header.uuid, 16)) {
			pr_debug("pblk: ignore line %u due to uuid mismatch\n",
					i);
			continue;
		}

		/* Update line metadata */
		spin_lock(&line->lock);
		line->id = le32_to_cpu(smeta_buf->header.id);
		line->type = le16_to_cpu(smeta_buf->header.type);
		line->seq_nr = le64_to_cpu(smeta_buf->seq_nr);
		spin_unlock(&line->lock);

		/* Update general metadata */
		spin_lock(&l_mg->free_lock);
		if (line->seq_nr >= l_mg->d_seq_nr)
			l_mg->d_seq_nr = line->seq_nr + 1;
		l_mg->nr_free_lines--;
		spin_unlock(&l_mg->free_lock);

		if (pblk_line_recov_alloc(pblk, line))
			goto out;

		pblk_recov_line_add_ordered(&recov_list, line);
		found_lines++;
		pr_debug("pblk: recovering data line %d, seq:%llu\n",
						line->id, smeta_buf->seq_nr);
	}

	if (!found_lines) {
		pblk_setup_uuid(pblk);

		spin_lock(&l_mg->free_lock);
		WARN_ON_ONCE(!test_and_clear_bit(meta_line,
							&l_mg->meta_bitmap));
		spin_unlock(&l_mg->free_lock);

		goto out;
	}

	/* Verify closed blocks and recover this portion of L2P table */
	list_for_each_entry_safe(line, tline, &recov_list, list) {
		recovered_lines++;

		line->emeta_ssec = pblk_line_emeta_start(pblk, line);
		line->emeta = emeta;
		memset(line->emeta->buf, 0, lm->emeta_len[0]);

		if (pblk_line_read_emeta(pblk, line, line->emeta->buf)) {
			pblk_recov_l2p_from_oob(pblk, line);
			goto next;
		}

		if (pblk_recov_check_emeta(pblk, line->emeta->buf)) {
			pblk_recov_l2p_from_oob(pblk, line);
			goto next;
		}

		if (pblk_recov_check_line_version(pblk, line->emeta->buf))
			return ERR_PTR(-EINVAL);

		pblk_recov_wa_counters(pblk, line->emeta->buf);

		if (pblk_recov_l2p_from_emeta(pblk, line))
			pblk_recov_l2p_from_oob(pblk, line);

next:
		if (pblk_line_is_full(line)) {
			struct list_head *move_list;

			spin_lock(&line->lock);
			line->state = PBLK_LINESTATE_CLOSED;
			move_list = pblk_line_gc_list(pblk, line);
			spin_unlock(&line->lock);

			spin_lock(&l_mg->gc_lock);
			list_move_tail(&line->list, move_list);
			spin_unlock(&l_mg->gc_lock);

			kfree(line->map_bitmap);
			line->map_bitmap = NULL;
			line->smeta = NULL;
			line->emeta = NULL;
		} else {
			if (open_lines > 1)
				pr_err("pblk: failed to recover L2P\n");

			open_lines++;
			line->meta_line = meta_line;
			data_line = line;
		}
	}

	spin_lock(&l_mg->free_lock);
	if (!open_lines) {
		WARN_ON_ONCE(!test_and_clear_bit(meta_line,
							&l_mg->meta_bitmap));
		pblk_line_replace_data(pblk);
	} else {
		/* Allocate next line for preparation */
		l_mg->data_next = pblk_line_get(pblk);
		if (l_mg->data_next) {
			l_mg->data_next->seq_nr = l_mg->d_seq_nr++;
			l_mg->data_next->type = PBLK_LINETYPE_DATA;
			is_next = 1;
		}
	}
	spin_unlock(&l_mg->free_lock);

	if (is_next)
		pblk_line_erase(pblk, l_mg->data_next);

out:
	if (found_lines != recovered_lines)
		pr_err("pblk: failed to recover all found lines %d/%d\n",
				found_lines, recovered_lines);

	return data_line;
}
/*
 * Pad current line
 */
int pblk_recov_pad(struct pblk *pblk)
{
	struct pblk_line *line;
	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
	int left_msecs;
	int ret = 0;

	spin_lock(&l_mg->free_lock);
	line = l_mg->data_line;
	left_msecs = line->left_msecs;
	spin_unlock(&l_mg->free_lock);

	ret = pblk_recov_pad_oob(pblk, line, left_msecs);
	if (ret) {
		pr_err("pblk: tear down padding failed (%d)\n", ret);
		return ret;
	}

	pblk_line_close_meta(pblk, line);

	return ret;
}