// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2016 CNEX Labs
 * Initial release: Javier Gonzalez <javier@cnexlabs.com>
 *                  Matias Bjorling <matias@cnexlabs.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version
 * 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * pblk-core.c - pblk's core functionality
 *
 */

#define CREATE_TRACE_POINTS

#include "pblk.h"
#include "pblk-trace.h"

static void pblk_line_mark_bb(struct work_struct *work)
{
	struct pblk_line_ws *line_ws = container_of(work, struct pblk_line_ws,
									ws);
	struct pblk *pblk = line_ws->pblk;
	struct nvm_tgt_dev *dev = pblk->dev;
	struct ppa_addr *ppa = line_ws->priv;
	int ret;

	ret = nvm_set_chunk_meta(dev, ppa, 1, NVM_BLK_T_GRWN_BAD);
	if (ret) {
		struct pblk_line *line;
		int pos;

		line = pblk_ppa_to_line(pblk, *ppa);
		pos = pblk_ppa_to_pos(&dev->geo, *ppa);

		pblk_err(pblk, "failed to mark bb, line:%d, pos:%d\n",
				line->id, pos);
	}

	kfree(ppa);
	mempool_free(line_ws, &pblk->gen_ws_pool);
}

static void pblk_mark_bb(struct pblk *pblk, struct pblk_line *line,
			 struct ppa_addr ppa_addr)
{
	struct nvm_tgt_dev *dev = pblk->dev;
	struct nvm_geo *geo = &dev->geo;
	struct ppa_addr *ppa;
	int pos = pblk_ppa_to_pos(geo, ppa_addr);

	pblk_debug(pblk, "erase failed: line:%d, pos:%d\n", line->id, pos);
	atomic_long_inc(&pblk->erase_failed);

	atomic_dec(&line->blk_in_line);
	if (test_and_set_bit(pos, line->blk_bitmap))
		pblk_err(pblk, "attempted to erase bb: line:%d, pos:%d\n",
							line->id, pos);

	/* Not necessary to mark bad blocks on 2.0 spec. */
	if (geo->version == NVM_OCSSD_SPEC_20)
		return;

	ppa = kmalloc(sizeof(struct ppa_addr), GFP_ATOMIC);
	if (!ppa)
		return;

	*ppa = ppa_addr;
	pblk_gen_run_ws(pblk, NULL, ppa, pblk_line_mark_bb,
						GFP_ATOMIC, pblk->bb_wq);
}

static void __pblk_end_io_erase(struct pblk *pblk, struct nvm_rq *rqd)
{
	struct nvm_tgt_dev *dev = pblk->dev;
	struct nvm_geo *geo = &dev->geo;
	struct nvm_chk_meta *chunk;
	struct pblk_line *line;
	int pos;

	line = pblk_ppa_to_line(pblk, rqd->ppa_addr);
	pos = pblk_ppa_to_pos(geo, rqd->ppa_addr);
	chunk = &line->chks[pos];

	atomic_dec(&line->left_seblks);

	if (rqd->error) {
		trace_pblk_chunk_reset(pblk_disk_name(pblk),
				&rqd->ppa_addr, PBLK_CHUNK_RESET_FAILED);

		chunk->state = NVM_CHK_ST_OFFLINE;
		pblk_mark_bb(pblk, line, rqd->ppa_addr);
	} else {
		trace_pblk_chunk_reset(pblk_disk_name(pblk),
				&rqd->ppa_addr, PBLK_CHUNK_RESET_DONE);

		chunk->state = NVM_CHK_ST_FREE;
	}

	trace_pblk_chunk_state(pblk_disk_name(pblk), &rqd->ppa_addr,
				chunk->state);

	atomic_dec(&pblk->inflight_io);
}

/* Erase completion assumes that only one block is erased at a time */
static void pblk_end_io_erase(struct nvm_rq *rqd)
{
	struct pblk *pblk = rqd->private;

	__pblk_end_io_erase(pblk, rqd);
	mempool_free(rqd, &pblk->e_rq_pool);
}

/*
 * Get information for all chunks from the device.
 *
 * The caller is responsible for freeing (vmalloc) the returned structure
 */
struct nvm_chk_meta *pblk_get_chunk_meta(struct pblk *pblk)
{
	struct nvm_tgt_dev *dev = pblk->dev;
	struct nvm_geo *geo = &dev->geo;
	struct nvm_chk_meta *meta;
	struct ppa_addr ppa;
	unsigned long len;
	int ret;

	ppa.ppa = 0;

	len = geo->all_chunks * sizeof(*meta);
	meta = vzalloc(len);
	if (!meta)
		return ERR_PTR(-ENOMEM);

	ret = nvm_get_chunk_meta(dev, ppa, geo->all_chunks, meta);
	if (ret) {
		/* meta was allocated with vzalloc(), so release it with vfree() */
		vfree(meta);
		return ERR_PTR(-EIO);
	}

	return meta;
}
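
/*
 * pblk_chunk_get_off() indexes into the flat chunk-metadata array returned by
 * pblk_get_chunk_meta(). The array is laid out group-major, then parallel
 * unit, then chunk, so the offset is grp * num_chk * num_lun +
 * pu * num_chk + chk. Illustrative numbers (not from the source): with
 * num_lun = 4 and num_chk = 1000, ppa (grp=1, pu=2, chk=7) maps to
 * 1 * 4000 + 2 * 1000 + 7 = 6007.
 */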
struct nvm_chk_meta *pblk_chunk_get_off(struct pblk *pblk,
					struct nvm_chk_meta *meta,
					struct ppa_addr ppa)
{
	struct nvm_tgt_dev *dev = pblk->dev;
	struct nvm_geo *geo = &dev->geo;
	int ch_off = ppa.m.grp * geo->num_chk * geo->num_lun;
	int lun_off = ppa.m.pu * geo->num_chk;
	int chk_off = ppa.m.chk;

	return meta + ch_off + lun_off + chk_off;
}

void __pblk_map_invalidate(struct pblk *pblk, struct pblk_line *line,
			   u64 paddr)
{
	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
	struct list_head *move_list = NULL;

	/* Lines being reclaimed (GC'ed) cannot be invalidated. Before the L2P
	 * table is modified with reclaimed sectors, a check is done to ensure
	 * that newer updates are not overwritten.
	 */
	spin_lock(&line->lock);
	WARN_ON(line->state == PBLK_LINESTATE_FREE);

	if (test_and_set_bit(paddr, line->invalid_bitmap)) {
		WARN_ONCE(1, "pblk: double invalidate\n");
		spin_unlock(&line->lock);
		return;
	}
	le32_add_cpu(line->vsc, -1);

	if (line->state == PBLK_LINESTATE_CLOSED)
		move_list = pblk_line_gc_list(pblk, line);
	spin_unlock(&line->lock);

	if (move_list) {
		spin_lock(&l_mg->gc_lock);
		spin_lock(&line->lock);
		/* Prevent moving a line that has just been chosen for GC */
		if (line->state == PBLK_LINESTATE_GC) {
			spin_unlock(&line->lock);
			spin_unlock(&l_mg->gc_lock);
			return;
		}
		spin_unlock(&line->lock);

		list_move_tail(&line->list, move_list);
		spin_unlock(&l_mg->gc_lock);
	}
}

void pblk_map_invalidate(struct pblk *pblk, struct ppa_addr ppa)
{
	struct pblk_line *line;
	u64 paddr;

#ifdef CONFIG_NVM_PBLK_DEBUG
	/* Callers must ensure that the ppa points to a device address */
	BUG_ON(pblk_addr_in_cache(ppa));
	BUG_ON(pblk_ppa_empty(ppa));
#endif

	line = pblk_ppa_to_line(pblk, ppa);
	paddr = pblk_dev_ppa_to_line_addr(pblk, ppa);

	__pblk_map_invalidate(pblk, line, paddr);
}

static void pblk_invalidate_range(struct pblk *pblk, sector_t slba,
				  unsigned int nr_secs)
{
	sector_t lba;

	spin_lock(&pblk->trans_lock);
	for (lba = slba; lba < slba + nr_secs; lba++) {
		struct ppa_addr ppa;

		ppa = pblk_trans_map_get(pblk, lba);

		if (!pblk_addr_in_cache(ppa) && !pblk_ppa_empty(ppa))
			pblk_map_invalidate(pblk, ppa);

		pblk_ppa_set_empty(&ppa);
		pblk_trans_map_set(pblk, lba, ppa);
	}
	spin_unlock(&pblk->trans_lock);
}
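
/*
 * The OOB metadata and the PPA list of a request share a single DMA-coherent
 * allocation: the PPA list simply starts pblk_dma_meta_size bytes into the
 * buffer, so pblk_free_rqd_meta() only has to free rqd->meta_list.
 */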
int pblk_alloc_rqd_meta(struct pblk *pblk, struct nvm_rq *rqd)
{
	struct nvm_tgt_dev *dev = pblk->dev;

	rqd->meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
							&rqd->dma_meta_list);
	if (!rqd->meta_list)
		return -ENOMEM;

	if (rqd->nr_ppas == 1)
		return 0;

	rqd->ppa_list = rqd->meta_list + pblk_dma_meta_size;
	rqd->dma_ppa_list = rqd->dma_meta_list + pblk_dma_meta_size;

	return 0;
}

void pblk_free_rqd_meta(struct pblk *pblk, struct nvm_rq *rqd)
{
	struct nvm_tgt_dev *dev = pblk->dev;

	if (rqd->meta_list)
		nvm_dev_dma_free(dev->parent, rqd->meta_list,
				rqd->dma_meta_list);
}

/* Caller must guarantee that the request is a valid type */
struct nvm_rq *pblk_alloc_rqd(struct pblk *pblk, int type)
{
	mempool_t *pool;
	struct nvm_rq *rqd;
	int rq_size;

	switch (type) {
	case PBLK_WRITE:
	case PBLK_WRITE_INT:
		pool = &pblk->w_rq_pool;
		rq_size = pblk_w_rq_size;
		break;
	case PBLK_READ:
		pool = &pblk->r_rq_pool;
		rq_size = pblk_g_rq_size;
		break;
	default:
		pool = &pblk->e_rq_pool;
		rq_size = pblk_g_rq_size;
	}

	rqd = mempool_alloc(pool, GFP_KERNEL);
	memset(rqd, 0, rq_size);

	return rqd;
}

/* Typically used on completion path. Cannot guarantee request consistency */
void pblk_free_rqd(struct pblk *pblk, struct nvm_rq *rqd, int type)
{
	mempool_t *pool;

	switch (type) {
	case PBLK_WRITE:
		kfree(((struct pblk_c_ctx *)nvm_rq_to_pdu(rqd))->lun_bitmap);
		/* fall through */
	case PBLK_WRITE_INT:
		pool = &pblk->w_rq_pool;
		break;
	case PBLK_READ:
		pool = &pblk->r_rq_pool;
		break;
	case PBLK_ERASE:
		pool = &pblk->e_rq_pool;
		break;
	default:
		pblk_err(pblk, "trying to free unknown rqd type\n");
		return;
	}

	pblk_free_rqd_meta(pblk, rqd);
	mempool_free(rqd, pool);
}

void pblk_bio_free_pages(struct pblk *pblk, struct bio *bio, int off,
			 int nr_pages)
{
	struct bio_vec bv;
	int i;

	WARN_ON(off + nr_pages != bio->bi_vcnt);

	for (i = off; i < nr_pages + off; i++) {
		bv = bio->bi_io_vec[i];
		mempool_free(bv.bv_page, &pblk->page_bio_pool);
	}
}

int pblk_bio_add_pages(struct pblk *pblk, struct bio *bio, gfp_t flags,
		       int nr_pages)
{
	struct request_queue *q = pblk->dev->q;
	struct page *page;
	int i, ret;

	for (i = 0; i < nr_pages; i++) {
		page = mempool_alloc(&pblk->page_bio_pool, flags);

		ret = bio_add_pc_page(q, bio, page, PBLK_EXPOSED_PAGE_SIZE, 0);
		if (ret != PBLK_EXPOSED_PAGE_SIZE) {
			pblk_err(pblk, "could not add page to bio\n");
			mempool_free(page, &pblk->page_bio_pool);
			goto err;
		}
	}

	return 0;
err:
	pblk_bio_free_pages(pblk, bio, (bio->bi_vcnt - i), i);
	return -1;
}

void pblk_write_kick(struct pblk *pblk)
{
	wake_up_process(pblk->writer_ts);
	mod_timer(&pblk->wtimer, jiffies + msecs_to_jiffies(1000));
}

void pblk_write_timer_fn(struct timer_list *t)
{
	struct pblk *pblk = from_timer(pblk, t, wtimer);

	/* kick the write thread every tick to flush outstanding data */
	pblk_write_kick(pblk);
}

void pblk_write_should_kick(struct pblk *pblk)
{
	unsigned int secs_avail = pblk_rb_read_count(&pblk->rwb);

	if (secs_avail >= pblk->min_write_pgs)
		pblk_write_kick(pblk);
}

static void pblk_wait_for_meta(struct pblk *pblk)
{
	do {
		if (!atomic_read(&pblk->inflight_io))
			break;

		schedule();
	} while (1);
}

static void pblk_flush_writer(struct pblk *pblk)
{
	pblk_rb_flush(&pblk->rwb);
	do {
		if (!pblk_rb_sync_count(&pblk->rwb))
			break;

		pblk_write_kick(pblk);
		schedule();
	} while (1);
}
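
/*
 * Pick the garbage-collection list a closed line belongs to, based on its
 * valid sector count (vsc): lines with a write error go to the werr list,
 * fully invalid lines (vsc == 0) to the full list, then the high/mid/low
 * lists as vsc crosses the high_thrs/mid_thrs/sec_in_line thresholds,
 * completely valid lines to the empty list, and anything else is flagged as
 * corrupt. Called with line->lock held; the caller performs the list move.
 */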
struct list_head *pblk_line_gc_list(struct pblk *pblk, struct pblk_line *line)
{
	struct pblk_line_meta *lm = &pblk->lm;
	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
	struct list_head *move_list = NULL;
	int vsc = le32_to_cpu(*line->vsc);

	lockdep_assert_held(&line->lock);

	if (line->w_err_gc->has_write_err) {
		if (line->gc_group != PBLK_LINEGC_WERR) {
			line->gc_group = PBLK_LINEGC_WERR;
			move_list = &l_mg->gc_werr_list;
			pblk_rl_werr_line_in(&pblk->rl);
		}
	} else if (!vsc) {
		if (line->gc_group != PBLK_LINEGC_FULL) {
			line->gc_group = PBLK_LINEGC_FULL;
			move_list = &l_mg->gc_full_list;
		}
	} else if (vsc < lm->high_thrs) {
		if (line->gc_group != PBLK_LINEGC_HIGH) {
			line->gc_group = PBLK_LINEGC_HIGH;
			move_list = &l_mg->gc_high_list;
		}
	} else if (vsc < lm->mid_thrs) {
		if (line->gc_group != PBLK_LINEGC_MID) {
			line->gc_group = PBLK_LINEGC_MID;
			move_list = &l_mg->gc_mid_list;
		}
	} else if (vsc < line->sec_in_line) {
		if (line->gc_group != PBLK_LINEGC_LOW) {
			line->gc_group = PBLK_LINEGC_LOW;
			move_list = &l_mg->gc_low_list;
		}
	} else if (vsc == line->sec_in_line) {
		if (line->gc_group != PBLK_LINEGC_EMPTY) {
			line->gc_group = PBLK_LINEGC_EMPTY;
			move_list = &l_mg->gc_empty_list;
		}
	} else {
		line->state = PBLK_LINESTATE_CORRUPT;
		trace_pblk_line_state(pblk_disk_name(pblk), line->id,
					line->state);

		line->gc_group = PBLK_LINEGC_NONE;
		move_list = &l_mg->corrupt_list;
		pblk_err(pblk, "corrupted vsc for line %d, vsc:%d (%d/%d/%d)\n",
						line->id, vsc,
						line->sec_in_line,
						lm->high_thrs, lm->mid_thrs);
	}

	return move_list;
}

void pblk_discard(struct pblk *pblk, struct bio *bio)
{
	sector_t slba = pblk_get_lba(bio);
	sector_t nr_secs = pblk_get_secs(bio);

	pblk_invalidate_range(pblk, slba, nr_secs);
}

void pblk_log_write_err(struct pblk *pblk, struct nvm_rq *rqd)
{
	atomic_long_inc(&pblk->write_failed);
#ifdef CONFIG_NVM_PBLK_DEBUG
	pblk_print_failed_rqd(pblk, rqd, rqd->error);
#endif
}

void pblk_log_read_err(struct pblk *pblk, struct nvm_rq *rqd)
{
	/* Empty page read is not necessarily an error (e.g., L2P recovery) */
	if (rqd->error == NVM_RSP_ERR_EMPTYPAGE) {
		atomic_long_inc(&pblk->read_empty);
		return;
	}

	switch (rqd->error) {
	case NVM_RSP_WARN_HIGHECC:
		atomic_long_inc(&pblk->read_high_ecc);
		break;
	case NVM_RSP_ERR_FAILECC:
	case NVM_RSP_ERR_FAILCRC:
		atomic_long_inc(&pblk->read_failed);
		break;
	default:
		pblk_err(pblk, "unknown read error:%d\n", rqd->error);
	}
#ifdef CONFIG_NVM_PBLK_DEBUG
	pblk_print_failed_rqd(pblk, rqd, rqd->error);
#endif
}

void pblk_set_sec_per_write(struct pblk *pblk, int sec_per_write)
{
	pblk->sec_per_write = sec_per_write;
}

int pblk_submit_io(struct pblk *pblk, struct nvm_rq *rqd)
{
	struct nvm_tgt_dev *dev = pblk->dev;

	atomic_inc(&pblk->inflight_io);

#ifdef CONFIG_NVM_PBLK_DEBUG
	if (pblk_check_io(pblk, rqd))
		return NVM_IO_ERR;
#endif

	return nvm_submit_io(dev, rqd);
}

void pblk_check_chunk_state_update(struct pblk *pblk, struct nvm_rq *rqd)
{
	struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd);
	int i;

	for (i = 0; i < rqd->nr_ppas; i++) {
		struct ppa_addr *ppa = &ppa_list[i];
		struct nvm_chk_meta *chunk = pblk_dev_ppa_to_chunk(pblk, *ppa);
		u64 caddr = pblk_dev_ppa_to_chunk_addr(pblk, *ppa);

		if (caddr == 0)
			trace_pblk_chunk_state(pblk_disk_name(pblk),
							ppa, NVM_CHK_ST_OPEN);
		else if (caddr == chunk->cnlb)
			trace_pblk_chunk_state(pblk_disk_name(pblk),
							ppa, NVM_CHK_ST_CLOSED);
	}
}

int pblk_submit_io_sync(struct pblk *pblk, struct nvm_rq *rqd)
{
	struct nvm_tgt_dev *dev = pblk->dev;
	int ret;

	atomic_inc(&pblk->inflight_io);

#ifdef CONFIG_NVM_PBLK_DEBUG
	if (pblk_check_io(pblk, rqd))
		return NVM_IO_ERR;
#endif

	ret = nvm_submit_io_sync(dev, rqd);

	if (trace_pblk_chunk_state_enabled() && !ret &&
	    rqd->opcode == NVM_OP_PWRITE)
		pblk_check_chunk_state_update(pblk, rqd);

	return ret;
}

int pblk_submit_io_sync_sem(struct pblk *pblk, struct nvm_rq *rqd)
{
	struct ppa_addr *ppa_list;
	int ret;

	ppa_list = (rqd->nr_ppas > 1) ? rqd->ppa_list : &rqd->ppa_addr;

	pblk_down_chunk(pblk, ppa_list[0]);
	ret = pblk_submit_io_sync(pblk, rqd);
	pblk_up_chunk(pblk, ppa_list[0]);

	return ret;
}

static void pblk_bio_map_addr_endio(struct bio *bio)
{
	bio_put(bio);
}
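
/*
 * Build a bio over a metadata buffer. kmalloc'ed buffers are physically
 * contiguous and can be mapped directly with bio_map_kern(); vmalloc'ed
 * buffers are walked page by page with vmalloc_to_page() instead. The
 * private end_io above just drops the bio reference on completion.
 */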
struct bio *pblk_bio_map_addr(struct pblk *pblk, void *data,
			      unsigned int nr_secs, unsigned int len,
			      int alloc_type, gfp_t gfp_mask)
{
	struct nvm_tgt_dev *dev = pblk->dev;
	void *kaddr = data;
	struct page *page;
	struct bio *bio;
	int i, ret;

	if (alloc_type == PBLK_KMALLOC_META)
		return bio_map_kern(dev->q, kaddr, len, gfp_mask);

	bio = bio_kmalloc(gfp_mask, nr_secs);
	if (!bio)
		return ERR_PTR(-ENOMEM);

	for (i = 0; i < nr_secs; i++) {
		page = vmalloc_to_page(kaddr);
		if (!page) {
			pblk_err(pblk, "could not map vmalloc bio\n");
			bio_put(bio);
			bio = ERR_PTR(-ENOMEM);
			goto out;
		}

		ret = bio_add_pc_page(dev->q, bio, page, PAGE_SIZE, 0);
		if (ret != PAGE_SIZE) {
			pblk_err(pblk, "could not add page to bio\n");
			bio_put(bio);
			bio = ERR_PTR(-ENOMEM);
			goto out;
		}

		kaddr += PAGE_SIZE;
	}

	bio->bi_end_io = pblk_bio_map_addr_endio;
out:
	return bio;
}
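
/*
 * Decide how many sectors to write in the next request: a full sec_per_write
 * stripe when enough data is buffered, otherwise the largest multiple of
 * min_write_pgs available, or a single minimal write when only a flush is
 * pending. Illustrative numbers (not from the source): with
 * sec_per_write = 64 and min_write_pgs = 8, secs_avail = 70 yields 64,
 * secs_avail = 20 yields 16, and secs_avail = 3 yields 8 only if
 * secs_to_flush is set, else 0.
 */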
int pblk_calc_secs(struct pblk *pblk, unsigned long secs_avail,
		   unsigned long secs_to_flush)
{
	int max = pblk->sec_per_write;
	int min = pblk->min_write_pgs;
	int secs_to_sync = 0;

	if (secs_avail >= max)
		secs_to_sync = max;
	else if (secs_avail >= min)
		secs_to_sync = min * (secs_avail / min);
	else if (secs_to_flush)
		secs_to_sync = min;

	return secs_to_sync;
}
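
/*
 * pblk_dealloc_page()/__pblk_alloc_page() manage the per-line map_bitmap,
 * which tracks which sectors of the line have been handed out to writers.
 * Allocation advances cur_sec over the next free bits; deallocation rewinds
 * cur_sec and clears the bits (used, for instance, when failed writes must
 * be remapped).
 */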
void pblk_dealloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs)
{
	u64 addr;
	int i;

	spin_lock(&line->lock);
	addr = find_next_zero_bit(line->map_bitmap,
					pblk->lm.sec_per_line, line->cur_sec);
	line->cur_sec = addr - nr_secs;

	for (i = 0; i < nr_secs; i++, line->cur_sec--)
		WARN_ON(!test_and_clear_bit(line->cur_sec, line->map_bitmap));
	spin_unlock(&line->lock);
}

u64 __pblk_alloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs)
{
	u64 addr;
	int i;

	lockdep_assert_held(&line->lock);

	/* logic error: ppa out-of-bounds. Prevent generating bad address */
	if (line->cur_sec + nr_secs > pblk->lm.sec_per_line) {
		WARN(1, "pblk: page allocation out of bounds\n");
		nr_secs = pblk->lm.sec_per_line - line->cur_sec;
	}

	line->cur_sec = addr = find_next_zero_bit(line->map_bitmap,
					pblk->lm.sec_per_line, line->cur_sec);
	for (i = 0; i < nr_secs; i++, line->cur_sec++)
		WARN_ON(test_and_set_bit(line->cur_sec, line->map_bitmap));

	return addr;
}

u64 pblk_alloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs)
{
	u64 addr;

	/* Lock needed in case a write fails and a recovery needs to remap
	 * failed write buffer entries
	 */
	spin_lock(&line->lock);
	addr = __pblk_alloc_page(pblk, line, nr_secs);
	line->left_msecs -= nr_secs;
	WARN(line->left_msecs < 0, "pblk: page allocation out of bounds\n");
	spin_unlock(&line->lock);

	return addr;
}

u64 pblk_lookup_page(struct pblk *pblk, struct pblk_line *line)
{
	u64 paddr;

	spin_lock(&line->lock);
	paddr = find_next_zero_bit(line->map_bitmap,
					pblk->lm.sec_per_line, line->cur_sec);
	spin_unlock(&line->lock);

	return paddr;
}

u64 pblk_line_smeta_start(struct pblk *pblk, struct pblk_line *line)
{
	struct nvm_tgt_dev *dev = pblk->dev;
	struct nvm_geo *geo = &dev->geo;
	struct pblk_line_meta *lm = &pblk->lm;
	int bit;

	/* This usually only happens on bad lines */
	bit = find_first_zero_bit(line->blk_bitmap, lm->blk_per_line);
	if (bit >= lm->blk_per_line)
		return -1;

	return bit * geo->ws_opt;
}
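
/*
 * Start metadata (smeta) lives at the beginning of the first good block of a
 * line. The read and write helpers below issue a single synchronous request
 * covering lm->smeta_sec sectors, using the shared rqd meta/PPA DMA buffer.
 */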
int pblk_line_smeta_read(struct pblk *pblk, struct pblk_line *line)
{
	struct nvm_tgt_dev *dev = pblk->dev;
	struct pblk_line_meta *lm = &pblk->lm;
	struct bio *bio;
	struct nvm_rq rqd;
	u64 paddr = pblk_line_smeta_start(pblk, line);
	int i, ret;

	memset(&rqd, 0, sizeof(struct nvm_rq));

	ret = pblk_alloc_rqd_meta(pblk, &rqd);
	if (ret)
		return ret;

	bio = bio_map_kern(dev->q, line->smeta, lm->smeta_len, GFP_KERNEL);
	if (IS_ERR(bio)) {
		ret = PTR_ERR(bio);
		goto clear_rqd;
	}

	bio->bi_iter.bi_sector = 0; /* internal bio */
	bio_set_op_attrs(bio, REQ_OP_READ, 0);

	rqd.bio = bio;
	rqd.opcode = NVM_OP_PREAD;
	rqd.nr_ppas = lm->smeta_sec;
	rqd.is_seq = 1;

	for (i = 0; i < lm->smeta_sec; i++, paddr++)
		rqd.ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line->id);

	ret = pblk_submit_io_sync(pblk, &rqd);
	if (ret) {
		pblk_err(pblk, "smeta I/O submission failed: %d\n", ret);
		bio_put(bio);
		goto clear_rqd;
	}

	atomic_dec(&pblk->inflight_io);

	if (rqd.error)
		pblk_log_read_err(pblk, &rqd);

clear_rqd:
	pblk_free_rqd_meta(pblk, &rqd);
	return ret;
}

static int pblk_line_smeta_write(struct pblk *pblk, struct pblk_line *line,
				 u64 paddr)
{
	struct nvm_tgt_dev *dev = pblk->dev;
	struct pblk_line_meta *lm = &pblk->lm;
	struct bio *bio;
	struct nvm_rq rqd;
	__le64 *lba_list = emeta_to_lbas(pblk, line->emeta->buf);
	__le64 addr_empty = cpu_to_le64(ADDR_EMPTY);
	int i, ret;

	memset(&rqd, 0, sizeof(struct nvm_rq));

	ret = pblk_alloc_rqd_meta(pblk, &rqd);
	if (ret)
		return ret;

	bio = bio_map_kern(dev->q, line->smeta, lm->smeta_len, GFP_KERNEL);
	if (IS_ERR(bio)) {
		ret = PTR_ERR(bio);
		goto clear_rqd;
	}

	bio->bi_iter.bi_sector = 0; /* internal bio */
	bio_set_op_attrs(bio, REQ_OP_WRITE, 0);

	rqd.bio = bio;
	rqd.opcode = NVM_OP_PWRITE;
	rqd.nr_ppas = lm->smeta_sec;
	rqd.is_seq = 1;

	for (i = 0; i < lm->smeta_sec; i++, paddr++) {
		struct pblk_sec_meta *meta_list = rqd.meta_list;

		rqd.ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line->id);
		meta_list[i].lba = lba_list[paddr] = addr_empty;
	}

	ret = pblk_submit_io_sync_sem(pblk, &rqd);
	if (ret) {
		pblk_err(pblk, "smeta I/O submission failed: %d\n", ret);
		bio_put(bio);
		goto clear_rqd;
	}

	atomic_dec(&pblk->inflight_io);

	if (rqd.error) {
		pblk_log_write_err(pblk, &rqd);
		ret = -EIO;
	}

clear_rqd:
	pblk_free_rqd_meta(pblk, &rqd);
	return ret;
}
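
/*
 * End metadata (emeta) spans several write units at the tail of the line, so
 * it is read back in pblk_calc_secs()-sized batches. Physical addresses that
 * fall on bad blocks (set in line->blk_bitmap) are skipped, and every
 * candidate paddr is bounds-checked before it is added to the PPA list.
 */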
int pblk_line_emeta_read(struct pblk *pblk, struct pblk_line *line,
			 void *emeta_buf)
{
	struct nvm_tgt_dev *dev = pblk->dev;
	struct nvm_geo *geo = &dev->geo;
	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
	struct pblk_line_meta *lm = &pblk->lm;
	void *ppa_list, *meta_list;
	struct bio *bio;
	struct nvm_rq rqd;
	u64 paddr = line->emeta_ssec;
	dma_addr_t dma_ppa_list, dma_meta_list;
	int min = pblk->min_write_pgs;
	int left_ppas = lm->emeta_sec[0];
	int line_id = line->id;
	int rq_ppas, rq_len;
	int i, j;
	int ret;

	meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
							&dma_meta_list);
	if (!meta_list)
		return -ENOMEM;

	ppa_list = meta_list + pblk_dma_meta_size;
	dma_ppa_list = dma_meta_list + pblk_dma_meta_size;

next_rq:
	memset(&rqd, 0, sizeof(struct nvm_rq));

	rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
	rq_len = rq_ppas * geo->csecs;

	bio = pblk_bio_map_addr(pblk, emeta_buf, rq_ppas, rq_len,
					l_mg->emeta_alloc_type, GFP_KERNEL);
	if (IS_ERR(bio)) {
		ret = PTR_ERR(bio);
		goto free_rqd_dma;
	}

	bio->bi_iter.bi_sector = 0; /* internal bio */
	bio_set_op_attrs(bio, REQ_OP_READ, 0);

	rqd.bio = bio;
	rqd.meta_list = meta_list;
	rqd.ppa_list = ppa_list;
	rqd.dma_meta_list = dma_meta_list;
	rqd.dma_ppa_list = dma_ppa_list;
	rqd.opcode = NVM_OP_PREAD;
	rqd.nr_ppas = rq_ppas;

	for (i = 0; i < rqd.nr_ppas; ) {
		struct ppa_addr ppa = addr_to_gen_ppa(pblk, paddr, line_id);
		int pos = pblk_ppa_to_pos(geo, ppa);

		if (pblk_io_aligned(pblk, rq_ppas))
			rqd.is_seq = 1;

		while (test_bit(pos, line->blk_bitmap)) {
			paddr += min;
			if (pblk_boundary_paddr_checks(pblk, paddr)) {
				bio_put(bio);
				ret = -EINTR;
				goto free_rqd_dma;
			}

			ppa = addr_to_gen_ppa(pblk, paddr, line_id);
			pos = pblk_ppa_to_pos(geo, ppa);
		}

		if (pblk_boundary_paddr_checks(pblk, paddr + min)) {
			bio_put(bio);
			ret = -EINTR;
			goto free_rqd_dma;
		}

		for (j = 0; j < min; j++, i++, paddr++)
			rqd.ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line_id);
	}

	ret = pblk_submit_io_sync(pblk, &rqd);
	if (ret) {
		pblk_err(pblk, "emeta I/O submission failed: %d\n", ret);
		bio_put(bio);
		goto free_rqd_dma;
	}

	atomic_dec(&pblk->inflight_io);

	if (rqd.error)
		pblk_log_read_err(pblk, &rqd);

	emeta_buf += rq_len;
	left_ppas -= rq_ppas;
	if (left_ppas)
		goto next_rq;

free_rqd_dma:
	nvm_dev_dma_free(dev->parent, rqd.meta_list, rqd.dma_meta_list);
	return ret;
}

static void pblk_setup_e_rq(struct pblk *pblk, struct nvm_rq *rqd,
			    struct ppa_addr ppa)
{
	rqd->opcode = NVM_OP_ERASE;
	rqd->ppa_addr = ppa;
	rqd->nr_ppas = 1;
	rqd->is_seq = 1;
	rqd->bio = NULL;
}
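
/*
 * Synchronous chunk reset: the completion handler is invoked directly here
 * (rather than through an end_io callback), so rqd.private is filled in
 * right before __pblk_end_io_erase() runs.
 */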
static int pblk_blk_erase_sync(struct pblk *pblk, struct ppa_addr ppa)
{
	struct nvm_rq rqd = {NULL};
	int ret;

	trace_pblk_chunk_reset(pblk_disk_name(pblk), &ppa,
				PBLK_CHUNK_RESET_START);

	pblk_setup_e_rq(pblk, &rqd, ppa);

	/* The write thread schedules erases so that it minimizes disturbances
	 * with writes. Thus, there is no need to take the LUN semaphore.
	 */
	ret = pblk_submit_io_sync(pblk, &rqd);
	rqd.private = pblk;
	__pblk_end_io_erase(pblk, &rqd);

	return ret;
}

int pblk_line_erase(struct pblk *pblk, struct pblk_line *line)
{
	struct pblk_line_meta *lm = &pblk->lm;
	struct ppa_addr ppa;
	int ret, bit = -1;

	/* Erase only good blocks, one at a time */
	do {
		spin_lock(&line->lock);
		bit = find_next_zero_bit(line->erase_bitmap, lm->blk_per_line,
								bit + 1);
		if (bit >= lm->blk_per_line) {
			spin_unlock(&line->lock);
			break;
		}

		ppa = pblk->luns[bit].bppa; /* set ch and lun */
		ppa.a.blk = line->id;

		atomic_dec(&line->left_eblks);
		WARN_ON(test_and_set_bit(bit, line->erase_bitmap));
		spin_unlock(&line->lock);

		ret = pblk_blk_erase_sync(pblk, ppa);
		if (ret) {
			pblk_err(pblk, "failed to erase line %d\n", line->id);
			return ret;
		}
	} while (1);

	return 0;
}

static void pblk_line_setup_metadata(struct pblk_line *line,
				     struct pblk_line_mgmt *l_mg,
				     struct pblk_line_meta *lm)
{
	int meta_line;

	lockdep_assert_held(&l_mg->free_lock);

retry_meta:
	meta_line = find_first_zero_bit(&l_mg->meta_bitmap, PBLK_DATA_LINES);
	if (meta_line == PBLK_DATA_LINES) {
		spin_unlock(&l_mg->free_lock);
		io_schedule();
		spin_lock(&l_mg->free_lock);
		goto retry_meta;
	}

	set_bit(meta_line, &l_mg->meta_bitmap);
	line->meta_line = meta_line;

	line->smeta = l_mg->sline_meta[meta_line];
	line->emeta = l_mg->eline_meta[meta_line];

	memset(line->smeta, 0, lm->smeta_len);
	memset(line->emeta->buf, 0, lm->emeta_len[0]);

	line->emeta->mem = 0;
	atomic_set(&line->emeta->sync, 0);
}

/* For now lines are always assumed full lines. Thus, smeta former and current
 * lun bitmaps are omitted.
 */
static int pblk_line_init_metadata(struct pblk *pblk, struct pblk_line *line,
				   struct pblk_line *cur)
{
	struct nvm_tgt_dev *dev = pblk->dev;
	struct nvm_geo *geo = &dev->geo;
	struct pblk_line_meta *lm = &pblk->lm;
	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
	struct pblk_emeta *emeta = line->emeta;
	struct line_emeta *emeta_buf = emeta->buf;
	struct line_smeta *smeta_buf = (struct line_smeta *)line->smeta;
	int nr_blk_line;

	/* After erasing the line, new bad blocks might appear and we risk
	 * having an invalid line
	 */
	nr_blk_line = lm->blk_per_line -
			bitmap_weight(line->blk_bitmap, lm->blk_per_line);
	if (nr_blk_line < lm->min_blk_line) {
		spin_lock(&l_mg->free_lock);
		spin_lock(&line->lock);
		line->state = PBLK_LINESTATE_BAD;
		trace_pblk_line_state(pblk_disk_name(pblk), line->id,
					line->state);
		spin_unlock(&line->lock);

		list_add_tail(&line->list, &l_mg->bad_list);
		spin_unlock(&l_mg->free_lock);

		pblk_debug(pblk, "line %d is bad\n", line->id);

		return 0;
	}

	/* Run-time metadata */
	line->lun_bitmap = ((void *)(smeta_buf)) + sizeof(struct line_smeta);

	/* Mark LUNs allocated in this line (all for now) */
	bitmap_set(line->lun_bitmap, 0, lm->lun_bitmap_len);

	smeta_buf->header.identifier = cpu_to_le32(PBLK_MAGIC);
	memcpy(smeta_buf->header.uuid, pblk->instance_uuid, 16);
	smeta_buf->header.id = cpu_to_le32(line->id);
	smeta_buf->header.type = cpu_to_le16(line->type);
	smeta_buf->header.version_major = SMETA_VERSION_MAJOR;
	smeta_buf->header.version_minor = SMETA_VERSION_MINOR;

	/* Start metadata */
	smeta_buf->seq_nr = cpu_to_le64(line->seq_nr);
	smeta_buf->window_wr_lun = cpu_to_le32(geo->all_luns);

	/* Fill metadata among lines */
	if (cur) {
		memcpy(line->lun_bitmap, cur->lun_bitmap, lm->lun_bitmap_len);
		smeta_buf->prev_id = cpu_to_le32(cur->id);
		cur->emeta->buf->next_id = cpu_to_le32(line->id);
	} else {
		smeta_buf->prev_id = cpu_to_le32(PBLK_LINE_EMPTY);
	}

	/* All smeta must be set at this point */
	smeta_buf->header.crc = cpu_to_le32(
			pblk_calc_meta_header_crc(pblk, &smeta_buf->header));
	smeta_buf->crc = cpu_to_le32(pblk_calc_smeta_crc(pblk, smeta_buf));

	/* End metadata */
	memcpy(&emeta_buf->header, &smeta_buf->header,
						sizeof(struct line_header));

	emeta_buf->header.version_major = EMETA_VERSION_MAJOR;
	emeta_buf->header.version_minor = EMETA_VERSION_MINOR;
	emeta_buf->header.crc = cpu_to_le32(
			pblk_calc_meta_header_crc(pblk, &emeta_buf->header));

	emeta_buf->seq_nr = cpu_to_le64(line->seq_nr);
	emeta_buf->nr_lbas = cpu_to_le64(line->sec_in_line);
	emeta_buf->nr_valid_lbas = cpu_to_le64(0);
	emeta_buf->next_id = cpu_to_le32(PBLK_LINE_EMPTY);
	emeta_buf->crc = cpu_to_le32(0);
	emeta_buf->prev_id = smeta_buf->prev_id;

	return 1;
}

static int pblk_line_alloc_bitmaps(struct pblk *pblk, struct pblk_line *line)
{
	struct pblk_line_meta *lm = &pblk->lm;
	struct pblk_line_mgmt *l_mg = &pblk->l_mg;

	line->map_bitmap = mempool_alloc(l_mg->bitmap_pool, GFP_KERNEL);
	if (!line->map_bitmap)
		return -ENOMEM;

	memset(line->map_bitmap, 0, lm->sec_bitmap_len);

	/* will be initialized using bb info from map_bitmap */
	line->invalid_bitmap = mempool_alloc(l_mg->bitmap_pool, GFP_KERNEL);
	if (!line->invalid_bitmap) {
		mempool_free(line->map_bitmap, l_mg->bitmap_pool);
		line->map_bitmap = NULL;
		return -ENOMEM;
	}

	return 0;
}

/* For now lines are always assumed full lines. Thus, smeta former and current
 * lun bitmaps are omitted.
 */
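/*
 * Initialize the line's sector accounting from its bad-block bitmap: sectors
 * belonging to bad blocks are marked in map_bitmap and subtracted from
 * sec_in_line, then the smeta region is reserved at the start of the first
 * good block and the emeta region at the tail of the line, skipping bad
 * blocks. When "init" is set, smeta is also written out here.
 */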
static int pblk_line_init_bb(struct pblk *pblk, struct pblk_line *line,
			     int init)
{
	struct nvm_tgt_dev *dev = pblk->dev;
	struct nvm_geo *geo = &dev->geo;
	struct pblk_line_meta *lm = &pblk->lm;
	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
	u64 off;
	int bit = -1;
	int emeta_secs;

	line->sec_in_line = lm->sec_per_line;

	/* Capture bad block information on line mapping bitmaps */
	while ((bit = find_next_bit(line->blk_bitmap, lm->blk_per_line,
					bit + 1)) < lm->blk_per_line) {
		off = bit * geo->ws_opt;
		bitmap_shift_left(l_mg->bb_aux, l_mg->bb_template, off,
							lm->sec_per_line);
		bitmap_or(line->map_bitmap, line->map_bitmap, l_mg->bb_aux,
							lm->sec_per_line);
		line->sec_in_line -= geo->clba;
	}

	/* Mark smeta metadata sectors as bad sectors */
	bit = find_first_zero_bit(line->blk_bitmap, lm->blk_per_line);
	off = bit * geo->ws_opt;
	bitmap_set(line->map_bitmap, off, lm->smeta_sec);
	line->sec_in_line -= lm->smeta_sec;
	line->smeta_ssec = off;
	line->cur_sec = off + lm->smeta_sec;

	if (init && pblk_line_smeta_write(pblk, line, off)) {
		pblk_debug(pblk, "line smeta I/O failed. Retry\n");
		return 0;
	}

	bitmap_copy(line->invalid_bitmap, line->map_bitmap, lm->sec_per_line);

	/* Mark emeta metadata sectors as bad sectors. We need to consider bad
	 * blocks to make sure that there are enough sectors to store emeta
	 */
	emeta_secs = lm->emeta_sec[0];
	off = lm->sec_per_line;
	while (emeta_secs) {
		off -= geo->ws_opt;
		if (!test_bit(off, line->invalid_bitmap)) {
			bitmap_set(line->invalid_bitmap, off, geo->ws_opt);
			emeta_secs -= geo->ws_opt;
		}
	}

	line->emeta_ssec = off;
	line->sec_in_line -= lm->emeta_sec[0];
	line->nr_valid_lbas = 0;
	line->left_msecs = line->sec_in_line;
	*line->vsc = cpu_to_le32(line->sec_in_line);

	if (lm->sec_per_line - line->sec_in_line !=
		bitmap_weight(line->invalid_bitmap, lm->sec_per_line)) {
		spin_lock(&line->lock);
		line->state = PBLK_LINESTATE_BAD;
		trace_pblk_line_state(pblk_disk_name(pblk), line->id,
					line->state);
		spin_unlock(&line->lock);

		list_add_tail(&line->list, &l_mg->bad_list);
		pblk_err(pblk, "unexpected line %d is bad\n", line->id);

		return 0;
	}

	return 1;
}

static int pblk_prepare_new_line(struct pblk *pblk, struct pblk_line *line)
{
	struct pblk_line_meta *lm = &pblk->lm;
	struct nvm_tgt_dev *dev = pblk->dev;
	struct nvm_geo *geo = &dev->geo;
	int blk_to_erase = atomic_read(&line->blk_in_line);
	int i;

	for (i = 0; i < lm->blk_per_line; i++) {
		struct pblk_lun *rlun = &pblk->luns[i];
		int pos = pblk_ppa_to_pos(geo, rlun->bppa);
		int state = line->chks[pos].state;

		/* Free chunks should not be erased */
		if (state & NVM_CHK_ST_FREE) {
			set_bit(pblk_ppa_to_pos(geo, rlun->bppa),
							line->erase_bitmap);
			blk_to_erase--;
		}
	}

	return blk_to_erase;
}
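
/*
 * Prepare a line for use as the data line. Bad blocks never need an erase, so
 * the erase bitmap starts as a copy of the bad-block bitmap; for a NEW line
 * (never written since the target was created), chunks already reported as
 * free by the device are marked as erased as well, so only the remaining
 * chunks are counted in left_eblks/left_seblks.
 */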
static int pblk_line_prepare(struct pblk *pblk, struct pblk_line *line)
{
	struct pblk_line_meta *lm = &pblk->lm;
	int blk_in_line = atomic_read(&line->blk_in_line);
	int blk_to_erase;

	/* Bad blocks do not need to be erased */
	bitmap_copy(line->erase_bitmap, line->blk_bitmap, lm->blk_per_line);

	spin_lock(&line->lock);

	/* If we have not written to this line, we need to mark free chunks
	 * as already erased
	 */
	if (line->state == PBLK_LINESTATE_NEW) {
		blk_to_erase = pblk_prepare_new_line(pblk, line);
		line->state = PBLK_LINESTATE_FREE;
		trace_pblk_line_state(pblk_disk_name(pblk), line->id,
					line->state);
	} else {
		blk_to_erase = blk_in_line;
	}

	if (blk_in_line < lm->min_blk_line) {
		spin_unlock(&line->lock);
		return -EAGAIN;
	}

	if (line->state != PBLK_LINESTATE_FREE) {
		WARN(1, "pblk: corrupted line %d, state %d\n",
							line->id, line->state);
		spin_unlock(&line->lock);
		return -EINTR;
	}

	line->state = PBLK_LINESTATE_OPEN;
	trace_pblk_line_state(pblk_disk_name(pblk), line->id,
				line->state);

	atomic_set(&line->left_eblks, blk_to_erase);
	atomic_set(&line->left_seblks, blk_to_erase);

	line->meta_distance = lm->meta_distance;
	spin_unlock(&line->lock);

	kref_init(&line->ref);

	return 0;
}

int pblk_line_recov_alloc(struct pblk *pblk, struct pblk_line *line)
{
	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
	int ret;

	spin_lock(&l_mg->free_lock);
	l_mg->data_line = line;
	list_del(&line->list);

	ret = pblk_line_prepare(pblk, line);
	if (ret) {
		list_add(&line->list, &l_mg->free_list);
		spin_unlock(&l_mg->free_lock);
		return ret;
	}
	spin_unlock(&l_mg->free_lock);

	ret = pblk_line_alloc_bitmaps(pblk, line);
	if (ret)
		return ret;

	if (!pblk_line_init_bb(pblk, line, 0)) {
		list_add(&line->list, &l_mg->free_list);
		return -EINTR;
	}

	pblk_rl_free_lines_dec(&pblk->rl, line, true);

	return 0;
}

void pblk_line_recov_close(struct pblk *pblk, struct pblk_line *line)
{
	struct pblk_line_mgmt *l_mg = &pblk->l_mg;

	mempool_free(line->map_bitmap, l_mg->bitmap_pool);
	line->map_bitmap = NULL;
	line->smeta = NULL;
	line->emeta = NULL;
}

static void pblk_line_reinit(struct pblk_line *line)
{
	*line->vsc = cpu_to_le32(EMPTY_ENTRY);

	line->map_bitmap = NULL;
	line->invalid_bitmap = NULL;
	line->smeta = NULL;
	line->emeta = NULL;
}

void pblk_line_free(struct pblk_line *line)
{
	struct pblk *pblk = line->pblk;
	struct pblk_line_mgmt *l_mg = &pblk->l_mg;

	mempool_free(line->map_bitmap, l_mg->bitmap_pool);
	mempool_free(line->invalid_bitmap, l_mg->bitmap_pool);

	pblk_line_reinit(line);
}

struct pblk_line *pblk_line_get(struct pblk *pblk)
{
	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
	struct pblk_line_meta *lm = &pblk->lm;
	struct pblk_line *line;
	int ret, bit;

	lockdep_assert_held(&l_mg->free_lock);

retry:
	if (list_empty(&l_mg->free_list)) {
		pblk_err(pblk, "no free lines\n");
		return NULL;
	}

	line = list_first_entry(&l_mg->free_list, struct pblk_line, list);
	list_del(&line->list);
	l_mg->nr_free_lines--;

	bit = find_first_zero_bit(line->blk_bitmap, lm->blk_per_line);
	if (unlikely(bit >= lm->blk_per_line)) {
		spin_lock(&line->lock);
		line->state = PBLK_LINESTATE_BAD;
		trace_pblk_line_state(pblk_disk_name(pblk), line->id,
					line->state);
		spin_unlock(&line->lock);

		list_add_tail(&line->list, &l_mg->bad_list);

		pblk_debug(pblk, "line %d is bad\n", line->id);
		goto retry;
	}

	ret = pblk_line_prepare(pblk, line);
	if (ret) {
		switch (ret) {
		case -EAGAIN:
			list_add(&line->list, &l_mg->bad_list);
			goto retry;
		case -EINTR:
			list_add(&line->list, &l_mg->corrupt_list);
			goto retry;
		default:
			pblk_err(pblk, "failed to prepare line %d\n", line->id);
			list_add(&line->list, &l_mg->free_list);
			l_mg->nr_free_lines++;
			return NULL;
		}
	}

	return line;
}
static struct pblk_line *pblk_line_retry(struct pblk *pblk,
					 struct pblk_line *line)
{
	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
	struct pblk_line *retry_line;

retry:
	spin_lock(&l_mg->free_lock);
	retry_line = pblk_line_get(pblk);
	if (!retry_line) {
		l_mg->data_line = NULL;
		spin_unlock(&l_mg->free_lock);
		return NULL;
	}

	retry_line->map_bitmap = line->map_bitmap;
	retry_line->invalid_bitmap = line->invalid_bitmap;
	retry_line->smeta = line->smeta;
	retry_line->emeta = line->emeta;
	retry_line->meta_line = line->meta_line;

	pblk_line_reinit(line);

	l_mg->data_line = retry_line;
	spin_unlock(&l_mg->free_lock);

	pblk_rl_free_lines_dec(&pblk->rl, line, false);

	if (pblk_line_erase(pblk, retry_line))
		goto retry;

	return retry_line;
}

static void pblk_set_space_limit(struct pblk *pblk)
{
	struct pblk_rl *rl = &pblk->rl;

	atomic_set(&rl->rb_space, 0);
}

struct pblk_line *pblk_line_get_first_data(struct pblk *pblk)
{
	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
	struct pblk_line *line;

	spin_lock(&l_mg->free_lock);
	line = pblk_line_get(pblk);
	if (!line) {
		spin_unlock(&l_mg->free_lock);
		return NULL;
	}

	line->seq_nr = l_mg->d_seq_nr++;
	line->type = PBLK_LINETYPE_DATA;
	l_mg->data_line = line;

	pblk_line_setup_metadata(line, l_mg, &pblk->lm);

	/* Allocate next line for preparation */
	l_mg->data_next = pblk_line_get(pblk);
	if (!l_mg->data_next) {
		/* If we cannot get a new line, we need to stop the pipeline.
		 * Only allow as many writes in as we can store safely and then
		 * fail gracefully
		 */
		pblk_set_space_limit(pblk);

		l_mg->data_next = NULL;
	} else {
		l_mg->data_next->seq_nr = l_mg->d_seq_nr++;
		l_mg->data_next->type = PBLK_LINETYPE_DATA;
	}
	spin_unlock(&l_mg->free_lock);

	if (pblk_line_alloc_bitmaps(pblk, line))
		return NULL;

	if (pblk_line_erase(pblk, line)) {
		line = pblk_line_retry(pblk, line);
		if (!line)
			return NULL;
	}

retry_setup:
	if (!pblk_line_init_metadata(pblk, line, NULL)) {
		line = pblk_line_retry(pblk, line);
		if (!line)
			return NULL;

		goto retry_setup;
	}

	if (!pblk_line_init_bb(pblk, line, 1)) {
		line = pblk_line_retry(pblk, line);
		if (!line)
			return NULL;

		goto retry_setup;
	}

	pblk_rl_free_lines_dec(&pblk->rl, line, true);

	return line;
}

void pblk_ppa_to_line_put(struct pblk *pblk, struct ppa_addr ppa)
{
	struct pblk_line *line;

	line = pblk_ppa_to_line(pblk, ppa);
	kref_put(&line->ref, pblk_line_put_wq);
}

void pblk_rq_to_line_put(struct pblk *pblk, struct nvm_rq *rqd)
{
	struct ppa_addr *ppa_list;
	int i;

	ppa_list = (rqd->nr_ppas > 1) ? rqd->ppa_list : &rqd->ppa_addr;

	for (i = 0; i < rqd->nr_ppas; i++)
		pblk_ppa_to_line_put(pblk, ppa_list[i]);
}

static void pblk_stop_writes(struct pblk *pblk, struct pblk_line *line)
{
	lockdep_assert_held(&pblk->l_mg.free_lock);

	pblk_set_space_limit(pblk);
	pblk->state = PBLK_STATE_STOPPING;
	trace_pblk_state(pblk_disk_name(pblk), pblk->state);
}

static void pblk_line_close_meta_sync(struct pblk *pblk)
{
	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
	struct pblk_line_meta *lm = &pblk->lm;
	struct pblk_line *line, *tline;
	LIST_HEAD(list);

	spin_lock(&l_mg->close_lock);
	if (list_empty(&l_mg->emeta_list)) {
		spin_unlock(&l_mg->close_lock);
		return;
	}

	list_cut_position(&list, &l_mg->emeta_list, l_mg->emeta_list.prev);
	spin_unlock(&l_mg->close_lock);

	list_for_each_entry_safe(line, tline, &list, list) {
		struct pblk_emeta *emeta = line->emeta;

		while (emeta->mem < lm->emeta_len[0]) {
			int ret;

			ret = pblk_submit_meta_io(pblk, line);
			if (ret) {
				pblk_err(pblk, "sync meta line %d failed (%d)\n",
							line->id, ret);
				return;
			}
		}
	}

	pblk_wait_for_meta(pblk);
	flush_workqueue(pblk->close_wq);
}

void __pblk_pipeline_flush(struct pblk *pblk)
{
	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
	int ret;

	spin_lock(&l_mg->free_lock);
	if (pblk->state == PBLK_STATE_RECOVERING ||
					pblk->state == PBLK_STATE_STOPPED) {
		spin_unlock(&l_mg->free_lock);
		return;
	}
	pblk->state = PBLK_STATE_RECOVERING;
	trace_pblk_state(pblk_disk_name(pblk), pblk->state);
	spin_unlock(&l_mg->free_lock);

	pblk_flush_writer(pblk);
	pblk_wait_for_meta(pblk);

	ret = pblk_recov_pad(pblk);
	if (ret) {
		pblk_err(pblk, "could not close data on teardown(%d)\n", ret);
		return;
	}

	flush_workqueue(pblk->bb_wq);
	pblk_line_close_meta_sync(pblk);
}

void __pblk_pipeline_stop(struct pblk *pblk)
{
	struct pblk_line_mgmt *l_mg = &pblk->l_mg;

	spin_lock(&l_mg->free_lock);
	pblk->state = PBLK_STATE_STOPPED;
	trace_pblk_state(pblk_disk_name(pblk), pblk->state);
	l_mg->data_line = NULL;
	l_mg->data_next = NULL;
	spin_unlock(&l_mg->free_lock);
}

void pblk_pipeline_stop(struct pblk *pblk)
{
	__pblk_pipeline_flush(pblk);
	__pblk_pipeline_stop(pblk);
}
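
/*
 * Swap in the pre-allocated "next" data line as the current one. The new line
 * must be fully erased before it can be written, so the function waits for
 * any outstanding chunk resets to finish, then sets up its metadata and
 * reserves a fresh "next" line (stopping the pipeline gracefully if no free
 * line is available).
 */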
struct pblk_line *pblk_line_replace_data(struct pblk *pblk)
{
	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
	struct pblk_line *cur, *new = NULL;
	unsigned int left_seblks;

	new = l_mg->data_next;
	if (!new)
		goto out;

	spin_lock(&l_mg->free_lock);
	cur = l_mg->data_line;
	l_mg->data_line = new;

	pblk_line_setup_metadata(new, l_mg, &pblk->lm);
	spin_unlock(&l_mg->free_lock);

retry_erase:
	left_seblks = atomic_read(&new->left_seblks);
	if (left_seblks) {
		/* If line is not fully erased, erase it */
		if (atomic_read(&new->left_eblks)) {
			if (pblk_line_erase(pblk, new))
				goto out;
		} else {
			io_schedule();
		}
		goto retry_erase;
	}

	if (pblk_line_alloc_bitmaps(pblk, new))
		return NULL;

retry_setup:
	if (!pblk_line_init_metadata(pblk, new, cur)) {
		new = pblk_line_retry(pblk, new);
		if (!new)
			goto out;

		goto retry_setup;
	}

	if (!pblk_line_init_bb(pblk, new, 1)) {
		new = pblk_line_retry(pblk, new);
		if (!new)
			goto out;

		goto retry_setup;
	}

	pblk_rl_free_lines_dec(&pblk->rl, new, true);

	/* Allocate next line for preparation */
	spin_lock(&l_mg->free_lock);
	l_mg->data_next = pblk_line_get(pblk);
	if (!l_mg->data_next) {
		/* If we cannot get a new line, we need to stop the pipeline.
		 * Only allow as many writes in as we can store safely and then
		 * fail gracefully
		 */
		pblk_stop_writes(pblk, new);
		l_mg->data_next = NULL;
	} else {
		l_mg->data_next->seq_nr = l_mg->d_seq_nr++;
		l_mg->data_next->type = PBLK_LINETYPE_DATA;
	}
	spin_unlock(&l_mg->free_lock);

out:
	return new;
}

static void __pblk_line_put(struct pblk *pblk, struct pblk_line *line)
{
	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
	struct pblk_gc *gc = &pblk->gc;

	spin_lock(&line->lock);
	WARN_ON(line->state != PBLK_LINESTATE_GC);
	line->state = PBLK_LINESTATE_FREE;
	trace_pblk_line_state(pblk_disk_name(pblk), line->id,
					line->state);
	line->gc_group = PBLK_LINEGC_NONE;
	pblk_line_free(line);

	if (line->w_err_gc->has_write_err) {
		pblk_rl_werr_line_out(&pblk->rl);
		line->w_err_gc->has_write_err = 0;
	}

	spin_unlock(&line->lock);
	atomic_dec(&gc->pipeline_gc);

	spin_lock(&l_mg->free_lock);
	list_add_tail(&line->list, &l_mg->free_list);
	l_mg->nr_free_lines++;
	spin_unlock(&l_mg->free_lock);

	pblk_rl_free_lines_inc(&pblk->rl, line);
}

static void pblk_line_put_ws(struct work_struct *work)
{
	struct pblk_line_ws *line_put_ws = container_of(work,
						struct pblk_line_ws, ws);
	struct pblk *pblk = line_put_ws->pblk;
	struct pblk_line *line = line_put_ws->line;

	__pblk_line_put(pblk, line);
	mempool_free(line_put_ws, &pblk->gen_ws_pool);
}
  1390. void pblk_line_put(struct kref *ref)
  1391. {
  1392. struct pblk_line *line = container_of(ref, struct pblk_line, ref);
  1393. struct pblk *pblk = line->pblk;
  1394. __pblk_line_put(pblk, line);
  1395. }
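/* kref release callback that defers the line put to the read-end workqueue.
 * The work item is allocated with GFP_ATOMIC so the release can be triggered
 * from atomic context.
 */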
void pblk_line_put_wq(struct kref *ref)
{
        struct pblk_line *line = container_of(ref, struct pblk_line, ref);
        struct pblk *pblk = line->pblk;
        struct pblk_line_ws *line_put_ws;

        line_put_ws = mempool_alloc(&pblk->gen_ws_pool, GFP_ATOMIC);
        if (!line_put_ws)
                return;

        line_put_ws->pblk = pblk;
        line_put_ws->line = line;
        line_put_ws->priv = NULL;

        INIT_WORK(&line_put_ws->ws, pblk_line_put_ws);
        queue_work(pblk->r_end_wq, &line_put_ws->ws);
}

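/* Submit an asynchronous erase (chunk reset) for the chunk addressed by @ppa.
 * Completion is handled by pblk_end_io_erase().
 */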
int pblk_blk_erase_async(struct pblk *pblk, struct ppa_addr ppa)
{
        struct nvm_rq *rqd;
        int err;

        rqd = pblk_alloc_rqd(pblk, PBLK_ERASE);

        pblk_setup_e_rq(pblk, rqd, ppa);

        rqd->end_io = pblk_end_io_erase;
        rqd->private = pblk;

        trace_pblk_chunk_reset(pblk_disk_name(pblk),
                                &ppa, PBLK_CHUNK_RESET_START);

        /* The write thread schedules erases so that it minimizes disturbances
         * with writes. Thus, there is no need to take the LUN semaphore.
         */
        err = pblk_submit_io(pblk, rqd);
        if (err) {
                struct nvm_tgt_dev *dev = pblk->dev;
                struct nvm_geo *geo = &dev->geo;

                pblk_err(pblk, "could not async erase line:%d,blk:%d\n",
                                        pblk_ppa_to_line_id(ppa),
                                        pblk_ppa_to_pos(geo, ppa));
        }

        return err;
}

struct pblk_line *pblk_line_get_data(struct pblk *pblk)
{
        return pblk->l_mg.data_line;
}

/* For now, always erase next line */
struct pblk_line *pblk_line_get_erase(struct pblk *pblk)
{
        return pblk->l_mg.data_next;
}

int pblk_line_is_full(struct pblk_line *line)
{
        return (line->left_msecs == 0);
}

static void pblk_line_should_sync_meta(struct pblk *pblk)
{
        if (pblk_rl_is_limit(&pblk->rl))
                pblk_line_close_meta_sync(pblk);
}

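/* Mark a fully written line as closed and move it to the appropriate garbage
 * collection list.
 */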
void pblk_line_close(struct pblk *pblk, struct pblk_line *line)
{
        struct nvm_tgt_dev *dev = pblk->dev;
        struct nvm_geo *geo = &dev->geo;
        struct pblk_line_meta *lm = &pblk->lm;
        struct pblk_line_mgmt *l_mg = &pblk->l_mg;
        struct list_head *move_list;
        int i;

#ifdef CONFIG_NVM_PBLK_DEBUG
        WARN(!bitmap_full(line->map_bitmap, lm->sec_per_line),
                                "pblk: corrupt closed line %d\n", line->id);
#endif

        spin_lock(&l_mg->free_lock);
        WARN_ON(!test_and_clear_bit(line->meta_line, &l_mg->meta_bitmap));
        spin_unlock(&l_mg->free_lock);

        spin_lock(&l_mg->gc_lock);
        spin_lock(&line->lock);
        WARN_ON(line->state != PBLK_LINESTATE_OPEN);
        line->state = PBLK_LINESTATE_CLOSED;
        move_list = pblk_line_gc_list(pblk, line);
        list_add_tail(&line->list, move_list);

        mempool_free(line->map_bitmap, l_mg->bitmap_pool);
        line->map_bitmap = NULL;
        line->smeta = NULL;
        line->emeta = NULL;

        for (i = 0; i < lm->blk_per_line; i++) {
                struct pblk_lun *rlun = &pblk->luns[i];
                int pos = pblk_ppa_to_pos(geo, rlun->bppa);
                int state = line->chks[pos].state;

                if (!(state & NVM_CHK_ST_OFFLINE))
                        state = NVM_CHK_ST_CLOSED;
        }

        spin_unlock(&line->lock);
        spin_unlock(&l_mg->gc_lock);

        trace_pblk_line_state(pblk_disk_name(pblk), line->id,
                                        line->state);
}

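/* Finalize the end-of-line metadata (emeta): copy the valid-sector and
 * bad-block information, record the write-amplification counters, fill in the
 * header and CRCs, and queue the line for emeta write-out.
 */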
void pblk_line_close_meta(struct pblk *pblk, struct pblk_line *line)
{
        struct pblk_line_mgmt *l_mg = &pblk->l_mg;
        struct pblk_line_meta *lm = &pblk->lm;
        struct pblk_emeta *emeta = line->emeta;
        struct line_emeta *emeta_buf = emeta->buf;
        struct wa_counters *wa = emeta_to_wa(lm, emeta_buf);

        /* No need for an exact vsc value; avoid a big line lock and take an
         * approximation instead.
         */
        memcpy(emeta_to_vsc(pblk, emeta_buf), l_mg->vsc_list, lm->vsc_list_len);
        memcpy(emeta_to_bb(emeta_buf), line->blk_bitmap, lm->blk_bitmap_len);

        wa->user = cpu_to_le64(atomic64_read(&pblk->user_wa));
        wa->pad = cpu_to_le64(atomic64_read(&pblk->pad_wa));
        wa->gc = cpu_to_le64(atomic64_read(&pblk->gc_wa));

        if (le32_to_cpu(emeta_buf->header.identifier) != PBLK_MAGIC) {
                emeta_buf->header.identifier = cpu_to_le32(PBLK_MAGIC);
                memcpy(emeta_buf->header.uuid, pblk->instance_uuid, 16);
                emeta_buf->header.id = cpu_to_le32(line->id);
                emeta_buf->header.type = cpu_to_le16(line->type);
                emeta_buf->header.version_major = EMETA_VERSION_MAJOR;
                emeta_buf->header.version_minor = EMETA_VERSION_MINOR;
                emeta_buf->header.crc = cpu_to_le32(
                        pblk_calc_meta_header_crc(pblk, &emeta_buf->header));
        }

        emeta_buf->nr_valid_lbas = cpu_to_le64(line->nr_valid_lbas);
        emeta_buf->crc = cpu_to_le32(pblk_calc_emeta_crc(pblk, emeta_buf));

        spin_lock(&l_mg->close_lock);
        spin_lock(&line->lock);

        /* Update the in-memory start address for emeta, in case it has
         * shifted due to write errors
         */
        if (line->emeta_ssec != line->cur_sec)
                line->emeta_ssec = line->cur_sec;

        list_add_tail(&line->list, &l_mg->emeta_list);
        spin_unlock(&line->lock);
        spin_unlock(&l_mg->close_lock);

        pblk_line_should_sync_meta(pblk);
}

static void pblk_save_lba_list(struct pblk *pblk, struct pblk_line *line)
{
        struct pblk_line_meta *lm = &pblk->lm;
        struct pblk_line_mgmt *l_mg = &pblk->l_mg;
        unsigned int lba_list_size = lm->emeta_len[2];
        struct pblk_w_err_gc *w_err_gc = line->w_err_gc;
        struct pblk_emeta *emeta = line->emeta;

        w_err_gc->lba_list = pblk_malloc(lba_list_size,
                                         l_mg->emeta_alloc_type, GFP_KERNEL);
        memcpy(w_err_gc->lba_list, emeta_to_lbas(pblk, emeta->buf),
                                lba_list_size);
}

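/* Workqueue handler that closes a line. If the line has suffered write errors,
 * its lba list is saved first, since the emeta start address recorded in smeta
 * is no longer reliable.
 */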
void pblk_line_close_ws(struct work_struct *work)
{
        struct pblk_line_ws *line_ws = container_of(work, struct pblk_line_ws,
                                                                        ws);
        struct pblk *pblk = line_ws->pblk;
        struct pblk_line *line = line_ws->line;
        struct pblk_w_err_gc *w_err_gc = line->w_err_gc;

        /* Write errors make the emeta start address stored in smeta invalid,
         * so keep a copy of the lba list until we've gc'd the line
         */
        if (w_err_gc->has_write_err)
                pblk_save_lba_list(pblk, line);

        pblk_line_close(pblk, line);
        mempool_free(line_ws, &pblk->gen_ws_pool);
}

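/* Generic helper: allocate a line work item, fill it in and queue @work on the
 * given workqueue.
 */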
void pblk_gen_run_ws(struct pblk *pblk, struct pblk_line *line, void *priv,
                     void (*work)(struct work_struct *), gfp_t gfp_mask,
                     struct workqueue_struct *wq)
{
        struct pblk_line_ws *line_ws;

        line_ws = mempool_alloc(&pblk->gen_ws_pool, gfp_mask);

        line_ws->pblk = pblk;
        line_ws->line = line;
        line_ws->priv = priv;

        INIT_WORK(&line_ws->ws, work);
        queue_work(wq, &line_ws->ws);
}

static void __pblk_down_chunk(struct pblk *pblk, int pos)
{
        struct pblk_lun *rlun = &pblk->luns[pos];
        int ret;

        /*
         * Only send one inflight I/O per LUN. Since we map at a page
         * granularity, all ppas in the I/O will map to the same LUN
         */

        ret = down_timeout(&rlun->wr_sem, msecs_to_jiffies(30000));
        if (ret == -ETIME || ret == -EINTR)
                pblk_err(pblk, "taking lun semaphore timed out: err %d\n",
                                -ret);
}

void pblk_down_chunk(struct pblk *pblk, struct ppa_addr ppa)
{
        struct nvm_tgt_dev *dev = pblk->dev;
        struct nvm_geo *geo = &dev->geo;
        int pos = pblk_ppa_to_pos(geo, ppa);

        __pblk_down_chunk(pblk, pos);
}

void pblk_down_rq(struct pblk *pblk, struct ppa_addr ppa,
                  unsigned long *lun_bitmap)
{
        struct nvm_tgt_dev *dev = pblk->dev;
        struct nvm_geo *geo = &dev->geo;
        int pos = pblk_ppa_to_pos(geo, ppa);

        /* If the LUN has been locked for this same request, do not attempt to
         * lock it again
         */
        if (test_and_set_bit(pos, lun_bitmap))
                return;

        __pblk_down_chunk(pblk, pos);
}

void pblk_up_chunk(struct pblk *pblk, struct ppa_addr ppa)
{
        struct nvm_tgt_dev *dev = pblk->dev;
        struct nvm_geo *geo = &dev->geo;
        struct pblk_lun *rlun;
        int pos = pblk_ppa_to_pos(geo, ppa);

        rlun = &pblk->luns[pos];
        up(&rlun->wr_sem);
}

void pblk_up_rq(struct pblk *pblk, unsigned long *lun_bitmap)
{
        struct nvm_tgt_dev *dev = pblk->dev;
        struct nvm_geo *geo = &dev->geo;
        struct pblk_lun *rlun;
        int num_lun = geo->all_luns;
        int bit = -1;

        while ((bit = find_next_bit(lun_bitmap, num_lun, bit + 1)) < num_lun) {
                rlun = &pblk->luns[bit];
                up(&rlun->wr_sem);
        }
}

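/* Update the L2P entry for @lba to point to @ppa, invalidating the previously
 * mapped device address if there was one.
 */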
void pblk_update_map(struct pblk *pblk, sector_t lba, struct ppa_addr ppa)
{
        struct ppa_addr ppa_l2p;

        /* logic error: lba out-of-bounds. Ignore update */
        if (!(lba < pblk->rl.nr_secs)) {
                WARN(1, "pblk: corrupted L2P map request\n");
                return;
        }

        spin_lock(&pblk->trans_lock);
        ppa_l2p = pblk_trans_map_get(pblk, lba);

        if (!pblk_addr_in_cache(ppa_l2p) && !pblk_ppa_empty(ppa_l2p))
                pblk_map_invalidate(pblk, ppa_l2p);

        pblk_trans_map_set(pblk, lba, ppa);
        spin_unlock(&pblk->trans_lock);
}

void pblk_update_map_cache(struct pblk *pblk, sector_t lba, struct ppa_addr ppa)
{
#ifdef CONFIG_NVM_PBLK_DEBUG
        /* Callers must ensure that the ppa points to a cache address */
        BUG_ON(!pblk_addr_in_cache(ppa));
        BUG_ON(pblk_rb_pos_oob(&pblk->rwb, pblk_addr_to_cacheline(ppa)));
#endif

        pblk_update_map(pblk, lba, ppa);
}

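/* Update the L2P entry for a sector being garbage collected. The update is
 * only applied if the entry still points to the GC'd line; returns 0 if the
 * sector was overwritten in the meantime, 1 otherwise.
 */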
int pblk_update_map_gc(struct pblk *pblk, sector_t lba, struct ppa_addr ppa_new,
                       struct pblk_line *gc_line, u64 paddr_gc)
{
        struct ppa_addr ppa_l2p, ppa_gc;
        int ret = 1;

#ifdef CONFIG_NVM_PBLK_DEBUG
        /* Callers must ensure that the ppa points to a cache address */
        BUG_ON(!pblk_addr_in_cache(ppa_new));
        BUG_ON(pblk_rb_pos_oob(&pblk->rwb, pblk_addr_to_cacheline(ppa_new)));
#endif

        /* logic error: lba out-of-bounds. Ignore update */
        if (!(lba < pblk->rl.nr_secs)) {
                WARN(1, "pblk: corrupted L2P map request\n");
                return 0;
        }

        spin_lock(&pblk->trans_lock);
        ppa_l2p = pblk_trans_map_get(pblk, lba);
        ppa_gc = addr_to_gen_ppa(pblk, paddr_gc, gc_line->id);

        if (!pblk_ppa_comp(ppa_l2p, ppa_gc)) {
                spin_lock(&gc_line->lock);
                WARN(!test_bit(paddr_gc, gc_line->invalid_bitmap),
                                                "pblk: corrupted GC update");
                spin_unlock(&gc_line->lock);

                ret = 0;
                goto out;
        }

        pblk_trans_map_set(pblk, lba, ppa_new);
out:
        spin_unlock(&pblk->trans_lock);
        return ret;
}

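/* Update the L2P entry once cached data has been written to the device: point
 * the entry at the device address unless the cache line has been updated since,
 * in which case the mapped address is invalidated instead. Padded entries are
 * discarded.
 */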
void pblk_update_map_dev(struct pblk *pblk, sector_t lba,
                         struct ppa_addr ppa_mapped, struct ppa_addr ppa_cache)
{
        struct ppa_addr ppa_l2p;

#ifdef CONFIG_NVM_PBLK_DEBUG
        /* Callers must ensure that the ppa points to a device address */
        BUG_ON(pblk_addr_in_cache(ppa_mapped));
#endif
        /* Invalidate and discard padded entries */
        if (lba == ADDR_EMPTY) {
                atomic64_inc(&pblk->pad_wa);
#ifdef CONFIG_NVM_PBLK_DEBUG
                atomic_long_inc(&pblk->padded_wb);
#endif
                if (!pblk_ppa_empty(ppa_mapped))
                        pblk_map_invalidate(pblk, ppa_mapped);
                return;
        }

        /* logic error: lba out-of-bounds. Ignore update */
        if (!(lba < pblk->rl.nr_secs)) {
                WARN(1, "pblk: corrupted L2P map request\n");
                return;
        }

        spin_lock(&pblk->trans_lock);
        ppa_l2p = pblk_trans_map_get(pblk, lba);

        /* Do not update L2P if the cacheline has been updated. In this case,
         * the mapped ppa must be invalidated
         */
        if (!pblk_ppa_comp(ppa_l2p, ppa_cache)) {
                if (!pblk_ppa_empty(ppa_mapped))
                        pblk_map_invalidate(pblk, ppa_mapped);
                goto out;
        }

#ifdef CONFIG_NVM_PBLK_DEBUG
        WARN_ON(!pblk_addr_in_cache(ppa_l2p) && !pblk_ppa_empty(ppa_l2p));
#endif

        pblk_trans_map_set(pblk, lba, ppa_mapped);
out:
        spin_unlock(&pblk->trans_lock);
}

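/* Look up @nr_secs consecutive L2P entries starting at @blba. For entries that
 * map to a device address, a reference on the owning line is taken so the line
 * stays valid while the lookup result is in use.
 */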
void pblk_lookup_l2p_seq(struct pblk *pblk, struct ppa_addr *ppas,
                         sector_t blba, int nr_secs)
{
        int i;

        spin_lock(&pblk->trans_lock);
        for (i = 0; i < nr_secs; i++) {
                struct ppa_addr ppa;

                ppa = ppas[i] = pblk_trans_map_get(pblk, blba + i);

                /* If the L2P entry maps to a line, the reference is valid */
                if (!pblk_ppa_empty(ppa) && !pblk_addr_in_cache(ppa)) {
                        struct pblk_line *line = pblk_ppa_to_line(pblk, ppa);

                        kref_get(&line->ref);
                }
        }
        spin_unlock(&pblk->trans_lock);
}

void pblk_lookup_l2p_rand(struct pblk *pblk, struct ppa_addr *ppas,
                          u64 *lba_list, int nr_secs)
{
        u64 lba;
        int i;

        spin_lock(&pblk->trans_lock);
        for (i = 0; i < nr_secs; i++) {
                lba = lba_list[i];
                if (lba != ADDR_EMPTY) {
                        /* logic error: lba out-of-bounds. Ignore update */
                        if (!(lba < pblk->rl.nr_secs)) {
                                WARN(1, "pblk: corrupted L2P map request\n");
                                continue;
                        }
                        ppas[i] = pblk_trans_map_get(pblk, lba);
                }
        }
        spin_unlock(&pblk->trans_lock);
}
