/*
 * Copyright (C) 2016 CNEX Labs
 * Initial release: Javier Gonzalez <javier@cnexlabs.com>
 *                  Matias Bjorling <matias@cnexlabs.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version
 * 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * pblk-gc.c - pblk's garbage collector
 */
  17. #include "pblk.h"
  18. #include <linux/delay.h>
  19. static void pblk_gc_free_gc_rq(struct pblk_gc_rq *gc_rq)
  20. {
  21. kfree(gc_rq->data);
  22. kfree(gc_rq->lba_list);
  23. kfree(gc_rq);
  24. }
  25. static int pblk_gc_write(struct pblk *pblk)
  26. {
  27. struct pblk_gc *gc = &pblk->gc;
  28. struct pblk_gc_rq *gc_rq, *tgc_rq;
  29. LIST_HEAD(w_list);
  30. spin_lock(&gc->w_lock);
  31. if (list_empty(&gc->w_list)) {
  32. spin_unlock(&gc->w_lock);
  33. return 1;
  34. }
  35. list_for_each_entry_safe(gc_rq, tgc_rq, &gc->w_list, list) {
  36. list_move_tail(&gc_rq->list, &w_list);
  37. gc->w_entries--;
  38. }
  39. spin_unlock(&gc->w_lock);
  40. list_for_each_entry_safe(gc_rq, tgc_rq, &w_list, list) {
  41. pblk_write_gc_to_cache(pblk, gc_rq->data, gc_rq->lba_list,
  42. gc_rq->nr_secs, gc_rq->secs_to_gc,
  43. gc_rq->line, PBLK_IOTYPE_GC);
  44. kref_put(&gc_rq->line->ref, pblk_line_put);
  45. list_del(&gc_rq->list);
  46. pblk_gc_free_gc_rq(gc_rq);
  47. }
  48. return 0;
  49. }
/* Wake the GC writer kthread so it drains the pending write list
 * (see pblk_gc_write / pblk_gc_writer_ts).
 */
static void pblk_gc_writer_kick(struct pblk_gc *gc)
{
	wake_up_process(gc->gc_writer_ts);
}
/*
 * Responsible for managing all memory related to a gc request. Also in case of
 * failure: on any error, both @lba_list (owned by the caller until here) and
 * the payload buffer are freed in this function; on success their ownership
 * transfers to the queued gc_rq, which pblk_gc_write() eventually frees.
 */
static int pblk_gc_move_valid_secs(struct pblk *pblk, struct pblk_line *line,
				   u64 *lba_list, unsigned int nr_secs)
{
	struct nvm_tgt_dev *dev = pblk->dev;
	struct nvm_geo *geo = &dev->geo;
	struct pblk_gc *gc = &pblk->gc;
	struct pblk_gc_rq *gc_rq;
	void *data;
	unsigned int secs_to_gc;
	int ret = NVM_IO_OK;

	/* Payload buffer for all sectors to be moved */
	data = kmalloc(nr_secs * geo->sec_size, GFP_KERNEL);
	if (!data) {
		ret = NVM_IO_ERR;
		goto free_lba_list;
	}

	/* Read from GC victim block */
	if (pblk_submit_read_gc(pblk, lba_list, data, nr_secs,
							&secs_to_gc, line)) {
		ret = NVM_IO_ERR;
		goto free_data;
	}

	/* Nothing left valid to move: free everything and report success */
	if (!secs_to_gc)
		goto free_data;

	gc_rq = kmalloc(sizeof(struct pblk_gc_rq), GFP_KERNEL);
	if (!gc_rq) {
		ret = NVM_IO_ERR;
		goto free_data;
	}

	gc_rq->line = line;
	gc_rq->data = data;
	gc_rq->lba_list = lba_list;
	gc_rq->nr_secs = nr_secs;
	gc_rq->secs_to_gc = secs_to_gc;

	/* Hold the line until the writer has persisted this request;
	 * dropped in pblk_gc_write().
	 */
	kref_get(&line->ref);

retry:
	spin_lock(&gc->w_lock);
	/* Backpressure: busy-wait while the writer queue is over its cap.
	 * NOTE(review): 256 (queue depth) and the 256-1024us sleep range
	 * are magic values; consider named constants.
	 */
	if (gc->w_entries > 256) {
		spin_unlock(&gc->w_lock);
		usleep_range(256, 1024);
		goto retry;
	}
	gc->w_entries++;
	list_add_tail(&gc_rq->list, &gc->w_list);
	spin_unlock(&gc->w_lock);

	pblk_gc_writer_kick(&pblk->gc);

	return NVM_IO_OK;

free_data:
	kfree(data);
free_lba_list:
	kfree(lba_list);

	return ret;
}
  110. static void pblk_put_line_back(struct pblk *pblk, struct pblk_line *line)
  111. {
  112. struct pblk_line_mgmt *l_mg = &pblk->l_mg;
  113. struct list_head *move_list;
  114. spin_lock(&line->lock);
  115. WARN_ON(line->state != PBLK_LINESTATE_GC);
  116. line->state = PBLK_LINESTATE_CLOSED;
  117. move_list = pblk_line_gc_list(pblk, line);
  118. spin_unlock(&line->lock);
  119. if (move_list) {
  120. spin_lock(&l_mg->gc_lock);
  121. list_add_tail(&line->list, move_list);
  122. spin_unlock(&l_mg->gc_lock);
  123. }
  124. }
/* Workqueue worker: move all valid sectors of a GC line into the write
 * cache, one request of at most pblk->max_write_pgs lbas at a time.
 * line_ws->priv carries the lba list recovered from the line's emeta.
 */
static void pblk_gc_line_ws(struct work_struct *work)
{
	struct pblk_line_ws *line_ws = container_of(work, struct pblk_line_ws,
									ws);
	struct pblk *pblk = line_ws->pblk;
	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
	struct pblk_line *line = line_ws->line;
	struct pblk_line_meta *lm = &pblk->lm;
	__le64 *lba_list = line_ws->priv;
	u64 *gc_list;
	int sec_left;
	int nr_ppas, bit;
	int put_line = 1;	/* drop our line ref on exit unless put back */

	pr_debug("pblk: line '%d' being reclaimed for GC\n", line->id);

	spin_lock(&line->lock);
	sec_left = line->vsc;
	if (!sec_left) {
		/* Lines are erased before being used (l_mg->data_/log_next) */
		spin_unlock(&line->lock);
		goto out;
	}
	spin_unlock(&line->lock);

	if (sec_left < 0) {
		pr_err("pblk: corrupted GC line (%d)\n", line->id);
		put_line = 0;
		pblk_put_line_back(pblk, line);
		goto out;
	}

	bit = -1;
next_rq:
	/* Ownership of gc_list passes to pblk_gc_move_valid_secs() (which
	 * frees it on its own failure paths); freed here only if unused.
	 */
	gc_list = kmalloc_array(pblk->max_write_pgs, sizeof(u64), GFP_KERNEL);
	if (!gc_list) {
		put_line = 0;
		pblk_put_line_back(pblk, line);
		goto out;
	}

	nr_ppas = 0;
	do {
		/* Valid sectors are the zero bits in the invalid bitmap */
		bit = find_next_zero_bit(line->invalid_bitmap, lm->sec_per_line,
								bit + 1);
		/* Stop at the emeta region at the end of the line */
		if (bit > line->emeta_ssec)
			break;

		gc_list[nr_ppas++] = le64_to_cpu(lba_list[bit]);
	} while (nr_ppas < pblk->max_write_pgs);

	if (unlikely(!nr_ppas)) {
		kfree(gc_list);
		goto out;
	}

	if (pblk_gc_move_valid_secs(pblk, line, gc_list, nr_ppas)) {
		pr_err("pblk: could not GC all sectors: line:%d (%d/%d/%d)\n",
						line->id, line->vsc,
						nr_ppas, nr_ppas);
		put_line = 0;
		pblk_put_line_back(pblk, line);
		goto out;
	}

	sec_left -= nr_ppas;
	if (sec_left > 0)
		goto next_rq;

out:
	pblk_mfree(line->emeta, l_mg->emeta_alloc_type);
	mempool_free(line_ws, pblk->line_ws_pool);
	atomic_dec(&pblk->gc.inflight_gc);
	if (put_line)
		kref_put(&line->ref, pblk_line_put);
}
/* Prepare a line for GC: read its emeta to recover the lba list, then
 * queue the actual data movement (pblk_gc_line_ws) on the GC reader
 * workqueue. Returns 0 on success; 1 on failure, in which case the
 * line has been put back on its GC group list.
 */
static int pblk_gc_line(struct pblk *pblk, struct pblk_line *line)
{
	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
	struct pblk_line_meta *lm = &pblk->lm;
	struct pblk_line_ws *line_ws;
	__le64 *lba_list;
	int ret;

	/* NOTE(review): result not checked — presumably relying on
	 * mempool_alloc(GFP_KERNEL) not failing (it may sleep); confirm.
	 */
	line_ws = mempool_alloc(pblk->line_ws_pool, GFP_KERNEL);
	line->emeta = pblk_malloc(lm->emeta_len, l_mg->emeta_alloc_type,
								GFP_KERNEL);
	if (!line->emeta) {
		pr_err("pblk: cannot use GC emeta\n");
		goto fail_free_ws;
	}

	ret = pblk_line_read_emeta(pblk, line);
	if (ret) {
		pr_err("pblk: line %d read emeta failed (%d)\n", line->id, ret);
		goto fail_free_emeta;
	}

	/* If this read fails, it means that emeta is corrupted. For now, leave
	 * the line untouched. TODO: Implement a recovery routine that scans and
	 * moves all sectors on the line.
	 */
	lba_list = pblk_recov_get_lba_list(pblk, line->emeta);
	if (!lba_list) {
		pr_err("pblk: could not interpret emeta (line %d)\n", line->id);
		goto fail_free_emeta;
	}

	line_ws->pblk = pblk;
	line_ws->line = line;
	line_ws->priv = lba_list;	/* points into line->emeta */

	INIT_WORK(&line_ws->ws, pblk_gc_line_ws);
	queue_work(pblk->gc.gc_reader_wq, &line_ws->ws);

	return 0;

fail_free_emeta:
	pblk_mfree(line->emeta, l_mg->emeta_alloc_type);
fail_free_ws:
	mempool_free(line_ws, pblk->line_ws_pool);
	pblk_put_line_back(pblk, line);

	return 1;
}
  232. static void pblk_gc_lines(struct pblk *pblk, struct list_head *gc_list)
  233. {
  234. struct pblk_line *line, *tline;
  235. list_for_each_entry_safe(line, tline, gc_list, list) {
  236. if (pblk_gc_line(pblk, line))
  237. pr_err("pblk: failed to GC line %d\n", line->id);
  238. list_del(&line->list);
  239. }
  240. }
/*
 * Lines with no valid sectors will be returned to the free list immediately. If
 * GC is activated - either because the free block count is under the determined
 * threshold, or because it is being forced from user space - only lines with a
 * high count of invalid sectors will be recycled.
 */
static void pblk_gc_run(struct pblk *pblk)
{
	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
	struct pblk_gc *gc = &pblk->gc;
	struct pblk_line *line, *tline;
	unsigned int nr_blocks_free, nr_blocks_need;
	struct list_head *group_list;
	int run_gc, gc_group = 0;
	int prev_gc = 0;
	int inflight_gc = atomic_read(&gc->inflight_gc);
	LIST_HEAD(gc_list);

	/* Fully invalid lines need no data movement: drop our reference
	 * so the line can be returned to the free list right away.
	 */
	spin_lock(&l_mg->gc_lock);
	list_for_each_entry_safe(line, tline, &l_mg->gc_full_list, list) {
		spin_lock(&line->lock);
		WARN_ON(line->state != PBLK_LINESTATE_CLOSED);
		line->state = PBLK_LINESTATE_GC;
		spin_unlock(&line->lock);

		list_del(&line->list);
		kref_put(&line->ref, pblk_line_put);
	}
	spin_unlock(&l_mg->gc_lock);

	nr_blocks_need = pblk_rl_gc_thrs(&pblk->rl);
	nr_blocks_free = pblk_rl_nr_free_blks(&pblk->rl);
	run_gc = (nr_blocks_need > nr_blocks_free || gc->gc_forced);

next_gc_group:
	group_list = l_mg->gc_lists[gc_group++];
	spin_lock(&l_mg->gc_lock);
	while (run_gc && !list_empty(group_list)) {
		/* No need to queue up more GC lines than we can handle */
		if (!run_gc || inflight_gc > gc->gc_jobs_active) {
			spin_unlock(&l_mg->gc_lock);
			pblk_gc_lines(pblk, &gc_list);
			return;
		}

		line = list_first_entry(group_list, struct pblk_line, list);
		/* Credit the blocks this line will release once reclaimed */
		nr_blocks_free += atomic_read(&line->blk_in_line);

		spin_lock(&line->lock);
		WARN_ON(line->state != PBLK_LINESTATE_CLOSED);
		line->state = PBLK_LINESTATE_GC;
		list_move_tail(&line->list, &gc_list);
		atomic_inc(&gc->inflight_gc);
		inflight_gc++;
		spin_unlock(&line->lock);

		prev_gc = 1;
		/* Re-evaluate: enough lines may already have been picked */
		run_gc = (nr_blocks_need > nr_blocks_free || gc->gc_forced);
	}
	spin_unlock(&l_mg->gc_lock);

	pblk_gc_lines(pblk, &gc_list);

	/* Nothing picked from this group: fall through to the next group
	 * (ordered by invalid-sector count) if the rate limiter allows.
	 */
	if (!prev_gc && pblk->rl.rb_state > gc_group &&
						gc_group < PBLK_NR_GC_LISTS)
		goto next_gc_group;
}
/* Wake both GC kthreads and re-arm the periodic GC timer. */
static void pblk_gc_kick(struct pblk *pblk)
{
	struct pblk_gc *gc = &pblk->gc;

	wake_up_process(gc->gc_ts);
	pblk_gc_writer_kick(gc);
	mod_timer(&gc->gc_timer, jiffies + msecs_to_jiffies(GC_TIME_MSECS));
}
/* Timer callback: periodically kick the GC machinery so it makes
 * progress even without explicit wakeups.
 */
static void pblk_gc_timer(unsigned long data)
{
	struct pblk *pblk = (struct pblk *)data;

	pblk_gc_kick(pblk);
}
/* Main GC kthread: run a GC pass, then sleep until kicked
 * (by pblk_gc_kick or the periodic timer) or asked to stop.
 */
static int pblk_gc_ts(void *data)
{
	struct pblk *pblk = data;

	while (!kthread_should_stop()) {
		pblk_gc_run(pblk);
		set_current_state(TASK_INTERRUPTIBLE);
		io_schedule();
	}

	return 0;
}
/* GC writer kthread: drain the GC write queue; keep going while there
 * is work (pblk_gc_write returns 0), sleep when the queue is empty.
 */
static int pblk_gc_writer_ts(void *data)
{
	struct pblk *pblk = data;

	while (!kthread_should_stop()) {
		if (!pblk_gc_write(pblk))
			continue;
		set_current_state(TASK_INTERRUPTIBLE);
		io_schedule();
	}

	return 0;
}
/* Mark GC active. NOTE(review): the visible caller
 * (__pblk_gc_should_start) holds gc->lock; keep that invariant for any
 * new callers.
 */
static void pblk_gc_start(struct pblk *pblk)
{
	pblk->gc.gc_active = 1;

	pr_debug("pblk: gc start\n");
}
  337. int pblk_gc_status(struct pblk *pblk)
  338. {
  339. struct pblk_gc *gc = &pblk->gc;
  340. int ret;
  341. spin_lock(&gc->lock);
  342. ret = gc->gc_active;
  343. spin_unlock(&gc->lock);
  344. return ret;
  345. }
/* Start GC if it is enabled and not already running.
 * Caller must hold gc->lock (enforced via lockdep).
 */
static void __pblk_gc_should_start(struct pblk *pblk)
{
	struct pblk_gc *gc = &pblk->gc;

	lockdep_assert_held(&gc->lock);

	if (gc->gc_enabled && !gc->gc_active)
		pblk_gc_start(pblk);
}
/* Locked wrapper around __pblk_gc_should_start(). */
void pblk_gc_should_start(struct pblk *pblk)
{
	struct pblk_gc *gc = &pblk->gc;

	spin_lock(&gc->lock);
	__pblk_gc_should_start(pblk);
	spin_unlock(&gc->lock);
}
/*
 * If flush_wq == 1 then no lock should be held by the caller since
 * flush_workqueue can sleep
 */
static void pblk_gc_stop(struct pblk *pblk, int flush_wq)
{
	/* NOTE(review): flush_wq is currently unused in the body; the
	 * comment above documents the intended contract only.
	 */
	spin_lock(&pblk->gc.lock);
	pblk->gc.gc_active = 0;
	spin_unlock(&pblk->gc.lock);

	pr_debug("pblk: gc stop\n");
}
/* Stop GC when it is active and not being forced from user space.
 * NOTE(review): gc_active/gc_forced are read here without gc->lock —
 * presumably a benign race; confirm against the locking scheme.
 */
void pblk_gc_should_stop(struct pblk *pblk)
{
	struct pblk_gc *gc = &pblk->gc;

	if (gc->gc_active && !gc->gc_forced)
		pblk_gc_stop(pblk, 0);
}
/* sysfs helper: snapshot the enabled/active GC flags under gc->lock. */
void pblk_gc_sysfs_state_show(struct pblk *pblk, int *gc_enabled,
			      int *gc_active)
{
	struct pblk_gc *gc = &pblk->gc;

	spin_lock(&gc->lock);
	*gc_enabled = gc->gc_enabled;
	*gc_active = gc->gc_active;
	spin_unlock(&gc->lock);
}
/* sysfs helper: force GC on or off. Forcing also enables GC and hands
 * the rate limiter a reservation of 64 (NOTE(review): magic value —
 * its unit/meaning is defined by pblk_rl_set_gc_rsc; consider a named
 * constant). Un-forcing clears the reservation but leaves gc_enabled.
 */
void pblk_gc_sysfs_force(struct pblk *pblk, int force)
{
	struct pblk_gc *gc = &pblk->gc;
	int rsv = 0;

	spin_lock(&gc->lock);
	if (force) {
		gc->gc_enabled = 1;
		rsv = 64;
	}
	pblk_rl_set_gc_rsc(&pblk->rl, rsv);
	gc->gc_forced = force;
	__pblk_gc_should_start(pblk);
	spin_unlock(&gc->lock);
}
/* Allocate and start all GC resources: the main and writer kthreads,
 * the periodic kick timer and the reader workqueue.
 * Returns 0 on success or a negative errno; on failure everything
 * already created is torn down via the goto chain.
 */
int pblk_gc_init(struct pblk *pblk)
{
	struct pblk_gc *gc = &pblk->gc;
	int ret;

	gc->gc_ts = kthread_create(pblk_gc_ts, pblk, "pblk-gc-ts");
	if (IS_ERR(gc->gc_ts)) {
		pr_err("pblk: could not allocate GC main kthread\n");
		return PTR_ERR(gc->gc_ts);
	}

	gc->gc_writer_ts = kthread_create(pblk_gc_writer_ts, pblk,
							"pblk-gc-writer-ts");
	if (IS_ERR(gc->gc_writer_ts)) {
		pr_err("pblk: could not allocate GC writer kthread\n");
		ret = PTR_ERR(gc->gc_writer_ts);
		goto fail_free_main_kthread;
	}

	/* Periodic kick so GC makes progress without explicit wakeups */
	setup_timer(&gc->gc_timer, pblk_gc_timer, (unsigned long)pblk);
	mod_timer(&gc->gc_timer, jiffies + msecs_to_jiffies(GC_TIME_MSECS));

	gc->gc_active = 0;
	gc->gc_forced = 0;
	gc->gc_enabled = 1;
	gc->gc_jobs_active = 8;		/* max concurrent GC line jobs */
	gc->w_entries = 0;
	atomic_set(&gc->inflight_gc, 0);

	gc->gc_reader_wq = alloc_workqueue("pblk-gc-reader-wq",
			WQ_MEM_RECLAIM | WQ_UNBOUND, gc->gc_jobs_active);
	if (!gc->gc_reader_wq) {
		pr_err("pblk: could not allocate GC reader workqueue\n");
		ret = -ENOMEM;
		goto fail_free_writer_kthread;
	}

	spin_lock_init(&gc->lock);
	spin_lock_init(&gc->w_lock);
	INIT_LIST_HEAD(&gc->w_list);

	return 0;

fail_free_writer_kthread:
	kthread_stop(gc->gc_writer_ts);
fail_free_main_kthread:
	kthread_stop(gc->gc_ts);

	return ret;
}
  441. void pblk_gc_exit(struct pblk *pblk)
  442. {
  443. struct pblk_gc *gc = &pblk->gc;
  444. flush_workqueue(gc->gc_reader_wq);
  445. del_timer(&gc->gc_timer);
  446. pblk_gc_stop(pblk, 1);
  447. if (gc->gc_ts)
  448. kthread_stop(gc->gc_ts);
  449. if (pblk->gc.gc_reader_wq)
  450. destroy_workqueue(pblk->gc.gc_reader_wq);
  451. if (gc->gc_writer_ts)
  452. kthread_stop(gc->gc_writer_ts);
  453. }