/*
 * Copyright (C) 2016 CNEX Labs
 * Initial release: Javier Gonzalez <javier@cnexlabs.com>
 *                  Matias Bjorling <matias@cnexlabs.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version
 * 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * pblk-gc.c - pblk's garbage collector
 */

#include "pblk.h"
#include <linux/delay.h>
/* Release the payload buffer and the descriptor of a GC request */
static void pblk_gc_free_gc_rq(struct pblk_gc_rq *gc_rq)
{
        vfree(gc_rq->data);
        kfree(gc_rq);
}
/*
 * Drain the GC write list: hand every pending gc request to the write cache
 * and release its resources.
 *
 * Returns 0 when work was done, 1 when the list was empty (so the writer
 * kthread can go back to sleep).
 */
static int pblk_gc_write(struct pblk *pblk)
{
        struct pblk_gc *gc = &pblk->gc;
        struct pblk_gc_rq *gc_rq, *tgc_rq;
        LIST_HEAD(w_list);

        spin_lock(&gc->w_lock);
        if (list_empty(&gc->w_list)) {
                spin_unlock(&gc->w_lock);
                return 1;
        }

        /* Detach the whole list under the lock; process it lock-free below */
        list_cut_position(&w_list, &gc->w_list, gc->w_list.prev);
        gc->w_entries = 0;
        spin_unlock(&gc->w_lock);

        list_for_each_entry_safe(gc_rq, tgc_rq, &w_list, list) {
                pblk_write_gc_to_cache(pblk, gc_rq->data, gc_rq->lba_list,
                                gc_rq->nr_secs, gc_rq->secs_to_gc,
                                gc_rq->line, PBLK_IOTYPE_GC);

                list_del(&gc_rq->list);
                /* Drop the line reference taken when the request was queued */
                kref_put(&gc_rq->line->ref, pblk_line_put);
                pblk_gc_free_gc_rq(gc_rq);
        }

        return 0;
}
/* Wake the GC writer kthread to drain the pending write list */
static void pblk_gc_writer_kick(struct pblk_gc *gc)
{
        wake_up_process(gc->gc_writer_ts);
}
/*
 * Responsible for managing all memory related to a gc request. Also in case of
 * failure
 */
static int pblk_gc_move_valid_secs(struct pblk *pblk, struct pblk_gc_rq *gc_rq)
{
        struct nvm_tgt_dev *dev = pblk->dev;
        struct nvm_geo *geo = &dev->geo;
        struct pblk_gc *gc = &pblk->gc;
        struct pblk_line *line = gc_rq->line;
        void *data;
        unsigned int secs_to_gc;
        int ret = 0;

        /* Buffer for the valid sectors read back from the victim line */
        data = vmalloc(gc_rq->nr_secs * geo->sec_size);
        if (!data) {
                ret = -ENOMEM;
                goto out;
        }

        /* Read from GC victim block */
        if (pblk_submit_read_gc(pblk, gc_rq->lba_list, data, gc_rq->nr_secs,
                                                        &secs_to_gc, line)) {
                ret = -EFAULT;
                goto free_data;
        }

        /* Nothing valid left to move: release everything, report success */
        if (!secs_to_gc)
                goto free_rq;

        gc_rq->data = data;
        gc_rq->secs_to_gc = secs_to_gc;

retry:
        /* Backpressure: sleep-and-retry until the GC writer drains the
         * write queue below PBLK_GC_W_QD entries.
         */
        spin_lock(&gc->w_lock);
        if (gc->w_entries >= PBLK_GC_W_QD) {
                spin_unlock(&gc->w_lock);
                pblk_gc_writer_kick(&pblk->gc);
                usleep_range(128, 256);
                goto retry;
        }
        gc->w_entries++;
        list_add_tail(&gc_rq->list, &gc->w_list);
        spin_unlock(&gc->w_lock);

        /* On success the queued request owns data and the line reference;
         * both are released by the GC writer in pblk_gc_write().
         */
        pblk_gc_writer_kick(&pblk->gc);

        return 0;

free_rq:
        kfree(gc_rq);
free_data:
        vfree(data);
out:
        /* NOTE(review): the -ENOMEM and -EFAULT paths free data (when
         * allocated) and drop the line reference, but do NOT free gc_rq
         * itself — confirm the caller releases it on error, otherwise it
         * leaks.
         */
        kref_put(&line->ref, pblk_line_put);
        return ret;
}
/*
 * Return a line that could not be GC'd to the closed state and re-insert it
 * on the gc list selected by pblk_line_gc_list(), so it can be picked again.
 */
static void pblk_put_line_back(struct pblk *pblk, struct pblk_line *line)
{
        struct pblk_line_mgmt *l_mg = &pblk->l_mg;
        struct list_head *move_list;

        spin_lock(&line->lock);
        WARN_ON(line->state != PBLK_LINESTATE_GC);
        line->state = PBLK_LINESTATE_CLOSED;
        move_list = pblk_line_gc_list(pblk, line);
        spin_unlock(&line->lock);

        /* Re-list outside the line lock to respect lock ordering */
        if (move_list) {
                spin_lock(&l_mg->gc_lock);
                list_add_tail(&line->list, move_list);
                spin_unlock(&l_mg->gc_lock);
        }
}
  115. static void pblk_gc_line_ws(struct work_struct *work)
  116. {
  117. struct pblk_line_ws *line_rq_ws = container_of(work,
  118. struct pblk_line_ws, ws);
  119. struct pblk *pblk = line_rq_ws->pblk;
  120. struct pblk_gc *gc = &pblk->gc;
  121. struct pblk_line *line = line_rq_ws->line;
  122. struct pblk_gc_rq *gc_rq = line_rq_ws->priv;
  123. up(&gc->gc_sem);
  124. if (pblk_gc_move_valid_secs(pblk, gc_rq)) {
  125. pr_err("pblk: could not GC all sectors: line:%d (%d/%d)\n",
  126. line->id, *line->vsc,
  127. gc_rq->nr_secs);
  128. }
  129. mempool_free(line_rq_ws, pblk->line_ws_pool);
  130. }
/*
 * Prepare a line for GC: read its emeta to recover the lba list, then chop
 * the line's valid sectors into gc requests of at most max_write_pgs sectors
 * and queue each one on the line-reader workqueue (pblk_gc_line_ws).
 */
static void pblk_gc_line_prepare_ws(struct work_struct *work)
{
        struct pblk_line_ws *line_ws = container_of(work, struct pblk_line_ws,
                                                                        ws);
        struct pblk *pblk = line_ws->pblk;
        struct pblk_line *line = line_ws->line;
        struct pblk_line_mgmt *l_mg = &pblk->l_mg;
        struct pblk_line_meta *lm = &pblk->lm;
        struct pblk_gc *gc = &pblk->gc;
        struct line_emeta *emeta_buf;
        struct pblk_line_ws *line_rq_ws;
        struct pblk_gc_rq *gc_rq;
        __le64 *lba_list;
        int sec_left, nr_secs, bit;
        int ret;

        emeta_buf = pblk_malloc(lm->emeta_len[0], l_mg->emeta_alloc_type,
                                                                GFP_KERNEL);
        if (!emeta_buf) {
                pr_err("pblk: cannot use GC emeta\n");
                return;
        }

        ret = pblk_line_read_emeta(pblk, line, emeta_buf);
        if (ret) {
                pr_err("pblk: line %d read emeta failed (%d)\n", line->id, ret);
                goto fail_free_emeta;
        }

        /* If this read fails, it means that emeta is corrupted. For now, leave
         * the line untouched. TODO: Implement a recovery routine that scans and
         * moves all sectors on the line.
         */
        lba_list = pblk_recov_get_lba_list(pblk, emeta_buf);
        if (!lba_list) {
                pr_err("pblk: could not interpret emeta (line %d)\n", line->id);
                goto fail_free_emeta;
        }

        /* Number of valid sectors still to be moved off this line */
        sec_left = pblk_line_vsc(line);
        if (sec_left < 0) {
                pr_err("pblk: corrupted GC line (%d)\n", line->id);
                goto fail_free_emeta;
        }

        bit = -1;
next_rq:
        gc_rq = kmalloc(sizeof(struct pblk_gc_rq), GFP_KERNEL);
        if (!gc_rq)
                goto fail_free_emeta;

        nr_secs = 0;
        do {
                /* Valid sectors are the zero bits of the invalid bitmap */
                bit = find_next_zero_bit(line->invalid_bitmap, lm->sec_per_line,
                                                                bit + 1);
                /* Stop once we cross into the emeta region of the line */
                if (bit > line->emeta_ssec)
                        break;

                gc_rq->lba_list[nr_secs++] = le64_to_cpu(lba_list[bit]);
        } while (nr_secs < pblk->max_write_pgs);

        if (unlikely(!nr_secs)) {
                kfree(gc_rq);
                goto out;
        }

        gc_rq->nr_secs = nr_secs;
        gc_rq->line = line;

        line_rq_ws = mempool_alloc(pblk->line_ws_pool, GFP_KERNEL);
        if (!line_rq_ws)
                goto fail_free_gc_rq;

        line_rq_ws->pblk = pblk;
        line_rq_ws->line = line;
        line_rq_ws->priv = gc_rq;

        /* Throttle in-flight gc reads; the slot is released with up() at the
         * top of pblk_gc_line_ws() once the worker starts running.
         */
        down(&gc->gc_sem);

        /* Each queued request pins the line; the reference is dropped by the
         * GC writer (pblk_gc_write) or on request failure.
         */
        kref_get(&line->ref);

        INIT_WORK(&line_rq_ws->ws, pblk_gc_line_ws);
        queue_work(gc->gc_line_reader_wq, &line_rq_ws->ws);

        sec_left -= nr_secs;
        if (sec_left > 0)
                goto next_rq;

out:
        pblk_mfree(emeta_buf, l_mg->emeta_alloc_type);
        mempool_free(line_ws, pblk->line_ws_pool);
        /* NOTE(review): this put presumably balances a reference taken when
         * the line was selected for GC — confirm against pblk_line_put.
         */
        kref_put(&line->ref, pblk_line_put);
        atomic_dec(&gc->inflight_gc);
        return;

fail_free_gc_rq:
        kfree(gc_rq);
fail_free_emeta:
        pblk_mfree(emeta_buf, l_mg->emeta_alloc_type);
        /* Could not GC the line: put it back on its gc group list */
        pblk_put_line_back(pblk, line);
        kref_put(&line->ref, pblk_line_put);
        mempool_free(line_ws, pblk->line_ws_pool);
        atomic_dec(&gc->inflight_gc);

        pr_err("pblk: Failed to GC line %d\n", line->id);
}
  219. static int pblk_gc_line(struct pblk *pblk, struct pblk_line *line)
  220. {
  221. struct pblk_gc *gc = &pblk->gc;
  222. struct pblk_line_ws *line_ws;
  223. pr_debug("pblk: line '%d' being reclaimed for GC\n", line->id);
  224. line_ws = mempool_alloc(pblk->line_ws_pool, GFP_KERNEL);
  225. if (!line_ws)
  226. return -ENOMEM;
  227. line_ws->pblk = pblk;
  228. line_ws->line = line;
  229. INIT_WORK(&line_ws->ws, pblk_gc_line_prepare_ws);
  230. queue_work(gc->gc_reader_wq, &line_ws->ws);
  231. return 0;
  232. }
/*
 * Take one line off the GC read list and schedule its preparation
 * (pblk_gc_line). Returns 1 if the list was empty, 0 otherwise.
 */
static int pblk_gc_read(struct pblk *pblk)
{
        struct pblk_gc *gc = &pblk->gc;
        struct pblk_line *line;

        spin_lock(&gc->r_lock);
        if (list_empty(&gc->r_list)) {
                spin_unlock(&gc->r_lock);
                return 1;
        }

        line = list_first_entry(&gc->r_list, struct pblk_line, list);
        list_del(&line->list);
        spin_unlock(&gc->r_lock);

        /* Keep the rest of the GC machinery moving while we process this */
        pblk_gc_kick(pblk);

        if (pblk_gc_line(pblk, line))
                pr_err("pblk: failed to GC line %d\n", line->id);

        return 0;
}
/* Wake the GC reader kthread to consume the GC read list */
static void pblk_gc_reader_kick(struct pblk_gc *gc)
{
        wake_up_process(gc->gc_reader_ts);
}
  254. static struct pblk_line *pblk_gc_get_victim_line(struct pblk *pblk,
  255. struct list_head *group_list)
  256. {
  257. struct pblk_line *line, *victim;
  258. int line_vsc, victim_vsc;
  259. victim = list_first_entry(group_list, struct pblk_line, list);
  260. list_for_each_entry(line, group_list, list) {
  261. line_vsc = le32_to_cpu(*line->vsc);
  262. victim_vsc = le32_to_cpu(*victim->vsc);
  263. if (line_vsc < victim_vsc)
  264. victim = line;
  265. }
  266. return victim;
  267. }
  268. static bool pblk_gc_should_run(struct pblk_gc *gc, struct pblk_rl *rl)
  269. {
  270. unsigned int nr_blocks_free, nr_blocks_need;
  271. nr_blocks_need = pblk_rl_high_thrs(rl);
  272. nr_blocks_free = pblk_rl_nr_free_blks(rl);
  273. /* This is not critical, no need to take lock here */
  274. return ((gc->gc_active) && (nr_blocks_need > nr_blocks_free));
  275. }
/*
 * Lines with no valid sectors will be returned to the free list immediately. If
 * GC is activated - either because the free block count is under the determined
 * threshold, or because it is being forced from user space - only lines with a
 * high count of invalid sectors will be recycled.
 */
static void pblk_gc_run(struct pblk *pblk)
{
        struct pblk_line_mgmt *l_mg = &pblk->l_mg;
        struct pblk_gc *gc = &pblk->gc;
        struct pblk_line *line;
        struct list_head *group_list;
        bool run_gc;
        int inflight_gc, gc_group = 0, prev_group = 0;

        /* Phase 1: reclaim fully-invalid lines. No data needs moving, so
         * just transition them to GC state and drop the reference.
         */
        do {
                spin_lock(&l_mg->gc_lock);
                if (list_empty(&l_mg->gc_full_list)) {
                        spin_unlock(&l_mg->gc_lock);
                        break;
                }

                line = list_first_entry(&l_mg->gc_full_list,
                                                struct pblk_line, list);

                spin_lock(&line->lock);
                WARN_ON(line->state != PBLK_LINESTATE_CLOSED);
                line->state = PBLK_LINESTATE_GC;
                spin_unlock(&line->lock);

                list_del(&line->list);
                spin_unlock(&l_mg->gc_lock);

                /* NOTE(review): presumably the final put returns the line to
                 * the free list via pblk_line_put — confirm.
                 */
                kref_put(&line->ref, pblk_line_put);
        } while (1);

        run_gc = pblk_gc_should_run(&pblk->gc, &pblk->rl);
        if (!run_gc || (atomic_read(&gc->inflight_gc) >= PBLK_GC_L_QD))
                return;

        /* Phase 2: pick victims group by group, most-invalidated first */
next_gc_group:
        group_list = l_mg->gc_lists[gc_group++];

        do {
                spin_lock(&l_mg->gc_lock);
                if (list_empty(group_list)) {
                        spin_unlock(&l_mg->gc_lock);
                        break;
                }

                /* Cheapest victim: fewest valid sectors to move */
                line = pblk_gc_get_victim_line(pblk, group_list);

                spin_lock(&line->lock);
                WARN_ON(line->state != PBLK_LINESTATE_CLOSED);
                line->state = PBLK_LINESTATE_GC;
                spin_unlock(&line->lock);

                list_del(&line->list);
                spin_unlock(&l_mg->gc_lock);

                /* Hand the victim to the GC reader kthread */
                spin_lock(&gc->r_lock);
                list_add_tail(&line->list, &gc->r_list);
                spin_unlock(&gc->r_lock);

                inflight_gc = atomic_inc_return(&gc->inflight_gc);
                pblk_gc_reader_kick(gc);

                prev_group = 1;

                /* No need to queue up more GC lines than we can handle */
                run_gc = pblk_gc_should_run(&pblk->gc, &pblk->rl);
                if (!run_gc || inflight_gc >= PBLK_GC_L_QD)
                        break;
        } while (1);

        /* Move on to the next (less invalidated) group only if no victim was
         * queued yet and the rate limiter state allows deeper scanning.
         */
        if (!prev_group && pblk->rl.rb_state > gc_group &&
                                        gc_group < PBLK_GC_NR_LISTS)
                goto next_gc_group;
}
/* Wake all GC kthreads and re-arm the periodic GC timer */
void pblk_gc_kick(struct pblk *pblk)
{
        struct pblk_gc *gc = &pblk->gc;

        wake_up_process(gc->gc_ts);
        pblk_gc_writer_kick(gc);
        pblk_gc_reader_kick(gc);
        mod_timer(&gc->gc_timer, jiffies + msecs_to_jiffies(GC_TIME_MSECS));
}
/* Periodic timer callback: kick the GC state machine */
static void pblk_gc_timer(unsigned long data)
{
        struct pblk *pblk = (struct pblk *)data;

        pblk_gc_kick(pblk);
}
  352. static int pblk_gc_ts(void *data)
  353. {
  354. struct pblk *pblk = data;
  355. while (!kthread_should_stop()) {
  356. pblk_gc_run(pblk);
  357. set_current_state(TASK_INTERRUPTIBLE);
  358. io_schedule();
  359. }
  360. return 0;
  361. }
  362. static int pblk_gc_writer_ts(void *data)
  363. {
  364. struct pblk *pblk = data;
  365. while (!kthread_should_stop()) {
  366. if (!pblk_gc_write(pblk))
  367. continue;
  368. set_current_state(TASK_INTERRUPTIBLE);
  369. io_schedule();
  370. }
  371. return 0;
  372. }
  373. static int pblk_gc_reader_ts(void *data)
  374. {
  375. struct pblk *pblk = data;
  376. while (!kthread_should_stop()) {
  377. if (!pblk_gc_read(pblk))
  378. continue;
  379. set_current_state(TASK_INTERRUPTIBLE);
  380. io_schedule();
  381. }
  382. return 0;
  383. }
/* Mark GC as active; callers (pblk_gc_should_start) gate on gc_enabled */
static void pblk_gc_start(struct pblk *pblk)
{
        pblk->gc.gc_active = 1;

        pr_debug("pblk: gc start\n");
}
/* Start GC if it is enabled and not already active, then kick all actors */
void pblk_gc_should_start(struct pblk *pblk)
{
        struct pblk_gc *gc = &pblk->gc;

        if (gc->gc_enabled && !gc->gc_active)
                pblk_gc_start(pblk);

        pblk_gc_kick(pblk);
}
/*
 * If flush_wq == 1 then no lock should be held by the caller since
 * flush_workqueue can sleep
 */
static void pblk_gc_stop(struct pblk *pblk, int flush_wq)
{
        /* NOTE(review): flush_wq is currently unused here despite the comment
         * above — the workqueues are only flushed in pblk_gc_exit().
         */
        pblk->gc.gc_active = 0;

        pr_debug("pblk: gc stop\n");
}
/* Stop GC unless user space is forcing it to stay active */
void pblk_gc_should_stop(struct pblk *pblk)
{
        struct pblk_gc *gc = &pblk->gc;

        if (gc->gc_active && !gc->gc_forced)
                pblk_gc_stop(pblk, 0);
}
/* Report the current GC enabled/active flags (sysfs read path) */
void pblk_gc_sysfs_state_show(struct pblk *pblk, int *gc_enabled,
                              int *gc_active)
{
        struct pblk_gc *gc = &pblk->gc;

        /* Take the lock so both flags are read as a consistent pair */
        spin_lock(&gc->lock);
        *gc_enabled = gc->gc_enabled;
        *gc_active = gc->gc_active;
        spin_unlock(&gc->lock);
}
  420. int pblk_gc_sysfs_force(struct pblk *pblk, int force)
  421. {
  422. struct pblk_gc *gc = &pblk->gc;
  423. if (force < 0 || force > 1)
  424. return -EINVAL;
  425. spin_lock(&gc->lock);
  426. gc->gc_forced = force;
  427. if (force)
  428. gc->gc_enabled = 1;
  429. else
  430. gc->gc_enabled = 0;
  431. spin_unlock(&gc->lock);
  432. pblk_gc_should_start(pblk);
  433. return 0;
  434. }
  435. int pblk_gc_init(struct pblk *pblk)
  436. {
  437. struct pblk_gc *gc = &pblk->gc;
  438. int ret;
  439. gc->gc_ts = kthread_create(pblk_gc_ts, pblk, "pblk-gc-ts");
  440. if (IS_ERR(gc->gc_ts)) {
  441. pr_err("pblk: could not allocate GC main kthread\n");
  442. return PTR_ERR(gc->gc_ts);
  443. }
  444. gc->gc_writer_ts = kthread_create(pblk_gc_writer_ts, pblk,
  445. "pblk-gc-writer-ts");
  446. if (IS_ERR(gc->gc_writer_ts)) {
  447. pr_err("pblk: could not allocate GC writer kthread\n");
  448. ret = PTR_ERR(gc->gc_writer_ts);
  449. goto fail_free_main_kthread;
  450. }
  451. gc->gc_reader_ts = kthread_create(pblk_gc_reader_ts, pblk,
  452. "pblk-gc-reader-ts");
  453. if (IS_ERR(gc->gc_reader_ts)) {
  454. pr_err("pblk: could not allocate GC reader kthread\n");
  455. ret = PTR_ERR(gc->gc_reader_ts);
  456. goto fail_free_writer_kthread;
  457. }
  458. setup_timer(&gc->gc_timer, pblk_gc_timer, (unsigned long)pblk);
  459. mod_timer(&gc->gc_timer, jiffies + msecs_to_jiffies(GC_TIME_MSECS));
  460. gc->gc_active = 0;
  461. gc->gc_forced = 0;
  462. gc->gc_enabled = 1;
  463. gc->w_entries = 0;
  464. atomic_set(&gc->inflight_gc, 0);
  465. /* Workqueue that reads valid sectors from a line and submit them to the
  466. * GC writer to be recycled.
  467. */
  468. gc->gc_line_reader_wq = alloc_workqueue("pblk-gc-line-reader-wq",
  469. WQ_MEM_RECLAIM | WQ_UNBOUND, PBLK_GC_MAX_READERS);
  470. if (!gc->gc_line_reader_wq) {
  471. pr_err("pblk: could not allocate GC line reader workqueue\n");
  472. ret = -ENOMEM;
  473. goto fail_free_reader_kthread;
  474. }
  475. /* Workqueue that prepare lines for GC */
  476. gc->gc_reader_wq = alloc_workqueue("pblk-gc-line_wq",
  477. WQ_MEM_RECLAIM | WQ_UNBOUND, 1);
  478. if (!gc->gc_reader_wq) {
  479. pr_err("pblk: could not allocate GC reader workqueue\n");
  480. ret = -ENOMEM;
  481. goto fail_free_reader_line_wq;
  482. }
  483. spin_lock_init(&gc->lock);
  484. spin_lock_init(&gc->w_lock);
  485. spin_lock_init(&gc->r_lock);
  486. sema_init(&gc->gc_sem, 128);
  487. INIT_LIST_HEAD(&gc->w_list);
  488. INIT_LIST_HEAD(&gc->r_list);
  489. return 0;
  490. fail_free_reader_line_wq:
  491. destroy_workqueue(gc->gc_line_reader_wq);
  492. fail_free_reader_kthread:
  493. kthread_stop(gc->gc_reader_ts);
  494. fail_free_writer_kthread:
  495. kthread_stop(gc->gc_writer_ts);
  496. fail_free_main_kthread:
  497. kthread_stop(gc->gc_ts);
  498. return ret;
  499. }
  500. void pblk_gc_exit(struct pblk *pblk)
  501. {
  502. struct pblk_gc *gc = &pblk->gc;
  503. flush_workqueue(gc->gc_reader_wq);
  504. flush_workqueue(gc->gc_line_reader_wq);
  505. del_timer(&gc->gc_timer);
  506. pblk_gc_stop(pblk, 1);
  507. if (gc->gc_ts)
  508. kthread_stop(gc->gc_ts);
  509. if (gc->gc_reader_wq)
  510. destroy_workqueue(gc->gc_reader_wq);
  511. if (gc->gc_line_reader_wq)
  512. destroy_workqueue(gc->gc_line_reader_wq);
  513. if (gc->gc_writer_ts)
  514. kthread_stop(gc->gc_writer_ts);
  515. if (gc->gc_reader_ts)
  516. kthread_stop(gc->gc_reader_ts);
  517. }