pblk-rb.c

// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2016 CNEX Labs
 * Initial release: Javier Gonzalez <javier@cnexlabs.com>
 *
 * Based upon the circular ringbuffer.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version
 * 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * pblk-rb.c - pblk's write buffer
 */

#include <linux/circ_buf.h>

#include "pblk.h"

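/* Global lock serializing allocation and freeing of write buffer backing pages */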
static DECLARE_RWSEM(pblk_rb_lock);

static void pblk_rb_data_free(struct pblk_rb *rb)
{
        struct pblk_rb_pages *p, *t;

        down_write(&pblk_rb_lock);
        list_for_each_entry_safe(p, t, &rb->pages, list) {
                free_pages((unsigned long)page_address(p->pages), p->order);
                list_del(&p->list);
                kfree(p);
        }
        up_write(&pblk_rb_lock);
}

void pblk_rb_free(struct pblk_rb *rb)
{
        pblk_rb_data_free(rb);
        vfree(rb->entries);
}

/*
 * pblk_rb_calculate_size -- calculate the size of the write buffer
 */
static unsigned int pblk_rb_calculate_size(unsigned int nr_entries)
{
        /* Alloc a write buffer that can at least fit 128 entries */
        return (1 << max(get_count_order(nr_entries), 7));
}

/*
 * Initialize ring buffer. The data and metadata buffers must be previously
 * allocated and their size must be a power of two
 * (Documentation/core-api/circular-buffers.rst)
 */
int pblk_rb_init(struct pblk_rb *rb, unsigned int size, unsigned int threshold,
                 unsigned int seg_size)
{
        struct pblk *pblk = container_of(rb, struct pblk, rwb);
        struct pblk_rb_entry *entries;
        unsigned int init_entry = 0;
        unsigned int max_order = MAX_ORDER - 1;
        unsigned int power_size, power_seg_sz;
        unsigned int alloc_order, order, iter;
        unsigned int nr_entries;

        nr_entries = pblk_rb_calculate_size(size);
        entries = vzalloc(array_size(nr_entries, sizeof(struct pblk_rb_entry)));
        if (!entries)
                return -ENOMEM;

        power_size = get_count_order(size);
        power_seg_sz = get_count_order(seg_size);

        down_write(&pblk_rb_lock);
        rb->entries = entries;
        rb->seg_size = (1 << power_seg_sz);
        rb->nr_entries = (1 << power_size);
        rb->mem = rb->subm = rb->sync = rb->l2p_update = 0;
        rb->back_thres = threshold;
        rb->flush_point = EMPTY_ENTRY;

        spin_lock_init(&rb->w_lock);
        spin_lock_init(&rb->s_lock);

        INIT_LIST_HEAD(&rb->pages);

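        /*
         * Back the buffer entries with pages from alloc_pages(). If the buffer
         * is larger than the maximum allocation order, split it into several
         * page sets of max_order pages each.
         */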
        alloc_order = power_size;
        if (alloc_order >= max_order) {
                order = max_order;
                iter = (1 << (alloc_order - max_order));
        } else {
                order = alloc_order;
                iter = 1;
        }

        do {
                struct pblk_rb_entry *entry;
                struct pblk_rb_pages *page_set;
                void *kaddr;
                unsigned long set_size;
                int i;

                page_set = kmalloc(sizeof(struct pblk_rb_pages), GFP_KERNEL);
                if (!page_set) {
                        up_write(&pblk_rb_lock);
                        vfree(entries);
                        return -ENOMEM;
                }

                page_set->order = order;
                page_set->pages = alloc_pages(GFP_KERNEL, order);
                if (!page_set->pages) {
                        kfree(page_set);
                        pblk_rb_data_free(rb);
                        up_write(&pblk_rb_lock);
                        vfree(entries);
                        return -ENOMEM;
                }
                kaddr = page_address(page_set->pages);

                entry = &rb->entries[init_entry];
                entry->data = kaddr;
                entry->cacheline = pblk_cacheline_to_addr(init_entry++);
                entry->w_ctx.flags = PBLK_WRITABLE_ENTRY;

                set_size = (1 << order);
                for (i = 1; i < set_size; i++) {
                        entry = &rb->entries[init_entry];
                        entry->cacheline = pblk_cacheline_to_addr(init_entry++);
                        entry->data = kaddr + (i * rb->seg_size);
                        entry->w_ctx.flags = PBLK_WRITABLE_ENTRY;
                        bio_list_init(&entry->w_ctx.bios);
                }

                list_add_tail(&page_set->list, &rb->pages);
                iter--;
        } while (iter > 0);
        up_write(&pblk_rb_lock);

#ifdef CONFIG_NVM_PBLK_DEBUG
        atomic_set(&rb->inflight_flush_point, 0);
#endif

        /*
         * Initialize rate-limiter, which controls access to the write buffer
         * by user and GC I/O
         */
        pblk_rl_init(&pblk->rl, rb->nr_entries);

        return 0;
}

static void clean_wctx(struct pblk_w_ctx *w_ctx)
{
        int flags;

        flags = READ_ONCE(w_ctx->flags);
        WARN_ONCE(!(flags & PBLK_SUBMITTED_ENTRY),
                        "pblk: overwriting unsubmitted data\n");

        /* Release flags on context. Protect from writes and reads */
        smp_store_release(&w_ctx->flags, PBLK_WRITABLE_ENTRY);
        pblk_ppa_set_empty(&w_ctx->ppa);
        w_ctx->lba = ADDR_EMPTY;
}

#define pblk_rb_ring_count(head, tail, size) CIRC_CNT(head, tail, size)
#define pblk_rb_ring_space(rb, head, tail, size) \
                                        (CIRC_SPACE(head, tail, size))

/*
 * Buffer space is calculated with respect to the back pointer signaling
 * synchronized entries to the media.
 */
static unsigned int pblk_rb_space(struct pblk_rb *rb)
{
        unsigned int mem = READ_ONCE(rb->mem);
        unsigned int sync = READ_ONCE(rb->sync);

        return pblk_rb_ring_space(rb, mem, sync, rb->nr_entries);
}

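/*
 * Advance ring pointer @p by @nr_entries; the buffer size is a power of two,
 * so the wrap-around reduces to a mask.
 */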
unsigned int pblk_rb_ptr_wrap(struct pblk_rb *rb, unsigned int p,
                              unsigned int nr_entries)
{
        return (p + nr_entries) & (rb->nr_entries - 1);
}

/*
 * Buffer count is calculated with respect to the submission entry signaling the
 * entries that are available to send to the media
 */
unsigned int pblk_rb_read_count(struct pblk_rb *rb)
{
        unsigned int mem = READ_ONCE(rb->mem);
        unsigned int subm = READ_ONCE(rb->subm);

        return pblk_rb_ring_count(mem, subm, rb->nr_entries);
}

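/*
 * Entries written to the buffer that have not yet been confirmed as persisted
 * on the media (i.e. between the write and sync pointers).
 */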
unsigned int pblk_rb_sync_count(struct pblk_rb *rb)
{
        unsigned int mem = READ_ONCE(rb->mem);
        unsigned int sync = READ_ONCE(rb->sync);

        return pblk_rb_ring_count(mem, sync, rb->nr_entries);
}

unsigned int pblk_rb_read_commit(struct pblk_rb *rb, unsigned int nr_entries)
{
        unsigned int subm;

        subm = READ_ONCE(rb->subm);
        /* Commit read means updating submission pointer */
        smp_store_release(&rb->subm, pblk_rb_ptr_wrap(rb, subm, nr_entries));

        return subm;
}

static int __pblk_rb_update_l2p(struct pblk_rb *rb, unsigned int to_update)
{
        struct pblk *pblk = container_of(rb, struct pblk, rwb);
        struct pblk_line *line;
        struct pblk_rb_entry *entry;
        struct pblk_w_ctx *w_ctx;
        unsigned int user_io = 0, gc_io = 0;
        unsigned int i;
        int flags;

        for (i = 0; i < to_update; i++) {
                entry = &rb->entries[rb->l2p_update];
                w_ctx = &entry->w_ctx;

                flags = READ_ONCE(entry->w_ctx.flags);
                if (flags & PBLK_IOTYPE_USER)
                        user_io++;
                else if (flags & PBLK_IOTYPE_GC)
                        gc_io++;
                else
                        WARN(1, "pblk: unknown IO type\n");

                pblk_update_map_dev(pblk, w_ctx->lba, w_ctx->ppa,
                                    entry->cacheline);

                line = pblk_ppa_to_line(pblk, w_ctx->ppa);
                kref_put(&line->ref, pblk_line_put);
                clean_wctx(w_ctx);
                rb->l2p_update = pblk_rb_ptr_wrap(rb, rb->l2p_update, 1);
        }

        pblk_rl_out(&pblk->rl, user_io, gc_io);

        return 0;
}

/*
 * When we move the l2p_update pointer, we update the l2p table - lookups will
 * point to the physical address instead of to the cacheline in the write buffer
 * from this moment on.
 */
static int pblk_rb_update_l2p(struct pblk_rb *rb, unsigned int nr_entries,
                              unsigned int mem, unsigned int sync)
{
        unsigned int space, count;
        int ret = 0;

        lockdep_assert_held(&rb->w_lock);

        /* Update l2p only as buffer entries are being overwritten */
        space = pblk_rb_ring_space(rb, mem, rb->l2p_update, rb->nr_entries);
        if (space > nr_entries)
                goto out;

        count = nr_entries - space;
        /* l2p_update used exclusively under rb->w_lock */
        ret = __pblk_rb_update_l2p(rb, count);

out:
        return ret;
}

/*
 * Update the l2p entry for all sectors stored on the write buffer. This means
 * that all future lookups to the l2p table will point to a device address, not
 * to the cacheline in the write buffer.
 */
void pblk_rb_sync_l2p(struct pblk_rb *rb)
{
        unsigned int sync;
        unsigned int to_update;

        spin_lock(&rb->w_lock);

        /* Protect from reads and writes */
        sync = smp_load_acquire(&rb->sync);

        to_update = pblk_rb_ring_count(sync, rb->l2p_update, rb->nr_entries);
        __pblk_rb_update_l2p(rb, to_update);

        spin_unlock(&rb->w_lock);
}

/*
 * Write @nr_entries to ring buffer from @data buffer if there is enough space.
 * Typically, 4KB data chunks coming from a bio will be copied to the ring
 * buffer, thus the write will fail if not all incoming data can be copied.
 */
static void __pblk_rb_write_entry(struct pblk_rb *rb, void *data,
                                  struct pblk_w_ctx w_ctx,
                                  struct pblk_rb_entry *entry)
{
        memcpy(entry->data, data, rb->seg_size);

        entry->w_ctx.lba = w_ctx.lba;
        entry->w_ctx.ppa = w_ctx.ppa;
}

void pblk_rb_write_entry_user(struct pblk_rb *rb, void *data,
                              struct pblk_w_ctx w_ctx, unsigned int ring_pos)
{
        struct pblk *pblk = container_of(rb, struct pblk, rwb);
        struct pblk_rb_entry *entry;
        int flags;

        entry = &rb->entries[ring_pos];
        flags = READ_ONCE(entry->w_ctx.flags);
#ifdef CONFIG_NVM_PBLK_DEBUG
        /* Caller must guarantee that the entry is free */
        BUG_ON(!(flags & PBLK_WRITABLE_ENTRY));
#endif

        __pblk_rb_write_entry(rb, data, w_ctx, entry);

        pblk_update_map_cache(pblk, w_ctx.lba, entry->cacheline);
        flags = w_ctx.flags | PBLK_WRITTEN_DATA;

        /* Release flags on write context. Protect from writes */
        smp_store_release(&entry->w_ctx.flags, flags);
}

void pblk_rb_write_entry_gc(struct pblk_rb *rb, void *data,
                            struct pblk_w_ctx w_ctx, struct pblk_line *line,
                            u64 paddr, unsigned int ring_pos)
{
        struct pblk *pblk = container_of(rb, struct pblk, rwb);
        struct pblk_rb_entry *entry;
        int flags;

        entry = &rb->entries[ring_pos];
        flags = READ_ONCE(entry->w_ctx.flags);
#ifdef CONFIG_NVM_PBLK_DEBUG
        /* Caller must guarantee that the entry is free */
        BUG_ON(!(flags & PBLK_WRITABLE_ENTRY));
#endif

        __pblk_rb_write_entry(rb, data, w_ctx, entry);

        if (!pblk_update_map_gc(pblk, w_ctx.lba, entry->cacheline, line, paddr))
                entry->w_ctx.lba = ADDR_EMPTY;

        flags = w_ctx.flags | PBLK_WRITTEN_DATA;

        /* Release flags on write context. Protect from writes */
        smp_store_release(&entry->w_ctx.flags, flags);
}

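/*
 * Set a flush point at the last entry written before @pos and, if @bio is
 * given, queue it on that entry so it can be completed once the entry has been
 * persisted. Returns 1 if a bio was attached, 0 otherwise.
 */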
static int pblk_rb_flush_point_set(struct pblk_rb *rb, struct bio *bio,
                                   unsigned int pos)
{
        struct pblk_rb_entry *entry;
        unsigned int sync, flush_point;

        pblk_rb_sync_init(rb, NULL);
        sync = READ_ONCE(rb->sync);

        if (pos == sync) {
                pblk_rb_sync_end(rb, NULL);
                return 0;
        }

#ifdef CONFIG_NVM_PBLK_DEBUG
        atomic_inc(&rb->inflight_flush_point);
#endif

        flush_point = (pos == 0) ? (rb->nr_entries - 1) : (pos - 1);
        entry = &rb->entries[flush_point];

        /* Protect flush points */
        smp_store_release(&rb->flush_point, flush_point);

        if (bio)
                bio_list_add(&entry->w_ctx.bios, bio);

        pblk_rb_sync_end(rb, NULL);

        return bio ? 1 : 0;
}

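/*
 * Check whether @nr_entries fit in the write buffer while keeping at least
 * back_thres free entries between the write and sync pointers. On success,
 * the write position is returned in *pos.
 */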
static int __pblk_rb_may_write(struct pblk_rb *rb, unsigned int nr_entries,
                               unsigned int *pos)
{
        unsigned int mem;
        unsigned int sync;
        unsigned int threshold;

        sync = READ_ONCE(rb->sync);
        mem = READ_ONCE(rb->mem);

        threshold = nr_entries + rb->back_thres;

        if (pblk_rb_ring_space(rb, mem, sync, rb->nr_entries) < threshold)
                return 0;

        if (pblk_rb_update_l2p(rb, nr_entries, mem, sync))
                return 0;

        *pos = mem;

        return 1;
}

static int pblk_rb_may_write(struct pblk_rb *rb, unsigned int nr_entries,
                             unsigned int *pos)
{
        if (!__pblk_rb_may_write(rb, nr_entries, pos))
                return 0;

        /* Protect from read count */
        smp_store_release(&rb->mem, pblk_rb_ptr_wrap(rb, *pos, nr_entries));
        return 1;
}

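/*
 * Flush the write buffer: set a flush point at the current write pointer and
 * kick the write thread so outstanding entries are submitted to the media.
 */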
void pblk_rb_flush(struct pblk_rb *rb)
{
        struct pblk *pblk = container_of(rb, struct pblk, rwb);
        unsigned int mem = READ_ONCE(rb->mem);

        if (pblk_rb_flush_point_set(rb, NULL, mem))
                return;

        pblk_write_kick(pblk);
}

static int pblk_rb_may_write_flush(struct pblk_rb *rb, unsigned int nr_entries,
                                   unsigned int *pos, struct bio *bio,
                                   int *io_ret)
{
        unsigned int mem;

        if (!__pblk_rb_may_write(rb, nr_entries, pos))
                return 0;

        mem = pblk_rb_ptr_wrap(rb, *pos, nr_entries);
        *io_ret = NVM_IO_DONE;

        if (bio->bi_opf & REQ_PREFLUSH) {
                struct pblk *pblk = container_of(rb, struct pblk, rwb);

                atomic64_inc(&pblk->nr_flush);
                if (pblk_rb_flush_point_set(&pblk->rwb, bio, mem))
                        *io_ret = NVM_IO_OK;
        }

        /* Protect from read count */
        smp_store_release(&rb->mem, mem);

        return 1;
}

/*
 * Atomically check that (i) there is space on the write buffer for the
 * incoming I/O, and (ii) the current I/O type has enough budget in the write
 * buffer (rate-limiter).
 */
int pblk_rb_may_write_user(struct pblk_rb *rb, struct bio *bio,
                           unsigned int nr_entries, unsigned int *pos)
{
        struct pblk *pblk = container_of(rb, struct pblk, rwb);
        int io_ret;

        spin_lock(&rb->w_lock);
        io_ret = pblk_rl_user_may_insert(&pblk->rl, nr_entries);
        if (io_ret) {
                spin_unlock(&rb->w_lock);
                return io_ret;
        }

        if (!pblk_rb_may_write_flush(rb, nr_entries, pos, bio, &io_ret)) {
                spin_unlock(&rb->w_lock);
                return NVM_IO_REQUEUE;
        }

        pblk_rl_user_in(&pblk->rl, nr_entries);
        spin_unlock(&rb->w_lock);

        return io_ret;
}

/*
 * Look at pblk_rb_may_write_user comment
 */
int pblk_rb_may_write_gc(struct pblk_rb *rb, unsigned int nr_entries,
                         unsigned int *pos)
{
        struct pblk *pblk = container_of(rb, struct pblk, rwb);

        spin_lock(&rb->w_lock);
        if (!pblk_rl_gc_may_insert(&pblk->rl, nr_entries)) {
                spin_unlock(&rb->w_lock);
                return 0;
        }

        if (!pblk_rb_may_write(rb, nr_entries, pos)) {
                spin_unlock(&rb->w_lock);
                return 0;
        }

        pblk_rl_gc_in(&pblk->rl, nr_entries);
        spin_unlock(&rb->w_lock);

        return 1;
}

/*
 * Read available entries on rb and add them to the given bio. To avoid a memory
 * copy, a page reference to the write buffer is used to be added to the bio.
 *
 * This function is used by the write thread to form the write bio that will
 * persist data on the write buffer to the media.
 */
unsigned int pblk_rb_read_to_bio(struct pblk_rb *rb, struct nvm_rq *rqd,
                                 unsigned int pos, unsigned int nr_entries,
                                 unsigned int count)
{
        struct pblk *pblk = container_of(rb, struct pblk, rwb);
        struct request_queue *q = pblk->dev->q;
        struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd);
        struct bio *bio = rqd->bio;
        struct pblk_rb_entry *entry;
        struct page *page;
        unsigned int pad = 0, to_read = nr_entries;
        unsigned int i;
        int flags;

        if (count < nr_entries) {
                pad = nr_entries - count;
                to_read = count;
        }

        c_ctx->sentry = pos;
        c_ctx->nr_valid = to_read;
        c_ctx->nr_padded = pad;

        for (i = 0; i < to_read; i++) {
                entry = &rb->entries[pos];

                /* A write has been allowed into the buffer, but data is still
                 * being copied to it. It is ok to busy wait.
                 */
try:
                flags = READ_ONCE(entry->w_ctx.flags);
                if (!(flags & PBLK_WRITTEN_DATA)) {
                        io_schedule();
                        goto try;
                }

                page = virt_to_page(entry->data);
                if (!page) {
                        pblk_err(pblk, "could not allocate write bio page\n");
                        flags &= ~PBLK_WRITTEN_DATA;
                        flags |= PBLK_SUBMITTED_ENTRY;
                        /* Release flags on context. Protect from writes */
                        smp_store_release(&entry->w_ctx.flags, flags);
                        return NVM_IO_ERR;
                }

                if (bio_add_pc_page(q, bio, page, rb->seg_size, 0) !=
                                                                rb->seg_size) {
                        pblk_err(pblk, "could not add page to write bio\n");
                        flags &= ~PBLK_WRITTEN_DATA;
                        flags |= PBLK_SUBMITTED_ENTRY;
                        /* Release flags on context. Protect from writes */
                        smp_store_release(&entry->w_ctx.flags, flags);
                        return NVM_IO_ERR;
                }

                flags &= ~PBLK_WRITTEN_DATA;
                flags |= PBLK_SUBMITTED_ENTRY;

                /* Release flags on context. Protect from writes */
                smp_store_release(&entry->w_ctx.flags, flags);

                pos = pblk_rb_ptr_wrap(rb, pos, 1);
        }

        if (pad) {
                if (pblk_bio_add_pages(pblk, bio, GFP_KERNEL, pad)) {
                        pblk_err(pblk, "could not pad page in write bio\n");
                        return NVM_IO_ERR;
                }

                if (pad < pblk->min_write_pgs)
                        atomic64_inc(&pblk->pad_dist[pad - 1]);
                else
                        pblk_warn(pblk, "padding more than min. sectors\n");

                atomic64_add(pad, &pblk->pad_wa);
        }

#ifdef CONFIG_NVM_PBLK_DEBUG
        atomic_long_add(pad, &pblk->padded_writes);
#endif

        return NVM_IO_OK;
}

/*
 * Copy to bio only if the lba matches the one on the given cache entry.
 * Otherwise, it means that the entry has been overwritten, and the bio should
 * be directed to disk.
 */
int pblk_rb_copy_to_bio(struct pblk_rb *rb, struct bio *bio, sector_t lba,
                        struct ppa_addr ppa, int bio_iter, bool advanced_bio)
{
        struct pblk *pblk = container_of(rb, struct pblk, rwb);
        struct pblk_rb_entry *entry;
        struct pblk_w_ctx *w_ctx;
        struct ppa_addr l2p_ppa;
        u64 pos = pblk_addr_to_cacheline(ppa);
        void *data;
        int flags;
        int ret = 1;

#ifdef CONFIG_NVM_PBLK_DEBUG
        /* Caller must ensure that the access will not cause an overflow */
        BUG_ON(pos >= rb->nr_entries);
#endif
        entry = &rb->entries[pos];
        w_ctx = &entry->w_ctx;
        flags = READ_ONCE(w_ctx->flags);

        spin_lock(&rb->w_lock);
        spin_lock(&pblk->trans_lock);
        l2p_ppa = pblk_trans_map_get(pblk, lba);
        spin_unlock(&pblk->trans_lock);

        /* Check if the entry has been overwritten or is scheduled to be */
        if (!pblk_ppa_comp(l2p_ppa, ppa) || w_ctx->lba != lba ||
                                                flags & PBLK_WRITABLE_ENTRY) {
                ret = 0;
                goto out;
        }

        /* Only advance the bio if it hasn't been advanced already. If advanced,
         * this bio is at least a partial bio (i.e., it has partially been
         * filled with data from the cache). If part of the data resides on the
         * media, we will read later on
         */
        if (unlikely(!advanced_bio))
                bio_advance(bio, bio_iter * PBLK_EXPOSED_PAGE_SIZE);

        data = bio_data(bio);
        memcpy(data, entry->data, rb->seg_size);

out:
        spin_unlock(&rb->w_lock);
        return ret;
}

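/* Return the write context of the entry at buffer position @pos */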
struct pblk_w_ctx *pblk_rb_w_ctx(struct pblk_rb *rb, unsigned int pos)
{
        unsigned int entry = pblk_rb_ptr_wrap(rb, pos, 0);

        return &rb->entries[entry].w_ctx;
}

unsigned int pblk_rb_sync_init(struct pblk_rb *rb, unsigned long *flags)
        __acquires(&rb->s_lock)
{
        if (flags)
                spin_lock_irqsave(&rb->s_lock, *flags);
        else
                spin_lock_irq(&rb->s_lock);

        return rb->sync;
}

void pblk_rb_sync_end(struct pblk_rb *rb, unsigned long *flags)
        __releases(&rb->s_lock)
{
        lockdep_assert_held(&rb->s_lock);

        if (flags)
                spin_unlock_irqrestore(&rb->s_lock, *flags);
        else
                spin_unlock_irq(&rb->s_lock);
}

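/*
 * Advance the sync pointer after @nr_entries have been persisted to the media.
 * Clears the flush point if it falls within the newly synced entries. Must be
 * called between pblk_rb_sync_init() and pblk_rb_sync_end().
 */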
unsigned int pblk_rb_sync_advance(struct pblk_rb *rb, unsigned int nr_entries)
{
        unsigned int sync, flush_point;

        lockdep_assert_held(&rb->s_lock);

        sync = READ_ONCE(rb->sync);
        flush_point = READ_ONCE(rb->flush_point);

        if (flush_point != EMPTY_ENTRY) {
                unsigned int secs_to_flush;

                secs_to_flush = pblk_rb_ring_count(flush_point, sync,
                                                   rb->nr_entries);
                if (secs_to_flush < nr_entries) {
                        /* Protect flush points */
                        smp_store_release(&rb->flush_point, EMPTY_ENTRY);
                }
        }

        sync = pblk_rb_ptr_wrap(rb, sync, nr_entries);

        /* Protect from counts */
        smp_store_release(&rb->sync, sync);

        return sync;
}

/* Calculate how many sectors to submit up to the current flush point. */
unsigned int pblk_rb_flush_point_count(struct pblk_rb *rb)
{
        unsigned int subm, sync, flush_point;
        unsigned int submitted, to_flush;

        /* Protect flush points */
        flush_point = smp_load_acquire(&rb->flush_point);
        if (flush_point == EMPTY_ENTRY)
                return 0;

        /* Protect syncs */
        sync = smp_load_acquire(&rb->sync);

        subm = READ_ONCE(rb->subm);
        submitted = pblk_rb_ring_count(subm, sync, rb->nr_entries);

        /* The sync point itself counts as a sector to sync */
        to_flush = pblk_rb_ring_count(flush_point, sync, rb->nr_entries) + 1;

        return (submitted < to_flush) ? (to_flush - submitted) : 0;
}

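/*
 * Sanity check at tear down time: returns 1 if the entry array or any of its
 * backing data pages is missing while the buffer still holds entries,
 * 0 otherwise.
 */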
int pblk_rb_tear_down_check(struct pblk_rb *rb)
{
        struct pblk_rb_entry *entry;
        int i;
        int ret = 0;

        spin_lock(&rb->w_lock);
        spin_lock_irq(&rb->s_lock);

        if ((rb->mem == rb->subm) && (rb->subm == rb->sync) &&
                                (rb->sync == rb->l2p_update) &&
                                (rb->flush_point == EMPTY_ENTRY)) {
                goto out;
        }

        if (!rb->entries) {
                ret = 1;
                goto out;
        }

        for (i = 0; i < rb->nr_entries; i++) {
                entry = &rb->entries[i];

                if (!entry->data) {
                        ret = 1;
                        goto out;
                }
        }

out:
        spin_unlock(&rb->w_lock);
        spin_unlock_irq(&rb->s_lock);

        return ret;
}

unsigned int pblk_rb_wrap_pos(struct pblk_rb *rb, unsigned int pos)
{
        return (pos & (rb->nr_entries - 1));
}

int pblk_rb_pos_oob(struct pblk_rb *rb, u64 pos)
{
        return (pos >= rb->nr_entries);
}

ssize_t pblk_rb_sysfs(struct pblk_rb *rb, char *buf)
{
        struct pblk *pblk = container_of(rb, struct pblk, rwb);
        struct pblk_c_ctx *c;
        ssize_t offset;
        int queued_entries = 0;

        spin_lock_irq(&rb->s_lock);
        list_for_each_entry(c, &pblk->compl_list, list)
                queued_entries++;
        spin_unlock_irq(&rb->s_lock);

        if (rb->flush_point != EMPTY_ENTRY)
                offset = scnprintf(buf, PAGE_SIZE,
                        "%u\t%u\t%u\t%u\t%u\t%u\t%u - %u/%u/%u - %d\n",
                        rb->nr_entries,
                        rb->mem,
                        rb->subm,
                        rb->sync,
                        rb->l2p_update,
#ifdef CONFIG_NVM_PBLK_DEBUG
                        atomic_read(&rb->inflight_flush_point),
#else
                        0,
#endif
                        rb->flush_point,
                        pblk_rb_read_count(rb),
                        pblk_rb_space(rb),
                        pblk_rb_flush_point_count(rb),
                        queued_entries);
        else
                offset = scnprintf(buf, PAGE_SIZE,
                        "%u\t%u\t%u\t%u\t%u\t%u\tNULL - %u/%u/%u - %d\n",
                        rb->nr_entries,
                        rb->mem,
                        rb->subm,
                        rb->sync,
                        rb->l2p_update,
#ifdef CONFIG_NVM_PBLK_DEBUG
                        atomic_read(&rb->inflight_flush_point),
#else
                        0,
#endif
                        pblk_rb_read_count(rb),
                        pblk_rb_space(rb),
                        pblk_rb_flush_point_count(rb),
                        queued_entries);

        return offset;
}