/*
 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
 * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved.
 *
 * This copyrighted material is made available to anyone wishing to use,
 * modify, copy, or redistribute it subject to the terms and conditions
 * of the GNU General Public License version 2.
 */

#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/completion.h>
#include <linux/buffer_head.h>
#include <linux/gfs2_ondisk.h>
#include <linux/crc32.h>
#include <linux/crc32c.h>
#include <linux/delay.h>
#include <linux/kthread.h>
#include <linux/freezer.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/writeback.h>
#include <linux/list_sort.h>

#include "gfs2.h"
#include "incore.h"
#include "bmap.h"
#include "glock.h"
#include "log.h"
#include "lops.h"
#include "meta_io.h"
#include "util.h"
#include "dir.h"
#include "trace_gfs2.h"
/**
 * gfs2_struct2blk - compute the number of log descriptor blocks needed
 * @sdp: the filesystem
 * @nstruct: the number of structures
 * @ssize: the size of the structures
 *
 * Compute the number of log descriptor blocks needed to hold a certain number
 * of structures of a certain size.
 *
 * Returns: the number of blocks needed (minimum is always 1)
 */
unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct,
                             unsigned int ssize)
{
        unsigned int blks;
        unsigned int first, second;

        blks = 1;
        first = (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_log_descriptor)) / ssize;

        if (nstruct > first) {
                second = (sdp->sd_sb.sb_bsize -
                          sizeof(struct gfs2_meta_header)) / ssize;
                blks += DIV_ROUND_UP(nstruct - first, second);
        }

        return blks;
}
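
/*
 * Worked example (illustrative only; exact entry counts depend on the
 * on-disk header sizes): with a 4096-byte block and ssize ==
 * sizeof(u64) == 8, roughly 500 entries fit in the first (descriptor)
 * block and slightly more in each continuation block, so about 1200
 * revoke entries would need blks = 1 + DIV_ROUND_UP(1200 - ~500, ~500)
 * = 3 blocks.
 */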
/**
 * gfs2_remove_from_ail - Remove an entry from the ail lists, updating counters
 * @bd: The gfs2_bufdata to remove
 *
 * The ail lock _must_ be held when calling this function
 */
static void gfs2_remove_from_ail(struct gfs2_bufdata *bd)
{
        bd->bd_tr = NULL;
        list_del_init(&bd->bd_ail_st_list);
        list_del_init(&bd->bd_ail_gl_list);
        atomic_dec(&bd->bd_gl->gl_ail_count);
        brelse(bd->bd_bh);
}
/**
 * gfs2_ail1_start_one - Start I/O on a part of the AIL
 * @sdp: the filesystem
 * @wbc: The writeback control structure
 * @tr: The transaction whose ail1 buffers to start I/O on
 *
 * Returns: 1 if the ail lock was dropped and the caller should restart
 *          its scan of the ail1 list, 0 otherwise
 */
static int gfs2_ail1_start_one(struct gfs2_sbd *sdp,
                               struct writeback_control *wbc,
                               struct gfs2_trans *tr)
__releases(&sdp->sd_ail_lock)
__acquires(&sdp->sd_ail_lock)
{
        struct gfs2_glock *gl = NULL;
        struct address_space *mapping;
        struct gfs2_bufdata *bd, *s;
        struct buffer_head *bh;

        list_for_each_entry_safe_reverse(bd, s, &tr->tr_ail1_list, bd_ail_st_list) {
                bh = bd->bd_bh;

                gfs2_assert(sdp, bd->bd_tr == tr);

                if (!buffer_busy(bh)) {
                        if (!buffer_uptodate(bh))
                                gfs2_io_error_bh(sdp, bh);
                        list_move(&bd->bd_ail_st_list, &tr->tr_ail2_list);
                        continue;
                }

                if (!buffer_dirty(bh))
                        continue;
                if (gl == bd->bd_gl)
                        continue;
                gl = bd->bd_gl;
                list_move(&bd->bd_ail_st_list, &tr->tr_ail1_list);
                mapping = bh->b_page->mapping;
                if (!mapping)
                        continue;
                spin_unlock(&sdp->sd_ail_lock);
                generic_writepages(mapping, wbc);
                spin_lock(&sdp->sd_ail_lock);
                if (wbc->nr_to_write <= 0)
                        break;
                return 1;
        }

        return 0;
}
/**
 * gfs2_ail1_flush - start writeback of some ail1 entries
 * @sdp: The super block
 * @wbc: The writeback control structure
 *
 * Writes back some ail1 entries, according to the limits in the
 * writeback control structure
 */

void gfs2_ail1_flush(struct gfs2_sbd *sdp, struct writeback_control *wbc)
{
        struct list_head *head = &sdp->sd_ail1_list;
        struct gfs2_trans *tr;
        struct blk_plug plug;

        trace_gfs2_ail_flush(sdp, wbc, 1);
        blk_start_plug(&plug);
        spin_lock(&sdp->sd_ail_lock);
restart:
        list_for_each_entry_reverse(tr, head, tr_list) {
                if (wbc->nr_to_write <= 0)
                        break;
                if (gfs2_ail1_start_one(sdp, wbc, tr))
                        goto restart;
        }
        spin_unlock(&sdp->sd_ail_lock);
        blk_finish_plug(&plug);
        trace_gfs2_ail_flush(sdp, wbc, 0);
}
/**
 * gfs2_ail1_start - start writeback of all ail1 entries
 * @sdp: The superblock
 */

static void gfs2_ail1_start(struct gfs2_sbd *sdp)
{
        struct writeback_control wbc = {
                .sync_mode = WB_SYNC_NONE,
                .nr_to_write = LONG_MAX,
                .range_start = 0,
                .range_end = LLONG_MAX,
        };

        return gfs2_ail1_flush(sdp, &wbc);
}
/**
 * gfs2_ail1_empty_one - Check whether or not a trans in the AIL has been synced
 * @sdp: the filesystem
 * @tr: the transaction to check
 */
static void gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
{
        struct gfs2_bufdata *bd, *s;
        struct buffer_head *bh;

        list_for_each_entry_safe_reverse(bd, s, &tr->tr_ail1_list,
                                         bd_ail_st_list) {
                bh = bd->bd_bh;
                gfs2_assert(sdp, bd->bd_tr == tr);
                if (buffer_busy(bh))
                        continue;
                if (!buffer_uptodate(bh))
                        gfs2_io_error_bh(sdp, bh);
                list_move(&bd->bd_ail_st_list, &tr->tr_ail2_list);
        }
}
/**
 * gfs2_ail1_empty - Try to empty the ail1 lists
 * @sdp: The superblock
 *
 * Tries to empty the ail1 lists, starting with the oldest first
 */

static int gfs2_ail1_empty(struct gfs2_sbd *sdp)
{
        struct gfs2_trans *tr, *s;
        int oldest_tr = 1;
        int ret;
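
        /*
         * Transactions are scanned oldest first, and only an unbroken run
         * of fully written transactions, starting with the oldest, may be
         * moved to the AIL2 list: the log tail can only advance past
         * transactions in order.  oldest_tr tracks whether that run is
         * still unbroken.
         */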
        spin_lock(&sdp->sd_ail_lock);
        list_for_each_entry_safe_reverse(tr, s, &sdp->sd_ail1_list, tr_list) {
                gfs2_ail1_empty_one(sdp, tr);
                if (list_empty(&tr->tr_ail1_list) && oldest_tr)
                        list_move(&tr->tr_list, &sdp->sd_ail2_list);
                else
                        oldest_tr = 0;
        }
        ret = list_empty(&sdp->sd_ail1_list);
        spin_unlock(&sdp->sd_ail_lock);

        return ret;
}
static void gfs2_ail1_wait(struct gfs2_sbd *sdp)
{
        struct gfs2_trans *tr;
        struct gfs2_bufdata *bd;
        struct buffer_head *bh;

        spin_lock(&sdp->sd_ail_lock);
        list_for_each_entry_reverse(tr, &sdp->sd_ail1_list, tr_list) {
                list_for_each_entry(bd, &tr->tr_ail1_list, bd_ail_st_list) {
                        bh = bd->bd_bh;
                        if (!buffer_locked(bh))
                                continue;
                        get_bh(bh);
                        spin_unlock(&sdp->sd_ail_lock);
                        wait_on_buffer(bh);
                        brelse(bh);
                        return;
                }
        }
        spin_unlock(&sdp->sd_ail_lock);
}
/**
 * gfs2_ail2_empty_one - Remove all buffers of a trans from the AIL2 list
 * @sdp: the filesystem
 * @tr: the transaction to empty
 */
static void gfs2_ail2_empty_one(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
{
        struct list_head *head = &tr->tr_ail2_list;
        struct gfs2_bufdata *bd;

        while (!list_empty(head)) {
                bd = list_entry(head->prev, struct gfs2_bufdata,
                                bd_ail_st_list);
                gfs2_assert(sdp, bd->bd_tr == tr);
                gfs2_remove_from_ail(bd);
        }
}

static void ail2_empty(struct gfs2_sbd *sdp, unsigned int new_tail)
{
        struct gfs2_trans *tr, *safe;
        unsigned int old_tail = sdp->sd_log_tail;
        int wrap = (new_tail < old_tail);
        int a, b, rm;
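
        /*
         * A transaction can be reclaimed once its first log block lies in
         * the circular interval [old_tail, new_tail) that the tail is
         * about to skip over.  When that interval wraps past the end of
         * the journal it is the union of [old_tail, jd_blocks) and
         * [0, new_tail), hence (a || b); otherwise it is the plain
         * intersection (a && b).
         */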
        spin_lock(&sdp->sd_ail_lock);

        list_for_each_entry_safe(tr, safe, &sdp->sd_ail2_list, tr_list) {
                a = (old_tail <= tr->tr_first);
                b = (tr->tr_first < new_tail);
                rm = (wrap) ? (a || b) : (a && b);
                if (!rm)
                        continue;

                gfs2_ail2_empty_one(sdp, tr);
                list_del(&tr->tr_list);
                gfs2_assert_warn(sdp, list_empty(&tr->tr_ail1_list));
                gfs2_assert_warn(sdp, list_empty(&tr->tr_ail2_list));
                kfree(tr);
        }

        spin_unlock(&sdp->sd_ail_lock);
}
/**
 * gfs2_log_release - Release a given number of log blocks
 * @sdp: The GFS2 superblock
 * @blks: The number of blocks
 */

void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks)
{
        atomic_add(blks, &sdp->sd_log_blks_free);
        trace_gfs2_log_blocks(sdp, blks);
        gfs2_assert_withdraw(sdp, atomic_read(&sdp->sd_log_blks_free) <=
                                  sdp->sd_jdesc->jd_blocks);
        up_read(&sdp->sd_log_flush_lock);
}
/**
 * gfs2_log_reserve - Make a log reservation
 * @sdp: The GFS2 superblock
 * @blks: The number of blocks to reserve
 *
 * Note that we never give out the last few blocks of the journal. That's
 * due to the fact that there is a small number of header blocks
 * associated with each log flush. The exact number can't be known until
 * flush time, so we ensure that we have just enough free blocks at all
 * times to avoid running out during a log flush.
 *
 * We no longer flush the log here, instead we wake up logd to do that
 * for us. To avoid the thundering herd and to ensure that we deal fairly
 * with queued waiters, we use an exclusive wait. This means that when we
 * get woken with enough journal space to get our reservation, we need to
 * wake the next waiter on the list.
 *
 * Returns: errno
 */
int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks)
{
        int ret = 0;
        unsigned reserved_blks = 7 * (4096 / sdp->sd_vfs->s_blocksize);
        unsigned wanted = blks + reserved_blks;
        DEFINE_WAIT(wait);
        int did_wait = 0;
        unsigned int free_blocks;

        if (gfs2_assert_warn(sdp, blks) ||
            gfs2_assert_warn(sdp, blks <= sdp->sd_jdesc->jd_blocks))
                return -EINVAL;
        atomic_add(blks, &sdp->sd_log_blks_needed);
retry:
        free_blocks = atomic_read(&sdp->sd_log_blks_free);
        if (unlikely(free_blocks <= wanted)) {
                do {
                        prepare_to_wait_exclusive(&sdp->sd_log_waitq, &wait,
                                        TASK_UNINTERRUPTIBLE);
                        wake_up(&sdp->sd_logd_waitq);
                        did_wait = 1;
                        if (atomic_read(&sdp->sd_log_blks_free) <= wanted)
                                io_schedule();
                        free_blocks = atomic_read(&sdp->sd_log_blks_free);
                } while(free_blocks <= wanted);
                finish_wait(&sdp->sd_log_waitq, &wait);
        }
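
        /*
         * Claim the blocks with a lock-free cmpxchg: if sd_log_blks_free
         * changed between the read above and here (another reserver or a
         * log flush got in first), drop our sd_reserving_log hold and
         * retry from the top.
         */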
        atomic_inc(&sdp->sd_reserving_log);
        if (atomic_cmpxchg(&sdp->sd_log_blks_free, free_blocks,
                           free_blocks - blks) != free_blocks) {
                if (atomic_dec_and_test(&sdp->sd_reserving_log))
                        wake_up(&sdp->sd_reserving_log_wait);
                goto retry;
        }
        atomic_sub(blks, &sdp->sd_log_blks_needed);
        trace_gfs2_log_blocks(sdp, -blks);

        /*
         * If we waited, then so might others, wake them up _after_ we get
         * our share of the log.
         */
        if (unlikely(did_wait))
                wake_up(&sdp->sd_log_waitq);

        down_read(&sdp->sd_log_flush_lock);
        if (unlikely(!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags))) {
                gfs2_log_release(sdp, blks);
                ret = -EROFS;
        }
        if (atomic_dec_and_test(&sdp->sd_reserving_log))
                wake_up(&sdp->sd_reserving_log_wait);

        return ret;
}
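
/*
 * Illustrative numbers for the head-room above: with a 4096-byte
 * filesystem block size, reserved_blks is 7 * (4096 / 4096) = 7 blocks,
 * so a caller asking for 10 blocks does not proceed until at least 18
 * blocks are free (wanted = blks + reserved_blks, and we wait while
 * free_blocks <= wanted).
 */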
/**
 * log_distance - Compute distance between two journal blocks
 * @sdp: The GFS2 superblock
 * @newer: The most recent journal block of the pair
 * @older: The older journal block of the pair
 *
 * Compute the distance (in the journal direction) between two
 * blocks in the journal
 *
 * Returns: the distance in blocks
 */

static inline unsigned int log_distance(struct gfs2_sbd *sdp, unsigned int newer,
                                        unsigned int older)
{
        int dist;

        dist = newer - older;
        if (dist < 0)
                dist += sdp->sd_jdesc->jd_blocks;

        return dist;
}
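
/*
 * Example: in an 8192-block journal, log_distance(sdp, 10, 8000)
 * computes 10 - 8000 = -7990, then wraps by adding jd_blocks, giving
 * 202: the forward distance around the circular log.
 */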
/**
 * calc_reserved - Calculate the number of blocks to reserve when
 *                 refunding a transaction's unused buffers.
 * @sdp: The GFS2 superblock
 *
 * This is complex.  We need to reserve room for all our currently used
 * metadata buffers (e.g. normal file I/O rewriting file time stamps) and
 * all our journaled data buffers for journaled files (e.g. files in the
 * meta_fs like rindex, or files for which chattr +j was done.)
 * If we don't reserve enough space, gfs2_log_refund and gfs2_log_flush
 * will count it as free space (sd_log_blks_free) and corruption will follow.
 *
 * We can have metadata bufs and jdata bufs in the same journal.  So each
 * type gets its own log header, for which we need to reserve a block.
 * In fact, each type has the potential for needing more than one header
 * in cases where we have more buffers than will fit on a journal page.
 * Metadata journal entries take up half the space of journaled buffer entries.
 * Thus, metadata entries have buf_limit (502) and journaled buffers have
 * databuf_limit (251) before they cause a wrap around.
 *
 * Also, we need to reserve blocks for revoke journal entries and one for an
 * overall header for the lot.
 *
 * Returns: the number of blocks reserved
 */

static unsigned int calc_reserved(struct gfs2_sbd *sdp)
{
        unsigned int reserved = 0;
        unsigned int mbuf;
        unsigned int dbuf;
        struct gfs2_trans *tr = sdp->sd_log_tr;

        if (tr) {
                mbuf = tr->tr_num_buf_new - tr->tr_num_buf_rm;
                dbuf = tr->tr_num_databuf_new - tr->tr_num_databuf_rm;
                reserved = mbuf + dbuf;
                /* Account for header blocks */
                reserved += DIV_ROUND_UP(mbuf, buf_limit(sdp));
                reserved += DIV_ROUND_UP(dbuf, databuf_limit(sdp));
        }

        if (sdp->sd_log_commited_revoke > 0)
                reserved += gfs2_struct2blk(sdp, sdp->sd_log_commited_revoke,
                                            sizeof(u64));
        /* One for the overall header */
        if (reserved)
                reserved++;
        return reserved;
}
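
/*
 * Illustrative sizing, taking the 4k-block limits quoted in the comment
 * above (buf_limit = 502, databuf_limit = 251) at face value: a
 * transaction with mbuf = 510 metadata buffers and dbuf = 251 journaled
 * data buffers reserves 510 + 251 data blocks, plus
 * DIV_ROUND_UP(510, 502) = 2 metadata log headers,
 * DIV_ROUND_UP(251, 251) = 1 data log header, and 1 overall header:
 * 765 blocks in total.
 */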
static unsigned int current_tail(struct gfs2_sbd *sdp)
{
        struct gfs2_trans *tr;
        unsigned int tail;

        spin_lock(&sdp->sd_ail_lock);

        if (list_empty(&sdp->sd_ail1_list)) {
                tail = sdp->sd_log_head;
        } else {
                tr = list_entry(sdp->sd_ail1_list.prev, struct gfs2_trans,
                                tr_list);
                tail = tr->tr_first;
        }

        spin_unlock(&sdp->sd_ail_lock);

        return tail;
}

static void log_pull_tail(struct gfs2_sbd *sdp, unsigned int new_tail)
{
        unsigned int dist = log_distance(sdp, new_tail, sdp->sd_log_tail);

        ail2_empty(sdp, new_tail);

        atomic_add(dist, &sdp->sd_log_blks_free);
        trace_gfs2_log_blocks(sdp, dist);
        gfs2_assert_withdraw(sdp, atomic_read(&sdp->sd_log_blks_free) <=
                             sdp->sd_jdesc->jd_blocks);

        sdp->sd_log_tail = new_tail;
}

static void log_flush_wait(struct gfs2_sbd *sdp)
{
        DEFINE_WAIT(wait);

        if (atomic_read(&sdp->sd_log_in_flight)) {
                do {
                        prepare_to_wait(&sdp->sd_log_flush_wait, &wait,
                                        TASK_UNINTERRUPTIBLE);
                        if (atomic_read(&sdp->sd_log_in_flight))
                                io_schedule();
                } while(atomic_read(&sdp->sd_log_in_flight));
                finish_wait(&sdp->sd_log_flush_wait, &wait);
        }
}

static int ip_cmp(void *priv, struct list_head *a, struct list_head *b)
{
        struct gfs2_inode *ipa, *ipb;

        ipa = list_entry(a, struct gfs2_inode, i_ordered);
        ipb = list_entry(b, struct gfs2_inode, i_ordered);

        if (ipa->i_no_addr < ipb->i_no_addr)
                return -1;
        if (ipa->i_no_addr > ipb->i_no_addr)
                return 1;
        return 0;
}
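
/*
 * Sorting the ordered-write list by i_no_addr (via ip_cmp above) means
 * gfs2_ordered_write() below kicks off writeback in roughly ascending
 * on-disk order, which keeps the resulting I/O closer to sequential on
 * rotational storage.
 */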
static void gfs2_ordered_write(struct gfs2_sbd *sdp)
{
        struct gfs2_inode *ip;
        LIST_HEAD(written);

        spin_lock(&sdp->sd_ordered_lock);
        list_sort(NULL, &sdp->sd_log_le_ordered, &ip_cmp);
        while (!list_empty(&sdp->sd_log_le_ordered)) {
                ip = list_entry(sdp->sd_log_le_ordered.next, struct gfs2_inode, i_ordered);
                if (ip->i_inode.i_mapping->nrpages == 0) {
                        test_and_clear_bit(GIF_ORDERED, &ip->i_flags);
                        list_del(&ip->i_ordered);
                        continue;
                }
                list_move(&ip->i_ordered, &written);
                spin_unlock(&sdp->sd_ordered_lock);
                filemap_fdatawrite(ip->i_inode.i_mapping);
                spin_lock(&sdp->sd_ordered_lock);
        }
        list_splice(&written, &sdp->sd_log_le_ordered);
        spin_unlock(&sdp->sd_ordered_lock);
}

static void gfs2_ordered_wait(struct gfs2_sbd *sdp)
{
        struct gfs2_inode *ip;

        spin_lock(&sdp->sd_ordered_lock);
        while (!list_empty(&sdp->sd_log_le_ordered)) {
                ip = list_entry(sdp->sd_log_le_ordered.next, struct gfs2_inode, i_ordered);
                list_del(&ip->i_ordered);
                WARN_ON(!test_and_clear_bit(GIF_ORDERED, &ip->i_flags));
                if (ip->i_inode.i_mapping->nrpages == 0)
                        continue;
                spin_unlock(&sdp->sd_ordered_lock);
                filemap_fdatawait(ip->i_inode.i_mapping);
                spin_lock(&sdp->sd_ordered_lock);
        }
        spin_unlock(&sdp->sd_ordered_lock);
}
void gfs2_ordered_del_inode(struct gfs2_inode *ip)
{
        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);

        spin_lock(&sdp->sd_ordered_lock);
        if (test_and_clear_bit(GIF_ORDERED, &ip->i_flags))
                list_del(&ip->i_ordered);
        spin_unlock(&sdp->sd_ordered_lock);
}

void gfs2_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
{
        struct buffer_head *bh = bd->bd_bh;
        struct gfs2_glock *gl = bd->bd_gl;

        bh->b_private = NULL;
        bd->bd_blkno = bh->b_blocknr;
        gfs2_remove_from_ail(bd); /* drops ref on bh */
        bd->bd_bh = NULL;
        bd->bd_ops = &gfs2_revoke_lops;
        sdp->sd_log_num_revoke++;
        atomic_inc(&gl->gl_revokes);
        set_bit(GLF_LFLUSH, &gl->gl_flags);
        list_add(&bd->bd_list, &sdp->sd_log_le_revoke);
}

void gfs2_write_revokes(struct gfs2_sbd *sdp)
{
        struct gfs2_trans *tr;
        struct gfs2_bufdata *bd, *tmp;
        int have_revokes = 0;
        int max_revokes = (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_log_descriptor)) / sizeof(u64);

        gfs2_ail1_empty(sdp);
        spin_lock(&sdp->sd_ail_lock);
        list_for_each_entry(tr, &sdp->sd_ail1_list, tr_list) {
                list_for_each_entry(bd, &tr->tr_ail2_list, bd_ail_st_list) {
                        if (list_empty(&bd->bd_list)) {
                                have_revokes = 1;
                                goto done;
                        }
                }
        }
done:
        spin_unlock(&sdp->sd_ail_lock);
        if (have_revokes == 0)
                return;
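
        /*
         * The first revoke block is a log descriptor; each block after it
         * carries only a meta header, so it holds slightly more u64
         * revoke entries.  Grow max_revokes one continuation block at a
         * time until the revokes we already hold fit, then whatever is
         * left over is spare capacity we can fill from the AIL below.
         */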
        while (sdp->sd_log_num_revoke > max_revokes)
                max_revokes += (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_meta_header)) / sizeof(u64);
        max_revokes -= sdp->sd_log_num_revoke;
        if (!sdp->sd_log_num_revoke) {
                atomic_dec(&sdp->sd_log_blks_free);
                /* If no blocks have been reserved, we need to also
                 * reserve a block for the header */
                if (!sdp->sd_log_blks_reserved)
                        atomic_dec(&sdp->sd_log_blks_free);
        }
        gfs2_log_lock(sdp);
        spin_lock(&sdp->sd_ail_lock);
        list_for_each_entry(tr, &sdp->sd_ail1_list, tr_list) {
                list_for_each_entry_safe(bd, tmp, &tr->tr_ail2_list, bd_ail_st_list) {
                        if (max_revokes == 0)
                                goto out_of_blocks;
                        if (!list_empty(&bd->bd_list))
                                continue;
                        gfs2_add_revoke(sdp, bd);
                        max_revokes--;
                }
        }
out_of_blocks:
        spin_unlock(&sdp->sd_ail_lock);
        gfs2_log_unlock(sdp);

        if (!sdp->sd_log_num_revoke) {
                atomic_inc(&sdp->sd_log_blks_free);
                if (!sdp->sd_log_blks_reserved)
                        atomic_inc(&sdp->sd_log_blks_free);
        }
}
/**
 * gfs2_write_log_header - Write a journal log header buffer at sd_log_flush_head
 * @sdp: The GFS2 superblock
 * @jd: journal descriptor of the journal to which we are writing
 * @seq: sequence number
 * @tail: tail of the log
 * @flags: log header flags GFS2_LOG_HEAD_*
 * @op_flags: flags to pass to the bio
 */
void gfs2_write_log_header(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd,
                           u64 seq, u32 tail, u32 flags, int op_flags)
{
        struct gfs2_log_header *lh;
        u32 hash, crc;
        struct page *page = mempool_alloc(gfs2_page_pool, GFP_NOIO);
        struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local;
        struct timespec64 tv;
        struct super_block *sb = sdp->sd_vfs;
        u64 addr;

        lh = page_address(page);
        clear_page(lh);

        lh->lh_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
        lh->lh_header.mh_type = cpu_to_be32(GFS2_METATYPE_LH);
        lh->lh_header.__pad0 = cpu_to_be64(0);
        lh->lh_header.mh_format = cpu_to_be32(GFS2_FORMAT_LH);
        lh->lh_header.mh_jid = cpu_to_be32(sdp->sd_jdesc->jd_jid);
        lh->lh_sequence = cpu_to_be64(seq);
        lh->lh_flags = cpu_to_be32(flags);
        lh->lh_tail = cpu_to_be32(tail);
        lh->lh_blkno = cpu_to_be32(sdp->sd_log_flush_head);
        hash = ~crc32(~0, lh, LH_V1_SIZE);
        lh->lh_hash = cpu_to_be32(hash);

        tv = current_kernel_time64();
        lh->lh_nsec = cpu_to_be32(tv.tv_nsec);
        lh->lh_sec = cpu_to_be64(tv.tv_sec);
        addr = gfs2_log_bmap(sdp);
        lh->lh_addr = cpu_to_be64(addr);
        lh->lh_jinode = cpu_to_be64(GFS2_I(jd->jd_inode)->i_no_addr);

        /* We may only write local statfs, quota, etc., when writing to our
           own journal. The values are left 0 when recovering a journal
           different from our own. */
        if (!(flags & GFS2_LOG_HEAD_RECOVERY)) {
                lh->lh_statfs_addr =
                        cpu_to_be64(GFS2_I(sdp->sd_sc_inode)->i_no_addr);
                lh->lh_quota_addr =
                        cpu_to_be64(GFS2_I(sdp->sd_qc_inode)->i_no_addr);

                spin_lock(&sdp->sd_statfs_spin);
                lh->lh_local_total = cpu_to_be64(l_sc->sc_total);
                lh->lh_local_free = cpu_to_be64(l_sc->sc_free);
                lh->lh_local_dinodes = cpu_to_be64(l_sc->sc_dinodes);
                spin_unlock(&sdp->sd_statfs_spin);
        }

        BUILD_BUG_ON(offsetof(struct gfs2_log_header, lh_crc) != LH_V1_SIZE);
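
        /*
         * Two checksums: lh_hash (crc32, computed above) covers only the
         * original v1 header fields for compatibility with older readers,
         * while lh_crc (crc32c) covers everything in the block after the
         * v1 area, skipping the 4-byte lh_crc field itself.
         */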
        crc = crc32c(~0, (void *)lh + LH_V1_SIZE + 4,
                     sb->s_blocksize - LH_V1_SIZE - 4);
        lh->lh_crc = cpu_to_be32(crc);

        gfs2_log_write(sdp, page, sb->s_blocksize, 0, addr);
        gfs2_log_flush_bio(sdp, REQ_OP_WRITE, op_flags);
        log_flush_wait(sdp);
}
/**
 * log_write_header - Get and initialize a journal header buffer
 * @sdp: The GFS2 superblock
 * @flags: The log header flags, including log header origin
 */
static void log_write_header(struct gfs2_sbd *sdp, u32 flags)
{
        unsigned int tail;
        int op_flags = REQ_PREFLUSH | REQ_FUA | REQ_META | REQ_SYNC;
        enum gfs2_freeze_state state = atomic_read(&sdp->sd_freeze_state);

        gfs2_assert_withdraw(sdp, (state != SFS_FROZEN));
        tail = current_tail(sdp);

        if (test_bit(SDF_NOBARRIERS, &sdp->sd_flags)) {
                gfs2_ordered_wait(sdp);
                log_flush_wait(sdp);
                op_flags = REQ_SYNC | REQ_META | REQ_PRIO;
        }
        sdp->sd_log_idle = (tail == sdp->sd_log_flush_head);
        gfs2_write_log_header(sdp, sdp->sd_jdesc, sdp->sd_log_sequence++, tail,
                              flags, op_flags);

        if (sdp->sd_log_tail != tail)
                log_pull_tail(sdp, tail);
}
/**
 * gfs2_log_flush - flush incore transaction(s)
 * @sdp: the filesystem
 * @gl: The glock structure to flush.  If NULL, flush the whole incore log
 * @flags: The log header flags: GFS2_LOG_HEAD_FLUSH_* and debug flags
 */

void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags)
{
        struct gfs2_trans *tr;
        enum gfs2_freeze_state state = atomic_read(&sdp->sd_freeze_state);

        down_write(&sdp->sd_log_flush_lock);

        /* Log might have been flushed while we waited for the flush lock */
        if (gl && !test_bit(GLF_LFLUSH, &gl->gl_flags)) {
                up_write(&sdp->sd_log_flush_lock);
                return;
        }
        trace_gfs2_log_flush(sdp, 1, flags);

        if (flags & GFS2_LOG_HEAD_FLUSH_SHUTDOWN)
                clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);

        sdp->sd_log_flush_head = sdp->sd_log_head;
        tr = sdp->sd_log_tr;
        if (tr) {
                sdp->sd_log_tr = NULL;
                INIT_LIST_HEAD(&tr->tr_ail1_list);
                INIT_LIST_HEAD(&tr->tr_ail2_list);
                tr->tr_first = sdp->sd_log_flush_head;
                if (unlikely(state == SFS_FROZEN))
                        gfs2_assert_withdraw(sdp, !tr->tr_num_buf_new && !tr->tr_num_databuf_new);
        }

        if (unlikely(state == SFS_FROZEN))
                gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke);
        gfs2_assert_withdraw(sdp,
                             sdp->sd_log_num_revoke == sdp->sd_log_commited_revoke);

        gfs2_ordered_write(sdp);
        lops_before_commit(sdp, tr);
        gfs2_log_flush_bio(sdp, REQ_OP_WRITE, 0);

        if (sdp->sd_log_head != sdp->sd_log_flush_head) {
                log_flush_wait(sdp);
                log_write_header(sdp, flags);
        } else if (sdp->sd_log_tail != current_tail(sdp) && !sdp->sd_log_idle) {
                atomic_dec(&sdp->sd_log_blks_free); /* Adjust for unreserved buffer */
                trace_gfs2_log_blocks(sdp, -1);
                log_write_header(sdp, flags);
        }
        lops_after_commit(sdp, tr);

        gfs2_log_lock(sdp);
        sdp->sd_log_head = sdp->sd_log_flush_head;
        sdp->sd_log_blks_reserved = 0;
        sdp->sd_log_commited_revoke = 0;

        spin_lock(&sdp->sd_ail_lock);
        if (tr && !list_empty(&tr->tr_ail1_list)) {
                list_add(&tr->tr_list, &sdp->sd_ail1_list);
                tr = NULL;
        }
        spin_unlock(&sdp->sd_ail_lock);
        gfs2_log_unlock(sdp);

        if (!(flags & GFS2_LOG_HEAD_FLUSH_NORMAL)) {
                if (!sdp->sd_log_idle) {
                        for (;;) {
                                gfs2_ail1_start(sdp);
                                gfs2_ail1_wait(sdp);
                                if (gfs2_ail1_empty(sdp))
                                        break;
                        }
                        atomic_dec(&sdp->sd_log_blks_free); /* Adjust for unreserved buffer */
                        trace_gfs2_log_blocks(sdp, -1);
                        log_write_header(sdp, flags);
                        sdp->sd_log_head = sdp->sd_log_flush_head;
                }
                if (flags & (GFS2_LOG_HEAD_FLUSH_SHUTDOWN |
                             GFS2_LOG_HEAD_FLUSH_FREEZE))
                        gfs2_log_shutdown(sdp);
                if (flags & GFS2_LOG_HEAD_FLUSH_FREEZE)
                        atomic_set(&sdp->sd_freeze_state, SFS_FROZEN);
        }

        trace_gfs2_log_flush(sdp, 0, flags);
        up_write(&sdp->sd_log_flush_lock);

        kfree(tr);
}
/**
 * gfs2_merge_trans - Merge a new transaction into a cached transaction
 * @old: Original transaction to be expanded
 * @new: New transaction to be merged
 */

static void gfs2_merge_trans(struct gfs2_trans *old, struct gfs2_trans *new)
{
        WARN_ON_ONCE(!test_bit(TR_ATTACHED, &old->tr_flags));

        old->tr_num_buf_new     += new->tr_num_buf_new;
        old->tr_num_databuf_new += new->tr_num_databuf_new;
        old->tr_num_buf_rm      += new->tr_num_buf_rm;
        old->tr_num_databuf_rm  += new->tr_num_databuf_rm;
        old->tr_num_revoke      += new->tr_num_revoke;
        old->tr_num_revoke_rm   += new->tr_num_revoke_rm;

        list_splice_tail_init(&new->tr_databuf, &old->tr_databuf);
        list_splice_tail_init(&new->tr_buf, &old->tr_buf);
}
static void log_refund(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
{
        unsigned int reserved;
        unsigned int unused;
        unsigned int maxres;

        gfs2_log_lock(sdp);

        if (sdp->sd_log_tr) {
                gfs2_merge_trans(sdp->sd_log_tr, tr);
        } else if (tr->tr_num_buf_new || tr->tr_num_databuf_new) {
                gfs2_assert_withdraw(sdp, test_bit(TR_ALLOCED, &tr->tr_flags));
                sdp->sd_log_tr = tr;
                set_bit(TR_ATTACHED, &tr->tr_flags);
        }

        sdp->sd_log_commited_revoke += tr->tr_num_revoke - tr->tr_num_revoke_rm;
        reserved = calc_reserved(sdp);
        maxres = sdp->sd_log_blks_reserved + tr->tr_reserved;
        gfs2_assert_withdraw(sdp, maxres >= reserved);
        unused = maxres - reserved;
        atomic_add(unused, &sdp->sd_log_blks_free);
        trace_gfs2_log_blocks(sdp, unused);
        gfs2_assert_withdraw(sdp, atomic_read(&sdp->sd_log_blks_free) <=
                             sdp->sd_jdesc->jd_blocks);
        sdp->sd_log_blks_reserved = reserved;

        gfs2_log_unlock(sdp);
}
/**
 * gfs2_log_commit - Commit a transaction to the log
 * @sdp: the filesystem
 * @tr: the transaction
 *
 * We wake up gfs2_logd if the number of pinned blocks exceeds thresh1
 * or the total number of used blocks (pinned blocks plus AIL blocks)
 * is greater than thresh2.
 *
 * At mount time thresh1 is 1/3rd of journal size, thresh2 is 2/3rd of
 * journal size.
 */
void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
{
        log_refund(sdp, tr);

        if (atomic_read(&sdp->sd_log_pinned) > atomic_read(&sdp->sd_log_thresh1) ||
            ((sdp->sd_jdesc->jd_blocks - atomic_read(&sdp->sd_log_blks_free)) >
            atomic_read(&sdp->sd_log_thresh2)))
                wake_up(&sdp->sd_logd_waitq);
}
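
/*
 * Taking the 1/3 and 2/3 figures quoted above at face value: for an
 * 8192-block journal, thresh1 is roughly 2731 and thresh2 roughly 5461,
 * so logd is woken once about a third of the journal is pinned or about
 * two thirds of it is in use overall.
 */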
/**
 * gfs2_log_shutdown - write a shutdown header into a journal
 * @sdp: the filesystem
 */

void gfs2_log_shutdown(struct gfs2_sbd *sdp)
{
        gfs2_assert_withdraw(sdp, !sdp->sd_log_blks_reserved);
        gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke);
        gfs2_assert_withdraw(sdp, list_empty(&sdp->sd_ail1_list));

        sdp->sd_log_flush_head = sdp->sd_log_head;

        log_write_header(sdp, GFS2_LOG_HEAD_UNMOUNT | GFS2_LFC_SHUTDOWN);

        gfs2_assert_warn(sdp, sdp->sd_log_head == sdp->sd_log_tail);
        gfs2_assert_warn(sdp, list_empty(&sdp->sd_ail2_list));

        sdp->sd_log_head = sdp->sd_log_flush_head;
        sdp->sd_log_tail = sdp->sd_log_head;
}

static inline int gfs2_jrnl_flush_reqd(struct gfs2_sbd *sdp)
{
        return (atomic_read(&sdp->sd_log_pinned) +
                atomic_read(&sdp->sd_log_blks_needed) >=
                atomic_read(&sdp->sd_log_thresh1));
}

static inline int gfs2_ail_flush_reqd(struct gfs2_sbd *sdp)
{
        unsigned int used_blocks = sdp->sd_jdesc->jd_blocks - atomic_read(&sdp->sd_log_blks_free);

        if (test_and_clear_bit(SDF_FORCE_AIL_FLUSH, &sdp->sd_flags))
                return 1;
        return used_blocks + atomic_read(&sdp->sd_log_blks_needed) >=
                atomic_read(&sdp->sd_log_thresh2);
}
/**
 * gfs2_logd - Update log tail as Active Items get flushed to in-place blocks
 * @sdp: Pointer to GFS2 superblock
 *
 * Also, periodically check to make sure that we're using the most recent
 * journal index.
 */

int gfs2_logd(void *data)
{
        struct gfs2_sbd *sdp = data;
        unsigned long t = 1;
        DEFINE_WAIT(wait);
        bool did_flush;

        while (!kthread_should_stop()) {

                /* Check for errors writing to the journal */
                if (sdp->sd_log_error) {
                        gfs2_lm_withdraw(sdp,
                                         "GFS2: fsid=%s: error %d: "
                                         "withdrawing the file system to "
                                         "prevent further damage.\n",
                                         sdp->sd_fsname, sdp->sd_log_error);
                }

                did_flush = false;
                if (gfs2_jrnl_flush_reqd(sdp) || t == 0) {
                        gfs2_ail1_empty(sdp);
                        gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_NORMAL |
                                       GFS2_LFC_LOGD_JFLUSH_REQD);
                        did_flush = true;
                }

                if (gfs2_ail_flush_reqd(sdp)) {
                        gfs2_ail1_start(sdp);
                        gfs2_ail1_wait(sdp);
                        gfs2_ail1_empty(sdp);
                        gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_NORMAL |
                                       GFS2_LFC_LOGD_AIL_FLUSH_REQD);
                        did_flush = true;
                }

                if (!gfs2_ail_flush_reqd(sdp) || did_flush)
                        wake_up(&sdp->sd_log_waitq);

                t = gfs2_tune_get(sdp, gt_logd_secs) * HZ;

                try_to_freeze();

                do {
                        prepare_to_wait(&sdp->sd_logd_waitq, &wait,
                                        TASK_INTERRUPTIBLE);
                        if (!gfs2_ail_flush_reqd(sdp) &&
                            !gfs2_jrnl_flush_reqd(sdp) &&
                            !kthread_should_stop())
                                t = schedule_timeout(t);
                } while(t && !gfs2_ail_flush_reqd(sdp) &&
                        !gfs2_jrnl_flush_reqd(sdp) &&
                        !kthread_should_stop());
                finish_wait(&sdp->sd_logd_waitq, &wait);
        }

        return 0;
}