common.c 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544
  1. /*
  2. * Copyright (C) 2017 Oracle. All Rights Reserved.
  3. *
  4. * Author: Darrick J. Wong <darrick.wong@oracle.com>
  5. *
  6. * This program is free software; you can redistribute it and/or
  7. * modify it under the terms of the GNU General Public License
  8. * as published by the Free Software Foundation; either version 2
  9. * of the License, or (at your option) any later version.
  10. *
  11. * This program is distributed in the hope that it would be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. * GNU General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU General Public License
  17. * along with this program; if not, write the Free Software Foundation,
  18. * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
  19. */
  20. #include "xfs.h"
  21. #include "xfs_fs.h"
  22. #include "xfs_shared.h"
  23. #include "xfs_format.h"
  24. #include "xfs_trans_resv.h"
  25. #include "xfs_mount.h"
  26. #include "xfs_defer.h"
  27. #include "xfs_btree.h"
  28. #include "xfs_bit.h"
  29. #include "xfs_log_format.h"
  30. #include "xfs_trans.h"
  31. #include "xfs_sb.h"
  32. #include "xfs_inode.h"
  33. #include "xfs_icache.h"
  34. #include "xfs_itable.h"
  35. #include "xfs_alloc.h"
  36. #include "xfs_alloc_btree.h"
  37. #include "xfs_bmap.h"
  38. #include "xfs_bmap_btree.h"
  39. #include "xfs_ialloc.h"
  40. #include "xfs_ialloc_btree.h"
  41. #include "xfs_refcount.h"
  42. #include "xfs_refcount_btree.h"
  43. #include "xfs_rmap.h"
  44. #include "xfs_rmap_btree.h"
  45. #include "xfs_log.h"
  46. #include "xfs_trans_priv.h"
  47. #include "scrub/xfs_scrub.h"
  48. #include "scrub/scrub.h"
  49. #include "scrub/common.h"
  50. #include "scrub/trace.h"
  51. #include "scrub/btree.h"
  52. /* Common code for the metadata scrubbers. */
  53. /*
  54. * Handling operational errors.
  55. *
 * The *_process_error() family of functions is used to process error return
 * codes from functions called as part of a scrub operation.
  58. *
  59. * If there's no error, we return true to tell the caller that it's ok
  60. * to move on to the next check in its list.
  61. *
  62. * For non-verifier errors (e.g. ENOMEM) we return false to tell the
  63. * caller that something bad happened, and we preserve *error so that
  64. * the caller can return the *error up the stack to userspace.
  65. *
  66. * Verifier errors (EFSBADCRC/EFSCORRUPTED) are recorded by setting
  67. * OFLAG_CORRUPT in sm_flags and the *error is cleared. In other words,
  68. * we track verifier errors (and failed scrub checks) via OFLAG_CORRUPT,
  69. * not via return codes. We return false to tell the caller that
  70. * something bad happened. Since the error has been cleared, the caller
  71. * will (presumably) return that zero and scrubbing will move on to
  72. * whatever's next.
  73. *
  74. * ftrace can be used to record the precise metadata location and the
  75. * approximate code location of the failed operation.
  76. */
  77. /* Check for operational errors. */
  78. bool
  79. xfs_scrub_process_error(
  80. struct xfs_scrub_context *sc,
  81. xfs_agnumber_t agno,
  82. xfs_agblock_t bno,
  83. int *error)
  84. {
  85. switch (*error) {
  86. case 0:
  87. return true;
  88. case -EDEADLOCK:
  89. /* Used to restart an op with deadlock avoidance. */
  90. trace_xfs_scrub_deadlock_retry(sc->ip, sc->sm, *error);
  91. break;
  92. case -EFSBADCRC:
  93. case -EFSCORRUPTED:
  94. /* Note the badness but don't abort. */
  95. sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
  96. *error = 0;
  97. /* fall through */
  98. default:
  99. trace_xfs_scrub_op_error(sc, agno, bno, *error,
  100. __return_address);
  101. break;
  102. }
  103. return false;
  104. }
  105. /* Check for operational errors for a file offset. */
  106. bool
  107. xfs_scrub_fblock_process_error(
  108. struct xfs_scrub_context *sc,
  109. int whichfork,
  110. xfs_fileoff_t offset,
  111. int *error)
  112. {
  113. switch (*error) {
  114. case 0:
  115. return true;
  116. case -EDEADLOCK:
  117. /* Used to restart an op with deadlock avoidance. */
  118. trace_xfs_scrub_deadlock_retry(sc->ip, sc->sm, *error);
  119. break;
  120. case -EFSBADCRC:
  121. case -EFSCORRUPTED:
  122. /* Note the badness but don't abort. */
  123. sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
  124. *error = 0;
  125. /* fall through */
  126. default:
  127. trace_xfs_scrub_file_op_error(sc, whichfork, offset, *error,
  128. __return_address);
  129. break;
  130. }
  131. return false;
  132. }
  133. /*
  134. * Handling scrub corruption/optimization/warning checks.
  135. *
 * The *_set_{corrupt,preen,warning}() family of functions is used to
 * record the presence of metadata that is incorrect (corrupt), could be
 * optimized somehow (preen), or should be flagged for administrative
 * review but is not incorrect (warning).
  140. *
  141. * ftrace can be used to record the precise metadata location and
  142. * approximate code location of the failed check.
  143. */
  144. /* Record a block which could be optimized. */
  145. void
  146. xfs_scrub_block_set_preen(
  147. struct xfs_scrub_context *sc,
  148. struct xfs_buf *bp)
  149. {
  150. sc->sm->sm_flags |= XFS_SCRUB_OFLAG_PREEN;
  151. trace_xfs_scrub_block_preen(sc, bp->b_bn, __return_address);
  152. }
  153. /*
  154. * Record an inode which could be optimized. The trace data will
  155. * include the block given by bp if bp is given; otherwise it will use
  156. * the block location of the inode record itself.
  157. */
  158. void
  159. xfs_scrub_ino_set_preen(
  160. struct xfs_scrub_context *sc,
  161. struct xfs_buf *bp)
  162. {
  163. sc->sm->sm_flags |= XFS_SCRUB_OFLAG_PREEN;
  164. trace_xfs_scrub_ino_preen(sc, sc->ip->i_ino, bp ? bp->b_bn : 0,
  165. __return_address);
  166. }
  167. /* Record a corrupt block. */
  168. void
  169. xfs_scrub_block_set_corrupt(
  170. struct xfs_scrub_context *sc,
  171. struct xfs_buf *bp)
  172. {
  173. sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
  174. trace_xfs_scrub_block_error(sc, bp->b_bn, __return_address);
  175. }
  176. /*
  177. * Record a corrupt inode. The trace data will include the block given
  178. * by bp if bp is given; otherwise it will use the block location of the
  179. * inode record itself.
  180. */
  181. void
  182. xfs_scrub_ino_set_corrupt(
  183. struct xfs_scrub_context *sc,
  184. xfs_ino_t ino,
  185. struct xfs_buf *bp)
  186. {
  187. sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
  188. trace_xfs_scrub_ino_error(sc, ino, bp ? bp->b_bn : 0, __return_address);
  189. }
  190. /* Record corruption in a block indexed by a file fork. */
  191. void
  192. xfs_scrub_fblock_set_corrupt(
  193. struct xfs_scrub_context *sc,
  194. int whichfork,
  195. xfs_fileoff_t offset)
  196. {
  197. sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
  198. trace_xfs_scrub_fblock_error(sc, whichfork, offset, __return_address);
  199. }
  200. /*
  201. * Warn about inodes that need administrative review but is not
  202. * incorrect.
  203. */
  204. void
  205. xfs_scrub_ino_set_warning(
  206. struct xfs_scrub_context *sc,
  207. struct xfs_buf *bp)
  208. {
  209. sc->sm->sm_flags |= XFS_SCRUB_OFLAG_WARNING;
  210. trace_xfs_scrub_ino_warning(sc, sc->ip->i_ino, bp ? bp->b_bn : 0,
  211. __return_address);
  212. }
  213. /* Warn about a block indexed by a file fork that needs review. */
  214. void
  215. xfs_scrub_fblock_set_warning(
  216. struct xfs_scrub_context *sc,
  217. int whichfork,
  218. xfs_fileoff_t offset)
  219. {
  220. sc->sm->sm_flags |= XFS_SCRUB_OFLAG_WARNING;
  221. trace_xfs_scrub_fblock_warning(sc, whichfork, offset, __return_address);
  222. }
  223. /* Signal an incomplete scrub. */
  224. void
  225. xfs_scrub_set_incomplete(
  226. struct xfs_scrub_context *sc)
  227. {
  228. sc->sm->sm_flags |= XFS_SCRUB_OFLAG_INCOMPLETE;
  229. trace_xfs_scrub_incomplete(sc, __return_address);
  230. }
  231. /*
  232. * AG scrubbing
  233. *
  234. * These helpers facilitate locking an allocation group's header
  235. * buffers, setting up cursors for all btrees that are present, and
  236. * cleaning everything up once we're through.
  237. */
  238. /* Decide if we want to return an AG header read failure. */
  239. static inline bool
  240. want_ag_read_header_failure(
  241. struct xfs_scrub_context *sc,
  242. unsigned int type)
  243. {
  244. /* Return all AG header read failures when scanning btrees. */
  245. if (sc->sm->sm_type != XFS_SCRUB_TYPE_AGF &&
  246. sc->sm->sm_type != XFS_SCRUB_TYPE_AGFL &&
  247. sc->sm->sm_type != XFS_SCRUB_TYPE_AGI)
  248. return true;
  249. /*
  250. * If we're scanning a given type of AG header, we only want to
  251. * see read failures from that specific header. We'd like the
  252. * other headers to cross-check them, but this isn't required.
  253. */
  254. if (sc->sm->sm_type == type)
  255. return true;
  256. return false;
  257. }
  258. /*
  259. * Grab all the headers for an AG.
  260. *
  261. * The headers should be released by xfs_scrub_ag_free, but as a fail
  262. * safe we attach all the buffers we grab to the scrub transaction so
  263. * they'll all be freed when we cancel it.
  264. */
  265. int
  266. xfs_scrub_ag_read_headers(
  267. struct xfs_scrub_context *sc,
  268. xfs_agnumber_t agno,
  269. struct xfs_buf **agi,
  270. struct xfs_buf **agf,
  271. struct xfs_buf **agfl)
  272. {
  273. struct xfs_mount *mp = sc->mp;
  274. int error;
  275. error = xfs_ialloc_read_agi(mp, sc->tp, agno, agi);
  276. if (error && want_ag_read_header_failure(sc, XFS_SCRUB_TYPE_AGI))
  277. goto out;
  278. error = xfs_alloc_read_agf(mp, sc->tp, agno, 0, agf);
  279. if (error && want_ag_read_header_failure(sc, XFS_SCRUB_TYPE_AGF))
  280. goto out;
  281. error = xfs_alloc_read_agfl(mp, sc->tp, agno, agfl);
  282. if (error && want_ag_read_header_failure(sc, XFS_SCRUB_TYPE_AGFL))
  283. goto out;
  284. out:
  285. return error;
  286. }
  287. /* Release all the AG btree cursors. */
  288. void
  289. xfs_scrub_ag_btcur_free(
  290. struct xfs_scrub_ag *sa)
  291. {
  292. if (sa->refc_cur)
  293. xfs_btree_del_cursor(sa->refc_cur, XFS_BTREE_ERROR);
  294. if (sa->rmap_cur)
  295. xfs_btree_del_cursor(sa->rmap_cur, XFS_BTREE_ERROR);
  296. if (sa->fino_cur)
  297. xfs_btree_del_cursor(sa->fino_cur, XFS_BTREE_ERROR);
  298. if (sa->ino_cur)
  299. xfs_btree_del_cursor(sa->ino_cur, XFS_BTREE_ERROR);
  300. if (sa->cnt_cur)
  301. xfs_btree_del_cursor(sa->cnt_cur, XFS_BTREE_ERROR);
  302. if (sa->bno_cur)
  303. xfs_btree_del_cursor(sa->bno_cur, XFS_BTREE_ERROR);
  304. sa->refc_cur = NULL;
  305. sa->rmap_cur = NULL;
  306. sa->fino_cur = NULL;
  307. sa->ino_cur = NULL;
  308. sa->bno_cur = NULL;
  309. sa->cnt_cur = NULL;
  310. }
  311. /* Initialize all the btree cursors for an AG. */
  312. int
  313. xfs_scrub_ag_btcur_init(
  314. struct xfs_scrub_context *sc,
  315. struct xfs_scrub_ag *sa)
  316. {
  317. struct xfs_mount *mp = sc->mp;
  318. xfs_agnumber_t agno = sa->agno;
  319. if (sa->agf_bp) {
  320. /* Set up a bnobt cursor for cross-referencing. */
  321. sa->bno_cur = xfs_allocbt_init_cursor(mp, sc->tp, sa->agf_bp,
  322. agno, XFS_BTNUM_BNO);
  323. if (!sa->bno_cur)
  324. goto err;
  325. /* Set up a cntbt cursor for cross-referencing. */
  326. sa->cnt_cur = xfs_allocbt_init_cursor(mp, sc->tp, sa->agf_bp,
  327. agno, XFS_BTNUM_CNT);
  328. if (!sa->cnt_cur)
  329. goto err;
  330. }
  331. /* Set up a inobt cursor for cross-referencing. */
  332. if (sa->agi_bp) {
  333. sa->ino_cur = xfs_inobt_init_cursor(mp, sc->tp, sa->agi_bp,
  334. agno, XFS_BTNUM_INO);
  335. if (!sa->ino_cur)
  336. goto err;
  337. }
  338. /* Set up a finobt cursor for cross-referencing. */
  339. if (sa->agi_bp && xfs_sb_version_hasfinobt(&mp->m_sb)) {
  340. sa->fino_cur = xfs_inobt_init_cursor(mp, sc->tp, sa->agi_bp,
  341. agno, XFS_BTNUM_FINO);
  342. if (!sa->fino_cur)
  343. goto err;
  344. }
  345. /* Set up a rmapbt cursor for cross-referencing. */
  346. if (sa->agf_bp && xfs_sb_version_hasrmapbt(&mp->m_sb)) {
  347. sa->rmap_cur = xfs_rmapbt_init_cursor(mp, sc->tp, sa->agf_bp,
  348. agno);
  349. if (!sa->rmap_cur)
  350. goto err;
  351. }
  352. /* Set up a refcountbt cursor for cross-referencing. */
  353. if (sa->agf_bp && xfs_sb_version_hasreflink(&mp->m_sb)) {
  354. sa->refc_cur = xfs_refcountbt_init_cursor(mp, sc->tp,
  355. sa->agf_bp, agno, NULL);
  356. if (!sa->refc_cur)
  357. goto err;
  358. }
  359. return 0;
  360. err:
  361. return -ENOMEM;
  362. }
  363. /* Release the AG header context and btree cursors. */
  364. void
  365. xfs_scrub_ag_free(
  366. struct xfs_scrub_context *sc,
  367. struct xfs_scrub_ag *sa)
  368. {
  369. xfs_scrub_ag_btcur_free(sa);
  370. if (sa->agfl_bp) {
  371. xfs_trans_brelse(sc->tp, sa->agfl_bp);
  372. sa->agfl_bp = NULL;
  373. }
  374. if (sa->agf_bp) {
  375. xfs_trans_brelse(sc->tp, sa->agf_bp);
  376. sa->agf_bp = NULL;
  377. }
  378. if (sa->agi_bp) {
  379. xfs_trans_brelse(sc->tp, sa->agi_bp);
  380. sa->agi_bp = NULL;
  381. }
  382. sa->agno = NULLAGNUMBER;
  383. }
  384. /*
  385. * For scrub, grab the AGI and the AGF headers, in that order. Locking
  386. * order requires us to get the AGI before the AGF. We use the
  387. * transaction to avoid deadlocking on crosslinked metadata buffers;
  388. * either the caller passes one in (bmap scrub) or we have to create a
  389. * transaction ourselves.
  390. */
  391. int
  392. xfs_scrub_ag_init(
  393. struct xfs_scrub_context *sc,
  394. xfs_agnumber_t agno,
  395. struct xfs_scrub_ag *sa)
  396. {
  397. int error;
  398. sa->agno = agno;
  399. error = xfs_scrub_ag_read_headers(sc, agno, &sa->agi_bp,
  400. &sa->agf_bp, &sa->agfl_bp);
  401. if (error)
  402. return error;
  403. return xfs_scrub_ag_btcur_init(sc, sa);
  404. }
  405. /* Per-scrubber setup functions */
  406. /* Set us up with a transaction and an empty context. */
  407. int
  408. xfs_scrub_setup_fs(
  409. struct xfs_scrub_context *sc,
  410. struct xfs_inode *ip)
  411. {
  412. return xfs_scrub_trans_alloc(sc->sm, sc->mp, &sc->tp);
  413. }
  414. /* Set us up with AG headers and btree cursors. */
  415. int
  416. xfs_scrub_setup_ag_btree(
  417. struct xfs_scrub_context *sc,
  418. struct xfs_inode *ip,
  419. bool force_log)
  420. {
  421. struct xfs_mount *mp = sc->mp;
  422. int error;
  423. /*
  424. * If the caller asks us to checkpont the log, do so. This
  425. * expensive operation should be performed infrequently and only
  426. * as a last resort. Any caller that sets force_log should
  427. * document why they need to do so.
  428. */
  429. if (force_log) {
  430. error = xfs_scrub_checkpoint_log(mp);
  431. if (error)
  432. return error;
  433. }
  434. error = xfs_scrub_setup_ag_header(sc, ip);
  435. if (error)
  436. return error;
  437. return xfs_scrub_ag_init(sc, sc->sm->sm_agno, &sc->sa);
  438. }
  439. /* Push everything out of the log onto disk. */
  440. int
  441. xfs_scrub_checkpoint_log(
  442. struct xfs_mount *mp)
  443. {
  444. int error;
  445. error = _xfs_log_force(mp, XFS_LOG_SYNC, NULL);
  446. if (error)
  447. return error;
  448. xfs_ail_push_all_sync(mp->m_ail);
  449. return 0;
  450. }
  451. /*
  452. * Given an inode and the scrub control structure, grab either the
  453. * inode referenced in the control structure or the inode passed in.
  454. * The inode is not locked.
  455. */
  456. int
  457. xfs_scrub_get_inode(
  458. struct xfs_scrub_context *sc,
  459. struct xfs_inode *ip_in)
  460. {
  461. struct xfs_mount *mp = sc->mp;
  462. struct xfs_inode *ip = NULL;
  463. int error;
  464. /*
  465. * If userspace passed us an AG number or a generation number
  466. * without an inode number, they haven't got a clue so bail out
  467. * immediately.
  468. */
  469. if (sc->sm->sm_agno || (sc->sm->sm_gen && !sc->sm->sm_ino))
  470. return -EINVAL;
  471. /* We want to scan the inode we already had opened. */
  472. if (sc->sm->sm_ino == 0 || sc->sm->sm_ino == ip_in->i_ino) {
  473. sc->ip = ip_in;
  474. return 0;
  475. }
  476. /* Look up the inode, see if the generation number matches. */
  477. if (xfs_internal_inum(mp, sc->sm->sm_ino))
  478. return -ENOENT;
  479. error = xfs_iget(mp, NULL, sc->sm->sm_ino,
  480. XFS_IGET_UNTRUSTED | XFS_IGET_DONTCACHE, 0, &ip);
  481. if (error == -ENOENT || error == -EINVAL) {
  482. /* inode doesn't exist... */
  483. return -ENOENT;
  484. } else if (error) {
  485. trace_xfs_scrub_op_error(sc,
  486. XFS_INO_TO_AGNO(mp, sc->sm->sm_ino),
  487. XFS_INO_TO_AGBNO(mp, sc->sm->sm_ino),
  488. error, __return_address);
  489. return error;
  490. }
  491. if (VFS_I(ip)->i_generation != sc->sm->sm_gen) {
  492. iput(VFS_I(ip));
  493. return -ENOENT;
  494. }
  495. sc->ip = ip;
  496. return 0;
  497. }