xfs_ag_resv.c 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380
  1. // SPDX-License-Identifier: GPL-2.0+
  2. /*
  3. * Copyright (C) 2016 Oracle. All Rights Reserved.
  4. * Author: Darrick J. Wong <darrick.wong@oracle.com>
  5. */
  6. #include "xfs.h"
  7. #include "xfs_fs.h"
  8. #include "xfs_shared.h"
  9. #include "xfs_format.h"
  10. #include "xfs_log_format.h"
  11. #include "xfs_trans_resv.h"
  12. #include "xfs_sb.h"
  13. #include "xfs_mount.h"
  14. #include "xfs_defer.h"
  15. #include "xfs_alloc.h"
  16. #include "xfs_errortag.h"
  17. #include "xfs_error.h"
  18. #include "xfs_trace.h"
  19. #include "xfs_cksum.h"
  20. #include "xfs_trans.h"
  21. #include "xfs_bit.h"
  22. #include "xfs_bmap.h"
  23. #include "xfs_bmap_btree.h"
  24. #include "xfs_ag_resv.h"
  25. #include "xfs_trans_space.h"
  26. #include "xfs_rmap_btree.h"
  27. #include "xfs_btree.h"
  28. #include "xfs_refcount_btree.h"
  29. #include "xfs_ialloc_btree.h"
  30. /*
  31. * Per-AG Block Reservations
  32. *
  33. * For some kinds of allocation group metadata structures, it is advantageous
  34. * to reserve a small number of blocks in each AG so that future expansions of
  35. * that data structure do not encounter ENOSPC because errors during a btree
  36. * split cause the filesystem to go offline.
  37. *
  38. * Prior to the introduction of reflink, this wasn't an issue because the free
  39. * space btrees maintain a reserve of space (the AGFL) to handle any expansion
  40. * that may be necessary; and allocations of other metadata (inodes, BMBT,
  41. * dir/attr) aren't restricted to a single AG. However, with reflink it is
  42. * possible to allocate all the space in an AG, have subsequent reflink/CoW
  43. * activity expand the refcount btree, and discover that there's no space left
  44. * to handle that expansion. Since we can calculate the maximum size of the
  45. * refcount btree, we can reserve space for it and avoid ENOSPC.
  46. *
 * Handling per-AG reservations consists of four changes to the allocator's
  48. * behavior: First, because these reservations are always needed, we decrease
  49. * the ag_max_usable counter to reflect the size of the AG after the reserved
  50. * blocks are taken. Second, the reservations must be reflected in the
  51. * fdblocks count to maintain proper accounting. Third, each AG must maintain
  52. * its own reserved block counter so that we can calculate the amount of space
  53. * that must remain free to maintain the reservations. Fourth, the "remaining
  54. * reserved blocks" count must be used when calculating the length of the
  55. * longest free extent in an AG and to clamp maxlen in the per-AG allocation
  56. * functions. In other words, we maintain a virtual allocation via in-core
  57. * accounting tricks so that we don't have to clean up after a crash. :)
  58. *
  59. * Reserved blocks can be managed by passing one of the enum xfs_ag_resv_type
  60. * values via struct xfs_alloc_arg or directly to the xfs_free_extent
  61. * function. It might seem a little funny to maintain a reservoir of blocks
  62. * to feed another reservoir, but the AGFL only holds enough blocks to get
  63. * through the next transaction. The per-AG reservation is to ensure (we
  64. * hope) that each AG never runs out of blocks. Each data structure wanting
  65. * to use the reservation system should update ask/used in xfs_ag_resv_init.
  66. */
  67. /*
  68. * Are we critically low on blocks? For now we'll define that as the number
  69. * of blocks we can get our hands on being less than 10% of what we reserved
  70. * or less than some arbitrary number (maximum btree height).
  71. */
  72. bool
  73. xfs_ag_resv_critical(
  74. struct xfs_perag *pag,
  75. enum xfs_ag_resv_type type)
  76. {
  77. xfs_extlen_t avail;
  78. xfs_extlen_t orig;
  79. switch (type) {
  80. case XFS_AG_RESV_METADATA:
  81. avail = pag->pagf_freeblks - pag->pag_rmapbt_resv.ar_reserved;
  82. orig = pag->pag_meta_resv.ar_asked;
  83. break;
  84. case XFS_AG_RESV_RMAPBT:
  85. avail = pag->pagf_freeblks + pag->pagf_flcount -
  86. pag->pag_meta_resv.ar_reserved;
  87. orig = pag->pag_rmapbt_resv.ar_asked;
  88. break;
  89. default:
  90. ASSERT(0);
  91. return false;
  92. }
  93. trace_xfs_ag_resv_critical(pag, type, avail);
  94. /* Critically low if less than 10% or max btree height remains. */
  95. return XFS_TEST_ERROR(avail < orig / 10 || avail < XFS_BTREE_MAXLEVELS,
  96. pag->pag_mount, XFS_ERRTAG_AG_RESV_CRITICAL);
  97. }
  98. /*
  99. * How many blocks are reserved but not used, and therefore must not be
  100. * allocated away?
  101. */
  102. xfs_extlen_t
  103. xfs_ag_resv_needed(
  104. struct xfs_perag *pag,
  105. enum xfs_ag_resv_type type)
  106. {
  107. xfs_extlen_t len;
  108. len = pag->pag_meta_resv.ar_reserved + pag->pag_rmapbt_resv.ar_reserved;
  109. switch (type) {
  110. case XFS_AG_RESV_METADATA:
  111. case XFS_AG_RESV_RMAPBT:
  112. len -= xfs_perag_resv(pag, type)->ar_reserved;
  113. break;
  114. case XFS_AG_RESV_NONE:
  115. /* empty */
  116. break;
  117. default:
  118. ASSERT(0);
  119. }
  120. trace_xfs_ag_resv_needed(pag, type, len);
  121. return len;
  122. }
  123. /* Clean out a reservation */
  124. static int
  125. __xfs_ag_resv_free(
  126. struct xfs_perag *pag,
  127. enum xfs_ag_resv_type type)
  128. {
  129. struct xfs_ag_resv *resv;
  130. xfs_extlen_t oldresv;
  131. int error;
  132. trace_xfs_ag_resv_free(pag, type, 0);
  133. resv = xfs_perag_resv(pag, type);
  134. if (pag->pag_agno == 0)
  135. pag->pag_mount->m_ag_max_usable += resv->ar_asked;
  136. /*
  137. * RMAPBT blocks come from the AGFL and AGFL blocks are always
  138. * considered "free", so whatever was reserved at mount time must be
  139. * given back at umount.
  140. */
  141. if (type == XFS_AG_RESV_RMAPBT)
  142. oldresv = resv->ar_orig_reserved;
  143. else
  144. oldresv = resv->ar_reserved;
  145. error = xfs_mod_fdblocks(pag->pag_mount, oldresv, true);
  146. resv->ar_reserved = 0;
  147. resv->ar_asked = 0;
  148. if (error)
  149. trace_xfs_ag_resv_free_error(pag->pag_mount, pag->pag_agno,
  150. error, _RET_IP_);
  151. return error;
  152. }
  153. /* Free a per-AG reservation. */
  154. int
  155. xfs_ag_resv_free(
  156. struct xfs_perag *pag)
  157. {
  158. int error;
  159. int err2;
  160. error = __xfs_ag_resv_free(pag, XFS_AG_RESV_RMAPBT);
  161. err2 = __xfs_ag_resv_free(pag, XFS_AG_RESV_METADATA);
  162. if (err2 && !error)
  163. error = err2;
  164. return error;
  165. }
  166. static int
  167. __xfs_ag_resv_init(
  168. struct xfs_perag *pag,
  169. enum xfs_ag_resv_type type,
  170. xfs_extlen_t ask,
  171. xfs_extlen_t used)
  172. {
  173. struct xfs_mount *mp = pag->pag_mount;
  174. struct xfs_ag_resv *resv;
  175. int error;
  176. xfs_extlen_t reserved;
  177. if (used > ask)
  178. ask = used;
  179. reserved = ask - used;
  180. error = xfs_mod_fdblocks(mp, -(int64_t)reserved, true);
  181. if (error) {
  182. trace_xfs_ag_resv_init_error(pag->pag_mount, pag->pag_agno,
  183. error, _RET_IP_);
  184. xfs_warn(mp,
  185. "Per-AG reservation for AG %u failed. Filesystem may run out of space.",
  186. pag->pag_agno);
  187. return error;
  188. }
  189. /*
  190. * Reduce the maximum per-AG allocation length by however much we're
  191. * trying to reserve for an AG. Since this is a filesystem-wide
  192. * counter, we only make the adjustment for AG 0. This assumes that
  193. * there aren't any AGs hungrier for per-AG reservation than AG 0.
  194. */
  195. if (pag->pag_agno == 0)
  196. mp->m_ag_max_usable -= ask;
  197. resv = xfs_perag_resv(pag, type);
  198. resv->ar_asked = ask;
  199. resv->ar_reserved = resv->ar_orig_reserved = reserved;
  200. trace_xfs_ag_resv_init(pag, type, ask);
  201. return 0;
  202. }
  203. /* Create a per-AG block reservation. */
  204. int
  205. xfs_ag_resv_init(
  206. struct xfs_perag *pag)
  207. {
  208. struct xfs_mount *mp = pag->pag_mount;
  209. xfs_agnumber_t agno = pag->pag_agno;
  210. xfs_extlen_t ask;
  211. xfs_extlen_t used;
  212. int error = 0;
  213. /* Create the metadata reservation. */
  214. if (pag->pag_meta_resv.ar_asked == 0) {
  215. ask = used = 0;
  216. error = xfs_refcountbt_calc_reserves(mp, agno, &ask, &used);
  217. if (error)
  218. goto out;
  219. error = xfs_finobt_calc_reserves(mp, agno, &ask, &used);
  220. if (error)
  221. goto out;
  222. error = __xfs_ag_resv_init(pag, XFS_AG_RESV_METADATA,
  223. ask, used);
  224. if (error) {
  225. /*
  226. * Because we didn't have per-AG reservations when the
  227. * finobt feature was added we might not be able to
  228. * reserve all needed blocks. Warn and fall back to the
  229. * old and potentially buggy code in that case, but
  230. * ensure we do have the reservation for the refcountbt.
  231. */
  232. ask = used = 0;
  233. mp->m_inotbt_nores = true;
  234. error = xfs_refcountbt_calc_reserves(mp, agno, &ask,
  235. &used);
  236. if (error)
  237. goto out;
  238. error = __xfs_ag_resv_init(pag, XFS_AG_RESV_METADATA,
  239. ask, used);
  240. if (error)
  241. goto out;
  242. }
  243. }
  244. /* Create the RMAPBT metadata reservation */
  245. if (pag->pag_rmapbt_resv.ar_asked == 0) {
  246. ask = used = 0;
  247. error = xfs_rmapbt_calc_reserves(mp, agno, &ask, &used);
  248. if (error)
  249. goto out;
  250. error = __xfs_ag_resv_init(pag, XFS_AG_RESV_RMAPBT, ask, used);
  251. if (error)
  252. goto out;
  253. }
  254. #ifdef DEBUG
  255. /* need to read in the AGF for the ASSERT below to work */
  256. error = xfs_alloc_pagf_init(pag->pag_mount, NULL, pag->pag_agno, 0);
  257. if (error)
  258. return error;
  259. ASSERT(xfs_perag_resv(pag, XFS_AG_RESV_METADATA)->ar_reserved +
  260. xfs_perag_resv(pag, XFS_AG_RESV_RMAPBT)->ar_reserved <=
  261. pag->pagf_freeblks + pag->pagf_flcount);
  262. #endif
  263. out:
  264. return error;
  265. }
  266. /* Allocate a block from the reservation. */
  267. void
  268. xfs_ag_resv_alloc_extent(
  269. struct xfs_perag *pag,
  270. enum xfs_ag_resv_type type,
  271. struct xfs_alloc_arg *args)
  272. {
  273. struct xfs_ag_resv *resv;
  274. xfs_extlen_t len;
  275. uint field;
  276. trace_xfs_ag_resv_alloc_extent(pag, type, args->len);
  277. switch (type) {
  278. case XFS_AG_RESV_AGFL:
  279. return;
  280. case XFS_AG_RESV_METADATA:
  281. case XFS_AG_RESV_RMAPBT:
  282. resv = xfs_perag_resv(pag, type);
  283. break;
  284. default:
  285. ASSERT(0);
  286. /* fall through */
  287. case XFS_AG_RESV_NONE:
  288. field = args->wasdel ? XFS_TRANS_SB_RES_FDBLOCKS :
  289. XFS_TRANS_SB_FDBLOCKS;
  290. xfs_trans_mod_sb(args->tp, field, -(int64_t)args->len);
  291. return;
  292. }
  293. len = min_t(xfs_extlen_t, args->len, resv->ar_reserved);
  294. resv->ar_reserved -= len;
  295. if (type == XFS_AG_RESV_RMAPBT)
  296. return;
  297. /* Allocations of reserved blocks only need on-disk sb updates... */
  298. xfs_trans_mod_sb(args->tp, XFS_TRANS_SB_RES_FDBLOCKS, -(int64_t)len);
  299. /* ...but non-reserved blocks need in-core and on-disk updates. */
  300. if (args->len > len)
  301. xfs_trans_mod_sb(args->tp, XFS_TRANS_SB_FDBLOCKS,
  302. -((int64_t)args->len - len));
  303. }
  304. /* Free a block to the reservation. */
  305. void
  306. xfs_ag_resv_free_extent(
  307. struct xfs_perag *pag,
  308. enum xfs_ag_resv_type type,
  309. struct xfs_trans *tp,
  310. xfs_extlen_t len)
  311. {
  312. xfs_extlen_t leftover;
  313. struct xfs_ag_resv *resv;
  314. trace_xfs_ag_resv_free_extent(pag, type, len);
  315. switch (type) {
  316. case XFS_AG_RESV_AGFL:
  317. return;
  318. case XFS_AG_RESV_METADATA:
  319. case XFS_AG_RESV_RMAPBT:
  320. resv = xfs_perag_resv(pag, type);
  321. break;
  322. default:
  323. ASSERT(0);
  324. /* fall through */
  325. case XFS_AG_RESV_NONE:
  326. xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, (int64_t)len);
  327. return;
  328. }
  329. leftover = min_t(xfs_extlen_t, len, resv->ar_asked - resv->ar_reserved);
  330. resv->ar_reserved += leftover;
  331. if (type == XFS_AG_RESV_RMAPBT)
  332. return;
  333. /* Freeing into the reserved pool only requires on-disk update... */
  334. xfs_trans_mod_sb(tp, XFS_TRANS_SB_RES_FDBLOCKS, len);
  335. /* ...but freeing beyond that requires in-core and on-disk update. */
  336. if (len > leftover)
  337. xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, len - leftover);
  338. }