xfs_refcount.c 47 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616161716181619162016211622162316241625162616271628162916301631163216331634163516361637163816391640164116421643164416451646164716481649165016511652165316541655165616571658165916601661166216631664166516661667166816691670167116721673167416751676167716781679168016811682168316841685168616871688168916901691169216931694169516961697169816991700170117021703170417051706170717081709171017111712171317141715171617171718171917201721172217231724172517261727172817291730173117321733173417351736173717381739174017411742174317441745174617471748174917501751175217531754
  1. // SPDX-License-Identifier: GPL-2.0+
  2. /*
  3. * Copyright (C) 2016 Oracle. All Rights Reserved.
  4. * Author: Darrick J. Wong <darrick.wong@oracle.com>
  5. */
  6. #include "xfs.h"
  7. #include "xfs_fs.h"
  8. #include "xfs_shared.h"
  9. #include "xfs_format.h"
  10. #include "xfs_log_format.h"
  11. #include "xfs_trans_resv.h"
  12. #include "xfs_sb.h"
  13. #include "xfs_mount.h"
  14. #include "xfs_defer.h"
  15. #include "xfs_btree.h"
  16. #include "xfs_bmap.h"
  17. #include "xfs_refcount_btree.h"
  18. #include "xfs_alloc.h"
  19. #include "xfs_errortag.h"
  20. #include "xfs_error.h"
  21. #include "xfs_trace.h"
  22. #include "xfs_cksum.h"
  23. #include "xfs_trans.h"
  24. #include "xfs_bit.h"
  25. #include "xfs_refcount.h"
  26. #include "xfs_rmap.h"
  27. /* Allowable refcount adjustment amounts. */
  28. enum xfs_refc_adjust_op {
  29. XFS_REFCOUNT_ADJUST_INCREASE = 1,
  30. XFS_REFCOUNT_ADJUST_DECREASE = -1,
  31. XFS_REFCOUNT_ADJUST_COW_ALLOC = 0,
  32. XFS_REFCOUNT_ADJUST_COW_FREE = -1,
  33. };
  34. STATIC int __xfs_refcount_cow_alloc(struct xfs_btree_cur *rcur,
  35. xfs_agblock_t agbno, xfs_extlen_t aglen,
  36. struct xfs_defer_ops *dfops);
  37. STATIC int __xfs_refcount_cow_free(struct xfs_btree_cur *rcur,
  38. xfs_agblock_t agbno, xfs_extlen_t aglen,
  39. struct xfs_defer_ops *dfops);
  40. /*
  41. * Look up the first record less than or equal to [bno, len] in the btree
  42. * given by cur.
  43. */
  44. int
  45. xfs_refcount_lookup_le(
  46. struct xfs_btree_cur *cur,
  47. xfs_agblock_t bno,
  48. int *stat)
  49. {
  50. trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_private.a.agno, bno,
  51. XFS_LOOKUP_LE);
  52. cur->bc_rec.rc.rc_startblock = bno;
  53. cur->bc_rec.rc.rc_blockcount = 0;
  54. return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat);
  55. }
  56. /*
  57. * Look up the first record greater than or equal to [bno, len] in the btree
  58. * given by cur.
  59. */
  60. int
  61. xfs_refcount_lookup_ge(
  62. struct xfs_btree_cur *cur,
  63. xfs_agblock_t bno,
  64. int *stat)
  65. {
  66. trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_private.a.agno, bno,
  67. XFS_LOOKUP_GE);
  68. cur->bc_rec.rc.rc_startblock = bno;
  69. cur->bc_rec.rc.rc_blockcount = 0;
  70. return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
  71. }
  72. /*
  73. * Look up the first record equal to [bno, len] in the btree
  74. * given by cur.
  75. */
  76. int
  77. xfs_refcount_lookup_eq(
  78. struct xfs_btree_cur *cur,
  79. xfs_agblock_t bno,
  80. int *stat)
  81. {
  82. trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_private.a.agno, bno,
  83. XFS_LOOKUP_LE);
  84. cur->bc_rec.rc.rc_startblock = bno;
  85. cur->bc_rec.rc.rc_blockcount = 0;
  86. return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
  87. }
  88. /* Convert on-disk record to in-core format. */
  89. void
  90. xfs_refcount_btrec_to_irec(
  91. union xfs_btree_rec *rec,
  92. struct xfs_refcount_irec *irec)
  93. {
  94. irec->rc_startblock = be32_to_cpu(rec->refc.rc_startblock);
  95. irec->rc_blockcount = be32_to_cpu(rec->refc.rc_blockcount);
  96. irec->rc_refcount = be32_to_cpu(rec->refc.rc_refcount);
  97. }
  98. /*
  99. * Get the data from the pointed-to record.
  100. */
  101. int
  102. xfs_refcount_get_rec(
  103. struct xfs_btree_cur *cur,
  104. struct xfs_refcount_irec *irec,
  105. int *stat)
  106. {
  107. struct xfs_mount *mp = cur->bc_mp;
  108. xfs_agnumber_t agno = cur->bc_private.a.agno;
  109. union xfs_btree_rec *rec;
  110. int error;
  111. xfs_agblock_t realstart;
  112. error = xfs_btree_get_rec(cur, &rec, stat);
  113. if (error || !*stat)
  114. return error;
  115. xfs_refcount_btrec_to_irec(rec, irec);
  116. agno = cur->bc_private.a.agno;
  117. if (irec->rc_blockcount == 0 || irec->rc_blockcount > MAXREFCEXTLEN)
  118. goto out_bad_rec;
  119. /* handle special COW-staging state */
  120. realstart = irec->rc_startblock;
  121. if (realstart & XFS_REFC_COW_START) {
  122. if (irec->rc_refcount != 1)
  123. goto out_bad_rec;
  124. realstart &= ~XFS_REFC_COW_START;
  125. } else if (irec->rc_refcount < 2) {
  126. goto out_bad_rec;
  127. }
  128. /* check for valid extent range, including overflow */
  129. if (!xfs_verify_agbno(mp, agno, realstart))
  130. goto out_bad_rec;
  131. if (realstart > realstart + irec->rc_blockcount)
  132. goto out_bad_rec;
  133. if (!xfs_verify_agbno(mp, agno, realstart + irec->rc_blockcount - 1))
  134. goto out_bad_rec;
  135. if (irec->rc_refcount == 0 || irec->rc_refcount > MAXREFCOUNT)
  136. goto out_bad_rec;
  137. trace_xfs_refcount_get(cur->bc_mp, cur->bc_private.a.agno, irec);
  138. return 0;
  139. out_bad_rec:
  140. xfs_warn(mp,
  141. "Refcount BTree record corruption in AG %d detected!", agno);
  142. xfs_warn(mp,
  143. "Start block 0x%x, block count 0x%x, references 0x%x",
  144. irec->rc_startblock, irec->rc_blockcount, irec->rc_refcount);
  145. return -EFSCORRUPTED;
  146. }
  147. /*
  148. * Update the record referred to by cur to the value given
  149. * by [bno, len, refcount].
  150. * This either works (return 0) or gets an EFSCORRUPTED error.
  151. */
  152. STATIC int
  153. xfs_refcount_update(
  154. struct xfs_btree_cur *cur,
  155. struct xfs_refcount_irec *irec)
  156. {
  157. union xfs_btree_rec rec;
  158. int error;
  159. trace_xfs_refcount_update(cur->bc_mp, cur->bc_private.a.agno, irec);
  160. rec.refc.rc_startblock = cpu_to_be32(irec->rc_startblock);
  161. rec.refc.rc_blockcount = cpu_to_be32(irec->rc_blockcount);
  162. rec.refc.rc_refcount = cpu_to_be32(irec->rc_refcount);
  163. error = xfs_btree_update(cur, &rec);
  164. if (error)
  165. trace_xfs_refcount_update_error(cur->bc_mp,
  166. cur->bc_private.a.agno, error, _RET_IP_);
  167. return error;
  168. }
  169. /*
  170. * Insert the record referred to by cur to the value given
  171. * by [bno, len, refcount].
  172. * This either works (return 0) or gets an EFSCORRUPTED error.
  173. */
  174. int
  175. xfs_refcount_insert(
  176. struct xfs_btree_cur *cur,
  177. struct xfs_refcount_irec *irec,
  178. int *i)
  179. {
  180. int error;
  181. trace_xfs_refcount_insert(cur->bc_mp, cur->bc_private.a.agno, irec);
  182. cur->bc_rec.rc.rc_startblock = irec->rc_startblock;
  183. cur->bc_rec.rc.rc_blockcount = irec->rc_blockcount;
  184. cur->bc_rec.rc.rc_refcount = irec->rc_refcount;
  185. error = xfs_btree_insert(cur, i);
  186. if (error)
  187. goto out_error;
  188. XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, *i == 1, out_error);
  189. out_error:
  190. if (error)
  191. trace_xfs_refcount_insert_error(cur->bc_mp,
  192. cur->bc_private.a.agno, error, _RET_IP_);
  193. return error;
  194. }
  195. /*
  196. * Remove the record referred to by cur, then set the pointer to the spot
  197. * where the record could be re-inserted, in case we want to increment or
  198. * decrement the cursor.
  199. * This either works (return 0) or gets an EFSCORRUPTED error.
  200. */
  201. STATIC int
  202. xfs_refcount_delete(
  203. struct xfs_btree_cur *cur,
  204. int *i)
  205. {
  206. struct xfs_refcount_irec irec;
  207. int found_rec;
  208. int error;
  209. error = xfs_refcount_get_rec(cur, &irec, &found_rec);
  210. if (error)
  211. goto out_error;
  212. XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error);
  213. trace_xfs_refcount_delete(cur->bc_mp, cur->bc_private.a.agno, &irec);
  214. error = xfs_btree_delete(cur, i);
  215. XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, *i == 1, out_error);
  216. if (error)
  217. goto out_error;
  218. error = xfs_refcount_lookup_ge(cur, irec.rc_startblock, &found_rec);
  219. out_error:
  220. if (error)
  221. trace_xfs_refcount_delete_error(cur->bc_mp,
  222. cur->bc_private.a.agno, error, _RET_IP_);
  223. return error;
  224. }
  225. /*
  226. * Adjusting the Reference Count
  227. *
  228. * As stated elsewhere, the reference count btree (refcbt) stores
  229. * >1 reference counts for extents of physical blocks. In this
  230. * operation, we're either raising or lowering the reference count of
  231. * some subrange stored in the tree:
  232. *
  233. * <------ adjustment range ------>
  234. * ----+ +---+-----+ +--+--------+---------
  235. * 2 | | 3 | 4 | |17| 55 | 10
  236. * ----+ +---+-----+ +--+--------+---------
  237. * X axis is physical blocks number;
  238. * reference counts are the numbers inside the rectangles
  239. *
  240. * The first thing we need to do is to ensure that there are no
  241. * refcount extents crossing either boundary of the range to be
  242. * adjusted. For any extent that does cross a boundary, split it into
  243. * two extents so that we can increment the refcount of one of the
  244. * pieces later:
  245. *
  246. * <------ adjustment range ------>
  247. * ----+ +---+-----+ +--+--------+----+----
  248. * 2 | | 3 | 2 | |17| 55 | 10 | 10
  249. * ----+ +---+-----+ +--+--------+----+----
  250. *
  251. * For this next step, let's assume that all the physical blocks in
  252. * the adjustment range are mapped to a file and are therefore in use
  253. * at least once. Therefore, we can infer that any gap in the
  254. * refcount tree within the adjustment range represents a physical
  255. * extent with refcount == 1:
  256. *
  257. * <------ adjustment range ------>
  258. * ----+---+---+-----+-+--+--------+----+----
  259. * 2 |"1"| 3 | 2 |1|17| 55 | 10 | 10
  260. * ----+---+---+-----+-+--+--------+----+----
  261. * ^
  262. *
  263. * For each extent that falls within the interval range, figure out
  264. * which extent is to the left or the right of that extent. Now we
  265. * have a left, current, and right extent. If the new reference count
  266. * of the center extent enables us to merge left, center, and right
  267. * into one record covering all three, do so. If the center extent is
  268. * at the left end of the range, abuts the left extent, and its new
  269. * reference count matches the left extent's record, then merge them.
  270. * If the center extent is at the right end of the range, abuts the
  271. * right extent, and the reference counts match, merge those. In the
  272. * example, we can left merge (assuming an increment operation):
  273. *
  274. * <------ adjustment range ------>
  275. * --------+---+-----+-+--+--------+----+----
  276. * 2 | 3 | 2 |1|17| 55 | 10 | 10
  277. * --------+---+-----+-+--+--------+----+----
  278. * ^
  279. *
  280. * For all other extents within the range, adjust the reference count
  281. * or delete it if the refcount falls below 2. If we were
  282. * incrementing, the end result looks like this:
  283. *
  284. * <------ adjustment range ------>
  285. * --------+---+-----+-+--+--------+----+----
  286. * 2 | 4 | 3 |2|18| 56 | 11 | 10
  287. * --------+---+-----+-+--+--------+----+----
  288. *
  289. * The result of a decrement operation looks as such:
  290. *
  291. * <------ adjustment range ------>
  292. * ----+ +---+ +--+--------+----+----
  293. * 2 | | 2 | |16| 54 | 9 | 10
  294. * ----+ +---+ +--+--------+----+----
  295. * DDDD 111111DD
  296. *
  297. * The blocks marked "D" are freed; the blocks marked "1" are only
  298. * referenced once and therefore the record is removed from the
  299. * refcount btree.
  300. */
  301. /* Next block after this extent. */
  302. static inline xfs_agblock_t
  303. xfs_refc_next(
  304. struct xfs_refcount_irec *rc)
  305. {
  306. return rc->rc_startblock + rc->rc_blockcount;
  307. }
  308. /*
  309. * Split a refcount extent that crosses agbno.
  310. */
  311. STATIC int
  312. xfs_refcount_split_extent(
  313. struct xfs_btree_cur *cur,
  314. xfs_agblock_t agbno,
  315. bool *shape_changed)
  316. {
  317. struct xfs_refcount_irec rcext, tmp;
  318. int found_rec;
  319. int error;
  320. *shape_changed = false;
  321. error = xfs_refcount_lookup_le(cur, agbno, &found_rec);
  322. if (error)
  323. goto out_error;
  324. if (!found_rec)
  325. return 0;
  326. error = xfs_refcount_get_rec(cur, &rcext, &found_rec);
  327. if (error)
  328. goto out_error;
  329. XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error);
  330. if (rcext.rc_startblock == agbno || xfs_refc_next(&rcext) <= agbno)
  331. return 0;
  332. *shape_changed = true;
  333. trace_xfs_refcount_split_extent(cur->bc_mp, cur->bc_private.a.agno,
  334. &rcext, agbno);
  335. /* Establish the right extent. */
  336. tmp = rcext;
  337. tmp.rc_startblock = agbno;
  338. tmp.rc_blockcount -= (agbno - rcext.rc_startblock);
  339. error = xfs_refcount_update(cur, &tmp);
  340. if (error)
  341. goto out_error;
  342. /* Insert the left extent. */
  343. tmp = rcext;
  344. tmp.rc_blockcount = agbno - rcext.rc_startblock;
  345. error = xfs_refcount_insert(cur, &tmp, &found_rec);
  346. if (error)
  347. goto out_error;
  348. XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error);
  349. return error;
  350. out_error:
  351. trace_xfs_refcount_split_extent_error(cur->bc_mp,
  352. cur->bc_private.a.agno, error, _RET_IP_);
  353. return error;
  354. }
  355. /*
  356. * Merge the left, center, and right extents.
  357. */
  358. STATIC int
  359. xfs_refcount_merge_center_extents(
  360. struct xfs_btree_cur *cur,
  361. struct xfs_refcount_irec *left,
  362. struct xfs_refcount_irec *center,
  363. struct xfs_refcount_irec *right,
  364. unsigned long long extlen,
  365. xfs_extlen_t *aglen)
  366. {
  367. int error;
  368. int found_rec;
  369. trace_xfs_refcount_merge_center_extents(cur->bc_mp,
  370. cur->bc_private.a.agno, left, center, right);
  371. /*
  372. * Make sure the center and right extents are not in the btree.
  373. * If the center extent was synthesized, the first delete call
  374. * removes the right extent and we skip the second deletion.
  375. * If center and right were in the btree, then the first delete
  376. * call removes the center and the second one removes the right
  377. * extent.
  378. */
  379. error = xfs_refcount_lookup_ge(cur, center->rc_startblock,
  380. &found_rec);
  381. if (error)
  382. goto out_error;
  383. XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error);
  384. error = xfs_refcount_delete(cur, &found_rec);
  385. if (error)
  386. goto out_error;
  387. XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error);
  388. if (center->rc_refcount > 1) {
  389. error = xfs_refcount_delete(cur, &found_rec);
  390. if (error)
  391. goto out_error;
  392. XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1,
  393. out_error);
  394. }
  395. /* Enlarge the left extent. */
  396. error = xfs_refcount_lookup_le(cur, left->rc_startblock,
  397. &found_rec);
  398. if (error)
  399. goto out_error;
  400. XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error);
  401. left->rc_blockcount = extlen;
  402. error = xfs_refcount_update(cur, left);
  403. if (error)
  404. goto out_error;
  405. *aglen = 0;
  406. return error;
  407. out_error:
  408. trace_xfs_refcount_merge_center_extents_error(cur->bc_mp,
  409. cur->bc_private.a.agno, error, _RET_IP_);
  410. return error;
  411. }
  412. /*
  413. * Merge with the left extent.
  414. */
  415. STATIC int
  416. xfs_refcount_merge_left_extent(
  417. struct xfs_btree_cur *cur,
  418. struct xfs_refcount_irec *left,
  419. struct xfs_refcount_irec *cleft,
  420. xfs_agblock_t *agbno,
  421. xfs_extlen_t *aglen)
  422. {
  423. int error;
  424. int found_rec;
  425. trace_xfs_refcount_merge_left_extent(cur->bc_mp,
  426. cur->bc_private.a.agno, left, cleft);
  427. /* If the extent at agbno (cleft) wasn't synthesized, remove it. */
  428. if (cleft->rc_refcount > 1) {
  429. error = xfs_refcount_lookup_le(cur, cleft->rc_startblock,
  430. &found_rec);
  431. if (error)
  432. goto out_error;
  433. XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1,
  434. out_error);
  435. error = xfs_refcount_delete(cur, &found_rec);
  436. if (error)
  437. goto out_error;
  438. XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1,
  439. out_error);
  440. }
  441. /* Enlarge the left extent. */
  442. error = xfs_refcount_lookup_le(cur, left->rc_startblock,
  443. &found_rec);
  444. if (error)
  445. goto out_error;
  446. XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error);
  447. left->rc_blockcount += cleft->rc_blockcount;
  448. error = xfs_refcount_update(cur, left);
  449. if (error)
  450. goto out_error;
  451. *agbno += cleft->rc_blockcount;
  452. *aglen -= cleft->rc_blockcount;
  453. return error;
  454. out_error:
  455. trace_xfs_refcount_merge_left_extent_error(cur->bc_mp,
  456. cur->bc_private.a.agno, error, _RET_IP_);
  457. return error;
  458. }
  459. /*
  460. * Merge with the right extent.
  461. */
  462. STATIC int
  463. xfs_refcount_merge_right_extent(
  464. struct xfs_btree_cur *cur,
  465. struct xfs_refcount_irec *right,
  466. struct xfs_refcount_irec *cright,
  467. xfs_extlen_t *aglen)
  468. {
  469. int error;
  470. int found_rec;
  471. trace_xfs_refcount_merge_right_extent(cur->bc_mp,
  472. cur->bc_private.a.agno, cright, right);
  473. /*
  474. * If the extent ending at agbno+aglen (cright) wasn't synthesized,
  475. * remove it.
  476. */
  477. if (cright->rc_refcount > 1) {
  478. error = xfs_refcount_lookup_le(cur, cright->rc_startblock,
  479. &found_rec);
  480. if (error)
  481. goto out_error;
  482. XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1,
  483. out_error);
  484. error = xfs_refcount_delete(cur, &found_rec);
  485. if (error)
  486. goto out_error;
  487. XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1,
  488. out_error);
  489. }
  490. /* Enlarge the right extent. */
  491. error = xfs_refcount_lookup_le(cur, right->rc_startblock,
  492. &found_rec);
  493. if (error)
  494. goto out_error;
  495. XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error);
  496. right->rc_startblock -= cright->rc_blockcount;
  497. right->rc_blockcount += cright->rc_blockcount;
  498. error = xfs_refcount_update(cur, right);
  499. if (error)
  500. goto out_error;
  501. *aglen -= cright->rc_blockcount;
  502. return error;
  503. out_error:
  504. trace_xfs_refcount_merge_right_extent_error(cur->bc_mp,
  505. cur->bc_private.a.agno, error, _RET_IP_);
  506. return error;
  507. }
  508. #define XFS_FIND_RCEXT_SHARED 1
  509. #define XFS_FIND_RCEXT_COW 2
  510. /*
  511. * Find the left extent and the one after it (cleft). This function assumes
  512. * that we've already split any extent crossing agbno.
  513. */
  514. STATIC int
  515. xfs_refcount_find_left_extents(
  516. struct xfs_btree_cur *cur,
  517. struct xfs_refcount_irec *left,
  518. struct xfs_refcount_irec *cleft,
  519. xfs_agblock_t agbno,
  520. xfs_extlen_t aglen,
  521. int flags)
  522. {
  523. struct xfs_refcount_irec tmp;
  524. int error;
  525. int found_rec;
  526. left->rc_startblock = cleft->rc_startblock = NULLAGBLOCK;
  527. error = xfs_refcount_lookup_le(cur, agbno - 1, &found_rec);
  528. if (error)
  529. goto out_error;
  530. if (!found_rec)
  531. return 0;
  532. error = xfs_refcount_get_rec(cur, &tmp, &found_rec);
  533. if (error)
  534. goto out_error;
  535. XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error);
  536. if (xfs_refc_next(&tmp) != agbno)
  537. return 0;
  538. if ((flags & XFS_FIND_RCEXT_SHARED) && tmp.rc_refcount < 2)
  539. return 0;
  540. if ((flags & XFS_FIND_RCEXT_COW) && tmp.rc_refcount > 1)
  541. return 0;
  542. /* We have a left extent; retrieve (or invent) the next right one */
  543. *left = tmp;
  544. error = xfs_btree_increment(cur, 0, &found_rec);
  545. if (error)
  546. goto out_error;
  547. if (found_rec) {
  548. error = xfs_refcount_get_rec(cur, &tmp, &found_rec);
  549. if (error)
  550. goto out_error;
  551. XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1,
  552. out_error);
  553. /* if tmp starts at the end of our range, just use that */
  554. if (tmp.rc_startblock == agbno)
  555. *cleft = tmp;
  556. else {
  557. /*
  558. * There's a gap in the refcntbt at the start of the
  559. * range we're interested in (refcount == 1) so
  560. * synthesize the implied extent and pass it back.
  561. * We assume here that the agbno/aglen range was
  562. * passed in from a data fork extent mapping and
  563. * therefore is allocated to exactly one owner.
  564. */
  565. cleft->rc_startblock = agbno;
  566. cleft->rc_blockcount = min(aglen,
  567. tmp.rc_startblock - agbno);
  568. cleft->rc_refcount = 1;
  569. }
  570. } else {
  571. /*
  572. * No extents, so pretend that there's one covering the whole
  573. * range.
  574. */
  575. cleft->rc_startblock = agbno;
  576. cleft->rc_blockcount = aglen;
  577. cleft->rc_refcount = 1;
  578. }
  579. trace_xfs_refcount_find_left_extent(cur->bc_mp, cur->bc_private.a.agno,
  580. left, cleft, agbno);
  581. return error;
  582. out_error:
  583. trace_xfs_refcount_find_left_extent_error(cur->bc_mp,
  584. cur->bc_private.a.agno, error, _RET_IP_);
  585. return error;
  586. }
  587. /*
  588. * Find the right extent and the one before it (cright). This function
  589. * assumes that we've already split any extents crossing agbno + aglen.
  590. */
  591. STATIC int
  592. xfs_refcount_find_right_extents(
  593. struct xfs_btree_cur *cur,
  594. struct xfs_refcount_irec *right,
  595. struct xfs_refcount_irec *cright,
  596. xfs_agblock_t agbno,
  597. xfs_extlen_t aglen,
  598. int flags)
  599. {
  600. struct xfs_refcount_irec tmp;
  601. int error;
  602. int found_rec;
  603. right->rc_startblock = cright->rc_startblock = NULLAGBLOCK;
  604. error = xfs_refcount_lookup_ge(cur, agbno + aglen, &found_rec);
  605. if (error)
  606. goto out_error;
  607. if (!found_rec)
  608. return 0;
  609. error = xfs_refcount_get_rec(cur, &tmp, &found_rec);
  610. if (error)
  611. goto out_error;
  612. XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error);
  613. if (tmp.rc_startblock != agbno + aglen)
  614. return 0;
  615. if ((flags & XFS_FIND_RCEXT_SHARED) && tmp.rc_refcount < 2)
  616. return 0;
  617. if ((flags & XFS_FIND_RCEXT_COW) && tmp.rc_refcount > 1)
  618. return 0;
  619. /* We have a right extent; retrieve (or invent) the next left one */
  620. *right = tmp;
  621. error = xfs_btree_decrement(cur, 0, &found_rec);
  622. if (error)
  623. goto out_error;
  624. if (found_rec) {
  625. error = xfs_refcount_get_rec(cur, &tmp, &found_rec);
  626. if (error)
  627. goto out_error;
  628. XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1,
  629. out_error);
  630. /* if tmp ends at the end of our range, just use that */
  631. if (xfs_refc_next(&tmp) == agbno + aglen)
  632. *cright = tmp;
  633. else {
  634. /*
  635. * There's a gap in the refcntbt at the end of the
  636. * range we're interested in (refcount == 1) so
  637. * create the implied extent and pass it back.
  638. * We assume here that the agbno/aglen range was
  639. * passed in from a data fork extent mapping and
  640. * therefore is allocated to exactly one owner.
  641. */
  642. cright->rc_startblock = max(agbno, xfs_refc_next(&tmp));
  643. cright->rc_blockcount = right->rc_startblock -
  644. cright->rc_startblock;
  645. cright->rc_refcount = 1;
  646. }
  647. } else {
  648. /*
  649. * No extents, so pretend that there's one covering the whole
  650. * range.
  651. */
  652. cright->rc_startblock = agbno;
  653. cright->rc_blockcount = aglen;
  654. cright->rc_refcount = 1;
  655. }
  656. trace_xfs_refcount_find_right_extent(cur->bc_mp, cur->bc_private.a.agno,
  657. cright, right, agbno + aglen);
  658. return error;
  659. out_error:
  660. trace_xfs_refcount_find_right_extent_error(cur->bc_mp,
  661. cur->bc_private.a.agno, error, _RET_IP_);
  662. return error;
  663. }
  664. /* Is this extent valid? */
  665. static inline bool
  666. xfs_refc_valid(
  667. struct xfs_refcount_irec *rc)
  668. {
  669. return rc->rc_startblock != NULLAGBLOCK;
  670. }
  671. /*
  672. * Try to merge with any extents on the boundaries of the adjustment range.
  673. */
  674. STATIC int
  675. xfs_refcount_merge_extents(
  676. struct xfs_btree_cur *cur,
  677. xfs_agblock_t *agbno,
  678. xfs_extlen_t *aglen,
  679. enum xfs_refc_adjust_op adjust,
  680. int flags,
  681. bool *shape_changed)
  682. {
  683. struct xfs_refcount_irec left = {0}, cleft = {0};
  684. struct xfs_refcount_irec cright = {0}, right = {0};
  685. int error;
  686. unsigned long long ulen;
  687. bool cequal;
  688. *shape_changed = false;
  689. /*
  690. * Find the extent just below agbno [left], just above agbno [cleft],
  691. * just below (agbno + aglen) [cright], and just above (agbno + aglen)
  692. * [right].
  693. */
  694. error = xfs_refcount_find_left_extents(cur, &left, &cleft, *agbno,
  695. *aglen, flags);
  696. if (error)
  697. return error;
  698. error = xfs_refcount_find_right_extents(cur, &right, &cright, *agbno,
  699. *aglen, flags);
  700. if (error)
  701. return error;
  702. /* No left or right extent to merge; exit. */
  703. if (!xfs_refc_valid(&left) && !xfs_refc_valid(&right))
  704. return 0;
  705. cequal = (cleft.rc_startblock == cright.rc_startblock) &&
  706. (cleft.rc_blockcount == cright.rc_blockcount);
  707. /* Try to merge left, cleft, and right. cleft must == cright. */
  708. ulen = (unsigned long long)left.rc_blockcount + cleft.rc_blockcount +
  709. right.rc_blockcount;
  710. if (xfs_refc_valid(&left) && xfs_refc_valid(&right) &&
  711. xfs_refc_valid(&cleft) && xfs_refc_valid(&cright) && cequal &&
  712. left.rc_refcount == cleft.rc_refcount + adjust &&
  713. right.rc_refcount == cleft.rc_refcount + adjust &&
  714. ulen < MAXREFCEXTLEN) {
  715. *shape_changed = true;
  716. return xfs_refcount_merge_center_extents(cur, &left, &cleft,
  717. &right, ulen, aglen);
  718. }
  719. /* Try to merge left and cleft. */
  720. ulen = (unsigned long long)left.rc_blockcount + cleft.rc_blockcount;
  721. if (xfs_refc_valid(&left) && xfs_refc_valid(&cleft) &&
  722. left.rc_refcount == cleft.rc_refcount + adjust &&
  723. ulen < MAXREFCEXTLEN) {
  724. *shape_changed = true;
  725. error = xfs_refcount_merge_left_extent(cur, &left, &cleft,
  726. agbno, aglen);
  727. if (error)
  728. return error;
  729. /*
  730. * If we just merged left + cleft and cleft == cright,
  731. * we no longer have a cright to merge with right. We're done.
  732. */
  733. if (cequal)
  734. return 0;
  735. }
  736. /* Try to merge cright and right. */
  737. ulen = (unsigned long long)right.rc_blockcount + cright.rc_blockcount;
  738. if (xfs_refc_valid(&right) && xfs_refc_valid(&cright) &&
  739. right.rc_refcount == cright.rc_refcount + adjust &&
  740. ulen < MAXREFCEXTLEN) {
  741. *shape_changed = true;
  742. return xfs_refcount_merge_right_extent(cur, &right, &cright,
  743. aglen);
  744. }
  745. return error;
  746. }
  747. /*
  748. * XXX: This is a pretty hand-wavy estimate. The penalty for guessing
  749. * true incorrectly is a shutdown FS; the penalty for guessing false
  750. * incorrectly is more transaction rolls than might be necessary.
  751. * Be conservative here.
  752. */
  753. static bool
  754. xfs_refcount_still_have_space(
  755. struct xfs_btree_cur *cur)
  756. {
  757. unsigned long overhead;
  758. overhead = cur->bc_private.a.priv.refc.shape_changes *
  759. xfs_allocfree_log_count(cur->bc_mp, 1);
  760. overhead *= cur->bc_mp->m_sb.sb_blocksize;
  761. /*
  762. * Only allow 2 refcount extent updates per transaction if the
  763. * refcount continue update "error" has been injected.
  764. */
  765. if (cur->bc_private.a.priv.refc.nr_ops > 2 &&
  766. XFS_TEST_ERROR(false, cur->bc_mp,
  767. XFS_ERRTAG_REFCOUNT_CONTINUE_UPDATE))
  768. return false;
  769. if (cur->bc_private.a.priv.refc.nr_ops == 0)
  770. return true;
  771. else if (overhead > cur->bc_tp->t_log_res)
  772. return false;
  773. return cur->bc_tp->t_log_res - overhead >
  774. cur->bc_private.a.priv.refc.nr_ops * XFS_REFCOUNT_ITEM_OVERHEAD;
  775. }
  776. /*
  777. * Adjust the refcounts of middle extents. At this point we should have
  778. * split extents that crossed the adjustment range; merged with adjacent
  779. * extents; and updated agbno/aglen to reflect the merges. Therefore,
  780. * all we have to do is update the extents inside [agbno, agbno + aglen].
  781. */
  782. STATIC int
  783. xfs_refcount_adjust_extents(
  784. struct xfs_btree_cur *cur,
  785. xfs_agblock_t *agbno,
  786. xfs_extlen_t *aglen,
  787. enum xfs_refc_adjust_op adj,
  788. struct xfs_defer_ops *dfops,
  789. struct xfs_owner_info *oinfo)
  790. {
  791. struct xfs_refcount_irec ext, tmp;
  792. int error;
  793. int found_rec, found_tmp;
  794. xfs_fsblock_t fsbno;
  795. /* Merging did all the work already. */
  796. if (*aglen == 0)
  797. return 0;
  798. error = xfs_refcount_lookup_ge(cur, *agbno, &found_rec);
  799. if (error)
  800. goto out_error;
  801. while (*aglen > 0 && xfs_refcount_still_have_space(cur)) {
  802. error = xfs_refcount_get_rec(cur, &ext, &found_rec);
  803. if (error)
  804. goto out_error;
  805. if (!found_rec) {
  806. ext.rc_startblock = cur->bc_mp->m_sb.sb_agblocks;
  807. ext.rc_blockcount = 0;
  808. ext.rc_refcount = 0;
  809. }
  810. /*
  811. * Deal with a hole in the refcount tree; if a file maps to
  812. * these blocks and there's no refcountbt record, pretend that
  813. * there is one with refcount == 1.
  814. */
  815. if (ext.rc_startblock != *agbno) {
  816. tmp.rc_startblock = *agbno;
  817. tmp.rc_blockcount = min(*aglen,
  818. ext.rc_startblock - *agbno);
  819. tmp.rc_refcount = 1 + adj;
  820. trace_xfs_refcount_modify_extent(cur->bc_mp,
  821. cur->bc_private.a.agno, &tmp);
  822. /*
  823. * Either cover the hole (increment) or
  824. * delete the range (decrement).
  825. */
  826. if (tmp.rc_refcount) {
  827. error = xfs_refcount_insert(cur, &tmp,
  828. &found_tmp);
  829. if (error)
  830. goto out_error;
  831. XFS_WANT_CORRUPTED_GOTO(cur->bc_mp,
  832. found_tmp == 1, out_error);
  833. cur->bc_private.a.priv.refc.nr_ops++;
  834. } else {
  835. fsbno = XFS_AGB_TO_FSB(cur->bc_mp,
  836. cur->bc_private.a.agno,
  837. tmp.rc_startblock);
  838. xfs_bmap_add_free(cur->bc_mp, dfops, fsbno,
  839. tmp.rc_blockcount, oinfo);
  840. }
  841. (*agbno) += tmp.rc_blockcount;
  842. (*aglen) -= tmp.rc_blockcount;
  843. error = xfs_refcount_lookup_ge(cur, *agbno,
  844. &found_rec);
  845. if (error)
  846. goto out_error;
  847. }
  848. /* Stop if there's nothing left to modify */
  849. if (*aglen == 0 || !xfs_refcount_still_have_space(cur))
  850. break;
  851. /*
  852. * Adjust the reference count and either update the tree
  853. * (incr) or free the blocks (decr).
  854. */
  855. if (ext.rc_refcount == MAXREFCOUNT)
  856. goto skip;
  857. ext.rc_refcount += adj;
  858. trace_xfs_refcount_modify_extent(cur->bc_mp,
  859. cur->bc_private.a.agno, &ext);
  860. if (ext.rc_refcount > 1) {
  861. error = xfs_refcount_update(cur, &ext);
  862. if (error)
  863. goto out_error;
  864. cur->bc_private.a.priv.refc.nr_ops++;
  865. } else if (ext.rc_refcount == 1) {
  866. error = xfs_refcount_delete(cur, &found_rec);
  867. if (error)
  868. goto out_error;
  869. XFS_WANT_CORRUPTED_GOTO(cur->bc_mp,
  870. found_rec == 1, out_error);
  871. cur->bc_private.a.priv.refc.nr_ops++;
  872. goto advloop;
  873. } else {
  874. fsbno = XFS_AGB_TO_FSB(cur->bc_mp,
  875. cur->bc_private.a.agno,
  876. ext.rc_startblock);
  877. xfs_bmap_add_free(cur->bc_mp, dfops, fsbno,
  878. ext.rc_blockcount, oinfo);
  879. }
  880. skip:
  881. error = xfs_btree_increment(cur, 0, &found_rec);
  882. if (error)
  883. goto out_error;
  884. advloop:
  885. (*agbno) += ext.rc_blockcount;
  886. (*aglen) -= ext.rc_blockcount;
  887. }
  888. return error;
  889. out_error:
  890. trace_xfs_refcount_modify_extent_error(cur->bc_mp,
  891. cur->bc_private.a.agno, error, _RET_IP_);
  892. return error;
  893. }
  894. /* Adjust the reference count of a range of AG blocks. */
  895. STATIC int
  896. xfs_refcount_adjust(
  897. struct xfs_btree_cur *cur,
  898. xfs_agblock_t agbno,
  899. xfs_extlen_t aglen,
  900. xfs_agblock_t *new_agbno,
  901. xfs_extlen_t *new_aglen,
  902. enum xfs_refc_adjust_op adj,
  903. struct xfs_defer_ops *dfops,
  904. struct xfs_owner_info *oinfo)
  905. {
  906. bool shape_changed;
  907. int shape_changes = 0;
  908. int error;
  909. *new_agbno = agbno;
  910. *new_aglen = aglen;
  911. if (adj == XFS_REFCOUNT_ADJUST_INCREASE)
  912. trace_xfs_refcount_increase(cur->bc_mp, cur->bc_private.a.agno,
  913. agbno, aglen);
  914. else
  915. trace_xfs_refcount_decrease(cur->bc_mp, cur->bc_private.a.agno,
  916. agbno, aglen);
  917. /*
  918. * Ensure that no rcextents cross the boundary of the adjustment range.
  919. */
  920. error = xfs_refcount_split_extent(cur, agbno, &shape_changed);
  921. if (error)
  922. goto out_error;
  923. if (shape_changed)
  924. shape_changes++;
  925. error = xfs_refcount_split_extent(cur, agbno + aglen, &shape_changed);
  926. if (error)
  927. goto out_error;
  928. if (shape_changed)
  929. shape_changes++;
  930. /*
  931. * Try to merge with the left or right extents of the range.
  932. */
  933. error = xfs_refcount_merge_extents(cur, new_agbno, new_aglen, adj,
  934. XFS_FIND_RCEXT_SHARED, &shape_changed);
  935. if (error)
  936. goto out_error;
  937. if (shape_changed)
  938. shape_changes++;
  939. if (shape_changes)
  940. cur->bc_private.a.priv.refc.shape_changes++;
  941. /* Now that we've taken care of the ends, adjust the middle extents */
  942. error = xfs_refcount_adjust_extents(cur, new_agbno, new_aglen,
  943. adj, dfops, oinfo);
  944. if (error)
  945. goto out_error;
  946. return 0;
  947. out_error:
  948. trace_xfs_refcount_adjust_error(cur->bc_mp, cur->bc_private.a.agno,
  949. error, _RET_IP_);
  950. return error;
  951. }
  952. /* Clean up after calling xfs_refcount_finish_one. */
  953. void
  954. xfs_refcount_finish_one_cleanup(
  955. struct xfs_trans *tp,
  956. struct xfs_btree_cur *rcur,
  957. int error)
  958. {
  959. struct xfs_buf *agbp;
  960. if (rcur == NULL)
  961. return;
  962. agbp = rcur->bc_private.a.agbp;
  963. xfs_btree_del_cursor(rcur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
  964. if (error)
  965. xfs_trans_brelse(tp, agbp);
  966. }
  967. /*
  968. * Process one of the deferred refcount operations. We pass back the
  969. * btree cursor to maintain our lock on the btree between calls.
  970. * This saves time and eliminates a buffer deadlock between the
  971. * superblock and the AGF because we'll always grab them in the same
  972. * order.
  973. */
  974. int
  975. xfs_refcount_finish_one(
  976. struct xfs_trans *tp,
  977. struct xfs_defer_ops *dfops,
  978. enum xfs_refcount_intent_type type,
  979. xfs_fsblock_t startblock,
  980. xfs_extlen_t blockcount,
  981. xfs_fsblock_t *new_fsb,
  982. xfs_extlen_t *new_len,
  983. struct xfs_btree_cur **pcur)
  984. {
  985. struct xfs_mount *mp = tp->t_mountp;
  986. struct xfs_btree_cur *rcur;
  987. struct xfs_buf *agbp = NULL;
  988. int error = 0;
  989. xfs_agnumber_t agno;
  990. xfs_agblock_t bno;
  991. xfs_agblock_t new_agbno;
  992. unsigned long nr_ops = 0;
  993. int shape_changes = 0;
  994. agno = XFS_FSB_TO_AGNO(mp, startblock);
  995. ASSERT(agno != NULLAGNUMBER);
  996. bno = XFS_FSB_TO_AGBNO(mp, startblock);
  997. trace_xfs_refcount_deferred(mp, XFS_FSB_TO_AGNO(mp, startblock),
  998. type, XFS_FSB_TO_AGBNO(mp, startblock),
  999. blockcount);
  1000. if (XFS_TEST_ERROR(false, mp,
  1001. XFS_ERRTAG_REFCOUNT_FINISH_ONE))
  1002. return -EIO;
  1003. /*
  1004. * If we haven't gotten a cursor or the cursor AG doesn't match
  1005. * the startblock, get one now.
  1006. */
  1007. rcur = *pcur;
  1008. if (rcur != NULL && rcur->bc_private.a.agno != agno) {
  1009. nr_ops = rcur->bc_private.a.priv.refc.nr_ops;
  1010. shape_changes = rcur->bc_private.a.priv.refc.shape_changes;
  1011. xfs_refcount_finish_one_cleanup(tp, rcur, 0);
  1012. rcur = NULL;
  1013. *pcur = NULL;
  1014. }
  1015. if (rcur == NULL) {
  1016. error = xfs_alloc_read_agf(tp->t_mountp, tp, agno,
  1017. XFS_ALLOC_FLAG_FREEING, &agbp);
  1018. if (error)
  1019. return error;
  1020. if (!agbp)
  1021. return -EFSCORRUPTED;
  1022. rcur = xfs_refcountbt_init_cursor(mp, tp, agbp, agno, dfops);
  1023. if (!rcur) {
  1024. error = -ENOMEM;
  1025. goto out_cur;
  1026. }
  1027. rcur->bc_private.a.priv.refc.nr_ops = nr_ops;
  1028. rcur->bc_private.a.priv.refc.shape_changes = shape_changes;
  1029. }
  1030. *pcur = rcur;
  1031. switch (type) {
  1032. case XFS_REFCOUNT_INCREASE:
  1033. error = xfs_refcount_adjust(rcur, bno, blockcount, &new_agbno,
  1034. new_len, XFS_REFCOUNT_ADJUST_INCREASE, dfops, NULL);
  1035. *new_fsb = XFS_AGB_TO_FSB(mp, agno, new_agbno);
  1036. break;
  1037. case XFS_REFCOUNT_DECREASE:
  1038. error = xfs_refcount_adjust(rcur, bno, blockcount, &new_agbno,
  1039. new_len, XFS_REFCOUNT_ADJUST_DECREASE, dfops, NULL);
  1040. *new_fsb = XFS_AGB_TO_FSB(mp, agno, new_agbno);
  1041. break;
  1042. case XFS_REFCOUNT_ALLOC_COW:
  1043. *new_fsb = startblock + blockcount;
  1044. *new_len = 0;
  1045. error = __xfs_refcount_cow_alloc(rcur, bno, blockcount, dfops);
  1046. break;
  1047. case XFS_REFCOUNT_FREE_COW:
  1048. *new_fsb = startblock + blockcount;
  1049. *new_len = 0;
  1050. error = __xfs_refcount_cow_free(rcur, bno, blockcount, dfops);
  1051. break;
  1052. default:
  1053. ASSERT(0);
  1054. error = -EFSCORRUPTED;
  1055. }
  1056. if (!error && *new_len > 0)
  1057. trace_xfs_refcount_finish_one_leftover(mp, agno, type,
  1058. bno, blockcount, new_agbno, *new_len);
  1059. return error;
  1060. out_cur:
  1061. xfs_trans_brelse(tp, agbp);
  1062. return error;
  1063. }
  1064. /*
  1065. * Record a refcount intent for later processing.
  1066. */
  1067. static int
  1068. __xfs_refcount_add(
  1069. struct xfs_mount *mp,
  1070. struct xfs_defer_ops *dfops,
  1071. enum xfs_refcount_intent_type type,
  1072. xfs_fsblock_t startblock,
  1073. xfs_extlen_t blockcount)
  1074. {
  1075. struct xfs_refcount_intent *ri;
  1076. trace_xfs_refcount_defer(mp, XFS_FSB_TO_AGNO(mp, startblock),
  1077. type, XFS_FSB_TO_AGBNO(mp, startblock),
  1078. blockcount);
  1079. ri = kmem_alloc(sizeof(struct xfs_refcount_intent),
  1080. KM_SLEEP | KM_NOFS);
  1081. INIT_LIST_HEAD(&ri->ri_list);
  1082. ri->ri_type = type;
  1083. ri->ri_startblock = startblock;
  1084. ri->ri_blockcount = blockcount;
  1085. xfs_defer_add(dfops, XFS_DEFER_OPS_TYPE_REFCOUNT, &ri->ri_list);
  1086. return 0;
  1087. }
  1088. /*
  1089. * Increase the reference count of the blocks backing a file's extent.
  1090. */
  1091. int
  1092. xfs_refcount_increase_extent(
  1093. struct xfs_mount *mp,
  1094. struct xfs_defer_ops *dfops,
  1095. struct xfs_bmbt_irec *PREV)
  1096. {
  1097. if (!xfs_sb_version_hasreflink(&mp->m_sb))
  1098. return 0;
  1099. return __xfs_refcount_add(mp, dfops, XFS_REFCOUNT_INCREASE,
  1100. PREV->br_startblock, PREV->br_blockcount);
  1101. }
  1102. /*
  1103. * Decrease the reference count of the blocks backing a file's extent.
  1104. */
  1105. int
  1106. xfs_refcount_decrease_extent(
  1107. struct xfs_mount *mp,
  1108. struct xfs_defer_ops *dfops,
  1109. struct xfs_bmbt_irec *PREV)
  1110. {
  1111. if (!xfs_sb_version_hasreflink(&mp->m_sb))
  1112. return 0;
  1113. return __xfs_refcount_add(mp, dfops, XFS_REFCOUNT_DECREASE,
  1114. PREV->br_startblock, PREV->br_blockcount);
  1115. }
  1116. /*
  1117. * Given an AG extent, find the lowest-numbered run of shared blocks
  1118. * within that range and return the range in fbno/flen. If
  1119. * find_end_of_shared is set, return the longest contiguous extent of
  1120. * shared blocks; if not, just return the first extent we find. If no
  1121. * shared blocks are found, fbno and flen will be set to NULLAGBLOCK
  1122. * and 0, respectively.
  1123. */
  1124. int
  1125. xfs_refcount_find_shared(
  1126. struct xfs_btree_cur *cur,
  1127. xfs_agblock_t agbno,
  1128. xfs_extlen_t aglen,
  1129. xfs_agblock_t *fbno,
  1130. xfs_extlen_t *flen,
  1131. bool find_end_of_shared)
  1132. {
  1133. struct xfs_refcount_irec tmp;
  1134. int i;
  1135. int have;
  1136. int error;
  1137. trace_xfs_refcount_find_shared(cur->bc_mp, cur->bc_private.a.agno,
  1138. agbno, aglen);
  1139. /* By default, skip the whole range */
  1140. *fbno = NULLAGBLOCK;
  1141. *flen = 0;
  1142. /* Try to find a refcount extent that crosses the start */
  1143. error = xfs_refcount_lookup_le(cur, agbno, &have);
  1144. if (error)
  1145. goto out_error;
  1146. if (!have) {
  1147. /* No left extent, look at the next one */
  1148. error = xfs_btree_increment(cur, 0, &have);
  1149. if (error)
  1150. goto out_error;
  1151. if (!have)
  1152. goto done;
  1153. }
  1154. error = xfs_refcount_get_rec(cur, &tmp, &i);
  1155. if (error)
  1156. goto out_error;
  1157. XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, out_error);
  1158. /* If the extent ends before the start, look at the next one */
  1159. if (tmp.rc_startblock + tmp.rc_blockcount <= agbno) {
  1160. error = xfs_btree_increment(cur, 0, &have);
  1161. if (error)
  1162. goto out_error;
  1163. if (!have)
  1164. goto done;
  1165. error = xfs_refcount_get_rec(cur, &tmp, &i);
  1166. if (error)
  1167. goto out_error;
  1168. XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, out_error);
  1169. }
  1170. /* If the extent starts after the range we want, bail out */
  1171. if (tmp.rc_startblock >= agbno + aglen)
  1172. goto done;
  1173. /* We found the start of a shared extent! */
  1174. if (tmp.rc_startblock < agbno) {
  1175. tmp.rc_blockcount -= (agbno - tmp.rc_startblock);
  1176. tmp.rc_startblock = agbno;
  1177. }
  1178. *fbno = tmp.rc_startblock;
  1179. *flen = min(tmp.rc_blockcount, agbno + aglen - *fbno);
  1180. if (!find_end_of_shared)
  1181. goto done;
  1182. /* Otherwise, find the end of this shared extent */
  1183. while (*fbno + *flen < agbno + aglen) {
  1184. error = xfs_btree_increment(cur, 0, &have);
  1185. if (error)
  1186. goto out_error;
  1187. if (!have)
  1188. break;
  1189. error = xfs_refcount_get_rec(cur, &tmp, &i);
  1190. if (error)
  1191. goto out_error;
  1192. XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, out_error);
  1193. if (tmp.rc_startblock >= agbno + aglen ||
  1194. tmp.rc_startblock != *fbno + *flen)
  1195. break;
  1196. *flen = min(*flen + tmp.rc_blockcount, agbno + aglen - *fbno);
  1197. }
  1198. done:
  1199. trace_xfs_refcount_find_shared_result(cur->bc_mp,
  1200. cur->bc_private.a.agno, *fbno, *flen);
  1201. out_error:
  1202. if (error)
  1203. trace_xfs_refcount_find_shared_error(cur->bc_mp,
  1204. cur->bc_private.a.agno, error, _RET_IP_);
  1205. return error;
  1206. }
  1207. /*
  1208. * Recovering CoW Blocks After a Crash
  1209. *
  1210. * Due to the way that the copy on write mechanism works, there's a window of
  1211. * opportunity in which we can lose track of allocated blocks during a crash.
  1212. * Because CoW uses delayed allocation in the in-core CoW fork, writeback
  1213. * causes blocks to be allocated and stored in the CoW fork. The blocks are
  1214. * no longer in the free space btree but are not otherwise recorded anywhere
  1215. * until the write completes and the blocks are mapped into the file. A crash
  1216. * in between allocation and remapping results in the replacement blocks being
  1217. * lost. This situation is exacerbated by the CoW extent size hint because
  1218. * allocations can hang around for long time.
  1219. *
  1220. * However, there is a place where we can record these allocations before they
  1221. * become mappings -- the reference count btree. The btree does not record
  1222. * extents with refcount == 1, so we can record allocations with a refcount of
  1223. * 1. Blocks being used for CoW writeout cannot be shared, so there should be
  1224. * no conflict with shared block records. These mappings should be created
  1225. * when we allocate blocks to the CoW fork and deleted when they're removed
  1226. * from the CoW fork.
  1227. *
  1228. * Minor nit: records for in-progress CoW allocations and records for shared
  1229. * extents must never be merged, to preserve the property that (except for CoW
  1230. * allocations) there are no refcount btree entries with refcount == 1. The
  1231. * only time this could potentially happen is when unsharing a block that's
  1232. * adjacent to CoW allocations, so we must be careful to avoid this.
  1233. *
  1234. * At mount time we recover lost CoW allocations by searching the refcount
  1235. * btree for these refcount == 1 mappings. These represent CoW allocations
  1236. * that were in progress at the time the filesystem went down, so we can free
  1237. * them to get the space back.
  1238. *
  1239. * This mechanism is superior to creating EFIs for unmapped CoW extents for
  1240. * several reasons -- first, EFIs pin the tail of the log and would have to be
  1241. * periodically relogged to avoid filling up the log. Second, CoW completions
  1242. * will have to file an EFD and create new EFIs for whatever remains in the
  1243. * CoW fork; this partially takes care of (1) but extent-size reservations
  1244. * will have to periodically relog even if there's no writeout in progress.
  1245. * This can happen if the CoW extent size hint is set, which you really want.
  1246. * Third, EFIs cannot currently be automatically relogged into newer
  1247. * transactions to advance the log tail. Fourth, stuffing the log full of
  1248. * EFIs places an upper bound on the number of CoW allocations that can be
  1249. * held filesystem-wide at any given time. Recording them in the refcount
  1250. * btree doesn't require us to maintain any state in memory and doesn't pin
  1251. * the log.
  1252. */
  1253. /*
  1254. * Adjust the refcounts of CoW allocations. These allocations are "magic"
  1255. * in that they're not referenced anywhere else in the filesystem, so we
  1256. * stash them in the refcount btree with a refcount of 1 until either file
  1257. * remapping (or CoW cancellation) happens.
  1258. */
  1259. STATIC int
  1260. xfs_refcount_adjust_cow_extents(
  1261. struct xfs_btree_cur *cur,
  1262. xfs_agblock_t agbno,
  1263. xfs_extlen_t aglen,
  1264. enum xfs_refc_adjust_op adj)
  1265. {
  1266. struct xfs_refcount_irec ext, tmp;
  1267. int error;
  1268. int found_rec, found_tmp;
  1269. if (aglen == 0)
  1270. return 0;
  1271. /* Find any overlapping refcount records */
  1272. error = xfs_refcount_lookup_ge(cur, agbno, &found_rec);
  1273. if (error)
  1274. goto out_error;
  1275. error = xfs_refcount_get_rec(cur, &ext, &found_rec);
  1276. if (error)
  1277. goto out_error;
  1278. if (!found_rec) {
  1279. ext.rc_startblock = cur->bc_mp->m_sb.sb_agblocks +
  1280. XFS_REFC_COW_START;
  1281. ext.rc_blockcount = 0;
  1282. ext.rc_refcount = 0;
  1283. }
  1284. switch (adj) {
  1285. case XFS_REFCOUNT_ADJUST_COW_ALLOC:
  1286. /* Adding a CoW reservation, there should be nothing here. */
  1287. XFS_WANT_CORRUPTED_GOTO(cur->bc_mp,
  1288. ext.rc_startblock >= agbno + aglen, out_error);
  1289. tmp.rc_startblock = agbno;
  1290. tmp.rc_blockcount = aglen;
  1291. tmp.rc_refcount = 1;
  1292. trace_xfs_refcount_modify_extent(cur->bc_mp,
  1293. cur->bc_private.a.agno, &tmp);
  1294. error = xfs_refcount_insert(cur, &tmp,
  1295. &found_tmp);
  1296. if (error)
  1297. goto out_error;
  1298. XFS_WANT_CORRUPTED_GOTO(cur->bc_mp,
  1299. found_tmp == 1, out_error);
  1300. break;
  1301. case XFS_REFCOUNT_ADJUST_COW_FREE:
  1302. /* Removing a CoW reservation, there should be one extent. */
  1303. XFS_WANT_CORRUPTED_GOTO(cur->bc_mp,
  1304. ext.rc_startblock == agbno, out_error);
  1305. XFS_WANT_CORRUPTED_GOTO(cur->bc_mp,
  1306. ext.rc_blockcount == aglen, out_error);
  1307. XFS_WANT_CORRUPTED_GOTO(cur->bc_mp,
  1308. ext.rc_refcount == 1, out_error);
  1309. ext.rc_refcount = 0;
  1310. trace_xfs_refcount_modify_extent(cur->bc_mp,
  1311. cur->bc_private.a.agno, &ext);
  1312. error = xfs_refcount_delete(cur, &found_rec);
  1313. if (error)
  1314. goto out_error;
  1315. XFS_WANT_CORRUPTED_GOTO(cur->bc_mp,
  1316. found_rec == 1, out_error);
  1317. break;
  1318. default:
  1319. ASSERT(0);
  1320. }
  1321. return error;
  1322. out_error:
  1323. trace_xfs_refcount_modify_extent_error(cur->bc_mp,
  1324. cur->bc_private.a.agno, error, _RET_IP_);
  1325. return error;
  1326. }
  1327. /*
  1328. * Add or remove refcount btree entries for CoW reservations.
  1329. */
  1330. STATIC int
  1331. xfs_refcount_adjust_cow(
  1332. struct xfs_btree_cur *cur,
  1333. xfs_agblock_t agbno,
  1334. xfs_extlen_t aglen,
  1335. enum xfs_refc_adjust_op adj)
  1336. {
  1337. bool shape_changed;
  1338. int error;
  1339. agbno += XFS_REFC_COW_START;
  1340. /*
  1341. * Ensure that no rcextents cross the boundary of the adjustment range.
  1342. */
  1343. error = xfs_refcount_split_extent(cur, agbno, &shape_changed);
  1344. if (error)
  1345. goto out_error;
  1346. error = xfs_refcount_split_extent(cur, agbno + aglen, &shape_changed);
  1347. if (error)
  1348. goto out_error;
  1349. /*
  1350. * Try to merge with the left or right extents of the range.
  1351. */
  1352. error = xfs_refcount_merge_extents(cur, &agbno, &aglen, adj,
  1353. XFS_FIND_RCEXT_COW, &shape_changed);
  1354. if (error)
  1355. goto out_error;
  1356. /* Now that we've taken care of the ends, adjust the middle extents */
  1357. error = xfs_refcount_adjust_cow_extents(cur, agbno, aglen, adj);
  1358. if (error)
  1359. goto out_error;
  1360. return 0;
  1361. out_error:
  1362. trace_xfs_refcount_adjust_cow_error(cur->bc_mp, cur->bc_private.a.agno,
  1363. error, _RET_IP_);
  1364. return error;
  1365. }
  1366. /*
  1367. * Record a CoW allocation in the refcount btree.
  1368. */
  1369. STATIC int
  1370. __xfs_refcount_cow_alloc(
  1371. struct xfs_btree_cur *rcur,
  1372. xfs_agblock_t agbno,
  1373. xfs_extlen_t aglen,
  1374. struct xfs_defer_ops *dfops)
  1375. {
  1376. trace_xfs_refcount_cow_increase(rcur->bc_mp, rcur->bc_private.a.agno,
  1377. agbno, aglen);
  1378. /* Add refcount btree reservation */
  1379. return xfs_refcount_adjust_cow(rcur, agbno, aglen,
  1380. XFS_REFCOUNT_ADJUST_COW_ALLOC);
  1381. }
  1382. /*
  1383. * Remove a CoW allocation from the refcount btree.
  1384. */
  1385. STATIC int
  1386. __xfs_refcount_cow_free(
  1387. struct xfs_btree_cur *rcur,
  1388. xfs_agblock_t agbno,
  1389. xfs_extlen_t aglen,
  1390. struct xfs_defer_ops *dfops)
  1391. {
  1392. trace_xfs_refcount_cow_decrease(rcur->bc_mp, rcur->bc_private.a.agno,
  1393. agbno, aglen);
  1394. /* Remove refcount btree reservation */
  1395. return xfs_refcount_adjust_cow(rcur, agbno, aglen,
  1396. XFS_REFCOUNT_ADJUST_COW_FREE);
  1397. }
  1398. /* Record a CoW staging extent in the refcount btree. */
  1399. int
  1400. xfs_refcount_alloc_cow_extent(
  1401. struct xfs_mount *mp,
  1402. struct xfs_defer_ops *dfops,
  1403. xfs_fsblock_t fsb,
  1404. xfs_extlen_t len)
  1405. {
  1406. int error;
  1407. if (!xfs_sb_version_hasreflink(&mp->m_sb))
  1408. return 0;
  1409. error = __xfs_refcount_add(mp, dfops, XFS_REFCOUNT_ALLOC_COW,
  1410. fsb, len);
  1411. if (error)
  1412. return error;
  1413. /* Add rmap entry */
  1414. return xfs_rmap_alloc_extent(mp, dfops, XFS_FSB_TO_AGNO(mp, fsb),
  1415. XFS_FSB_TO_AGBNO(mp, fsb), len, XFS_RMAP_OWN_COW);
  1416. }
  1417. /* Forget a CoW staging event in the refcount btree. */
  1418. int
  1419. xfs_refcount_free_cow_extent(
  1420. struct xfs_mount *mp,
  1421. struct xfs_defer_ops *dfops,
  1422. xfs_fsblock_t fsb,
  1423. xfs_extlen_t len)
  1424. {
  1425. int error;
  1426. if (!xfs_sb_version_hasreflink(&mp->m_sb))
  1427. return 0;
  1428. /* Remove rmap entry */
  1429. error = xfs_rmap_free_extent(mp, dfops, XFS_FSB_TO_AGNO(mp, fsb),
  1430. XFS_FSB_TO_AGBNO(mp, fsb), len, XFS_RMAP_OWN_COW);
  1431. if (error)
  1432. return error;
  1433. return __xfs_refcount_add(mp, dfops, XFS_REFCOUNT_FREE_COW,
  1434. fsb, len);
  1435. }
  1436. struct xfs_refcount_recovery {
  1437. struct list_head rr_list;
  1438. struct xfs_refcount_irec rr_rrec;
  1439. };
  1440. /* Stuff an extent on the recovery list. */
  1441. STATIC int
  1442. xfs_refcount_recover_extent(
  1443. struct xfs_btree_cur *cur,
  1444. union xfs_btree_rec *rec,
  1445. void *priv)
  1446. {
  1447. struct list_head *debris = priv;
  1448. struct xfs_refcount_recovery *rr;
  1449. if (be32_to_cpu(rec->refc.rc_refcount) != 1)
  1450. return -EFSCORRUPTED;
  1451. rr = kmem_alloc(sizeof(struct xfs_refcount_recovery), KM_SLEEP);
  1452. xfs_refcount_btrec_to_irec(rec, &rr->rr_rrec);
  1453. list_add_tail(&rr->rr_list, debris);
  1454. return 0;
  1455. }
  1456. /* Find and remove leftover CoW reservations. */
  1457. int
  1458. xfs_refcount_recover_cow_leftovers(
  1459. struct xfs_mount *mp,
  1460. xfs_agnumber_t agno)
  1461. {
  1462. struct xfs_trans *tp;
  1463. struct xfs_btree_cur *cur;
  1464. struct xfs_buf *agbp;
  1465. struct xfs_refcount_recovery *rr, *n;
  1466. struct list_head debris;
  1467. union xfs_btree_irec low;
  1468. union xfs_btree_irec high;
  1469. struct xfs_defer_ops dfops;
  1470. xfs_fsblock_t fsb;
  1471. xfs_agblock_t agbno;
  1472. int error;
  1473. if (mp->m_sb.sb_agblocks >= XFS_REFC_COW_START)
  1474. return -EOPNOTSUPP;
  1475. INIT_LIST_HEAD(&debris);
  1476. /*
  1477. * In this first part, we use an empty transaction to gather up
  1478. * all the leftover CoW extents so that we can subsequently
  1479. * delete them. The empty transaction is used to avoid
  1480. * a buffer lock deadlock if there happens to be a loop in the
  1481. * refcountbt because we're allowed to re-grab a buffer that is
  1482. * already attached to our transaction. When we're done
  1483. * recording the CoW debris we cancel the (empty) transaction
  1484. * and everything goes away cleanly.
  1485. */
  1486. error = xfs_trans_alloc_empty(mp, &tp);
  1487. if (error)
  1488. return error;
  1489. error = xfs_alloc_read_agf(mp, tp, agno, 0, &agbp);
  1490. if (error)
  1491. goto out_trans;
  1492. if (!agbp) {
  1493. error = -ENOMEM;
  1494. goto out_trans;
  1495. }
  1496. cur = xfs_refcountbt_init_cursor(mp, tp, agbp, agno, NULL);
  1497. /* Find all the leftover CoW staging extents. */
  1498. memset(&low, 0, sizeof(low));
  1499. memset(&high, 0, sizeof(high));
  1500. low.rc.rc_startblock = XFS_REFC_COW_START;
  1501. high.rc.rc_startblock = -1U;
  1502. error = xfs_btree_query_range(cur, &low, &high,
  1503. xfs_refcount_recover_extent, &debris);
  1504. if (error)
  1505. goto out_cursor;
  1506. xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
  1507. xfs_trans_brelse(tp, agbp);
  1508. xfs_trans_cancel(tp);
  1509. /* Now iterate the list to free the leftovers */
  1510. list_for_each_entry_safe(rr, n, &debris, rr_list) {
  1511. /* Set up transaction. */
  1512. error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0, 0, 0, &tp);
  1513. if (error)
  1514. goto out_free;
  1515. trace_xfs_refcount_recover_extent(mp, agno, &rr->rr_rrec);
  1516. /* Free the orphan record */
  1517. xfs_defer_init(&dfops, &fsb);
  1518. agbno = rr->rr_rrec.rc_startblock - XFS_REFC_COW_START;
  1519. fsb = XFS_AGB_TO_FSB(mp, agno, agbno);
  1520. error = xfs_refcount_free_cow_extent(mp, &dfops, fsb,
  1521. rr->rr_rrec.rc_blockcount);
  1522. if (error)
  1523. goto out_defer;
  1524. /* Free the block. */
  1525. xfs_bmap_add_free(mp, &dfops, fsb,
  1526. rr->rr_rrec.rc_blockcount, NULL);
  1527. error = xfs_defer_finish(&tp, &dfops);
  1528. if (error)
  1529. goto out_defer;
  1530. error = xfs_trans_commit(tp);
  1531. if (error)
  1532. goto out_free;
  1533. list_del(&rr->rr_list);
  1534. kmem_free(rr);
  1535. }
  1536. return error;
  1537. out_defer:
  1538. xfs_defer_cancel(&dfops);
  1539. out_trans:
  1540. xfs_trans_cancel(tp);
  1541. out_free:
  1542. /* Free the leftover list */
  1543. list_for_each_entry_safe(rr, n, &debris, rr_list) {
  1544. list_del(&rr->rr_list);
  1545. kmem_free(rr);
  1546. }
  1547. return error;
  1548. out_cursor:
  1549. xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
  1550. xfs_trans_brelse(tp, agbp);
  1551. goto out_trans;
  1552. }
  1553. /* Is there a record covering a given extent? */
  1554. int
  1555. xfs_refcount_has_record(
  1556. struct xfs_btree_cur *cur,
  1557. xfs_agblock_t bno,
  1558. xfs_extlen_t len,
  1559. bool *exists)
  1560. {
  1561. union xfs_btree_irec low;
  1562. union xfs_btree_irec high;
  1563. memset(&low, 0, sizeof(low));
  1564. low.rc.rc_startblock = bno;
  1565. memset(&high, 0xFF, sizeof(high));
  1566. high.rc.rc_startblock = bno + len - 1;
  1567. return xfs_btree_has_record(cur, &low, &high, exists);
  1568. }