delayed-ref.c

// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2009 Oracle.  All rights reserved.
 */

#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/sort.h>
#include "ctree.h"
#include "delayed-ref.h"
#include "transaction.h"
#include "qgroup.h"

struct kmem_cache *btrfs_delayed_ref_head_cachep;
struct kmem_cache *btrfs_delayed_tree_ref_cachep;
struct kmem_cache *btrfs_delayed_data_ref_cachep;
struct kmem_cache *btrfs_delayed_extent_op_cachep;

/*
 * delayed back reference update tracking.  For subvolume trees
 * we queue up extent allocations and backref maintenance for
 * delayed processing.  This avoids deep call chains where we
 * add extents in the middle of btrfs_search_slot, and it allows
 * us to buffer up frequently modified backrefs in an rb tree instead
 * of hammering updates on the extent allocation tree.
 */

/*
 * compare two delayed tree backrefs with same bytenr and type
 */
static int comp_tree_refs(struct btrfs_delayed_tree_ref *ref1,
			  struct btrfs_delayed_tree_ref *ref2)
{
	if (ref1->node.type == BTRFS_TREE_BLOCK_REF_KEY) {
		if (ref1->root < ref2->root)
			return -1;
		if (ref1->root > ref2->root)
			return 1;
	} else {
		if (ref1->parent < ref2->parent)
			return -1;
		if (ref1->parent > ref2->parent)
			return 1;
	}
	return 0;
}

/*
 * compare two delayed data backrefs with same bytenr and type
 */
static int comp_data_refs(struct btrfs_delayed_data_ref *ref1,
			  struct btrfs_delayed_data_ref *ref2)
{
	if (ref1->node.type == BTRFS_EXTENT_DATA_REF_KEY) {
		if (ref1->root < ref2->root)
			return -1;
		if (ref1->root > ref2->root)
			return 1;
		if (ref1->objectid < ref2->objectid)
			return -1;
		if (ref1->objectid > ref2->objectid)
			return 1;
		if (ref1->offset < ref2->offset)
			return -1;
		if (ref1->offset > ref2->offset)
			return 1;
	} else {
		if (ref1->parent < ref2->parent)
			return -1;
		if (ref1->parent > ref2->parent)
			return 1;
	}
	return 0;
}
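
/*
 * Compare two delayed ref nodes: first by type, then by the type specific
 * fields and, when check_seq is set, by sequence number as a tie breaker.
 * This is the ordering used for the per-head ref rbtree.
 */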
static int comp_refs(struct btrfs_delayed_ref_node *ref1,
		     struct btrfs_delayed_ref_node *ref2,
		     bool check_seq)
{
	int ret = 0;

	if (ref1->type < ref2->type)
		return -1;
	if (ref1->type > ref2->type)
		return 1;
	if (ref1->type == BTRFS_TREE_BLOCK_REF_KEY ||
	    ref1->type == BTRFS_SHARED_BLOCK_REF_KEY)
		ret = comp_tree_refs(btrfs_delayed_node_to_tree_ref(ref1),
				     btrfs_delayed_node_to_tree_ref(ref2));
	else
		ret = comp_data_refs(btrfs_delayed_node_to_data_ref(ref1),
				     btrfs_delayed_node_to_data_ref(ref2));
	if (ret)
		return ret;
	if (check_seq) {
		if (ref1->seq < ref2->seq)
			return -1;
		if (ref1->seq > ref2->seq)
			return 1;
	}
	return 0;
}

/* insert a new ref to head ref rbtree */
static struct btrfs_delayed_ref_head *htree_insert(struct rb_root_cached *root,
						   struct rb_node *node)
{
	struct rb_node **p = &root->rb_root.rb_node;
	struct rb_node *parent_node = NULL;
	struct btrfs_delayed_ref_head *entry;
	struct btrfs_delayed_ref_head *ins;
	u64 bytenr;
	bool leftmost = true;

	ins = rb_entry(node, struct btrfs_delayed_ref_head, href_node);
	bytenr = ins->bytenr;
	while (*p) {
		parent_node = *p;
		entry = rb_entry(parent_node, struct btrfs_delayed_ref_head,
				 href_node);

		if (bytenr < entry->bytenr) {
			p = &(*p)->rb_left;
		} else if (bytenr > entry->bytenr) {
			p = &(*p)->rb_right;
			leftmost = false;
		} else {
			return entry;
		}
	}

	rb_link_node(node, parent_node, p);
	rb_insert_color_cached(node, root, leftmost);
	return NULL;
}
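
/*
 * Insert a delayed ref node into a head's ref rbtree, ordered by comp_refs()
 * including the sequence number. Returns NULL on success, or the existing
 * node when an equal entry is already present so the caller can merge into it.
 */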
static struct btrfs_delayed_ref_node* tree_insert(struct rb_root_cached *root,
		struct btrfs_delayed_ref_node *ins)
{
	struct rb_node **p = &root->rb_root.rb_node;
	struct rb_node *node = &ins->ref_node;
	struct rb_node *parent_node = NULL;
	struct btrfs_delayed_ref_node *entry;
	bool leftmost = true;

	while (*p) {
		int comp;

		parent_node = *p;
		entry = rb_entry(parent_node, struct btrfs_delayed_ref_node,
				 ref_node);
		comp = comp_refs(ins, entry, true);
		if (comp < 0) {
			p = &(*p)->rb_left;
		} else if (comp > 0) {
			p = &(*p)->rb_right;
			leftmost = false;
		} else {
			return entry;
		}
	}

	rb_link_node(node, parent_node, p);
	rb_insert_color_cached(node, root, leftmost);
	return NULL;
}
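
/*
 * Return the delayed ref head with the smallest bytenr, i.e. the leftmost
 * entry in the href rbtree, or NULL if no heads are queued.
 */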
static struct btrfs_delayed_ref_head *find_first_ref_head(
		struct btrfs_delayed_ref_root *dr)
{
	struct rb_node *n;
	struct btrfs_delayed_ref_head *entry;

	n = rb_first_cached(&dr->href_root);
	if (!n)
		return NULL;

	entry = rb_entry(n, struct btrfs_delayed_ref_head, href_node);

	return entry;
}

/*
 * Find a head entry based on bytenr. This returns the delayed ref head if it
 * was able to find one, or NULL if nothing was in that spot.  If return_bigger
 * is given, the next bigger entry is returned if no exact match is found.
 */
static struct btrfs_delayed_ref_head *find_ref_head(
		struct btrfs_delayed_ref_root *dr, u64 bytenr,
		bool return_bigger)
{
	struct rb_root *root = &dr->href_root.rb_root;
	struct rb_node *n;
	struct btrfs_delayed_ref_head *entry;

	n = root->rb_node;
	entry = NULL;
	while (n) {
		entry = rb_entry(n, struct btrfs_delayed_ref_head, href_node);

		if (bytenr < entry->bytenr)
			n = n->rb_left;
		else if (bytenr > entry->bytenr)
			n = n->rb_right;
		else
			return entry;
	}
	if (entry && return_bigger) {
		if (bytenr > entry->bytenr) {
			n = rb_next(&entry->href_node);
			if (!n)
				return NULL;
			entry = rb_entry(n, struct btrfs_delayed_ref_head,
					 href_node);
		}
		return entry;
	}
	return NULL;
}
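
/*
 * Lock the mutex of a delayed ref head while the delayed_refs spinlock is
 * held. If the trylock fails, the spinlock is dropped while sleeping on the
 * mutex and re-acquired afterwards. Returns -EAGAIN when the head was removed
 * from the rbtree in the meantime, in which case the caller is expected to
 * look up a head again.
 */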
int btrfs_delayed_ref_lock(struct btrfs_delayed_ref_root *delayed_refs,
			   struct btrfs_delayed_ref_head *head)
{
	lockdep_assert_held(&delayed_refs->lock);
	if (mutex_trylock(&head->mutex))
		return 0;

	refcount_inc(&head->refs);
	spin_unlock(&delayed_refs->lock);

	mutex_lock(&head->mutex);
	spin_lock(&delayed_refs->lock);
	if (RB_EMPTY_NODE(&head->href_node)) {
		mutex_unlock(&head->mutex);
		btrfs_put_delayed_ref_head(head);
		return -EAGAIN;
	}
	btrfs_put_delayed_ref_head(head);
	return 0;
}
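
/*
 * Remove a single delayed ref node from its head: unlink it from the ref
 * rbtree and the add_list, drop its reference and update the entry counters.
 */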
static inline void drop_delayed_ref(struct btrfs_trans_handle *trans,
				    struct btrfs_delayed_ref_root *delayed_refs,
				    struct btrfs_delayed_ref_head *head,
				    struct btrfs_delayed_ref_node *ref)
{
	lockdep_assert_held(&head->lock);
	rb_erase_cached(&ref->ref_node, &head->ref_tree);
	RB_CLEAR_NODE(&ref->ref_node);
	if (!list_empty(&ref->add_list))
		list_del(&ref->add_list);
	ref->in_tree = 0;
	btrfs_put_delayed_ref(ref);
	atomic_dec(&delayed_refs->num_entries);
	if (trans->delayed_ref_updates)
		trans->delayed_ref_updates--;
}
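
/*
 * Try to merge @ref with the refs that follow it in the head's rbtree and
 * compare equal when the sequence number is ignored. Refs at or above @seq
 * are left alone so tree mod log users still see them. Returns true when
 * @ref itself was dropped or replaced, in which case the caller restarts
 * its walk over the tree.
 */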
static bool merge_ref(struct btrfs_trans_handle *trans,
		      struct btrfs_delayed_ref_root *delayed_refs,
		      struct btrfs_delayed_ref_head *head,
		      struct btrfs_delayed_ref_node *ref,
		      u64 seq)
{
	struct btrfs_delayed_ref_node *next;
	struct rb_node *node = rb_next(&ref->ref_node);
	bool done = false;

	while (!done && node) {
		int mod;

		next = rb_entry(node, struct btrfs_delayed_ref_node, ref_node);
		node = rb_next(node);
		if (seq && next->seq >= seq)
			break;
		if (comp_refs(ref, next, false))
			break;

		if (ref->action == next->action) {
			mod = next->ref_mod;
		} else {
			if (ref->ref_mod < next->ref_mod) {
				swap(ref, next);
				done = true;
			}
			mod = -next->ref_mod;
		}

		drop_delayed_ref(trans, delayed_refs, head, next);
		ref->ref_mod += mod;
		if (ref->ref_mod == 0) {
			drop_delayed_ref(trans, delayed_refs, head, ref);
			done = true;
		} else {
			/*
			 * Can't have multiples of the same ref on a tree block.
			 */
			WARN_ON(ref->type == BTRFS_TREE_BLOCK_REF_KEY ||
				ref->type == BTRFS_SHARED_BLOCK_REF_KEY);
		}
	}

	return done;
}
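
/*
 * Walk all refs of a head and merge entries that refer to the same backref,
 * so fewer separate updates have to be applied to the extent allocation tree
 * when the head is run. Refs newer than the lowest outstanding tree mod log
 * sequence number are skipped, and data heads are not merged at all.
 */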
void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans,
			      struct btrfs_delayed_ref_root *delayed_refs,
			      struct btrfs_delayed_ref_head *head)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct btrfs_delayed_ref_node *ref;
	struct rb_node *node;
	u64 seq = 0;

	lockdep_assert_held(&head->lock);

	if (RB_EMPTY_ROOT(&head->ref_tree.rb_root))
		return;

	/* We don't have too many refs to merge for data. */
	if (head->is_data)
		return;

	spin_lock(&fs_info->tree_mod_seq_lock);
	if (!list_empty(&fs_info->tree_mod_seq_list)) {
		struct seq_list *elem;

		elem = list_first_entry(&fs_info->tree_mod_seq_list,
					struct seq_list, list);
		seq = elem->seq;
	}
	spin_unlock(&fs_info->tree_mod_seq_lock);

again:
	for (node = rb_first_cached(&head->ref_tree); node;
	     node = rb_next(node)) {
		ref = rb_entry(node, struct btrfs_delayed_ref_node, ref_node);
		if (seq && ref->seq >= seq)
			continue;
		if (merge_ref(trans, delayed_refs, head, ref, seq))
			goto again;
	}
}
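
/*
 * Return 1 when @seq is at or above the lowest sequence number still tracked
 * in the tree mod log, meaning the corresponding delayed ref has to be held
 * back; return 0 when it is safe to process it now.
 */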
int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info, u64 seq)
{
	struct seq_list *elem;
	int ret = 0;

	spin_lock(&fs_info->tree_mod_seq_lock);
	if (!list_empty(&fs_info->tree_mod_seq_list)) {
		elem = list_first_entry(&fs_info->tree_mod_seq_list,
					struct seq_list, list);
		if (seq >= elem->seq) {
			btrfs_debug(fs_info,
				"holding back delayed_ref %#x.%x, lowest is %#x.%x",
				(u32)(seq >> 32), (u32)seq,
				(u32)(elem->seq >> 32), (u32)elem->seq);
			ret = 1;
		}
	}

	spin_unlock(&fs_info->tree_mod_seq_lock);
	return ret;
}
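
/*
 * Pick the next delayed ref head to run, starting the search at
 * run_delayed_start and wrapping around to the beginning of the tree once.
 * Heads already marked as processing are skipped. The returned head is marked
 * processing and run_delayed_start is advanced past it.
 */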
struct btrfs_delayed_ref_head *btrfs_select_ref_head(
		struct btrfs_delayed_ref_root *delayed_refs)
{
	struct btrfs_delayed_ref_head *head;

again:
	head = find_ref_head(delayed_refs, delayed_refs->run_delayed_start,
			     true);
	if (!head && delayed_refs->run_delayed_start != 0) {
		delayed_refs->run_delayed_start = 0;
		head = find_first_ref_head(delayed_refs);
	}
	if (!head)
		return NULL;

	while (head->processing) {
		struct rb_node *node;

		node = rb_next(&head->href_node);
		if (!node) {
			if (delayed_refs->run_delayed_start == 0)
				return NULL;
			delayed_refs->run_delayed_start = 0;
			goto again;
		}
		head = rb_entry(node, struct btrfs_delayed_ref_head,
				href_node);
	}

	head->processing = 1;
	WARN_ON(delayed_refs->num_heads_ready == 0);
	delayed_refs->num_heads_ready--;
	delayed_refs->run_delayed_start = head->bytenr +
		head->num_bytes;
	return head;
}

/*
 * Helper to insert the ref_node to the tail or merge with tail.
 *
 * Return 0 for insert.
 * Return >0 for merge.
 */
static int insert_delayed_ref(struct btrfs_trans_handle *trans,
			      struct btrfs_delayed_ref_root *root,
			      struct btrfs_delayed_ref_head *href,
			      struct btrfs_delayed_ref_node *ref)
{
	struct btrfs_delayed_ref_node *exist;
	int mod;
	int ret = 0;

	spin_lock(&href->lock);
	exist = tree_insert(&href->ref_tree, ref);
	if (!exist)
		goto inserted;

	/* Now we are sure we can merge */
	ret = 1;
	if (exist->action == ref->action) {
		mod = ref->ref_mod;
	} else {
		/* Need to change action */
		if (exist->ref_mod < ref->ref_mod) {
			exist->action = ref->action;
			mod = -exist->ref_mod;
			exist->ref_mod = ref->ref_mod;
			if (ref->action == BTRFS_ADD_DELAYED_REF)
				list_add_tail(&exist->add_list,
					      &href->ref_add_list);
			else if (ref->action == BTRFS_DROP_DELAYED_REF) {
				ASSERT(!list_empty(&exist->add_list));
				list_del(&exist->add_list);
			} else {
				ASSERT(0);
			}
		} else
			mod = -ref->ref_mod;
	}
	exist->ref_mod += mod;

	/* remove existing tail if its ref_mod is zero */
	if (exist->ref_mod == 0)
		drop_delayed_ref(trans, root, href, exist);
	spin_unlock(&href->lock);
	return ret;
inserted:
	if (ref->action == BTRFS_ADD_DELAYED_REF)
		list_add_tail(&ref->add_list, &href->ref_add_list);
	atomic_inc(&root->num_entries);
	trans->delayed_ref_updates++;
	spin_unlock(&href->lock);
	return ret;
}

/*
 * helper function to update the accounting in the head ref
 * existing and update must have the same bytenr
 */
static noinline void
update_existing_head_ref(struct btrfs_delayed_ref_root *delayed_refs,
			 struct btrfs_delayed_ref_head *existing,
			 struct btrfs_delayed_ref_head *update,
			 int *old_ref_mod_ret)
{
	int old_ref_mod;

	BUG_ON(existing->is_data != update->is_data);

	spin_lock(&existing->lock);
	if (update->must_insert_reserved) {
		/* if the extent was freed and then
		 * reallocated before the delayed ref
		 * entries were processed, we can end up
		 * with an existing head ref without
		 * the must_insert_reserved flag set.
		 * Set it again here
		 */
		existing->must_insert_reserved = update->must_insert_reserved;

		/*
		 * update the num_bytes so we make sure the accounting
		 * is done correctly
		 */
		existing->num_bytes = update->num_bytes;
	}

	if (update->extent_op) {
		if (!existing->extent_op) {
			existing->extent_op = update->extent_op;
		} else {
			if (update->extent_op->update_key) {
				memcpy(&existing->extent_op->key,
				       &update->extent_op->key,
				       sizeof(update->extent_op->key));
				existing->extent_op->update_key = true;
			}
			if (update->extent_op->update_flags) {
				existing->extent_op->flags_to_set |=
					update->extent_op->flags_to_set;
				existing->extent_op->update_flags = true;
			}
			btrfs_free_delayed_extent_op(update->extent_op);
		}
	}
	/*
	 * update the reference mod on the head to reflect this new operation,
	 * only need the lock for this case because we could be processing it
	 * currently, for refs we just added we know we're a-ok.
	 */
	old_ref_mod = existing->total_ref_mod;
	if (old_ref_mod_ret)
		*old_ref_mod_ret = old_ref_mod;
	existing->ref_mod += update->ref_mod;
	existing->total_ref_mod += update->ref_mod;

	/*
	 * If we are going from a positive ref mod to a negative or vice
	 * versa we need to make sure to adjust pending_csums accordingly.
	 */
	if (existing->is_data) {
		if (existing->total_ref_mod >= 0 && old_ref_mod < 0)
			delayed_refs->pending_csums -= existing->num_bytes;
		if (existing->total_ref_mod < 0 && old_ref_mod >= 0)
			delayed_refs->pending_csums += existing->num_bytes;
	}
	spin_unlock(&existing->lock);
}
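
/*
 * Initialize a freshly allocated delayed ref head: derive the initial ref_mod
 * and must_insert_reserved from the action, set up its lock, rbtree and
 * lists, and, when a qgroup record is passed in, fill in the data needed for
 * qgroup accounting later on.
 */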
static void init_delayed_ref_head(struct btrfs_delayed_ref_head *head_ref,
				  struct btrfs_qgroup_extent_record *qrecord,
				  u64 bytenr, u64 num_bytes, u64 ref_root,
				  u64 reserved, int action, bool is_data,
				  bool is_system)
{
	int count_mod = 1;
	int must_insert_reserved = 0;

	/* If reserved is provided, it must be a data extent. */
	BUG_ON(!is_data && reserved);

	/*
	 * The head node stores the sum of all the mods, so dropping a ref
	 * should drop the sum in the head node by one.
	 */
	if (action == BTRFS_UPDATE_DELAYED_HEAD)
		count_mod = 0;
	else if (action == BTRFS_DROP_DELAYED_REF)
		count_mod = -1;

	/*
	 * BTRFS_ADD_DELAYED_EXTENT means that we need to update the reserved
	 * accounting when the extent is finally added, or if a later
	 * modification deletes the delayed ref without ever inserting the
	 * extent into the extent allocation tree.  ref->must_insert_reserved
	 * is the flag used to record that accounting mods are required.
	 *
	 * Once we record must_insert_reserved, switch the action to
	 * BTRFS_ADD_DELAYED_REF because other special casing is not required.
	 */
	if (action == BTRFS_ADD_DELAYED_EXTENT)
		must_insert_reserved = 1;
	else
		must_insert_reserved = 0;

	refcount_set(&head_ref->refs, 1);
	head_ref->bytenr = bytenr;
	head_ref->num_bytes = num_bytes;
	head_ref->ref_mod = count_mod;
	head_ref->must_insert_reserved = must_insert_reserved;
	head_ref->is_data = is_data;
	head_ref->is_system = is_system;
	head_ref->ref_tree = RB_ROOT_CACHED;
	INIT_LIST_HEAD(&head_ref->ref_add_list);
	RB_CLEAR_NODE(&head_ref->href_node);
	head_ref->processing = 0;
	head_ref->total_ref_mod = count_mod;
	head_ref->qgroup_reserved = 0;
	head_ref->qgroup_ref_root = 0;
	spin_lock_init(&head_ref->lock);
	mutex_init(&head_ref->mutex);

	if (qrecord) {
		if (ref_root && reserved) {
			head_ref->qgroup_ref_root = ref_root;
			head_ref->qgroup_reserved = reserved;
		}

		qrecord->bytenr = bytenr;
		qrecord->num_bytes = num_bytes;
		qrecord->old_roots = NULL;
	}
}

/*
 * helper function to actually insert a head node into the rbtree.
 * this does all the dirty work in terms of maintaining the correct
 * overall modification count.
 */
static noinline struct btrfs_delayed_ref_head *
add_delayed_ref_head(struct btrfs_trans_handle *trans,
		     struct btrfs_delayed_ref_head *head_ref,
		     struct btrfs_qgroup_extent_record *qrecord,
		     int action, int *qrecord_inserted_ret,
		     int *old_ref_mod, int *new_ref_mod)
{
	struct btrfs_delayed_ref_head *existing;
	struct btrfs_delayed_ref_root *delayed_refs;
	int qrecord_inserted = 0;

	delayed_refs = &trans->transaction->delayed_refs;

	/* Record qgroup extent info if provided */
	if (qrecord) {
		if (btrfs_qgroup_trace_extent_nolock(trans->fs_info,
					delayed_refs, qrecord))
			kfree(qrecord);
		else
			qrecord_inserted = 1;
	}

	trace_add_delayed_ref_head(trans->fs_info, head_ref, action);

	existing = htree_insert(&delayed_refs->href_root,
				&head_ref->href_node);
	if (existing) {
		WARN_ON(qrecord && head_ref->qgroup_ref_root
			&& head_ref->qgroup_reserved
			&& existing->qgroup_ref_root
			&& existing->qgroup_reserved);
		update_existing_head_ref(delayed_refs, existing, head_ref,
					 old_ref_mod);
		/*
		 * we've updated the existing ref, free the newly
		 * allocated ref
		 */
		kmem_cache_free(btrfs_delayed_ref_head_cachep, head_ref);
		head_ref = existing;
	} else {
		if (old_ref_mod)
			*old_ref_mod = 0;
		if (head_ref->is_data && head_ref->ref_mod < 0)
			delayed_refs->pending_csums += head_ref->num_bytes;
		delayed_refs->num_heads++;
		delayed_refs->num_heads_ready++;
		atomic_inc(&delayed_refs->num_entries);
		trans->delayed_ref_updates++;
	}
	if (qrecord_inserted_ret)
		*qrecord_inserted_ret = qrecord_inserted;
	if (new_ref_mod)
		*new_ref_mod = head_ref->total_ref_mod;

	return head_ref;
}

/*
 * init_delayed_ref_common - Initialize the structure which represents a
 *                           modification to an extent.
 *
 * @fs_info:    Internal to the mounted filesystem mount structure.
 *
 * @ref:        The structure which is going to be initialized.
 *
 * @bytenr:     The logical address of the extent for which a modification is
 *              going to be recorded.
 *
 * @num_bytes:  Size of the extent whose modification is being recorded.
 *
 * @ref_root:   The id of the root where this modification has originated, this
 *              can be either one of the well-known metadata trees or the
 *              subvolume id which references this extent.
 *
 * @action:     Can be one of BTRFS_ADD_DELAYED_REF/BTRFS_DROP_DELAYED_REF or
 *              BTRFS_ADD_DELAYED_EXTENT
 *
 * @ref_type:   Holds the type of the extent which is being recorded, can be
 *              one of BTRFS_SHARED_BLOCK_REF_KEY/BTRFS_TREE_BLOCK_REF_KEY
 *              when recording a metadata extent or BTRFS_SHARED_DATA_REF_KEY/
 *              BTRFS_EXTENT_DATA_REF_KEY when recording a data extent
 */
static void init_delayed_ref_common(struct btrfs_fs_info *fs_info,
				    struct btrfs_delayed_ref_node *ref,
				    u64 bytenr, u64 num_bytes, u64 ref_root,
				    int action, u8 ref_type)
{
	u64 seq = 0;

	if (action == BTRFS_ADD_DELAYED_EXTENT)
		action = BTRFS_ADD_DELAYED_REF;

	if (is_fstree(ref_root))
		seq = atomic64_read(&fs_info->tree_mod_seq);

	refcount_set(&ref->refs, 1);
	ref->bytenr = bytenr;
	ref->num_bytes = num_bytes;
	ref->ref_mod = 1;
	ref->action = action;
	ref->is_head = 0;
	ref->in_tree = 1;
	ref->seq = seq;
	ref->type = ref_type;
	RB_CLEAR_NODE(&ref->ref_node);
	INIT_LIST_HEAD(&ref->add_list);
}

/*
 * add a delayed tree ref.  This does all of the accounting required
 * to make sure the delayed ref is eventually processed before this
 * transaction commits.
 */
int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans,
			       u64 bytenr, u64 num_bytes, u64 parent,
			       u64 ref_root, int level, int action,
			       struct btrfs_delayed_extent_op *extent_op,
			       int *old_ref_mod, int *new_ref_mod)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct btrfs_delayed_tree_ref *ref;
	struct btrfs_delayed_ref_head *head_ref;
	struct btrfs_delayed_ref_root *delayed_refs;
	struct btrfs_qgroup_extent_record *record = NULL;
	int qrecord_inserted;
	bool is_system = (ref_root == BTRFS_CHUNK_TREE_OBJECTID);
	int ret;
	u8 ref_type;

	BUG_ON(extent_op && extent_op->is_data);
	ref = kmem_cache_alloc(btrfs_delayed_tree_ref_cachep, GFP_NOFS);
	if (!ref)
		return -ENOMEM;

	head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS);
	if (!head_ref) {
		kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref);
		return -ENOMEM;
	}

	if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) &&
	    is_fstree(ref_root)) {
		record = kmalloc(sizeof(*record), GFP_NOFS);
		if (!record) {
			kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref);
			kmem_cache_free(btrfs_delayed_ref_head_cachep, head_ref);
			return -ENOMEM;
		}
	}

	if (parent)
		ref_type = BTRFS_SHARED_BLOCK_REF_KEY;
	else
		ref_type = BTRFS_TREE_BLOCK_REF_KEY;

	init_delayed_ref_common(fs_info, &ref->node, bytenr, num_bytes,
				ref_root, action, ref_type);
	ref->root = ref_root;
	ref->parent = parent;
	ref->level = level;

	init_delayed_ref_head(head_ref, record, bytenr, num_bytes,
			      ref_root, 0, action, false, is_system);
	head_ref->extent_op = extent_op;

	delayed_refs = &trans->transaction->delayed_refs;
	spin_lock(&delayed_refs->lock);

	/*
	 * insert both the head node and the new ref without dropping
	 * the spin lock
	 */
	head_ref = add_delayed_ref_head(trans, head_ref, record,
					action, &qrecord_inserted,
					old_ref_mod, new_ref_mod);

	ret = insert_delayed_ref(trans, delayed_refs, head_ref, &ref->node);
	spin_unlock(&delayed_refs->lock);

	trace_add_delayed_tree_ref(fs_info, &ref->node, ref,
				   action == BTRFS_ADD_DELAYED_EXTENT ?
				   BTRFS_ADD_DELAYED_REF : action);
	if (ret > 0)
		kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref);

	if (qrecord_inserted)
		btrfs_qgroup_trace_extent_post(fs_info, record);

	return 0;
}

/*
 * add a delayed data ref. it's similar to btrfs_add_delayed_tree_ref.
 */
int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans,
			       u64 bytenr, u64 num_bytes,
			       u64 parent, u64 ref_root,
			       u64 owner, u64 offset, u64 reserved, int action,
			       int *old_ref_mod, int *new_ref_mod)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct btrfs_delayed_data_ref *ref;
	struct btrfs_delayed_ref_head *head_ref;
	struct btrfs_delayed_ref_root *delayed_refs;
	struct btrfs_qgroup_extent_record *record = NULL;
	int qrecord_inserted;
	int ret;
	u8 ref_type;

	ref = kmem_cache_alloc(btrfs_delayed_data_ref_cachep, GFP_NOFS);
	if (!ref)
		return -ENOMEM;

	if (parent)
		ref_type = BTRFS_SHARED_DATA_REF_KEY;
	else
		ref_type = BTRFS_EXTENT_DATA_REF_KEY;
	init_delayed_ref_common(fs_info, &ref->node, bytenr, num_bytes,
				ref_root, action, ref_type);
	ref->root = ref_root;
	ref->parent = parent;
	ref->objectid = owner;
	ref->offset = offset;

	head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS);
	if (!head_ref) {
		kmem_cache_free(btrfs_delayed_data_ref_cachep, ref);
		return -ENOMEM;
	}

	if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) &&
	    is_fstree(ref_root)) {
		record = kmalloc(sizeof(*record), GFP_NOFS);
		if (!record) {
			kmem_cache_free(btrfs_delayed_data_ref_cachep, ref);
			kmem_cache_free(btrfs_delayed_ref_head_cachep,
					head_ref);
			return -ENOMEM;
		}
	}

	init_delayed_ref_head(head_ref, record, bytenr, num_bytes, ref_root,
			      reserved, action, true, false);
	head_ref->extent_op = NULL;

	delayed_refs = &trans->transaction->delayed_refs;
	spin_lock(&delayed_refs->lock);

	/*
	 * insert both the head node and the new ref without dropping
	 * the spin lock
	 */
	head_ref = add_delayed_ref_head(trans, head_ref, record,
					action, &qrecord_inserted,
					old_ref_mod, new_ref_mod);

	ret = insert_delayed_ref(trans, delayed_refs, head_ref, &ref->node);
	spin_unlock(&delayed_refs->lock);

	trace_add_delayed_data_ref(trans->fs_info, &ref->node, ref,
				   action == BTRFS_ADD_DELAYED_EXTENT ?
				   BTRFS_ADD_DELAYED_REF : action);
	if (ret > 0)
		kmem_cache_free(btrfs_delayed_data_ref_cachep, ref);

	if (qrecord_inserted)
		return btrfs_qgroup_trace_extent_post(fs_info, record);
	return 0;
}
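
/*
 * Queue a head-only delayed ref that carries an extent_op (a key or flags
 * update for an extent item) without changing the extent's reference count.
 */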
int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
				struct btrfs_trans_handle *trans,
				u64 bytenr, u64 num_bytes,
				struct btrfs_delayed_extent_op *extent_op)
{
	struct btrfs_delayed_ref_head *head_ref;
	struct btrfs_delayed_ref_root *delayed_refs;

	head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS);
	if (!head_ref)
		return -ENOMEM;

	init_delayed_ref_head(head_ref, NULL, bytenr, num_bytes, 0, 0,
			      BTRFS_UPDATE_DELAYED_HEAD, extent_op->is_data,
			      false);
	head_ref->extent_op = extent_op;

	delayed_refs = &trans->transaction->delayed_refs;
	spin_lock(&delayed_refs->lock);

	add_delayed_ref_head(trans, head_ref, NULL, BTRFS_UPDATE_DELAYED_HEAD,
			     NULL, NULL, NULL);

	spin_unlock(&delayed_refs->lock);
	return 0;
}

/*
 * this does a simple search for the head node for a given extent.
 * It must be called with the delayed ref spinlock held, and it returns
 * the head node if one was found, or NULL if not.
 */
struct btrfs_delayed_ref_head *
btrfs_find_delayed_ref_head(struct btrfs_delayed_ref_root *delayed_refs, u64 bytenr)
{
	return find_ref_head(delayed_refs, bytenr, false);
}
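
/* Free the slab caches created by btrfs_delayed_ref_init(). */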
void __cold btrfs_delayed_ref_exit(void)
{
	kmem_cache_destroy(btrfs_delayed_ref_head_cachep);
	kmem_cache_destroy(btrfs_delayed_tree_ref_cachep);
	kmem_cache_destroy(btrfs_delayed_data_ref_cachep);
	kmem_cache_destroy(btrfs_delayed_extent_op_cachep);
}
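
/*
 * Create the slab caches used for delayed ref heads, tree refs, data refs and
 * extent ops. On any allocation failure the caches created so far are torn
 * down again and -ENOMEM is returned.
 */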
int __init btrfs_delayed_ref_init(void)
{
	btrfs_delayed_ref_head_cachep = kmem_cache_create(
				"btrfs_delayed_ref_head",
				sizeof(struct btrfs_delayed_ref_head), 0,
				SLAB_MEM_SPREAD, NULL);
	if (!btrfs_delayed_ref_head_cachep)
		goto fail;

	btrfs_delayed_tree_ref_cachep = kmem_cache_create(
				"btrfs_delayed_tree_ref",
				sizeof(struct btrfs_delayed_tree_ref), 0,
				SLAB_MEM_SPREAD, NULL);
	if (!btrfs_delayed_tree_ref_cachep)
		goto fail;

	btrfs_delayed_data_ref_cachep = kmem_cache_create(
				"btrfs_delayed_data_ref",
				sizeof(struct btrfs_delayed_data_ref), 0,
				SLAB_MEM_SPREAD, NULL);
	if (!btrfs_delayed_data_ref_cachep)
		goto fail;

	btrfs_delayed_extent_op_cachep = kmem_cache_create(
				"btrfs_delayed_extent_op",
				sizeof(struct btrfs_delayed_extent_op), 0,
				SLAB_MEM_SPREAD, NULL);
	if (!btrfs_delayed_extent_op_cachep)
		goto fail;

	return 0;
fail:
	btrfs_delayed_ref_exit();
	return -ENOMEM;
}