/*
 * Copyright (C) 2014 Facebook.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 021110-1307, USA.
 */
#include <linux/sched.h>
#include <linux/stacktrace.h>
#include "ctree.h"
#include "disk-io.h"
#include "locking.h"
#include "delayed-ref.h"
#include "ref-verify.h"

/*
 * Used to keep track of the roots and number of refs each root has for a given
 * bytenr.  This just tracks the number of direct references, no shared
 * references.
 */
struct root_entry {
	u64 root_objectid;
	u64 num_refs;
	struct rb_node node;
};

/*
 * These are meant to represent what should exist in the extent tree; they can
 * be used to verify that the extent tree is consistent, as every entry here
 * should match what the extent tree says.
 */
struct ref_entry {
	u64 root_objectid;
	u64 parent;
	u64 owner;
	u64 offset;
	u64 num_refs;
	struct rb_node node;
};

#define MAX_TRACE	16

/*
 * Whenever we add/remove a reference we record the action.  The action maps
 * back to the delayed ref action.  We hold the ref we are changing in the
 * action so we can account for the history properly, and we record the root we
 * were called with since it could be different from ref_root.  We also store
 * stack traces because that's how I roll.
 */
struct ref_action {
	int action;
	u64 root;
	struct ref_entry ref;
	struct list_head list;
	unsigned long trace[MAX_TRACE];
	unsigned int trace_len;
};

/*
 * One of these for every block we reference; it holds the roots and references
 * to it as well as all of the ref actions that have occurred to it.  We never
 * free it until we unmount the file system in order to make sure
 * re-allocations are happening properly.
 */
struct block_entry {
	u64 bytenr;
	u64 len;
	u64 num_refs;
	int metadata;
	int from_disk;
	struct rb_root roots;
	struct rb_root refs;
	struct rb_node node;
	struct list_head actions;
};

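/*
 * Block entries live in fs_info->block_tree, keyed by bytenr and protected by
 * fs_info->ref_verify_lock.  The insert_* helpers below all share one
 * convention: if an entry with the same key already exists they return it and
 * leave the tree untouched (the caller decides whether to merge or free its
 * copy); otherwise they link the new node and return NULL.
 */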
static struct block_entry *insert_block_entry(struct rb_root *root,
					      struct block_entry *be)
{
	struct rb_node **p = &root->rb_node;
	struct rb_node *parent_node = NULL;
	struct block_entry *entry;

	while (*p) {
		parent_node = *p;
		entry = rb_entry(parent_node, struct block_entry, node);
		if (entry->bytenr > be->bytenr)
			p = &(*p)->rb_left;
		else if (entry->bytenr < be->bytenr)
			p = &(*p)->rb_right;
		else
			return entry;
	}

	rb_link_node(&be->node, parent_node, p);
	rb_insert_color(&be->node, root);
	return NULL;
}

static struct block_entry *lookup_block_entry(struct rb_root *root, u64 bytenr)
{
	struct rb_node *n;
	struct block_entry *entry = NULL;

	n = root->rb_node;
	while (n) {
		entry = rb_entry(n, struct block_entry, node);
		if (entry->bytenr < bytenr)
			n = n->rb_right;
		else if (entry->bytenr > bytenr)
			n = n->rb_left;
		else
			return entry;
	}
	return NULL;
}

static struct root_entry *insert_root_entry(struct rb_root *root,
					    struct root_entry *re)
{
	struct rb_node **p = &root->rb_node;
	struct rb_node *parent_node = NULL;
	struct root_entry *entry;

	while (*p) {
		parent_node = *p;
		entry = rb_entry(parent_node, struct root_entry, node);
		if (entry->root_objectid > re->root_objectid)
			p = &(*p)->rb_left;
		else if (entry->root_objectid < re->root_objectid)
			p = &(*p)->rb_right;
		else
			return entry;
	}

	rb_link_node(&re->node, parent_node, p);
	rb_insert_color(&re->node, root);
	return NULL;
}

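/*
 * Establish a total order on ref entries: compare by root_objectid, then
 * parent, then owner, then offset.  This is the key used to sort the
 * per-block ref tree.
 */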
static int comp_refs(struct ref_entry *ref1, struct ref_entry *ref2)
{
	if (ref1->root_objectid < ref2->root_objectid)
		return -1;
	if (ref1->root_objectid > ref2->root_objectid)
		return 1;
	if (ref1->parent < ref2->parent)
		return -1;
	if (ref1->parent > ref2->parent)
		return 1;
	if (ref1->owner < ref2->owner)
		return -1;
	if (ref1->owner > ref2->owner)
		return 1;
	if (ref1->offset < ref2->offset)
		return -1;
	if (ref1->offset > ref2->offset)
		return 1;
	return 0;
}

static struct ref_entry *insert_ref_entry(struct rb_root *root,
					  struct ref_entry *ref)
{
	struct rb_node **p = &root->rb_node;
	struct rb_node *parent_node = NULL;
	struct ref_entry *entry;
	int cmp;

	while (*p) {
		parent_node = *p;
		entry = rb_entry(parent_node, struct ref_entry, node);
		cmp = comp_refs(entry, ref);
		if (cmp > 0)
			p = &(*p)->rb_left;
		else if (cmp < 0)
			p = &(*p)->rb_right;
		else
			return entry;
	}

	rb_link_node(&ref->node, parent_node, p);
	rb_insert_color(&ref->node, root);
	return NULL;
}

static struct root_entry *lookup_root_entry(struct rb_root *root, u64 objectid)
{
	struct rb_node *n;
	struct root_entry *entry = NULL;

	n = root->rb_node;
	while (n) {
		entry = rb_entry(n, struct root_entry, node);
		if (entry->root_objectid < objectid)
			n = n->rb_right;
		else if (entry->root_objectid > objectid)
			n = n->rb_left;
		else
			return entry;
	}
	return NULL;
}

#ifdef CONFIG_STACKTRACE
static void __save_stack_trace(struct ref_action *ra)
{
	struct stack_trace stack_trace;

	stack_trace.max_entries = MAX_TRACE;
	stack_trace.nr_entries = 0;
	stack_trace.entries = ra->trace;
	stack_trace.skip = 2;
	save_stack_trace(&stack_trace);
	ra->trace_len = stack_trace.nr_entries;
}

static void __print_stack_trace(struct btrfs_fs_info *fs_info,
				struct ref_action *ra)
{
	struct stack_trace trace;

	if (ra->trace_len == 0) {
		btrfs_err(fs_info, " ref-verify: no stacktrace");
		return;
	}
	trace.nr_entries = ra->trace_len;
	trace.entries = ra->trace;
	print_stack_trace(&trace, 2);
}
#else
static inline void __save_stack_trace(struct ref_action *ra)
{
}

static inline void __print_stack_trace(struct btrfs_fs_info *fs_info,
				       struct ref_action *ra)
{
	btrfs_err(fs_info, " ref-verify: no stacktrace support");
}
#endif

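/* Tear down a block entry: its root entries, ref entries and recorded actions. */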
static void free_block_entry(struct block_entry *be)
{
	struct root_entry *re;
	struct ref_entry *ref;
	struct ref_action *ra;
	struct rb_node *n;

	while ((n = rb_first(&be->roots))) {
		re = rb_entry(n, struct root_entry, node);
		rb_erase(&re->node, &be->roots);
		kfree(re);
	}

	while ((n = rb_first(&be->refs))) {
		ref = rb_entry(n, struct ref_entry, node);
		rb_erase(&ref->node, &be->refs);
		kfree(ref);
	}

	while (!list_empty(&be->actions)) {
		ra = list_first_entry(&be->actions, struct ref_action,
				      list);
		list_del(&ra->list);
		kfree(ra);
	}
	kfree(be);
}

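/*
 * Look up or create the block entry for @bytenr.  On success this returns
 * with fs_info->ref_verify_lock held, whether the entry already existed or
 * was newly inserted; it is up to the caller to drop the lock.  If
 * @root_objectid is non-zero, a root entry for it is added (or reused) in the
 * block's root tree.
 */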
static struct block_entry *add_block_entry(struct btrfs_fs_info *fs_info,
					   u64 bytenr, u64 len,
					   u64 root_objectid)
{
	struct block_entry *be = NULL, *exist;
	struct root_entry *re = NULL;

	re = kzalloc(sizeof(struct root_entry), GFP_KERNEL);
	be = kzalloc(sizeof(struct block_entry), GFP_KERNEL);
	if (!be || !re) {
		kfree(re);
		kfree(be);
		return ERR_PTR(-ENOMEM);
	}
	be->bytenr = bytenr;
	be->len = len;

	re->root_objectid = root_objectid;
	re->num_refs = 0;

	spin_lock(&fs_info->ref_verify_lock);
	exist = insert_block_entry(&fs_info->block_tree, be);
	if (exist) {
		if (root_objectid) {
			struct root_entry *exist_re;

			exist_re = insert_root_entry(&exist->roots, re);
			if (exist_re)
				kfree(re);
		}
		kfree(be);
		return exist;
	}

	be->num_refs = 0;
	be->metadata = 0;
	be->from_disk = 0;
	be->roots = RB_ROOT;
	be->refs = RB_ROOT;
	INIT_LIST_HEAD(&be->actions);
	if (root_objectid)
		insert_root_entry(&be->roots, re);
	else
		kfree(re);
	return be;
}

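/*
 * Record one on-disk ref for a tree block.  For a keyed ref the owning root
 * goes in ref->root_objectid; for a shared ref only @parent is set.  The
 * block level is stashed in ref->owner.
 */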
static int add_tree_block(struct btrfs_fs_info *fs_info, u64 ref_root,
			  u64 parent, u64 bytenr, int level)
{
	struct block_entry *be;
	struct root_entry *re;
	struct ref_entry *ref = NULL, *exist;

	ref = kmalloc(sizeof(struct ref_entry), GFP_KERNEL);
	if (!ref)
		return -ENOMEM;

	if (parent)
		ref->root_objectid = 0;
	else
		ref->root_objectid = ref_root;
	ref->parent = parent;
	ref->owner = level;
	ref->offset = 0;
	ref->num_refs = 1;

	be = add_block_entry(fs_info, bytenr, fs_info->nodesize, ref_root);
	if (IS_ERR(be)) {
		kfree(ref);
		return PTR_ERR(be);
	}
	be->num_refs++;
	be->from_disk = 1;
	be->metadata = 1;

	if (!parent) {
		ASSERT(ref_root);
		re = lookup_root_entry(&be->roots, ref_root);
		ASSERT(re);
		re->num_refs++;
	}
	exist = insert_ref_entry(&be->refs, ref);
	if (exist) {
		exist->num_refs++;
		kfree(ref);
	}
	spin_unlock(&fs_info->ref_verify_lock);

	return 0;
}

static int add_shared_data_ref(struct btrfs_fs_info *fs_info,
			       u64 parent, u32 num_refs, u64 bytenr,
			       u64 num_bytes)
{
	struct block_entry *be;
	struct ref_entry *ref;

	ref = kzalloc(sizeof(struct ref_entry), GFP_KERNEL);
	if (!ref)
		return -ENOMEM;
	be = add_block_entry(fs_info, bytenr, num_bytes, 0);
	if (IS_ERR(be)) {
		kfree(ref);
		return PTR_ERR(be);
	}
	be->num_refs += num_refs;

	ref->parent = parent;
	ref->num_refs = num_refs;
	if (insert_ref_entry(&be->refs, ref)) {
		spin_unlock(&fs_info->ref_verify_lock);
		btrfs_err(fs_info, "existing shared ref when reading from disk?");
		kfree(ref);
		return -EINVAL;
	}
	spin_unlock(&fs_info->ref_verify_lock);
	return 0;
}

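/*
 * Record a keyed EXTENT_DATA_REF found on disk: bump the block's overall ref
 * count and the per-root count for @ref_root, which add_block_entry has just
 * ensured exists in the block's root tree.
 */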
static int add_extent_data_ref(struct btrfs_fs_info *fs_info,
			       struct extent_buffer *leaf,
			       struct btrfs_extent_data_ref *dref,
			       u64 bytenr, u64 num_bytes)
{
	struct block_entry *be;
	struct ref_entry *ref;
	struct root_entry *re;
	u64 ref_root = btrfs_extent_data_ref_root(leaf, dref);
	u64 owner = btrfs_extent_data_ref_objectid(leaf, dref);
	u64 offset = btrfs_extent_data_ref_offset(leaf, dref);
	u32 num_refs = btrfs_extent_data_ref_count(leaf, dref);

	ref = kzalloc(sizeof(struct ref_entry), GFP_KERNEL);
	if (!ref)
		return -ENOMEM;
	be = add_block_entry(fs_info, bytenr, num_bytes, ref_root);
	if (IS_ERR(be)) {
		kfree(ref);
		return PTR_ERR(be);
	}
	be->num_refs += num_refs;

	ref->parent = 0;
	ref->owner = owner;
	ref->root_objectid = ref_root;
	ref->offset = offset;
	ref->num_refs = num_refs;
	if (insert_ref_entry(&be->refs, ref)) {
		spin_unlock(&fs_info->ref_verify_lock);
		btrfs_err(fs_info, "existing ref when reading from disk?");
		kfree(ref);
		return -EINVAL;
	}

	re = lookup_root_entry(&be->roots, ref_root);
	if (!re) {
		spin_unlock(&fs_info->ref_verify_lock);
		btrfs_err(fs_info, "missing root in new block entry?");
		return -EINVAL;
	}
	re->num_refs += num_refs;
	spin_unlock(&fs_info->ref_verify_lock);
	return 0;
}

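/*
 * Walk the inline refs embedded in an EXTENT_ITEM/METADATA_ITEM and record
 * each one.  For an EXTENT_ITEM that describes a tree block, the level comes
 * from the btrfs_tree_block_info that precedes the inline refs; for a
 * METADATA_ITEM (skinny metadata) the level is the key offset.
 */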
static int process_extent_item(struct btrfs_fs_info *fs_info,
			       struct btrfs_path *path, struct btrfs_key *key,
			       int slot, int *tree_block_level)
{
	struct btrfs_extent_item *ei;
	struct btrfs_extent_inline_ref *iref;
	struct btrfs_extent_data_ref *dref;
	struct btrfs_shared_data_ref *sref;
	struct extent_buffer *leaf = path->nodes[0];
	u32 item_size = btrfs_item_size_nr(leaf, slot);
	unsigned long end, ptr;
	u64 offset, flags, count;
	int type, ret = 0;

	ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
	flags = btrfs_extent_flags(leaf, ei);

	if ((key->type == BTRFS_EXTENT_ITEM_KEY) &&
	    flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
		struct btrfs_tree_block_info *info;

		info = (struct btrfs_tree_block_info *)(ei + 1);
		*tree_block_level = btrfs_tree_block_level(leaf, info);
		iref = (struct btrfs_extent_inline_ref *)(info + 1);
	} else {
		if (key->type == BTRFS_METADATA_ITEM_KEY)
			*tree_block_level = key->offset;
		iref = (struct btrfs_extent_inline_ref *)(ei + 1);
	}

	ptr = (unsigned long)iref;
	end = (unsigned long)ei + item_size;
	while (ptr < end) {
		iref = (struct btrfs_extent_inline_ref *)ptr;
		type = btrfs_extent_inline_ref_type(leaf, iref);
		offset = btrfs_extent_inline_ref_offset(leaf, iref);
		switch (type) {
		case BTRFS_TREE_BLOCK_REF_KEY:
			ret = add_tree_block(fs_info, offset, 0, key->objectid,
					     *tree_block_level);
			break;
		case BTRFS_SHARED_BLOCK_REF_KEY:
			ret = add_tree_block(fs_info, 0, offset, key->objectid,
					     *tree_block_level);
			break;
		case BTRFS_EXTENT_DATA_REF_KEY:
			dref = (struct btrfs_extent_data_ref *)(&iref->offset);
			ret = add_extent_data_ref(fs_info, leaf, dref,
						  key->objectid, key->offset);
			break;
		case BTRFS_SHARED_DATA_REF_KEY:
			sref = (struct btrfs_shared_data_ref *)(iref + 1);
			count = btrfs_shared_data_ref_count(leaf, sref);
			ret = add_shared_data_ref(fs_info, offset, count,
						  key->objectid, key->offset);
			break;
		default:
			btrfs_err(fs_info, "invalid key type in iref");
			ret = -EINVAL;
			break;
		}
		if (ret)
			break;
		ptr += btrfs_extent_inline_ref_size(type);
	}
	return ret;
}

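/*
 * Record every extent item and standalone (keyed) ref found in one extent
 * tree leaf.  @bytenr/@num_bytes carry the last extent seen so that keyed
 * refs that spilled onto a later leaf still attach to the right extent.
 */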
static int process_leaf(struct btrfs_root *root,
			struct btrfs_path *path, u64 *bytenr, u64 *num_bytes)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct extent_buffer *leaf = path->nodes[0];
	struct btrfs_extent_data_ref *dref;
	struct btrfs_shared_data_ref *sref;
	u32 count;
	int i = 0, tree_block_level = 0, ret = 0;
	struct btrfs_key key;
	int nritems = btrfs_header_nritems(leaf);

	for (i = 0; i < nritems; i++) {
		btrfs_item_key_to_cpu(leaf, &key, i);
		switch (key.type) {
		case BTRFS_EXTENT_ITEM_KEY:
			*num_bytes = key.offset;
			/* fall through */
		case BTRFS_METADATA_ITEM_KEY:
			*bytenr = key.objectid;
			ret = process_extent_item(fs_info, path, &key, i,
						  &tree_block_level);
			break;
		case BTRFS_TREE_BLOCK_REF_KEY:
			ret = add_tree_block(fs_info, key.offset, 0,
					     key.objectid, tree_block_level);
			break;
		case BTRFS_SHARED_BLOCK_REF_KEY:
			ret = add_tree_block(fs_info, 0, key.offset,
					     key.objectid, tree_block_level);
			break;
		case BTRFS_EXTENT_DATA_REF_KEY:
			dref = btrfs_item_ptr(leaf, i,
					      struct btrfs_extent_data_ref);
			ret = add_extent_data_ref(fs_info, leaf, dref, *bytenr,
						  *num_bytes);
			break;
		case BTRFS_SHARED_DATA_REF_KEY:
			sref = btrfs_item_ptr(leaf, i,
					      struct btrfs_shared_data_ref);
			count = btrfs_shared_data_ref_count(leaf, sref);
			ret = add_shared_data_ref(fs_info, key.offset, count,
						  *bytenr, *num_bytes);
			break;
		default:
			break;
		}
		if (ret)
			break;
	}
	return ret;
}

/* Walk down to the leaf from the given level */
static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
			  int level, u64 *bytenr, u64 *num_bytes)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct extent_buffer *eb;
	u64 block_bytenr, gen;
	int ret = 0;

	while (level >= 0) {
		if (level) {
			block_bytenr = btrfs_node_blockptr(path->nodes[level],
							   path->slots[level]);
			gen = btrfs_node_ptr_generation(path->nodes[level],
							path->slots[level]);
			eb = read_tree_block(fs_info, block_bytenr, gen);
			if (IS_ERR(eb))
				return PTR_ERR(eb);
			if (!extent_buffer_uptodate(eb)) {
				free_extent_buffer(eb);
				return -EIO;
			}
			btrfs_tree_read_lock(eb);
			btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
			path->nodes[level-1] = eb;
			path->slots[level-1] = 0;
			path->locks[level-1] = BTRFS_READ_LOCK_BLOCKING;
		} else {
			ret = process_leaf(root, path, bytenr, num_bytes);
			if (ret)
				break;
		}
		level--;
	}
	return ret;
}

/* Walk up to the next node that needs to be processed */
static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
			int *level)
{
	int l;

	for (l = 0; l < BTRFS_MAX_LEVEL; l++) {
		if (!path->nodes[l])
			continue;
		if (l) {
			path->slots[l]++;
			if (path->slots[l] <
			    btrfs_header_nritems(path->nodes[l])) {
				*level = l;
				return 0;
			}
		}
		btrfs_tree_unlock_rw(path->nodes[l], path->locks[l]);
		free_extent_buffer(path->nodes[l]);
		path->nodes[l] = NULL;
		path->slots[l] = 0;
		path->locks[l] = 0;
	}
	return 1;
}

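/* Print one recorded ref action, including the stack trace captured when it was queued. */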
static void dump_ref_action(struct btrfs_fs_info *fs_info,
			    struct ref_action *ra)
{
	btrfs_err(fs_info,
		  " Ref action %d, root %llu, ref_root %llu, parent %llu, owner %llu, offset %llu, num_refs %llu",
		  ra->action, ra->root, ra->ref.root_objectid, ra->ref.parent,
		  ra->ref.owner, ra->ref.offset, ra->ref.num_refs);
	__print_stack_trace(fs_info, ra);
}

/*
 * Dumps all the information from the block entry to printk; it's going to be
 * awesome.
 */
static void dump_block_entry(struct btrfs_fs_info *fs_info,
			     struct block_entry *be)
{
	struct ref_entry *ref;
	struct root_entry *re;
	struct ref_action *ra;
	struct rb_node *n;

	btrfs_err(fs_info,
		  "dumping block entry [%llu %llu], num_refs %llu, metadata %d, from disk %d",
		  be->bytenr, be->len, be->num_refs, be->metadata,
		  be->from_disk);

	for (n = rb_first(&be->refs); n; n = rb_next(n)) {
		ref = rb_entry(n, struct ref_entry, node);
		btrfs_err(fs_info,
			  " ref root %llu, parent %llu, owner %llu, offset %llu, num_refs %llu",
			  ref->root_objectid, ref->parent, ref->owner,
			  ref->offset, ref->num_refs);
	}

	for (n = rb_first(&be->roots); n; n = rb_next(n)) {
		re = rb_entry(n, struct root_entry, node);
		btrfs_err(fs_info, " root entry %llu, num_refs %llu",
			  re->root_objectid, re->num_refs);
	}

	list_for_each_entry(ra, &be->actions, list)
		dump_ref_action(fs_info, ra);
}

/*
 * btrfs_ref_tree_mod: called when we modify a ref for a bytenr
 * @root: the root we are making this modification from.
 * @bytenr: the bytenr we are modifying.
 * @num_bytes: number of bytes.
 * @parent: the parent bytenr.
 * @ref_root: the original root owner of the bytenr.
 * @owner: level in the case of metadata, inode in the case of data.
 * @offset: 0 for metadata, file offset for data.
 * @action: the action that we are doing, this is the same as the delayed ref
 *	action.
 *
 * This will add an action item to the given bytenr and do sanity checks to
 * make sure we haven't messed something up.  If we are making a new allocation
 * and this block entry has history we will delete all previous actions as long
 * as our sanity checks pass, as they are no longer needed.
 */
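/*
 * For example (hypothetical values): dropping root 5's ref on a tree block at
 * level 1 would be recorded as
 *
 *	btrfs_ref_tree_mod(root, bytenr, fs_info->nodesize, 0, 5, 1, 0,
 *			   BTRFS_DROP_DELAYED_REF);
 */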
int btrfs_ref_tree_mod(struct btrfs_root *root, u64 bytenr, u64 num_bytes,
		       u64 parent, u64 ref_root, u64 owner, u64 offset,
		       int action)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct ref_entry *ref = NULL, *exist;
	struct ref_action *ra = NULL;
	struct block_entry *be = NULL;
	struct root_entry *re = NULL;
	int ret = 0;
	bool metadata = owner < BTRFS_FIRST_FREE_OBJECTID;

	if (!btrfs_test_opt(root->fs_info, REF_VERIFY))
		return 0;

	ref = kzalloc(sizeof(struct ref_entry), GFP_NOFS);
	ra = kmalloc(sizeof(struct ref_action), GFP_NOFS);
	if (!ra || !ref) {
		kfree(ref);
		kfree(ra);
		ret = -ENOMEM;
		goto out;
	}

	if (parent) {
		ref->parent = parent;
	} else {
		ref->root_objectid = ref_root;
		ref->owner = owner;
		ref->offset = offset;
	}
	ref->num_refs = (action == BTRFS_DROP_DELAYED_REF) ? -1 : 1;

	memcpy(&ra->ref, ref, sizeof(struct ref_entry));
	/*
	 * Save the extra info from the delayed ref in the ref action to make
	 * it easier to figure out what is happening.  The real refs we add to
	 * the ref tree need to reflect what we save on disk so it matches any
	 * on-disk refs we pre-loaded.
	 */
	ra->ref.owner = owner;
	ra->ref.offset = offset;
	ra->ref.root_objectid = ref_root;
	__save_stack_trace(ra);

	INIT_LIST_HEAD(&ra->list);
	ra->action = action;
	ra->root = root->objectid;

	/*
	 * If this is an allocation, preallocate the block_entry in case we
	 * haven't used it before.
	 */
	ret = -EINVAL;
	if (action == BTRFS_ADD_DELAYED_EXTENT) {
		/*
		 * For subvol_create we'll just pass in whatever the parent root
		 * is and the new root objectid, so let's not treat the passed
		 * in root as if it really has a ref for this bytenr.
		 */
		be = add_block_entry(root->fs_info, bytenr, num_bytes, ref_root);
		if (IS_ERR(be)) {
			kfree(ref);
			kfree(ra);
			ret = PTR_ERR(be);
			goto out;
		}
		be->num_refs++;
		if (metadata)
			be->metadata = 1;

		if (be->num_refs != 1) {
			btrfs_err(fs_info,
				  "re-allocated a block that still has references to it!");
			dump_block_entry(fs_info, be);
			dump_ref_action(fs_info, ra);
			kfree(ref);
			kfree(ra);
			goto out_unlock;
		}

		while (!list_empty(&be->actions)) {
			struct ref_action *tmp;

			tmp = list_first_entry(&be->actions, struct ref_action,
					       list);
			list_del(&tmp->list);
			kfree(tmp);
		}
	} else {
		struct root_entry *tmp;

		if (!parent) {
			re = kmalloc(sizeof(struct root_entry), GFP_NOFS);
			if (!re) {
				kfree(ref);
				kfree(ra);
				ret = -ENOMEM;
				goto out;
			}
			/*
			 * This is the root that is modifying us, so it's the
			 * one we want to lookup below when we modify the
			 * re->num_refs.
			 */
			ref_root = root->objectid;
			re->root_objectid = root->objectid;
			re->num_refs = 0;
		}

		spin_lock(&root->fs_info->ref_verify_lock);
		be = lookup_block_entry(&root->fs_info->block_tree, bytenr);
		if (!be) {
			btrfs_err(fs_info,
				  "trying to do action %d to bytenr %llu num_bytes %llu but there is no existing entry!",
				  action, (unsigned long long)bytenr,
				  (unsigned long long)num_bytes);
			dump_ref_action(fs_info, ra);
			kfree(re);
			kfree(ref);
			kfree(ra);
			goto out_unlock;
		}

		if (!parent) {
			tmp = insert_root_entry(&be->roots, re);
			if (tmp) {
				kfree(re);
				re = tmp;
			}
		}
	}

	exist = insert_ref_entry(&be->refs, ref);
	if (exist) {
		if (action == BTRFS_DROP_DELAYED_REF) {
			if (exist->num_refs == 0) {
				btrfs_err(fs_info,
					  "dropping a ref for an existing root that doesn't have a ref on the block");
				dump_block_entry(fs_info, be);
				dump_ref_action(fs_info, ra);
				kfree(ref);
				kfree(ra);
				goto out_unlock;
			}
			exist->num_refs--;
			if (exist->num_refs == 0) {
				rb_erase(&exist->node, &be->refs);
				kfree(exist);
			}
		} else if (!be->metadata) {
			exist->num_refs++;
		} else {
			btrfs_err(fs_info,
				  "attempting to add another ref for an existing ref on a tree block");
			dump_block_entry(fs_info, be);
			dump_ref_action(fs_info, ra);
			kfree(ref);
			kfree(ra);
			goto out_unlock;
		}
		kfree(ref);
	} else {
		if (action == BTRFS_DROP_DELAYED_REF) {
			btrfs_err(fs_info,
				  "dropping a ref for a root that doesn't have a ref on the block");
			dump_block_entry(fs_info, be);
			dump_ref_action(fs_info, ra);
			kfree(ra);
			goto out_unlock;
		}
	}

	if (!parent && !re) {
		re = lookup_root_entry(&be->roots, ref_root);
		if (!re) {
			/*
			 * This shouldn't happen because we will add our re
			 * above when we lookup the be with !parent, but just in
			 * case catch this case so we don't panic because I
			 * didn't think of some other corner case.
			 */
			btrfs_err(fs_info, "failed to find root %llu for %llu",
				  root->objectid, be->bytenr);
			dump_block_entry(fs_info, be);
			dump_ref_action(fs_info, ra);
			kfree(ra);
			goto out_unlock;
		}
	}
	if (action == BTRFS_DROP_DELAYED_REF) {
		if (re)
			re->num_refs--;
		be->num_refs--;
	} else if (action == BTRFS_ADD_DELAYED_REF) {
		be->num_refs++;
		if (re)
			re->num_refs++;
	}
	list_add_tail(&ra->list, &be->actions);
	ret = 0;
out_unlock:
	spin_unlock(&root->fs_info->ref_verify_lock);
out:
	if (ret)
		btrfs_clear_opt(fs_info->mount_opt, REF_VERIFY);
	return ret;
}

/* Free up the ref cache */
void btrfs_free_ref_cache(struct btrfs_fs_info *fs_info)
{
	struct block_entry *be;
	struct rb_node *n;

	if (!btrfs_test_opt(fs_info, REF_VERIFY))
		return;

	spin_lock(&fs_info->ref_verify_lock);
	while ((n = rb_first(&fs_info->block_tree))) {
		be = rb_entry(n, struct block_entry, node);
		rb_erase(&be->node, &fs_info->block_tree);
		free_block_entry(be);
		cond_resched_lock(&fs_info->ref_verify_lock);
	}
	spin_unlock(&fs_info->ref_verify_lock);
}

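/*
 * Drop every block entry inside [start, start + len), typically when a block
 * group is being freed.  An entry that straddles either end of the range is
 * reported as an error: one hanging over the front is skipped, one hanging
 * over the back is still freed.
 */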
void btrfs_free_ref_tree_range(struct btrfs_fs_info *fs_info, u64 start,
			       u64 len)
{
	struct block_entry *be = NULL, *entry;
	struct rb_node *n;

	if (!btrfs_test_opt(fs_info, REF_VERIFY))
		return;

	spin_lock(&fs_info->ref_verify_lock);
	n = fs_info->block_tree.rb_node;
	while (n) {
		entry = rb_entry(n, struct block_entry, node);
		if (entry->bytenr < start) {
			n = n->rb_right;
		} else if (entry->bytenr > start) {
			n = n->rb_left;
		} else {
			be = entry;
			break;
		}
		/* We want to get as close to start as possible */
		if (be == NULL ||
		    (entry->bytenr < start && be->bytenr > start) ||
		    (entry->bytenr < start && entry->bytenr > be->bytenr))
			be = entry;
	}

	/*
	 * Could have an empty block group, maybe have something to check for
	 * this case to verify we were actually empty?
	 */
	if (!be) {
		spin_unlock(&fs_info->ref_verify_lock);
		return;
	}

	n = &be->node;
	while (n) {
		be = rb_entry(n, struct block_entry, node);
		n = rb_next(n);
		if (be->bytenr < start && be->bytenr + be->len > start) {
			btrfs_err(fs_info,
				  "block entry overlaps a block group [%llu,%llu]!",
				  start, len);
			dump_block_entry(fs_info, be);
			continue;
		}
		if (be->bytenr < start)
			continue;
		if (be->bytenr >= start + len)
			break;
		if (be->bytenr + be->len > start + len) {
			btrfs_err(fs_info,
				  "block entry overlaps a block group [%llu,%llu]!",
				  start, len);
			dump_block_entry(fs_info, be);
		}
		rb_erase(&be->node, &fs_info->block_tree);
		free_block_entry(be);
	}
	spin_unlock(&fs_info->ref_verify_lock);
}

/* Walk down the extent root and build the ref tree, meant to be called at mount */
int btrfs_build_ref_tree(struct btrfs_fs_info *fs_info)
{
	struct btrfs_path *path;
	struct btrfs_root *root;
	struct extent_buffer *eb;
	u64 bytenr = 0, num_bytes = 0;
	int ret, level;

	if (!btrfs_test_opt(fs_info, REF_VERIFY))
		return 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	root = fs_info->extent_root;
	eb = btrfs_read_lock_root_node(root);
	btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
	level = btrfs_header_level(eb);
	path->nodes[level] = eb;
	path->slots[level] = 0;
	path->locks[level] = BTRFS_READ_LOCK_BLOCKING;

	while (1) {
		/*
		 * We have to keep track of the bytenr/num_bytes we last hit
		 * because we could have run out of space for an inline ref,
		 * and would have had to add a ref key item which may appear on
		 * a different leaf from the original extent item.
		 */
		ret = walk_down_tree(root, path, level,
				     &bytenr, &num_bytes);
		if (ret)
			break;
		ret = walk_up_tree(root, path, &level);
		if (ret < 0)
			break;
		if (ret > 0) {
			ret = 0;
			break;
		}
	}
	if (ret) {
		btrfs_clear_opt(fs_info->mount_opt, REF_VERIFY);
		btrfs_free_ref_cache(fs_info);
	}
	btrfs_free_path(path);
	return ret;
}
  935. }