balloc.c 45 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558
  1. /*
  2. * linux/fs/ext2/balloc.c
  3. *
  4. * Copyright (C) 1992, 1993, 1994, 1995
  5. * Remy Card (card@masi.ibp.fr)
  6. * Laboratoire MASI - Institut Blaise Pascal
  7. * Universite Pierre et Marie Curie (Paris VI)
  8. *
  9. * Enhanced block allocation by Stephen Tweedie (sct@redhat.com), 1993
  10. * Big-endian to little-endian byte-swapping/bitmaps by
  11. * David S. Miller (davem@caip.rutgers.edu), 1995
  12. */
  13. #include "ext2.h"
  14. #include <linux/quotaops.h>
  15. #include <linux/slab.h>
  16. #include <linux/sched.h>
  17. #include <linux/cred.h>
  18. #include <linux/buffer_head.h>
  19. #include <linux/capability.h>
  20. /*
  21. * balloc.c contains the blocks allocation and deallocation routines
  22. */
  23. /*
  24. * The free blocks are managed by bitmaps. A file system contains several
  25. * blocks groups. Each group contains 1 bitmap block for blocks, 1 bitmap
  26. * block for inodes, N blocks for the inode table and data blocks.
  27. *
  28. * The file system contains group descriptors which are located after the
  29. * super block. Each descriptor contains the number of the bitmap block and
  30. * the free blocks count in the block. The descriptors are loaded in memory
  31. * when a file system is mounted (see ext2_fill_super).
  32. */
  33. #define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1)
  34. struct ext2_group_desc * ext2_get_group_desc(struct super_block * sb,
  35. unsigned int block_group,
  36. struct buffer_head ** bh)
  37. {
  38. unsigned long group_desc;
  39. unsigned long offset;
  40. struct ext2_group_desc * desc;
  41. struct ext2_sb_info *sbi = EXT2_SB(sb);
  42. if (block_group >= sbi->s_groups_count) {
  43. ext2_error (sb, "ext2_get_group_desc",
  44. "block_group >= groups_count - "
  45. "block_group = %d, groups_count = %lu",
  46. block_group, sbi->s_groups_count);
  47. return NULL;
  48. }
  49. group_desc = block_group >> EXT2_DESC_PER_BLOCK_BITS(sb);
  50. offset = block_group & (EXT2_DESC_PER_BLOCK(sb) - 1);
  51. if (!sbi->s_group_desc[group_desc]) {
  52. ext2_error (sb, "ext2_get_group_desc",
  53. "Group descriptor not loaded - "
  54. "block_group = %d, group_desc = %lu, desc = %lu",
  55. block_group, group_desc, offset);
  56. return NULL;
  57. }
  58. desc = (struct ext2_group_desc *) sbi->s_group_desc[group_desc]->b_data;
  59. if (bh)
  60. *bh = sbi->s_group_desc[group_desc];
  61. return desc + offset;
  62. }
/*
 * ext2_valid_block_bitmap() -- sanity-check a freshly read block bitmap.
 * @sb:		superblock
 * @desc:	group descriptor of @block_group
 * @block_group:	group the bitmap belongs to
 * @bh:		buffer holding the on-disk block bitmap
 *
 * The blocks that hold the group's own metadata -- the block bitmap,
 * the inode bitmap and all s_itb_per_group inode-table blocks -- must
 * always be marked in use in the bitmap.  Returns 1 if those bits are
 * set, 0 (after logging an ext2_error) otherwise.
 *
 * NOTE(review): offset arithmetic assumes the metadata blocks live
 * inside this group (offset fits in ext2_grpblk_t) -- true for
 * well-formed filesystems, unchecked here.
 */
static int ext2_valid_block_bitmap(struct super_block *sb,
					struct ext2_group_desc *desc,
					unsigned int block_group,
					struct buffer_head *bh)
{
	ext2_grpblk_t offset;
	ext2_grpblk_t next_zero_bit;
	ext2_fsblk_t bitmap_blk;
	ext2_fsblk_t group_first_block;

	group_first_block = ext2_group_first_block_no(sb, block_group);

	/* check whether block bitmap block number is set */
	bitmap_blk = le32_to_cpu(desc->bg_block_bitmap);
	offset = bitmap_blk - group_first_block;
	if (!ext2_test_bit(offset, bh->b_data))
		/* bad block bitmap */
		goto err_out;

	/* check whether the inode bitmap block number is set */
	bitmap_blk = le32_to_cpu(desc->bg_inode_bitmap);
	offset = bitmap_blk - group_first_block;
	if (!ext2_test_bit(offset, bh->b_data))
		/* bad block bitmap */
		goto err_out;

	/* check whether the inode table block number is set */
	bitmap_blk = le32_to_cpu(desc->bg_inode_table);
	offset = bitmap_blk - group_first_block;
	/* every inode-table block must be allocated: the first zero bit
	 * in the table's range, if any, proves corruption */
	next_zero_bit = ext2_find_next_zero_bit(bh->b_data,
				offset + EXT2_SB(sb)->s_itb_per_group,
				offset);
	if (next_zero_bit >= offset + EXT2_SB(sb)->s_itb_per_group)
		/* good bitmap for inode tables */
		return 1;

err_out:
	ext2_error(sb, __func__,
			"Invalid block bitmap - "
			"block_group = %d, block = %lu",
			block_group, bitmap_blk);
	return 0;
}
/*
 * Read the bitmap for a given block_group, and validate the
 * bits for block/inode/inode tables are set in the bitmaps
 *
 * Return buffer_head on success or NULL in case of failure.
 * The caller owns the returned buffer and must brelse() it.
 */
static struct buffer_head *
read_block_bitmap(struct super_block *sb, unsigned int block_group)
{
	struct ext2_group_desc * desc;
	struct buffer_head * bh = NULL;
	ext2_fsblk_t bitmap_blk;

	desc = ext2_get_group_desc(sb, block_group, NULL);
	if (!desc)
		return NULL;
	bitmap_blk = le32_to_cpu(desc->bg_block_bitmap);
	/* sb_getblk() only maps the buffer; actual read happens below */
	bh = sb_getblk(sb, bitmap_blk);
	if (unlikely(!bh)) {
		ext2_error(sb, __func__,
			    "Cannot read block bitmap - "
			    "block_group = %d, block_bitmap = %u",
			    block_group, le32_to_cpu(desc->bg_block_bitmap));
		return NULL;
	}
	/* fast path: buffer already up to date (otherwise it is now locked) */
	if (likely(bh_uptodate_or_lock(bh)))
		return bh;

	if (bh_submit_read(bh) < 0) {
		brelse(bh);
		ext2_error(sb, __func__,
			    "Cannot read block bitmap - "
			    "block_group = %d, block_bitmap = %u",
			    block_group, le32_to_cpu(desc->bg_block_bitmap));
		return NULL;
	}

	/* validation failure is logged but not fatal, see below */
	ext2_valid_block_bitmap(sb, desc, block_group, bh);
	/*
	 * file system mounted not to panic on error, continue with corrupt
	 * bitmap
	 */
	return bh;
}
  142. static void group_adjust_blocks(struct super_block *sb, int group_no,
  143. struct ext2_group_desc *desc, struct buffer_head *bh, int count)
  144. {
  145. if (count) {
  146. struct ext2_sb_info *sbi = EXT2_SB(sb);
  147. unsigned free_blocks;
  148. spin_lock(sb_bgl_lock(sbi, group_no));
  149. free_blocks = le16_to_cpu(desc->bg_free_blocks_count);
  150. desc->bg_free_blocks_count = cpu_to_le16(free_blocks + count);
  151. spin_unlock(sb_bgl_lock(sbi, group_no));
  152. mark_buffer_dirty(bh);
  153. }
  154. }
  155. /*
  156. * The reservation window structure operations
  157. * --------------------------------------------
  158. * Operations include:
  159. * dump, find, add, remove, is_empty, find_next_reservable_window, etc.
  160. *
  161. * We use a red-black tree to represent per-filesystem reservation
  162. * windows.
  163. *
  164. */
  165. /**
  166. * __rsv_window_dump() -- Dump the filesystem block allocation reservation map
  167. * @rb_root: root of per-filesystem reservation rb tree
  168. * @verbose: verbose mode
  169. * @fn: function which wishes to dump the reservation map
  170. *
  171. * If verbose is turned on, it will print the whole block reservation
  172. * windows(start, end). Otherwise, it will only print out the "bad" windows,
  173. * those windows that overlap with their immediate neighbors.
  174. */
  175. #if 1
/*
 * Walk the reservation tree in order and check two invariants for every
 * window: start < end (windows with start == 0 are skipped by the first
 * check), and no overlap with the in-order predecessor.  If a problem is
 * found while running quietly, the walk restarts in verbose mode so the
 * complete map is printed before BUG_ON(bad) fires.
 */
static void __rsv_window_dump(struct rb_root *root, int verbose,
			      const char *fn)
{
	struct rb_node *n;
	struct ext2_reserve_window_node *rsv, *prev;
	int bad;

restart:
	n = rb_first(root);
	bad = 0;
	prev = NULL;

	printk("Block Allocation Reservation Windows Map (%s):\n", fn);
	while (n) {
		rsv = rb_entry(n, struct ext2_reserve_window_node, rsv_node);
		if (verbose)
			printk("reservation window 0x%p "
			       "start: %lu, end: %lu\n",
			       rsv, rsv->rsv_start, rsv->rsv_end);
		/* malformed window: non-zero start at or past its end */
		if (rsv->rsv_start && rsv->rsv_start >= rsv->rsv_end) {
			printk("Bad reservation %p (start >= end)\n",
			       rsv);
			bad = 1;
		}
		/* overlap with the previous window in tree order */
		if (prev && prev->rsv_end >= rsv->rsv_start) {
			printk("Bad reservation %p (prev->end >= start)\n",
			       rsv);
			bad = 1;
		}
		if (bad) {
			if (!verbose) {
				/* re-run loudly so the full map is shown */
				printk("Restarting reservation walk in verbose mode\n");
				verbose = 1;
				goto restart;
			}
		}
		n = rb_next(n);
		prev = rsv;
	}
	printk("Window map complete.\n");
	BUG_ON(bad);
}
  216. #define rsv_window_dump(root, verbose) \
  217. __rsv_window_dump((root), (verbose), __func__)
  218. #else
  219. #define rsv_window_dump(root, verbose) do {} while (0)
  220. #endif
  221. /**
  222. * goal_in_my_reservation()
  223. * @rsv: inode's reservation window
  224. * @grp_goal: given goal block relative to the allocation block group
  225. * @group: the current allocation block group
  226. * @sb: filesystem super block
  227. *
  228. * Test if the given goal block (group relative) is within the file's
  229. * own block reservation window range.
  230. *
  231. * If the reservation window is outside the goal allocation group, return 0;
  232. * grp_goal (given goal block) could be -1, which means no specific
  233. * goal block. In this case, always return 1.
  234. * If the goal block is within the reservation window, return 1;
  235. * otherwise, return 0;
  236. */
  237. static int
  238. goal_in_my_reservation(struct ext2_reserve_window *rsv, ext2_grpblk_t grp_goal,
  239. unsigned int group, struct super_block * sb)
  240. {
  241. ext2_fsblk_t group_first_block, group_last_block;
  242. group_first_block = ext2_group_first_block_no(sb, group);
  243. group_last_block = group_first_block + EXT2_BLOCKS_PER_GROUP(sb) - 1;
  244. if ((rsv->_rsv_start > group_last_block) ||
  245. (rsv->_rsv_end < group_first_block))
  246. return 0;
  247. if ((grp_goal >= 0) && ((grp_goal + group_first_block < rsv->_rsv_start)
  248. || (grp_goal + group_first_block > rsv->_rsv_end)))
  249. return 0;
  250. return 1;
  251. }
  252. /**
  253. * search_reserve_window()
  254. * @rb_root: root of reservation tree
  255. * @goal: target allocation block
  256. *
  257. * Find the reserved window which includes the goal, or the previous one
  258. * if the goal is not in any window.
  259. * Returns NULL if there are no windows or if all windows start after the goal.
  260. */
  261. static struct ext2_reserve_window_node *
  262. search_reserve_window(struct rb_root *root, ext2_fsblk_t goal)
  263. {
  264. struct rb_node *n = root->rb_node;
  265. struct ext2_reserve_window_node *rsv;
  266. if (!n)
  267. return NULL;
  268. do {
  269. rsv = rb_entry(n, struct ext2_reserve_window_node, rsv_node);
  270. if (goal < rsv->rsv_start)
  271. n = n->rb_left;
  272. else if (goal > rsv->rsv_end)
  273. n = n->rb_right;
  274. else
  275. return rsv;
  276. } while (n);
  277. /*
  278. * We've fallen off the end of the tree: the goal wasn't inside
  279. * any particular node. OK, the previous node must be to one
  280. * side of the interval containing the goal. If it's the RHS,
  281. * we need to back up one.
  282. */
  283. if (rsv->rsv_start > goal) {
  284. n = rb_prev(&rsv->rsv_node);
  285. rsv = rb_entry(n, struct ext2_reserve_window_node, rsv_node);
  286. }
  287. return rsv;
  288. }
  289. /*
  290. * ext2_rsv_window_add() -- Insert a window to the block reservation rb tree.
  291. * @sb: super block
  292. * @rsv: reservation window to add
  293. *
  294. * Must be called with rsv_lock held.
  295. */
  296. void ext2_rsv_window_add(struct super_block *sb,
  297. struct ext2_reserve_window_node *rsv)
  298. {
  299. struct rb_root *root = &EXT2_SB(sb)->s_rsv_window_root;
  300. struct rb_node *node = &rsv->rsv_node;
  301. ext2_fsblk_t start = rsv->rsv_start;
  302. struct rb_node ** p = &root->rb_node;
  303. struct rb_node * parent = NULL;
  304. struct ext2_reserve_window_node *this;
  305. while (*p)
  306. {
  307. parent = *p;
  308. this = rb_entry(parent, struct ext2_reserve_window_node, rsv_node);
  309. if (start < this->rsv_start)
  310. p = &(*p)->rb_left;
  311. else if (start > this->rsv_end)
  312. p = &(*p)->rb_right;
  313. else {
  314. rsv_window_dump(root, 1);
  315. BUG();
  316. }
  317. }
  318. rb_link_node(node, parent, p);
  319. rb_insert_color(node, root);
  320. }
  321. /**
  322. * rsv_window_remove() -- unlink a window from the reservation rb tree
  323. * @sb: super block
  324. * @rsv: reservation window to remove
  325. *
  326. * Mark the block reservation window as not allocated, and unlink it
  327. * from the filesystem reservation window rb tree. Must be called with
  328. * rsv_lock held.
  329. */
  330. static void rsv_window_remove(struct super_block *sb,
  331. struct ext2_reserve_window_node *rsv)
  332. {
  333. rsv->rsv_start = EXT2_RESERVE_WINDOW_NOT_ALLOCATED;
  334. rsv->rsv_end = EXT2_RESERVE_WINDOW_NOT_ALLOCATED;
  335. rsv->rsv_alloc_hit = 0;
  336. rb_erase(&rsv->rsv_node, &EXT2_SB(sb)->s_rsv_window_root);
  337. }
  338. /*
  339. * rsv_is_empty() -- Check if the reservation window is allocated.
  340. * @rsv: given reservation window to check
  341. *
  342. * returns 1 if the end block is EXT2_RESERVE_WINDOW_NOT_ALLOCATED.
  343. */
  344. static inline int rsv_is_empty(struct ext2_reserve_window *rsv)
  345. {
  346. /* a valid reservation end block could not be 0 */
  347. return (rsv->_rsv_end == EXT2_RESERVE_WINDOW_NOT_ALLOCATED);
  348. }
  349. /**
  350. * ext2_init_block_alloc_info()
  351. * @inode: file inode structure
  352. *
  353. * Allocate and initialize the reservation window structure, and
  354. * link the window to the ext2 inode structure at last
  355. *
  356. * The reservation window structure is only dynamically allocated
  357. * and linked to ext2 inode the first time the open file
  358. * needs a new block. So, before every ext2_new_block(s) call, for
  359. * regular files, we should check whether the reservation window
  360. * structure exists or not. In the latter case, this function is called.
  361. * Fail to do so will result in block reservation being turned off for that
  362. * open file.
  363. *
  364. * This function is called from ext2_get_blocks_handle(), also called
  365. * when setting the reservation window size through ioctl before the file
  366. * is open for write (needs block allocation).
  367. *
  368. * Needs truncate_mutex protection prior to calling this function.
  369. */
  370. void ext2_init_block_alloc_info(struct inode *inode)
  371. {
  372. struct ext2_inode_info *ei = EXT2_I(inode);
  373. struct ext2_block_alloc_info *block_i;
  374. struct super_block *sb = inode->i_sb;
  375. block_i = kmalloc(sizeof(*block_i), GFP_NOFS);
  376. if (block_i) {
  377. struct ext2_reserve_window_node *rsv = &block_i->rsv_window_node;
  378. rsv->rsv_start = EXT2_RESERVE_WINDOW_NOT_ALLOCATED;
  379. rsv->rsv_end = EXT2_RESERVE_WINDOW_NOT_ALLOCATED;
  380. /*
  381. * if filesystem is mounted with NORESERVATION, the goal
  382. * reservation window size is set to zero to indicate
  383. * block reservation is off
  384. */
  385. if (!test_opt(sb, RESERVATION))
  386. rsv->rsv_goal_size = 0;
  387. else
  388. rsv->rsv_goal_size = EXT2_DEFAULT_RESERVE_BLOCKS;
  389. rsv->rsv_alloc_hit = 0;
  390. block_i->last_alloc_logical_block = 0;
  391. block_i->last_alloc_physical_block = 0;
  392. }
  393. ei->i_block_alloc_info = block_i;
  394. }
  395. /**
  396. * ext2_discard_reservation()
  397. * @inode: inode
  398. *
  399. * Discard(free) block reservation window on last file close, or truncate
  400. * or at last iput().
  401. *
  402. * It is being called in three cases:
  403. * ext2_release_file(): last writer closes the file
  404. * ext2_clear_inode(): last iput(), when nobody links to this file.
  405. * ext2_truncate(): when the block indirect map is about to change.
  406. */
  407. void ext2_discard_reservation(struct inode *inode)
  408. {
  409. struct ext2_inode_info *ei = EXT2_I(inode);
  410. struct ext2_block_alloc_info *block_i = ei->i_block_alloc_info;
  411. struct ext2_reserve_window_node *rsv;
  412. spinlock_t *rsv_lock = &EXT2_SB(inode->i_sb)->s_rsv_window_lock;
  413. if (!block_i)
  414. return;
  415. rsv = &block_i->rsv_window_node;
  416. if (!rsv_is_empty(&rsv->rsv_window)) {
  417. spin_lock(rsv_lock);
  418. if (!rsv_is_empty(&rsv->rsv_window))
  419. rsv_window_remove(inode->i_sb, rsv);
  420. spin_unlock(rsv_lock);
  421. }
  422. }
/**
 * ext2_free_blocks() -- Free given blocks and update quota and i_blocks
 * @inode:	inode that owned the blocks
 * @block:	start physical block to free
 * @count:	number of blocks to free
 *
 * Clears the blocks' bits in the group bitmap(s), adjusts the group
 * descriptor and superblock free-block counters, and releases quota.
 * A range spanning a group boundary is processed one group at a time
 * via the do_more loop.  Errors (bad range, system-zone blocks,
 * unreadable bitmap) are logged with ext2_error and freeing stops, but
 * whatever was already freed is still accounted for.
 */
void ext2_free_blocks (struct inode * inode, unsigned long block,
		       unsigned long count)
{
	struct buffer_head *bitmap_bh = NULL;
	struct buffer_head * bh2;
	unsigned long block_group;
	unsigned long bit;
	unsigned long i;
	unsigned long overflow;
	struct super_block * sb = inode->i_sb;
	struct ext2_sb_info * sbi = EXT2_SB(sb);
	struct ext2_group_desc * desc;
	struct ext2_super_block * es = sbi->s_es;
	unsigned freed = 0, group_freed;

	/* reject ranges outside the data zone; the middle test also
	 * catches arithmetic wrap-around of block + count */
	if (block < le32_to_cpu(es->s_first_data_block) ||
	    block + count < block ||
	    block + count > le32_to_cpu(es->s_blocks_count)) {
		ext2_error (sb, "ext2_free_blocks",
			    "Freeing blocks not in datazone - "
			    "block = %lu, count = %lu", block, count);
		goto error_return;
	}

	ext2_debug ("freeing block(s) %lu-%lu\n", block, block + count - 1);

do_more:
	overflow = 0;
	block_group = (block - le32_to_cpu(es->s_first_data_block)) /
		      EXT2_BLOCKS_PER_GROUP(sb);
	bit = (block - le32_to_cpu(es->s_first_data_block)) %
	      EXT2_BLOCKS_PER_GROUP(sb);
	/*
	 * Check to see if we are freeing blocks across a group
	 * boundary.  If so, clamp this pass to the current group and
	 * save the remainder in 'overflow' for the next pass.
	 */
	if (bit + count > EXT2_BLOCKS_PER_GROUP(sb)) {
		overflow = bit + count - EXT2_BLOCKS_PER_GROUP(sb);
		count -= overflow;
	}

	/* no-op on the first pass (bitmap_bh is NULL) */
	brelse(bitmap_bh);
	bitmap_bh = read_block_bitmap(sb, block_group);
	if (!bitmap_bh)
		goto error_return;

	desc = ext2_get_group_desc (sb, block_group, &bh2);
	if (!desc)
		goto error_return;

	/* refuse to free blocks that hold the group's own metadata */
	if (in_range (le32_to_cpu(desc->bg_block_bitmap), block, count) ||
	    in_range (le32_to_cpu(desc->bg_inode_bitmap), block, count) ||
	    in_range (block, le32_to_cpu(desc->bg_inode_table),
		      sbi->s_itb_per_group) ||
	    in_range (block + count - 1, le32_to_cpu(desc->bg_inode_table),
		      sbi->s_itb_per_group)) {
		ext2_error (sb, "ext2_free_blocks",
			    "Freeing blocks in system zones - "
			    "Block = %lu, count = %lu",
			    block, count);
		goto error_return;
	}

	for (i = 0, group_freed = 0; i < count; i++) {
		if (!ext2_clear_bit_atomic(sb_bgl_lock(sbi, block_group),
						bit + i, bitmap_bh->b_data)) {
			/* bit was already clear: log the double free but
			 * keep going with the remaining blocks */
			ext2_error(sb, __func__,
				"bit already cleared for block %lu", block + i);
		} else {
			group_freed++;
		}
	}

	mark_buffer_dirty(bitmap_bh);
	if (sb->s_flags & MS_SYNCHRONOUS)
		sync_dirty_buffer(bitmap_bh);

	group_adjust_blocks(sb, block_group, desc, bh2, group_freed);
	freed += group_freed;

	if (overflow) {
		/* continue with the leftover range in the next group */
		block += count;
		count = overflow;
		goto do_more;
	}
error_return:
	brelse(bitmap_bh);
	if (freed) {
		percpu_counter_add(&sbi->s_freeblocks_counter, freed);
		dquot_free_block_nodirty(inode, freed);
		mark_inode_dirty(inode);
	}
}
  512. /**
  513. * bitmap_search_next_usable_block()
  514. * @start: the starting block (group relative) of the search
  515. * @bh: bufferhead contains the block group bitmap
  516. * @maxblocks: the ending block (group relative) of the reservation
  517. *
  518. * The bitmap search --- search forward through the actual bitmap on disk until
  519. * we find a bit free.
  520. */
  521. static ext2_grpblk_t
  522. bitmap_search_next_usable_block(ext2_grpblk_t start, struct buffer_head *bh,
  523. ext2_grpblk_t maxblocks)
  524. {
  525. ext2_grpblk_t next;
  526. next = ext2_find_next_zero_bit(bh->b_data, maxblocks, start);
  527. if (next >= maxblocks)
  528. return -1;
  529. return next;
  530. }
/**
 * find_next_usable_block()
 * @start:	the starting block (group relative) to find next
 *		allocatable block in bitmap.
 * @bh:		bufferhead contains the block group bitmap
 * @maxblocks:	the ending block (group relative) for the search
 *
 * Find an allocatable block in a bitmap.  We perform the "most
 * appropriate allocation" algorithm of looking for a free block near
 * the initial goal; then for a free byte somewhere in the bitmap;
 * then for any free bit in the bitmap.
 *
 * Returns the group-relative block found, or -1 if the bitmap has no
 * free bit in [start, maxblocks).
 */
static ext2_grpblk_t
find_next_usable_block(int start, struct buffer_head *bh, int maxblocks)
{
	ext2_grpblk_t here, next;
	char *p, *r;

	if (start > 0) {
		/*
		 * The goal was occupied; search forward for a free
		 * block within the next XX blocks.
		 *
		 * end_goal is more or less random, but it has to be
		 * less than EXT2_BLOCKS_PER_GROUP. Aligning up to the
		 * next 64-bit boundary is simple..
		 */
		ext2_grpblk_t end_goal = (start + 63) & ~63;
		if (end_goal > maxblocks)
			end_goal = maxblocks;
		here = ext2_find_next_zero_bit(bh->b_data, end_goal, start);
		if (here < end_goal)
			return here;
		ext2_debug("Bit not found near goal\n");
	}

	here = start;
	if (here < 0)
		here = 0;

	/*
	 * Second pass: byte-scan for a completely free byte (eight
	 * clear bits), which yields a byte-aligned run of free blocks.
	 */
	p = ((char *)bh->b_data) + (here >> 3);
	r = memscan(p, 0, ((maxblocks + 7) >> 3) - (here >> 3));
	next = (r - ((char *)bh->b_data)) << 3;
	if (next < maxblocks && next >= here)
		return next;

	/* Last resort: take any free bit at all. */
	here = bitmap_search_next_usable_block(here, bh, maxblocks);
	return here;
}
/**
 * ext2_try_to_allocate()
 * @sb:			superblock
 * @group:		given allocation block group
 * @bitmap_bh:		bufferhead holds the block bitmap
 * @grp_goal:		given target block within the group (-1 = no goal)
 * @count:		in: desired number of blocks;
 *			out: number of blocks actually allocated
 * @my_rsv:		reservation window, or NULL for unreserved allocation
 *
 * Attempt to allocate blocks within a give range. Set the range of allocation
 * first, then find the first free bit(s) from the bitmap (within the range),
 * and at last, allocate the blocks by claiming the found free bit as allocated.
 *
 * To set the range of this allocation:
 *	if there is a reservation window, only try to allocate block(s)
 *	from the file's own reservation window;
 *	Otherwise, the allocation range starts from the give goal block,
 *	ends at the block group's last block.
 *
 * Returns the group-relative number of the first allocated block, or -1
 * on failure.  If we failed to allocate the desired block then we may end
 * up crossing to a new bitmap.
 */
static int
ext2_try_to_allocate(struct super_block *sb, int group,
			struct buffer_head *bitmap_bh, ext2_grpblk_t grp_goal,
			unsigned long *count,
			struct ext2_reserve_window *my_rsv)
{
	ext2_fsblk_t group_first_block;
	ext2_grpblk_t start, end;
	unsigned long num = 0;

	/* we do allocation within the reservation window if we have a window */
	if (my_rsv) {
		group_first_block = ext2_group_first_block_no(sb, group);
		if (my_rsv->_rsv_start >= group_first_block)
			start = my_rsv->_rsv_start - group_first_block;
		else
			/* reservation window cross group boundary */
			start = 0;
		end = my_rsv->_rsv_end - group_first_block + 1;
		if (end > EXT2_BLOCKS_PER_GROUP(sb))
			/* reservation window crosses group boundary */
			end = EXT2_BLOCKS_PER_GROUP(sb);
		/* honour the goal only if it falls inside the window */
		if ((start <= grp_goal) && (grp_goal < end))
			start = grp_goal;
		else
			grp_goal = -1;
	} else {
		if (grp_goal > 0)
			start = grp_goal;
		else
			start = 0;
		end = EXT2_BLOCKS_PER_GROUP(sb);
	}

	BUG_ON(start > EXT2_BLOCKS_PER_GROUP(sb));

repeat:
	if (grp_goal < 0) {
		grp_goal = find_next_usable_block(start, bitmap_bh, end);
		if (grp_goal < 0)
			goto fail_access;
		if (!my_rsv) {
			/*
			 * No reservation: slide the goal back over up to
			 * seven immediately-preceding free bits so
			 * allocations pack toward lower block numbers.
			 */
			int i;

			for (i = 0; i < 7 && grp_goal > start &&
					!ext2_test_bit(grp_goal - 1,
							bitmap_bh->b_data);
				i++, grp_goal--)
				;
		}
	}
	start = grp_goal;

	/* claim the first block; a set bit means we lost a race */
	if (ext2_set_bit_atomic(sb_bgl_lock(EXT2_SB(sb), group), grp_goal,
				bitmap_bh->b_data)) {
		/*
		 * The block was allocated by another thread, or it was
		 * allocated and then freed by another thread
		 */
		start++;
		grp_goal++;
		if (start >= end)
			goto fail_access;
		goto repeat;
	}
	num++;
	grp_goal++;
	/* greedily extend the run up to *count contiguous free blocks */
	while (num < *count && grp_goal < end
		&& !ext2_set_bit_atomic(sb_bgl_lock(EXT2_SB(sb), group),
					grp_goal, bitmap_bh->b_data)) {
		num++;
		grp_goal++;
	}
	*count = num;
	return grp_goal - num;
fail_access:
	*count = num;
	return -1;
}
/**
 * find_next_reservable_window():
 * find a reservable space within the given range.
 * It does not allocate the reservation window for now:
 * alloc_new_reservation() will do the work later.
 *
 * @search_head: the head of the searching list;
 *	This is not necessarily the list head of the whole filesystem
 *
 *	We have both head and start_block to assist the search
 *	for the reservable space. The list starts from head,
 *	but we will shift to the place where start_block is,
 *	then start from there, when looking for a reservable space.
 *
 * @my_rsv: this file's window; on success it is (re)positioned onto the
 *	space that was found
 *
 * @sb: the super block
 *
 * @start_block: the first block we consider to start
 *	the real search from
 *
 * @last_block:
 *	the maximum block number that our goal reservable space
 *	could start from. This is normally the last block in this
 *	group. The search will end when we found the start of next
 *	possible reservable space is out of this boundary.
 *	This could handle the cross boundary reservation window
 *	request.
 *
 * basically we search from the given range, rather than the whole
 * reservation double linked list, (start_block, last_block)
 * to find a free region that is of my size and has not
 * been reserved.
 *
 * Returns 0 on success, -1 if no gap of the requested size exists
 * before @last_block.  Caller must hold rsv_lock.
 */
static int find_next_reservable_window(
				struct ext2_reserve_window_node *search_head,
				struct ext2_reserve_window_node *my_rsv,
				struct super_block * sb,
				ext2_fsblk_t start_block,
				ext2_fsblk_t last_block)
{
	struct rb_node *next;
	struct ext2_reserve_window_node *rsv, *prev;
	ext2_fsblk_t cur;
	int size = my_rsv->rsv_goal_size;

	/* TODO: make the start of the reservation window byte-aligned */
	/* cur = *start_block & ~7;*/
	cur = start_block;
	rsv = search_head;
	if (!rsv)
		return -1;

	while (1) {
		/* candidate start: just past the window we are looking at */
		if (cur <= rsv->rsv_end)
			cur = rsv->rsv_end + 1;

		/* TODO?
		 * in the case we could not find a reservable space
		 * that is what is expected, during the re-search, we could
		 * remember what's the largest reservable space we could have
		 * and return that one.
		 *
		 * For now it will fail if we could not find the reservable
		 * space with expected-size (or more)...
		 */
		if (cur > last_block)
			return -1;		/* fail */

		prev = rsv;
		next = rb_next(&rsv->rsv_node);
		/* rsv computed from a possibly-NULL next is never
		 * dereferenced before the !next check just below */
		rsv = rb_entry(next,struct ext2_reserve_window_node,rsv_node);

		/*
		 * Reached the last reservation, we can just append to the
		 * previous one.
		 */
		if (!next)
			break;

		if (cur + size <= rsv->rsv_start) {
			/*
			 * Found a reserveable space big enough. We could
			 * have a reservation across the group boundary here
			 */
			break;
		}
	}
	/*
	 * we come here either :
	 * when we reach the end of the whole list,
	 * and there is empty reservable space after last entry in the list.
	 * append it to the end of the list.
	 *
	 * or we found one reservable space in the middle of the list,
	 * return the reservation window that we could append to.
	 * succeed.
	 */

	/* unlink my_rsv first unless we are merely growing it in place */
	if ((prev != my_rsv) && (!rsv_is_empty(&my_rsv->rsv_window)))
		rsv_window_remove(sb, my_rsv);

	/*
	 * Let's book the whole available window for now. We will check the
	 * disk bitmap later and then, if there are free blocks then we adjust
	 * the window size if it's larger than requested.
	 * Otherwise, we will remove this node from the tree next time
	 * call find_next_reservable_window.
	 */
	my_rsv->rsv_start = cur;
	my_rsv->rsv_end = cur + size - 1;
	my_rsv->rsv_alloc_hit = 0;

	if (prev != my_rsv)
		ext2_rsv_window_add(sb, my_rsv);

	return 0;
}
/**
 * alloc_new_reservation()--allocate a new reservation window
 *
 * To make a new reservation, we search part of the filesystem
 * reservation list (the list inside the group).  We try to
 * allocate a new reservation window near the allocation goal,
 * or the beginning of the group, if there is no goal.
 *
 * We first find a reservable space after the goal, then from
 * there, we check the bitmap for the first free block after
 * it.  If there is no free block until the end of group, then the
 * whole group is full, we failed.  Otherwise, check if the free
 * block is inside the expected reservable space, if so, we
 * succeed.
 * If the first free block is outside the reservable space, then
 * start from the first free block, we search for next available
 * space, and go on.
 *
 * on succeed, a new reservation will be found and inserted into the list
 * It contains at least one free block, and it does not overlap with other
 * reservation windows.
 *
 * failed: we failed to find a reservation window in this group
 *
 * @my_rsv: the reservation window
 *
 * @grp_goal: The goal (group-relative).  It is where the search for a
 *	free reservable space should start from.
 *	if we have a goal(goal >0 ), then start from there,
 *	no goal(goal = -1), we start from the first block
 *	of the group.
 *
 * @sb: the super block
 * @group: the group we are trying to allocate in
 * @bitmap_bh: the block group block bitmap
 *
 * Returns 0 on success, -1 on failure.  Takes and releases
 * s_rsv_window_lock internally; may sleep reading the bitmap? no --
 * @bitmap_bh is already read by the caller.
 */
static int alloc_new_reservation(struct ext2_reserve_window_node *my_rsv,
		ext2_grpblk_t grp_goal, struct super_block *sb,
		unsigned int group, struct buffer_head *bitmap_bh)
{
	struct ext2_reserve_window_node *search_head;
	ext2_fsblk_t group_first_block, group_end_block, start_block;
	ext2_grpblk_t first_free_block;
	struct rb_root *fs_rsv_root = &EXT2_SB(sb)->s_rsv_window_root;
	unsigned long size;
	int ret;
	spinlock_t *rsv_lock = &EXT2_SB(sb)->s_rsv_window_lock;

	group_first_block = ext2_group_first_block_no(sb, group);
	group_end_block = group_first_block + (EXT2_BLOCKS_PER_GROUP(sb) - 1);

	if (grp_goal < 0)
		start_block = group_first_block;
	else
		start_block = grp_goal + group_first_block;

	size = my_rsv->rsv_goal_size;

	if (!rsv_is_empty(&my_rsv->rsv_window)) {
		/*
		 * if the old reservation is cross group boundary
		 * and if the goal is inside the old reservation window,
		 * we will come here when we just failed to allocate from
		 * the first part of the window.  We still have another part
		 * that belongs to the next group.  In this case, there is no
		 * point to discard our window and try to allocate a new one
		 * in this group(which will fail).  we should
		 * keep the reservation window, just simply move on.
		 *
		 * Maybe we could shift the start block of the reservation
		 * window to the first block of next group.
		 */
		if ((my_rsv->rsv_start <= group_end_block) &&
		    (my_rsv->rsv_end > group_end_block) &&
		    (start_block >= my_rsv->rsv_start))
			return -1;

		if ((my_rsv->rsv_alloc_hit >
		     (my_rsv->rsv_end - my_rsv->rsv_start + 1) / 2)) {
			/*
			 * if the previously allocation hit ratio is
			 * greater than 1/2, then we double the size of
			 * the reservation window the next time,
			 * otherwise we keep the same size window
			 */
			size = size * 2;
			if (size > EXT2_MAX_RESERVE_BLOCKS)
				size = EXT2_MAX_RESERVE_BLOCKS;
			my_rsv->rsv_goal_size= size;
		}
	}

	spin_lock(rsv_lock);
	/*
	 * shift the search start to the window near the goal block
	 */
	search_head = search_reserve_window(fs_rsv_root, start_block);

	/*
	 * find_next_reservable_window() simply finds a reservable window
	 * inside the given range(start_block, group_end_block).
	 *
	 * To make sure the reservation window has a free bit inside it, we
	 * need to check the bitmap after we found a reservable window.
	 */
retry:
	ret = find_next_reservable_window(search_head, my_rsv, sb,
					  start_block, group_end_block);

	if (ret == -1) {
		if (!rsv_is_empty(&my_rsv->rsv_window))
			rsv_window_remove(sb, my_rsv);
		spin_unlock(rsv_lock);
		return -1;
	}

	/*
	 * On success, find_next_reservable_window() returns the
	 * reservation window where there is a reservable space after it.
	 * Before we reserve this reservable space, we need
	 * to make sure there is at least a free block inside this region.
	 *
	 * Search the first free bit on the block bitmap.  Search starts from
	 * the start block of the reservable space we just found.
	 * The lock is dropped here: the bitmap scan does not touch the tree.
	 */
	spin_unlock(rsv_lock);
	first_free_block = bitmap_search_next_usable_block(
			my_rsv->rsv_start - group_first_block,
			bitmap_bh, group_end_block - group_first_block + 1);

	if (first_free_block < 0) {
		/*
		 * no free block left on the bitmap, no point
		 * to reserve the space.  return failed.
		 */
		spin_lock(rsv_lock);
		if (!rsv_is_empty(&my_rsv->rsv_window))
			rsv_window_remove(sb, my_rsv);
		spin_unlock(rsv_lock);
		return -1;		/* failed */
	}

	start_block = first_free_block + group_first_block;
	/*
	 * check if the first free block is within the
	 * free space we just reserved
	 */
	if (start_block >= my_rsv->rsv_start && start_block <= my_rsv->rsv_end)
		return 0;		/* success */
	/*
	 * if the first free bit we found is out of the reservable space
	 * continue search for next reservable space,
	 * start from where the free block is,
	 * we also shift the list head to where we stopped last time
	 */
	search_head = my_rsv;
	spin_lock(rsv_lock);
	goto retry;
}
  928. /**
  929. * try_to_extend_reservation()
  930. * @my_rsv: given reservation window
  931. * @sb: super block
  932. * @size: the delta to extend
  933. *
  934. * Attempt to expand the reservation window large enough to have
  935. * required number of free blocks
  936. *
  937. * Since ext2_try_to_allocate() will always allocate blocks within
  938. * the reservation window range, if the window size is too small,
  939. * multiple blocks allocation has to stop at the end of the reservation
  940. * window. To make this more efficient, given the total number of
  941. * blocks needed and the current size of the window, we try to
  942. * expand the reservation window size if necessary on a best-effort
  943. * basis before ext2_new_blocks() tries to allocate blocks.
  944. */
  945. static void try_to_extend_reservation(struct ext2_reserve_window_node *my_rsv,
  946. struct super_block *sb, int size)
  947. {
  948. struct ext2_reserve_window_node *next_rsv;
  949. struct rb_node *next;
  950. spinlock_t *rsv_lock = &EXT2_SB(sb)->s_rsv_window_lock;
  951. if (!spin_trylock(rsv_lock))
  952. return;
  953. next = rb_next(&my_rsv->rsv_node);
  954. if (!next)
  955. my_rsv->rsv_end += size;
  956. else {
  957. next_rsv = rb_entry(next, struct ext2_reserve_window_node, rsv_node);
  958. if ((next_rsv->rsv_start - my_rsv->rsv_end - 1) >= size)
  959. my_rsv->rsv_end += size;
  960. else
  961. my_rsv->rsv_end = next_rsv->rsv_start - 1;
  962. }
  963. spin_unlock(rsv_lock);
  964. }
/**
 * ext2_try_to_allocate_with_rsv()
 * @sb:			superblock
 * @group:		given allocation block group
 * @bitmap_bh:		bufferhead holds the block bitmap
 * @grp_goal:		given target block within the group
 * @count:		in: target number of blocks to allocate;
 *			out: number of blocks actually allocated
 * @my_rsv:		reservation window, or NULL to bypass reservations
 *
 * This is the main function used to allocate a new block and its reservation
 * window.
 *
 * Each time when a new block allocation is need, first try to allocate from
 * its own reservation.  If it does not have a reservation window, instead of
 * looking for a free bit on bitmap first, then look up the reservation list to
 * see if it is inside somebody else's reservation window, we try to allocate a
 * reservation window for it starting from the goal first.  Then do the block
 * allocation within the reservation window.
 *
 * This will avoid keeping on searching the reservation list again and
 * again when somebody is looking for a free block (without
 * reservation), and there are lots of free blocks, but they are all
 * being reserved.
 *
 * We use a red-black tree for the per-filesystem reservation list.
 *
 * Returns the group-relative block number of the first allocated block,
 * or a negative value on failure.
 */
static ext2_grpblk_t
ext2_try_to_allocate_with_rsv(struct super_block *sb, unsigned int group,
			struct buffer_head *bitmap_bh, ext2_grpblk_t grp_goal,
			struct ext2_reserve_window_node * my_rsv,
			unsigned long *count)
{
	ext2_fsblk_t group_first_block, group_last_block;
	ext2_grpblk_t ret = 0;
	unsigned long num = *count;

	/*
	 * we don't deal with reservation when
	 * filesystem is mounted without reservation
	 * or the file is not a regular file
	 * or last attempt to allocate a block with reservation turned on failed
	 */
	if (my_rsv == NULL) {
		return ext2_try_to_allocate(sb, group, bitmap_bh,
					    grp_goal, count, NULL);
	}
	/*
	 * grp_goal is a group relative block number (if there is a goal)
	 * 0 <= grp_goal < EXT2_BLOCKS_PER_GROUP(sb)
	 * first block is a filesystem wide block number
	 * first block is the block number of the first block in this group
	 */
	group_first_block = ext2_group_first_block_no(sb, group);
	group_last_block = group_first_block + (EXT2_BLOCKS_PER_GROUP(sb) - 1);

	/*
	 * Basically we will allocate a new block from inode's reservation
	 * window.
	 *
	 * We need to allocate a new reservation window, if:
	 * a) inode does not have a reservation window; or
	 * b) last attempt to allocate a block from existing reservation
	 *    failed; or
	 * c) we come here with a goal and with a reservation window
	 *
	 * We do not need to allocate a new reservation window if we come here
	 * at the beginning with a goal and the goal is inside the window, or
	 * we don't have a goal but already have a reservation window.
	 * then we could go to allocate from the reservation window directly.
	 */
	while (1) {
		if (rsv_is_empty(&my_rsv->rsv_window) || (ret < 0) ||
			!goal_in_my_reservation(&my_rsv->rsv_window,
						grp_goal, group, sb)) {
			/* Grow the goal size so one window can cover *count. */
			if (my_rsv->rsv_goal_size < *count)
				my_rsv->rsv_goal_size = *count;
			ret = alloc_new_reservation(my_rsv, grp_goal, sb,
							group, bitmap_bh);
			if (ret < 0)
				break;			/* failed */

			if (!goal_in_my_reservation(&my_rsv->rsv_window,
							grp_goal, group, sb))
				grp_goal = -1;
		} else if (grp_goal >= 0) {
			/*
			 * Window exists and contains the goal: try to widen
			 * it if it cannot hold all *count blocks from goal.
			 */
			int curr = my_rsv->rsv_end -
				   (grp_goal + group_first_block) + 1;

			if (curr < *count)
				try_to_extend_reservation(my_rsv, sb,
							  *count - curr);
		}

		/* The window must intersect this group, or the tree is corrupt. */
		if ((my_rsv->rsv_start > group_last_block) ||
		    (my_rsv->rsv_end < group_first_block)) {
			rsv_window_dump(&EXT2_SB(sb)->s_rsv_window_root, 1);
			BUG();
		}
		ret = ext2_try_to_allocate(sb, group, bitmap_bh, grp_goal,
					   &num, &my_rsv->rsv_window);
		if (ret >= 0) {
			my_rsv->rsv_alloc_hit += num;
			*count = num;
			break;				/* succeed */
		}
		num = *count;
	}
	return ret;
}
  1069. /**
  1070. * ext2_has_free_blocks()
  1071. * @sbi: in-core super block structure.
  1072. *
  1073. * Check if filesystem has at least 1 free block available for allocation.
  1074. */
  1075. static int ext2_has_free_blocks(struct ext2_sb_info *sbi)
  1076. {
  1077. ext2_fsblk_t free_blocks, root_blocks;
  1078. free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter);
  1079. root_blocks = le32_to_cpu(sbi->s_es->s_r_blocks_count);
  1080. if (free_blocks < root_blocks + 1 && !capable(CAP_SYS_RESOURCE) &&
  1081. !uid_eq(sbi->s_resuid, current_fsuid()) &&
  1082. (gid_eq(sbi->s_resgid, GLOBAL_ROOT_GID) ||
  1083. !in_group_p (sbi->s_resgid))) {
  1084. return 0;
  1085. }
  1086. return 1;
  1087. }
  1088. /*
  1089. * Returns 1 if the passed-in block region is valid; 0 if some part overlaps
  1090. * with filesystem metadata blocksi.
  1091. */
  1092. int ext2_data_block_valid(struct ext2_sb_info *sbi, ext2_fsblk_t start_blk,
  1093. unsigned int count)
  1094. {
  1095. if ((start_blk <= le32_to_cpu(sbi->s_es->s_first_data_block)) ||
  1096. (start_blk + count < start_blk) ||
  1097. (start_blk > le32_to_cpu(sbi->s_es->s_blocks_count)))
  1098. return 0;
  1099. /* Ensure we do not step over superblock */
  1100. if ((start_blk <= sbi->s_sb_block) &&
  1101. (start_blk + count >= sbi->s_sb_block))
  1102. return 0;
  1103. return 1;
  1104. }
/*
 * ext2_new_blocks() -- core block(s) allocation function
 * @inode:	file inode
 * @goal:	given target block (filesystem wide)
 * @count:	in: target number of blocks to allocate;
 *		out: number of blocks actually allocated
 * @errp:	error code (0, -ENOSPC, -EIO, or a quota error)
 *
 * ext2_new_blocks uses a goal block to assist allocation.  If the goal is
 * free, or there is a free block within 32 blocks of the goal, that block
 * is allocated.  Otherwise a forward search is made for a free block; within
 * each block group the search first looks for an entire free byte in the block
 * bitmap, and then for any free bit if that fails.
 * This function also updates quota and i_blocks field.
 *
 * Returns the filesystem-wide number of the first allocated block, or 0
 * on failure (with *errp set).
 */
ext2_fsblk_t ext2_new_blocks(struct inode *inode, ext2_fsblk_t goal,
		    unsigned long *count, int *errp)
{
	struct buffer_head *bitmap_bh = NULL;
	struct buffer_head *gdp_bh;
	int group_no;
	int goal_group;
	ext2_grpblk_t grp_target_blk;	/* blockgroup relative goal block */
	ext2_grpblk_t grp_alloc_blk;	/* blockgroup-relative allocated block*/
	ext2_fsblk_t ret_block;		/* filesystem-wide allocated block */
	int bgi;			/* blockgroup iteration index */
	int performed_allocation = 0;
	ext2_grpblk_t free_blocks;	/* number of free blocks in a group */
	struct super_block *sb;
	struct ext2_group_desc *gdp;
	struct ext2_super_block *es;
	struct ext2_sb_info *sbi;
	struct ext2_reserve_window_node *my_rsv = NULL;
	struct ext2_block_alloc_info *block_i;
	unsigned short windowsz = 0;
	unsigned long ngroups;
	unsigned long num = *count;
	int ret;

	*errp = -ENOSPC;
	sb = inode->i_sb;

	/*
	 * Check quota for allocation of this block.
	 */
	ret = dquot_alloc_block(inode, num);
	if (ret) {
		*errp = ret;
		return 0;
	}

	sbi = EXT2_SB(sb);
	es = EXT2_SB(sb)->s_es;
	ext2_debug("goal=%lu.\n", goal);
	/*
	 * Allocate a block from reservation only when
	 * filesystem is mounted with reservation(default,-o reservation), and
	 * it's a regular file, and
	 * the desired window size is greater than 0 (One could use ioctl
	 * command EXT2_IOC_SETRSVSZ to set the window size to 0 to turn off
	 * reservation on that particular file)
	 */
	block_i = EXT2_I(inode)->i_block_alloc_info;
	if (block_i) {
		windowsz = block_i->rsv_window_node.rsv_goal_size;
		if (windowsz > 0)
			my_rsv = &block_i->rsv_window_node;
	}

	if (!ext2_has_free_blocks(sbi)) {
		*errp = -ENOSPC;
		goto out;
	}

	/*
	 * First, test whether the goal block is free.
	 * Out-of-range goals are silently clamped to the first data block.
	 */
	if (goal < le32_to_cpu(es->s_first_data_block) ||
	    goal >= le32_to_cpu(es->s_blocks_count))
		goal = le32_to_cpu(es->s_first_data_block);
	group_no = (goal - le32_to_cpu(es->s_first_data_block)) /
			EXT2_BLOCKS_PER_GROUP(sb);
	goal_group = group_no;
retry_alloc:
	gdp = ext2_get_group_desc(sb, group_no, &gdp_bh);
	if (!gdp)
		goto io_error;

	free_blocks = le16_to_cpu(gdp->bg_free_blocks_count);
	/*
	 * if there is not enough free blocks to make a new resevation
	 * turn off reservation for this allocation
	 */
	if (my_rsv && (free_blocks < windowsz)
		&& (free_blocks > 0)
		&& (rsv_is_empty(&my_rsv->rsv_window)))
		my_rsv = NULL;

	if (free_blocks > 0) {
		grp_target_blk = ((goal - le32_to_cpu(es->s_first_data_block)) %
				EXT2_BLOCKS_PER_GROUP(sb));
		bitmap_bh = read_block_bitmap(sb, group_no);
		if (!bitmap_bh)
			goto io_error;
		grp_alloc_blk = ext2_try_to_allocate_with_rsv(sb, group_no,
					bitmap_bh, grp_target_blk,
					my_rsv, &num);
		if (grp_alloc_blk >= 0)
			goto allocated;
	}

	ngroups = EXT2_SB(sb)->s_groups_count;
	smp_rmb();

	/*
	 * Now search the rest of the groups.  We assume that
	 * group_no and gdp correctly point to the last group visited.
	 */
	for (bgi = 0; bgi < ngroups; bgi++) {
		group_no++;
		if (group_no >= ngroups)
			group_no = 0;
		gdp = ext2_get_group_desc(sb, group_no, &gdp_bh);
		if (!gdp)
			goto io_error;

		free_blocks = le16_to_cpu(gdp->bg_free_blocks_count);
		/*
		 * skip this group (and avoid loading bitmap) if there
		 * are no free blocks
		 */
		if (!free_blocks)
			continue;
		/*
		 * skip this group if the number of
		 * free blocks is less than half of the reservation
		 * window size.
		 */
		if (my_rsv && (free_blocks <= (windowsz/2)))
			continue;

		brelse(bitmap_bh);
		bitmap_bh = read_block_bitmap(sb, group_no);
		if (!bitmap_bh)
			goto io_error;
		/*
		 * try to allocate block(s) from this group, without a goal(-1).
		 */
		grp_alloc_blk = ext2_try_to_allocate_with_rsv(sb, group_no,
					bitmap_bh, -1, my_rsv, &num);
		if (grp_alloc_blk >= 0)
			goto allocated;
	}
	/*
	 * We may end up a bogus earlier ENOSPC error due to
	 * filesystem is "full" of reservations, but
	 * there maybe indeed free blocks available on disk
	 * In this case, we just forget about the reservations
	 * just do block allocation as without reservations.
	 */
	if (my_rsv) {
		my_rsv = NULL;
		windowsz = 0;
		group_no = goal_group;
		goto retry_alloc;
	}
	/* No space left on the device */
	*errp = -ENOSPC;
	goto out;

allocated:

	ext2_debug("using block group %d(%d)\n",
			group_no, gdp->bg_free_blocks_count);

	ret_block = grp_alloc_blk + ext2_group_first_block_no(sb, group_no);

	/* Sanity check: refuse blocks that overlap group metadata. */
	if (in_range(le32_to_cpu(gdp->bg_block_bitmap), ret_block, num) ||
	    in_range(le32_to_cpu(gdp->bg_inode_bitmap), ret_block, num) ||
	    in_range(ret_block, le32_to_cpu(gdp->bg_inode_table),
		      EXT2_SB(sb)->s_itb_per_group) ||
	    in_range(ret_block + num - 1, le32_to_cpu(gdp->bg_inode_table),
		      EXT2_SB(sb)->s_itb_per_group)) {
		ext2_error(sb, "ext2_new_blocks",
			    "Allocating block in system zone - "
			    "blocks from "E2FSBLK", length %lu",
			    ret_block, num);
		/*
		 * ext2_try_to_allocate marked the blocks we allocated as in
		 * use.  So we may want to selectively mark some of the blocks
		 * as free
		 */
		goto retry_alloc;
	}

	performed_allocation = 1;

	if (ret_block + num - 1 >= le32_to_cpu(es->s_blocks_count)) {
		ext2_error(sb, "ext2_new_blocks",
			    "block("E2FSBLK") >= blocks count(%d) - "
			    "block_group = %d, es == %p ", ret_block,
			le32_to_cpu(es->s_blocks_count), group_no, es);
		goto out;
	}

	group_adjust_blocks(sb, group_no, gdp, gdp_bh, -num);
	percpu_counter_sub(&sbi->s_freeblocks_counter, num);

	mark_buffer_dirty(bitmap_bh);
	if (sb->s_flags & MS_SYNCHRONOUS)
		sync_dirty_buffer(bitmap_bh);

	*errp = 0;
	brelse(bitmap_bh);
	/* Allocated fewer than requested: give back the unused quota. */
	if (num < *count) {
		dquot_free_block_nodirty(inode, *count-num);
		mark_inode_dirty(inode);
		*count = num;
	}
	return ret_block;

io_error:
	*errp = -EIO;
out:
	/*
	 * Undo the block allocation
	 */
	if (!performed_allocation) {
		dquot_free_block_nodirty(inode, *count);
		mark_inode_dirty(inode);
	}
	brelse(bitmap_bh);
	return 0;
}
  1317. ext2_fsblk_t ext2_new_block(struct inode *inode, unsigned long goal, int *errp)
  1318. {
  1319. unsigned long count = 1;
  1320. return ext2_new_blocks(inode, goal, &count, errp);
  1321. }
  1322. #ifdef EXT2FS_DEBUG
  1323. unsigned long ext2_count_free(struct buffer_head *map, unsigned int numchars)
  1324. {
  1325. return numchars * BITS_PER_BYTE - memweight(map->b_data, numchars);
  1326. }
  1327. #endif /* EXT2FS_DEBUG */
/*
 * Total free blocks in the filesystem, summed from the group descriptors.
 * With EXT2FS_DEBUG the bitmaps are counted too, each group's stored vs.
 * counted values are printed, and the bitmap-derived total is returned
 * instead of the descriptor-derived one.
 */
unsigned long ext2_count_free_blocks (struct super_block * sb)
{
	struct ext2_group_desc * desc;
	unsigned long desc_count = 0;
	int i;
#ifdef EXT2FS_DEBUG
	unsigned long bitmap_count, x;
	struct ext2_super_block *es;

	es = EXT2_SB(sb)->s_es;
	desc_count = 0;
	bitmap_count = 0;
	desc = NULL;
	for (i = 0; i < EXT2_SB(sb)->s_groups_count; i++) {
		struct buffer_head *bitmap_bh;
		desc = ext2_get_group_desc (sb, i, NULL);
		if (!desc)
			continue;
		desc_count += le16_to_cpu(desc->bg_free_blocks_count);
		bitmap_bh = read_block_bitmap(sb, i);
		if (!bitmap_bh)
			continue;

		x = ext2_count_free(bitmap_bh, sb->s_blocksize);
		printk ("group %d: stored = %d, counted = %lu\n",
			i, le16_to_cpu(desc->bg_free_blocks_count), x);
		bitmap_count += x;
		brelse(bitmap_bh);
	}
	printk("ext2_count_free_blocks: stored = %lu, computed = %lu, %lu\n",
		(long)le32_to_cpu(es->s_free_blocks_count),
		desc_count, bitmap_count);
	return bitmap_count;
#else
	for (i = 0; i < EXT2_SB(sb)->s_groups_count; i++) {
		desc = ext2_get_group_desc (sb, i, NULL);
		if (!desc)
			continue;
		desc_count += le16_to_cpu(desc->bg_free_blocks_count);
	}
	return desc_count;
#endif
}
  1369. static inline int test_root(int a, int b)
  1370. {
  1371. int num = b;
  1372. while (a > num)
  1373. num *= b;
  1374. return num == a;
  1375. }
  1376. static int ext2_group_sparse(int group)
  1377. {
  1378. if (group <= 1)
  1379. return 1;
  1380. return (test_root(group, 3) || test_root(group, 5) ||
  1381. test_root(group, 7));
  1382. }
  1383. /**
  1384. * ext2_bg_has_super - number of blocks used by the superblock in group
  1385. * @sb: superblock for filesystem
  1386. * @group: group number to check
  1387. *
  1388. * Return the number of blocks used by the superblock (primary or backup)
  1389. * in this group. Currently this will be only 0 or 1.
  1390. */
  1391. int ext2_bg_has_super(struct super_block *sb, int group)
  1392. {
  1393. if (EXT2_HAS_RO_COMPAT_FEATURE(sb,EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER)&&
  1394. !ext2_group_sparse(group))
  1395. return 0;
  1396. return 1;
  1397. }
  1398. /**
  1399. * ext2_bg_num_gdb - number of blocks used by the group table in group
  1400. * @sb: superblock for filesystem
  1401. * @group: group number to check
  1402. *
  1403. * Return the number of blocks used by the group descriptor table
  1404. * (primary or backup) in this group. In the future there may be a
  1405. * different number of descriptor blocks in each group.
  1406. */
  1407. unsigned long ext2_bg_num_gdb(struct super_block *sb, int group)
  1408. {
  1409. return ext2_bg_has_super(sb, group) ? EXT2_SB(sb)->s_gdb_count : 0;
  1410. }