пре 17 година · 58617d5e59
--- a/Documentation/filesystems/ext4.txt
+++ b/Documentation/filesystems/ext4.txt
@@ -2,19 +2,24 @@
 
															 Ext4 Filesystem
														
 
															 ===============
														
 
															-This is a development version of the ext4 filesystem, an advanced level
														
 
															-of the ext3 filesystem which incorporates scalability and reliability
														
 
															-enhancements for supporting large filesystems (64 bit) in keeping with
														
 
															-increasing disk capacities and state-of-the-art feature requirements.
														
 
															+Ext4 is an advanced level of the ext3 filesystem which incorporates
														
 
															+scalability and reliability enhancements for supporting large filesystems
														
 
															+(64 bit) in keeping with increasing disk capacities and state-of-the-art
														
 
															+feature requirements.
														
 
															-Mailing list: linux-ext4@vger.kernel.org
														
 
															+Mailing list:	linux-ext4@vger.kernel.org
														
 
															+Web site:	http://ext4.wiki.kernel.org
														
 
															 1. Quick usage instructions:
														
 
															 ===========================
														
 
															+Note: More extensive information for getting started with ext4 can be
														
 
															+      found at the ext4 wiki site at the URL:
														
 
															+      http://ext4.wiki.kernel.org/index.php/Ext4_Howto
														
 
															+
														
 
															   - Compile and install the latest version of e2fsprogs (as of this
														
 
															-    writing version 1.41) from:
														
 
															+    writing version 1.41.3) from:
														
 
															     http://sourceforge.net/project/showfiles.php?group_id=2406
														
@@ -36,11 +41,9 @@ Mailing list: linux-ext4@vger.kernel.org
 
															     	# mke2fs -t ext4 /dev/hda1
														
 
															-    Or configure an existing ext3 filesystem to support extents and set
														
 
															-    the test_fs flag to indicate that it's ok for an in-development
														
 
															-    filesystem to touch this filesystem:
														
 
															+    Or to configure an existing ext3 filesystem to support extents: 
														
 
															-	# tune2fs -O extents -E test_fs /dev/hda1
														
 
															+	# tune2fs -O extents /dev/hda1
														
 
															     If the filesystem was created with 128 byte inodes, it can be
														
 
															     converted to use 256 byte for greater efficiency via:
														
@@ -104,8 +107,8 @@ exist yet so I'm not sure they're in the near-term roadmap.
 
															 The big performance win will come with mballoc, delalloc and flex_bg
														
 
															 grouping of bitmaps and inode tables.  Some test results available here:
														
 
															- - http://www.bullopensource.org/ext4/20080530/ffsb-write-2.6.26-rc2.html
														
 
															- - http://www.bullopensource.org/ext4/20080530/ffsb-readwrite-2.6.26-rc2.html
														
 
															+ - http://www.bullopensource.org/ext4/20080818-ffsb/ffsb-write-2.6.27-rc1.html
														
 
															+ - http://www.bullopensource.org/ext4/20080818-ffsb/ffsb-readwrite-2.6.27-rc1.html
														
 
															 3. Options
														
 
															 ==========
														
@@ -214,9 +217,6 @@ noreservation
 
															 bsddf		(*)	Make 'df' act like BSD.
														
 
															 minixdf			Make 'df' act like Minix.
														
 
															-check=none		Don't do extra checking of bitmaps on mount.
														
 
															-nocheck
														
 
															-
														
 
															 debug			Extra debugging information is sent to syslog.
														
 
															 errors=remount-ro(*)	Remount the filesystem read-only on an error.
														
@@ -253,8 +253,6 @@ nobh			(a) cache disk block mapping information
 
															 			"nobh" option tries to avoid associating buffer
														
 
															 			heads (supported only for "writeback" mode).
														
 
															-mballoc		(*)	Use the multiple block allocator for block allocation
														
 
															-nomballoc		disabled multiple block allocator for block allocation.
														
 
															 stripe=n		Number of filesystem blocks that mballoc will try
														
 
															 			to use for allocation size and alignment. For RAID5/6
														
 
															 			systems this should be the number of data
														
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -160,7 +160,7 @@ config EXT4_FS
 
															 	  filesystem initially.
														
 
															 	  To compile this file system support as a module, choose M here. The
														
 
															-	  module will be called ext4dev.
														
 
															+	  module will be called ext4.
														
 
															 	  If unsure, say N.
														
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -71,7 +71,7 @@ obj-$(CONFIG_DLM)		+= dlm/
 
															 # Do not add any filesystems before this line
														
 
															 obj-$(CONFIG_REISERFS_FS)	+= reiserfs/
														
 
															 obj-$(CONFIG_EXT3_FS)		+= ext3/ # Before ext2 so root fs can be ext3
														
 
															-obj-$(CONFIG_EXT4_FS)		+= ext4/ # Before ext2 so root fs can be ext4dev
														
 
															+obj-$(CONFIG_EXT4_FS)		+= ext4/ # Before ext2 so root fs can be ext4
														
 
															 obj-$(CONFIG_JBD)		+= jbd/
														
 
															 obj-$(CONFIG_JBD2)		+= jbd2/
														
 
															 obj-$(CONFIG_EXT2_FS)		+= ext2/
														
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -568,8 +568,16 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
 
															 	/* this isn't the right place to decide whether block is metadata
														
 
															 	 * inode.c/extents.c knows better, but for safety ... */
														
 
															-	if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode) ||
														
 
															-			ext4_should_journal_data(inode))
														
 
															+	if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
														
 
															+		metadata = 1;
														
 
															+
														
 
															+	/* We need to make sure we don't reuse
														
 
															+	 * block released until the transaction commit.
														
 
															+	 * writeback mode has weak data consistency so
														
 
															+	 * don't force data as metadata when freeing block
														
 
															+	 * for writeback mode.
														
 
															+	 */
														
 
															+	if (metadata == 0 && !ext4_should_writeback_data(inode))
														
 
															 		metadata = 1;
														
 
															 	sb = inode->i_sb;
														
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -511,7 +511,6 @@ do {									       \
 
															 /*
														
 
															  * Mount flags
														
 
															  */
														
 
															-#define EXT4_MOUNT_CHECK		0x00001	/* Do mount-time checks */
														
 
															 #define EXT4_MOUNT_OLDALLOC		0x00002  /* Don't use the new Orlov allocator */
														
 
															 #define EXT4_MOUNT_GRPID		0x00004	/* Create files with directory's group */
														
 
															 #define EXT4_MOUNT_DEBUG		0x00008	/* Some debugging messages */
														
--- a/fs/ext4/ext4_sb.h
+++ b/fs/ext4/ext4_sb.h
@@ -99,9 +99,6 @@ struct ext4_sb_info {
 
															 	struct inode *s_buddy_cache;
														
 
															 	long s_blocks_reserved;
														
 
															 	spinlock_t s_reserve_lock;
														
 
															-	struct list_head s_active_transaction;
														
 
															-	struct list_head s_closed_transaction;
														
 
															-	struct list_head s_committed_transaction;
														
 
															 	spinlock_t s_md_lock;
														
 
															 	tid_t s_last_transaction;
														
 
															 	unsigned short *s_mb_offsets, *s_mb_maxs;
														
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1648,6 +1648,7 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd)
 
															 	int ret = 0, err, nr_pages, i;
														
 
															 	unsigned long index, end;
														
 
															 	struct pagevec pvec;
														
 
															+	long pages_skipped;
														
 
															 	BUG_ON(mpd->next_page <= mpd->first_page);
														
 
															 	pagevec_init(&pvec, 0);
														
@@ -1655,20 +1656,30 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd)
 
															 	end = mpd->next_page - 1;
														
 
															 	while (index <= end) {
														
 
															-		/* XXX: optimize tail */
														
 
															-		nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE);
														
 
															+		/*
														
 
															+		 * We can use PAGECACHE_TAG_DIRTY lookup here because
														
 
															+		 * even though we have cleared the dirty flag on the page
														
 
															+		 * We still keep the page in the radix tree with tag
														
 
															+		 * PAGECACHE_TAG_DIRTY. See clear_page_dirty_for_io.
														
 
															+		 * The PAGECACHE_TAG_DIRTY is cleared in set_page_writeback
														
 
															+		 * which is called via the below writepage callback.
														
 
															+		 */
														
 
															+		nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
														
 
															+					PAGECACHE_TAG_DIRTY,
														
 
															+					min(end - index,
														
 
															+					(pgoff_t)PAGEVEC_SIZE-1) + 1);
														
 
															 		if (nr_pages == 0)
														
 
															 			break;
														
 
															 		for (i = 0; i < nr_pages; i++) {
														
 
															 			struct page *page = pvec.pages[i];
														
 
															-			index = page->index;
														
 
															-			if (index > end)
														
 
															-				break;
														
 
															-			index++;
														
 
															-
														
 
															+			pages_skipped = mpd->wbc->pages_skipped;
														
 
															 			err = mapping->a_ops->writepage(page, mpd->wbc);
														
 
															-			if (!err)
														
 
															+			if (!err && (pages_skipped == mpd->wbc->pages_skipped))
														
 
															+				/*
														
 
															+				 * have successfully written the page
														
 
															+				 * without skipping the same
														
 
															+				 */
														
 
															 				mpd->pages_written++;
														
 
															 			/*
														
 
															 			 * In error case, we have to continue because
														
@@ -2104,7 +2115,6 @@ static int mpage_da_writepages(struct address_space *mapping,
 
															 			       struct writeback_control *wbc,
														
 
															 			       struct mpage_da_data *mpd)
														
 
															 {
														
 
															-	long to_write;
														
 
															 	int ret;
														
 
															 	if (!mpd->get_block)
														
@@ -2119,19 +2129,18 @@ static int mpage_da_writepages(struct address_space *mapping,
 
															 	mpd->pages_written = 0;
														
 
															 	mpd->retval = 0;
														
 
															-	to_write = wbc->nr_to_write;
														
 
															-
														
 
															 	ret = write_cache_pages(mapping, wbc, __mpage_da_writepage, mpd);
														
 
															-
														
 
															 	/*
														
 
															 	 * Handle last extent of pages
														
 
															 	 */
														
 
															 	if (!mpd->io_done && mpd->next_page != mpd->first_page) {
														
 
															 		if (mpage_da_map_blocks(mpd) == 0)
														
 
															 			mpage_da_submit_io(mpd);
														
 
															-	}
														
 
															-	wbc->nr_to_write = to_write - mpd->pages_written;
														
 
															+		mpd->io_done = 1;
														
 
															+		ret = MPAGE_DA_EXTENT_TAIL;
														
 
															+	}
														
 
															+	wbc->nr_to_write -= mpd->pages_written;
														
 
															 	return ret;
														
 
															 }
														
@@ -2360,12 +2369,14 @@ static int ext4_da_writepages_trans_blocks(struct inode *inode)
 
															 static int ext4_da_writepages(struct address_space *mapping,
														
 
															 			      struct writeback_control *wbc)
														
 
															 {
														
 
															+	pgoff_t	index;
														
 
															+	int range_whole = 0;
														
 
															 	handle_t *handle = NULL;
														
 
															-	loff_t range_start = 0;
														
 
															 	struct mpage_da_data mpd;
														
 
															 	struct inode *inode = mapping->host;
														
 
															+	int no_nrwrite_index_update;
														
 
															+	long pages_written = 0, pages_skipped;
														
 
															 	int needed_blocks, ret = 0, nr_to_writebump = 0;
														
 
															-	long to_write, pages_skipped = 0;
														
 
															 	struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb);
														
 
															 	/*
														
@@ -2385,23 +2396,26 @@ static int ext4_da_writepages(struct address_space *mapping,
 
															 		nr_to_writebump = sbi->s_mb_stream_request - wbc->nr_to_write;
														
 
															 		wbc->nr_to_write = sbi->s_mb_stream_request;
														
 
															 	}
														
 
															+	if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
														
 
															+		range_whole = 1;
														
 
															-	if (!wbc->range_cyclic)
														
 
															-		/*
														
 
															-		 * If range_cyclic is not set force range_cont
														
 
															-		 * and save the old writeback_index
														
 
															-		 */
														
 
															-		wbc->range_cont = 1;
														
 
															-
														
 
															-	range_start =  wbc->range_start;
														
 
															-	pages_skipped = wbc->pages_skipped;
														
 
															+	if (wbc->range_cyclic)
														
 
															+		index = mapping->writeback_index;
														
 
															+	else
														
 
															+		index = wbc->range_start >> PAGE_CACHE_SHIFT;
														
 
															 	mpd.wbc = wbc;
														
 
															 	mpd.inode = mapping->host;
														
 
															-restart_loop:
														
 
															-	to_write = wbc->nr_to_write;
														
 
															-	while (!ret && to_write > 0) {
														
 
															+	/*
														
 
															+	 * we don't want write_cache_pages to update
														
 
															+	 * nr_to_write and writeback_index
														
 
															+	 */
														
 
															+	no_nrwrite_index_update = wbc->no_nrwrite_index_update;
														
 
															+	wbc->no_nrwrite_index_update = 1;
														
 
															+	pages_skipped = wbc->pages_skipped;
														
 
															+
														
 
															+	while (!ret && wbc->nr_to_write > 0) {
														
 
															 		/*
														
 
															 		 * we  insert one extent at a time. So we need
														
@@ -2422,48 +2436,53 @@ static int ext4_da_writepages(struct address_space *mapping,
 
															 			dump_stack();
														
 
															 			goto out_writepages;
														
 
															 		}
														
 
															-		to_write -= wbc->nr_to_write;
														
 
															-
														
 
															 		mpd.get_block = ext4_da_get_block_write;
														
 
															 		ret = mpage_da_writepages(mapping, wbc, &mpd);
														
 
															 		ext4_journal_stop(handle);
														
 
															-		if (mpd.retval == -ENOSPC)
														
 
															+		if (mpd.retval == -ENOSPC) {
														
 
															+			/* commit the transaction which would
														
 
															+			 * free blocks released in the transaction
														
 
															+			 * and try again
														
 
															+			 */
														
 
															 			jbd2_journal_force_commit_nested(sbi->s_journal);
														
 
															-
														
 
															-		/* reset the retry count */
														
 
															-		if (ret == MPAGE_DA_EXTENT_TAIL) {
														
 
															+			wbc->pages_skipped = pages_skipped;
														
 
															+			ret = 0;
														
 
															+		} else if (ret == MPAGE_DA_EXTENT_TAIL) {
														
 
															 			/*
														
 
															 			 * got one extent now try with
														
 
															 			 * rest of the pages
														
 
															 			 */
														
 
															-			to_write += wbc->nr_to_write;
														
 
															+			pages_written += mpd.pages_written;
														
 
															+			wbc->pages_skipped = pages_skipped;
														
 
															 			ret = 0;
														
 
															-		} else if (wbc->nr_to_write) {
														
 
															+		} else if (wbc->nr_to_write)
														
 
															 			/*
														
 
															 			 * There is no more writeout needed
														
 
															 			 * or we requested for a noblocking writeout
														
 
															 			 * and we found the device congested
														
 
															 			 */
														
 
															-			to_write += wbc->nr_to_write;
														
 
															 			break;
														
 
															-		}
														
 
															-		wbc->nr_to_write = to_write;
														
 
															-	}
														
 
															-
														
 
															-	if (wbc->range_cont && (pages_skipped != wbc->pages_skipped)) {
														
 
															-		/* We skipped pages in this loop */
														
 
															-		wbc->range_start = range_start;
														
 
															-		wbc->nr_to_write = to_write +
														
 
															-				wbc->pages_skipped - pages_skipped;
														
 
															-		wbc->pages_skipped = pages_skipped;
														
 
															-		goto restart_loop;
														
 
															 	}
														
 
															+	if (pages_skipped != wbc->pages_skipped)
														
 
															+		printk(KERN_EMERG "This should not happen leaving %s "
														
 
															+				"with nr_to_write = %ld ret = %d\n",
														
 
															+				__func__, wbc->nr_to_write, ret);
														
 
															+
														
 
															+	/* Update index */
														
 
															+	index += pages_written;
														
 
															+	if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
														
 
															+		/*
														
 
															+		 * set the writeback_index so that range_cyclic
														
 
															+		 * mode will write it back later
														
 
															+		 */
														
 
															+		mapping->writeback_index = index;
														
 
															 out_writepages:
														
 
															-	wbc->nr_to_write = to_write - nr_to_writebump;
														
 
															-	wbc->range_start = range_start;
														
 
															+	if (!no_nrwrite_index_update)
														
 
															+		wbc->no_nrwrite_index_update = 0;
														
 
															+	wbc->nr_to_write -= nr_to_writebump;
														
 
															 	return ret;
														
 
															 }
														
@@ -4175,7 +4194,6 @@ static int ext4_inode_blocks_set(handle_t *handle,
 
															 	struct inode *inode = &(ei->vfs_inode);
														
 
															 	u64 i_blocks = inode->i_blocks;
														
 
															 	struct super_block *sb = inode->i_sb;
														
 
															-	int err = 0;
														
 
															 	if (i_blocks <= ~0U) {
														
 
															 		/*
														
@@ -4185,36 +4203,27 @@ static int ext4_inode_blocks_set(handle_t *handle,
 
															 		raw_inode->i_blocks_lo   = cpu_to_le32(i_blocks);
														
 
															 		raw_inode->i_blocks_high = 0;
														
 
															 		ei->i_flags &= ~EXT4_HUGE_FILE_FL;
														
 
															-	} else if (i_blocks <= 0xffffffffffffULL) {
														
 
															+		return 0;
														
 
															+	}
														
 
															+	if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_HUGE_FILE))
														
 
															+		return -EFBIG;
														
 
															+
														
 
															+	if (i_blocks <= 0xffffffffffffULL) {
														
 
															 		/*
														
 
															 		 * i_blocks can be represented in a 48 bit variable
														
 
															 		 * as multiple of 512 bytes
														
 
															 		 */
														
 
															-		err = ext4_update_rocompat_feature(handle, sb,
														
 
															-					    EXT4_FEATURE_RO_COMPAT_HUGE_FILE);
														
 
															-		if (err)
														
 
															-			goto  err_out;
														
 
															-		/* i_block is stored in the split  48 bit fields */
														
 
															 		raw_inode->i_blocks_lo   = cpu_to_le32(i_blocks);
														
 
															 		raw_inode->i_blocks_high = cpu_to_le16(i_blocks >> 32);
														
 
															 		ei->i_flags &= ~EXT4_HUGE_FILE_FL;
														
 
															 	} else {
														
 
															-		/*
														
 
															-		 * i_blocks should be represented in a 48 bit variable
														
 
															-		 * as multiple of  file system block size
														
 
															-		 */
														
 
															-		err = ext4_update_rocompat_feature(handle, sb,
														
 
															-					    EXT4_FEATURE_RO_COMPAT_HUGE_FILE);
														
 
															-		if (err)
														
 
															-			goto  err_out;
														
 
															 		ei->i_flags |= EXT4_HUGE_FILE_FL;
														
 
															 		/* i_block is stored in file system block size */
														
 
															 		i_blocks = i_blocks >> (inode->i_blkbits - 9);
														
 
															 		raw_inode->i_blocks_lo   = cpu_to_le32(i_blocks);
														
 
															 		raw_inode->i_blocks_high = cpu_to_le16(i_blocks >> 32);
														
 
															 	}
														
 
															-err_out:
														
 
															-	return err;
														
 
															+	return 0;
														
 
															 }
														
 
															 /*
														
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -2300,6 +2300,7 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
 
															 	}
														
 
															 	INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list);
														
 
															+	meta_group_info[i]->bb_free_root.rb_node = NULL;;
														
 
															 #ifdef DOUBLE_CHECK
														
 
															 	{
														
@@ -2522,9 +2523,6 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
 
															 	}
														
 
															 	spin_lock_init(&sbi->s_md_lock);
														
 
															-	INIT_LIST_HEAD(&sbi->s_active_transaction);
														
 
															-	INIT_LIST_HEAD(&sbi->s_closed_transaction);
														
 
															-	INIT_LIST_HEAD(&sbi->s_committed_transaction);
														
 
															 	spin_lock_init(&sbi->s_bal_lock);
														
 
															 	sbi->s_mb_max_to_scan = MB_DEFAULT_MAX_TO_SCAN;
														
@@ -2553,6 +2551,8 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
 
															 	ext4_mb_init_per_dev_proc(sb);
														
 
															 	ext4_mb_history_init(sb);
														
 
															+	sbi->s_journal->j_commit_callback = release_blocks_on_commit;
														
 
															+
														
 
															 	printk(KERN_INFO "EXT4-fs: mballoc enabled\n");
														
 
															 	return 0;
														
 
															 }
														
@@ -2568,7 +2568,7 @@ static void ext4_mb_cleanup_pa(struct ext4_group_info *grp)
 
															 		pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list);
														
 
															 		list_del(&pa->pa_group_list);
														
 
															 		count++;
														
 
															-		kfree(pa);
														
 
															+		kmem_cache_free(ext4_pspace_cachep, pa);
														
 
															 	}
														
 
															 	if (count)
														
 
															 		mb_debug("mballoc: %u PAs left\n", count);
														
@@ -2582,15 +2582,6 @@ int ext4_mb_release(struct super_block *sb)
 
															 	struct ext4_group_info *grinfo;
														
 
															 	struct ext4_sb_info *sbi = EXT4_SB(sb);
														
 
															-	/* release freed, non-committed blocks */
														
 
															-	spin_lock(&sbi->s_md_lock);
														
 
															-	list_splice_init(&sbi->s_closed_transaction,
														
 
															-			&sbi->s_committed_transaction);
														
 
															-	list_splice_init(&sbi->s_active_transaction,
														
 
															-			&sbi->s_committed_transaction);
														
 
															-	spin_unlock(&sbi->s_md_lock);
														
 
															-	ext4_mb_free_committed_blocks(sb);
														
 
															-
														
 
															 	if (sbi->s_group_info) {
														
 
															 		for (i = 0; i < sbi->s_groups_count; i++) {
														
 
															 			grinfo = ext4_get_group_info(sb, i);
														
@@ -2644,61 +2635,57 @@ int ext4_mb_release(struct super_block *sb)
 
															 	return 0;
														
 
															 }
														
 
															-static noinline_for_stack void
														
 
															-ext4_mb_free_committed_blocks(struct super_block *sb)
														
 
															+/*
														
 
															+ * This function is called by the jbd2 layer once the commit has finished,
														
 
															+ * so we know we can free the blocks that were released with that commit.
														
 
															+ */
														
 
															+static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
														
 
															 {
														
 
															-	struct ext4_sb_info *sbi = EXT4_SB(sb);
														
 
															-	int err;
														
 
															-	int i;
														
 
															-	int count = 0;
														
 
															-	int count2 = 0;
														
 
															-	struct ext4_free_metadata *md;
														
 
															+	struct super_block *sb = journal->j_private;
														
 
															 	struct ext4_buddy e4b;
														
 
															+	struct ext4_group_info *db;
														
 
															+	int err, count = 0, count2 = 0;
														
 
															+	struct ext4_free_data *entry;
														
 
															+	ext4_fsblk_t discard_block;
														
 
															+	struct list_head *l, *ltmp;
														
 
															-	if (list_empty(&sbi->s_committed_transaction))
														
 
															-		return;
														
 
															-
														
 
															-	/* there is committed blocks to be freed yet */
														
 
															-	do {
														
 
															-		/* get next array of blocks */
														
 
															-		md = NULL;
														
 
															-		spin_lock(&sbi->s_md_lock);
														
 
															-		if (!list_empty(&sbi->s_committed_transaction)) {
														
 
															-			md = list_entry(sbi->s_committed_transaction.next,
														
 
															-					struct ext4_free_metadata, list);
														
 
															-			list_del(&md->list);
														
 
															-		}
														
 
															-		spin_unlock(&sbi->s_md_lock);
														
 
															-
														
 
															-		if (md == NULL)
														
 
															-			break;
														
 
															+	list_for_each_safe(l, ltmp, &txn->t_private_list) {
														
 
															+		entry = list_entry(l, struct ext4_free_data, list);
														
 
															 		mb_debug("gonna free %u blocks in group %lu (0x%p):",
														
 
															-				md->num, md->group, md);
														
 
															+			 entry->count, entry->group, entry);
														
 
															-		err = ext4_mb_load_buddy(sb, md->group, &e4b);
														
 
															+		err = ext4_mb_load_buddy(sb, entry->group, &e4b);
														
 
															 		/* we expect to find existing buddy because it's pinned */
														
 
															 		BUG_ON(err != 0);
														
 
															+		db = e4b.bd_info;
														
 
															 		/* there are blocks to put in buddy to make them really free */
														
 
															-		count += md->num;
														
 
															+		count += entry->count;
														
 
															 		count2++;
														
 
															-		ext4_lock_group(sb, md->group);
														
 
															-		for (i = 0; i < md->num; i++) {
														
 
															-			mb_debug(" %u", md->blocks[i]);
														
 
															-			mb_free_blocks(NULL, &e4b, md->blocks[i], 1);
														
 
															+		ext4_lock_group(sb, entry->group);
														
 
															+		/* Take it out of per group rb tree */
														
 
															+		rb_erase(&entry->node, &(db->bb_free_root));
														
 
															+		mb_free_blocks(NULL, &e4b, entry->start_blk, entry->count);
														
 
															+
														
 
															+		if (!db->bb_free_root.rb_node) {
														
 
															+			/* No more items in the per group rb tree
														
 
															+			 * balance refcounts from ext4_mb_free_metadata()
														
 
															+			 */
														
 
															+			page_cache_release(e4b.bd_buddy_page);
														
 
															+			page_cache_release(e4b.bd_bitmap_page);
														
 
															 		}
														
 
															-		mb_debug("\n");
														
 
															-		ext4_unlock_group(sb, md->group);
														
 
															-
														
 
															-		/* balance refcounts from ext4_mb_free_metadata() */
														
 
															-		page_cache_release(e4b.bd_buddy_page);
														
 
															-		page_cache_release(e4b.bd_bitmap_page);
														
 
															-
														
 
															-		kfree(md);
														
 
															+		ext4_unlock_group(sb, entry->group);
														
 
															+		discard_block = (ext4_fsblk_t) entry->group * EXT4_BLOCKS_PER_GROUP(sb)
														
 
															+			+ entry->start_blk
														
 
															+			+ le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
														
 
															+		trace_mark(ext4_discard_blocks, "dev %s blk %llu count %u", sb->s_id,
														
 
															+			   (unsigned long long) discard_block, entry->count);
														
 
															+		sb_issue_discard(sb, discard_block, entry->count);
														
 
															+
														
 
															+		kmem_cache_free(ext4_free_ext_cachep, entry);
														
 
															 		ext4_mb_release_desc(&e4b);
														
 
															-
														
 
															-	} while (md);
														
 
															+	}
														
 
															 	mb_debug("freed %u blocks in %u structures\n", count, count2);
														
 
															 }
														
@@ -2712,6 +2699,7 @@ ext4_mb_free_committed_blocks(struct super_block *sb)
 
															 static int ext4_mb_init_per_dev_proc(struct super_block *sb)
														
 
															 {
														
 
															+#ifdef CONFIG_PROC_FS
														
 
															 	mode_t mode = S_IFREG | S_IRUGO | S_IWUSR;
														
 
															 	struct ext4_sb_info *sbi = EXT4_SB(sb);
														
 
															 	struct proc_dir_entry *proc;
														
@@ -2735,10 +2723,14 @@ static int ext4_mb_init_per_dev_proc(struct super_block *sb)
 
															 	remove_proc_entry(EXT4_MB_MAX_TO_SCAN_NAME, sbi->s_proc);
														
 
															 	remove_proc_entry(EXT4_MB_STATS_NAME, sbi->s_proc);
														
 
															 	return -ENOMEM;
														
 
															+#else
														
 
															+	return 0;
														
 
															+#endif
														
 
															 }
														
 
															 static int ext4_mb_destroy_per_dev_proc(struct super_block *sb)
														
 
															 {
														
 
															+#ifdef CONFIG_PROC_FS
														
 
															 	struct ext4_sb_info *sbi = EXT4_SB(sb);
														
 
															 	if (sbi->s_proc == NULL)
														
@@ -2750,7 +2742,7 @@ static int ext4_mb_destroy_per_dev_proc(struct super_block *sb)
 
															 	remove_proc_entry(EXT4_MB_MIN_TO_SCAN_NAME, sbi->s_proc);
														
 
															 	remove_proc_entry(EXT4_MB_MAX_TO_SCAN_NAME, sbi->s_proc);
														
 
															 	remove_proc_entry(EXT4_MB_STATS_NAME, sbi->s_proc);
														
 
															-
														
 
															+#endif
														
 
															 	return 0;
														
 
															 }
														
@@ -2771,6 +2763,16 @@ int __init init_ext4_mballoc(void)
 
															 		kmem_cache_destroy(ext4_pspace_cachep);
														
 
															 		return -ENOMEM;
														
 
															 	}
														
 
															+
														
 
															+	ext4_free_ext_cachep =
														
 
															+		kmem_cache_create("ext4_free_block_extents",
														
 
															+				     sizeof(struct ext4_free_data),
														
 
															+				     0, SLAB_RECLAIM_ACCOUNT, NULL);
														
 
															+	if (ext4_free_ext_cachep == NULL) {
														
 
															+		kmem_cache_destroy(ext4_pspace_cachep);
														
 
															+		kmem_cache_destroy(ext4_ac_cachep);
														
 
															+		return -ENOMEM;
														
 
															+	}
														
 
															 	return 0;
														
 
															 }
														
@@ -2779,6 +2781,7 @@ void exit_ext4_mballoc(void)
 
															 	/* XXX: synchronize_rcu(); */
														
 
															 	kmem_cache_destroy(ext4_pspace_cachep);
														
 
															 	kmem_cache_destroy(ext4_ac_cachep);
														
 
															+	kmem_cache_destroy(ext4_free_ext_cachep);
														
 
															 }
														
@@ -4324,8 +4327,6 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
 
															 		goto out1;
														
 
															 	}
														
 
															-	ext4_mb_poll_new_transaction(sb, handle);
														
 
															-
														
 
															 	*errp = ext4_mb_initialize_context(ac, ar);
														
 
															 	if (*errp) {
														
 
															 		ar->len = 0;
														
@@ -4384,35 +4385,20 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
 
															 	return block;
														
 
															 }
														
 
															-static void ext4_mb_poll_new_transaction(struct super_block *sb,
														
 
															-						handle_t *handle)
														
 
															-{
														
 
															-	struct ext4_sb_info *sbi = EXT4_SB(sb);
														
 
															-
														
 
															-	if (sbi->s_last_transaction == handle->h_transaction->t_tid)
														
 
															-		return;
														
 
															-
														
 
															-	/* new transaction! time to close last one and free blocks for
														
 
															-	 * committed transaction. we know that only transaction can be
														
 
															-	 * active, so previos transaction can be being logged and we
														
 
															-	 * know that transaction before previous is known to be already
														
 
															-	 * logged. this means that now we may free blocks freed in all
														
 
															-	 * transactions before previous one. hope I'm clear enough ... */
														
 
															-	spin_lock(&sbi->s_md_lock);
														
 
															-	if (sbi->s_last_transaction != handle->h_transaction->t_tid) {
														
 
															-		mb_debug("new transaction %lu, old %lu\n",
														
 
															-				(unsigned long) handle->h_transaction->t_tid,
														
 
															-				(unsigned long) sbi->s_last_transaction);
														
 
															-		list_splice_init(&sbi->s_closed_transaction,
														
 
															-				&sbi->s_committed_transaction);
														
 
															-		list_splice_init(&sbi->s_active_transaction,
														
 
															-				&sbi->s_closed_transaction);
														
 
															-		sbi->s_last_transaction = handle->h_transaction->t_tid;
														
 
															-	}
														
 
															-	spin_unlock(&sbi->s_md_lock);
														
 
															-
														
 
															-	ext4_mb_free_committed_blocks(sb);
														
 
															+/*
														
 
															+ * We can merge two free data extents only if the physical blocks
														
 
															+ * are contiguous, AND the extents were freed by the same transaction,
														
 
															+ * AND the blocks are associated with the same group.
														
 
															+ */
														
 
															+static int can_merge(struct ext4_free_data *entry1,
														
 
															+			struct ext4_free_data *entry2)
														
 
															+{
														
 
															+	if ((entry1->t_tid == entry2->t_tid) &&
														
 
															+	    (entry1->group == entry2->group) &&
														
 
															+	    ((entry1->start_blk + entry1->count) == entry2->start_blk))
														
 
															+		return 1;
														
 
															+	return 0;
														
 
															 }
														
 
															 static noinline_for_stack int
														
@@ -4422,57 +4408,80 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
 
															 	struct ext4_group_info *db = e4b->bd_info;
														
 
															 	struct super_block *sb = e4b->bd_sb;
														
 
															 	struct ext4_sb_info *sbi = EXT4_SB(sb);
														
 
															-	struct ext4_free_metadata *md;
														
 
															-	int i;
														
 
															+	struct ext4_free_data *entry, *new_entry;
														
 
															+	struct rb_node **n = &db->bb_free_root.rb_node, *node;
														
 
															+	struct rb_node *parent = NULL, *new_node;
														
 
															+
														
 
															 	BUG_ON(e4b->bd_bitmap_page == NULL);
														
 
															 	BUG_ON(e4b->bd_buddy_page == NULL);
														
 
															+	new_entry  = kmem_cache_alloc(ext4_free_ext_cachep, GFP_NOFS);
														
 
															+	new_entry->start_blk = block;
														
 
															+	new_entry->group  = group;
														
 
															+	new_entry->count = count;
														
 
															+	new_entry->t_tid = handle->h_transaction->t_tid;
														
 
															+	new_node = &new_entry->node;
														
 
															+
														
 
															 	ext4_lock_group(sb, group);
														
 
															-	for (i = 0; i < count; i++) {
														
 
															-		md = db->bb_md_cur;
														
 
															-		if (md && db->bb_tid != handle->h_transaction->t_tid) {
														
 
															-			db->bb_md_cur = NULL;
														
 
															-			md = NULL;
														
 
															+	if (!*n) {
														
 
															+		/* first free block extent. We need to
														
 
															+		 * protect buddy cache from being freed,
														
 
															+		 * otherwise we'll refresh it from
														
 
															+		 * on-disk bitmap and lose not-yet-available
														
 
															+		 * blocks */
														
 
															+		page_cache_get(e4b->bd_buddy_page);
														
 
															+		page_cache_get(e4b->bd_bitmap_page);
														
 
															+	}
														
 
															+	while (*n) {
														
 
															+		parent = *n;
														
 
															+		entry = rb_entry(parent, struct ext4_free_data, node);
														
 
															+		if (block < entry->start_blk)
														
 
															+			n = &(*n)->rb_left;
														
 
															+		else if (block >= (entry->start_blk + entry->count))
														
 
															+			n = &(*n)->rb_right;
														
 
															+		else {
														
 
															+			ext4_error(sb, __func__,
														
 
															+			    "Double free of blocks %d (%d %d)\n",
														
 
															+			    block, entry->start_blk, entry->count);
														
 
															+			return 0;
														
 
															 		}
														
 
															+	}
														
 
															-		if (md == NULL) {
														
 
															-			ext4_unlock_group(sb, group);
														
 
															-			md = kmalloc(sizeof(*md), GFP_NOFS);
														
 
															-			if (md == NULL)
														
 
															-				return -ENOMEM;
														
 
															-			md->num = 0;
														
 
															-			md->group = group;
														
 
															-
														
 
															-			ext4_lock_group(sb, group);
														
 
															-			if (db->bb_md_cur == NULL) {
														
 
															-				spin_lock(&sbi->s_md_lock);
														
 
															-				list_add(&md->list, &sbi->s_active_transaction);
														
 
															-				spin_unlock(&sbi->s_md_lock);
														
 
															-				/* protect buddy cache from being freed,
														
 
															-				 * otherwise we'll refresh it from
														
 
															-				 * on-disk bitmap and lose not-yet-available
														
 
															-				 * blocks */
														
 
															-				page_cache_get(e4b->bd_buddy_page);
														
 
															-				page_cache_get(e4b->bd_bitmap_page);
														
 
															-				db->bb_md_cur = md;
														
 
															-				db->bb_tid = handle->h_transaction->t_tid;
														
 
															-				mb_debug("new md 0x%p for group %lu\n",
														
 
															-						md, md->group);
														
 
															-			} else {
														
 
															-				kfree(md);
														
 
															-				md = db->bb_md_cur;
														
 
															-			}
														
 
															+	rb_link_node(new_node, parent, n);
														
 
															+	rb_insert_color(new_node, &db->bb_free_root);
														
 
															+
														
 
															+	/* Now see whether the extent can be merged to the left and right */
														
 
															+	node = rb_prev(new_node);
														
 
															+	if (node) {
														
 
															+		entry = rb_entry(node, struct ext4_free_data, node);
														
 
															+		if (can_merge(entry, new_entry)) {
														
 
															+			new_entry->start_blk = entry->start_blk;
														
 
															+			new_entry->count += entry->count;
														
 
															+			rb_erase(node, &(db->bb_free_root));
														
 
															+			spin_lock(&sbi->s_md_lock);
														
 
															+			list_del(&entry->list);
														
 
															+			spin_unlock(&sbi->s_md_lock);
														
 
															+			kmem_cache_free(ext4_free_ext_cachep, entry);
														
 
															 		}
														
 
															+	}
														
 
															-		BUG_ON(md->num >= EXT4_BB_MAX_BLOCKS);
														
 
															-		md->blocks[md->num] = block + i;
														
 
															-		md->num++;
														
 
															-		if (md->num == EXT4_BB_MAX_BLOCKS) {
														
 
															-			/* no more space, put full container on a sb's list */
														
 
															-			db->bb_md_cur = NULL;
														
 
															+	node = rb_next(new_node);
														
 
															+	if (node) {
														
 
															+		entry = rb_entry(node, struct ext4_free_data, node);
														
 
															+		if (can_merge(new_entry, entry)) {
														
 
															+			new_entry->count += entry->count;
														
 
															+			rb_erase(node, &(db->bb_free_root));
														
 
															+			spin_lock(&sbi->s_md_lock);
														
 
															+			list_del(&entry->list);
														
 
															+			spin_unlock(&sbi->s_md_lock);
														
 
															+			kmem_cache_free(ext4_free_ext_cachep, entry);
														
 
															 		}
														
 
															 	}
														
 
															+	/* Add the extent to transaction's private list */
														
 
															+	spin_lock(&sbi->s_md_lock);
														
 
															+	list_add(&new_entry->list, &handle->h_transaction->t_private_list);
														
 
															+	spin_unlock(&sbi->s_md_lock);
														
 
															 	ext4_unlock_group(sb, group);
														
 
															 	return 0;
														
 
															 }
														
@@ -4500,8 +4509,6 @@ void ext4_mb_free_blocks(handle_t *handle, struct inode *inode,
 
															 	*freed = 0;
														
 
															-	ext4_mb_poll_new_transaction(sb, handle);
														
 
															-
														
 
															 	sbi = EXT4_SB(sb);
														
 
															 	es = EXT4_SB(sb)->s_es;
														
 
															 	if (block < le32_to_cpu(es->s_first_data_block) ||
														
--- a/fs/ext4/mballoc.h
+++ b/fs/ext4/mballoc.h
@@ -18,6 +18,8 @@
 
															 #include <linux/pagemap.h>
														
 
															 #include <linux/seq_file.h>
														
 
															 #include <linux/version.h>
														
 
															+#include <linux/blkdev.h>
														
 
															+#include <linux/marker.h>
														
 
															 #include "ext4_jbd2.h"
														
 
															 #include "ext4.h"
														
 
															 #include "group.h"
														
@@ -98,23 +100,29 @@
 
															 static struct kmem_cache *ext4_pspace_cachep;
														
 
															 static struct kmem_cache *ext4_ac_cachep;
														
 
															+static struct kmem_cache *ext4_free_ext_cachep;
														
 
															-#ifdef EXT4_BB_MAX_BLOCKS
														
 
															-#undef EXT4_BB_MAX_BLOCKS
														
 
															-#endif
														
 
															-#define EXT4_BB_MAX_BLOCKS	30
														
 
															+struct ext4_free_data {
														
 
															+	/* this links the free block information from group_info */
														
 
															+	struct rb_node node;
														
 
															-struct ext4_free_metadata {
														
 
															-	ext4_group_t group;
														
 
															-	unsigned short num;
														
 
															-	ext4_grpblk_t  blocks[EXT4_BB_MAX_BLOCKS];
														
 
															+	/* this links the free block information from ext4_sb_info */
														
 
															 	struct list_head list;
														
 
															+
														
 
															+	/* group to which the free block extent belongs */
														
 
															+	ext4_group_t group;
														
 
															+
														
 
															+	/* free block extent */
														
 
															+	ext4_grpblk_t start_blk;
														
 
															+	ext4_grpblk_t count;
														
 
															+
														
 
															+	/* transaction which freed this extent */
														
 
															+	tid_t	t_tid;
														
 
															 };
														
 
															 struct ext4_group_info {
														
 
															 	unsigned long	bb_state;
														
 
															-	unsigned long	bb_tid;
														
 
															-	struct ext4_free_metadata *bb_md_cur;
														
 
															+	struct rb_root  bb_free_root;
														
 
															 	unsigned short	bb_first_free;
														
 
															 	unsigned short	bb_free;
														
 
															 	unsigned short	bb_fragments;
														
@@ -261,8 +269,6 @@ struct buffer_head *read_block_bitmap(struct super_block *, ext4_group_t);
 
															 static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
														
 
															 					ext4_group_t group);
														
 
															-static void ext4_mb_poll_new_transaction(struct super_block *, handle_t *);
														
 
															-static void ext4_mb_free_committed_blocks(struct super_block *);
														
 
															 static void ext4_mb_return_to_preallocation(struct inode *inode,
														
 
															 					struct ext4_buddy *e4b, sector_t block,
														
 
															 					int count);
														
@@ -270,6 +276,7 @@ static void ext4_mb_put_pa(struct ext4_allocation_context *,
 
															 			struct super_block *, struct ext4_prealloc_space *pa);
														
 
															 static int ext4_mb_init_per_dev_proc(struct super_block *sb);
														
 
															 static int ext4_mb_destroy_per_dev_proc(struct super_block *sb);
														
 
															+static void release_blocks_on_commit(journal_t *journal, transaction_t *txn);
														
 
															 static inline void ext4_lock_group(struct super_block *sb, ext4_group_t group)
														
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -374,66 +374,6 @@ void ext4_update_dynamic_rev(struct super_block *sb)
 
															 	 */
														
 
															 }
														
 
															-int ext4_update_compat_feature(handle_t *handle,
														
 
															-					struct super_block *sb, __u32 compat)
														
 
															-{
														
 
															-	int err = 0;
														
 
															-	if (!EXT4_HAS_COMPAT_FEATURE(sb, compat)) {
														
 
															-		err = ext4_journal_get_write_access(handle,
														
 
															-				EXT4_SB(sb)->s_sbh);
														
 
															-		if (err)
														
 
															-			return err;
														
 
															-		EXT4_SET_COMPAT_FEATURE(sb, compat);
														
 
															-		sb->s_dirt = 1;
														
 
															-		handle->h_sync = 1;
														
 
															-		BUFFER_TRACE(EXT4_SB(sb)->s_sbh,
														
 
															-					"call ext4_journal_dirty_met adata");
														
 
															-		err = ext4_journal_dirty_metadata(handle,
														
 
															-				EXT4_SB(sb)->s_sbh);
														
 
															-	}
														
 
															-	return err;
														
 
															-}
														
 
															-
														
 
															-int ext4_update_rocompat_feature(handle_t *handle,
														
 
															-					struct super_block *sb, __u32 rocompat)
														
 
															-{
														
 
															-	int err = 0;
														
 
															-	if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, rocompat)) {
														
 
															-		err = ext4_journal_get_write_access(handle,
														
 
															-				EXT4_SB(sb)->s_sbh);
														
 
															-		if (err)
														
 
															-			return err;
														
 
															-		EXT4_SET_RO_COMPAT_FEATURE(sb, rocompat);
														
 
															-		sb->s_dirt = 1;
														
 
															-		handle->h_sync = 1;
														
 
															-		BUFFER_TRACE(EXT4_SB(sb)->s_sbh,
														
 
															-					"call ext4_journal_dirty_met adata");
														
 
															-		err = ext4_journal_dirty_metadata(handle,
														
 
															-				EXT4_SB(sb)->s_sbh);
														
 
															-	}
														
 
															-	return err;
														
 
															-}
														
 
															-
														
 
															-int ext4_update_incompat_feature(handle_t *handle,
														
 
															-					struct super_block *sb, __u32 incompat)
														
 
															-{
														
 
															-	int err = 0;
														
 
															-	if (!EXT4_HAS_INCOMPAT_FEATURE(sb, incompat)) {
														
 
															-		err = ext4_journal_get_write_access(handle,
														
 
															-				EXT4_SB(sb)->s_sbh);
														
 
															-		if (err)
														
 
															-			return err;
														
 
															-		EXT4_SET_INCOMPAT_FEATURE(sb, incompat);
														
 
															-		sb->s_dirt = 1;
														
 
															-		handle->h_sync = 1;
														
 
															-		BUFFER_TRACE(EXT4_SB(sb)->s_sbh,
														
 
															-					"call ext4_journal_dirty_met adata");
														
 
															-		err = ext4_journal_dirty_metadata(handle,
														
 
															-				EXT4_SB(sb)->s_sbh);
														
 
															-	}
														
 
															-	return err;
														
 
															-}
														
 
															-
														
 
															 /*
														
 
															  * Open the external journal device
														
 
															  */
														
@@ -904,7 +844,7 @@ static const struct export_operations ext4_export_ops = {
 
															 enum {
														
 
															 	Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid,
														
 
															 	Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro,
														
 
															-	Opt_nouid32, Opt_nocheck, Opt_debug, Opt_oldalloc, Opt_orlov,
														
 
															+	Opt_nouid32, Opt_debug, Opt_oldalloc, Opt_orlov,
														
 
															 	Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl,
														
 
															 	Opt_reservation, Opt_noreservation, Opt_noload, Opt_nobh, Opt_bh,
														
 
															 	Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_journal_dev,
														
@@ -915,7 +855,7 @@ enum {
 
															 	Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota,
														
 
															 	Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota,
														
 
															 	Opt_grpquota, Opt_extents, Opt_noextents, Opt_i_version,
														
 
															-	Opt_mballoc, Opt_nomballoc, Opt_stripe, Opt_delalloc, Opt_nodelalloc,
														
 
															+	Opt_stripe, Opt_delalloc, Opt_nodelalloc,
														
 
															 	Opt_inode_readahead_blks
														
 
															 };
														
@@ -933,8 +873,6 @@ static const match_table_t tokens = {
 
															 	{Opt_err_panic, "errors=panic"},
														
 
															 	{Opt_err_ro, "errors=remount-ro"},
														
 
															 	{Opt_nouid32, "nouid32"},
														
 
															-	{Opt_nocheck, "nocheck"},
														
 
															-	{Opt_nocheck, "check=none"},
														
 
															 	{Opt_debug, "debug"},
														
 
															 	{Opt_oldalloc, "oldalloc"},
														
 
															 	{Opt_orlov, "orlov"},
														
@@ -973,8 +911,6 @@ static const match_table_t tokens = {
 
															 	{Opt_extents, "extents"},
														
 
															 	{Opt_noextents, "noextents"},
														
 
															 	{Opt_i_version, "i_version"},
														
 
															-	{Opt_mballoc, "mballoc"},
														
 
															-	{Opt_nomballoc, "nomballoc"},
														
 
															 	{Opt_stripe, "stripe=%u"},
														
 
															 	{Opt_resize, "resize"},
														
 
															 	{Opt_delalloc, "delalloc"},
														
@@ -1073,9 +1009,6 @@ static int parse_options(char *options, struct super_block *sb,
 
															 		case Opt_nouid32:
														
 
															 			set_opt(sbi->s_mount_opt, NO_UID32);
														
 
															 			break;
														
 
															-		case Opt_nocheck:
														
 
															-			clear_opt(sbi->s_mount_opt, CHECK);
														
 
															-			break;
														
 
															 		case Opt_debug:
														
 
															 			set_opt(sbi->s_mount_opt, DEBUG);
														
 
															 			break;
														
@@ -1618,14 +1551,14 @@ static int ext4_check_descriptors(struct super_block *sb)
 
															 		if (block_bitmap < first_block || block_bitmap > last_block) {
														
 
															 			printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
														
 
															 			       "Block bitmap for group %lu not in group "
														
 
															-			       "(block %llu)!", i, block_bitmap);
														
 
															+			       "(block %llu)!\n", i, block_bitmap);
														
 
															 			return 0;
														
 
															 		}
														
 
															 		inode_bitmap = ext4_inode_bitmap(sb, gdp);
														
 
															 		if (inode_bitmap < first_block || inode_bitmap > last_block) {
														
 
															 			printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
														
 
															 			       "Inode bitmap for group %lu not in group "
														
 
															-			       "(block %llu)!", i, inode_bitmap);
														
 
															+			       "(block %llu)!\n", i, inode_bitmap);
														
 
															 			return 0;
														
 
															 		}
														
 
															 		inode_table = ext4_inode_table(sb, gdp);
														
@@ -1633,7 +1566,7 @@ static int ext4_check_descriptors(struct super_block *sb)
 
															 		    inode_table + sbi->s_itb_per_group - 1 > last_block) {
														
 
															 			printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
														
 
															 			       "Inode table for group %lu not in group "
														
 
															-			       "(block %llu)!", i, inode_table);
														
 
															+			       "(block %llu)!\n", i, inode_table);
														
 
															 			return 0;
														
 
															 		}
														
 
															 		spin_lock(sb_bgl_lock(sbi, i));
														
@@ -1778,13 +1711,13 @@ static void ext4_orphan_cleanup(struct super_block *sb,
 
															  *
														
 
															  * Note, this does *not* consider any metadata overhead for vfs i_blocks.
														
 
															  */
														
 
															-static loff_t ext4_max_size(int blkbits)
														
 
															+static loff_t ext4_max_size(int blkbits, int has_huge_files)
														
 
															 {
														
 
															 	loff_t res;
														
 
															 	loff_t upper_limit = MAX_LFS_FILESIZE;
														
 
															 	/* small i_blocks in vfs inode? */
														
 
															-	if (sizeof(blkcnt_t) < sizeof(u64)) {
														
 
															+	if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) {
														
 
															 		/*
														
 
															 		 * CONFIG_LSF is not enabled implies the inode
														
 
															 		 * i_block represent total blocks in 512 bytes
														
@@ -1814,7 +1747,7 @@ static loff_t ext4_max_size(int blkbits)
 
															  * block limit, and also a limit of (2^48 - 1) 512-byte sectors in i_blocks.
														
 
															  * We need to be 1 filesystem block less than the 2^48 sector limit.
														
 
															  */
														
 
															-static loff_t ext4_max_bitmap_size(int bits)
														
 
															+static loff_t ext4_max_bitmap_size(int bits, int has_huge_files)
														
 
															 {
														
 
															 	loff_t res = EXT4_NDIR_BLOCKS;
														
 
															 	int meta_blocks;
														
@@ -1827,11 +1760,11 @@ static loff_t ext4_max_bitmap_size(int bits)
 
															 	 * total number of  512 bytes blocks of the file
														
 
															 	 */
														
 
															-	if (sizeof(blkcnt_t) < sizeof(u64)) {
														
 
															+	if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) {
														
 
															 		/*
														
 
															-		 * CONFIG_LSF is not enabled implies the inode
														
 
															-		 * i_block represent total blocks in 512 bytes
														
 
															-		 * 32 == size of vfs inode i_blocks * 8
														
 
															+		 * !has_huge_files or CONFIG_LSF not enabled implies
														
 
															+		 * the inode i_blocks represents total blocks in
														
 
															+		 * 512-byte units; 32 == size of vfs inode i_blocks * 8
														
 
															 		 */
														
 
															 		upper_limit = (1LL << 32) - 1;
														
@@ -1940,7 +1873,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 
															 	int blocksize;
														
 
															 	int db_count;
														
 
															 	int i;
														
 
															-	int needs_recovery;
														
 
															+	int needs_recovery, has_huge_files;
														
 
															 	__le32 features;
														
 
															 	__u64 blocks_count;
														
 
															 	int err;
														
@@ -2081,7 +2014,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 
															 		       sb->s_id, le32_to_cpu(features));
														
 
															 		goto failed_mount;
														
 
															 	}
														
 
															-	if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_HUGE_FILE)) {
														
 
															+	has_huge_files = EXT4_HAS_RO_COMPAT_FEATURE(sb,
														
 
															+				    EXT4_FEATURE_RO_COMPAT_HUGE_FILE);
														
 
															+	if (has_huge_files) {
														
 
															 		/*
														
 
															 		 * Large file size enabled file system can only be
														
 
															 		 * mount if kernel is build with CONFIG_LSF
														
@@ -2131,8 +2066,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 
															 		}
														
 
															 	}
														
 
															-	sbi->s_bitmap_maxbytes = ext4_max_bitmap_size(sb->s_blocksize_bits);
														
 
															-	sb->s_maxbytes = ext4_max_size(sb->s_blocksize_bits);
														
 
															+	sbi->s_bitmap_maxbytes = ext4_max_bitmap_size(sb->s_blocksize_bits,
														
 
															+						      has_huge_files);
														
 
															+	sb->s_maxbytes = ext4_max_size(sb->s_blocksize_bits, has_huge_files);
														
 
															 	if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV) {
														
 
															 		sbi->s_inode_size = EXT4_GOOD_OLD_INODE_SIZE;
														
@@ -2456,6 +2392,21 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 
															 			"available.\n");
														
 
															 	}
														
 
															+	if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
														
 
															+		printk(KERN_WARNING "EXT4-fs: Ignoring delalloc option - "
														
 
															+				"requested data journaling mode\n");
														
 
															+		clear_opt(sbi->s_mount_opt, DELALLOC);
														
 
															+	} else if (test_opt(sb, DELALLOC))
														
 
															+		printk(KERN_INFO "EXT4-fs: delayed allocation enabled\n");
														
 
															+
														
 
															+	ext4_ext_init(sb);
														
 
															+	err = ext4_mb_init(sb, needs_recovery);
														
 
															+	if (err) {
														
 
															+		printk(KERN_ERR "EXT4-fs: failed to initalize mballoc (%d)\n",
														
 
															+		       err);
														
 
															+		goto failed_mount4;
														
 
															+	}
														
 
															+
														
 
															 	/*
														
 
															 	 * akpm: core read_super() calls in here with the superblock locked.
														
 
															 	 * That deadlocks, because orphan cleanup needs to lock the superblock
														
@@ -2475,21 +2426,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 
															 	       test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA ? "ordered":
														
 
															 	       "writeback");
														
 
															-	if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
														
 
															-		printk(KERN_WARNING "EXT4-fs: Ignoring delalloc option - "
														
 
															-				"requested data journaling mode\n");
														
 
															-		clear_opt(sbi->s_mount_opt, DELALLOC);
														
 
															-	} else if (test_opt(sb, DELALLOC))
														
 
															-		printk(KERN_INFO "EXT4-fs: delayed allocation enabled\n");
														
 
															-
														
 
															-	ext4_ext_init(sb);
														
 
															-	err = ext4_mb_init(sb, needs_recovery);
														
 
															-	if (err) {
														
 
															-		printk(KERN_ERR "EXT4-fs: failed to initalize mballoc (%d)\n",
														
 
															-		       err);
														
 
															-		goto failed_mount4;
														
 
															-	}
														
 
															-
														
 
															 	lock_kernel();
														
 
															 	return 0;
														
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -995,6 +995,9 @@ void jbd2_journal_commit_transaction(journal_t *journal)
 
															 	}
														
 
															 	spin_unlock(&journal->j_list_lock);
														
 
															+	if (journal->j_commit_callback)
														
 
															+		journal->j_commit_callback(journal, commit_transaction);
														
 
															+
														
 
															 	trace_mark(jbd2_end_commit, "dev %s transaction %d head %d",
														
 
															 		   journal->j_devname, commit_transaction->t_tid,
														
 
															 		   journal->j_tail_sequence);
														
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -52,6 +52,7 @@ jbd2_get_transaction(journal_t *journal, transaction_t *transaction)
 
															 	transaction->t_expires = jiffies + journal->j_commit_interval;
														
 
															 	spin_lock_init(&transaction->t_handle_lock);
														
 
															 	INIT_LIST_HEAD(&transaction->t_inode_list);
														
 
															+	INIT_LIST_HEAD(&transaction->t_private_list);
														
 
															 	/* Set up the commit timer for the new transaction. */
														
 
															 	journal->j_commit_timer.expires = round_jiffies(transaction->t_expires);
														
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -641,6 +641,11 @@ struct transaction_s
 
															 	 */
														
 
															 	int t_handle_count;
														
 
															+	/*
														
 
															+	 * For use by the filesystem to store fs-specific data
														
 
															+	 * structures associated with the transaction
														
 
															+	 */
														
 
															+	struct list_head	t_private_list;
														
 
															 };
														
 
															 struct transaction_run_stats_s {
														
@@ -935,6 +940,10 @@ struct journal_s
 
															 	pid_t			j_last_sync_writer;
														
 
															+	/* This function is called when a transaction is closed */
														
 
															+	void			(*j_commit_callback)(journal_t *,
														
 
															+						     transaction_t *);
														
 
															+
														
 
															 	/*
														
 
															 	 * Journal statistics
														
 
															 	 */
														
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -63,7 +63,15 @@ struct writeback_control {
 
															 	unsigned for_writepages:1;	/* This is a writepages() call */
														
 
															 	unsigned range_cyclic:1;	/* range_start is cyclic */
														
 
															 	unsigned more_io:1;		/* more io to be dispatched */
														
 
															-	unsigned range_cont:1;
														
 
															+	/*
														
 
															+	 * write_cache_pages() won't update wbc->nr_to_write and
														
 
															+	 * mapping->writeback_index if no_nrwrite_index_update
														
 
															+	 * is set.  write_cache_pages() may write more than we
														
 
															+	 * requested and we want to make sure nr_to_write and
														
 
															+	 * writeback_index are updated in a consistent manner
														
 
															+	 * so we use a single control to update them
														
 
															+	 */
														
 
															+	unsigned no_nrwrite_index_update:1;
														
 
															 };
														
 
															 /*
														
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -876,6 +876,7 @@ int write_cache_pages(struct address_space *mapping,
 
															 	pgoff_t end;		/* Inclusive */
														
 
															 	int scanned = 0;
														
 
															 	int range_whole = 0;
														
 
															+	long nr_to_write = wbc->nr_to_write;
														
 
															 	if (wbc->nonblocking && bdi_write_congested(bdi)) {
														
 
															 		wbc->encountered_congestion = 1;
														
@@ -939,7 +940,7 @@ int write_cache_pages(struct address_space *mapping,
 
															 				unlock_page(page);
														
 
															 				ret = 0;
														
 
															 			}
														
 
															-			if (ret || (--(wbc->nr_to_write) <= 0))
														
 
															+			if (ret || (--nr_to_write <= 0))
														
 
															 				done = 1;
														
 
															 			if (wbc->nonblocking && bdi_write_congested(bdi)) {
														
 
															 				wbc->encountered_congestion = 1;
														
@@ -958,11 +959,12 @@ int write_cache_pages(struct address_space *mapping,
 
															 		index = 0;
														
 
															 		goto retry;
														
 
															 	}
														
 
															-	if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
														
 
															-		mapping->writeback_index = index;
														
 
															+	if (!wbc->no_nrwrite_index_update) {
														
 
															+		if (wbc->range_cyclic || (range_whole && nr_to_write > 0))
														
 
															+			mapping->writeback_index = index;
														
 
															+		wbc->nr_to_write = nr_to_write;
														
 
															+	}
														
 
															-	if (wbc->range_cont)
														
 
															-		wbc->range_start = index << PAGE_CACHE_SHIFT;
														
 
															 	return ret;
														
 
															 }
														
 
															 EXPORT_SYMBOL(write_cache_pages);