@@ -1407,149 +1407,42 @@ static void ext4_da_page_release_reservation(struct page *page,
  * Delayed allocation stuff
  */
 
-static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd);
-
-/*
- * mpage_da_submit_io - walks through extent of pages and try to write
- * them with writepage() call back
- *
- * @mpd->inode: inode
- * @mpd->first_page: first page of the extent
- * @mpd->next_page: page after the last page of the extent
- *
- * By the time mpage_da_submit_io() is called we expect all blocks
- * to be allocated. this may be wrong if allocation failed.
- *
- * As pages are already locked by write_cache_pages(), we can't use it
- */
-static int mpage_da_submit_io(struct mpage_da_data *mpd,
-			      struct ext4_map_blocks *map)
-{
-	struct pagevec pvec;
-	unsigned long index, end;
-	int ret = 0, err, nr_pages, i;
-	struct inode *inode = mpd->inode;
-	struct address_space *mapping = inode->i_mapping;
-	loff_t size = i_size_read(inode);
-	unsigned int len, block_start;
-	struct buffer_head *bh, *page_bufs = NULL;
-	sector_t pblock = 0, cur_logical = 0;
-	struct ext4_io_submit io_submit;
-
-	BUG_ON(mpd->next_page <= mpd->first_page);
-	ext4_io_submit_init(&io_submit, mpd->wbc);
-	io_submit.io_end = ext4_init_io_end(inode, GFP_NOFS);
-	if (!io_submit.io_end) {
-		ext4_da_block_invalidatepages(mpd);
-		return -ENOMEM;
-	}
+struct mpage_da_data {
+	struct inode *inode;
+	struct writeback_control *wbc;
+	pgoff_t first_page;	/* The first page to write */
+	pgoff_t next_page;	/* Current page to examine */
+	pgoff_t last_page;	/* Last page to examine */
 	/*
-	 * We need to start from the first_page to the next_page - 1
-	 * to make sure we also write the mapped dirty buffer_heads.
-	 * If we look at mpd->b_blocknr we would only be looking
-	 * at the currently mapped buffer_heads.
+	 * Extent to map - this can be after first_page because that can be
+	 * fully mapped. We somewhat abuse m_flags to store whether the extent
+	 * is delalloc or unwritten.
 	 */
-	index = mpd->first_page;
-	end = mpd->next_page - 1;
-
-	pagevec_init(&pvec, 0);
-	while (index <= end) {
-		nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE);
-		if (nr_pages == 0)
-			break;
-		for (i = 0; i < nr_pages; i++) {
-			int skip_page = 0;
-			struct page *page = pvec.pages[i];
-
-			index = page->index;
-			if (index > end)
-				break;
-
-			if (index == size >> PAGE_CACHE_SHIFT)
-				len = size & ~PAGE_CACHE_MASK;
-			else
-				len = PAGE_CACHE_SIZE;
-			if (map) {
-				cur_logical = index << (PAGE_CACHE_SHIFT -
-							inode->i_blkbits);
-				pblock = map->m_pblk + (cur_logical -
-							map->m_lblk);
-			}
-			index++;
-
-			BUG_ON(!PageLocked(page));
-			BUG_ON(PageWriteback(page));
-
-			bh = page_bufs = page_buffers(page);
-			block_start = 0;
-			do {
-				if (map && (cur_logical >= map->m_lblk) &&
-				    (cur_logical <= (map->m_lblk +
-						     (map->m_len - 1)))) {
-					if (buffer_delay(bh)) {
-						clear_buffer_delay(bh);
-						bh->b_blocknr = pblock;
-					}
-					if (buffer_unwritten(bh) ||
-					    buffer_mapped(bh))
-						BUG_ON(bh->b_blocknr != pblock);
-					if (map->m_flags & EXT4_MAP_UNINIT)
-						set_buffer_uninit(bh);
-					clear_buffer_unwritten(bh);
-				}
-
-				/*
-				 * skip page if block allocation undone and
-				 * block is dirty
-				 */
-				if (ext4_bh_delay_or_unwritten(NULL, bh))
-					skip_page = 1;
-				bh = bh->b_this_page;
-				block_start += bh->b_size;
-				cur_logical++;
-				pblock++;
-			} while (bh != page_bufs);
-
-			if (skip_page) {
-				unlock_page(page);
-				continue;
-			}
-
-			clear_page_dirty_for_io(page);
-			err = ext4_bio_write_page(&io_submit, page, len,
-						  mpd->wbc);
-			if (!err)
-				mpd->pages_written++;
-			/*
-			 * In error case, we have to continue because
-			 * remaining pages are still locked
-			 */
-			if (ret == 0)
-				ret = err;
-		}
-		pagevec_release(&pvec);
-	}
-	ext4_io_submit(&io_submit);
-	/* Drop io_end reference we got from init */
-	ext4_put_io_end_defer(io_submit.io_end);
-	return ret;
-}
+	struct ext4_map_blocks map;
+	struct ext4_io_submit io_submit;	/* IO submission data */
+};
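
The mpage_da_data structure above replaces the old b_blocknr/b_size/b_state bookkeeping: the locked pages are tracked as a half-open page range (first_page up to next_page), and the extent still needing mapping lives in an ordinary ext4_map_blocks. Every page-to-block conversion in the rest of the patch reduces to a shift by PAGE_CACHE_SHIFT - i_blkbits. A minimal, runnable userspace sketch of that arithmetic, assuming 4096-byte pages and 1024-byte blocks (all macro values and names below are illustrative, not taken from the patch):

/* Sketch of the page <-> logical-block arithmetic used throughout
 * the patch; 4096-byte pages and 1024-byte blocks are assumptions. */
#include <stdio.h>

#define PG_SHIFT	12	/* stand-in for PAGE_CACHE_SHIFT */
#define BLKBITS		10	/* stand-in for inode->i_blkbits */

int main(void)
{
	unsigned long page_index = 5;
	/* First logical block covered by the page, as in
	 * lblk = page->index << (PAGE_CACHE_SHIFT - blkbits) */
	unsigned long lblk = page_index << (PG_SHIFT - BLKBITS);
	/* ...and back: the page containing a given block */
	unsigned long back = lblk >> (PG_SHIFT - BLKBITS);

	printf("page %lu -> blocks %lu..%lu, block %lu -> page %lu\n",
	       page_index, lblk,
	       lblk + (1UL << (PG_SHIFT - BLKBITS)) - 1, lblk, back);
	return 0;
}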
 
-static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd)
+static void mpage_release_unused_pages(struct mpage_da_data *mpd,
+				       bool invalidate)
 {
 	int nr_pages, i;
 	pgoff_t index, end;
 	struct pagevec pvec;
 	struct inode *inode = mpd->inode;
 	struct address_space *mapping = inode->i_mapping;
-	ext4_lblk_t start, last;
+
+	/* This is necessary when next_page == 0. */
+	if (mpd->first_page >= mpd->next_page)
+		return;
 
 	index = mpd->first_page;
 	end = mpd->next_page - 1;
-
-	start = index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
-	last = end << (PAGE_CACHE_SHIFT - inode->i_blkbits);
-	ext4_es_remove_extent(inode, start, last - start + 1);
+	if (invalidate) {
+		ext4_lblk_t start, last;
+		start = index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
+		last = end << (PAGE_CACHE_SHIFT - inode->i_blkbits);
+		ext4_es_remove_extent(inode, start, last - start + 1);
+	}
 
 	pagevec_init(&pvec, 0);
 	while (index <= end) {
@@ -1562,14 +1455,15 @@ static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd)
 				break;
 			BUG_ON(!PageLocked(page));
 			BUG_ON(PageWriteback(page));
-			block_invalidatepage(page, 0, PAGE_CACHE_SIZE);
-			ClearPageUptodate(page);
+			if (invalidate) {
+				block_invalidatepage(page, 0, PAGE_CACHE_SIZE);
+				ClearPageUptodate(page);
+			}
 			unlock_page(page);
 		}
 		index = pvec.pages[nr_pages - 1]->index + 1;
 		pagevec_release(&pvec);
 	}
-	return;
 }
 
 static void ext4_print_free_blocks(struct inode *inode)
@@ -1598,215 +1492,6 @@ static void ext4_print_free_blocks(struct inode *inode)
 	return;
 }
 
-/*
- * mpage_da_map_and_submit - go through given space, map them
- *			     if necessary, and then submit them for I/O
- *
- * @mpd - bh describing space
- *
- * The function skips space we know is already mapped to disk blocks.
- *
- */
-static void mpage_da_map_and_submit(struct mpage_da_data *mpd)
-{
-	int err, blks, get_blocks_flags;
-	struct ext4_map_blocks map, *mapp = NULL;
-	sector_t next = mpd->b_blocknr;
-	unsigned max_blocks = mpd->b_size >> mpd->inode->i_blkbits;
-	loff_t disksize = EXT4_I(mpd->inode)->i_disksize;
-	handle_t *handle = NULL;
-
-	/*
-	 * If the blocks are mapped already, or we couldn't accumulate
-	 * any blocks, then proceed immediately to the submission stage.
-	 */
-	if ((mpd->b_size == 0) ||
-	    ((mpd->b_state & (1 << BH_Mapped)) &&
-	     !(mpd->b_state & (1 << BH_Delay)) &&
-	     !(mpd->b_state & (1 << BH_Unwritten))))
-		goto submit_io;
-
-	handle = ext4_journal_current_handle();
-	BUG_ON(!handle);
-
-	/*
-	 * Call ext4_map_blocks() to allocate any delayed allocation
-	 * blocks, or to convert an uninitialized extent to be
-	 * initialized (in the case where we have written into
-	 * one or more preallocated blocks).
-	 *
-	 * We pass in the magic EXT4_GET_BLOCKS_DELALLOC_RESERVE to
-	 * indicate that we are on the delayed allocation path. This
-	 * affects functions in many different parts of the allocation
-	 * call path. This flag exists primarily because we don't
-	 * want to change *many* call functions, so ext4_map_blocks()
-	 * will set the EXT4_STATE_DELALLOC_RESERVED flag once the
-	 * inode's allocation semaphore is taken.
-	 *
-	 * If the blocks in questions were delalloc blocks, set
-	 * EXT4_GET_BLOCKS_DELALLOC_RESERVE so the delalloc accounting
-	 * variables are updated after the blocks have been allocated.
-	 */
-	map.m_lblk = next;
-	map.m_len = max_blocks;
-	/*
-	 * We're in delalloc path and it is possible that we're going to
-	 * need more metadata blocks than previously reserved. However
-	 * we must not fail because we're in writeback and there is
-	 * nothing we can do about it so it might result in data loss.
-	 * So use reserved blocks to allocate metadata if possible.
-	 */
-	get_blocks_flags = EXT4_GET_BLOCKS_CREATE |
-		EXT4_GET_BLOCKS_METADATA_NOFAIL;
-	if (ext4_should_dioread_nolock(mpd->inode))
-		get_blocks_flags |= EXT4_GET_BLOCKS_IO_CREATE_EXT;
-	if (mpd->b_state & (1 << BH_Delay))
-		get_blocks_flags |= EXT4_GET_BLOCKS_DELALLOC_RESERVE;
-
-
-	blks = ext4_map_blocks(handle, mpd->inode, &map, get_blocks_flags);
-	if (blks < 0) {
-		struct super_block *sb = mpd->inode->i_sb;
-
-		err = blks;
-		/*
-		 * If get block returns EAGAIN or ENOSPC and there
-		 * appears to be free blocks we will just let
-		 * mpage_da_submit_io() unlock all of the pages.
-		 */
-		if (err == -EAGAIN)
-			goto submit_io;
-
-		if (err == -ENOSPC && ext4_count_free_clusters(sb)) {
-			mpd->retval = err;
-			goto submit_io;
-		}
-
-		/*
-		 * get block failure will cause us to loop in
-		 * writepages, because a_ops->writepage won't be able
-		 * to make progress. The page will be redirtied by
-		 * writepage and writepages will again try to write
-		 * the same.
-		 */
-		if (!(EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED)) {
-			ext4_msg(sb, KERN_CRIT,
-				 "delayed block allocation failed for inode %lu "
-				 "at logical offset %llu with max blocks %zd "
-				 "with error %d", mpd->inode->i_ino,
-				 (unsigned long long) next,
-				 mpd->b_size >> mpd->inode->i_blkbits, err);
-			ext4_msg(sb, KERN_CRIT,
-				 "This should not happen!! Data will be lost");
-			if (err == -ENOSPC)
-				ext4_print_free_blocks(mpd->inode);
-		}
-		/* invalidate all the pages */
-		ext4_da_block_invalidatepages(mpd);
-
-		/* Mark this page range as having been completed */
-		mpd->io_done = 1;
-		return;
-	}
-	BUG_ON(blks == 0);
-
-	mapp = &map;
-	if (map.m_flags & EXT4_MAP_NEW) {
-		struct block_device *bdev = mpd->inode->i_sb->s_bdev;
-		int i;
-
-		for (i = 0; i < map.m_len; i++)
-			unmap_underlying_metadata(bdev, map.m_pblk + i);
-	}
-
-	/*
-	 * Update on-disk size along with block allocation.
-	 */
-	disksize = ((loff_t) next + blks) << mpd->inode->i_blkbits;
-	if (disksize > i_size_read(mpd->inode))
-		disksize = i_size_read(mpd->inode);
-	if (disksize > EXT4_I(mpd->inode)->i_disksize) {
-		ext4_update_i_disksize(mpd->inode, disksize);
-		err = ext4_mark_inode_dirty(handle, mpd->inode);
-		if (err)
-			ext4_error(mpd->inode->i_sb,
-				   "Failed to mark inode %lu dirty",
-				   mpd->inode->i_ino);
-	}
-
-submit_io:
-	mpage_da_submit_io(mpd, mapp);
-	mpd->io_done = 1;
-}
-
-#define BH_FLAGS ((1 << BH_Uptodate) | (1 << BH_Mapped) | \
-		  (1 << BH_Delay) | (1 << BH_Unwritten))
-
-/*
- * mpage_add_bh_to_extent - try to add one more block to extent of blocks
- *
- * @mpd->lbh - extent of blocks
- * @logical - logical number of the block in the file
- * @b_state - b_state of the buffer head added
- *
- * the function is used to collect contig. blocks in same state
- */
-static void mpage_add_bh_to_extent(struct mpage_da_data *mpd, sector_t logical,
-				   unsigned long b_state)
-{
-	sector_t next;
-	int blkbits = mpd->inode->i_blkbits;
-	int nrblocks = mpd->b_size >> blkbits;
-
-	/*
-	 * XXX Don't go larger than mballoc is willing to allocate
-	 * This is a stopgap solution. We eventually need to fold
-	 * mpage_da_submit_io() into this function and then call
-	 * ext4_map_blocks() multiple times in a loop
-	 */
-	if (nrblocks >= (8*1024*1024 >> blkbits))
-		goto flush_it;
-
-	/* check if the reserved journal credits might overflow */
-	if (!ext4_test_inode_flag(mpd->inode, EXT4_INODE_EXTENTS)) {
-		if (nrblocks >= EXT4_MAX_TRANS_DATA) {
-			/*
-			 * With non-extent format we are limited by the journal
-			 * credit available. Total credit needed to insert
-			 * nrblocks contiguous blocks is dependent on the
-			 * nrblocks. So limit nrblocks.
-			 */
-			goto flush_it;
-		}
-	}
-	/*
-	 * First block in the extent
-	 */
-	if (mpd->b_size == 0) {
-		mpd->b_blocknr = logical;
-		mpd->b_size = 1 << blkbits;
-		mpd->b_state = b_state & BH_FLAGS;
-		return;
-	}
-
-	next = mpd->b_blocknr + nrblocks;
-	/*
-	 * Can we merge the block to our big extent?
-	 */
-	if (logical == next && (b_state & BH_FLAGS) == mpd->b_state) {
-		mpd->b_size += 1 << blkbits;
-		return;
-	}
-
-flush_it:
-	/*
-	 * We couldn't merge the block to our extent, so we
-	 * need to flush current extent and start new one
-	 */
-	mpage_da_map_and_submit(mpd);
-	return;
-}
-
 static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh)
 {
 	return (buffer_delay(bh) || buffer_unwritten(bh)) && buffer_dirty(bh);
@@ -2204,6 +1889,8 @@ static int ext4_writepage(struct page *page,
 	return ret;
 }
 
+#define BH_FLAGS ((1 << BH_Unwritten) | (1 << BH_Delay))
+
 /*
  * mballoc gives us at most this number of blocks...
  * XXX: That seems to be only a limitation of ext4_mb_normalize_request().
@@ -2211,6 +1898,315 @@ static int ext4_writepage(struct page *page,
  */
 #define MAX_WRITEPAGES_EXTENT_LEN 2048
 
+/*
+ * mpage_add_bh_to_extent - try to add bh to extent of blocks to map
+ *
+ * @mpd - extent of blocks
+ * @lblk - logical number of the block in the file
+ * @b_state - b_state of the buffer head added
+ *
+ * The function is used to collect contiguous blocks in the same state
+ */
+static int mpage_add_bh_to_extent(struct mpage_da_data *mpd, ext4_lblk_t lblk,
+				  unsigned long b_state)
+{
+	struct ext4_map_blocks *map = &mpd->map;
+
+	/* Don't go larger than mballoc is willing to allocate */
+	if (map->m_len >= MAX_WRITEPAGES_EXTENT_LEN)
+		return 0;
+
+	/* First block in the extent? */
+	if (map->m_len == 0) {
+		map->m_lblk = lblk;
+		map->m_len = 1;
+		map->m_flags = b_state & BH_FLAGS;
+		return 1;
+	}
+
+	/* Can we merge the block to our big extent? */
+	if (lblk == map->m_lblk + map->m_len &&
+	    (b_state & BH_FLAGS) == map->m_flags) {
+		map->m_len++;
+		return 1;
+	}
+	return 0;
+}
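
The return convention matters here: 1 means the block was absorbed into the extent being built, 0 means the caller holds a complete extent and must map it before going on. The rule itself - only an exactly adjacent logical block with the same delalloc/unwritten state joins, never past the length cap - can be modeled in a few lines of userspace C. The sketch below is illustrative only; the types and names are stand-ins:

/* Userspace model of the merge rule in mpage_add_bh_to_extent(). */
#include <stdio.h>

struct extent {
	unsigned int lblk;	/* first logical block */
	unsigned int len;	/* number of blocks, 0 = empty extent */
	unsigned int flags;	/* state bits shared by every block */
};

#define MAX_EXTENT_LEN	2048	/* mirrors MAX_WRITEPAGES_EXTENT_LEN */

static int try_add(struct extent *ext, unsigned int lblk, unsigned int flags)
{
	if (ext->len >= MAX_EXTENT_LEN)
		return 0;			/* full: caller must map */
	if (ext->len == 0) {			/* first block starts extent */
		ext->lblk = lblk;
		ext->len = 1;
		ext->flags = flags;
		return 1;
	}
	if (lblk == ext->lblk + ext->len && flags == ext->flags) {
		ext->len++;			/* contiguous, same state */
		return 1;
	}
	return 0;				/* gap or state change */
}

int main(void)
{
	struct extent e = { 0, 0, 0 };
	int a = try_add(&e, 10, 1);	/* starts the extent */
	int b = try_add(&e, 11, 1);	/* adjacent, same state: merges */
	int c = try_add(&e, 13, 1);	/* gap: rejected */

	printf("%d %d %d -> extent [%u, +%u)\n", a, b, c, e.lblk, e.len);
	return 0;
}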
+
+static bool add_page_bufs_to_extent(struct mpage_da_data *mpd,
+				    struct buffer_head *head,
+				    struct buffer_head *bh,
+				    ext4_lblk_t lblk)
+{
+	struct inode *inode = mpd->inode;
+	ext4_lblk_t blocks = (i_size_read(inode) + (1 << inode->i_blkbits) - 1)
+							>> inode->i_blkbits;
+
+	do {
+		BUG_ON(buffer_locked(bh));
+
+		if (!buffer_dirty(bh) || !buffer_mapped(bh) ||
+		    (!buffer_delay(bh) && !buffer_unwritten(bh)) ||
+		    lblk >= blocks) {
+			/* Found extent to map? */
+			if (mpd->map.m_len)
+				return false;
+			if (lblk >= blocks)
+				return true;
+			continue;
+		}
+		if (!mpage_add_bh_to_extent(mpd, lblk, bh->b_state))
+			return false;
+	} while (lblk++, (bh = bh->b_this_page) != head);
+	return true;
+}
+
+static int mpage_submit_page(struct mpage_da_data *mpd, struct page *page)
+{
+	int len;
+	loff_t size = i_size_read(mpd->inode);
+	int err;
+
+	BUG_ON(page->index != mpd->first_page);
+	if (page->index == size >> PAGE_CACHE_SHIFT)
+		len = size & ~PAGE_CACHE_MASK;
+	else
+		len = PAGE_CACHE_SIZE;
+	clear_page_dirty_for_io(page);
+	err = ext4_bio_write_page(&mpd->io_submit, page, len, mpd->wbc);
+	if (!err)
+		mpd->wbc->nr_to_write--;
+	mpd->first_page++;
+
+	return err;
+}
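
One detail worth calling out: only the page containing i_size is written short; every page before it goes out in full, which is what keeps 'len' from running past end of file. A runnable sketch of just that computation, assuming 4096-byte pages (names are illustrative):

/* EOF-page length computation as in mpage_submit_page(); the 4096-byte
 * page size is an assumption. */
#include <stdio.h>

#define PG_SIZE		4096UL
#define PG_MASK		(~(PG_SIZE - 1))
#define PG_SHIFT	12

static unsigned long write_len(unsigned long page_index, unsigned long isize)
{
	if (page_index == isize >> PG_SHIFT)
		return isize & ~PG_MASK;	/* partial page at EOF */
	return PG_SIZE;				/* interior page: all of it */
}

int main(void)
{
	/* i_size = 10000: page 2 holds bytes 8192..9999 -> 1808 bytes */
	printf("%lu %lu\n", write_len(1, 10000), write_len(2, 10000));
	return 0;
}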
+
+/*
+ * mpage_map_buffers - update buffers corresponding to changed extent and
+ *		       submit fully mapped pages for IO
+ *
+ * @mpd - description of extent to map, on return next extent to map
+ *
+ * Scan buffers corresponding to changed extent (we expect corresponding pages
+ * to be already locked) and update buffer state according to new extent state.
+ * We map delalloc buffers to their physical location, clear unwritten bits,
+ * and mark buffers as uninit when we perform writes to uninitialized extents
+ * and do extent conversion after IO is finished. If the last page is not fully
+ * mapped, we update @map to the next extent in the last page that needs
+ * mapping. Otherwise we submit the page for IO.
+ */
+static int mpage_map_and_submit_buffers(struct mpage_da_data *mpd)
+{
+	struct pagevec pvec;
+	int nr_pages, i;
+	struct inode *inode = mpd->inode;
+	struct buffer_head *head, *bh;
+	int bpp_bits = PAGE_CACHE_SHIFT - inode->i_blkbits;
+	ext4_lblk_t blocks = (i_size_read(inode) + (1 << inode->i_blkbits) - 1)
+							>> inode->i_blkbits;
+	pgoff_t start, end;
+	ext4_lblk_t lblk;
+	sector_t pblock;
+	int err;
+
+	start = mpd->map.m_lblk >> bpp_bits;
+	end = (mpd->map.m_lblk + mpd->map.m_len - 1) >> bpp_bits;
+	lblk = start << bpp_bits;
+	pblock = mpd->map.m_pblk;
+
+	pagevec_init(&pvec, 0);
+	while (start <= end) {
+		nr_pages = pagevec_lookup(&pvec, inode->i_mapping, start,
+					  PAGEVEC_SIZE);
+		if (nr_pages == 0)
+			break;
+		for (i = 0; i < nr_pages; i++) {
+			struct page *page = pvec.pages[i];
+
+			if (page->index > end)
+				break;
+			/* Up to 'end' pages must be contiguous */
+			BUG_ON(page->index != start);
+			bh = head = page_buffers(page);
+			do {
+				if (lblk < mpd->map.m_lblk)
+					continue;
+				if (lblk >= mpd->map.m_lblk + mpd->map.m_len) {
+					/*
+					 * Buffer after end of mapped extent.
+					 * Find next buffer in the page to map.
+					 */
+					mpd->map.m_len = 0;
+					mpd->map.m_flags = 0;
+					add_page_bufs_to_extent(mpd, head, bh,
+								lblk);
+					pagevec_release(&pvec);
+					return 0;
+				}
+				if (buffer_delay(bh)) {
+					clear_buffer_delay(bh);
+					bh->b_blocknr = pblock++;
+				}
+				if (mpd->map.m_flags & EXT4_MAP_UNINIT)
+					set_buffer_uninit(bh);
+				clear_buffer_unwritten(bh);
+			} while (++lblk < blocks &&
+				 (bh = bh->b_this_page) != head);
+
+			/*
+			 * FIXME: This is going to break if dioread_nolock
+			 * supports blocksize < pagesize as we will try to
+			 * convert potentially unmapped parts of inode.
+			 */
+			mpd->io_submit.io_end->size += PAGE_CACHE_SIZE;
+			/* Page fully mapped - let IO run! */
+			err = mpage_submit_page(mpd, page);
+			if (err < 0) {
+				pagevec_release(&pvec);
+				return err;
+			}
+			start++;
+		}
+		pagevec_release(&pvec);
+	}
+	/* Extent fully mapped and matches with page boundary. We are done. */
+	mpd->map.m_len = 0;
+	mpd->map.m_flags = 0;
+	return 0;
+}
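
Before the walk starts, the mapped block extent is converted back into a page range with bpp_bits (log2 of blocks per page); buffers of the first page that lie below m_lblk are skipped inside the loop. A small sketch of the range computation, under the same 4K-page/1K-block assumption as earlier (values are illustrative):

/* Page range covered by a block extent, as computed at the top of
 * mpage_map_and_submit_buffers(); sizes are assumptions. */
#include <stdio.h>

int main(void)
{
	int bpp_bits = 12 - 10;			/* PAGE_CACHE_SHIFT - i_blkbits */
	unsigned int m_lblk = 5, m_len = 9;	/* blocks 5..13 */
	unsigned long start = m_lblk >> bpp_bits;
	unsigned long end = (m_lblk + m_len - 1) >> bpp_bits;

	/* Blocks 5..13 touch pages 1..3; the leading buffers of page 1
	 * (lblk < m_lblk) are skipped by the walk. */
	printf("pages %lu..%lu\n", start, end);
	return 0;
}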
+
+static int mpage_map_one_extent(handle_t *handle, struct mpage_da_data *mpd)
+{
+	struct inode *inode = mpd->inode;
+	struct ext4_map_blocks *map = &mpd->map;
+	int get_blocks_flags;
+	int err;
+
+	trace_ext4_da_write_pages_extent(inode, map);
+	/*
+	 * Call ext4_map_blocks() to allocate any delayed allocation blocks, or
+	 * to convert an uninitialized extent to be initialized (in the case
+	 * where we have written into one or more preallocated blocks). It is
+	 * possible that we're going to need more metadata blocks than
+	 * previously reserved. However we must not fail because we're in
+	 * writeback and there is nothing we can do about it so it might result
+	 * in data loss. So use reserved blocks to allocate metadata if
+	 * possible.
+	 *
+	 * We pass in the magic EXT4_GET_BLOCKS_DELALLOC_RESERVE if the blocks
+	 * in question are delalloc blocks. This affects functions in many
+	 * different parts of the allocation call path. This flag exists
+	 * primarily because we don't want to change *many* call functions, so
+	 * ext4_map_blocks() will set the EXT4_STATE_DELALLOC_RESERVED flag
+	 * once the inode's allocation semaphore is taken.
+	 */
+	get_blocks_flags = EXT4_GET_BLOCKS_CREATE |
+			   EXT4_GET_BLOCKS_METADATA_NOFAIL;
+	if (ext4_should_dioread_nolock(inode))
+		get_blocks_flags |= EXT4_GET_BLOCKS_IO_CREATE_EXT;
+	if (map->m_flags & (1 << BH_Delay))
+		get_blocks_flags |= EXT4_GET_BLOCKS_DELALLOC_RESERVE;
+
+	err = ext4_map_blocks(handle, inode, map, get_blocks_flags);
+	if (err < 0)
+		return err;
+
+	BUG_ON(map->m_len == 0);
+	if (map->m_flags & EXT4_MAP_NEW) {
+		struct block_device *bdev = inode->i_sb->s_bdev;
+		int i;
+
+		for (i = 0; i < map->m_len; i++)
+			unmap_underlying_metadata(bdev, map->m_pblk + i);
+	}
+	return 0;
+}
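
The flag composition is the heart of this helper: CREATE and METADATA_NOFAIL are unconditional because writeback cannot back out without losing data, while the dioread_nolock and delalloc bits depend on mount options and on the extent's state. A userspace model of that decision, with stand-in flag values (the real constants live in ext4.h and differ):

/* Model of the get_blocks_flags composition in mpage_map_one_extent().
 * Flag values are stand-ins; only the structure is meaningful. */
#include <stdio.h>

#define GB_CREATE		0x01
#define GB_METADATA_NOFAIL	0x02
#define GB_IO_CREATE_EXT	0x04	/* dioread_nolock path */
#define GB_DELALLOC_RESERVE	0x08	/* extent carries delayed blocks */

static int map_flags(int dioread_nolock, int extent_is_delalloc)
{
	/* Always create, and never fail for want of metadata blocks:
	 * we are in writeback and cannot back out. */
	int flags = GB_CREATE | GB_METADATA_NOFAIL;

	if (dioread_nolock)
		flags |= GB_IO_CREATE_EXT;
	if (extent_is_delalloc)
		flags |= GB_DELALLOC_RESERVE;
	return flags;
}

int main(void)
{
	printf("0x%x 0x%x\n", map_flags(0, 1), map_flags(1, 0));
	return 0;
}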
+
+/*
+ * mpage_map_and_submit_extent - map extent starting at mpd->lblk of length
+ *				 mpd->len and submit pages underlying it for IO
+ *
+ * @handle - handle for journal operations
+ * @mpd - extent to map
+ *
+ * The function maps extent starting at mpd->lblk of length mpd->len. If it is
+ * delayed, blocks are allocated, if it is unwritten, we may need to convert
+ * them to initialized or split the described range from larger unwritten
+ * extent. Note that we need not map all the described range since allocation
+ * can return fewer blocks or the range is covered by more unwritten extents. We
+ * cannot map more because we are limited by reserved transaction credits. On
+ * the other hand we always make sure that the last touched page is fully
+ * mapped so that it can be written out (and thus forward progress is
+ * guaranteed). After mapping we submit all mapped pages for IO.
+ */
+static int mpage_map_and_submit_extent(handle_t *handle,
+				       struct mpage_da_data *mpd)
+{
+	struct inode *inode = mpd->inode;
+	struct ext4_map_blocks *map = &mpd->map;
+	int err;
+	loff_t disksize;
+
+	mpd->io_submit.io_end->offset =
+				((loff_t)map->m_lblk) << inode->i_blkbits;
+	while (map->m_len) {
+		err = mpage_map_one_extent(handle, mpd);
+		if (err < 0) {
+			struct super_block *sb = inode->i_sb;
+
+			/*
+			 * Need to commit transaction to free blocks. Let upper
+			 * layers sort it out.
+			 */
+			if (err == -ENOSPC && ext4_count_free_clusters(sb))
+				return -ENOSPC;
+
+			if (!(EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED)) {
+				ext4_msg(sb, KERN_CRIT,
+					 "Delayed block allocation failed for "
+					 "inode %lu at logical offset %llu with"
+					 " max blocks %u with error %d",
+					 inode->i_ino,
+					 (unsigned long long)map->m_lblk,
+					 (unsigned)map->m_len, err);
+				ext4_msg(sb, KERN_CRIT,
+					 "This should not happen!! Data will "
+					 "be lost\n");
+				if (err == -ENOSPC)
+					ext4_print_free_blocks(inode);
+			}
+			/* invalidate all the pages */
+			mpage_release_unused_pages(mpd, true);
+			return err;
+		}
+		/*
+		 * Update buffer state, submit mapped pages, and get us new
+		 * extent to map
+		 */
+		err = mpage_map_and_submit_buffers(mpd);
+		if (err < 0)
+			return err;
+	}
+
+	/* Update on-disk size after IO is submitted */
+	disksize = ((loff_t)mpd->first_page) << PAGE_CACHE_SHIFT;
+	if (disksize > i_size_read(inode))
+		disksize = i_size_read(inode);
+	if (disksize > EXT4_I(inode)->i_disksize) {
+		int err2;
+
+		ext4_update_i_disksize(inode, disksize);
+		err2 = ext4_mark_inode_dirty(handle, inode);
+		if (err2)
+			ext4_error(inode->i_sb,
+				   "Failed to mark inode %lu dirty",
+				   inode->i_ino);
+		if (!err)
+			err = err2;
+	}
+	return err;
+}
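
Because ext4_map_blocks() may map only a prefix of the requested range, the function above loops: each pass maps what it can, mpage_map_and_submit_buffers() submits the now-mapped pages and re-primes mpd->map with whatever in the last page still needs mapping, and the loop ends once m_len drops to zero. A plain-C skeleton of that drain loop - map_some() stands in for ext4_map_blocks(), and the 4-blocks-per-call limit is an arbitrary assumption:

/* Skeleton of the drain loop in mpage_map_and_submit_extent(). */
#include <stdio.h>

struct ext { unsigned int lblk, len; };

static unsigned int map_some(struct ext *e)
{
	/* Pretend the allocator maps at most 4 blocks per call. */
	unsigned int mapped = e->len < 4 ? e->len : 4;

	printf("mapped %u..%u\n", e->lblk, e->lblk + mapped - 1);
	return mapped;
}

int main(void)
{
	struct ext e = { 8, 10 };	/* blocks 8..17 need mapping */

	while (e.len) {
		unsigned int n = map_some(&e);
		/* In the kernel, mpage_map_and_submit_buffers() submits
		 * the mapped pages and re-primes the extent; here we just
		 * advance past what was mapped. */
		e.lblk += n;
		e.len -= n;
	}
	return 0;
}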
+
 /*
  * Calculate the total number of credits to reserve for one writepages
  * iteration. This is called from ext4_da_writepages(). We map an extent of
@@ -2227,44 +2223,49 @@ static int ext4_da_writepages_trans_blocks(struct inode *inode)
 }
 
 /*
- * write_cache_pages_da - walk the list of dirty pages of the given
- * address space and accumulate pages that need writing, and call
- * mpage_da_map_and_submit to map a single contiguous memory region
- * and then write them.
+ * mpage_prepare_extent_to_map - find & lock contiguous range of dirty pages
+ *				 and underlying extent to map
+ *
+ * @mpd - where to look for pages
+ *
+ * Walk dirty pages in the mapping. If they are fully mapped, submit them for
+ * IO immediately. When we find a page which isn't mapped we start accumulating
+ * extent of buffers underlying these pages that needs mapping (formed by
+ * either delayed or unwritten buffers). We also lock the pages containing
+ * these buffers. The extent found is returned in @mpd structure (starting at
+ * mpd->lblk with length mpd->len blocks).
+ *
+ * Note that this function can attach bios to one io_end structure which are
+ * neither logically nor physically contiguous. Although it may seem an
+ * unnecessary complication, it is actually inevitable in blocksize < pagesize
+ * case as we need to track IO to all buffers underlying a page in one io_end.
  */
-static int write_cache_pages_da(handle_t *handle,
-				struct address_space *mapping,
-				struct writeback_control *wbc,
-				struct mpage_da_data *mpd,
-				pgoff_t *done_index)
+static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd)
 {
-	struct buffer_head *bh, *head;
-	struct inode *inode = mapping->host;
-	struct pagevec pvec;
-	unsigned int nr_pages;
-	sector_t logical;
-	pgoff_t index, end;
-	long nr_to_write = wbc->nr_to_write;
-	int i, tag, ret = 0;
-
-	memset(mpd, 0, sizeof(struct mpage_da_data));
-	mpd->wbc = wbc;
-	mpd->inode = inode;
-	pagevec_init(&pvec, 0);
-	index = wbc->range_start >> PAGE_CACHE_SHIFT;
-	end = wbc->range_end >> PAGE_CACHE_SHIFT;
+	struct address_space *mapping = mpd->inode->i_mapping;
+	struct pagevec pvec;
+	unsigned int nr_pages;
+	pgoff_t index = mpd->first_page;
+	pgoff_t end = mpd->last_page;
+	int tag;
+	int i, err = 0;
+	int blkbits = mpd->inode->i_blkbits;
+	ext4_lblk_t lblk;
+	struct buffer_head *head;
 
-	if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
+	if (mpd->wbc->sync_mode == WB_SYNC_ALL || mpd->wbc->tagged_writepages)
 		tag = PAGECACHE_TAG_TOWRITE;
 	else
 		tag = PAGECACHE_TAG_DIRTY;
 
-	*done_index = index;
+	pagevec_init(&pvec, 0);
+	mpd->map.m_len = 0;
+	mpd->next_page = index;
 	while (index <= end) {
 		nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
 			      min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
 		if (nr_pages == 0)
-			return 0;
+			goto out;
 
 		for (i = 0; i < nr_pages; i++) {
 			struct page *page = pvec.pages[i];
@@ -2279,31 +2280,21 @@ static int write_cache_pages_da(handle_t *handle,
 			if (page->index > end)
 				goto out;
 
-			*done_index = page->index + 1;
-
-			/*
-			 * If we can't merge this page, and we have
-			 * accumulated an contiguous region, write it
-			 */
-			if ((mpd->next_page != page->index) &&
-			    (mpd->next_page != mpd->first_page)) {
-				mpage_da_map_and_submit(mpd);
-				goto ret_extent_tail;
-			}
+			/* If we can't merge this page, we are done. */
+			if (mpd->map.m_len > 0 && mpd->next_page != page->index)
+				goto out;
 
 			lock_page(page);
-
 			/*
-			 * If the page is no longer dirty, or its
-			 * mapping no longer corresponds to inode we
-			 * are writing (which means it has been
-			 * truncated or invalidated), or the page is
-			 * already under writeback and we are not
-			 * doing a data integrity writeback, skip the page
+			 * If the page is no longer dirty, or its mapping no
+			 * longer corresponds to inode we are writing (which
+			 * means it has been truncated or invalidated), or the
+			 * page is already under writeback and we are not doing
+			 * a data integrity writeback, skip the page
 			 */
 			if (!PageDirty(page) ||
 			    (PageWriteback(page) &&
-			     (wbc->sync_mode == WB_SYNC_NONE)) ||
+			     (mpd->wbc->sync_mode == WB_SYNC_NONE)) ||
 			    unlikely(page->mapping != mapping)) {
 				unlock_page(page);
 				continue;
@@ -2312,101 +2303,57 @@ static int write_cache_pages_da(handle_t *handle,
 			wait_on_page_writeback(page);
 			BUG_ON(PageWriteback(page));
 
-			/*
-			 * If we have inline data and arrive here, it means that
-			 * we will soon create the block for the 1st page, so
-			 * we'd better clear the inline data here.
-			 */
-			if (ext4_has_inline_data(inode)) {
-				BUG_ON(ext4_test_inode_state(inode,
-						EXT4_STATE_MAY_INLINE_DATA));
-				ext4_destroy_inline_data(handle, inode);
-			}
-
-			if (mpd->next_page != page->index)
+			if (mpd->map.m_len == 0)
 				mpd->first_page = page->index;
 			mpd->next_page = page->index + 1;
-			logical = (sector_t) page->index <<
-				(PAGE_CACHE_SHIFT - inode->i_blkbits);
-
 			/* Add all dirty buffers to mpd */
+			lblk = ((ext4_lblk_t)page->index) <<
+				(PAGE_CACHE_SHIFT - blkbits);
 			head = page_buffers(page);
-			bh = head;
-			do {
-				BUG_ON(buffer_locked(bh));
-				/*
-				 * We need to try to allocate unmapped blocks
-				 * in the same page. Otherwise we won't make
-				 * progress with the page in ext4_writepage
-				 */
-				if (ext4_bh_delay_or_unwritten(NULL, bh)) {
-					mpage_add_bh_to_extent(mpd, logical,
-							       bh->b_state);
-					if (mpd->io_done)
-						goto ret_extent_tail;
-				} else if (buffer_dirty(bh) &&
-					   buffer_mapped(bh)) {
-					/*
-					 * mapped dirty buffer. We need to
-					 * update the b_state because we look
-					 * at b_state in mpage_da_map_blocks.
-					 * We don't update b_size because if we
-					 * find an unmapped buffer_head later
-					 * we need to use the b_state flag of
-					 * that buffer_head.
-					 */
-					if (mpd->b_size == 0)
-						mpd->b_state =
-							bh->b_state & BH_FLAGS;
-				}
-				logical++;
-			} while ((bh = bh->b_this_page) != head);
-
-			if (nr_to_write > 0) {
-				nr_to_write--;
-				if (nr_to_write == 0 &&
-				    wbc->sync_mode == WB_SYNC_NONE)
-					/*
-					 * We stop writing back only if we are
-					 * not doing integrity sync. In case of
-					 * integrity sync we have to keep going
-					 * because someone may be concurrently
-					 * dirtying pages, and we might have
-					 * synced a lot of newly appeared dirty
-					 * pages, but have not synced all of the
-					 * old dirty pages.
-					 */
+			if (!add_page_bufs_to_extent(mpd, head, head, lblk))
+				goto out;
+			/* So far everything mapped? Submit the page for IO. */
+			if (mpd->map.m_len == 0) {
+				err = mpage_submit_page(mpd, page);
+				if (err < 0)
 					goto out;
 			}
+
+			/*
+			 * Accumulated enough dirty pages? This doesn't apply
+			 * to WB_SYNC_ALL mode. For integrity sync we have to
+			 * keep going because someone may be concurrently
+			 * dirtying pages, and we might have synced a lot of
+			 * newly appeared dirty pages, but have not synced all
+			 * of the old dirty pages.
+			 */
+			if (mpd->wbc->sync_mode == WB_SYNC_NONE &&
+			    mpd->next_page - mpd->first_page >=
+							mpd->wbc->nr_to_write)
+				goto out;
 		}
 		pagevec_release(&pvec);
 		cond_resched();
 	}
 	return 0;
-ret_extent_tail:
-	ret = MPAGE_DA_EXTENT_TAIL;
 out:
 	pagevec_release(&pvec);
-	cond_resched();
-	return ret;
+	return err;
}
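
The nr_to_write accounting moved with the rewrite: rather than decrementing a local counter per page, the scan stops once the window next_page - first_page reaches wbc->nr_to_write, and only for non-integrity writeback; WB_SYNC_ALL keeps scanning for the reason the in-loop comment gives. A compact model of that termination rule (names and types are illustrative):

/* Model of the scan-termination rule in mpage_prepare_extent_to_map(). */
#include <stdbool.h>
#include <stdio.h>

enum sync_mode { WB_SYNC_NONE, WB_SYNC_ALL };

static bool enough_pages(enum sync_mode mode, unsigned long first_page,
			 unsigned long next_page, long nr_to_write)
{
	return mode == WB_SYNC_NONE &&
	       next_page - first_page >= (unsigned long)nr_to_write;
}

int main(void)
{
	printf("%d %d\n",
	       enough_pages(WB_SYNC_NONE, 0, 16, 16),	/* 1: stop */
	       enough_pages(WB_SYNC_ALL, 0, 1000, 16));	/* 0: keep going */
	return 0;
}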
 
-
 static int ext4_da_writepages(struct address_space *mapping,
 			      struct writeback_control *wbc)
 {
-	pgoff_t index;
+	pgoff_t writeback_index = 0;
+	long nr_to_write = wbc->nr_to_write;
 	int range_whole = 0;
+	int cycled = 1;
 	handle_t *handle = NULL;
 	struct mpage_da_data mpd;
 	struct inode *inode = mapping->host;
-	int pages_written = 0;
-	int range_cyclic, cycled = 1, io_done = 0;
 	int needed_blocks, ret = 0;
-	loff_t range_start = wbc->range_start;
 	struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb);
-	pgoff_t done_index = 0;
-	pgoff_t end;
+	bool done;
 	struct blk_plug plug;
 
 	trace_ext4_da_writepages(inode, wbc);
@@ -2432,40 +2379,65 @@ static int ext4_da_writepages(struct address_space *mapping,
 	if (unlikely(sbi->s_mount_flags & EXT4_MF_FS_ABORTED))
 		return -EROFS;
 
+	/*
+	 * If we have inline data and arrive here, it means that
+	 * we will soon create the block for the 1st page, so
+	 * we'd better clear the inline data here.
+	 */
+	if (ext4_has_inline_data(inode)) {
+		/* Just inode will be modified... */
+		handle = ext4_journal_start(inode, EXT4_HT_INODE, 1);
+		if (IS_ERR(handle)) {
+			ret = PTR_ERR(handle);
+			goto out_writepages;
+		}
+		BUG_ON(ext4_test_inode_state(inode,
+				EXT4_STATE_MAY_INLINE_DATA));
+		ext4_destroy_inline_data(handle, inode);
+		ext4_journal_stop(handle);
+	}
+
 	if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
 		range_whole = 1;
 
-	range_cyclic = wbc->range_cyclic;
 	if (wbc->range_cyclic) {
-		index = mapping->writeback_index;
-		if (index)
+		writeback_index = mapping->writeback_index;
+		if (writeback_index)
 			cycled = 0;
-		wbc->range_start = index << PAGE_CACHE_SHIFT;
-		wbc->range_end = LLONG_MAX;
-		wbc->range_cyclic = 0;
-		end = -1;
+		mpd.first_page = writeback_index;
+		mpd.last_page = -1;
 	} else {
-		index = wbc->range_start >> PAGE_CACHE_SHIFT;
-		end = wbc->range_end >> PAGE_CACHE_SHIFT;
+		mpd.first_page = wbc->range_start >> PAGE_CACHE_SHIFT;
+		mpd.last_page = wbc->range_end >> PAGE_CACHE_SHIFT;
 	}
 
+	mpd.inode = inode;
+	mpd.wbc = wbc;
+	ext4_io_submit_init(&mpd.io_submit, wbc);
retry:
 	if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
-		tag_pages_for_writeback(mapping, index, end);
-
+		tag_pages_for_writeback(mapping, mpd.first_page, mpd.last_page);
+	done = false;
 	blk_start_plug(&plug);
-	while (!ret && wbc->nr_to_write > 0) {
+	while (!done && mpd.first_page <= mpd.last_page) {
+		/* For each extent of pages we use new io_end */
+		mpd.io_submit.io_end = ext4_init_io_end(inode, GFP_KERNEL);
+		if (!mpd.io_submit.io_end) {
+			ret = -ENOMEM;
+			break;
+		}
 
 		/*
-		 * we insert one extent at a time. So we need
-		 * credit needed for single extent allocation.
-		 * journalled mode is currently not supported
-		 * by delalloc
+		 * We have two constraints: We find one extent to map and we
+		 * must always write out the whole page (makes a difference when
+ * blocksize < pagesize) so that we don't block on IO when we
|
|
|
+ * try to write out the rest of the page. Journalled mode is
|
|
|
+ * not supported by delalloc.
|
|
|
*/
|
|
|
BUG_ON(ext4_should_journal_data(inode));
|
|
|
needed_blocks = ext4_da_writepages_trans_blocks(inode);
|
|
|
|
|
|
- /* start a new transaction*/
|
|
|
+ /* start a new transaction */
|
|
|
handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE,
|
|
|
needed_blocks);
|
|
|
if (IS_ERR(handle)) {
|
|
@@ -2473,76 +2445,67 @@ retry:
|
|
|
ext4_msg(inode->i_sb, KERN_CRIT, "%s: jbd2_start: "
|
|
|
"%ld pages, ino %lu; err %d", __func__,
|
|
|
wbc->nr_to_write, inode->i_ino, ret);
|
|
|
- blk_finish_plug(&plug);
|
|
|
- goto out_writepages;
|
|
|
+ /* Release allocated io_end */
|
|
|
+ ext4_put_io_end(mpd.io_submit.io_end);
|
|
|
+ break;
|
|
|
}
|
|
|
|
|
|
- /*
|
|
|
- * Now call write_cache_pages_da() to find the next
|
|
|
- * contiguous region of logical blocks that need
|
|
|
- * blocks to be allocated by ext4 and submit them.
|
|
|
- */
|
|
|
- ret = write_cache_pages_da(handle, mapping,
|
|
|
- wbc, &mpd, &done_index);
|
|
|
- /*
|
|
|
- * If we have a contiguous extent of pages and we
|
|
|
- * haven't done the I/O yet, map the blocks and submit
|
|
|
- * them for I/O.
|
|
|
- */
|
|
|
- if (!mpd.io_done && mpd.next_page != mpd.first_page) {
|
|
|
- mpage_da_map_and_submit(&mpd);
|
|
|
- ret = MPAGE_DA_EXTENT_TAIL;
|
|
|
+ trace_ext4_da_write_pages(inode, mpd.first_page, mpd.wbc);
|
|
|
+ ret = mpage_prepare_extent_to_map(&mpd);
|
|
|
+ if (!ret) {
|
|
|
+ if (mpd.map.m_len)
|
|
|
+ ret = mpage_map_and_submit_extent(handle, &mpd);
|
|
|
+ else {
|
|
|
+ /*
|
|
|
+ * We scanned the whole range (or exhausted
|
|
|
+ * nr_to_write), submitted what was mapped and
|
|
|
+ * didn't find anything needing mapping. We are
|
|
|
+ * done.
|
|
|
+ */
|
|
|
+ done = true;
|
|
|
+ }
|
|
|
}
|
|
|
- trace_ext4_da_write_pages(inode, &mpd);
|
|
|
- wbc->nr_to_write -= mpd.pages_written;
|
|
|
-
|
|
|
ext4_journal_stop(handle);
|
|
|
-
|
|
|
- if ((mpd.retval == -ENOSPC) && sbi->s_journal) {
|
|
|
- /* commit the transaction which would
|
|
|
+ /* Submit prepared bio */
|
|
|
+ ext4_io_submit(&mpd.io_submit);
|
|
|
+ /* Unlock pages we didn't use */
|
|
|
+ mpage_release_unused_pages(&mpd, false);
|
|
|
+ /* Drop our io_end reference we got from init */
|
|
|
+ ext4_put_io_end(mpd.io_submit.io_end);
|
|
|
+
|
|
|
+ if (ret == -ENOSPC && sbi->s_journal) {
|
|
|
+ /*
|
|
|
+ * Commit the transaction which would
|
|
|
* free blocks released in the transaction
|
|
|
* and try again
|
|
|
*/
|
|
|
jbd2_journal_force_commit_nested(sbi->s_journal);
|
|
|
ret = 0;
|
|
|
- } else if (ret == MPAGE_DA_EXTENT_TAIL) {
|
|
|
- /*
|
|
|
- * Got one extent now try with rest of the pages.
|
|
|
- * If mpd.retval is set -EIO, journal is aborted.
|
|
|
- * So we don't need to write any more.
|
|
|
- */
|
|
|
- pages_written += mpd.pages_written;
|
|
|
- ret = mpd.retval;
|
|
|
- io_done = 1;
|
|
|
- } else if (wbc->nr_to_write)
|
|
|
- /*
|
|
|
- * There is no more writeout needed
|
|
|
- * or we requested for a noblocking writeout
|
|
|
- * and we found the device congested
|
|
|
- */
|
|
|
+ continue;
|
|
|
+ }
|
|
|
+ /* Fatal error - ENOMEM, EIO... */
|
|
|
+ if (ret)
|
|
|
break;
|
|
|
}
|
|
|
blk_finish_plug(&plug);
|
|
|
- if (!io_done && !cycled) {
|
|
|
+ if (!ret && !cycled) {
|
|
|
cycled = 1;
|
|
|
- index = 0;
|
|
|
- wbc->range_start = index << PAGE_CACHE_SHIFT;
|
|
|
- wbc->range_end = mapping->writeback_index - 1;
|
|
|
+ mpd.last_page = writeback_index - 1;
|
|
|
+ mpd.first_page = 0;
|
|
|
goto retry;
|
|
|
}
|
|
|
|
|
|
/* Update index */
|
|
|
- wbc->range_cyclic = range_cyclic;
|
|
|
if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
|
|
|
/*
|
|
|
- * set the writeback_index so that range_cyclic
|
|
|
+ * Set the writeback_index so that range_cyclic
|
|
|
* mode will write it back later
|
|
|
*/
|
|
|
- mapping->writeback_index = done_index;
|
|
|
+ mapping->writeback_index = mpd.first_page;
|
|
|
|
|
|
out_writepages:
|
|
|
- wbc->range_start = range_start;
|
|
|
- trace_ext4_da_writepages_result(inode, wbc, ret, pages_written);
|
|
|
+ trace_ext4_da_writepages_result(inode, wbc, ret,
|
|
|
+ nr_to_write - wbc->nr_to_write);
|
|
|
return ret;
|
|
|
}
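
With the rewrite, range_cyclic no longer mutates the writeback_control: the two-pass wrap-around is expressed directly on mpd.first_page and mpd.last_page. The first pass covers [writeback_index, end of file]; if nothing failed and the scan started mid-file, a second pass covers [0, writeback_index - 1]. A userspace model of that control flow, with an assumed resume point:

/* Model of the range_cyclic two-pass logic in ext4_da_writepages(). */
#include <stdio.h>

int main(void)
{
	unsigned long writeback_index = 42;	/* resume point (assumed) */
	unsigned long first = writeback_index;
	unsigned long last = (unsigned long)-1;	/* mirrors last_page = -1 */
	int cycled = (writeback_index == 0);
	int ret = 0;

retry:
	printf("scan pages %lu..%lu\n", first, last);
	if (!ret && !cycled) {
		cycled = 1;
		last = writeback_index - 1;
		first = 0;
		goto retry;
	}
	return 0;
}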