9 ani în urmă · 3d93ec0364
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -40,6 +40,17 @@
 
				 #define XFS_DIO_FLAG_UNWRITTEN	(1 << 0)
			
 
				 #define XFS_DIO_FLAG_APPEND	(1 << 1)
			
 
				 
			
 
				+/*
			
 
				+ * structure owned by writepages passed to individual writepage calls
			
 
				+ */
			
 
				+struct xfs_writepage_ctx {
			
 
				+	struct xfs_bmbt_irec    imap;
			
 
				+	bool			imap_valid;
			
 
				+	unsigned int		io_type;
			
 
				+	struct xfs_ioend	*ioend;
			
 
				+	sector_t		last_block;
			
 
				+};
			
 
				+
			
 
				 void
			
 
				 xfs_count_page_state(
			
 
				 	struct page		*page,
			
@@ -271,7 +282,7 @@ xfs_alloc_ioend(
 
				 	 */
			
 
				 	atomic_set(&ioend->io_remaining, 1);
			
 
				 	ioend->io_error = 0;
			
 
				-	ioend->io_list = NULL;
			
 
				+	INIT_LIST_HEAD(&ioend->io_list);
			
 
				 	ioend->io_type = type;
			
 
				 	ioend->io_inode = inode;
			
 
				 	ioend->io_buffer_head = NULL;
			
@@ -289,8 +300,7 @@ xfs_map_blocks(
 
				 	struct inode		*inode,
			
 
				 	loff_t			offset,
			
 
				 	struct xfs_bmbt_irec	*imap,
			
 
				-	int			type,
			
 
				-	int			nonblocking)
			
 
				+	int			type)
			
 
				 {
			
 
				 	struct xfs_inode	*ip = XFS_I(inode);
			
 
				 	struct xfs_mount	*mp = ip->i_mount;
			
@@ -306,12 +316,7 @@ xfs_map_blocks(
 
				 	if (type == XFS_IO_UNWRITTEN)
			
 
				 		bmapi_flags |= XFS_BMAPI_IGSTATE;
			
 
				 
			
 
				-	if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) {
			
 
				-		if (nonblocking)
			
 
				-			return -EAGAIN;
			
 
				-		xfs_ilock(ip, XFS_ILOCK_SHARED);
			
 
				-	}
			
 
				-
			
 
				+	xfs_ilock(ip, XFS_ILOCK_SHARED);
			
 
				 	ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
			
 
				 	       (ip->i_df.if_flags & XFS_IFEXTENTS));
			
 
				 	ASSERT(offset <= mp->m_super->s_maxbytes);
			
@@ -347,7 +352,7 @@ xfs_map_blocks(
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				-STATIC int
			
 
				+STATIC bool
			
 
				 xfs_imap_valid(
			
 
				 	struct inode		*inode,
			
 
				 	struct xfs_bmbt_irec	*imap,
			
@@ -420,8 +425,7 @@ xfs_start_buffer_writeback(
 
				 STATIC void
			
 
				 xfs_start_page_writeback(
			
 
				 	struct page		*page,
			
 
				-	int			clear_dirty,
			
 
				-	int			buffers)
			
 
				+	int			clear_dirty)
			
 
				 {
			
 
				 	ASSERT(PageLocked(page));
			
 
				 	ASSERT(!PageWriteback(page));
			
@@ -440,10 +444,6 @@ xfs_start_page_writeback(
 
				 		set_page_writeback_keepwrite(page);
			
 
				 
			
 
				 	unlock_page(page);
			
 
				-
			
 
				-	/* If no buffers on the page are to be written, finish it here */
			
 
				-	if (!buffers)
			
 
				-		end_page_writeback(page);
			
 
				 }
			
 
				 
			
 
				 static inline int xfs_bio_add_buffer(struct bio *bio, struct buffer_head *bh)
			
@@ -452,153 +452,101 @@ static inline int xfs_bio_add_buffer(struct bio *bio, struct buffer_head *bh)
 
				 }
			
 
				 
			
 
				 /*
			
 
				- * Submit all of the bios for all of the ioends we have saved up, covering the
			
 
				- * initial writepage page and also any probed pages.
			
 
				- *
			
 
				- * Because we may have multiple ioends spanning a page, we need to start
			
 
				- * writeback on all the buffers before we submit them for I/O. If we mark the
			
 
				- * buffers as we got, then we can end up with a page that only has buffers
			
 
				- * marked async write and I/O complete on can occur before we mark the other
			
 
				- * buffers async write.
			
 
				- *
			
 
				- * The end result of this is that we trip a bug in end_page_writeback() because
			
 
				- * we call it twice for the one page as the code in end_buffer_async_write()
			
 
				- * assumes that all buffers on the page are started at the same time.
			
 
				- *
			
 
				- * The fix is two passes across the ioend list - one to start writeback on the
			
 
				- * buffer_heads, and then submit them for I/O on the second pass.
			
 
				+ * Submit all of the bios for an ioend. We are only passed a single ioend at a
			
 
				+ * time; the caller is responsible for chaining prior to submission.
			
 
				  *
			
 
				  * If @fail is non-zero, it means that we have a situation where some part of
			
 
				  * the submission process has failed after we have marked paged for writeback
			
 
				  * and unlocked them. In this situation, we need to fail the ioend chain rather
			
 
				  * than submit it to IO. This typically only happens on a filesystem shutdown.
			
 
				  */
			
 
				-STATIC void
			
 
				+STATIC int
			
 
				 xfs_submit_ioend(
			
 
				 	struct writeback_control *wbc,
			
 
				 	xfs_ioend_t		*ioend,
			
 
				-	int			fail)
			
 
				+	int			status)
			
 
				 {
			
 
				-	xfs_ioend_t		*head = ioend;
			
 
				-	xfs_ioend_t		*next;
			
 
				 	struct buffer_head	*bh;
			
 
				 	struct bio		*bio;
			
 
				 	sector_t		lastblock = 0;
			
 
				 
			
 
				-	/* Pass 1 - start writeback */
			
 
				-	do {
			
 
				-		next = ioend->io_list;
			
 
				-		for (bh = ioend->io_buffer_head; bh; bh = bh->b_private)
			
 
				-			xfs_start_buffer_writeback(bh);
			
 
				-	} while ((ioend = next) != NULL);
			
 
				+	/* Reserve log space if we might write beyond the on-disk inode size. */
			
 
				+	if (!status &&
			
 
				+	     ioend->io_type != XFS_IO_UNWRITTEN && xfs_ioend_is_append(ioend))
			
 
				+		status = xfs_setfilesize_trans_alloc(ioend);
			
 
				+	/*
			
 
				+	 * If we are failing the IO now, just mark the ioend with an
			
 
				+	 * error and finish it. This will run IO completion immediately
			
 
				+	 * as there is only one reference to the ioend at this point in
			
 
				+	 * time.
			
 
				+	 */
			
 
				+	if (status) {
			
 
				+		ioend->io_error = status;
			
 
				+		xfs_finish_ioend(ioend);
			
 
				+		return status;
			
 
				+	}
			
 
				 
			
 
				-	/* Pass 2 - submit I/O */
			
 
				-	ioend = head;
			
 
				-	do {
			
 
				-		next = ioend->io_list;
			
 
				-		bio = NULL;
			
 
				+	bio = NULL;
			
 
				+	for (bh = ioend->io_buffer_head; bh; bh = bh->b_private) {
			
 
				 
			
 
				-		/*
			
 
				-		 * If we are failing the IO now, just mark the ioend with an
			
 
				-		 * error and finish it. This will run IO completion immediately
			
 
				-		 * as there is only one reference to the ioend at this point in
			
 
				-		 * time.
			
 
				-		 */
			
 
				-		if (fail) {
			
 
				-			ioend->io_error = fail;
			
 
				-			xfs_finish_ioend(ioend);
			
 
				-			continue;
			
 
				+		if (!bio) {
			
 
				+retry:
			
 
				+			bio = xfs_alloc_ioend_bio(bh);
			
 
				+		} else if (bh->b_blocknr != lastblock + 1) {
			
 
				+			xfs_submit_ioend_bio(wbc, ioend, bio);
			
 
				+			goto retry;
			
 
				 		}
			
 
				 
			
 
				-		for (bh = ioend->io_buffer_head; bh; bh = bh->b_private) {
			
 
				-
			
 
				-			if (!bio) {
			
 
				- retry:
			
 
				-				bio = xfs_alloc_ioend_bio(bh);
			
 
				-			} else if (bh->b_blocknr != lastblock + 1) {
			
 
				-				xfs_submit_ioend_bio(wbc, ioend, bio);
			
 
				-				goto retry;
			
 
				-			}
			
 
				-
			
 
				-			if (xfs_bio_add_buffer(bio, bh) != bh->b_size) {
			
 
				-				xfs_submit_ioend_bio(wbc, ioend, bio);
			
 
				-				goto retry;
			
 
				-			}
			
 
				-
			
 
				-			lastblock = bh->b_blocknr;
			
 
				-		}
			
 
				-		if (bio)
			
 
				+		if (xfs_bio_add_buffer(bio, bh) != bh->b_size) {
			
 
				 			xfs_submit_ioend_bio(wbc, ioend, bio);
			
 
				-		xfs_finish_ioend(ioend);
			
 
				-	} while ((ioend = next) != NULL);
			
 
				-}
			
 
				-
			
 
				-/*
			
 
				- * Cancel submission of all buffer_heads so far in this endio.
			
 
				- * Toss the endio too.  Only ever called for the initial page
			
 
				- * in a writepage request, so only ever one page.
			
 
				- */
			
 
				-STATIC void
			
 
				-xfs_cancel_ioend(
			
 
				-	xfs_ioend_t		*ioend)
			
 
				-{
			
 
				-	xfs_ioend_t		*next;
			
 
				-	struct buffer_head	*bh, *next_bh;
			
 
				-
			
 
				-	do {
			
 
				-		next = ioend->io_list;
			
 
				-		bh = ioend->io_buffer_head;
			
 
				-		do {
			
 
				-			next_bh = bh->b_private;
			
 
				-			clear_buffer_async_write(bh);
			
 
				-			/*
			
 
				-			 * The unwritten flag is cleared when added to the
			
 
				-			 * ioend. We're not submitting for I/O so mark the
			
 
				-			 * buffer unwritten again for next time around.
			
 
				-			 */
			
 
				-			if (ioend->io_type == XFS_IO_UNWRITTEN)
			
 
				-				set_buffer_unwritten(bh);
			
 
				-			unlock_buffer(bh);
			
 
				-		} while ((bh = next_bh) != NULL);
			
 
				+			goto retry;
			
 
				+		}
			
 
				 
			
 
				-		mempool_free(ioend, xfs_ioend_pool);
			
 
				-	} while ((ioend = next) != NULL);
			
 
				+		lastblock = bh->b_blocknr;
			
 
				+	}
			
 
				+	if (bio)
			
 
				+		xfs_submit_ioend_bio(wbc, ioend, bio);
			
 
				+	xfs_finish_ioend(ioend);
			
 
				+	return 0;
			
 
				 }
			
 
				 
			
 
				 /*
			
 
				  * Test to see if we've been building up a completion structure for
			
 
				  * earlier buffers -- if so, we try to append to this ioend if we
			
 
				  * can, otherwise we finish off any current ioend and start another.
			
 
				- * Return true if we've finished the given ioend.
			
 
				+ * Any ioend we finish off is queued on @iolist so that the caller can
			
 
				+ * submit it once it has finished processing the dirty page.
			
 
				  */
			
 
				 STATIC void
			
 
				 xfs_add_to_ioend(
			
 
				 	struct inode		*inode,
			
 
				 	struct buffer_head	*bh,
			
 
				 	xfs_off_t		offset,
			
 
				-	unsigned int		type,
			
 
				-	xfs_ioend_t		**result,
			
 
				-	int			need_ioend)
			
 
				+	struct xfs_writepage_ctx *wpc,
			
 
				+	struct list_head	*iolist)
			
 
				 {
			
 
				-	xfs_ioend_t		*ioend = *result;
			
 
				-
			
 
				-	if (!ioend || need_ioend || type != ioend->io_type) {
			
 
				-		xfs_ioend_t	*previous = *result;
			
 
				-
			
 
				-		ioend = xfs_alloc_ioend(inode, type);
			
 
				-		ioend->io_offset = offset;
			
 
				-		ioend->io_buffer_head = bh;
			
 
				-		ioend->io_buffer_tail = bh;
			
 
				-		if (previous)
			
 
				-			previous->io_list = ioend;
			
 
				-		*result = ioend;
			
 
				+	if (!wpc->ioend || wpc->io_type != wpc->ioend->io_type ||
			
 
				+	    bh->b_blocknr != wpc->last_block + 1 ||
			
 
				+	    offset != wpc->ioend->io_offset + wpc->ioend->io_size) {
			
 
				+		struct xfs_ioend	*new;
			
 
				+
			
 
				+		if (wpc->ioend)
			
 
				+			list_add(&wpc->ioend->io_list, iolist);
			
 
				+
			
 
				+		new = xfs_alloc_ioend(inode, wpc->io_type);
			
 
				+		new->io_offset = offset;
			
 
				+		new->io_buffer_head = bh;
			
 
				+		new->io_buffer_tail = bh;
			
 
				+		wpc->ioend = new;
			
 
				 	} else {
			
 
				-		ioend->io_buffer_tail->b_private = bh;
			
 
				-		ioend->io_buffer_tail = bh;
			
 
				+		wpc->ioend->io_buffer_tail->b_private = bh;
			
 
				+		wpc->ioend->io_buffer_tail = bh;
			
 
				 	}
			
 
				 
			
 
				 	bh->b_private = NULL;
			
 
				-	ioend->io_size += bh->b_size;
			
 
				+	wpc->ioend->io_size += bh->b_size;
			
 
				+	wpc->last_block = bh->b_blocknr;
			
 
				+	xfs_start_buffer_writeback(bh);
			
 
				 }
			
 
				 
			
 
				 STATIC void
			
@@ -684,183 +632,6 @@ xfs_check_page_type(
 
				 	return false;
			
 
				 }
			
 
				 
			
 
				-/*
			
 
				- * Allocate & map buffers for page given the extent map. Write it out.
			
 
				- * except for the original page of a writepage, this is called on
			
 
				- * delalloc/unwritten pages only, for the original page it is possible
			
 
				- * that the page has no mapping at all.
			
 
				- */
			
 
				-STATIC int
			
 
				-xfs_convert_page(
			
 
				-	struct inode		*inode,
			
 
				-	struct page		*page,
			
 
				-	loff_t			tindex,
			
 
				-	struct xfs_bmbt_irec	*imap,
			
 
				-	xfs_ioend_t		**ioendp,
			
 
				-	struct writeback_control *wbc)
			
 
				-{
			
 
				-	struct buffer_head	*bh, *head;
			
 
				-	xfs_off_t		end_offset;
			
 
				-	unsigned long		p_offset;
			
 
				-	unsigned int		type;
			
 
				-	int			len, page_dirty;
			
 
				-	int			count = 0, done = 0, uptodate = 1;
			
 
				- 	xfs_off_t		offset = page_offset(page);
			
 
				-
			
 
				-	if (page->index != tindex)
			
 
				-		goto fail;
			
 
				-	if (!trylock_page(page))
			
 
				-		goto fail;
			
 
				-	if (PageWriteback(page))
			
 
				-		goto fail_unlock_page;
			
 
				-	if (page->mapping != inode->i_mapping)
			
 
				-		goto fail_unlock_page;
			
 
				-	if (!xfs_check_page_type(page, (*ioendp)->io_type, false))
			
 
				-		goto fail_unlock_page;
			
 
				-
			
 
				-	/*
			
 
				-	 * page_dirty is initially a count of buffers on the page before
			
 
				-	 * EOF and is decremented as we move each into a cleanable state.
			
 
				-	 *
			
 
				-	 * Derivation:
			
 
				-	 *
			
 
				-	 * End offset is the highest offset that this page should represent.
			
 
				-	 * If we are on the last page, (end_offset & (PAGE_CACHE_SIZE - 1))
			
 
				-	 * will evaluate non-zero and be less than PAGE_CACHE_SIZE and
			
 
				-	 * hence give us the correct page_dirty count. On any other page,
			
 
				-	 * it will be zero and in that case we need page_dirty to be the
			
 
				-	 * count of buffers on the page.
			
 
				-	 */
			
 
				-	end_offset = min_t(unsigned long long,
			
 
				-			(xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT,
			
 
				-			i_size_read(inode));
			
 
				-
			
 
				-	/*
			
 
				-	 * If the current map does not span the entire page we are about to try
			
 
				-	 * to write, then give up. The only way we can write a page that spans
			
 
				-	 * multiple mappings in a single writeback iteration is via the
			
 
				-	 * xfs_vm_writepage() function. Data integrity writeback requires the
			
 
				-	 * entire page to be written in a single attempt, otherwise the part of
			
 
				-	 * the page we don't write here doesn't get written as part of the data
			
 
				-	 * integrity sync.
			
 
				-	 *
			
 
				-	 * For normal writeback, we also don't attempt to write partial pages
			
 
				-	 * here as it simply means that write_cache_pages() will see it under
			
 
				-	 * writeback and ignore the page until some point in the future, at
			
 
				-	 * which time this will be the only page in the file that needs
			
 
				-	 * writeback.  Hence for more optimal IO patterns, we should always
			
 
				-	 * avoid partial page writeback due to multiple mappings on a page here.
			
 
				-	 */
			
 
				-	if (!xfs_imap_valid(inode, imap, end_offset))
			
 
				-		goto fail_unlock_page;
			
 
				-
			
 
				-	len = 1 << inode->i_blkbits;
			
 
				-	p_offset = min_t(unsigned long, end_offset & (PAGE_CACHE_SIZE - 1),
			
 
				-					PAGE_CACHE_SIZE);
			
 
				-	p_offset = p_offset ? roundup(p_offset, len) : PAGE_CACHE_SIZE;
			
 
				-	page_dirty = p_offset / len;
			
 
				-
			
 
				-	/*
			
 
				-	 * The moment we find a buffer that doesn't match our current type
			
 
				-	 * specification or can't be written, abort the loop and start
			
 
				-	 * writeback. As per the above xfs_imap_valid() check, only
			
 
				-	 * xfs_vm_writepage() can handle partial page writeback fully - we are
			
 
				-	 * limited here to the buffers that are contiguous with the current
			
 
				-	 * ioend, and hence a buffer we can't write breaks that contiguity and
			
 
				-	 * we have to defer the rest of the IO to xfs_vm_writepage().
			
 
				-	 */
			
 
				-	bh = head = page_buffers(page);
			
 
				-	do {
			
 
				-		if (offset >= end_offset)
			
 
				-			break;
			
 
				-		if (!buffer_uptodate(bh))
			
 
				-			uptodate = 0;
			
 
				-		if (!(PageUptodate(page) || buffer_uptodate(bh))) {
			
 
				-			done = 1;
			
 
				-			break;
			
 
				-		}
			
 
				-
			
 
				-		if (buffer_unwritten(bh) || buffer_delay(bh) ||
			
 
				-		    buffer_mapped(bh)) {
			
 
				-			if (buffer_unwritten(bh))
			
 
				-				type = XFS_IO_UNWRITTEN;
			
 
				-			else if (buffer_delay(bh))
			
 
				-				type = XFS_IO_DELALLOC;
			
 
				-			else
			
 
				-				type = XFS_IO_OVERWRITE;
			
 
				-
			
 
				-			/*
			
 
				-			 * imap should always be valid because of the above
			
 
				-			 * partial page end_offset check on the imap.
			
 
				-			 */
			
 
				-			ASSERT(xfs_imap_valid(inode, imap, offset));
			
 
				-
			
 
				-			lock_buffer(bh);
			
 
				-			if (type != XFS_IO_OVERWRITE)
			
 
				-				xfs_map_at_offset(inode, bh, imap, offset);
			
 
				-			xfs_add_to_ioend(inode, bh, offset, type,
			
 
				-					 ioendp, done);
			
 
				-
			
 
				-			page_dirty--;
			
 
				-			count++;
			
 
				-		} else {
			
 
				-			done = 1;
			
 
				-			break;
			
 
				-		}
			
 
				-	} while (offset += len, (bh = bh->b_this_page) != head);
			
 
				-
			
 
				-	if (uptodate && bh == head)
			
 
				-		SetPageUptodate(page);
			
 
				-
			
 
				-	if (count) {
			
 
				-		if (--wbc->nr_to_write <= 0 &&
			
 
				-		    wbc->sync_mode == WB_SYNC_NONE)
			
 
				-			done = 1;
			
 
				-	}
			
 
				-	xfs_start_page_writeback(page, !page_dirty, count);
			
 
				-
			
 
				-	return done;
			
 
				- fail_unlock_page:
			
 
				-	unlock_page(page);
			
 
				- fail:
			
 
				-	return 1;
			
 
				-}
			
 
				-
			
 
				-/*
			
 
				- * Convert & write out a cluster of pages in the same extent as defined
			
 
				- * by mp and following the start page.
			
 
				- */
			
 
				-STATIC void
			
 
				-xfs_cluster_write(
			
 
				-	struct inode		*inode,
			
 
				-	pgoff_t			tindex,
			
 
				-	struct xfs_bmbt_irec	*imap,
			
 
				-	xfs_ioend_t		**ioendp,
			
 
				-	struct writeback_control *wbc,
			
 
				-	pgoff_t			tlast)
			
 
				-{
			
 
				-	struct pagevec		pvec;
			
 
				-	int			done = 0, i;
			
 
				-
			
 
				-	pagevec_init(&pvec, 0);
			
 
				-	while (!done && tindex <= tlast) {
			
 
				-		unsigned len = min_t(pgoff_t, PAGEVEC_SIZE, tlast - tindex + 1);
			
 
				-
			
 
				-		if (!pagevec_lookup(&pvec, inode->i_mapping, tindex, len))
			
 
				-			break;
			
 
				-
			
 
				-		for (i = 0; i < pagevec_count(&pvec); i++) {
			
 
				-			done = xfs_convert_page(inode, pvec.pages[i], tindex++,
			
 
				-					imap, ioendp, wbc);
			
 
				-			if (done)
			
 
				-				break;
			
 
				-		}
			
 
				-
			
 
				-		pagevec_release(&pvec);
			
 
				-		cond_resched();
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				 STATIC void
			
 
				 xfs_vm_invalidatepage(
			
 
				 	struct page		*page,
			
@@ -937,6 +708,164 @@ out_invalidate:
 
				 	return;
			
 
				 }
			
 
				 
			
 
				+/*
			
 
				+ * We implement an immediate ioend submission policy here to avoid needing to
			
 
				+ * chain multiple ioends and hence nest mempool allocations which can violate
			
 
				+ * forward progress guarantees we need to provide. The current ioend we are
			
 
				+ * adding buffers to is cached on the writepage context, and if the new buffer
			
 
				+ * does not append to the cached ioend it will create a new ioend and cache that
			
 
				+ * instead.
			
 
				+ *
			
 
				+ * If a new ioend is created and cached, the old ioend is queued
			
 
				+ * locally for submission once the entire page is processed or an error has been
			
 
				+ * detected.  While ioends are submitted immediately after they are completed,
			
 
				+ * batching optimisations are provided by higher level block plugging.
			
 
				+ *
			
 
				+ * At the end of a writeback pass, there will be a cached ioend remaining on the
			
 
				+ * writepage context that the caller will need to submit.
			
 
				+ */
			
 
				+static int
			
 
				+xfs_writepage_map(
			
 
				+	struct xfs_writepage_ctx *wpc,
			
 
				+	struct writeback_control *wbc,
			
 
				+	struct inode		*inode,
			
 
				+	struct page		*page,
			
 
				+	loff_t			offset,
			
 
				+	__uint64_t              end_offset)
			
 
				+{
			
 
				+	LIST_HEAD(submit_list);
			
 
				+	struct xfs_ioend	*ioend, *next;
			
 
				+	struct buffer_head	*bh, *head;
			
 
				+	ssize_t			len = 1 << inode->i_blkbits;
			
 
				+	int			error = 0;
			
 
				+	int			count = 0;
			
 
				+	int			uptodate = 1;
			
 
				+
			
 
				+	bh = head = page_buffers(page);
			
 
				+	offset = page_offset(page);
			
 
				+	do {
			
 
				+		if (offset >= end_offset)
			
 
				+			break;
			
 
				+		if (!buffer_uptodate(bh))
			
 
				+			uptodate = 0;
			
 
				+
			
 
				+		/*
			
 
				+		 * set_page_dirty dirties all buffers in a page, independent
			
 
				+		 * of their state.  The dirty state however is entirely
			
 
				+		 * meaningless for holes (!mapped && uptodate), so skip
			
 
				+		 * buffers covering holes here.
			
 
				+		 */
			
 
				+		if (!buffer_mapped(bh) && buffer_uptodate(bh)) {
			
 
				+			wpc->imap_valid = false;
			
 
				+			continue;
			
 
				+		}
			
 
				+
			
 
				+		if (buffer_unwritten(bh)) {
			
 
				+			if (wpc->io_type != XFS_IO_UNWRITTEN) {
			
 
				+				wpc->io_type = XFS_IO_UNWRITTEN;
			
 
				+				wpc->imap_valid = false;
			
 
				+			}
			
 
				+		} else if (buffer_delay(bh)) {
			
 
				+			if (wpc->io_type != XFS_IO_DELALLOC) {
			
 
				+				wpc->io_type = XFS_IO_DELALLOC;
			
 
				+				wpc->imap_valid = false;
			
 
				+			}
			
 
				+		} else if (buffer_uptodate(bh)) {
			
 
				+			if (wpc->io_type != XFS_IO_OVERWRITE) {
			
 
				+				wpc->io_type = XFS_IO_OVERWRITE;
			
 
				+				wpc->imap_valid = false;
			
 
				+			}
			
 
				+		} else {
			
 
				+			if (PageUptodate(page))
			
 
				+				ASSERT(buffer_mapped(bh));
			
 
				+			/*
			
 
				+			 * This buffer is not uptodate and will not be
			
 
				+			 * written to disk.  Ensure that we will put any
			
 
				+			 * subsequent writeable buffers into a new
			
 
				+			 * ioend.
			
 
				+			 */
			
 
				+			wpc->imap_valid = false;
			
 
				+			continue;
			
 
				+		}
			
 
				+
			
 
				+		if (wpc->imap_valid)
			
 
				+			wpc->imap_valid = xfs_imap_valid(inode, &wpc->imap,
			
 
				+							 offset);
			
 
				+		if (!wpc->imap_valid) {
			
 
				+			error = xfs_map_blocks(inode, offset, &wpc->imap,
			
 
				+					     wpc->io_type);
			
 
				+			if (error)
			
 
				+				goto out;
			
 
				+			wpc->imap_valid = xfs_imap_valid(inode, &wpc->imap,
			
 
				+							 offset);
			
 
				+		}
			
 
				+		if (wpc->imap_valid) {
			
 
				+			lock_buffer(bh);
			
 
				+			if (wpc->io_type != XFS_IO_OVERWRITE)
			
 
				+				xfs_map_at_offset(inode, bh, &wpc->imap, offset);
			
 
				+			xfs_add_to_ioend(inode, bh, offset, wpc, &submit_list);
			
 
				+			count++;
			
 
				+		}
			
 
				+
			
 
				+	} while (offset += len, ((bh = bh->b_this_page) != head));
			
 
				+
			
 
				+	if (uptodate && bh == head)
			
 
				+		SetPageUptodate(page);
			
 
				+
			
 
				+	ASSERT(wpc->ioend || list_empty(&submit_list));
			
 
				+
			
 
				+out:
			
 
				+	/*
			
 
				+	 * On error, we have to fail the ioend here because we have locked
			
 
				+	 * buffers in the ioend. If we don't do this, we'll deadlock
			
 
				+	 * invalidating the page as that tries to lock the buffers on the page.
			
 
				+	 * Also, because we may have set pages under writeback, we have to make
			
 
				+	 * sure we run IO completion to mark the error state of the IO
			
 
				+	 * appropriately, so we can't cancel the ioend directly here. That means
			
 
				+	 * we have to mark this page as under writeback if we included any
			
 
				+	 * buffers from it in the ioend chain so that completion treats it
			
 
				+	 * correctly.
			
 
				+	 *
			
 
				+	 * If we didn't include the page in the ioend, then on error we can
			
 
				+	 * simply discard and unlock it as there are no other users of the page
			
 
				+	 * or its buffers right now. The caller will still need to trigger
			
 
				+	 * submission of outstanding ioends on the writepage context so they are
			
 
				+	 * treated correctly on error.
			
 
				+	 */
			
 
				+	if (count) {
			
 
				+		xfs_start_page_writeback(page, !error);
			
 
				+
			
 
				+		/*
			
 
				+		 * Preserve the original error if there was one, otherwise catch
			
 
				+		 * submission errors here and propagate into subsequent ioend
			
 
				+		 * submissions.
			
 
				+		 */
			
 
				+		list_for_each_entry_safe(ioend, next, &submit_list, io_list) {
			
 
				+			int error2;
			
 
				+
			
 
				+			list_del_init(&ioend->io_list);
			
 
				+			error2 = xfs_submit_ioend(wbc, ioend, error);
			
 
				+			if (error2 && !error)
			
 
				+				error = error2;
			
 
				+		}
			
 
				+	} else if (error) {
			
 
				+		xfs_aops_discard_page(page);
			
 
				+		ClearPageUptodate(page);
			
 
				+		unlock_page(page);
			
 
				+	} else {
			
 
				+		/*
			
 
				+		 * We can end up here with no error and nothing to write if we
			
 
				+		 * race with a partial page truncate on a sub-page block sized
			
 
				+		 * filesystem. In that case we need to mark the page clean.
			
 
				+		 */
			
 
				+		xfs_start_page_writeback(page, 1);
			
 
				+		end_page_writeback(page);
			
 
				+	}
			
 
				+
			
 
				+	mapping_set_error(page->mapping, error);
			
 
				+	return error;
			
 
				+}
			
 
				+
			
 
				 /*
			
 
				  * Write out a dirty page.
			
 
				  *
			
@@ -946,22 +875,16 @@ out_invalidate:
 
				  * For any other dirty buffer heads on the page we should flush them.
			
 
				  */
			
 
				 STATIC int
			
 
				-xfs_vm_writepage(
			
 
				+xfs_do_writepage(
			
 
				 	struct page		*page,
			
 
				-	struct writeback_control *wbc)
			
 
				+	struct writeback_control *wbc,
			
 
				+	void			*data)
			
 
				 {
			
 
				+	struct xfs_writepage_ctx *wpc = data;
			
 
				 	struct inode		*inode = page->mapping->host;
			
 
				-	struct buffer_head	*bh, *head;
			
 
				-	struct xfs_bmbt_irec	imap;
			
 
				-	xfs_ioend_t		*ioend = NULL, *iohead = NULL;
			
 
				 	loff_t			offset;
			
 
				-	unsigned int		type;
			
 
				 	__uint64_t              end_offset;
			
 
				-	pgoff_t                 end_index, last_index;
			
 
				-	ssize_t			len;
			
 
				-	int			err, imap_valid = 0, uptodate = 1;
			
 
				-	int			count = 0;
			
 
				-	int			nonblocking = 0;
			
 
				+	pgoff_t                 end_index;
			
 
				 
			
 
				 	trace_xfs_writepage(inode, page, 0, 0);
			
 
				 
			
@@ -988,12 +911,9 @@ xfs_vm_writepage(
 
				 	if (WARN_ON_ONCE(current->flags & PF_FSTRANS))
			
 
				 		goto redirty;
			
 
				 
			
 
				-	/* Is this page beyond the end of the file? */
			
 
				-	offset = i_size_read(inode);
			
 
				-	end_index = offset >> PAGE_CACHE_SHIFT;
			
 
				-	last_index = (offset - 1) >> PAGE_CACHE_SHIFT;
			
 
				-
			
 
				 	/*
			
 
				+	 * Is this page beyond the end of the file?
			
 
				+	 *
			
 
				 	 * The page index is less than the end_index, adjust the end_offset
			
 
				 	 * to the highest offset that this page should represent.
			
 
				 	 * -----------------------------------------------------
			
@@ -1004,6 +924,8 @@ xfs_vm_writepage(
 
				 	 * |     desired writeback range    |      see else    |
			
 
				 	 * ---------------------------------^------------------|
			
 
				 	 */
			
 
				+	offset = i_size_read(inode);
			
 
				+	end_index = offset >> PAGE_CACHE_SHIFT;
			
 
				 	if (page->index < end_index)
			
 
				 		end_offset = (xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT;
			
 
				 	else {
			
@@ -1055,152 +977,7 @@ xfs_vm_writepage(
 
				 		end_offset = offset;
			
 
				 	}
			
 
				 
			
 
				-	len = 1 << inode->i_blkbits;
			
 
				-
			
 
				-	bh = head = page_buffers(page);
			
 
				-	offset = page_offset(page);
			
 
				-	type = XFS_IO_OVERWRITE;
			
 
				-
			
 
				-	if (wbc->sync_mode == WB_SYNC_NONE)
			
 
				-		nonblocking = 1;
			
 
				-
			
 
				-	do {
			
 
				-		int new_ioend = 0;
			
 
				-
			
 
				-		if (offset >= end_offset)
			
 
				-			break;
			
 
				-		if (!buffer_uptodate(bh))
			
 
				-			uptodate = 0;
			
 
				-
			
 
				-		/*
			
 
				-		 * set_page_dirty dirties all buffers in a page, independent
			
 
				-		 * of their state.  The dirty state however is entirely
			
 
				-		 * meaningless for holes (!mapped && uptodate), so skip
			
 
				-		 * buffers covering holes here.
			
 
				-		 */
			
 
				-		if (!buffer_mapped(bh) && buffer_uptodate(bh)) {
			
 
				-			imap_valid = 0;
			
 
				-			continue;
			
 
				-		}
			
 
				-
			
 
				-		if (buffer_unwritten(bh)) {
			
 
				-			if (type != XFS_IO_UNWRITTEN) {
			
 
				-				type = XFS_IO_UNWRITTEN;
			
 
				-				imap_valid = 0;
			
 
				-			}
			
 
				-		} else if (buffer_delay(bh)) {
			
 
				-			if (type != XFS_IO_DELALLOC) {
			
 
				-				type = XFS_IO_DELALLOC;
			
 
				-				imap_valid = 0;
			
 
				-			}
			
 
				-		} else if (buffer_uptodate(bh)) {
			
 
				-			if (type != XFS_IO_OVERWRITE) {
			
 
				-				type = XFS_IO_OVERWRITE;
			
 
				-				imap_valid = 0;
			
 
				-			}
			
 
				-		} else {
			
 
				-			if (PageUptodate(page))
			
 
				-				ASSERT(buffer_mapped(bh));
			
 
				-			/*
			
 
				-			 * This buffer is not uptodate and will not be
			
 
				-			 * written to disk.  Ensure that we will put any
			
 
				-			 * subsequent writeable buffers into a new
			
 
				-			 * ioend.
			
 
				-			 */
			
 
				-			imap_valid = 0;
			
 
				-			continue;
			
 
				-		}
			
 
				-
			
 
				-		if (imap_valid)
			
 
				-			imap_valid = xfs_imap_valid(inode, &imap, offset);
			
 
				-		if (!imap_valid) {
			
 
				-			/*
			
 
				-			 * If we didn't have a valid mapping then we need to
			
 
				-			 * put the new mapping into a separate ioend structure.
			
 
				-			 * This ensures non-contiguous extents always have
			
 
				-			 * separate ioends, which is particularly important
			
 
				-			 * for unwritten extent conversion at I/O completion
			
 
				-			 * time.
			
 
				-			 */
			
 
				-			new_ioend = 1;
			
 
				-			err = xfs_map_blocks(inode, offset, &imap, type,
			
 
				-					     nonblocking);
			
 
				-			if (err)
			
 
				-				goto error;
			
 
				-			imap_valid = xfs_imap_valid(inode, &imap, offset);
			
 
				-		}
			
 
				-		if (imap_valid) {
			
 
				-			lock_buffer(bh);
			
 
				-			if (type != XFS_IO_OVERWRITE)
			
 
				-				xfs_map_at_offset(inode, bh, &imap, offset);
			
 
				-			xfs_add_to_ioend(inode, bh, offset, type, &ioend,
			
 
				-					 new_ioend);
			
 
				-			count++;
			
 
				-		}
			
 
				-
			
 
				-		if (!iohead)
			
 
				-			iohead = ioend;
			
 
				-
			
 
				-	} while (offset += len, ((bh = bh->b_this_page) != head));
			
 
				-
			
 
				-	if (uptodate && bh == head)
			
 
				-		SetPageUptodate(page);
			
 
				-
			
 
				-	xfs_start_page_writeback(page, 1, count);
			
 
				-
			
 
				-	/* if there is no IO to be submitted for this page, we are done */
			
 
				-	if (!ioend)
			
 
				-		return 0;
			
 
				-
			
 
				-	ASSERT(iohead);
			
 
				-
			
 
				-	/*
			
 
				-	 * Any errors from this point onwards need tobe reported through the IO
			
 
				-	 * completion path as we have marked the initial page as under writeback
			
 
				-	 * and unlocked it.
			
 
				-	 */
			
 
				-	if (imap_valid) {
			
 
				-		xfs_off_t		end_index;
			
 
				-
			
 
				-		end_index = imap.br_startoff + imap.br_blockcount;
			
 
				-
			
 
				-		/* to bytes */
			
 
				-		end_index <<= inode->i_blkbits;
			
 
				-
			
 
				-		/* to pages */
			
 
				-		end_index = (end_index - 1) >> PAGE_CACHE_SHIFT;
			
 
				-
			
 
				-		/* check against file size */
			
 
				-		if (end_index > last_index)
			
 
				-			end_index = last_index;
			
 
				-
			
 
				-		xfs_cluster_write(inode, page->index + 1, &imap, &ioend,
			
 
				-				  wbc, end_index);
			
 
				-	}
			
 
				-
			
 
				-
			
 
				-	/*
			
 
				-	 * Reserve log space if we might write beyond the on-disk inode size.
			
 
				-	 */
			
 
				-	err = 0;
			
 
				-	if (ioend->io_type != XFS_IO_UNWRITTEN && xfs_ioend_is_append(ioend))
			
 
				-		err = xfs_setfilesize_trans_alloc(ioend);
			
 
				-
			
 
				-	xfs_submit_ioend(wbc, iohead, err);
			
 
				-
			
 
				-	return 0;
			
 
				-
			
 
				-error:
			
 
				-	if (iohead)
			
 
				-		xfs_cancel_ioend(iohead);
			
 
				-
			
 
				-	if (err == -EAGAIN)
			
 
				-		goto redirty;
			
 
				-
			
 
				-	xfs_aops_discard_page(page);
			
 
				-	ClearPageUptodate(page);
			
 
				-	unlock_page(page);
			
 
				-	return err;
			
 
				+	return xfs_writepage_map(wpc, wbc, inode, page, offset, end_offset);
			
 
				 
			
 
				 redirty:
			
 
				 	redirty_page_for_writepage(wbc, page);
			
@@ -1208,13 +985,37 @@ redirty:
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				+STATIC int
			
 
				+xfs_vm_writepage(
			
 
				+	struct page		*page,
			
 
				+	struct writeback_control *wbc)
			
 
				+{
			
 
				+	struct xfs_writepage_ctx wpc = {
			
 
				+		.io_type = XFS_IO_INVALID,
			
 
				+	};
			
 
				+	int			ret;
			
 
				+
			
 
				+	ret = xfs_do_writepage(page, wbc, &wpc);
			
 
				+	if (wpc.ioend)
			
 
				+		ret = xfs_submit_ioend(wbc, wpc.ioend, ret);
			
 
				+	return ret;
			
 
				+}
			
 
				+
			
 
				 STATIC int
			
 
				 xfs_vm_writepages(
			
 
				 	struct address_space	*mapping,
			
 
				 	struct writeback_control *wbc)
			
 
				 {
			
 
				+	struct xfs_writepage_ctx wpc = {
			
 
				+		.io_type = XFS_IO_INVALID,
			
 
				+	};
			
 
				+	int			ret;
			
 
				+
			
 
				 	xfs_iflags_clear(XFS_I(mapping->host), XFS_ITRUNCATED);
			
 
				-	return generic_writepages(mapping, wbc);
			
 
				+	ret = write_cache_pages(mapping, wbc, xfs_do_writepage, &wpc);
			
 
				+	if (wpc.ioend)
			
 
				+		ret = xfs_submit_ioend(wbc, wpc.ioend, ret);
			
 
				+	return ret;
			
 
				 }
			
 
				 
			
 
				 /*
			
--- a/fs/xfs/xfs_aops.h
+++ b/fs/xfs/xfs_aops.h
@@ -24,12 +24,14 @@ extern mempool_t *xfs_ioend_pool;
 
				  * Types of I/O for bmap clustering and I/O completion tracking.
			
 
				  */
			
 
				 enum {
			
 
				+	XFS_IO_INVALID,		/* initial state */
			
 
				 	XFS_IO_DELALLOC,	/* covers delalloc region */
			
 
				 	XFS_IO_UNWRITTEN,	/* covers allocated but uninitialized data */
			
 
				 	XFS_IO_OVERWRITE,	/* covers already allocated extent */
			
 
				 };
			
 
				 
			
 
				 #define XFS_IO_TYPES \
			
 
				+	{ XFS_IO_INVALID,		"invalid" }, \
			
 
				 	{ XFS_IO_DELALLOC,		"delalloc" }, \
			
 
				 	{ XFS_IO_UNWRITTEN,		"unwritten" }, \
			
 
				 	{ XFS_IO_OVERWRITE,		"overwrite" }
			
@@ -39,7 +41,7 @@ enum {
 
				  * It can manage several multi-page bio's at once.
			
 
				  */
			
 
				 typedef struct xfs_ioend {
			
 
				-	struct xfs_ioend	*io_list;	/* next ioend in chain */
			
 
				+	struct list_head	io_list;	/* next ioend in chain */
			
 
				 	unsigned int		io_type;	/* delalloc / unwritten */
			
 
				 	int			io_error;	/* I/O error code */
			
 
				 	atomic_t		io_remaining;	/* hold count */