@@ -623,10 +623,11 @@ _xfs_buf_read(
 	bp->b_flags &= ~(XBF_WRITE | XBF_ASYNC | XBF_READ_AHEAD);
 	bp->b_flags |= flags & (XBF_READ | XBF_ASYNC | XBF_READ_AHEAD);
 
-	xfs_buf_iorequest(bp);
-	if (flags & XBF_ASYNC)
+	if (flags & XBF_ASYNC) {
+		xfs_buf_submit(bp);
 		return 0;
-	return xfs_buf_iowait(bp);
+	}
+	return xfs_buf_submit_wait(bp);
 }
 
 xfs_buf_t *
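
The hunk above splits _xfs_buf_read() into two submission modes: an XBF_ASYNC read is handed to xfs_buf_submit(), which takes over the buffer lock and the caller's reference, while a synchronous read goes through xfs_buf_submit_wait() and hands the IO error straight back to the caller, who still owns the buffer. A minimal userspace sketch of that ownership contract follows (illustrative only; the demo_* names are ours, not XFS code):

#include <stdbool.h>
#include <stdlib.h>

struct demo_buf { int error; };

/* async path: submission consumes the caller's handle, so completion
 * (not the caller) is responsible for releasing the buffer later */
static void demo_submit(struct demo_buf *bp)
{
	free(bp);			/* stands in for release at IO completion */
}

/* sync path: the caller keeps the buffer and gets the IO error back */
static int demo_submit_wait(struct demo_buf *bp)
{
	return bp->error;
}

static int demo_read(struct demo_buf *bp, bool async)
{
	if (async) {
		demo_submit(bp);
		return 0;		/* must not touch bp after this point */
	}
	return demo_submit_wait(bp);	/* bp still owned and locked by the caller */
}

int main(void)
{
	struct demo_buf *bp = calloc(1, sizeof(*bp));

	return bp ? demo_read(bp, true) : 1;
}
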
@@ -687,34 +688,39 @@ xfs_buf_readahead_map(
  * Read an uncached buffer from disk. Allocates and returns a locked
  * buffer containing the disk contents or nothing.
  */
-struct xfs_buf *
+int
 xfs_buf_read_uncached(
 	struct xfs_buftarg	*target,
 	xfs_daddr_t		daddr,
 	size_t			numblks,
 	int			flags,
+	struct xfs_buf		**bpp,
 	const struct xfs_buf_ops *ops)
 {
 	struct xfs_buf		*bp;
 
+	*bpp = NULL;
+
 	bp = xfs_buf_get_uncached(target, numblks, flags);
 	if (!bp)
-		return NULL;
+		return -ENOMEM;
 
 	/* set up the buffer for a read IO */
 	ASSERT(bp->b_map_count == 1);
-	bp->b_bn = daddr;
+	bp->b_bn = XFS_BUF_DADDR_NULL;	/* always null for uncached buffers */
 	bp->b_maps[0].bm_bn = daddr;
 	bp->b_flags |= XBF_READ;
 	bp->b_ops = ops;
 
-	if (XFS_FORCED_SHUTDOWN(target->bt_mount)) {
+	xfs_buf_submit_wait(bp);
+	if (bp->b_error) {
+		int	error = bp->b_error;
 		xfs_buf_relse(bp);
-		return NULL;
+		return error;
 	}
-	xfs_buf_iorequest(bp);
-	xfs_buf_iowait(bp);
-	return bp;
+
+	*bpp = bp;
+	return 0;
 }
 
 /*
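
xfs_buf_read_uncached() now returns a negative errno and hands the buffer back through the new bpp out parameter instead of signalling failure with a NULL return, so callers can tell an allocation failure (-ENOMEM) apart from a real IO error. A small runnable model of the new calling convention (the demo_* names are hypothetical, not an actual XFS call site):

#include <errno.h>
#include <stdlib.h>

struct demo_buf { int error; };

/* new-style convention: errno-style return, buffer handed back via *bpp */
static int demo_read_uncached(struct demo_buf **bpp)
{
	struct demo_buf *bp;

	*bpp = NULL;
	bp = calloc(1, sizeof(*bp));
	if (!bp)
		return -ENOMEM;		/* allocation failure, distinct from IO errors */
	if (bp->error) {		/* a failed synchronous read hands its error back */
		int error = bp->error;

		free(bp);
		return error;
	}
	*bpp = bp;
	return 0;
}

int main(void)
{
	struct demo_buf *bp;
	int error = demo_read_uncached(&bp);

	if (error)
		return 1;	/* no buffer to release on failure */
	free(bp);		/* caller owns the buffer on success */
	return 0;
}
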
@@ -998,53 +1004,56 @@ xfs_buf_wait_unpin(
  * Buffer Utility Routines
  */
 
-STATIC void
-xfs_buf_iodone_work(
-	struct work_struct	*work)
+void
+xfs_buf_ioend(
+	struct xfs_buf	*bp)
 {
-	struct xfs_buf		*bp =
-		container_of(work, xfs_buf_t, b_iodone_work);
-	bool			read = !!(bp->b_flags & XBF_READ);
+	bool		read = bp->b_flags & XBF_READ;
+
+	trace_xfs_buf_iodone(bp, _RET_IP_);
 
 	bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_READ_AHEAD);
 
-	/* only validate buffers that were read without errors */
-	if (read && bp->b_ops && !bp->b_error && (bp->b_flags & XBF_DONE))
+	/*
+	 * Pull in IO completion errors now. We are guaranteed to be running
+	 * single threaded, so we don't need the lock to read b_io_error.
+	 */
+	if (!bp->b_error && bp->b_io_error)
+		xfs_buf_ioerror(bp, bp->b_io_error);
+
+	/* Only validate buffers that were read without errors */
+	if (read && !bp->b_error && bp->b_ops) {
+		ASSERT(!bp->b_iodone);
 		bp->b_ops->verify_read(bp);
+	}
+
+	if (!bp->b_error)
+		bp->b_flags |= XBF_DONE;
 
 	if (bp->b_iodone)
 		(*(bp->b_iodone))(bp);
 	else if (bp->b_flags & XBF_ASYNC)
 		xfs_buf_relse(bp);
-	else {
-		ASSERT(read && bp->b_ops);
+	else
 		complete(&bp->b_iowait);
-	}
 }
 
-void
-xfs_buf_ioend(
-	struct xfs_buf	*bp,
-	int		schedule)
+static void
+xfs_buf_ioend_work(
+	struct work_struct *work)
 {
-	bool		read = !!(bp->b_flags & XBF_READ);
-
-	trace_xfs_buf_iodone(bp, _RET_IP_);
+	struct xfs_buf		*bp =
+		container_of(work, xfs_buf_t, b_iodone_work);
 
-	if (bp->b_error == 0)
-		bp->b_flags |= XBF_DONE;
+	xfs_buf_ioend(bp);
+}
 
-	if (bp->b_iodone || (read && bp->b_ops) || (bp->b_flags & XBF_ASYNC)) {
-		if (schedule) {
-			INIT_WORK(&bp->b_iodone_work, xfs_buf_iodone_work);
-			queue_work(xfslogd_workqueue, &bp->b_iodone_work);
-		} else {
-			xfs_buf_iodone_work(&bp->b_iodone_work);
-		}
-	} else {
-		bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_READ_AHEAD);
-		complete(&bp->b_iowait);
-	}
+void
+xfs_buf_ioend_async(
+	struct xfs_buf	*bp)
+{
+	INIT_WORK(&bp->b_iodone_work, xfs_buf_ioend_work);
+	queue_work(xfslogd_workqueue, &bp->b_iodone_work);
 }
 
 void
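
With this split, xfs_buf_ioend() becomes the single completion routine: it folds b_io_error into b_error, verifies a clean read, sets XBF_DONE, and then either runs the iodone callback, releases an async buffer, or completes the waiter. xfs_buf_ioend_async() merely bounces that work onto the xfslogd workqueue so it runs in process context instead of from bio completion. A userspace model of that deferral, with a thread standing in for the workqueue (illustrative only; demo_* names are not XFS code):

#include <pthread.h>
#include <stdio.h>

struct demo_buf { int error; };

/* the single completion routine: error folding, verification, XBF_DONE and
 * the final iodone/release/complete step would all happen here */
static void demo_ioend(struct demo_buf *bp)
{
	printf("completion ran, error=%d\n", bp->error);
}

/* models xfs_buf_ioend_work(): the deferred work just calls the one routine */
static void *demo_ioend_work(void *arg)
{
	demo_ioend(arg);
	return NULL;
}

int main(void)
{
	struct demo_buf bp = { 0 };
	pthread_t worker;	/* stands in for the xfslogd workqueue */

	/* models xfs_buf_ioend_async(): INIT_WORK() + queue_work() */
	pthread_create(&worker, NULL, demo_ioend_work, &bp);
	pthread_join(worker, NULL);
	return 0;
}
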
@@ -1067,96 +1076,6 @@ xfs_buf_ioerror_alert(
 		(__uint64_t)XFS_BUF_ADDR(bp), func, -bp->b_error, bp->b_length);
 }
 
-/*
- * Called when we want to stop a buffer from getting written or read.
- * We attach the EIO error, muck with its flags, and call xfs_buf_ioend
- * so that the proper iodone callbacks get called.
- */
-STATIC int
-xfs_bioerror(
-	xfs_buf_t *bp)
-{
-#ifdef XFSERRORDEBUG
-	ASSERT(XFS_BUF_ISREAD(bp) || bp->b_iodone);
-#endif
-
-	/*
-	 * No need to wait until the buffer is unpinned, we aren't flushing it.
-	 */
-	xfs_buf_ioerror(bp, -EIO);
-
-	/*
-	 * We're calling xfs_buf_ioend, so delete XBF_DONE flag.
-	 */
-	XFS_BUF_UNREAD(bp);
-	XFS_BUF_UNDONE(bp);
-	xfs_buf_stale(bp);
-
-	xfs_buf_ioend(bp, 0);
-
-	return -EIO;
-}
-
-/*
- * Same as xfs_bioerror, except that we are releasing the buffer
- * here ourselves, and avoiding the xfs_buf_ioend call.
- * This is meant for userdata errors; metadata bufs come with
- * iodone functions attached, so that we can track down errors.
- */
-int
-xfs_bioerror_relse(
-	struct xfs_buf	*bp)
-{
-	int64_t		fl = bp->b_flags;
-	/*
-	 * No need to wait until the buffer is unpinned.
-	 * We aren't flushing it.
-	 *
-	 * chunkhold expects B_DONE to be set, whether
-	 * we actually finish the I/O or not. We don't want to
-	 * change that interface.
-	 */
-	XFS_BUF_UNREAD(bp);
-	XFS_BUF_DONE(bp);
-	xfs_buf_stale(bp);
-	bp->b_iodone = NULL;
-	if (!(fl & XBF_ASYNC)) {
-		/*
-		 * Mark b_error and B_ERROR _both_.
-		 * Lot's of chunkcache code assumes that.
-		 * There's no reason to mark error for
-		 * ASYNC buffers.
-		 */
-		xfs_buf_ioerror(bp, -EIO);
-		complete(&bp->b_iowait);
-	} else {
-		xfs_buf_relse(bp);
-	}
-
-	return -EIO;
-}
-
-STATIC int
-xfs_bdstrat_cb(
-	struct xfs_buf	*bp)
-{
-	if (XFS_FORCED_SHUTDOWN(bp->b_target->bt_mount)) {
-		trace_xfs_bdstrat_shut(bp, _RET_IP_);
-		/*
-		 * Metadata write that didn't get logged but
-		 * written delayed anyway. These aren't associated
-		 * with a transaction, and can be ignored.
-		 */
-		if (!bp->b_iodone && !XFS_BUF_ISREAD(bp))
-			return xfs_bioerror_relse(bp);
-		else
-			return xfs_bioerror(bp);
-	}
-
-	xfs_buf_iorequest(bp);
-	return 0;
-}
-
 int
 xfs_bwrite(
 	struct xfs_buf	*bp)
@@ -1166,11 +1085,10 @@ xfs_bwrite(
 	ASSERT(xfs_buf_islocked(bp));
 
 	bp->b_flags |= XBF_WRITE;
-	bp->b_flags &= ~(XBF_ASYNC | XBF_READ | _XBF_DELWRI_Q | XBF_WRITE_FAIL);
+	bp->b_flags &= ~(XBF_ASYNC | XBF_READ | _XBF_DELWRI_Q |
+			 XBF_WRITE_FAIL | XBF_DONE);
 
-	xfs_bdstrat_cb(bp);
-
-	error = xfs_buf_iowait(bp);
+	error = xfs_buf_submit_wait(bp);
 	if (error) {
 		xfs_force_shutdown(bp->b_target->bt_mount,
 				   SHUTDOWN_META_IO_ERROR);
@@ -1178,15 +1096,6 @@ xfs_bwrite(
 	return error;
 }
 
-STATIC void
-_xfs_buf_ioend(
-	xfs_buf_t	*bp,
-	int		schedule)
-{
-	if (atomic_dec_and_test(&bp->b_io_remaining) == 1)
-		xfs_buf_ioend(bp, schedule);
-}
-
 STATIC void
 xfs_buf_bio_end_io(
 	struct bio		*bio,
@@ -1198,13 +1107,18 @@ xfs_buf_bio_end_io(
 	 * don't overwrite existing errors - otherwise we can lose errors on
 	 * buffers that require multiple bios to complete.
 	 */
-	if (!bp->b_error)
-		xfs_buf_ioerror(bp, error);
+	if (error) {
+		spin_lock(&bp->b_lock);
+		if (!bp->b_io_error)
+			bp->b_io_error = error;
+		spin_unlock(&bp->b_lock);
+	}
 
 	if (!bp->b_error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ))
 		invalidate_kernel_vmap_range(bp->b_addr, xfs_buf_vmap_len(bp));
 
-	_xfs_buf_ioend(bp, 1);
+	if (atomic_dec_and_test(&bp->b_io_remaining) == 1)
+		xfs_buf_ioend_async(bp);
 	bio_put(bio);
 }
 
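
Because a buffer can be built from several bios that complete concurrently, the bio end_io handler above no longer writes bp->b_error directly: it records only the first failure in the new b_io_error field under b_lock, and xfs_buf_ioend(), which runs single threaded, later folds that value into b_error. A compilable userspace fragment of the "first error wins" logic (a model with hypothetical demo_* names, not the kernel code):

#include <pthread.h>

struct demo_buf {
	pthread_mutex_t	lock;		/* stands in for bp->b_lock */
	int		io_error;	/* stands in for bp->b_io_error */
};

/* called from each "bio" completion, possibly in parallel */
static void demo_record_bio_error(struct demo_buf *bp, int error)
{
	if (!error)
		return;
	pthread_mutex_lock(&bp->lock);
	if (!bp->io_error)		/* keep the first error only */
		bp->io_error = error;
	pthread_mutex_unlock(&bp->lock);
}

/* later, in the single-threaded completion path */
static int demo_fold_io_error(struct demo_buf *bp, int b_error)
{
	if (!b_error && bp->io_error)
		b_error = bp->io_error;
	return b_error;
}

int main(void)
{
	struct demo_buf bp = { PTHREAD_MUTEX_INITIALIZER, 0 };

	demo_record_bio_error(&bp, -5);		/* first failure (modelling -EIO) sticks */
	demo_record_bio_error(&bp, -28);	/* later failures are ignored */
	return demo_fold_io_error(&bp, 0) == -5 ? 0 : 1;
}
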
@@ -1283,7 +1197,7 @@ next_chunk:
 	} else {
 		/*
 		 * This is guaranteed not to be the last io reference count
-		 * because the caller (xfs_buf_iorequest) holds a count itself.
+		 * because the caller (xfs_buf_submit) holds a count itself.
 		 */
 		atomic_dec(&bp->b_io_remaining);
 		xfs_buf_ioerror(bp, -EIO);
@@ -1373,53 +1287,131 @@ _xfs_buf_ioapply(
 	blk_finish_plug(&plug);
 }
 
+/*
+ * Asynchronous IO submission path. This transfers the buffer lock ownership and
+ * the current reference to the IO. It is not safe to reference the buffer after
+ * a call to this function unless the caller holds an additional reference
+ * itself.
+ */
 void
-xfs_buf_iorequest(
-	xfs_buf_t		*bp)
+xfs_buf_submit(
+	struct xfs_buf	*bp)
 {
-	trace_xfs_buf_iorequest(bp, _RET_IP_);
+	trace_xfs_buf_submit(bp, _RET_IP_);
 
 	ASSERT(!(bp->b_flags & _XBF_DELWRI_Q));
+	ASSERT(bp->b_flags & XBF_ASYNC);
+
+	/* on shutdown we stale and complete the buffer immediately */
+	if (XFS_FORCED_SHUTDOWN(bp->b_target->bt_mount)) {
+		xfs_buf_ioerror(bp, -EIO);
+		bp->b_flags &= ~XBF_DONE;
+		xfs_buf_stale(bp);
+		xfs_buf_ioend(bp);
+		return;
+	}
 
 	if (bp->b_flags & XBF_WRITE)
 		xfs_buf_wait_unpin(bp);
+
+	/* clear the internal error state to avoid spurious errors */
+	bp->b_io_error = 0;
+
+	/*
+	 * The caller's reference is released during I/O completion.
+	 * This occurs some time after the last b_io_remaining reference is
+	 * released, so after we drop our Io reference we have to have some
+	 * other reference to ensure the buffer doesn't go away from underneath
+	 * us. Take a direct reference to ensure we have safe access to the
+	 * buffer until we are finished with it.
+	 */
 	xfs_buf_hold(bp);
 
 	/*
-	 * Set the count to 1 initially, this will stop an I/O
-	 * completion callout which happens before we have started
-	 * all the I/O from calling xfs_buf_ioend too early.
+	 * Set the count to 1 initially, this will stop an I/O completion
+	 * callout which happens before we have started all the I/O from calling
+	 * xfs_buf_ioend too early.
 	 */
 	atomic_set(&bp->b_io_remaining, 1);
 	_xfs_buf_ioapply(bp);
+
 	/*
-	 * If _xfs_buf_ioapply failed, we'll get back here with
-	 * only the reference we took above. _xfs_buf_ioend will
-	 * drop it to zero, so we'd better not queue it for later,
-	 * or we'll free it before it's done.
+	 * If _xfs_buf_ioapply failed, we can get back here with only the IO
+	 * reference we took above. If we drop it to zero, run completion so
+	 * that we don't return to the caller with completion still pending.
 	 */
-	_xfs_buf_ioend(bp, bp->b_error ? 0 : 1);
+	if (atomic_dec_and_test(&bp->b_io_remaining) == 1) {
+		if (bp->b_error)
+			xfs_buf_ioend(bp);
+		else
+			xfs_buf_ioend_async(bp);
+	}
 
 	xfs_buf_rele(bp);
+	/* Note: it is not safe to reference bp now we've dropped our ref */
 }
 
 /*
- * Waits for I/O to complete on the buffer supplied. It returns immediately if
- * no I/O is pending or there is already a pending error on the buffer, in which
- * case nothing will ever complete. It returns the I/O error code, if any, or
- * 0 if there was no error.
+ * Synchronous buffer IO submission path, read or write.
  */
 int
-xfs_buf_iowait(
-	xfs_buf_t		*bp)
+xfs_buf_submit_wait(
+	struct xfs_buf	*bp)
 {
-	trace_xfs_buf_iowait(bp, _RET_IP_);
+	int		error;
 
-	if (!bp->b_error)
-		wait_for_completion(&bp->b_iowait);
+	trace_xfs_buf_submit_wait(bp, _RET_IP_);
+
+	ASSERT(!(bp->b_flags & (_XBF_DELWRI_Q | XBF_ASYNC)));
+
+	if (XFS_FORCED_SHUTDOWN(bp->b_target->bt_mount)) {
+		xfs_buf_ioerror(bp, -EIO);
+		xfs_buf_stale(bp);
+		bp->b_flags &= ~XBF_DONE;
+		return -EIO;
+	}
+
+	if (bp->b_flags & XBF_WRITE)
+		xfs_buf_wait_unpin(bp);
+
+	/* clear the internal error state to avoid spurious errors */
+	bp->b_io_error = 0;
+
+	/*
+	 * For synchronous IO, the IO does not inherit the submitters reference
+	 * count, nor the buffer lock. Hence we cannot release the reference we
+	 * are about to take until we've waited for all IO completion to occur,
+	 * including any xfs_buf_ioend_async() work that may be pending.
+	 */
+	xfs_buf_hold(bp);
+
+	/*
+	 * Set the count to 1 initially, this will stop an I/O completion
+	 * callout which happens before we have started all the I/O from calling
+	 * xfs_buf_ioend too early.
+	 */
+	atomic_set(&bp->b_io_remaining, 1);
+	_xfs_buf_ioapply(bp);
+
+	/*
+	 * make sure we run completion synchronously if it raced with us and is
+	 * already complete.
+	 */
+	if (atomic_dec_and_test(&bp->b_io_remaining) == 1)
+		xfs_buf_ioend(bp);
 
+	/* wait for completion before gathering the error from the buffer */
+	trace_xfs_buf_iowait(bp, _RET_IP_);
+	wait_for_completion(&bp->b_iowait);
 	trace_xfs_buf_iowait_done(bp, _RET_IP_);
-	return bp->b_error;
+	error = bp->b_error;
+
+	/*
+	 * all done now, we can release the hold that keeps the buffer
+	 * referenced for the entire IO.
+	 */
+	xfs_buf_rele(bp);
+	return error;
 }
 
 xfs_caddr_t
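
Both submission paths above rely on the same counting protocol: b_io_remaining starts at 1 so that no bio completion can finish the buffer while _xfs_buf_ioapply() is still issuing bios; the submitter then drops its own count, and whichever context brings the count to zero runs (or queues) xfs_buf_ioend(). The extra xfs_buf_hold()/xfs_buf_rele() pair keeps the buffer alive across that window. A small runnable userspace model of the counting, with threads standing in for bios (demo_* names are ours, not XFS code):

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define DEMO_NR_BIOS	4

struct demo_buf {
	atomic_int	io_remaining;	/* stands in for bp->b_io_remaining */
	atomic_bool	done;		/* guards: completion must run exactly once */
};

static void demo_ioend(struct demo_buf *bp)
{
	bool expected = false;

	if (atomic_compare_exchange_strong(&bp->done, &expected, true))
		printf("completion ran exactly once\n");
}

/* models xfs_buf_bio_end_io(): the last one out runs the completion */
static void *demo_bio_end_io(void *arg)
{
	struct demo_buf *bp = arg;

	if (atomic_fetch_sub(&bp->io_remaining, 1) == 1)
		demo_ioend(bp);
	return NULL;
}

int main(void)
{
	struct demo_buf bp = { .io_remaining = 1, .done = false };
	pthread_t bios[DEMO_NR_BIOS];

	/* submission: the initial count of 1 holds completion off until
	 * every "bio" below has been issued */
	for (int i = 0; i < DEMO_NR_BIOS; i++) {
		atomic_fetch_add(&bp.io_remaining, 1);
		pthread_create(&bios[i], NULL, demo_bio_end_io, &bp);
	}

	/* the submitter drops its count; it only runs the completion itself
	 * if every bio has already finished */
	if (atomic_fetch_sub(&bp.io_remaining, 1) == 1)
		demo_ioend(&bp);

	for (int i = 0; i < DEMO_NR_BIOS; i++)
		pthread_join(bios[i], NULL);
	return 0;
}

Built with something like cc -std=c11 -pthread, the completion message prints exactly once regardless of how the threads interleave, which is the property the initial count of 1 exists to guarantee.
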
@@ -1813,13 +1805,19 @@ __xfs_buf_delwri_submit(
 	blk_start_plug(&plug);
 	list_for_each_entry_safe(bp, n, io_list, b_list) {
 		bp->b_flags &= ~(_XBF_DELWRI_Q | XBF_ASYNC | XBF_WRITE_FAIL);
-		bp->b_flags |= XBF_WRITE;
+		bp->b_flags |= XBF_WRITE | XBF_ASYNC;
 
-		if (!wait) {
-			bp->b_flags |= XBF_ASYNC;
+		/*
+		 * we do all Io submission async. This means if we need to wait
+		 * for IO completion we need to take an extra reference so the
+		 * buffer is still valid on the other side.
+		 */
+		if (wait)
+			xfs_buf_hold(bp);
+		else
 			list_del_init(&bp->b_list);
-		}
-		xfs_bdstrat_cb(bp);
+
+		xfs_buf_submit(bp);
 	}
 	blk_finish_plug(&plug);
 
@@ -1866,7 +1864,10 @@ xfs_buf_delwri_submit(
 		bp = list_first_entry(&io_list, struct xfs_buf, b_list);
 
 		list_del_init(&bp->b_list);
-		error2 = xfs_buf_iowait(bp);
+
+		/* locking the buffer will wait for async IO completion. */
+		xfs_buf_lock(bp);
+		error2 = bp->b_error;
 		xfs_buf_relse(bp);
 		if (!error)
 			error = error2;
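
The delwri path now submits every buffer asynchronously; when the caller needs to wait, it takes an extra hold and then simply locks the buffer, because the in-flight IO owns the buffer lock and completion processing releases it only once the IO is done. Since the buffer lock is a semaphore (b_sema), a POSIX semaphore gives a faithful little model of "lock to wait" (illustrative only; demo_* names are not XFS code):

#include <pthread.h>
#include <semaphore.h>
#include <stdio.h>
#include <unistd.h>

static sem_t demo_b_sema;	/* stands in for bp->b_sema, the buffer lock */

/* models the async IO: it holds the lock and releases it at completion */
static void *demo_async_io(void *arg)
{
	(void)arg;
	sleep(1);			/* the IO takes a while */
	sem_post(&demo_b_sema);		/* release at IO completion */
	return NULL;
}

int main(void)
{
	pthread_t io;

	/* value 0: the lock is already held by the in-flight IO */
	sem_init(&demo_b_sema, 0, 0);
	pthread_create(&io, NULL, demo_async_io, NULL);

	sem_wait(&demo_b_sema);		/* like xfs_buf_lock(): blocks until IO done */
	printf("IO complete, waiter now holds the buffer lock\n");

	pthread_join(io, NULL);
	sem_destroy(&demo_b_sema);
	return 0;
}
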