před 7 roky · 02a2b05395
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -1009,6 +1009,13 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
 
				 	WARN_ON_ONCE(ret);
			
 
				 	ret = 0;
			
 
				 
			
 
				+	if (iov_iter_rw(iter) == WRITE && !is_sync_kiocb(iocb) &&
			
 
				+	    !inode->i_sb->s_dio_done_wq) {
			
 
				+		ret = sb_init_dio_done_wq(inode->i_sb);
			
 
				+		if (ret < 0)
			
 
				+			goto out_free_dio;
			
 
				+	}
			
 
				+
			
 
				 	inode_dio_begin(inode);
			
 
				 
			
 
				 	blk_start_plug(&plug);
			
@@ -1031,13 +1038,6 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
 
				 	if (ret < 0)
			
 
				 		iomap_dio_set_error(dio, ret);
			
 
				 
			
 
				-	if (ret >= 0 && iov_iter_rw(iter) == WRITE && !is_sync_kiocb(iocb) &&
			
 
				-			!inode->i_sb->s_dio_done_wq) {
			
 
				-		ret = sb_init_dio_done_wq(inode->i_sb);
			
 
				-		if (ret < 0)
			
 
				-			iomap_dio_set_error(dio, ret);
			
 
				-	}
			
 
				-
			
 
				 	if (!atomic_dec_and_test(&dio->ref)) {
			
 
				 		if (!is_sync_kiocb(iocb))
			
 
				 			return -EIOCBQUEUED;
			
--- a/fs/xfs/libxfs/xfs_ag_resv.c
+++ b/fs/xfs/libxfs/xfs_ag_resv.c
@@ -156,7 +156,8 @@ __xfs_ag_resv_free(
 
				 	trace_xfs_ag_resv_free(pag, type, 0);
			
 
				 
			
 
				 	resv = xfs_perag_resv(pag, type);
			
 
				-	pag->pag_mount->m_ag_max_usable += resv->ar_asked;
			
 
				+	if (pag->pag_agno == 0)
			
 
				+		pag->pag_mount->m_ag_max_usable += resv->ar_asked;
			
 
				 	/*
			
 
				 	 * AGFL blocks are always considered "free", so whatever
			
 
				 	 * was reserved at mount time must be given back at umount.
			
@@ -216,7 +217,14 @@ __xfs_ag_resv_init(
 
				 		return error;
			
 
				 	}
			
 
				 
			
 
				-	mp->m_ag_max_usable -= ask;
			
 
				+	/*
			
 
				+	 * Reduce the maximum per-AG allocation length by however much we're
			
 
				+	 * trying to reserve for an AG.  Since this is a filesystem-wide
			
 
				+	 * counter, we only make the adjustment for AG 0.  This assumes that
			
 
				+	 * there aren't any AGs hungrier for per-AG reservation than AG 0.
			
 
				+	 */
			
 
				+	if (pag->pag_agno == 0)
			
 
				+		mp->m_ag_max_usable -= ask;
			
 
				 
			
 
				 	resv = xfs_perag_resv(pag, type);
			
 
				 	resv->ar_asked = ask;
			
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -49,7 +49,6 @@
 
				 #include "xfs_rmap.h"
			
 
				 #include "xfs_ag_resv.h"
			
 
				 #include "xfs_refcount.h"
			
 
				-#include "xfs_rmap_btree.h"
			
 
				 #include "xfs_icache.h"
			
 
				 
			
 
				 
			
@@ -192,12 +191,8 @@ xfs_bmap_worst_indlen(
 
				 	int		maxrecs;	/* maximum record count at this level */
			
 
				 	xfs_mount_t	*mp;		/* mount structure */
			
 
				 	xfs_filblks_t	rval;		/* return value */
			
 
				-	xfs_filblks_t   orig_len;
			
 
				 
			
 
				 	mp = ip->i_mount;
			
 
				-
			
 
				-	/* Calculate the worst-case size of the bmbt. */
			
 
				-	orig_len = len;
			
 
				 	maxrecs = mp->m_bmap_dmxr[0];
			
 
				 	for (level = 0, rval = 0;
			
 
				 	     level < XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK);
			
@@ -205,20 +200,12 @@ xfs_bmap_worst_indlen(
 
				 		len += maxrecs - 1;
			
 
				 		do_div(len, maxrecs);
			
 
				 		rval += len;
			
 
				-		if (len == 1) {
			
 
				-			rval += XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) -
			
 
				+		if (len == 1)
			
 
				+			return rval + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) -
			
 
				 				level - 1;
			
 
				-			break;
			
 
				-		}
			
 
				 		if (level == 0)
			
 
				 			maxrecs = mp->m_bmap_dmxr[1];
			
 
				 	}
			
 
				-
			
 
				-	/* Calculate the worst-case size of the rmapbt. */
			
 
				-	if (xfs_sb_version_hasrmapbt(&mp->m_sb))
			
 
				-		rval += 1 + xfs_rmapbt_calc_size(mp, orig_len) +
			
 
				-				mp->m_rmap_maxlevels;
			
 
				-
			
 
				 	return rval;
			
 
				 }
			
 
				 
			
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -343,7 +343,8 @@ xfs_end_io(
 
				 		error = xfs_reflink_end_cow(ip, offset, size);
			
 
				 		break;
			
 
				 	case XFS_IO_UNWRITTEN:
			
 
				-		error = xfs_iomap_write_unwritten(ip, offset, size);
			
 
				+		/* writeback should never update isize */
			
 
				+		error = xfs_iomap_write_unwritten(ip, offset, size, false);
			
 
				 		break;
			
 
				 	default:
			
 
				 		ASSERT(!xfs_ioend_is_append(ioend) || ioend->io_append_trans);
			
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -1459,7 +1459,19 @@ xfs_shift_file_space(
 
				 		return error;
			
 
				 
			
 
				 	/*
			
 
				-	 * The extent shiting code works on extent granularity. So, if
			
 
				+	 * Clean out anything hanging around in the cow fork now that
			
 
				+	 * we've flushed all the dirty data out to disk to avoid having
			
 
				+	 * CoW extents at the wrong offsets.
			
 
				+	 */
			
 
				+	if (xfs_is_reflink_inode(ip)) {
			
 
				+		error = xfs_reflink_cancel_cow_range(ip, offset, NULLFILEOFF,
			
 
				+				true);
			
 
				+		if (error)
			
 
				+			return error;
			
 
				+	}
			
 
				+
			
 
				+	/*
			
 
				+	 * The extent shifting code works on extent granularity. So, if
			
 
				 	 * stop_fsb is not the starting block of extent, we need to split
			
 
				 	 * the extent at stop_fsb.
			
 
				 	 */
			
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -1258,8 +1258,6 @@ xfs_buf_ioapply_map(
 
				 	int		size;
			
 
				 	int		offset;
			
 
				 
			
 
				-	total_nr_pages = bp->b_page_count;
			
 
				-
			
 
				 	/* skip the pages in the buffer before the start offset */
			
 
				 	page_index = 0;
			
 
				 	offset = *buf_offset;
			
--- a/fs/xfs/xfs_error.c
+++ b/fs/xfs/xfs_error.c
@@ -347,7 +347,7 @@ xfs_verifier_error(
 
				 {
			
 
				 	struct xfs_mount *mp = bp->b_target->bt_mount;
			
 
				 
			
 
				-	xfs_alert(mp, "Metadata %s detected at %pF, %s block 0x%llx",
			
 
				+	xfs_alert(mp, "Metadata %s detected at %pS, %s block 0x%llx",
			
 
				 		  bp->b_error == -EFSBADCRC ? "CRC error" : "corruption",
			
 
				 		  __return_address, bp->b_ops->name, bp->b_bn);
			
 
				 
			
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -58,7 +58,7 @@ xfs_zero_range(
 
				 	xfs_off_t		count,
			
 
				 	bool			*did_zero)
			
 
				 {
			
 
				-	return iomap_zero_range(VFS_I(ip), pos, count, NULL, &xfs_iomap_ops);
			
 
				+	return iomap_zero_range(VFS_I(ip), pos, count, did_zero, &xfs_iomap_ops);
			
 
				 }
			
 
				 
			
 
				 int
			
@@ -377,8 +377,6 @@ restart:
 
				 	 */
			
 
				 	spin_lock(&ip->i_flags_lock);
			
 
				 	if (iocb->ki_pos > i_size_read(inode)) {
			
 
				-		bool	zero = false;
			
 
				-
			
 
				 		spin_unlock(&ip->i_flags_lock);
			
 
				 		if (!drained_dio) {
			
 
				 			if (*iolock == XFS_IOLOCK_SHARED) {
			
@@ -399,7 +397,7 @@ restart:
 
				 			drained_dio = true;
			
 
				 			goto restart;
			
 
				 		}
			
 
				-		error = xfs_zero_eof(ip, iocb->ki_pos, i_size_read(inode), &zero);
			
 
				+		error = xfs_zero_eof(ip, iocb->ki_pos, i_size_read(inode), NULL);
			
 
				 		if (error)
			
 
				 			return error;
			
 
				 	} else
			
@@ -436,7 +434,6 @@ xfs_dio_write_end_io(
 
				 	struct inode		*inode = file_inode(iocb->ki_filp);
			
 
				 	struct xfs_inode	*ip = XFS_I(inode);
			
 
				 	loff_t			offset = iocb->ki_pos;
			
 
				-	bool			update_size = false;
			
 
				 	int			error = 0;
			
 
				 
			
 
				 	trace_xfs_end_io_direct_write(ip, offset, size);
			
@@ -447,6 +444,21 @@ xfs_dio_write_end_io(
 
				 	if (size <= 0)
			
 
				 		return size;
			
 
				 
			
 
				+	if (flags & IOMAP_DIO_COW) {
			
 
				+		error = xfs_reflink_end_cow(ip, offset, size);
			
 
				+		if (error)
			
 
				+			return error;
			
 
				+	}
			
 
				+
			
 
				+	/*
			
 
				+	 * Unwritten conversion updates the in-core isize after extent
			
 
				+	 * conversion but before updating the on-disk size. Updating isize any
			
 
				+	 * earlier allows a racing dio read to find unwritten extents before
			
 
				+	 * they are converted.
			
 
				+	 */
			
 
				+	if (flags & IOMAP_DIO_UNWRITTEN)
			
 
				+		return xfs_iomap_write_unwritten(ip, offset, size, true);
			
 
				+
			
 
				 	/*
			
 
				 	 * We need to update the in-core inode size here so that we don't end up
			
 
				 	 * with the on-disk inode size being outside the in-core inode size. We
			
@@ -461,20 +473,11 @@ xfs_dio_write_end_io(
 
				 	spin_lock(&ip->i_flags_lock);
			
 
				 	if (offset + size > i_size_read(inode)) {
			
 
				 		i_size_write(inode, offset + size);
			
 
				-		update_size = true;
			
 
				-	}
			
 
				-	spin_unlock(&ip->i_flags_lock);
			
 
				-
			
 
				-	if (flags & IOMAP_DIO_COW) {
			
 
				-		error = xfs_reflink_end_cow(ip, offset, size);
			
 
				-		if (error)
			
 
				-			return error;
			
 
				-	}
			
 
				-
			
 
				-	if (flags & IOMAP_DIO_UNWRITTEN)
			
 
				-		error = xfs_iomap_write_unwritten(ip, offset, size);
			
 
				-	else if (update_size)
			
 
				+		spin_unlock(&ip->i_flags_lock);
			
 
				 		error = xfs_setfilesize(ip, offset, size);
			
 
				+	} else {
			
 
				+		spin_unlock(&ip->i_flags_lock);
			
 
				+	}
			
 
				 
			
 
				 	return error;
			
 
				 }
			
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1624,10 +1624,12 @@ xfs_itruncate_extents(
 
				 		goto out;
			
 
				 
			
 
				 	/*
			
 
				-	 * Clear the reflink flag if we truncated everything.
			
 
				+	 * Clear the reflink flag if there are no data fork blocks and
			
 
				+	 * there are no extents staged in the cow fork.
			
 
				 	 */
			
 
				-	if (ip->i_d.di_nblocks == 0 && xfs_is_reflink_inode(ip)) {
			
 
				-		ip->i_d.di_flags2 &= ~XFS_DIFLAG2_REFLINK;
			
 
				+	if (xfs_is_reflink_inode(ip) && ip->i_cnextents == 0) {
			
 
				+		if (ip->i_d.di_nblocks == 0)
			
 
				+			ip->i_d.di_flags2 &= ~XFS_DIFLAG2_REFLINK;
			
 
				 		xfs_inode_clear_cowblocks_tag(ip);
			
 
				 	}
			
 
				 
			
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -745,7 +745,7 @@ xfs_iflush_done(
 
				 		 */
			
 
				 		iip = INODE_ITEM(blip);
			
 
				 		if ((iip->ili_logged && blip->li_lsn == iip->ili_flush_lsn) ||
			
 
				-		    lip->li_flags & XFS_LI_FAILED)
			
 
				+		    (blip->li_flags & XFS_LI_FAILED))
			
 
				 			need_ail++;
			
 
				 
			
 
				 		blip = next;
			
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -1088,6 +1088,7 @@ xfs_ioctl_setattr_dax_invalidate(
 
				 	int			*join_flags)
			
 
				 {
			
 
				 	struct inode		*inode = VFS_I(ip);
			
 
				+	struct super_block	*sb = inode->i_sb;
			
 
				 	int			error;
			
 
				 
			
 
				 	*join_flags = 0;
			
@@ -1100,7 +1101,7 @@ xfs_ioctl_setattr_dax_invalidate(
 
				 	if (fa->fsx_xflags & FS_XFLAG_DAX) {
			
 
				 		if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode)))
			
 
				 			return -EINVAL;
			
 
				-		if (ip->i_mount->m_sb.sb_blocksize != PAGE_SIZE)
			
 
				+		if (bdev_dax_supported(sb, sb->s_blocksize) < 0)
			
 
				 			return -EINVAL;
			
 
				 	}
			
 
				 
			
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -829,7 +829,8 @@ int
 
				 xfs_iomap_write_unwritten(
			
 
				 	xfs_inode_t	*ip,
			
 
				 	xfs_off_t	offset,
			
 
				-	xfs_off_t	count)
			
 
				+	xfs_off_t	count,
			
 
				+	bool		update_isize)
			
 
				 {
			
 
				 	xfs_mount_t	*mp = ip->i_mount;
			
 
				 	xfs_fileoff_t	offset_fsb;
			
@@ -840,6 +841,7 @@ xfs_iomap_write_unwritten(
 
				 	xfs_trans_t	*tp;
			
 
				 	xfs_bmbt_irec_t imap;
			
 
				 	struct xfs_defer_ops dfops;
			
 
				+	struct inode	*inode = VFS_I(ip);
			
 
				 	xfs_fsize_t	i_size;
			
 
				 	uint		resblks;
			
 
				 	int		error;
			
@@ -899,7 +901,8 @@ xfs_iomap_write_unwritten(
 
				 		i_size = XFS_FSB_TO_B(mp, offset_fsb + count_fsb);
			
 
				 		if (i_size > offset + count)
			
 
				 			i_size = offset + count;
			
 
				-
			
 
				+		if (update_isize && i_size > i_size_read(inode))
			
 
				+			i_size_write(inode, i_size);
			
 
				 		i_size = xfs_new_eof(ip, i_size);
			
 
				 		if (i_size) {
			
 
				 			ip->i_d.di_size = i_size;
			
--- a/fs/xfs/xfs_iomap.h
+++ b/fs/xfs/xfs_iomap.h
@@ -27,7 +27,7 @@ int xfs_iomap_write_direct(struct xfs_inode *, xfs_off_t, size_t,
 
				 			struct xfs_bmbt_irec *, int);
			
 
				 int xfs_iomap_write_allocate(struct xfs_inode *, int, xfs_off_t,
			
 
				 			struct xfs_bmbt_irec *);
			
 
				-int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, xfs_off_t);
			
 
				+int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, xfs_off_t, bool);
			
 
				 
			
 
				 void xfs_bmbt_to_iomap(struct xfs_inode *, struct iomap *,
			
 
				 		struct xfs_bmbt_irec *);
			
--- a/fs/xfs/xfs_pnfs.c
+++ b/fs/xfs/xfs_pnfs.c
@@ -274,7 +274,7 @@ xfs_fs_commit_blocks(
 
				 					(end - 1) >> PAGE_SHIFT);
			
 
				 		WARN_ON_ONCE(error);
			
 
				 
			
 
				-		error = xfs_iomap_write_unwritten(ip, start, length);
			
 
				+		error = xfs_iomap_write_unwritten(ip, start, length, false);
			
 
				 		if (error)
			
 
				 			goto out_drop_iolock;
			
 
				 	}
			
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -1654,6 +1654,16 @@ xfs_fs_fill_super(
 
				 		"DAX and reflink have not been tested together!");
			
 
				 	}
			
 
				 
			
 
				+	if (mp->m_flags & XFS_MOUNT_DISCARD) {
			
 
				+		struct request_queue *q = bdev_get_queue(sb->s_bdev);
			
 
				+
			
 
				+		if (!blk_queue_discard(q)) {
			
 
				+			xfs_warn(mp, "mounting with \"discard\" option, but "
			
 
				+					"the device does not support discard");
			
 
				+			mp->m_flags &= ~XFS_MOUNT_DISCARD;
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				 	if (xfs_sb_version_hasrmapbt(&mp->m_sb)) {
			
 
				 		if (mp->m_sb.sb_rblocks) {
			
 
				 			xfs_alert(mp,