Ver Fonte

pnfs/blocklayout: update last_write_offset atomically with extents

Block/SCSI layout write completion may add committable extents to the
extent tree before updating the layout's last-written byte under the inode
lock.  If a sync happens before this value is updated, then
prepare_layoutcommit may find and encode these extents, which would produce
a LAYOUTCOMMIT request whose encoded extents are larger than the request's
loca_length.

Fix this by using a last-written byte value that is updated atomically with
the extent tree so that committable extents always match.

Signed-off-by: Benjamin Coddington <bcodding@redhat.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
Benjamin Coddington há 9 anos atrás
pai
commit
41963c10c4

+ 1 - 1
fs/nfs/blocklayout/blocklayout.c

@@ -346,7 +346,7 @@ static void bl_write_cleanup(struct work_struct *work)
 			PAGE_SIZE - 1) & (loff_t)PAGE_MASK;
 			PAGE_SIZE - 1) & (loff_t)PAGE_MASK;
 
 
 		ext_tree_mark_written(bl, start >> SECTOR_SHIFT,
 		ext_tree_mark_written(bl, start >> SECTOR_SHIFT,
-					(end - start) >> SECTOR_SHIFT);
+					(end - start) >> SECTOR_SHIFT, end);
 	}
 	}
 
 
 	pnfs_ld_write_done(hdr);
 	pnfs_ld_write_done(hdr);

+ 2 - 1
fs/nfs/blocklayout/blocklayout.h

@@ -141,6 +141,7 @@ struct pnfs_block_layout {
 	struct rb_root		bl_ext_ro;
 	struct rb_root		bl_ext_ro;
 	spinlock_t		bl_ext_lock;   /* Protects list manipulation */
 	spinlock_t		bl_ext_lock;   /* Protects list manipulation */
 	bool			bl_scsi_layout;
 	bool			bl_scsi_layout;
+	u64			bl_lwb;
 };
 };
 
 
 static inline struct pnfs_block_layout *
 static inline struct pnfs_block_layout *
@@ -182,7 +183,7 @@ int ext_tree_insert(struct pnfs_block_layout *bl,
 int ext_tree_remove(struct pnfs_block_layout *bl, bool rw, sector_t start,
 int ext_tree_remove(struct pnfs_block_layout *bl, bool rw, sector_t start,
 		sector_t end);
 		sector_t end);
 int ext_tree_mark_written(struct pnfs_block_layout *bl, sector_t start,
 int ext_tree_mark_written(struct pnfs_block_layout *bl, sector_t start,
-		sector_t len);
+		sector_t len, u64 lwb);
 bool ext_tree_lookup(struct pnfs_block_layout *bl, sector_t isect,
 bool ext_tree_lookup(struct pnfs_block_layout *bl, sector_t isect,
 		struct pnfs_block_extent *ret, bool rw);
 		struct pnfs_block_extent *ret, bool rw);
 int ext_tree_prepare_commit(struct nfs4_layoutcommit_args *arg);
 int ext_tree_prepare_commit(struct nfs4_layoutcommit_args *arg);

+ 7 - 3
fs/nfs/blocklayout/extent_tree.c

@@ -402,7 +402,7 @@ ext_tree_split(struct rb_root *root, struct pnfs_block_extent *be,
 
 
 int
 int
 ext_tree_mark_written(struct pnfs_block_layout *bl, sector_t start,
 ext_tree_mark_written(struct pnfs_block_layout *bl, sector_t start,
-		sector_t len)
+		sector_t len, u64 lwb)
 {
 {
 	struct rb_root *root = &bl->bl_ext_rw;
 	struct rb_root *root = &bl->bl_ext_rw;
 	sector_t end = start + len;
 	sector_t end = start + len;
@@ -471,6 +471,8 @@ ext_tree_mark_written(struct pnfs_block_layout *bl, sector_t start,
 		}
 		}
 	}
 	}
 out:
 out:
+	if (bl->bl_lwb < lwb)
+		bl->bl_lwb = lwb;
 	spin_unlock(&bl->bl_ext_lock);
 	spin_unlock(&bl->bl_ext_lock);
 
 
 	__ext_put_deviceids(&tmp);
 	__ext_put_deviceids(&tmp);
@@ -518,7 +520,7 @@ static __be32 *encode_scsi_range(struct pnfs_block_extent *be, __be32 *p)
 }
 }
 
 
 static int ext_tree_encode_commit(struct pnfs_block_layout *bl, __be32 *p,
 static int ext_tree_encode_commit(struct pnfs_block_layout *bl, __be32 *p,
-		size_t buffer_size, size_t *count)
+		size_t buffer_size, size_t *count, __u64 *lastbyte)
 {
 {
 	struct pnfs_block_extent *be;
 	struct pnfs_block_extent *be;
 	int ret = 0;
 	int ret = 0;
@@ -542,6 +544,8 @@ static int ext_tree_encode_commit(struct pnfs_block_layout *bl, __be32 *p,
 			p = encode_block_extent(be, p);
 			p = encode_block_extent(be, p);
 		be->be_tag = EXTENT_COMMITTING;
 		be->be_tag = EXTENT_COMMITTING;
 	}
 	}
+	*lastbyte = bl->bl_lwb - 1;
+	bl->bl_lwb = 0;
 	spin_unlock(&bl->bl_ext_lock);
 	spin_unlock(&bl->bl_ext_lock);
 
 
 	return ret;
 	return ret;
@@ -564,7 +568,7 @@ ext_tree_prepare_commit(struct nfs4_layoutcommit_args *arg)
 	arg->layoutupdate_pages = &arg->layoutupdate_page;
 	arg->layoutupdate_pages = &arg->layoutupdate_page;
 
 
 retry:
 retry:
-	ret = ext_tree_encode_commit(bl, start_p + 1, buffer_size, &count);
+	ret = ext_tree_encode_commit(bl, start_p + 1, buffer_size, &count, &arg->lastbytewritten);
 	if (unlikely(ret)) {
 	if (unlikely(ret)) {
 		ext_tree_free_commitdata(arg, buffer_size);
 		ext_tree_free_commitdata(arg, buffer_size);