@@ -980,6 +980,37 @@ void inode_io_list_del(struct inode *inode)
 	spin_unlock(&wb->list_lock);
 }
 
+/*
+ * mark an inode as under writeback on the sb
+ */
+void sb_mark_inode_writeback(struct inode *inode)
+{
+	struct super_block *sb = inode->i_sb;
+	unsigned long flags;
+
+	if (list_empty(&inode->i_wb_list)) {
+		spin_lock_irqsave(&sb->s_inode_wblist_lock, flags);
+		if (list_empty(&inode->i_wb_list))
+			list_add_tail(&inode->i_wb_list, &sb->s_inodes_wb);
+		spin_unlock_irqrestore(&sb->s_inode_wblist_lock, flags);
+	}
+}
+
+/*
+ * clear an inode as under writeback on the sb
+ */
+void sb_clear_inode_writeback(struct inode *inode)
+{
+	struct super_block *sb = inode->i_sb;
+	unsigned long flags;
+
+	if (!list_empty(&inode->i_wb_list)) {
+		spin_lock_irqsave(&sb->s_inode_wblist_lock, flags);
+		list_del_init(&inode->i_wb_list);
+		spin_unlock_irqrestore(&sb->s_inode_wblist_lock, flags);
+	}
+}
+
 /*
  * Redirty an inode: set its when-it-was dirtied timestamp and move it to the
  * furthest end of its superblock's dirty-inode list.
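The two helpers above only maintain the per-superblock list; they rely on whoever sets and clears the mapping's PAGECACHE_TAG_WRITEBACK tag to call them at the right transitions. Those call sites are not part of this hunk, so the sketch below is only an illustration of the intended pairing: mark the inode when its mapping first gains the writeback tag, and clear it once the tag is fully gone. The example_* function names (and the assumption that linux/writeback.h carries the new declarations) are illustrative, not taken from the patch.

#include <linux/fs.h>
#include <linux/writeback.h>	/* assumed home of the sb_*_inode_writeback() declarations */

/* hypothetical caller, run just before the first page of a mapping is tagged for writeback */
static void example_writeback_tag_set(struct inode *inode)
{
	/* not yet tagged: this is the first page of the mapping going under writeback */
	if (!mapping_tagged(inode->i_mapping, PAGECACHE_TAG_WRITEBACK))
		sb_mark_inode_writeback(inode);
}

/* hypothetical caller, run after a page's writeback tag has been cleared */
static void example_writeback_tag_clear(struct inode *inode)
{
	/* tag fully gone: the last writeback page of the mapping has completed */
	if (!mapping_tagged(inode->i_mapping, PAGECACHE_TAG_WRITEBACK))
		sb_clear_inode_writeback(inode);
}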
@@ -2154,7 +2185,7 @@ EXPORT_SYMBOL(__mark_inode_dirty);
  */
 static void wait_sb_inodes(struct super_block *sb)
 {
-	struct inode *inode, *old_inode = NULL;
+	LIST_HEAD(sync_list);
 
 	/*
 	 * We need to be protected against the filesystem going from
@@ -2163,38 +2194,60 @@ static void wait_sb_inodes(struct super_block *sb)
 	WARN_ON(!rwsem_is_locked(&sb->s_umount));
 
 	mutex_lock(&sb->s_sync_lock);
-	spin_lock(&sb->s_inode_list_lock);
 
 	/*
-	 * Data integrity sync. Must wait for all pages under writeback,
-	 * because there may have been pages dirtied before our sync
-	 * call, but which had writeout started before we write it out.
-	 * In which case, the inode may not be on the dirty list, but
-	 * we still have to wait for that writeout.
+	 * Splice the writeback list onto a temporary list to avoid waiting on
+	 * inodes that have started writeback after this point.
+	 *
+	 * Use rcu_read_lock() to keep the inodes around until we have a
+	 * reference. s_inode_wblist_lock protects sb->s_inodes_wb as well as
+	 * the local list because inodes can be dropped from either by writeback
+	 * completion.
+	 */
+	rcu_read_lock();
+	spin_lock_irq(&sb->s_inode_wblist_lock);
+	list_splice_init(&sb->s_inodes_wb, &sync_list);
+
+	/*
+	 * Data integrity sync. Must wait for all pages under writeback, because
+	 * there may have been pages dirtied before our sync call, but which had
+	 * writeout started before we write it out. In which case, the inode
+	 * may not be on the dirty list, but we still have to wait for that
+	 * writeout.
 	 */
-	list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
+	while (!list_empty(&sync_list)) {
+		struct inode *inode = list_first_entry(&sync_list, struct inode,
+						       i_wb_list);
 		struct address_space *mapping = inode->i_mapping;
 
+		/*
+		 * Move each inode back to the wb list before we drop the lock
+		 * to preserve consistency between i_wb_list and the mapping
+		 * writeback tag. Writeback completion is responsible to remove
+		 * the inode from either list once the writeback tag is cleared.
+		 */
+		list_move_tail(&inode->i_wb_list, &sb->s_inodes_wb);
+
+		/*
+		 * The mapping can appear untagged while still on-list since we
+		 * do not have the mapping lock. Skip it here, wb completion
+		 * will remove it.
+		 */
+		if (!mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK))
+			continue;
+
+		spin_unlock_irq(&sb->s_inode_wblist_lock);
+
 		spin_lock(&inode->i_lock);
-		if ((inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) ||
-		    (mapping->nrpages == 0)) {
+		if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) {
 			spin_unlock(&inode->i_lock);
+
+			spin_lock_irq(&sb->s_inode_wblist_lock);
 			continue;
 		}
 		__iget(inode);
 		spin_unlock(&inode->i_lock);
-		spin_unlock(&sb->s_inode_list_lock);
-
-		/*
-		 * We hold a reference to 'inode' so it couldn't have been
-		 * removed from s_inodes list while we dropped the
-		 * s_inode_list_lock. We cannot iput the inode now as we can
-		 * be holding the last reference and we cannot iput it under
-		 * s_inode_list_lock. So we keep the reference and iput it
-		 * later.
-		 */
-		iput(old_inode);
-		old_inode = inode;
+		rcu_read_unlock();
 
 		/*
 		 * We keep the error status of individual mapping so that
@@ -2205,10 +2258,13 @@ static void wait_sb_inodes(struct super_block *sb)
 
 		cond_resched();
 
-		spin_lock(&sb->s_inode_list_lock);
+		iput(inode);
+
+		rcu_read_lock();
+		spin_lock_irq(&sb->s_inode_wblist_lock);
 	}
-	spin_unlock(&sb->s_inode_list_lock);
-	iput(old_inode);
+	spin_unlock_irq(&sb->s_inode_wblist_lock);
+	rcu_read_unlock();
 	mutex_unlock(&sb->s_sync_lock);
 }
 
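This excerpt only covers fs/fs-writeback.c, and the code above presumes state added elsewhere in the patch: an i_wb_list member in struct inode, plus an s_inodes_wb list head and its s_inode_wblist_lock spinlock in struct super_block. The field names come straight from the hunks above, but where they are declared and initialized is not visible here, so the following is just a minimal sketch of the setup the design needs; the example_* helpers are hypothetical, and the real series presumably does this in the existing superblock and inode initialization paths.

#include <linux/fs.h>
#include <linux/list.h>
#include <linux/spinlock.h>

/* hypothetical: per-superblock setup for the sync writeback list */
static void example_sb_wblist_init(struct super_block *sb)
{
	spin_lock_init(&sb->s_inode_wblist_lock);
	INIT_LIST_HEAD(&sb->s_inodes_wb);
}

/* hypothetical: per-inode setup, so list_empty(&inode->i_wb_list) is a valid
 * "not under writeback" check before the inode ever hits the list */
static void example_inode_wblist_init(struct inode *inode)
{
	INIT_LIST_HEAD(&inode->i_wb_list);
}

An empty i_wb_list is exactly what sb_mark_inode_writeback() keys off, and the list_del_init() in sb_clear_inode_writeback() restores that state, so an inode can cycle on and off the per-sb list across successive writeback rounds without any extra bookkeeping.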