|
@@ -1559,56 +1559,17 @@ static int buffer_unmapped(handle_t *handle, struct buffer_head *bh)
|
|
}
|
|
}
|
|
|
|
|
|
/*
|
|
/*
|
|
- * Note that we always start a transaction even if we're not journalling
|
|
|
|
- * data. This is to preserve ordering: any hole instantiation within
|
|
|
|
- * __block_write_full_page -> ext3_get_block() should be journalled
|
|
|
|
- * along with the data so we don't crash and then get metadata which
|
|
|
|
|
|
+ * Note that whenever we need to map blocks we start a transaction even if
|
|
|
|
+ * we're not journalling data. This is to preserve ordering: any hole
|
|
|
|
+ * instantiation within __block_write_full_page -> ext3_get_block() should be
|
|
|
|
+ * journalled along with the data so we don't crash and then get metadata which
|
|
* refers to old data.
|
|
* refers to old data.
|
|
*
|
|
*
|
|
* In all journalling modes block_write_full_page() will start the I/O.
|
|
* In all journalling modes block_write_full_page() will start the I/O.
|
|
*
|
|
*
|
|
- * Problem:
|
|
|
|
- *
|
|
|
|
- * ext3_writepage() -> kmalloc() -> __alloc_pages() -> page_launder() ->
|
|
|
|
- * ext3_writepage()
|
|
|
|
- *
|
|
|
|
- * Similar for:
|
|
|
|
- *
|
|
|
|
- * ext3_file_write() -> generic_file_write() -> __alloc_pages() -> ...
|
|
|
|
- *
|
|
|
|
- * Same applies to ext3_get_block(). We will deadlock on various things like
|
|
|
|
- * lock_journal and i_truncate_mutex.
|
|
|
|
- *
|
|
|
|
- * Setting PF_MEMALLOC here doesn't work - too many internal memory
|
|
|
|
- * allocations fail.
|
|
|
|
- *
|
|
|
|
- * 16May01: If we're reentered then journal_current_handle() will be
|
|
|
|
- * non-zero. We simply *return*.
|
|
|
|
- *
|
|
|
|
- * 1 July 2001: @@@ FIXME:
|
|
|
|
- * In journalled data mode, a data buffer may be metadata against the
|
|
|
|
- * current transaction. But the same file is part of a shared mapping
|
|
|
|
- * and someone does a writepage() on it.
|
|
|
|
- *
|
|
|
|
- * We will move the buffer onto the async_data list, but *after* it has
|
|
|
|
- * been dirtied. So there's a small window where we have dirty data on
|
|
|
|
- * BJ_Metadata.
|
|
|
|
- *
|
|
|
|
- * Note that this only applies to the last partial page in the file. The
|
|
|
|
- * bit which block_write_full_page() uses prepare/commit for. (That's
|
|
|
|
- * broken code anyway: it's wrong for msync()).
|
|
|
|
- *
|
|
|
|
- * It's a rare case: affects the final partial page, for journalled data
|
|
|
|
- * where the file is subject to bith write() and writepage() in the same
|
|
|
|
- * transction. To fix it we'll need a custom block_write_full_page().
|
|
|
|
- * We'll probably need that anyway for journalling writepage() output.
|
|
|
|
- *
|
|
|
|
* We don't honour synchronous mounts for writepage(). That would be
|
|
* We don't honour synchronous mounts for writepage(). That would be
|
|
* disastrous. Any write() or metadata operation will sync the fs for
|
|
* disastrous. Any write() or metadata operation will sync the fs for
|
|
* us.
|
|
* us.
|
|
- *
|
|
|
|
- * AKPM2: if all the page's buffers are mapped to disk and !data=journal,
|
|
|
|
- * we don't need to open a transaction here.
|
|
|
|
*/
|
|
*/
|
|
static int ext3_ordered_writepage(struct page *page,
|
|
static int ext3_ordered_writepage(struct page *page,
|
|
struct writeback_control *wbc)
|
|
struct writeback_control *wbc)
|
|
@@ -1673,12 +1634,9 @@ static int ext3_ordered_writepage(struct page *page,
|
|
* block_write_full_page() succeeded. Otherwise they are unmapped,
|
|
* block_write_full_page() succeeded. Otherwise they are unmapped,
|
|
* and generally junk.
|
|
* and generally junk.
|
|
*/
|
|
*/
|
|
- if (ret == 0) {
|
|
|
|
- err = walk_page_buffers(handle, page_bufs, 0, PAGE_CACHE_SIZE,
|
|
|
|
|
|
+ if (ret == 0)
|
|
|
|
+ ret = walk_page_buffers(handle, page_bufs, 0, PAGE_CACHE_SIZE,
|
|
NULL, journal_dirty_data_fn);
|
|
NULL, journal_dirty_data_fn);
|
|
- if (!ret)
|
|
|
|
- ret = err;
|
|
|
|
- }
|
|
|
|
walk_page_buffers(handle, page_bufs, 0,
|
|
walk_page_buffers(handle, page_bufs, 0,
|
|
PAGE_CACHE_SIZE, NULL, bput_one);
|
|
PAGE_CACHE_SIZE, NULL, bput_one);
|
|
err = ext3_journal_stop(handle);
|
|
err = ext3_journal_stop(handle);
|
|
@@ -1925,6 +1883,8 @@ retry:
|
|
* and pretend the write failed... */
|
|
* and pretend the write failed... */
|
|
ext3_truncate_failed_direct_write(inode);
|
|
ext3_truncate_failed_direct_write(inode);
|
|
ret = PTR_ERR(handle);
|
|
ret = PTR_ERR(handle);
|
|
|
|
+ if (inode->i_nlink)
|
|
|
|
+ ext3_orphan_del(NULL, inode);
|
|
goto out;
|
|
goto out;
|
|
}
|
|
}
|
|
if (inode->i_nlink)
|
|
if (inode->i_nlink)
|
|
@@ -3212,21 +3172,20 @@ out_brelse:
|
|
*
|
|
*
|
|
* We are called from a few places:
|
|
* We are called from a few places:
|
|
*
|
|
*
|
|
- * - Within generic_file_write() for O_SYNC files.
|
|
|
|
|
|
+ * - Within generic_file_aio_write() -> generic_write_sync() for O_SYNC files.
|
|
* Here, there will be no transaction running. We wait for any running
|
|
* Here, there will be no transaction running. We wait for any running
|
|
* transaction to commit.
|
|
* transaction to commit.
|
|
*
|
|
*
|
|
- * - Within sys_sync(), kupdate and such.
|
|
|
|
- * We wait on commit, if tol to.
|
|
|
|
|
|
+ * - Within flush work (for sys_sync(), kupdate and such).
|
|
|
|
+ * We wait on commit, if told to.
|
|
*
|
|
*
|
|
- * - Within prune_icache() (PF_MEMALLOC == true)
|
|
|
|
- * Here we simply return. We can't afford to block kswapd on the
|
|
|
|
- * journal commit.
|
|
|
|
|
|
+ * - Within iput_final() -> write_inode_now()
|
|
|
|
+ * We wait on commit, if told to.
|
|
*
|
|
*
|
|
* In all cases it is actually safe for us to return without doing anything,
|
|
* In all cases it is actually safe for us to return without doing anything,
|
|
* because the inode has been copied into a raw inode buffer in
|
|
* because the inode has been copied into a raw inode buffer in
|
|
- * ext3_mark_inode_dirty(). This is a correctness thing for O_SYNC and for
|
|
|
|
- * knfsd.
|
|
|
|
|
|
+ * ext3_mark_inode_dirty(). This is a correctness thing for WB_SYNC_ALL
|
|
|
|
+ * writeback.
|
|
*
|
|
*
|
|
* Note that we are absolutely dependent upon all inode dirtiers doing the
|
|
* Note that we are absolutely dependent upon all inode dirtiers doing the
|
|
* right thing: they *must* call mark_inode_dirty() after dirtying info in
|
|
* right thing: they *must* call mark_inode_dirty() after dirtying info in
|
|
@@ -3238,13 +3197,13 @@ out_brelse:
|
|
* stuff();
|
|
* stuff();
|
|
* inode->i_size = expr;
|
|
* inode->i_size = expr;
|
|
*
|
|
*
|
|
- * is in error because a kswapd-driven write_inode() could occur while
|
|
|
|
- * `stuff()' is running, and the new i_size will be lost. Plus the inode
|
|
|
|
- * will no longer be on the superblock's dirty inode list.
|
|
|
|
|
|
+ * is in error because write_inode() could occur while `stuff()' is running,
|
|
|
|
+ * and the new i_size will be lost. Plus the inode will no longer be on the
|
|
|
|
+ * superblock's dirty inode list.
|
|
*/
|
|
*/
|
|
int ext3_write_inode(struct inode *inode, struct writeback_control *wbc)
|
|
int ext3_write_inode(struct inode *inode, struct writeback_control *wbc)
|
|
{
|
|
{
|
|
- if (current->flags & PF_MEMALLOC)
|
|
|
|
|
|
+ if (WARN_ON_ONCE(current->flags & PF_MEMALLOC))
|
|
return 0;
|
|
return 0;
|
|
|
|
|
|
if (ext3_journal_current_handle()) {
|
|
if (ext3_journal_current_handle()) {
|
|
@@ -3253,7 +3212,12 @@ int ext3_write_inode(struct inode *inode, struct writeback_control *wbc)
|
|
return -EIO;
|
|
return -EIO;
|
|
}
|
|
}
|
|
|
|
|
|
- if (wbc->sync_mode != WB_SYNC_ALL)
|
|
|
|
|
|
+ /*
|
|
|
|
+ * No need to force a transaction commit in WB_SYNC_NONE mode. Also
|
|
|
|
+ * ext3_sync_fs() will force the commit after everything is
|
|
|
|
+ * written.
|
|
|
|
+ */
|
|
|
|
+ if (wbc->sync_mode != WB_SYNC_ALL || wbc->for_sync)
|
|
return 0;
|
|
return 0;
|
|
|
|
|
|
return ext3_force_commit(inode->i_sb);
|
|
return ext3_force_commit(inode->i_sb);
|