@@ -1407,149 +1407,42 @@ static void ext4_da_page_release_reservation(struct page *page,
  * Delayed allocation stuff
  */
 
-static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd);
-
-/*
- * mpage_da_submit_io - walks through extent of pages and try to write
- * them with writepage() call back
- *
- * @mpd->inode: inode
- * @mpd->first_page: first page of the extent
- * @mpd->next_page: page after the last page of the extent
- *
- * By the time mpage_da_submit_io() is called we expect all blocks
- * to be allocated. this may be wrong if allocation failed.
- *
- * As pages are already locked by write_cache_pages(), we can't use it
- */
-static int mpage_da_submit_io(struct mpage_da_data *mpd,
-			      struct ext4_map_blocks *map)
-{
-	struct pagevec pvec;
-	unsigned long index, end;
-	int ret = 0, err, nr_pages, i;
-	struct inode *inode = mpd->inode;
-	struct address_space *mapping = inode->i_mapping;
-	loff_t size = i_size_read(inode);
-	unsigned int len, block_start;
-	struct buffer_head *bh, *page_bufs = NULL;
-	sector_t pblock = 0, cur_logical = 0;
-	struct ext4_io_submit io_submit;
-
-	BUG_ON(mpd->next_page <= mpd->first_page);
-	ext4_io_submit_init(&io_submit, mpd->wbc);
-	io_submit.io_end = ext4_init_io_end(inode, GFP_NOFS);
-	if (!io_submit.io_end) {
-		ext4_da_block_invalidatepages(mpd);
-		return -ENOMEM;
-	}
+struct mpage_da_data {
+	struct inode *inode;
+	struct writeback_control *wbc;
+	pgoff_t first_page;	/* The first page to write */
+	pgoff_t next_page;	/* Current page to examine */
+	pgoff_t last_page;	/* Last page to examine */
 	/*
-	 * We need to start from the first_page to the next_page - 1
-	 * to make sure we also write the mapped dirty buffer_heads.
-	 * If we look at mpd->b_blocknr we would only be looking
-	 * at the currently mapped buffer_heads.
+	 * Extent to map - this can be after first_page because that can be
+	 * fully mapped. We somewhat abuse m_flags to store whether the extent
+	 * is delalloc or unwritten.
 	 */
-	index = mpd->first_page;
-	end = mpd->next_page - 1;
-
-	pagevec_init(&pvec, 0);
-	while (index <= end) {
-		nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE);
-		if (nr_pages == 0)
-			break;
-		for (i = 0; i < nr_pages; i++) {
-			int skip_page = 0;
-			struct page *page = pvec.pages[i];
-
-			index = page->index;
-			if (index > end)
-				break;
-
-			if (index == size >> PAGE_CACHE_SHIFT)
-				len = size & ~PAGE_CACHE_MASK;
-			else
-				len = PAGE_CACHE_SIZE;
-			if (map) {
-				cur_logical = index << (PAGE_CACHE_SHIFT -
-							inode->i_blkbits);
-				pblock = map->m_pblk + (cur_logical -
-							map->m_lblk);
-			}
-			index++;
-
-			BUG_ON(!PageLocked(page));
-			BUG_ON(PageWriteback(page));
-
-			bh = page_bufs = page_buffers(page);
-			block_start = 0;
-			do {
-				if (map && (cur_logical >= map->m_lblk) &&
-				    (cur_logical <= (map->m_lblk +
-						     (map->m_len - 1)))) {
-					if (buffer_delay(bh)) {
-						clear_buffer_delay(bh);
-						bh->b_blocknr = pblock;
-					}
-					if (buffer_unwritten(bh) ||
-					    buffer_mapped(bh))
-						BUG_ON(bh->b_blocknr != pblock);
-					if (map->m_flags & EXT4_MAP_UNINIT)
-						set_buffer_uninit(bh);
-					clear_buffer_unwritten(bh);
-				}
-
-				/*
-				 * skip page if block allocation undone and
-				 * block is dirty
-				 */
-				if (ext4_bh_delay_or_unwritten(NULL, bh))
-					skip_page = 1;
-				bh = bh->b_this_page;
-				block_start += bh->b_size;
-				cur_logical++;
-				pblock++;
-			} while (bh != page_bufs);
-
-			if (skip_page) {
-				unlock_page(page);
-				continue;
-			}
-
-			clear_page_dirty_for_io(page);
-			err = ext4_bio_write_page(&io_submit, page, len,
-						  mpd->wbc);
-			if (!err)
-				mpd->pages_written++;
-			/*
-			 * In error case, we have to continue because
-			 * remaining pages are still locked
-			 */
-			if (ret == 0)
-				ret = err;
-		}
-		pagevec_release(&pvec);
-	}
-	ext4_io_submit(&io_submit);
-	/* Drop io_end reference we got from init */
-	ext4_put_io_end_defer(io_submit.io_end);
-	return ret;
-}
+	struct ext4_map_blocks map;
+	struct ext4_io_submit io_submit;	/* IO submission data */
+};
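
The mpage_da_data structure above replaces the old b_blocknr/b_size/b_state bookkeeping: the locked pages are tracked as a half-open page range (first_page up to next_page), and the extent still needing mapping lives in an ordinary ext4_map_blocks. Every page-to-block conversion in the rest of the patch reduces to a shift by PAGE_CACHE_SHIFT - i_blkbits. A minimal, runnable userspace sketch of that arithmetic, assuming 4096-byte pages and 1024-byte blocks (all macro values and names below are illustrative, not taken from the patch):

/* Sketch of the page <-> logical-block arithmetic used throughout
 * the patch; 4096-byte pages and 1024-byte blocks are assumptions. */
#include <stdio.h>

#define PG_SHIFT	12	/* stand-in for PAGE_CACHE_SHIFT */
#define BLKBITS		10	/* stand-in for inode->i_blkbits */

int main(void)
{
	unsigned long page_index = 5;
	/* First logical block covered by the page, as in
	 * lblk = page->index << (PAGE_CACHE_SHIFT - blkbits) */
	unsigned long lblk = page_index << (PG_SHIFT - BLKBITS);
	/* ...and back: the page containing a given block */
	unsigned long back = lblk >> (PG_SHIFT - BLKBITS);

	printf("page %lu -> blocks %lu..%lu, block %lu -> page %lu\n",
	       page_index, lblk,
	       lblk + (1UL << (PG_SHIFT - BLKBITS)) - 1, lblk, back);
	return 0;
}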
 
-static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd)
+static void mpage_release_unused_pages(struct mpage_da_data *mpd,
+				       bool invalidate)
 {
 	int nr_pages, i;
 	pgoff_t index, end;
 	struct pagevec pvec;
 	struct inode *inode = mpd->inode;
 	struct address_space *mapping = inode->i_mapping;
-	ext4_lblk_t start, last;
+
+	/* This is necessary when next_page == 0. */
+	if (mpd->first_page >= mpd->next_page)
+		return;
 
 	index = mpd->first_page;
 	end = mpd->next_page - 1;
-
-	start = index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
-	last = end << (PAGE_CACHE_SHIFT - inode->i_blkbits);
-	ext4_es_remove_extent(inode, start, last - start + 1);
+	if (invalidate) {
+		ext4_lblk_t start, last;
+		start = index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
+		last = end << (PAGE_CACHE_SHIFT - inode->i_blkbits);
+		ext4_es_remove_extent(inode, start, last - start + 1);
+	}
 
 	pagevec_init(&pvec, 0);
 	while (index <= end) {
@@ -1562,14 +1455,15 @@ static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd)
 				break;
 			BUG_ON(!PageLocked(page));
 			BUG_ON(PageWriteback(page));
-			block_invalidatepage(page, 0, PAGE_CACHE_SIZE);
-			ClearPageUptodate(page);
+			if (invalidate) {
+				block_invalidatepage(page, 0, PAGE_CACHE_SIZE);
+				ClearPageUptodate(page);
+			}
 			unlock_page(page);
 		}
 		index = pvec.pages[nr_pages - 1]->index + 1;
 		pagevec_release(&pvec);
 	}
-	return;
 }
 
 static void ext4_print_free_blocks(struct inode *inode)
@@ -1598,215 +1492,6 @@ static void ext4_print_free_blocks(struct inode *inode)
 	return;
 }
 
-/*
- * mpage_da_map_and_submit - go through given space, map them
- *			     if necessary, and then submit them for I/O
- *
- * @mpd - bh describing space
- *
- * The function skips space we know is already mapped to disk blocks.
- *
- */
-static void mpage_da_map_and_submit(struct mpage_da_data *mpd)
-{
-	int err, blks, get_blocks_flags;
-	struct ext4_map_blocks map, *mapp = NULL;
-	sector_t next = mpd->b_blocknr;
-	unsigned max_blocks = mpd->b_size >> mpd->inode->i_blkbits;
-	loff_t disksize = EXT4_I(mpd->inode)->i_disksize;
-	handle_t *handle = NULL;
-
-	/*
-	 * If the blocks are mapped already, or we couldn't accumulate
-	 * any blocks, then proceed immediately to the submission stage.
-	 */
-	if ((mpd->b_size == 0) ||
-	    ((mpd->b_state & (1 << BH_Mapped)) &&
-	     !(mpd->b_state & (1 << BH_Delay)) &&
-	     !(mpd->b_state & (1 << BH_Unwritten))))
-		goto submit_io;
-
-	handle = ext4_journal_current_handle();
-	BUG_ON(!handle);
-
-	/*
-	 * Call ext4_map_blocks() to allocate any delayed allocation
-	 * blocks, or to convert an uninitialized extent to be
-	 * initialized (in the case where we have written into
-	 * one or more preallocated blocks).
-	 *
-	 * We pass in the magic EXT4_GET_BLOCKS_DELALLOC_RESERVE to
-	 * indicate that we are on the delayed allocation path. This
-	 * affects functions in many different parts of the allocation
-	 * call path. This flag exists primarily because we don't
-	 * want to change *many* call functions, so ext4_map_blocks()
-	 * will set the EXT4_STATE_DELALLOC_RESERVED flag once the
-	 * inode's allocation semaphore is taken.
-	 *
-	 * If the blocks in questions were delalloc blocks, set
-	 * EXT4_GET_BLOCKS_DELALLOC_RESERVE so the delalloc accounting
-	 * variables are updated after the blocks have been allocated.
-	 */
-	map.m_lblk = next;
-	map.m_len = max_blocks;
-	/*
-	 * We're in delalloc path and it is possible that we're going to
-	 * need more metadata blocks than previously reserved. However
-	 * we must not fail because we're in writeback and there is
-	 * nothing we can do about it so it might result in data loss.
-	 * So use reserved blocks to allocate metadata if possible.
-	 */
-	get_blocks_flags = EXT4_GET_BLOCKS_CREATE |
-		EXT4_GET_BLOCKS_METADATA_NOFAIL;
-	if (ext4_should_dioread_nolock(mpd->inode))
-		get_blocks_flags |= EXT4_GET_BLOCKS_IO_CREATE_EXT;
-	if (mpd->b_state & (1 << BH_Delay))
-		get_blocks_flags |= EXT4_GET_BLOCKS_DELALLOC_RESERVE;
-
-
-	blks = ext4_map_blocks(handle, mpd->inode, &map, get_blocks_flags);
-	if (blks < 0) {
-		struct super_block *sb = mpd->inode->i_sb;
-
-		err = blks;
-		/*
-		 * If get block returns EAGAIN or ENOSPC and there
-		 * appears to be free blocks we will just let
-		 * mpage_da_submit_io() unlock all of the pages.
-		 */
-		if (err == -EAGAIN)
-			goto submit_io;
-
-		if (err == -ENOSPC && ext4_count_free_clusters(sb)) {
-			mpd->retval = err;
-			goto submit_io;
-		}
-
-		/*
-		 * get block failure will cause us to loop in
-		 * writepages, because a_ops->writepage won't be able
-		 * to make progress. The page will be redirtied by
-		 * writepage and writepages will again try to write
-		 * the same.
-		 */
-		if (!(EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED)) {
-			ext4_msg(sb, KERN_CRIT,
-				 "delayed block allocation failed for inode %lu "
-				 "at logical offset %llu with max blocks %zd "
-				 "with error %d", mpd->inode->i_ino,
-				 (unsigned long long) next,
-				 mpd->b_size >> mpd->inode->i_blkbits, err);
-			ext4_msg(sb, KERN_CRIT,
-				 "This should not happen!! Data will be lost");
-			if (err == -ENOSPC)
-				ext4_print_free_blocks(mpd->inode);
-		}
-		/* invalidate all the pages */
-		ext4_da_block_invalidatepages(mpd);
-
-		/* Mark this page range as having been completed */
-		mpd->io_done = 1;
-		return;
-	}
-	BUG_ON(blks == 0);
-
-	mapp = &map;
-	if (map.m_flags & EXT4_MAP_NEW) {
-		struct block_device *bdev = mpd->inode->i_sb->s_bdev;
-		int i;
-
-		for (i = 0; i < map.m_len; i++)
-			unmap_underlying_metadata(bdev, map.m_pblk + i);
-	}
-
-	/*
-	 * Update on-disk size along with block allocation.
-	 */
-	disksize = ((loff_t) next + blks) << mpd->inode->i_blkbits;
-	if (disksize > i_size_read(mpd->inode))
-		disksize = i_size_read(mpd->inode);
-	if (disksize > EXT4_I(mpd->inode)->i_disksize) {
-		ext4_update_i_disksize(mpd->inode, disksize);
-		err = ext4_mark_inode_dirty(handle, mpd->inode);
-		if (err)
-			ext4_error(mpd->inode->i_sb,
-				   "Failed to mark inode %lu dirty",
-				   mpd->inode->i_ino);
-	}
-
-submit_io:
-	mpage_da_submit_io(mpd, mapp);
-	mpd->io_done = 1;
-}
-
-#define BH_FLAGS ((1 << BH_Uptodate) | (1 << BH_Mapped) | \
-		  (1 << BH_Delay) | (1 << BH_Unwritten))
-
-/*
- * mpage_add_bh_to_extent - try to add one more block to extent of blocks
- *
- * @mpd->lbh - extent of blocks
- * @logical - logical number of the block in the file
- * @b_state - b_state of the buffer head added
- *
- * the function is used to collect contig. blocks in same state
- */
-static void mpage_add_bh_to_extent(struct mpage_da_data *mpd, sector_t logical,
-				   unsigned long b_state)
-{
-	sector_t next;
-	int blkbits = mpd->inode->i_blkbits;
-	int nrblocks = mpd->b_size >> blkbits;
-
-	/*
-	 * XXX Don't go larger than mballoc is willing to allocate
-	 * This is a stopgap solution. We eventually need to fold
-	 * mpage_da_submit_io() into this function and then call
-	 * ext4_map_blocks() multiple times in a loop
-	 */
-	if (nrblocks >= (8*1024*1024 >> blkbits))
-		goto flush_it;
-
-	/* check if the reserved journal credits might overflow */
-	if (!ext4_test_inode_flag(mpd->inode, EXT4_INODE_EXTENTS)) {
-		if (nrblocks >= EXT4_MAX_TRANS_DATA) {
-			/*
-			 * With non-extent format we are limited by the journal
-			 * credit available. Total credit needed to insert
-			 * nrblocks contiguous blocks is dependent on the
-			 * nrblocks. So limit nrblocks.
-			 */
-			goto flush_it;
-		}
-	}
-	/*
-	 * First block in the extent
-	 */
-	if (mpd->b_size == 0) {
-		mpd->b_blocknr = logical;
-		mpd->b_size = 1 << blkbits;
-		mpd->b_state = b_state & BH_FLAGS;
-		return;
-	}
-
-	next = mpd->b_blocknr + nrblocks;
-	/*
-	 * Can we merge the block to our big extent?
-	 */
-	if (logical == next && (b_state & BH_FLAGS) == mpd->b_state) {
-		mpd->b_size += 1 << blkbits;
-		return;
-	}
-
-flush_it:
-	/*
-	 * We couldn't merge the block to our extent, so we
-	 * need to flush current extent and start new one
-	 */
-	mpage_da_map_and_submit(mpd);
-	return;
-}
-
 static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh)
 {
 	return (buffer_delay(bh) || buffer_unwritten(bh)) && buffer_dirty(bh);
@@ -2204,6 +1889,8 @@ static int ext4_writepage(struct page *page,
 	return ret;
 }
 
+#define BH_FLAGS ((1 << BH_Unwritten) | (1 << BH_Delay))
+
 /*
  * mballoc gives us at most this number of blocks...
  * XXX: That seems to be only a limitation of ext4_mb_normalize_request().
@@ -2211,6 +1898,315 @@ static int ext4_writepage(struct page *page,
  */
 #define MAX_WRITEPAGES_EXTENT_LEN 2048
 
+/*
+ * mpage_add_bh_to_extent - try to add bh to extent of blocks to map
+ *
+ * @mpd - extent of blocks
+ * @lblk - logical number of the block in the file
+ * @b_state - b_state of the buffer head added
+ *
+ * The function is used to collect contiguous blocks in the same state
+ */
+static int mpage_add_bh_to_extent(struct mpage_da_data *mpd, ext4_lblk_t lblk,
+				  unsigned long b_state)
+{
+	struct ext4_map_blocks *map = &mpd->map;
+
+	/* Don't go larger than mballoc is willing to allocate */
+	if (map->m_len >= MAX_WRITEPAGES_EXTENT_LEN)
+		return 0;
+
+	/* First block in the extent? */
+	if (map->m_len == 0) {
+		map->m_lblk = lblk;
+		map->m_len = 1;
+		map->m_flags = b_state & BH_FLAGS;
+		return 1;
+	}
+
+	/* Can we merge the block to our big extent? */
+	if (lblk == map->m_lblk + map->m_len &&
+	    (b_state & BH_FLAGS) == map->m_flags) {
+		map->m_len++;
+		return 1;
+	}
+	return 0;
+}
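
The return convention matters here: 1 means the block was absorbed into the extent being built, 0 means the caller holds a complete extent and must map it before going on. The rule itself - only an exactly adjacent logical block with the same delalloc/unwritten state joins, never past the length cap - can be modeled in a few lines of userspace C. The sketch below is illustrative only; the types and names are stand-ins:

/* Userspace model of the merge rule in mpage_add_bh_to_extent(). */
#include <stdio.h>

struct extent {
	unsigned int lblk;	/* first logical block */
	unsigned int len;	/* number of blocks, 0 = empty extent */
	unsigned int flags;	/* state bits shared by every block */
};

#define MAX_EXTENT_LEN	2048	/* mirrors MAX_WRITEPAGES_EXTENT_LEN */

static int try_add(struct extent *ext, unsigned int lblk, unsigned int flags)
{
	if (ext->len >= MAX_EXTENT_LEN)
		return 0;			/* full: caller must map */
	if (ext->len == 0) {			/* first block starts extent */
		ext->lblk = lblk;
		ext->len = 1;
		ext->flags = flags;
		return 1;
	}
	if (lblk == ext->lblk + ext->len && flags == ext->flags) {
		ext->len++;			/* contiguous, same state */
		return 1;
	}
	return 0;				/* gap or state change */
}

int main(void)
{
	struct extent e = { 0, 0, 0 };
	int a = try_add(&e, 10, 1);	/* starts the extent */
	int b = try_add(&e, 11, 1);	/* adjacent, same state: merges */
	int c = try_add(&e, 13, 1);	/* gap: rejected */

	printf("%d %d %d -> extent [%u, +%u)\n", a, b, c, e.lblk, e.len);
	return 0;
}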
+
+static bool add_page_bufs_to_extent(struct mpage_da_data *mpd,
+				    struct buffer_head *head,
+				    struct buffer_head *bh,
+				    ext4_lblk_t lblk)
+{
+	struct inode *inode = mpd->inode;
+	ext4_lblk_t blocks = (i_size_read(inode) + (1 << inode->i_blkbits) - 1)
+							>> inode->i_blkbits;
+
+	do {
+		BUG_ON(buffer_locked(bh));
+
+		if (!buffer_dirty(bh) || !buffer_mapped(bh) ||
+		    (!buffer_delay(bh) && !buffer_unwritten(bh)) ||
+		    lblk >= blocks) {
+			/* Found extent to map? */
+			if (mpd->map.m_len)
+				return false;
+			if (lblk >= blocks)
+				return true;
+			continue;
+		}
+		if (!mpage_add_bh_to_extent(mpd, lblk, bh->b_state))
+			return false;
+	} while (lblk++, (bh = bh->b_this_page) != head);
+	return true;
+}
+
+static int mpage_submit_page(struct mpage_da_data *mpd, struct page *page)
+{
+	int len;
+	loff_t size = i_size_read(mpd->inode);
+	int err;
+
+	BUG_ON(page->index != mpd->first_page);
+	if (page->index == size >> PAGE_CACHE_SHIFT)
+		len = size & ~PAGE_CACHE_MASK;
+	else
+		len = PAGE_CACHE_SIZE;
+	clear_page_dirty_for_io(page);
+	err = ext4_bio_write_page(&mpd->io_submit, page, len, mpd->wbc);
+	if (!err)
+		mpd->wbc->nr_to_write--;
+	mpd->first_page++;
+
+	return err;
+}
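
One detail worth calling out: only the page containing i_size is written short; every page before it goes out in full, which is what keeps 'len' from running past end of file. A runnable sketch of just that computation, assuming 4096-byte pages (names are illustrative):

/* EOF-page length computation as in mpage_submit_page(); the 4096-byte
 * page size is an assumption. */
#include <stdio.h>

#define PG_SIZE		4096UL
#define PG_MASK		(~(PG_SIZE - 1))
#define PG_SHIFT	12

static unsigned long write_len(unsigned long page_index, unsigned long isize)
{
	if (page_index == isize >> PG_SHIFT)
		return isize & ~PG_MASK;	/* partial page at EOF */
	return PG_SIZE;				/* interior page: all of it */
}

int main(void)
{
	/* i_size = 10000: page 2 holds bytes 8192..9999 -> 1808 bytes */
	printf("%lu %lu\n", write_len(1, 10000), write_len(2, 10000));
	return 0;
}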
+
+/*
+ * mpage_map_buffers - update buffers corresponding to changed extent and
+ *		       submit fully mapped pages for IO
+ *
+ * @mpd - description of extent to map, on return next extent to map
+ *
+ * Scan buffers corresponding to changed extent (we expect corresponding pages
+ * to be already locked) and update buffer state according to new extent state.
+ * We map delalloc buffers to their physical location, clear unwritten bits,
+ * and mark buffers as uninit when we perform writes to uninitialized extents
+ * and do extent conversion after IO is finished. If the last page is not fully
+ * mapped, we update @map to the next extent in the last page that needs
+ * mapping. Otherwise we submit the page for IO.
+ */
+static int mpage_map_and_submit_buffers(struct mpage_da_data *mpd)
+{
+	struct pagevec pvec;
+	int nr_pages, i;
+	struct inode *inode = mpd->inode;
+	struct buffer_head *head, *bh;
+	int bpp_bits = PAGE_CACHE_SHIFT - inode->i_blkbits;
+	ext4_lblk_t blocks = (i_size_read(inode) + (1 << inode->i_blkbits) - 1)
+							>> inode->i_blkbits;
+	pgoff_t start, end;
+	ext4_lblk_t lblk;
+	sector_t pblock;
+	int err;
+
+	start = mpd->map.m_lblk >> bpp_bits;
+	end = (mpd->map.m_lblk + mpd->map.m_len - 1) >> bpp_bits;
+	lblk = start << bpp_bits;
+	pblock = mpd->map.m_pblk;
+
+	pagevec_init(&pvec, 0);
+	while (start <= end) {
+		nr_pages = pagevec_lookup(&pvec, inode->i_mapping, start,
+					  PAGEVEC_SIZE);
+		if (nr_pages == 0)
+			break;
+		for (i = 0; i < nr_pages; i++) {
+			struct page *page = pvec.pages[i];
+
+			if (page->index > end)
+				break;
+			/* Up to 'end' pages must be contiguous */
+			BUG_ON(page->index != start);
+			bh = head = page_buffers(page);
+			do {
+				if (lblk < mpd->map.m_lblk)
+					continue;
+				if (lblk >= mpd->map.m_lblk + mpd->map.m_len) {
+					/*
+					 * Buffer after end of mapped extent.
+					 * Find next buffer in the page to map.
+					 */
+					mpd->map.m_len = 0;
+					mpd->map.m_flags = 0;
+					add_page_bufs_to_extent(mpd, head, bh,
+								lblk);
+					pagevec_release(&pvec);
+					return 0;
+				}
+				if (buffer_delay(bh)) {
+					clear_buffer_delay(bh);
+					bh->b_blocknr = pblock++;
+				}
+				if (mpd->map.m_flags & EXT4_MAP_UNINIT)
+					set_buffer_uninit(bh);
+				clear_buffer_unwritten(bh);
+			} while (++lblk < blocks &&
+				 (bh = bh->b_this_page) != head);
+
+			/*
+			 * FIXME: This is going to break if dioread_nolock
+			 * supports blocksize < pagesize as we will try to
+			 * convert potentially unmapped parts of inode.
+			 */
+			mpd->io_submit.io_end->size += PAGE_CACHE_SIZE;
+			/* Page fully mapped - let IO run! */
+			err = mpage_submit_page(mpd, page);
+			if (err < 0) {
+				pagevec_release(&pvec);
+				return err;
+			}
+			start++;
+		}
+		pagevec_release(&pvec);
+	}
+	/* Extent fully mapped and matches with page boundary. We are done. */
+	mpd->map.m_len = 0;
+	mpd->map.m_flags = 0;
+	return 0;
+}
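
Before the walk starts, the mapped block extent is converted back into a page range with bpp_bits (log2 of blocks per page); buffers of the first page that lie below m_lblk are skipped inside the loop. A small sketch of the range computation, under the same 4K-page/1K-block assumption as earlier (values are illustrative):

/* Page range covered by a block extent, as computed at the top of
 * mpage_map_and_submit_buffers(); sizes are assumptions. */
#include <stdio.h>

int main(void)
{
	int bpp_bits = 12 - 10;			/* PAGE_CACHE_SHIFT - i_blkbits */
	unsigned int m_lblk = 5, m_len = 9;	/* blocks 5..13 */
	unsigned long start = m_lblk >> bpp_bits;
	unsigned long end = (m_lblk + m_len - 1) >> bpp_bits;

	/* Blocks 5..13 touch pages 1..3; the leading buffers of page 1
	 * (lblk < m_lblk) are skipped by the walk. */
	printf("pages %lu..%lu\n", start, end);
	return 0;
}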
+
+static int mpage_map_one_extent(handle_t *handle, struct mpage_da_data *mpd)
+{
+	struct inode *inode = mpd->inode;
+	struct ext4_map_blocks *map = &mpd->map;
+	int get_blocks_flags;
+	int err;
+
+	trace_ext4_da_write_pages_extent(inode, map);
+	/*
+	 * Call ext4_map_blocks() to allocate any delayed allocation blocks, or
+	 * to convert an uninitialized extent to be initialized (in the case
+	 * where we have written into one or more preallocated blocks). It is
+	 * possible that we're going to need more metadata blocks than
+	 * previously reserved. However we must not fail because we're in
+	 * writeback and there is nothing we can do about it so it might result
+	 * in data loss. So use reserved blocks to allocate metadata if
+	 * possible.
+	 *
+	 * We pass in the magic EXT4_GET_BLOCKS_DELALLOC_RESERVE if the blocks
+	 * in question are delalloc blocks. This affects functions in many
+	 * different parts of the allocation call path. This flag exists
+	 * primarily because we don't want to change *many* call functions, so
+	 * ext4_map_blocks() will set the EXT4_STATE_DELALLOC_RESERVED flag
+	 * once the inode's allocation semaphore is taken.
+	 */
+	get_blocks_flags = EXT4_GET_BLOCKS_CREATE |
+			   EXT4_GET_BLOCKS_METADATA_NOFAIL;
+	if (ext4_should_dioread_nolock(inode))
+		get_blocks_flags |= EXT4_GET_BLOCKS_IO_CREATE_EXT;
+	if (map->m_flags & (1 << BH_Delay))
+		get_blocks_flags |= EXT4_GET_BLOCKS_DELALLOC_RESERVE;
+
+	err = ext4_map_blocks(handle, inode, map, get_blocks_flags);
+	if (err < 0)
+		return err;
+
+	BUG_ON(map->m_len == 0);
+	if (map->m_flags & EXT4_MAP_NEW) {
+		struct block_device *bdev = inode->i_sb->s_bdev;
+		int i;
+
+		for (i = 0; i < map->m_len; i++)
+			unmap_underlying_metadata(bdev, map->m_pblk + i);
+	}
+	return 0;
+}
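
The flag composition is the heart of this helper: CREATE and METADATA_NOFAIL are unconditional because writeback cannot back out without losing data, while the dioread_nolock and delalloc bits depend on mount options and on the extent's state. A userspace model of that decision, with stand-in flag values (the real constants live in ext4.h and differ):

/* Model of the get_blocks_flags composition in mpage_map_one_extent().
 * Flag values are stand-ins; only the structure is meaningful. */
#include <stdio.h>

#define GB_CREATE		0x01
#define GB_METADATA_NOFAIL	0x02
#define GB_IO_CREATE_EXT	0x04	/* dioread_nolock path */
#define GB_DELALLOC_RESERVE	0x08	/* extent carries delayed blocks */

static int map_flags(int dioread_nolock, int extent_is_delalloc)
{
	/* Always create, and never fail for want of metadata blocks:
	 * we are in writeback and cannot back out. */
	int flags = GB_CREATE | GB_METADATA_NOFAIL;

	if (dioread_nolock)
		flags |= GB_IO_CREATE_EXT;
	if (extent_is_delalloc)
		flags |= GB_DELALLOC_RESERVE;
	return flags;
}

int main(void)
{
	printf("0x%x 0x%x\n", map_flags(0, 1), map_flags(1, 0));
	return 0;
}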
+
+/*
+ * mpage_map_and_submit_extent - map extent starting at mpd->lblk of length
+ *				 mpd->len and submit pages underlying it for IO
+ *
+ * @handle - handle for journal operations
+ * @mpd - extent to map
+ *
+ * The function maps extent starting at mpd->lblk of length mpd->len. If it is
+ * delayed, blocks are allocated, if it is unwritten, we may need to convert
+ * them to initialized or split the described range from larger unwritten
+ * extent. Note that we need not map all the described range since allocation
+ * can return fewer blocks or the range is covered by more unwritten extents. We
+ * cannot map more because we are limited by reserved transaction credits. On
+ * the other hand we always make sure that the last touched page is fully
+ * mapped so that it can be written out (and thus forward progress is
+ * guaranteed). After mapping we submit all mapped pages for IO.
+ */
+static int mpage_map_and_submit_extent(handle_t *handle,
+				       struct mpage_da_data *mpd)
+{
+	struct inode *inode = mpd->inode;
+	struct ext4_map_blocks *map = &mpd->map;
+	int err;
+	loff_t disksize;
+
+	mpd->io_submit.io_end->offset =
+				((loff_t)map->m_lblk) << inode->i_blkbits;
+	while (map->m_len) {
+		err = mpage_map_one_extent(handle, mpd);
+		if (err < 0) {
+			struct super_block *sb = inode->i_sb;
+
+			/*
+			 * Need to commit transaction to free blocks. Let upper
+			 * layers sort it out.
+			 */
+			if (err == -ENOSPC && ext4_count_free_clusters(sb))
+				return -ENOSPC;
+
+			if (!(EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED)) {
+				ext4_msg(sb, KERN_CRIT,
+					 "Delayed block allocation failed for "
+					 "inode %lu at logical offset %llu with"
+					 " max blocks %u with error %d",
+					 inode->i_ino,
+					 (unsigned long long)map->m_lblk,
+					 (unsigned)map->m_len, err);
+				ext4_msg(sb, KERN_CRIT,
+					 "This should not happen!! Data will "
+					 "be lost\n");
+				if (err == -ENOSPC)
+					ext4_print_free_blocks(inode);
+			}
+			/* invalidate all the pages */
+			mpage_release_unused_pages(mpd, true);
+			return err;
+		}
+		/*
+		 * Update buffer state, submit mapped pages, and get us new
+		 * extent to map
+		 */
+		err = mpage_map_and_submit_buffers(mpd);
+		if (err < 0)
+			return err;
+	}
+
+	/* Update on-disk size after IO is submitted */
+	disksize = ((loff_t)mpd->first_page) << PAGE_CACHE_SHIFT;
+	if (disksize > i_size_read(inode))
+		disksize = i_size_read(inode);
+	if (disksize > EXT4_I(inode)->i_disksize) {
+		int err2;
+
+		ext4_update_i_disksize(inode, disksize);
+		err2 = ext4_mark_inode_dirty(handle, inode);
+		if (err2)
+			ext4_error(inode->i_sb,
+				   "Failed to mark inode %lu dirty",
+				   inode->i_ino);
+		if (!err)
+			err = err2;
+	}
+	return err;
+}
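
Because ext4_map_blocks() may map only a prefix of the requested range, the function above loops: each pass maps what it can, mpage_map_and_submit_buffers() submits the now-mapped pages and re-primes mpd->map with whatever in the last page still needs mapping, and the loop ends once m_len drops to zero. A plain-C skeleton of that drain loop - map_some() stands in for ext4_map_blocks(), and the 4-blocks-per-call limit is an arbitrary assumption:

/* Skeleton of the drain loop in mpage_map_and_submit_extent(). */
#include <stdio.h>

struct ext { unsigned int lblk, len; };

static unsigned int map_some(struct ext *e)
{
	/* Pretend the allocator maps at most 4 blocks per call. */
	unsigned int mapped = e->len < 4 ? e->len : 4;

	printf("mapped %u..%u\n", e->lblk, e->lblk + mapped - 1);
	return mapped;
}

int main(void)
{
	struct ext e = { 8, 10 };	/* blocks 8..17 need mapping */

	while (e.len) {
		unsigned int n = map_some(&e);
		/* In the kernel, mpage_map_and_submit_buffers() submits
		 * the mapped pages and re-primes the extent; here we just
		 * advance past what was mapped. */
		e.lblk += n;
		e.len -= n;
	}
	return 0;
}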
+
 /*
  * Calculate the total number of credits to reserve for one writepages
  * iteration. This is called from ext4_da_writepages(). We map an extent of
@@ -2227,44 +2223,49 @@ static int ext4_da_writepages_trans_blocks(struct inode *inode)
 }
 
 /*
- * write_cache_pages_da - walk the list of dirty pages of the given
- * address space and accumulate pages that need writing, and call
- * mpage_da_map_and_submit to map a single contiguous memory region
- * and then write them.
+ * mpage_prepare_extent_to_map - find & lock contiguous range of dirty pages
+ *				 and underlying extent to map
+ *
+ * @mpd - where to look for pages
+ *
+ * Walk dirty pages in the mapping. If they are fully mapped, submit them for
+ * IO immediately. When we find a page which isn't mapped we start accumulating
+ * extent of buffers underlying these pages that needs mapping (formed by
+ * either delayed or unwritten buffers). We also lock the pages containing
+ * these buffers. The extent found is returned in @mpd structure (starting at
+ * mpd->lblk with length mpd->len blocks).
+ *
+ * Note that this function can attach bios to one io_end structure which are
+ * neither logically nor physically contiguous. Although it may seem an
+ * unnecessary complication, it is actually inevitable in blocksize < pagesize
+ * case as we need to track IO to all buffers underlying a page in one io_end.
  */
-static int write_cache_pages_da(handle_t *handle,
-				struct address_space *mapping,
-				struct writeback_control *wbc,
-				struct mpage_da_data *mpd,
-				pgoff_t *done_index)
+static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd)
 {
-	struct buffer_head *bh, *head;
-	struct inode *inode = mapping->host;
-	struct pagevec pvec;
-	unsigned int nr_pages;
-	sector_t logical;
-	pgoff_t index, end;
-	long nr_to_write = wbc->nr_to_write;
-	int i, tag, ret = 0;
-
-	memset(mpd, 0, sizeof(struct mpage_da_data));
-	mpd->wbc = wbc;
-	mpd->inode = inode;
-	pagevec_init(&pvec, 0);
-	index = wbc->range_start >> PAGE_CACHE_SHIFT;
-	end = wbc->range_end >> PAGE_CACHE_SHIFT;
+	struct address_space *mapping = mpd->inode->i_mapping;
+	struct pagevec pvec;
+	unsigned int nr_pages;
+	pgoff_t index = mpd->first_page;
+	pgoff_t end = mpd->last_page;
+	int tag;
+	int i, err = 0;
+	int blkbits = mpd->inode->i_blkbits;
+	ext4_lblk_t lblk;
+	struct buffer_head *head;
 
-	if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
+	if (mpd->wbc->sync_mode == WB_SYNC_ALL || mpd->wbc->tagged_writepages)
 		tag = PAGECACHE_TAG_TOWRITE;
 	else
 		tag = PAGECACHE_TAG_DIRTY;
 
-	*done_index = index;
+	pagevec_init(&pvec, 0);
+	mpd->map.m_len = 0;
+	mpd->next_page = index;
 	while (index <= end) {
 		nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
 			      min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
 		if (nr_pages == 0)
-			return 0;
+			goto out;
 
 		for (i = 0; i < nr_pages; i++) {
 			struct page *page = pvec.pages[i];
@@ -2279,31 +2280,21 @@ static int write_cache_pages_da(handle_t *handle,
 			if (page->index > end)
 				goto out;
 
-			*done_index = page->index + 1;
-
-			/*
-			 * If we can't merge this page, and we have
-			 * accumulated an contiguous region, write it
-			 */
-			if ((mpd->next_page != page->index) &&
-			    (mpd->next_page != mpd->first_page)) {
-				mpage_da_map_and_submit(mpd);
-				goto ret_extent_tail;
-			}
+			/* If we can't merge this page, we are done. */
+			if (mpd->map.m_len > 0 && mpd->next_page != page->index)
+				goto out;
 
 			lock_page(page);
-
 			/*
-			 * If the page is no longer dirty, or its
-			 * mapping no longer corresponds to inode we
-			 * are writing (which means it has been
-			 * truncated or invalidated), or the page is
-			 * already under writeback and we are not
-			 * doing a data integrity writeback, skip the page
+			 * If the page is no longer dirty, or its mapping no
+			 * longer corresponds to inode we are writing (which
+			 * means it has been truncated or invalidated), or the
+			 * page is already under writeback and we are not doing
+			 * a data integrity writeback, skip the page
 			 */
 			if (!PageDirty(page) ||
 			    (PageWriteback(page) &&
-			     (wbc->sync_mode == WB_SYNC_NONE)) ||
+			     (mpd->wbc->sync_mode == WB_SYNC_NONE)) ||
 			    unlikely(page->mapping != mapping)) {
 				unlock_page(page);
 				continue;
@@ -2312,101 +2303,57 @@ static int write_cache_pages_da(handle_t *handle,
 			wait_on_page_writeback(page);
 			BUG_ON(PageWriteback(page));
 
-			/*
-			 * If we have inline data and arrive here, it means that
-			 * we will soon create the block for the 1st page, so
-			 * we'd better clear the inline data here.
-			 */
-			if (ext4_has_inline_data(inode)) {
-				BUG_ON(ext4_test_inode_state(inode,
-						EXT4_STATE_MAY_INLINE_DATA));
-				ext4_destroy_inline_data(handle, inode);
-			}
-
-			if (mpd->next_page != page->index)
+			if (mpd->map.m_len == 0)
 				mpd->first_page = page->index;
 			mpd->next_page = page->index + 1;
-			logical = (sector_t) page->index <<
-				(PAGE_CACHE_SHIFT - inode->i_blkbits);
-
 			/* Add all dirty buffers to mpd */
+			lblk = ((ext4_lblk_t)page->index) <<
+				(PAGE_CACHE_SHIFT - blkbits);
 			head = page_buffers(page);
-			bh = head;
-			do {
-				BUG_ON(buffer_locked(bh));
-				/*
-				 * We need to try to allocate unmapped blocks
-				 * in the same page. Otherwise we won't make
-				 * progress with the page in ext4_writepage
-				 */
-				if (ext4_bh_delay_or_unwritten(NULL, bh)) {
-					mpage_add_bh_to_extent(mpd, logical,
-							       bh->b_state);
-					if (mpd->io_done)
-						goto ret_extent_tail;
-				} else if (buffer_dirty(bh) &&
-					   buffer_mapped(bh)) {
-					/*
-					 * mapped dirty buffer. We need to
-					 * update the b_state because we look
-					 * at b_state in mpage_da_map_blocks.
-					 * We don't update b_size because if we
-					 * find an unmapped buffer_head later
-					 * we need to use the b_state flag of
-					 * that buffer_head.
-					 */
-					if (mpd->b_size == 0)
-						mpd->b_state =
-							bh->b_state & BH_FLAGS;
-				}
-				logical++;
-			} while ((bh = bh->b_this_page) != head);
-
-			if (nr_to_write > 0) {
-				nr_to_write--;
-				if (nr_to_write == 0 &&
-				    wbc->sync_mode == WB_SYNC_NONE)
-					/*
-					 * We stop writing back only if we are
-					 * not doing integrity sync. In case of
-					 * integrity sync we have to keep going
-					 * because someone may be concurrently
-					 * dirtying pages, and we might have
-					 * synced a lot of newly appeared dirty
-					 * pages, but have not synced all of the
-					 * old dirty pages.
-					 */
+			if (!add_page_bufs_to_extent(mpd, head, head, lblk))
+				goto out;
+			/* So far everything mapped? Submit the page for IO. */
+			if (mpd->map.m_len == 0) {
+				err = mpage_submit_page(mpd, page);
+				if (err < 0)
 					goto out;
 			}
+
+			/*
+			 * Accumulated enough dirty pages? This doesn't apply
+			 * to WB_SYNC_ALL mode. For integrity sync we have to
+			 * keep going because someone may be concurrently
+			 * dirtying pages, and we might have synced a lot of
+			 * newly appeared dirty pages, but have not synced all
+			 * of the old dirty pages.
+			 */
+			if (mpd->wbc->sync_mode == WB_SYNC_NONE &&
+			    mpd->next_page - mpd->first_page >=
+							mpd->wbc->nr_to_write)
+				goto out;
 		}
 		pagevec_release(&pvec);
 		cond_resched();
 	}
 	return 0;
-ret_extent_tail:
-	ret = MPAGE_DA_EXTENT_TAIL;
 out:
 	pagevec_release(&pvec);
-	cond_resched();
-	return ret;
+	return err;
}
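
The nr_to_write accounting moved with the rewrite: rather than decrementing a local counter per page, the scan stops once the window next_page - first_page reaches wbc->nr_to_write, and only for non-integrity writeback; WB_SYNC_ALL keeps scanning for the reason the in-loop comment gives. A compact model of that termination rule (names and types are illustrative):

/* Model of the scan-termination rule in mpage_prepare_extent_to_map(). */
#include <stdbool.h>
#include <stdio.h>

enum sync_mode { WB_SYNC_NONE, WB_SYNC_ALL };

static bool enough_pages(enum sync_mode mode, unsigned long first_page,
			 unsigned long next_page, long nr_to_write)
{
	return mode == WB_SYNC_NONE &&
	       next_page - first_page >= (unsigned long)nr_to_write;
}

int main(void)
{
	printf("%d %d\n",
	       enough_pages(WB_SYNC_NONE, 0, 16, 16),	/* 1: stop */
	       enough_pages(WB_SYNC_ALL, 0, 1000, 16));	/* 0: keep going */
	return 0;
}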
 
-
 static int ext4_da_writepages(struct address_space *mapping,
 			      struct writeback_control *wbc)
 {
-	pgoff_t index;
+	pgoff_t writeback_index = 0;
+	long nr_to_write = wbc->nr_to_write;
 	int range_whole = 0;
+	int cycled = 1;
 	handle_t *handle = NULL;
 	struct mpage_da_data mpd;
 	struct inode *inode = mapping->host;
-	int pages_written = 0;
-	int range_cyclic, cycled = 1, io_done = 0;
 	int needed_blocks, ret = 0;
-	loff_t range_start = wbc->range_start;
 	struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb);
-	pgoff_t done_index = 0;
-	pgoff_t end;
+	bool done;
 	struct blk_plug plug;
 
 	trace_ext4_da_writepages(inode, wbc);
@@ -2432,40 +2379,65 @@ static int ext4_da_writepages(struct address_space *mapping,
 	if (unlikely(sbi->s_mount_flags & EXT4_MF_FS_ABORTED))
 		return -EROFS;
 
+	/*
+	 * If we have inline data and arrive here, it means that
+	 * we will soon create the block for the 1st page, so
+	 * we'd better clear the inline data here.
+	 */
+	if (ext4_has_inline_data(inode)) {
+		/* Just inode will be modified... */
+		handle = ext4_journal_start(inode, EXT4_HT_INODE, 1);
+		if (IS_ERR(handle)) {
+			ret = PTR_ERR(handle);
+			goto out_writepages;
+		}
+		BUG_ON(ext4_test_inode_state(inode,
+				EXT4_STATE_MAY_INLINE_DATA));
+		ext4_destroy_inline_data(handle, inode);
+		ext4_journal_stop(handle);
+	}
+
 	if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
 		range_whole = 1;
 
-	range_cyclic = wbc->range_cyclic;
 	if (wbc->range_cyclic) {
-		index = mapping->writeback_index;
-		if (index)
+		writeback_index = mapping->writeback_index;
+		if (writeback_index)
 			cycled = 0;
-		wbc->range_start = index << PAGE_CACHE_SHIFT;
-		wbc->range_end = LLONG_MAX;
-		wbc->range_cyclic = 0;
-		end = -1;
+		mpd.first_page = writeback_index;
+		mpd.last_page = -1;
 	} else {
-		index = wbc->range_start >> PAGE_CACHE_SHIFT;
-		end = wbc->range_end >> PAGE_CACHE_SHIFT;
+		mpd.first_page = wbc->range_start >> PAGE_CACHE_SHIFT;
+		mpd.last_page = wbc->range_end >> PAGE_CACHE_SHIFT;
 	}
 
+	mpd.inode = inode;
+	mpd.wbc = wbc;
+	ext4_io_submit_init(&mpd.io_submit, wbc);
retry:
 	if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
-		tag_pages_for_writeback(mapping, index, end);
-
+		tag_pages_for_writeback(mapping, mpd.first_page, mpd.last_page);
+	done = false;
 	blk_start_plug(&plug);
-	while (!ret && wbc->nr_to_write > 0) {
+	while (!done && mpd.first_page <= mpd.last_page) {
+		/* For each extent of pages we use new io_end */
+		mpd.io_submit.io_end = ext4_init_io_end(inode, GFP_KERNEL);
+		if (!mpd.io_submit.io_end) {
+			ret = -ENOMEM;
+			break;
+		}
 
 		/*
-		 * we insert one extent at a time. So we need
-		 * credit needed for single extent allocation.
-		 * journalled mode is currently not supported
-		 * by delalloc
+		 * We have two constraints: We find one extent to map and we
+		 * must always write out the whole page (makes a difference when
+ * blocksize < pagesize) so that we don't block on IO when we
|
|
|
+ * try to write out the rest of the page. Journalled mode is
|
|
|
+ * not supported by delalloc.
|
|
|
*/
|
|
|
BUG_ON(ext4_should_journal_data(inode));
|
|
|
needed_blocks = ext4_da_writepages_trans_blocks(inode);
|
|
|
|
|
|
- /* start a new transaction*/
|
|
|
+ /* start a new transaction */
|
|
|
handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE,
|
|
|
needed_blocks);
|
|
|
if (IS_ERR(handle)) {
|
|
@@ -2473,76 +2445,67 @@ retry:
|
|
|
ext4_msg(inode->i_sb, KERN_CRIT, "%s: jbd2_start: "
|
|
|
"%ld pages, ino %lu; err %d", __func__,
|
|
|
wbc->nr_to_write, inode->i_ino, ret);
|
|
|
- blk_finish_plug(&plug);
|
|
|
- goto out_writepages;
|
|
|
+ /* Release allocated io_end */
|
|
|
+ ext4_put_io_end(mpd.io_submit.io_end);
|
|
|
+ break;
|
|
|
}
|
|
|
|
|
|
- /*
|
|
|
- * Now call write_cache_pages_da() to find the next
|
|
|
- * contiguous region of logical blocks that need
|
|
|
- * blocks to be allocated by ext4 and submit them.
|
|
|
- */
|
|
|
- ret = write_cache_pages_da(handle, mapping,
|
|
|
- wbc, &mpd, &done_index);
|
|
|
- /*
|
|
|
- * If we have a contiguous extent of pages and we
|
|
|
- * haven't done the I/O yet, map the blocks and submit
|
|
|
- * them for I/O.
|
|
|
- */
|
|
|
- if (!mpd.io_done && mpd.next_page != mpd.first_page) {
|
|
|
- mpage_da_map_and_submit(&mpd);
|
|
|
- ret = MPAGE_DA_EXTENT_TAIL;
|
|
|
+ trace_ext4_da_write_pages(inode, mpd.first_page, mpd.wbc);
|
|
|
+ ret = mpage_prepare_extent_to_map(&mpd);
|
|
|
+ if (!ret) {
|
|
|
+ if (mpd.map.m_len)
|
|
|
+ ret = mpage_map_and_submit_extent(handle, &mpd);
|
|
|
+ else {
|
|
|
+ /*
|
|
|
+ * We scanned the whole range (or exhausted
|
|
|
+ * nr_to_write), submitted what was mapped and
|
|
|
+ * didn't find anything needing mapping. We are
|
|
|
+ * done.
|
|
|
+ */
|
|
|
+ done = true;
|
|
|
+ }
|
|
|
}
|
|
|
- trace_ext4_da_write_pages(inode, &mpd);
|
|
|
- wbc->nr_to_write -= mpd.pages_written;
|
|
|
-
|
|
|
ext4_journal_stop(handle);
|
|
|
-
|
|
|
- if ((mpd.retval == -ENOSPC) && sbi->s_journal) {
|
|
|
- /* commit the transaction which would
|
|
|
+ /* Submit prepared bio */
|
|
|
+ ext4_io_submit(&mpd.io_submit);
|
|
|
+ /* Unlock pages we didn't use */
|
|
|
+ mpage_release_unused_pages(&mpd, false);
|
|
|
+ /* Drop our io_end reference we got from init */
|
|
|
+ ext4_put_io_end(mpd.io_submit.io_end);
|
|
|
+
|
|
|
+ if (ret == -ENOSPC && sbi->s_journal) {
|
|
|
+ /*
|
|
|
+ * Commit the transaction which would
|
|
|
* free blocks released in the transaction
|
|
|
* and try again
|
|
|
*/
|
|
|
jbd2_journal_force_commit_nested(sbi->s_journal);
|
|
|
ret = 0;
|
|
|
- } else if (ret == MPAGE_DA_EXTENT_TAIL) {
|
|
|
- /*
|
|
|
- * Got one extent now try with rest of the pages.
|
|
|
- * If mpd.retval is set -EIO, journal is aborted.
|
|
|
- * So we don't need to write any more.
|
|
|
- */
|
|
|
- pages_written += mpd.pages_written;
|
|
|
- ret = mpd.retval;
|
|
|
- io_done = 1;
|
|
|
- } else if (wbc->nr_to_write)
|
|
|
- /*
|
|
|
- * There is no more writeout needed
|
|
|
- * or we requested for a noblocking writeout
|
|
|
- * and we found the device congested
|
|
|
- */
|
|
|
+ continue;
|
|
|
+ }
|
|
|
+ /* Fatal error - ENOMEM, EIO... */
|
|
|
+ if (ret)
|
|
|
break;
|
|
|
}
|
|
|
blk_finish_plug(&plug);
|
|
|
- if (!io_done && !cycled) {
|
|
|
+ if (!ret && !cycled) {
|
|
|
cycled = 1;
|
|
|
- index = 0;
|
|
|
- wbc->range_start = index << PAGE_CACHE_SHIFT;
|
|
|
- wbc->range_end = mapping->writeback_index - 1;
|
|
|
+ mpd.last_page = writeback_index - 1;
|
|
|
+ mpd.first_page = 0;
|
|
|
goto retry;
|
|
|
}
|
|
|
|
|
|
/* Update index */
|
|
|
- wbc->range_cyclic = range_cyclic;
|
|
|
if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
|
|
|
/*
|
|
|
- * set the writeback_index so that range_cyclic
|
|
|
+ * Set the writeback_index so that range_cyclic
|
|
|
* mode will write it back later
|
|
|
*/
|
|
|
- mapping->writeback_index = done_index;
|
|
|
+ mapping->writeback_index = mpd.first_page;
|
|
|
|
|
|
out_writepages:
|
|
|
- wbc->range_start = range_start;
|
|
|
- trace_ext4_da_writepages_result(inode, wbc, ret, pages_written);
|
|
|
+ trace_ext4_da_writepages_result(inode, wbc, ret,
|
|
|
+ nr_to_write - wbc->nr_to_write);
|
|
|
return ret;
|
|
|
}
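
With the rewrite, range_cyclic no longer mutates the writeback_control: the two-pass wrap-around is expressed directly on mpd.first_page and mpd.last_page. The first pass covers [writeback_index, end of file]; if nothing failed and the scan started mid-file, a second pass covers [0, writeback_index - 1]. A userspace model of that control flow, with an assumed resume point:

/* Model of the range_cyclic two-pass logic in ext4_da_writepages(). */
#include <stdio.h>

int main(void)
{
	unsigned long writeback_index = 42;	/* resume point (assumed) */
	unsigned long first = writeback_index;
	unsigned long last = (unsigned long)-1;	/* mirrors last_page = -1 */
	int cycled = (writeback_index == 0);
	int ret = 0;

retry:
	printf("scan pages %lu..%lu\n", first, last);
	if (!ret && !cycled) {
		cycled = 1;
		last = writeback_index - 1;
		first = 0;
		goto retry;
	}
	return 0;
}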