@@ -618,32 +618,50 @@ static int dax_writeback_one(struct block_device *bdev,
 		struct address_space *mapping, pgoff_t index, void *entry)
 {
 	struct radix_tree_root *page_tree = &mapping->page_tree;
-	struct radix_tree_node *node;
 	struct blk_dax_ctl dax;
-	void **slot;
+	void *entry2, **slot;
 	int ret = 0;
 
-	spin_lock_irq(&mapping->tree_lock);
 	/*
-	 * Regular page slots are stabilized by the page lock even
-	 * without the tree itself locked.  These unlocked entries
-	 * need verification under the tree lock.
+	 * A page got tagged dirty in DAX mapping? Something is seriously
+	 * wrong.
 	 */
-	if (!__radix_tree_lookup(page_tree, index, &node, &slot))
-		goto unlock;
-	if (*slot != entry)
-		goto unlock;
-
-	/* another fsync thread may have already written back this entry */
-	if (!radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_TOWRITE))
-		goto unlock;
+	if (WARN_ON(!radix_tree_exceptional_entry(entry)))
+		return -EIO;
 
+	spin_lock_irq(&mapping->tree_lock);
+	entry2 = get_unlocked_mapping_entry(mapping, index, &slot);
+	/* Entry got punched out / reallocated? */
+	if (!entry2 || !radix_tree_exceptional_entry(entry2))
+		goto put_unlocked;
+	/*
+	 * Entry got reallocated elsewhere? No need to writeback. We have to
+	 * compare sectors as we must not bail out due to difference in lockbit
+	 * or entry type.
+	 */
+	if (dax_radix_sector(entry2) != dax_radix_sector(entry))
+		goto put_unlocked;
 	if (WARN_ON_ONCE(dax_is_empty_entry(entry) ||
 				dax_is_zero_entry(entry))) {
 		ret = -EIO;
-		goto unlock;
+		goto put_unlocked;
 	}
 
+	/* Another fsync thread may have already written back this entry */
+	if (!radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_TOWRITE))
+		goto put_unlocked;
+	/* Lock the entry to serialize with page faults */
+	entry = lock_slot(mapping, slot);
+	/*
+	 * We can clear the tag now but we have to be careful so that concurrent
+	 * dax_writeback_one() calls for the same index cannot finish before we
+	 * actually flush the caches. This is achieved as the calls will look
+	 * at the entry only under tree_lock and once they do that they will
+	 * see the entry locked and wait for it to unlock.
+	 */
+	radix_tree_tag_clear(page_tree, index, PAGECACHE_TAG_TOWRITE);
+	spin_unlock_irq(&mapping->tree_lock);
+
 	/*
 	 * Even if dax_writeback_mapping_range() was given a wbc->range_start
 	 * in the middle of a PMD, the 'index' we are given will be aligned to
@@ -653,15 +671,16 @@ static int dax_writeback_one(struct block_device *bdev,
 	 */
 	dax.sector = dax_radix_sector(entry);
 	dax.size = PAGE_SIZE << dax_radix_order(entry);
-	spin_unlock_irq(&mapping->tree_lock);
 
 	/*
 	 * We cannot hold tree_lock while calling dax_map_atomic() because it
 	 * eventually calls cond_resched().
 	 */
 	ret = dax_map_atomic(bdev, &dax);
-	if (ret < 0)
+	if (ret < 0) {
+		put_locked_mapping_entry(mapping, index, entry);
 		return ret;
+	}
 
 	if (WARN_ON_ONCE(ret < dax.size)) {
 		ret = -EIO;
@@ -669,15 +688,13 @@ static int dax_writeback_one(struct block_device *bdev,
 	}
 
 	wb_cache_pmem(dax.addr, dax.size);
-
-	spin_lock_irq(&mapping->tree_lock);
-	radix_tree_tag_clear(page_tree, index, PAGECACHE_TAG_TOWRITE);
-	spin_unlock_irq(&mapping->tree_lock);
  unmap:
 	dax_unmap_atomic(bdev, &dax);
+	put_locked_mapping_entry(mapping, index, entry);
 	return ret;
 
- unlock:
+ put_unlocked:
+	put_unlocked_mapping_entry(mapping, index, entry2);
 	spin_unlock_irq(&mapping->tree_lock);
 	return ret;
 }
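
The comment added above explains why PAGECACHE_TAG_TOWRITE may be cleared before the cache flush: any concurrent dax_writeback_one() for the same index must first look at the entry under tree_lock, where it finds the entry locked and waits until the flusher unlocks it. The following is a minimal userspace sketch of that ordering, not kernel code: a pthread mutex stands in for mapping->tree_lock, a condition variable for the entry wait queue, and the names (flush_state, flusher, towrite) are invented for this illustration.

/*
 * Userspace sketch of the tag-clear-before-flush ordering. Whichever
 * flusher wins locks the "entry", clears the "TOWRITE tag", flushes, then
 * unlocks. A concurrent flusher cannot finish early: it either becomes the
 * flusher itself or waits on the locked entry and then sees the tag clear.
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

struct flush_state {
	pthread_mutex_t tree_lock;	/* analogue of mapping->tree_lock */
	pthread_cond_t entry_wq;	/* analogue of the entry wait queue */
	bool entry_locked;		/* analogue of the entry lock bit */
	bool towrite;			/* analogue of PAGECACHE_TAG_TOWRITE */
};

static struct flush_state st = {
	.tree_lock = PTHREAD_MUTEX_INITIALIZER,
	.entry_wq = PTHREAD_COND_INITIALIZER,
	.entry_locked = false,
	.towrite = true,
};

static void *flusher(void *arg)
{
	int id = *(int *)arg;

	pthread_mutex_lock(&st.tree_lock);
	/* Like get_unlocked_mapping_entry(): wait while the entry is locked. */
	while (st.entry_locked)
		pthread_cond_wait(&st.entry_wq, &st.tree_lock);
	if (!st.towrite) {
		/* Another flusher already wrote this entry back. */
		pthread_mutex_unlock(&st.tree_lock);
		printf("flusher %d: tag already clear, nothing to do\n", id);
		return NULL;
	}
	st.entry_locked = true;		/* like lock_slot() */
	st.towrite = false;		/* tag cleared before the flush */
	pthread_mutex_unlock(&st.tree_lock);

	printf("flusher %d: flushing caches\n", id);
	usleep(100000);			/* stands in for wb_cache_pmem() */

	pthread_mutex_lock(&st.tree_lock);
	st.entry_locked = false;	/* like put_locked_mapping_entry() */
	pthread_cond_broadcast(&st.entry_wq);
	pthread_mutex_unlock(&st.tree_lock);
	return NULL;
}

int main(void)
{
	pthread_t t1, t2;
	int id1 = 1, id2 = 2;

	pthread_create(&t1, NULL, flusher, &id1);
	pthread_create(&t2, NULL, flusher, &id2);
	pthread_join(t1, NULL);
	pthread_join(t2, NULL);
	return 0;
}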