@@ -59,56 +59,57 @@ static int __init init_dax_wait_table(void)
 fs_initcall(init_dax_wait_table);
 
 /*
- * We use lowest available bit in exceptional entry for locking, one bit for
- * the entry size (PMD) and two more to tell us if the entry is a zero page or
- * an empty entry that is just used for locking. In total four special bits.
+ * DAX pagecache entries use XArray value entries so they can't be mistaken
+ * for pages. We use one bit for locking, one bit for the entry size (PMD)
+ * and two more to tell us if the entry is a zero page or an empty entry that
+ * is just used for locking. In total four special bits.
  *
  * If the PMD bit isn't set the entry has size PAGE_SIZE, and if the ZERO_PAGE
  * and EMPTY bits aren't set the entry is a normal DAX entry with a filesystem
  * block allocation.
  */
-#define RADIX_DAX_SHIFT		(RADIX_TREE_EXCEPTIONAL_SHIFT + 4)
-#define RADIX_DAX_ENTRY_LOCK	(1 << RADIX_TREE_EXCEPTIONAL_SHIFT)
-#define RADIX_DAX_PMD		(1 << (RADIX_TREE_EXCEPTIONAL_SHIFT + 1))
-#define RADIX_DAX_ZERO_PAGE	(1 << (RADIX_TREE_EXCEPTIONAL_SHIFT + 2))
-#define RADIX_DAX_EMPTY		(1 << (RADIX_TREE_EXCEPTIONAL_SHIFT + 3))
+#define DAX_SHIFT	(4)
+#define DAX_LOCKED	(1UL << 0)
+#define DAX_PMD		(1UL << 1)
+#define DAX_ZERO_PAGE	(1UL << 2)
+#define DAX_EMPTY	(1UL << 3)
 
 static unsigned long dax_radix_pfn(void *entry)
 {
-	return (unsigned long)entry >> RADIX_DAX_SHIFT;
+	return xa_to_value(entry) >> DAX_SHIFT;
 }
 
 static void *dax_radix_locked_entry(unsigned long pfn, unsigned long flags)
 {
-	return (void *)(RADIX_TREE_EXCEPTIONAL_ENTRY | flags |
-			(pfn << RADIX_DAX_SHIFT) | RADIX_DAX_ENTRY_LOCK);
+	return xa_mk_value(flags | ((unsigned long)pfn << DAX_SHIFT) |
+			DAX_LOCKED);
 }
 
 static unsigned int dax_radix_order(void *entry)
 {
-	if ((unsigned long)entry & RADIX_DAX_PMD)
+	if (xa_to_value(entry) & DAX_PMD)
 		return PMD_SHIFT - PAGE_SHIFT;
 	return 0;
 }
 
 static int dax_is_pmd_entry(void *entry)
 {
-	return (unsigned long)entry & RADIX_DAX_PMD;
+	return xa_to_value(entry) & DAX_PMD;
 }
 
 static int dax_is_pte_entry(void *entry)
 {
-	return !((unsigned long)entry & RADIX_DAX_PMD);
+	return !(xa_to_value(entry) & DAX_PMD);
 }
 
 static int dax_is_zero_entry(void *entry)
 {
-	return (unsigned long)entry & RADIX_DAX_ZERO_PAGE;
+	return xa_to_value(entry) & DAX_ZERO_PAGE;
 }
 
 static int dax_is_empty_entry(void *entry)
 {
-	return (unsigned long)entry & RADIX_DAX_EMPTY;
+	return xa_to_value(entry) & DAX_EMPTY;
 }
 
 /*
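
For illustration only (this aside is not part of the patch): dax_radix_locked_entry() now packs the pfn above the four flag bits and wraps the result with xa_mk_value(), so the entry is tagged as an XArray value rather than mistaken for a page pointer. The userspace sketch below mimics that layout under two assumptions: the xa_mk_value()/xa_to_value() stand-ins follow the kernel's value-entry encoding (shift left by one, set the low bit), and dax_locked_entry() is a hypothetical copy of the helper from the hunk above.

	/*
	 * Illustrative userspace sketch, not kernel code: shows how a pfn and
	 * the DAX flag bits round-trip through an XArray value entry.
	 */
	#include <assert.h>
	#include <stdio.h>

	#define DAX_SHIFT	(4)
	#define DAX_LOCKED	(1UL << 0)
	#define DAX_PMD		(1UL << 1)
	#define DAX_ZERO_PAGE	(1UL << 2)
	#define DAX_EMPTY	(1UL << 3)

	/* Stand-ins assumed to match the kernel's value-entry encoding. */
	static void *xa_mk_value(unsigned long v)
	{
		return (void *)((v << 1) | 1);
	}

	static unsigned long xa_to_value(const void *entry)
	{
		return (unsigned long)entry >> 1;
	}

	/* Hypothetical userspace copy of dax_radix_locked_entry(). */
	static void *dax_locked_entry(unsigned long pfn, unsigned long flags)
	{
		return xa_mk_value(flags | (pfn << DAX_SHIFT) | DAX_LOCKED);
	}

	int main(void)
	{
		void *entry = dax_locked_entry(0x1234, DAX_PMD);

		assert(xa_to_value(entry) >> DAX_SHIFT == 0x1234);	/* pfn survives */
		assert(xa_to_value(entry) & DAX_PMD);			/* size flag kept */
		assert(xa_to_value(entry) & DAX_LOCKED);		/* created locked */
		printf("encoded entry: %p\n", entry);
		return 0;
	}

Locking and unlocking in the patch (lock_slot()/unlock_slot() below) is the same round trip: unwrap with xa_to_value(), OR in or mask off DAX_LOCKED, and re-wrap with xa_mk_value().
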
@@ -186,9 +187,9 @@ static void dax_wake_mapping_entry_waiter(struct address_space *mapping,
  */
 static inline int slot_locked(struct address_space *mapping, void **slot)
 {
-	unsigned long entry = (unsigned long)
-		radix_tree_deref_slot_protected(slot, &mapping->i_pages.xa_lock);
-	return entry & RADIX_DAX_ENTRY_LOCK;
+	unsigned long entry = xa_to_value(
+		radix_tree_deref_slot_protected(slot, &mapping->i_pages.xa_lock));
+	return entry & DAX_LOCKED;
 }
 
 /*
@@ -196,12 +197,11 @@ static inline int slot_locked(struct address_space *mapping, void **slot)
  */
 static inline void *lock_slot(struct address_space *mapping, void **slot)
 {
-	unsigned long entry = (unsigned long)
-		radix_tree_deref_slot_protected(slot, &mapping->i_pages.xa_lock);
-
-	entry |= RADIX_DAX_ENTRY_LOCK;
-	radix_tree_replace_slot(&mapping->i_pages, slot, (void *)entry);
-	return (void *)entry;
+	unsigned long v = xa_to_value(
+		radix_tree_deref_slot_protected(slot, &mapping->i_pages.xa_lock));
+	void *entry = xa_mk_value(v | DAX_LOCKED);
+	radix_tree_replace_slot(&mapping->i_pages, slot, entry);
+	return entry;
 }
 
 /*
@@ -209,17 +209,16 @@ static inline void *lock_slot(struct address_space *mapping, void **slot)
  */
 static inline void *unlock_slot(struct address_space *mapping, void **slot)
 {
-	unsigned long entry = (unsigned long)
-		radix_tree_deref_slot_protected(slot, &mapping->i_pages.xa_lock);
-
-	entry &= ~(unsigned long)RADIX_DAX_ENTRY_LOCK;
-	radix_tree_replace_slot(&mapping->i_pages, slot, (void *)entry);
-	return (void *)entry;
+	unsigned long v = xa_to_value(
+		radix_tree_deref_slot_protected(slot, &mapping->i_pages.xa_lock));
+	void *entry = xa_mk_value(v & ~DAX_LOCKED);
+	radix_tree_replace_slot(&mapping->i_pages, slot, entry);
+	return entry;
 }
 
 /*
  * Lookup entry in radix tree, wait for it to become unlocked if it is
- * exceptional entry and return it. The caller must call
+ * a DAX entry and return it. The caller must call
  * put_unlocked_mapping_entry() when he decided not to lock the entry or
  * put_locked_mapping_entry() when he locked the entry and now wants to
  * unlock it.
@@ -242,7 +241,7 @@ static void *__get_unlocked_mapping_entry(struct address_space *mapping,
 		entry = __radix_tree_lookup(&mapping->i_pages, index, NULL,
 					  &slot);
 		if (!entry ||
-		    WARN_ON_ONCE(!radix_tree_exceptional_entry(entry)) ||
+		    WARN_ON_ONCE(!xa_is_value(entry)) ||
 		    !slot_locked(mapping, slot)) {
 			if (slotp)
 				*slotp = slot;
@@ -283,7 +282,7 @@ static void unlock_mapping_entry(struct address_space *mapping, pgoff_t index)
 
 	xa_lock_irq(&mapping->i_pages);
 	entry = __radix_tree_lookup(&mapping->i_pages, index, NULL, &slot);
-	if (WARN_ON_ONCE(!entry || !radix_tree_exceptional_entry(entry) ||
+	if (WARN_ON_ONCE(!entry || !xa_is_value(entry) ||
 			 !slot_locked(mapping, slot))) {
 		xa_unlock_irq(&mapping->i_pages);
 		return;
@@ -472,12 +471,11 @@ void dax_unlock_mapping_entry(struct page *page)
 }
 
 /*
- * Find radix tree entry at given index. If it points to an exceptional entry,
- * return it with the radix tree entry locked. If the radix tree doesn't
- * contain given index, create an empty exceptional entry for the index and
- * return with it locked.
+ * Find radix tree entry at given index. If it is a DAX entry, return it
+ * with the radix tree entry locked. If the radix tree doesn't contain the
+ * given index, create an empty entry for the index and return with it locked.
  *
- * When requesting an entry with size RADIX_DAX_PMD, grab_mapping_entry() will
+ * When requesting an entry with size DAX_PMD, grab_mapping_entry() will
  * either return that locked entry or will return an error. This error will
  * happen if there are any 4k entries within the 2MiB range that we are
  * requesting.
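
Another aside, not part of the patch: with the x86-64 constants assumed below (PMD_SHIFT = 21, PAGE_SHIFT = 12), a DAX_PMD entry has order PMD_SHIFT - PAGE_SHIFT = 9, so it covers 2^9 = 512 PAGE_SIZE slots, i.e. 2 MiB. That is the range the comment above refers to: a single 4k entry anywhere inside those 2 MiB makes grab_mapping_entry() fail a DAX_PMD request. A minimal sketch, with dax_order() as a simplified stand-in for dax_radix_order():

	#include <stdio.h>

	#define PAGE_SHIFT	12	/* assumed: x86-64 */
	#define PMD_SHIFT	21	/* assumed: x86-64 */
	#define DAX_PMD		(1UL << 1)

	/* Simplified stand-in for dax_radix_order(): order of an entry's range. */
	static unsigned int dax_order(unsigned long value)
	{
		if (value & DAX_PMD)
			return PMD_SHIFT - PAGE_SHIFT;	/* 9 */
		return 0;
	}

	int main(void)
	{
		unsigned int order = dax_order(DAX_PMD);

		printf("order %u = %lu pages = %lu bytes\n",
		       order, 1UL << order, 1UL << (order + PAGE_SHIFT));
		/* prints: order 9 = 512 pages = 2097152 bytes */
		return 0;
	}
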
@@ -507,13 +505,13 @@ restart:
 	xa_lock_irq(&mapping->i_pages);
 	entry = get_unlocked_mapping_entry(mapping, index, &slot);
 
-	if (WARN_ON_ONCE(entry && !radix_tree_exceptional_entry(entry))) {
+	if (WARN_ON_ONCE(entry && !xa_is_value(entry))) {
 		entry = ERR_PTR(-EIO);
 		goto out_unlock;
 	}
 
 	if (entry) {
-		if (size_flag & RADIX_DAX_PMD) {
+		if (size_flag & DAX_PMD) {
 			if (dax_is_pte_entry(entry)) {
 				put_unlocked_mapping_entry(mapping, index,
 						entry);
@@ -584,7 +582,7 @@ restart:
 					true);
 	}
 
-	entry = dax_radix_locked_entry(0, size_flag | RADIX_DAX_EMPTY);
+	entry = dax_radix_locked_entry(0, size_flag | DAX_EMPTY);
 
 	err = __radix_tree_insert(&mapping->i_pages, index,
 			dax_radix_order(entry), entry);
@@ -673,8 +671,7 @@ struct page *dax_layout_busy_page(struct address_space *mapping)
 			if (index >= end)
 				break;
 
-			if (WARN_ON_ONCE(
-			     !radix_tree_exceptional_entry(pvec_ent)))
+			if (WARN_ON_ONCE(!xa_is_value(pvec_ent)))
 				continue;
 
 			xa_lock_irq(&mapping->i_pages);
@@ -713,7 +710,7 @@ static int __dax_invalidate_mapping_entry(struct address_space *mapping,
 
 	xa_lock_irq(pages);
 	entry = get_unlocked_mapping_entry(mapping, index, NULL);
-	if (!entry || WARN_ON_ONCE(!radix_tree_exceptional_entry(entry)))
+	if (!entry || WARN_ON_ONCE(!xa_is_value(entry)))
 		goto out;
 	if (!trunc &&
 	    (radix_tree_tag_get(pages, index, PAGECACHE_TAG_DIRTY) ||
@@ -729,8 +726,8 @@ out:
 	return ret;
 }
 /*
- * Delete exceptional DAX entry at @index from @mapping. Wait for radix tree
- * entry to get unlocked before deleting it.
+ * Delete DAX entry at @index from @mapping. Wait for it
+ * to be unlocked before deleting it.
  */
 int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index)
 {
@@ -740,7 +737,7 @@ int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index)
 	 * This gets called from truncate / punch_hole path. As such, the caller
 	 * must hold locks protecting against concurrent modifications of the
 	 * radix tree (usually fs-private i_mmap_sem for writing). Since the
-	 * caller has seen exceptional entry for this index, we better find it
+	 * caller has seen a DAX entry for this index, we better find it
 	 * at that index as well...
 	 */
 	WARN_ON_ONCE(!ret);
@@ -748,7 +745,7 @@ int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index)
 }
 
 /*
- * Invalidate exceptional DAX entry if it is clean.
+ * Invalidate DAX entry if it is clean.
  */
 int dax_invalidate_mapping_entry_sync(struct address_space *mapping,
 				      pgoff_t index)
@@ -802,7 +799,7 @@ static void *dax_insert_mapping_entry(struct address_space *mapping,
 	if (dirty)
 		__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
 
-	if (dax_is_zero_entry(entry) && !(flags & RADIX_DAX_ZERO_PAGE)) {
+	if (dax_is_zero_entry(entry) && !(flags & DAX_ZERO_PAGE)) {
 		/* we are replacing a zero page with block mapping */
 		if (dax_is_pmd_entry(entry))
 			unmap_mapping_pages(mapping, index & ~PG_PMD_COLOUR,
@@ -940,13 +937,13 @@ static int dax_writeback_one(struct dax_device *dax_dev,
 	 * A page got tagged dirty in DAX mapping? Something is seriously
 	 * wrong.
 	 */
-	if (WARN_ON(!radix_tree_exceptional_entry(entry)))
+	if (WARN_ON(!xa_is_value(entry)))
 		return -EIO;
 
 	xa_lock_irq(pages);
 	entry2 = get_unlocked_mapping_entry(mapping, index, &slot);
 	/* Entry got punched out / reallocated? */
-	if (!entry2 || WARN_ON_ONCE(!radix_tree_exceptional_entry(entry2)))
+	if (!entry2 || WARN_ON_ONCE(!xa_is_value(entry2)))
 		goto put_unlocked;
 	/*
 	 * Entry got reallocated elsewhere? No need to writeback. We have to
@@ -1123,8 +1120,9 @@ static vm_fault_t dax_load_hole(struct address_space *mapping, void *entry,
 	pfn_t pfn = pfn_to_pfn_t(my_zero_pfn(vaddr));
 	vm_fault_t ret;
 
-	dax_insert_mapping_entry(mapping, vmf, entry, pfn, RADIX_DAX_ZERO_PAGE,
-			false);
+	dax_insert_mapping_entry(mapping, vmf, entry, pfn,
+			DAX_ZERO_PAGE, false);
+
 	ret = vmf_insert_mixed(vmf->vma, vaddr, pfn);
 	trace_dax_load_hole(inode, vmf, ret);
 	return ret;
@@ -1514,7 +1512,7 @@ static vm_fault_t dax_pmd_load_hole(struct vm_fault *vmf, struct iomap *iomap,
 
 	pfn = page_to_pfn_t(zero_page);
 	ret = dax_insert_mapping_entry(mapping, vmf, entry, pfn,
-			RADIX_DAX_PMD | RADIX_DAX_ZERO_PAGE, false);
+			DAX_PMD | DAX_ZERO_PAGE, false);
 
 	ptl = pmd_lock(vmf->vma->vm_mm, vmf->pmd);
 	if (!pmd_none(*(vmf->pmd))) {
@@ -1597,7 +1595,7 @@ static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
 	 * is already in the tree, for instance), it will return -EEXIST and
 	 * we just fall back to 4k entries.
 	 */
-	entry = grab_mapping_entry(mapping, pgoff, RADIX_DAX_PMD);
+	entry = grab_mapping_entry(mapping, pgoff, DAX_PMD);
 	if (IS_ERR(entry))
 		goto fallback;
 
|
|
|
goto finish_iomap;
|
|
|
|
|
|
entry = dax_insert_mapping_entry(mapping, vmf, entry, pfn,
|
|
|
- RADIX_DAX_PMD, write && !sync);
|
|
|
+ DAX_PMD, write && !sync);
|
|
|
|
|
|
/*
|
|
|
* If we are doing synchronous page fault and inode needs fsync,
|