|
@@ -38,6 +38,17 @@
|
|
|
#define CREATE_TRACE_POINTS
|
|
|
#include <trace/events/fs_dax.h>
|
|
|
|
|
|
+static inline unsigned int pe_order(enum page_entry_size pe_size)
|
|
|
+{
|
|
|
+ if (pe_size == PE_SIZE_PTE)
|
|
|
+ return PAGE_SHIFT - PAGE_SHIFT;
|
|
|
+ if (pe_size == PE_SIZE_PMD)
|
|
|
+ return PMD_SHIFT - PAGE_SHIFT;
|
|
|
+ if (pe_size == PE_SIZE_PUD)
|
|
|
+ return PUD_SHIFT - PAGE_SHIFT;
|
|
|
+ return ~0;
|
|
|
+}
|
|
|
+
|
|
|
/* We choose 4096 entries - same as per-zone page wait tables */
#define DAX_WAIT_TABLE_BITS 12
#define DAX_WAIT_TABLE_ENTRIES (1 << DAX_WAIT_TABLE_BITS)

/* Mask of the sub-PMD page index within a PMD-sized mapping */
#define PG_PMD_COLOUR ((PMD_SIZE >> PAGE_SHIFT) - 1)
/* Number of PAGE_SIZE pages covered by one PMD entry */
#define PG_PMD_NR (PMD_SIZE >> PAGE_SHIFT)

/* The order of a PMD entry */
#define PMD_ORDER (PMD_SHIFT - PAGE_SHIFT)

/* Hashed wait queues; presumably indexed by entry — see dax_entry_waitqueue() */
static wait_queue_head_t wait_table[DAX_WAIT_TABLE_ENTRIES];
|
|
|
|
|
|
static int __init init_dax_wait_table(void)
|
|
@@ -85,10 +99,15 @@ static void *dax_make_locked(unsigned long pfn, unsigned long flags)
|
|
|
DAX_LOCKED);
|
|
|
}
|
|
|
|
|
|
+static bool dax_is_locked(void *entry)
|
|
|
+{
|
|
|
+ return xa_to_value(entry) & DAX_LOCKED;
|
|
|
+}
|
|
|
+
|
|
|
/*
 * Order of the mapping described by @entry: PMD_ORDER for a PMD-sized
 * value entry, 0 (single page) otherwise.
 */
static unsigned int dax_entry_order(void *entry)
{
	if (xa_to_value(entry) & DAX_PMD)
		return PMD_ORDER;
	return 0;
}
|
|
|
|
|
@@ -181,6 +200,81 @@ static void dax_wake_mapping_entry_waiter(struct xarray *xa,
|
|
|
__wake_up(wq, TASK_NORMAL, wake_all ? 0 : 1, &key);
|
|
|
}
|
|
|
|
|
|
+static void dax_wake_entry(struct xa_state *xas, void *entry, bool wake_all)
|
|
|
+{
|
|
|
+ return dax_wake_mapping_entry_waiter(xas->xa, xas->xa_index, entry,
|
|
|
+ wake_all);
|
|
|
+}
|
|
|
+
|
|
|
/*
 * Look up entry in page cache, wait for it to become unlocked if it
 * is a DAX entry and return it. The caller must subsequently call
 * put_unlocked_entry() if it did not lock the entry or dax_unlock_entry()
 * if it did.
 *
 * Must be called with the i_pages lock held.
 */
static void *get_unlocked_entry(struct xa_state *xas)
{
	void *entry;
	struct wait_exceptional_entry_queue ewait;
	wait_queue_head_t *wq;

	init_wait(&ewait.wait);
	ewait.wait.func = wake_exceptional_entry_func;

	for (;;) {
		entry = xas_load(xas);
		/*
		 * Nothing to wait on: empty slot, an XArray-internal
		 * entry, an unexpected non-value entry (warn once), or
		 * a DAX entry that is already unlocked.
		 */
		if (!entry || xa_is_internal(entry) ||
				WARN_ON_ONCE(!xa_is_value(entry)) ||
				!dax_is_locked(entry))
			return entry;

		wq = dax_entry_waitqueue(xas->xa, xas->xa_index, entry,
				&ewait.key);
		prepare_to_wait_exclusive(wq, &ewait.wait,
					  TASK_UNINTERRUPTIBLE);
		/*
		 * Drop the lock while sleeping; the xa_state is stale
		 * afterwards, so reset it before the retry's xas_load().
		 */
		xas_unlock_irq(xas);
		xas_reset(xas);
		schedule();
		finish_wait(wq, &ewait.wait);
		xas_lock_irq(xas);
	}
}
|
|
|
+static void put_unlocked_entry(struct xa_state *xas, void *entry)
|
|
|
+{
|
|
|
+ /* If we were the only waiter woken, wake the next one */
|
|
|
+ if (entry)
|
|
|
+ dax_wake_entry(xas, entry, false);
|
|
|
+}
|
|
|
+
|
|
|
/*
 * We used the xa_state to get the entry, but then we locked the entry and
 * dropped the xa_lock, so we know the xa_state is stale and must be reset
 * before use.
 */
static void dax_unlock_entry(struct xa_state *xas, void *entry)
{
	void *old;

	xas_reset(xas);
	xas_lock_irq(xas);
	/* Store the unlocked value, displacing the locked entry. */
	old = xas_store(xas, entry);
	xas_unlock_irq(xas);
	/* Whatever was there must have carried DAX_LOCKED. */
	BUG_ON(!dax_is_locked(old));
	dax_wake_entry(xas, entry, false);
}
|
|
|
+/*
|
|
|
+ * Return: The entry stored at this location before it was locked.
|
|
|
+ */
|
|
|
+static void *dax_lock_entry(struct xa_state *xas, void *entry)
|
|
|
+{
|
|
|
+ unsigned long v = xa_to_value(entry);
|
|
|
+ return xas_store(xas, xa_mk_value(v | DAX_LOCKED));
|
|
|
+}
|
|
|
+
|
|
|
/*
|
|
|
* Check whether the given slot is locked. Must be called with the i_pages
|
|
|
* lock held.
|
|
@@ -1728,50 +1822,46 @@ EXPORT_SYMBOL_GPL(dax_iomap_fault);
|
|
|
/*
 * dax_insert_pfn_mkwrite - insert PTE or PMD entry into page tables
 * @vmf: The description of the fault
 * @pfn: PFN to insert
 * @order: Order of entry to insert.
 *
 * This function inserts a writeable PTE or PMD entry into the page tables
 * for an mmaped DAX file. It also marks the page cache entry as dirty.
 */
static vm_fault_t
dax_insert_pfn_mkwrite(struct vm_fault *vmf, pfn_t pfn, unsigned int order)
{
	struct address_space *mapping = vmf->vma->vm_file->f_mapping;
	XA_STATE_ORDER(xas, &mapping->i_pages, vmf->pgoff, order);
	void *entry;
	vm_fault_t ret;

	xas_lock_irq(&xas);
	entry = get_unlocked_entry(&xas);
	/* Did we race with someone splitting entry or so? */
	if (!entry ||
	    (order == 0 && !dax_is_pte_entry(entry)) ||
	    (order == PMD_ORDER && (xa_is_internal(entry) ||
				    !dax_is_pmd_entry(entry)))) {
		put_unlocked_entry(&xas, entry);
		xas_unlock_irq(&xas);
		trace_dax_insert_pfn_mkwrite_no_entry(mapping->host, vmf,
						      VM_FAULT_NOPAGE);
		return VM_FAULT_NOPAGE;
	}
	/* Mark dirty and lock the entry before dropping the xa_lock. */
	xas_set_mark(&xas, PAGECACHE_TAG_DIRTY);
	dax_lock_entry(&xas, entry);
	xas_unlock_irq(&xas);
	if (order == 0)
		ret = vmf_insert_mixed_mkwrite(vmf->vma, vmf->address, pfn);
#ifdef CONFIG_FS_DAX_PMD
	else if (order == PMD_ORDER)
		ret = vmf_insert_pfn_pmd(vmf->vma, vmf->address, vmf->pmd,
				pfn, true);
#endif
	else
		/* Orders other than 0/PMD_ORDER are not supported here. */
		ret = VM_FAULT_FALLBACK;
	dax_unlock_entry(&xas, entry);
	trace_dax_insert_pfn_mkwrite(mapping->host, vmf, ret);
	return ret;
}
|
|
@@ -1791,17 +1881,12 @@ vm_fault_t dax_finish_sync_fault(struct vm_fault *vmf,
|
|
|
{
	int err;
	loff_t start = ((loff_t)vmf->pgoff) << PAGE_SHIFT;
	unsigned int order = pe_order(pe_size);
	size_t len;

	/*
	 * pe_order() returns ~0 for an unrecognised pe_size; shifting
	 * PAGE_SIZE by that is undefined behaviour.  Keep the previous
	 * WARN-on-bad-size semantics instead of silently invoking UB.
	 */
	if (WARN_ON_ONCE(order == ~0U))
		return VM_FAULT_SIGBUS;
	len = PAGE_SIZE << order;

	/* Persist the faulting range before making the PFN writeable. */
	err = vfs_fsync_range(vmf->vma->vm_file, start, start + len - 1, 1);
	if (err)
		return VM_FAULT_SIGBUS;
	return dax_insert_pfn_mkwrite(vmf, pfn, order);
}
EXPORT_SYMBOL_GPL(dax_finish_sync_fault);
|