@@ -351,6 +351,19 @@ static void dax_disassociate_entry(void *entry, struct address_space *mapping,
 	}
 }
 
+static struct page *dax_busy_page(void *entry)
+{
+	unsigned long pfn;
+
+	for_each_mapped_pfn(entry, pfn) {
+		struct page *page = pfn_to_page(pfn);
+
+		if (page_ref_count(page) > 1)
+			return page;
+	}
+	return NULL;
+}
+
 /*
  * Find radix tree entry at given index. If it points to an exceptional entry,
  * return it with the radix tree entry locked. If the radix tree doesn't
@@ -492,6 +505,90 @@ restart:
 	return entry;
 }
 
+/**
+ * dax_layout_busy_page - find first pinned page in @mapping
+ * @mapping: address space to scan for a page with ref count > 1
+ *
+ * DAX requires ZONE_DEVICE mapped pages. These pages are never
+ * 'onlined' to the page allocator so they are considered idle when
+ * page->count == 1. A filesystem uses this interface to determine if
+ * any page in the mapping is busy, i.e. for DMA, or other
+ * get_user_pages() usages.
+ *
+ * It is expected that the filesystem is holding locks to block the
+ * establishment of new mappings in this address_space. I.e. it expects
+ * to be able to run unmap_mapping_range() and subsequently not race
+ * mapping_mapped() becoming true.
+ */
+struct page *dax_layout_busy_page(struct address_space *mapping)
+{
+	pgoff_t indices[PAGEVEC_SIZE];
+	struct page *page = NULL;
+	struct pagevec pvec;
+	pgoff_t index, end;
+	unsigned i;
+
+	/*
+	 * In the 'limited' case get_user_pages() for dax is disabled.
+	 */
+	if (IS_ENABLED(CONFIG_FS_DAX_LIMITED))
+		return NULL;
+
+	if (!dax_mapping(mapping) || !mapping_mapped(mapping))
+		return NULL;
+
+	pagevec_init(&pvec);
+	index = 0;
+	end = -1;
+
+	/*
+	 * If we race get_user_pages_fast() here either we'll see the
+	 * elevated page count in the pagevec_lookup and wait, or
+	 * get_user_pages_fast() will see that the page it took a reference
+	 * against is no longer mapped in the page tables and bail to the
+	 * get_user_pages() slow path.  The slow path is protected by
+	 * pte_lock() and pmd_lock(). New references are not taken without
+	 * holding those locks, and unmap_mapping_range() will not zero the
+	 * pte or pmd without holding the respective lock, so we are
+	 * guaranteed to either see new references or prevent new
+	 * references from being established.
+	 */
+	unmap_mapping_range(mapping, 0, 0, 1);
+
+	while (index < end && pagevec_lookup_entries(&pvec, mapping, index,
+				min(end - index, (pgoff_t)PAGEVEC_SIZE),
+				indices)) {
+		for (i = 0; i < pagevec_count(&pvec); i++) {
+			struct page *pvec_ent = pvec.pages[i];
+			void *entry;
+
+			index = indices[i];
+			if (index >= end)
+				break;
+
+			if (!radix_tree_exceptional_entry(pvec_ent))
+				continue;
+
+			xa_lock_irq(&mapping->i_pages);
+			entry = get_unlocked_mapping_entry(mapping, index, NULL);
+			if (entry)
+				page = dax_busy_page(entry);
+			put_unlocked_mapping_entry(mapping, index, entry);
+			xa_unlock_irq(&mapping->i_pages);
+			if (page)
+				break;
+		}
+		pagevec_remove_exceptionals(&pvec);
+		pagevec_release(&pvec);
+		index++;
+
+		if (page)
+			break;
+	}
+	return page;
+}
+EXPORT_SYMBOL_GPL(dax_layout_busy_page);
+
 static int __dax_invalidate_mapping_entry(struct address_space *mapping,
 					  pgoff_t index, bool trunc)
 {
@@ -912,7 +1009,6 @@ static vm_fault_t dax_load_hole(struct address_space *mapping, void *entry,
 	unsigned long vaddr = vmf->address;
 	vm_fault_t ret = VM_FAULT_NOPAGE;
 	struct page *zero_page;
-	void *entry2;
 	pfn_t pfn;
 
 	zero_page = ZERO_PAGE(0);
@@ -922,13 +1018,8 @@ static vm_fault_t dax_load_hole(struct address_space *mapping, void *entry,
 	}
 
 	pfn = page_to_pfn_t(zero_page);
-	entry2 = dax_insert_mapping_entry(mapping, vmf, entry, pfn,
-			RADIX_DAX_ZERO_PAGE, false);
-	if (IS_ERR(entry2)) {
-		ret = VM_FAULT_SIGBUS;
-		goto out;
-	}
-
+	dax_insert_mapping_entry(mapping, vmf, entry, pfn, RADIX_DAX_ZERO_PAGE,
+			false);
 	ret = vmf_insert_mixed(vmf->vma, vaddr, pfn);
 out:
 	trace_dax_load_hole(inode, vmf, ret);
@@ -991,6 +1082,7 @@ dax_iomap_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
 	struct iov_iter *iter = data;
 	loff_t end = pos + length, done = 0;
 	ssize_t ret = 0;
+	size_t xfer;
 	int id;
 
 	if (iov_iter_rw(iter) == READ) {
@@ -1054,18 +1146,20 @@ dax_iomap_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
 		 * vfs_write(), depending on which operation we are doing.
 		 */
 		if (iov_iter_rw(iter) == WRITE)
-			map_len = dax_copy_from_iter(dax_dev, pgoff, kaddr,
+			xfer = dax_copy_from_iter(dax_dev, pgoff, kaddr,
 					map_len, iter);
 		else
-			map_len = copy_to_iter(kaddr, map_len, iter);
-		if (map_len <= 0) {
-			ret = map_len ? map_len : -EFAULT;
-			break;
-		}
+			xfer = dax_copy_to_iter(dax_dev, pgoff, kaddr,
+					map_len, iter);
 
-		pos += map_len;
-		length -= map_len;
-		done += map_len;
+		pos += xfer;
+		length -= xfer;
+		done += xfer;
+
+		if (xfer == 0)
+			ret = -EFAULT;
+		if (xfer < map_len)
+			break;
 	}
 	dax_read_unlock(id);
 
@@ -1240,10 +1334,6 @@ static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp,
 
 		entry = dax_insert_mapping_entry(mapping, vmf, entry, pfn,
 						 0, write && !sync);
-		if (IS_ERR(entry)) {
-			error = PTR_ERR(entry);
-			goto error_finish_iomap;
-		}
 
 		/*
 		 * If we are doing synchronous page fault and inode needs fsync,
@@ -1324,8 +1414,6 @@ static vm_fault_t dax_pmd_load_hole(struct vm_fault *vmf, struct iomap *iomap,
 	pfn = page_to_pfn_t(zero_page);
 	ret = dax_insert_mapping_entry(mapping, vmf, entry, pfn,
 			RADIX_DAX_PMD | RADIX_DAX_ZERO_PAGE, false);
-	if (IS_ERR(ret))
-		goto fallback;
 
 	ptl = pmd_lock(vmf->vma->vm_mm, vmf->pmd);
 	if (!pmd_none(*(vmf->pmd))) {
@@ -1447,8 +1535,6 @@ static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
 
 	entry = dax_insert_mapping_entry(mapping, vmf, entry, pfn,
 					RADIX_DAX_PMD, write && !sync);
-	if (IS_ERR(entry))
-		goto finish_iomap;
 
 	/*
 	 * If we are doing synchronous page fault and inode needs fsync,
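
For reference, here is a minimal, hypothetical sketch of how a filesystem caller might use the new dax_layout_busy_page() export before truncating or freeing blocks. It is not part of this patch: the function name, the error handling, and the wait on the page refcount are illustrative assumptions. Per the kernel-doc above, the caller is expected to already hold the locks that block establishment of new mappings; a real caller would also drop and retake those locks around the sleep.

/* Illustrative sketch only -- not from this patch. */
#include <linux/dax.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/page_ref.h>
#include <linux/wait_bit.h>

static int example_break_dax_layouts(struct inode *inode)
{
	struct page *page;
	int ret;

	for (;;) {
		/* Find a page still pinned by get_user_pages()/DMA, if any. */
		page = dax_layout_busy_page(inode->i_mapping);
		if (!page)
			return 0;	/* mapping is idle; safe to change the layout */

		/*
		 * Assumption: sleep until the extra reference is dropped.
		 * The wake-up when the last pin on a DAX page is released
		 * is provided elsewhere in this series; a real caller would
		 * unlock before sleeping and rescan after waking.
		 */
		ret = wait_var_event_killable(&page->_refcount,
					      page_ref_count(page) == 1);
		if (ret)
			return ret;	/* fatal signal pending */
	}
}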