@@ -135,6 +135,10 @@ static struct khugepaged_scan khugepaged_scan = {
 	.mm_head = LIST_HEAD_INIT(khugepaged_scan.mm_head),
 };
 
+static DEFINE_SPINLOCK(split_queue_lock);
+static LIST_HEAD(split_queue);
+static unsigned long split_queue_len;
+static struct shrinker deferred_split_shrinker;
 
 static void set_recommended_min_free_kbytes(void)
 {
@@ -667,6 +671,9 @@ static int __init hugepage_init(void)
 	err = register_shrinker(&huge_zero_page_shrinker);
 	if (err)
 		goto err_hzp_shrinker;
+	err = register_shrinker(&deferred_split_shrinker);
+	if (err)
+		goto err_split_shrinker;
 
 	/*
 	 * By default disable transparent hugepages on smaller systems,
@@ -684,6 +691,8 @@ static int __init hugepage_init(void)
 
 	return 0;
 err_khugepaged:
+	unregister_shrinker(&deferred_split_shrinker);
+err_split_shrinker:
 	unregister_shrinker(&huge_zero_page_shrinker);
 err_hzp_shrinker:
 	khugepaged_slab_exit();
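The error path above follows the usual kernel unwind idiom: each registration gets its own label, and a failure jumps to the label that tears down only what was set up before it (so a failed deferred_split_shrinker registration still unregisters huge_zero_page_shrinker). A minimal userspace sketch of the same shape; the setup_a/setup_b names are hypothetical and only illustrate the pattern:

#include <stdio.h>

static int setup_a(void)     { return 0; }   /* stands in for the first registration */
static void teardown_a(void) { }             /* stands in for its unregister call */
static int setup_b(void)     { return -1; }  /* stands in for the second registration; pretend it fails */

static int demo_init(void)
{
	int err;

	err = setup_a();
	if (err)
		goto err_a;
	err = setup_b();
	if (err)
		goto err_b;	/* only the earlier step must be undone */
	return 0;

err_b:
	teardown_a();
err_a:
	return err;
}

int main(void)
{
	printf("demo_init: %d\n", demo_init());
	return 0;
}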
@@ -740,6 +749,27 @@ static inline pmd_t mk_huge_pmd(struct page *page, pgprot_t prot)
 	return entry;
 }
 
+static inline struct list_head *page_deferred_list(struct page *page)
+{
+	/*
+	 * ->lru in the tail pages is occupied by compound_head.
+	 * Let's use ->mapping + ->index in the second tail page as list_head.
+	 */
+	return (struct list_head *)&page[2].mapping;
+}
+
+void prep_transhuge_page(struct page *page)
+{
+	/*
+	 * we use page->mapping and page->index in the second tail page
+	 * as list_head: assuming THP order >= 2
+	 */
+	BUILD_BUG_ON(HPAGE_PMD_ORDER < 2);
+
+	INIT_LIST_HEAD(page_deferred_list(page));
+	set_compound_page_dtor(page, TRANSHUGE_PAGE_DTOR);
+}
+
 static int __do_huge_pmd_anonymous_page(struct mm_struct *mm,
 					struct vm_area_struct *vma,
 					unsigned long address, pmd_t *pmd,
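page_deferred_list() leans on struct page's layout: ->mapping and ->index of the second tail page are adjacent and, with a pointer-sized unsigned long as in the kernel, together span exactly one struct list_head, so they can host the queue linkage directly. A toy userspace model of that field reuse; the toy_* names are made up for illustration and this is not kernel code:

#include <assert.h>
#include <stddef.h>
#include <stdio.h>

struct toy_list_head {			/* stand-in for struct list_head */
	struct toy_list_head *next, *prev;
};

struct toy_page {			/* only the two reused fields */
	void *mapping;
	unsigned long index;
};

/* mirrors page_deferred_list(): reinterpret the two fields as a list node */
static struct toy_list_head *toy_deferred_list(struct toy_page *p)
{
	return (struct toy_list_head *)&p->mapping;
}

int main(void)
{
	struct toy_page page = { 0 };
	struct toy_list_head *node = toy_deferred_list(&page);

	/* the reuse only works if the node fits exactly over mapping + index */
	static_assert(sizeof(struct toy_list_head) ==
		      sizeof(void *) + sizeof(unsigned long),
		      "list node must fit in the two reused fields");

	node->next = node;		/* what INIT_LIST_HEAD() does */
	node->prev = node;
	printf("list empty: %d\n", node->next == node);
	return 0;
}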
@@ -896,6 +926,7 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		count_vm_event(THP_FAULT_FALLBACK);
 		return VM_FAULT_FALLBACK;
 	}
+	prep_transhuge_page(page);
 	return __do_huge_pmd_anonymous_page(mm, vma, address, pmd, page, gfp,
 					    flags);
 }
@@ -1192,7 +1223,9 @@ alloc:
 	} else
 		new_page = NULL;
 
-	if (unlikely(!new_page)) {
+	if (likely(new_page)) {
+		prep_transhuge_page(new_page);
+	} else {
 		if (!page) {
 			split_huge_pmd(vma, pmd, address);
 			ret |= VM_FAULT_FALLBACK;
@@ -2109,6 +2142,7 @@ khugepaged_alloc_page(struct page **hpage, gfp_t gfp, struct mm_struct *mm,
 		return NULL;
 	}
 
+	prep_transhuge_page(*hpage);
 	count_vm_event(THP_COLLAPSE_ALLOC);
 	return *hpage;
 }
@@ -2120,8 +2154,12 @@ static int khugepaged_find_target_node(void)
 
 static inline struct page *alloc_hugepage(int defrag)
 {
-	return alloc_pages(alloc_hugepage_gfpmask(defrag, 0),
-			   HPAGE_PMD_ORDER);
+	struct page *page;
+
+	page = alloc_pages(alloc_hugepage_gfpmask(defrag, 0), HPAGE_PMD_ORDER);
+	if (page)
+		prep_transhuge_page(page);
+	return page;
 }
 
 static struct page *khugepaged_alloc_hugepage(bool *wait)
@@ -3098,7 +3136,7 @@ static int __split_huge_page_tail(struct page *head, int tail,
 	set_page_idle(page_tail);
 
 	/* ->mapping in first tail page is compound_mapcount */
-	VM_BUG_ON_PAGE(tail != 1 && page_tail->mapping != TAIL_MAPPING,
+	VM_BUG_ON_PAGE(tail > 2 && page_tail->mapping != TAIL_MAPPING,
 			page_tail);
 	page_tail->mapping = head->mapping;
 
@@ -3207,12 +3245,20 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
 	freeze_page(anon_vma, head);
 	VM_BUG_ON_PAGE(compound_mapcount(head), head);
 
+	/* Prevent deferred_split_scan() touching ->_count */
+	spin_lock(&split_queue_lock);
 	count = page_count(head);
 	mapcount = total_mapcount(head);
 	if (mapcount == count - 1) {
+		if (!list_empty(page_deferred_list(head))) {
+			split_queue_len--;
+			list_del(page_deferred_list(head));
+		}
+		spin_unlock(&split_queue_lock);
 		__split_huge_page(page, list);
 		ret = 0;
 	} else if (IS_ENABLED(CONFIG_DEBUG_VM) && mapcount > count - 1) {
+		spin_unlock(&split_queue_lock);
 		pr_alert("total_mapcount: %u, page_count(): %u\n",
 				mapcount, count);
 		if (PageTail(page))
@@ -3220,6 +3266,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
 		dump_page(page, "total_mapcount(head) > page_count(head) - 1");
 		BUG();
 	} else {
+		spin_unlock(&split_queue_lock);
 		unfreeze_page(anon_vma, head);
 		ret = -EBUSY;
 	}
@@ -3231,3 +3278,87 @@ out:
 	count_vm_event(!ret ? THP_SPLIT_PAGE : THP_SPLIT_PAGE_FAILED);
 	return ret;
 }
+
+void free_transhuge_page(struct page *page)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&split_queue_lock, flags);
+	if (!list_empty(page_deferred_list(page))) {
+		split_queue_len--;
+		list_del(page_deferred_list(page));
+	}
+	spin_unlock_irqrestore(&split_queue_lock, flags);
+	free_compound_page(page);
+}
+
+void deferred_split_huge_page(struct page *page)
+{
+	unsigned long flags;
+
+	VM_BUG_ON_PAGE(!PageTransHuge(page), page);
+
+	spin_lock_irqsave(&split_queue_lock, flags);
+	if (list_empty(page_deferred_list(page))) {
+		list_add_tail(page_deferred_list(page), &split_queue);
+		split_queue_len++;
+	}
+	spin_unlock_irqrestore(&split_queue_lock, flags);
+}
+
+static unsigned long deferred_split_count(struct shrinker *shrink,
+		struct shrink_control *sc)
+{
+	/*
+	 * Splitting a page from split_queue will free up at least one page,
+	 * at most HPAGE_PMD_NR - 1. We don't track the exact number;
+	 * use HPAGE_PMD_NR / 2 as a ballpark.
+	 */
+	return ACCESS_ONCE(split_queue_len) * HPAGE_PMD_NR / 2;
+}
+
+static unsigned long deferred_split_scan(struct shrinker *shrink,
+		struct shrink_control *sc)
+{
+	unsigned long flags;
+	LIST_HEAD(list), *pos, *next;
+	struct page *page;
+	int split = 0;
+
+	spin_lock_irqsave(&split_queue_lock, flags);
+	list_splice_init(&split_queue, &list);
+
+	/* Take pin on all head pages to avoid freeing them under us */
+	list_for_each_safe(pos, next, &list) {
+		page = list_entry((void *)pos, struct page, mapping);
+		page = compound_head(page);
+		/* race with put_compound_page() */
+		if (!get_page_unless_zero(page)) {
+			list_del_init(page_deferred_list(page));
+			split_queue_len--;
+		}
+	}
+	spin_unlock_irqrestore(&split_queue_lock, flags);
+
+	list_for_each_safe(pos, next, &list) {
+		page = list_entry((void *)pos, struct page, mapping);
+		lock_page(page);
+		/* split_huge_page() removes page from list on success */
+		if (!split_huge_page(page))
+			split++;
+		unlock_page(page);
+		put_page(page);
+	}
+
+	spin_lock_irqsave(&split_queue_lock, flags);
+	list_splice_tail(&list, &split_queue);
+	spin_unlock_irqrestore(&split_queue_lock, flags);
+
+	return split * HPAGE_PMD_NR / 2;
+}
+
+static struct shrinker deferred_split_shrinker = {
+	.count_objects = deferred_split_count,
+	.scan_objects = deferred_split_scan,
+	.seeks = DEFAULT_SEEKS,
+};
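For reference, the shrinker interface the new code plugs into is small: count_objects() only estimates how many objects could be freed, and scan_objects() does a bounded amount of freeing per call. Below is a minimal sketch against the register_shrinker() API of this kernel era; the demo_* names and the bare counter are hypothetical and only show the contract, not anything from the patch:

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/shrinker.h>

static unsigned long demo_nr_objects;	/* plays the role of split_queue_len */

static unsigned long demo_count(struct shrinker *shrink,
				struct shrink_control *sc)
{
	/* returning 0 tells reclaim there is nothing worth scanning */
	return demo_nr_objects;
}

static unsigned long demo_scan(struct shrinker *shrink,
			       struct shrink_control *sc)
{
	/* sc->nr_to_scan bounds how much work one call should do */
	unsigned long freed = min(demo_nr_objects, sc->nr_to_scan);

	demo_nr_objects -= freed;
	return freed ? freed : SHRINK_STOP;
}

static struct shrinker demo_shrinker = {
	.count_objects	= demo_count,
	.scan_objects	= demo_scan,
	.seeks		= DEFAULT_SEEKS,
};

static int __init demo_init(void)
{
	return register_shrinker(&demo_shrinker);
}

static void __exit demo_exit(void)
{
	unregister_shrinker(&demo_shrinker);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");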