Merge branch 'akpm' (patches from Andrew)

Merge fixes from Andrew Morton:
 "11 fixes.

  The presence of 'thp: reduce indentation level in change_huge_pmd()'
  is unfortunate. But the patchset had been decently reviewed and tested
  before we decided it was needed in -stable and I felt it best not to
  churn things at the last minute"

* emailed patches from Andrew Morton <akpm@linux-foundation.org>:
  mailmap: add Martin Kepplinger's email
  zsmalloc: expand class bit
  zram: do not use copy_page with non-page aligned address
  zram: fix operator precedence to get offset
  hugetlbfs: fix offset overflow in hugetlbfs mmap
  thp: fix MADV_DONTNEED vs clear soft dirty race
  thp: fix MADV_DONTNEED vs. MADV_FREE race
  mm: drop unused pmdp_huge_get_and_clear_notify()
  thp: fix MADV_DONTNEED vs. numa balancing race
  thp: reduce indentation level in change_huge_pmd()
  z3fold: fix page locking in z3fold_alloc()
Linus Torvalds 8 years ago
parent
commit
a232591ba2

+ 2 - 0
.mailmap

@@ -99,6 +99,8 @@ Linas Vepstas <linas@austin.ibm.com>
 Linus Lüssing <linus.luessing@c0d3.blue> <linus.luessing@web.de>
 Linus Lüssing <linus.luessing@c0d3.blue> <linus.luessing@ascom.ch>
 Mark Brown <broonie@sirena.org.uk>
+Martin Kepplinger <martink@posteo.de> <martin.kepplinger@theobroma-systems.com>
+Martin Kepplinger <martink@posteo.de> <martin.kepplinger@ginzinger.com>
 Matthieu CASTET <castet.matthieu@free.fr>
 Mauro Carvalho Chehab <mchehab@kernel.org> <mchehab@brturbo.com.br>
 Mauro Carvalho Chehab <mchehab@kernel.org> <maurochehab@gmail.com>
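
A note on the .mailmap format: each line maps a commit-time identity to a canonical one. The first name/address pair is the canonical form and the trailing address is the alias, so the two new entries above fold Martin Kepplinger's theobroma-systems.com and ginzinger.com addresses into martink@posteo.de in the output of git shortlog and git log --use-mailmap.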

+ 3 - 3
drivers/block/zram/zram_drv.c

@@ -523,7 +523,7 @@ static int zram_decompress_page(struct zram *zram, char *mem, u32 index)
 
 	cmem = zs_map_object(meta->mem_pool, handle, ZS_MM_RO);
 	if (size == PAGE_SIZE) {
-		copy_page(mem, cmem);
+		memcpy(mem, cmem, PAGE_SIZE);
 	} else {
 		struct zcomp_strm *zstrm = zcomp_stream_get(zram->comp);
 
@@ -717,7 +717,7 @@ compress_again:
 
 	if ((clen == PAGE_SIZE) && !is_partial_io(bvec)) {
 		src = kmap_atomic(page);
-		copy_page(cmem, src);
+		memcpy(cmem, src, PAGE_SIZE);
 		kunmap_atomic(src);
 	} else {
 		memcpy(cmem, src, clen);
@@ -928,7 +928,7 @@ static int zram_rw_page(struct block_device *bdev, sector_t sector,
 	}
 
 	index = sector >> SECTORS_PER_PAGE_SHIFT;
-	offset = sector & (SECTORS_PER_PAGE - 1) << SECTOR_SHIFT;
+	offset = (sector & (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT;
 
 	bv.bv_page = page;
 	bv.bv_len = PAGE_SIZE;

+ 12 - 3
fs/hugetlbfs/inode.c

@@ -136,17 +136,26 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
 	vma->vm_flags |= VM_HUGETLB | VM_DONTEXPAND;
 	vma->vm_ops = &hugetlb_vm_ops;
 
+	/*
+	 * Offset passed to mmap (before page shift) could have been
+	 * negative when represented as a (l)off_t.
+	 */
+	if (((loff_t)vma->vm_pgoff << PAGE_SHIFT) < 0)
+		return -EINVAL;
+
 	if (vma->vm_pgoff & (~huge_page_mask(h) >> PAGE_SHIFT))
 		return -EINVAL;
 
 	vma_len = (loff_t)(vma->vm_end - vma->vm_start);
+	len = vma_len + ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
+	/* check for overflow */
+	if (len < vma_len)
+		return -EINVAL;
 
 	inode_lock(inode);
 	file_accessed(file);
 
 	ret = -ENOMEM;
-	len = vma_len + ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
-
 	if (hugetlb_reserve_pages(inode,
 				vma->vm_pgoff >> huge_page_order(h),
 				len >> huge_page_shift(h), vma,
@@ -155,7 +164,7 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
 
 	ret = 0;
 	if (vma->vm_flags & VM_WRITE && inode->i_size < len)
-		inode->i_size = len;
+		i_size_write(inode, len);
 out:
 	inode_unlock(inode);
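
Two details worth spelling out in the hugetlbfs hunk. The overflow test works because a wrapped sum is smaller than either addend; signed overflow is undefined in standard C, but the kernel builds with -fno-strict-overflow, so the wrap-around the check relies on is well defined there. And i_size_write() replaces the bare assignment so the size update stays coherent with lockless i_size_read() callers. A userspace sketch of the overflow idiom, using unsigned arithmetic (always well defined) with uint64_t standing in for loff_t:

#include <stdio.h>
#include <stdint.h>

/* If vma_len + off wraps, the result is smaller than vma_len. */
static int map_length(uint64_t vma_len, uint64_t off, uint64_t *len)
{
	*len = vma_len + off;
	return *len < vma_len ? -1 : 0;	/* -1: overflow, like -EINVAL */
}

int main(void)
{
	uint64_t len;

	if (map_length(1ULL << 40, UINT64_MAX - 10, &len) < 0)
		printf("overflow detected\n");
	return 0;
}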
 

+ 8 - 1
fs/proc/task_mmu.c

@@ -900,7 +900,14 @@ static inline void clear_soft_dirty(struct vm_area_struct *vma,
 static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma,
 		unsigned long addr, pmd_t *pmdp)
 {
-	pmd_t pmd = pmdp_huge_get_and_clear(vma->vm_mm, addr, pmdp);
+	pmd_t pmd = *pmdp;
+
+	/* See comment in change_huge_pmd() */
+	pmdp_invalidate(vma, addr, pmdp);
+	if (pmd_dirty(*pmdp))
+		pmd = pmd_mkdirty(pmd);
+	if (pmd_young(*pmdp))
+		pmd = pmd_mkyoung(pmd);
 
 	pmd = pmd_wrprotect(pmd);
 	pmd = pmd_clear_soft_dirty(pmd);

+ 0 - 13
include/linux/mmu_notifier.h

@@ -394,18 +394,6 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
 	___pud;								\
 })
 
-#define pmdp_huge_get_and_clear_notify(__mm, __haddr, __pmd)		\
-({									\
-	unsigned long ___haddr = __haddr & HPAGE_PMD_MASK;		\
-	pmd_t ___pmd;							\
-									\
-	___pmd = pmdp_huge_get_and_clear(__mm, __haddr, __pmd);		\
-	mmu_notifier_invalidate_range(__mm, ___haddr,			\
-				      ___haddr + HPAGE_PMD_SIZE);	\
-									\
-	___pmd;								\
-})
-
 /*
  * set_pte_at_notify() sets the pte _after_ running the notifier.
  * This is safe to start by updating the secondary MMUs, because the primary MMU
@@ -489,7 +477,6 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
 #define	ptep_clear_flush_notify ptep_clear_flush
 #define pmdp_huge_clear_flush_notify pmdp_huge_clear_flush
 #define pudp_huge_clear_flush_notify pudp_huge_clear_flush
-#define pmdp_huge_get_and_clear_notify pmdp_huge_get_and_clear
 #define set_pte_at_notify set_pte_at
 
 #endif /* CONFIG_MMU_NOTIFIER */

+ 59 - 28
mm/huge_memory.c

@@ -1568,8 +1568,7 @@ bool madvise_free_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
 		deactivate_page(page);
 
 	if (pmd_young(orig_pmd) || pmd_dirty(orig_pmd)) {
-		orig_pmd = pmdp_huge_get_and_clear_full(tlb->mm, addr, pmd,
-			tlb->fullmm);
+		pmdp_invalidate(vma, addr, pmd);
 		orig_pmd = pmd_mkold(orig_pmd);
 		orig_pmd = pmd_mkclean(orig_pmd);
 
@@ -1724,37 +1723,69 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
 {
 	struct mm_struct *mm = vma->vm_mm;
 	spinlock_t *ptl;
-	int ret = 0;
+	pmd_t entry;
+	bool preserve_write;
+	int ret;
 
 	ptl = __pmd_trans_huge_lock(pmd, vma);
-	if (ptl) {
-		pmd_t entry;
-		bool preserve_write = prot_numa && pmd_write(*pmd);
-		ret = 1;
+	if (!ptl)
+		return 0;
 
-		/*
-		 * Avoid trapping faults against the zero page. The read-only
-		 * data is likely to be read-cached on the local CPU and
-		 * local/remote hits to the zero page are not interesting.
-		 */
-		if (prot_numa && is_huge_zero_pmd(*pmd)) {
-			spin_unlock(ptl);
-			return ret;
-		}
+	preserve_write = prot_numa && pmd_write(*pmd);
+	ret = 1;
 
-		if (!prot_numa || !pmd_protnone(*pmd)) {
-			entry = pmdp_huge_get_and_clear_notify(mm, addr, pmd);
-			entry = pmd_modify(entry, newprot);
-			if (preserve_write)
-				entry = pmd_mk_savedwrite(entry);
-			ret = HPAGE_PMD_NR;
-			set_pmd_at(mm, addr, pmd, entry);
-			BUG_ON(vma_is_anonymous(vma) && !preserve_write &&
-					pmd_write(entry));
-		}
-		spin_unlock(ptl);
-	}
+	/*
+	 * Avoid trapping faults against the zero page. The read-only
+	 * data is likely to be read-cached on the local CPU and
+	 * local/remote hits to the zero page are not interesting.
+	 */
+	if (prot_numa && is_huge_zero_pmd(*pmd))
+		goto unlock;
+
+	if (prot_numa && pmd_protnone(*pmd))
+		goto unlock;
+
+	/*
+	 * In the prot_numa case, we are under down_read(mmap_sem). It is
+	 * critical not to clear the pmd intermittently, to avoid racing
+	 * with MADV_DONTNEED, which is also under down_read(mmap_sem):
+	 *
+	 *	CPU0:				CPU1:
+	 *				change_huge_pmd(prot_numa=1)
+	 *				 pmdp_huge_get_and_clear_notify()
+	 * madvise_dontneed()
+	 *  zap_pmd_range()
+	 *   pmd_trans_huge(*pmd) == 0 (without ptl)
+	 *   // skip the pmd
+	 *				 set_pmd_at();
+	 *				 // pmd is re-established
+	 *
+	 * The race makes MADV_DONTNEED miss the huge pmd and not clear it,
+	 * which may break userspace.
+	 *
+	 * pmdp_invalidate() is required to make sure we don't miss
+	 * dirty/young flags set by hardware.
+	 */
+	entry = *pmd;
+	pmdp_invalidate(vma, addr, pmd);
+
+	/*
+	 * Recover dirty/young flags.  It relies on pmdp_invalidate to not
+	 * corrupt them.
+	 */
+	if (pmd_dirty(*pmd))
+		entry = pmd_mkdirty(entry);
+	if (pmd_young(*pmd))
+		entry = pmd_mkyoung(entry);
 
+	entry = pmd_modify(entry, newprot);
+	if (preserve_write)
+		entry = pmd_mk_savedwrite(entry);
+	ret = HPAGE_PMD_NR;
+	set_pmd_at(mm, addr, pmd, entry);
+	BUG_ON(vma_is_anonymous(vma) && !preserve_write && pmd_write(entry));
+unlock:
+	spin_unlock(ptl);
 	return ret;
 }
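
The CPU0/CPU1 diagram above is a check-then-act race against a lockless reader: briefly clearing the pmd lets zap_pmd_range(), which inspects pmd_trans_huge(*pmd) without the page-table lock, conclude there is nothing to zap. A hedged userspace analogue with pthreads (all names invented; the window is narrow, so any one run may or may not hit it):

#include <pthread.h>
#include <stdio.h>

static int value = 42;
static int *slot = &value;		/* stands in for the huge pmd */

static void *updater(void *arg)	/* old clear-then-reinstall pattern */
{
	(void)arg;
	int *old = __atomic_exchange_n(&slot, NULL, __ATOMIC_SEQ_CST);
	/* window: a concurrent lockless reader now sees NULL and skips */
	__atomic_store_n(&slot, old, __ATOMIC_SEQ_CST);
	return NULL;
}

static void *zapper(void *arg)	/* plays the MADV_DONTNEED side */
{
	(void)arg;
	if (!__atomic_load_n(&slot, __ATOMIC_SEQ_CST))
		printf("entry looked absent -- skipped\n");
	return NULL;
}

int main(void)
{
	pthread_t a, b;

	pthread_create(&a, NULL, updater, NULL);
	pthread_create(&b, NULL, zapper, NULL);
	pthread_join(a, NULL);
	pthread_join(b, NULL);
	return 0;
}

The fix avoids the window entirely: pmdp_invalidate() leaves the entry present-but-invalid instead of clearing it, and the dirty/young bits the hardware may have set in the meantime are folded back afterwards.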
 

+ 7 - 2
mm/z3fold.c

@@ -185,6 +185,12 @@ static inline void z3fold_page_lock(struct z3fold_header *zhdr)
 	spin_lock(&zhdr->page_lock);
 }
 
+/* Try to lock a z3fold page */
+static inline int z3fold_page_trylock(struct z3fold_header *zhdr)
+{
+	return spin_trylock(&zhdr->page_lock);
+}
+
 /* Unlock a z3fold page */
 static inline void z3fold_page_unlock(struct z3fold_header *zhdr)
 {
@@ -385,7 +391,7 @@ static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp,
 			spin_lock(&pool->lock);
 			zhdr = list_first_entry_or_null(&pool->unbuddied[i],
 						struct z3fold_header, buddy);
-			if (!zhdr) {
+			if (!zhdr || !z3fold_page_trylock(zhdr)) {
 				spin_unlock(&pool->lock);
 				continue;
 			}
@@ -394,7 +400,6 @@ static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp,
 			spin_unlock(&pool->lock);
 
 			page = virt_to_page(zhdr);
-			z3fold_page_lock(zhdr);
 			if (zhdr->first_chunks == 0) {
 				if (zhdr->middle_chunks != 0 &&
 				    chunks >= zhdr->start_middle)
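
The z3fold bug was lock ordering: the old code dropped pool->lock and only then called z3fold_page_lock(), leaving a window in which the page could be freed or reused before the lock was taken. Trylocking while the list lock still pins the header closes that window, at the cost of skipping busy entries. A pthread sketch of the same take-under-the-list-lock shape (types and names invented for illustration):

#include <pthread.h>
#include <stddef.h>

struct entry {
	pthread_mutex_t lock;
	struct entry *next;
};

static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;
static struct entry *head;

/* Returns a locked entry, or NULL if the list is empty or all are busy. */
static struct entry *grab_entry(void)
{
	struct entry *e;

	pthread_mutex_lock(&list_lock);
	for (e = head; e; e = e->next)
		if (pthread_mutex_trylock(&e->lock) == 0)
			break;	/* pinned: locked before list_lock drops */
	pthread_mutex_unlock(&list_lock);
	return e;
}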

+ 1 - 1
mm/zsmalloc.c

@@ -276,7 +276,7 @@ struct zs_pool {
 struct zspage {
 	struct {
 		unsigned int fullness:FULLNESS_BITS;
-		unsigned int class:CLASS_BITS;
+		unsigned int class:CLASS_BITS + 1;
 		unsigned int isolated:ISOLATED_BITS;
 		unsigned int magic:MAGIC_VAL_BITS;
 	};
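
The zsmalloc change widens a bit-field: once the number of size classes grew, the largest class index no longer fit in CLASS_BITS bits, and the assignment truncated silently (C defines unsigned bit-field assignment as reduction modulo 2^width). A standalone demonstration with made-up widths, not zsmalloc's real CLASS_BITS:

#include <stdio.h>

struct narrow { unsigned int class:6; };	/* pretend CLASS_BITS == 6 */
struct wide   { unsigned int class:7; };	/* CLASS_BITS + 1 */

int main(void)
{
	struct narrow n = { 0 };
	struct wide w = { 0 };

	n.class = 64;	/* 64 mod 2^6: high bit dropped, reads back as 0 */
	w.class = 64;
	printf("narrow: %u, wide: %u\n", n.class, w.class);	/* 0 vs 64 */
	return 0;
}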