10 жил өмнө · 8664b90bae
--- a/arch/arm64/kernel/signal32.c
+++ b/arch/arm64/kernel/signal32.c
@@ -168,7 +168,8 @@ int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from)
 
				 		 * Other callers might not initialize the si_lsb field,
			
 
				 		 * so check explicitely for the right codes here.
			
 
				 		 */
			
 
				-		if (from->si_code == BUS_MCEERR_AR || from->si_code == BUS_MCEERR_AO)
			
 
				+		if (from->si_signo == SIGBUS &&
			
 
				+		    (from->si_code == BUS_MCEERR_AR || from->si_code == BUS_MCEERR_AO))
			
 
				 			err |= __put_user(from->si_addr_lsb, &to->si_addr_lsb);
			
 
				 #endif
			
 
				 		break;
			
@@ -201,8 +202,6 @@ int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from)
 
				 
			
 
				 int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from)
			
 
				 {
			
 
				-	memset(to, 0, sizeof *to);
			
 
				-
			
 
				 	if (copy_from_user(to, from, __ARCH_SI_PREAMBLE_SIZE) ||
			
 
				 	    copy_from_user(to->_sifields._pad,
			
 
				 			   from->_sifields._pad, SI_PAD_SIZE))
			
--- a/arch/mips/kernel/signal32.c
+++ b/arch/mips/kernel/signal32.c
@@ -409,8 +409,6 @@ int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from)
 
				 
			
 
				 int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from)
			
 
				 {
			
 
				-	memset(to, 0, sizeof *to);
			
 
				-
			
 
				 	if (copy_from_user(to, from, 3*sizeof(int)) ||
			
 
				 	    copy_from_user(to->_sifields._pad,
			
 
				 			   from->_sifields._pad, SI_PAD_SIZE32))
			
--- a/arch/powerpc/kernel/signal_32.c
+++ b/arch/powerpc/kernel/signal_32.c
@@ -966,8 +966,6 @@ int copy_siginfo_to_user32(struct compat_siginfo __user *d, const siginfo_t *s)
 
				 
			
 
				 int copy_siginfo_from_user32(siginfo_t *to, struct compat_siginfo __user *from)
			
 
				 {
			
 
				-	memset(to, 0, sizeof *to);
			
 
				-
			
 
				 	if (copy_from_user(to, from, 3*sizeof(int)) ||
			
 
				 	    copy_from_user(to->_sifields._pad,
			
 
				 			   from->_sifields._pad, SI_PAD_SIZE32))
			
--- a/arch/tile/kernel/compat_signal.c
+++ b/arch/tile/kernel/compat_signal.c
@@ -113,8 +113,6 @@ int copy_siginfo_from_user32(siginfo_t *to, struct compat_siginfo __user *from)
 
				 	if (!access_ok(VERIFY_READ, from, sizeof(struct compat_siginfo)))
			
 
				 		return -EFAULT;
			
 
				 
			
 
				-	memset(to, 0, sizeof(*to));
			
 
				-
			
 
				 	err = __get_user(to->si_signo, &from->si_signo);
			
 
				 	err |= __get_user(to->si_errno, &from->si_errno);
			
 
				 	err |= __get_user(to->si_code, &from->si_code);
			
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -3442,22 +3442,15 @@ void __init vfs_caches_init_early(void)
 
				 	inode_init_early();
			
 
				 }
			
 
				 
			
 
				-void __init vfs_caches_init(unsigned long mempages)
			
 
				+void __init vfs_caches_init(void)
			
 
				 {
			
 
				-	unsigned long reserve;
			
 
				-
			
 
				-	/* Base hash sizes on available memory, with a reserve equal to
			
 
				-           150% of current kernel size */
			
 
				-
			
 
				-	reserve = min((mempages - nr_free_pages()) * 3/2, mempages - 1);
			
 
				-	mempages -= reserve;
			
 
				-
			
 
				 	names_cachep = kmem_cache_create("names_cache", PATH_MAX, 0,
			
 
				 			SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
			
 
				 
			
 
				 	dcache_init();
			
 
				 	inode_init();
			
 
				-	files_init(mempages);
			
 
				+	files_init();
			
 
				+	files_maxfiles_init();
			
 
				 	mnt_init();
			
 
				 	bdev_cache_init();
			
 
				 	chrdev_init();
			
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -25,6 +25,7 @@
 
				 #include <linux/hardirq.h>
			
 
				 #include <linux/task_work.h>
			
 
				 #include <linux/ima.h>
			
 
				+#include <linux/swap.h>
			
 
				 
			
 
				 #include <linux/atomic.h>
			
 
				 
			
@@ -308,19 +309,24 @@ void put_filp(struct file *file)
 
				 	}
			
 
				 }
			
 
				 
			
 
				-void __init files_init(unsigned long mempages)
			
 
				+void __init files_init(void)
			
 
				 { 
			
 
				-	unsigned long n;
			
 
				-
			
 
				 	filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0,
			
 
				 			SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
			
 
				+	percpu_counter_init(&nr_files, 0, GFP_KERNEL);
			
 
				+}
			
 
				 
			
 
				-	/*
			
 
				-	 * One file with associated inode and dcache is very roughly 1K.
			
 
				-	 * Per default don't use more than 10% of our memory for files. 
			
 
				-	 */ 
			
 
				+/*
			
 
				+ * One file with associated inode and dcache is very roughly 1K. Per default
			
 
				+ * do not use more than 10% of our memory for files.
			
 
				+ */
			
 
				+void __init files_maxfiles_init(void)
			
 
				+{
			
 
				+	unsigned long n;
			
 
				+	unsigned long memreserve = (totalram_pages - nr_free_pages()) * 3/2;
			
 
				+
			
 
				+	memreserve = min(memreserve, totalram_pages - 1);
			
 
				+	n = ((totalram_pages - memreserve) * (PAGE_SIZE / 1024)) / 10;
			
 
				 
			
 
				-	n = (mempages * (PAGE_SIZE / 1024)) / 10;
			
 
				 	files_stat.max_files = max_t(unsigned long, n, NR_FILE);
			
 
				-	percpu_counter_init(&nr_files, 0, GFP_KERNEL);
			
 
				 } 
			
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -1010,6 +1010,8 @@ struct file *hugetlb_file_setup(const char *name, size_t size,
 
				 	inode = hugetlbfs_get_inode(sb, NULL, S_IFREG | S_IRWXUGO, 0);
			
 
				 	if (!inode)
			
 
				 		goto out_dentry;
			
 
				+	if (creat_flags == HUGETLB_SHMFS_INODE)
			
 
				+		inode->i_flags |= S_PRIVATE;
			
 
				 
			
 
				 	file = ERR_PTR(-ENOMEM);
			
 
				 	if (hugetlb_reserve_pages(inode, 0,
			
--- a/fs/notify/mark.c
+++ b/fs/notify/mark.c
@@ -412,16 +412,36 @@ void fsnotify_clear_marks_by_group_flags(struct fsnotify_group *group,
 
				 					 unsigned int flags)
			
 
				 {
			
 
				 	struct fsnotify_mark *lmark, *mark;
			
 
				+	LIST_HEAD(to_free);
			
 
				 
			
 
				+	/*
			
 
				+	 * We have to be really careful here. Anytime we drop mark_mutex, e.g.
			
 
				+	 * fsnotify_clear_marks_by_inode() can come and free marks, even in our
			
 
				+	 * to_free list so we have to use mark_mutex even when accessing that
			
 
				+	 * list. And freeing mark requires us to drop mark_mutex. So we can
			
 
				+	 * reliably free only the first mark in the list. That's why we first
			
 
				+	 * move marks to free to to_free list in one go and then free marks in
			
 
				+	 * to_free list one by one.
			
 
				+	 */
			
 
				 	mutex_lock_nested(&group->mark_mutex, SINGLE_DEPTH_NESTING);
			
 
				 	list_for_each_entry_safe(mark, lmark, &group->marks_list, g_list) {
			
 
				-		if (mark->flags & flags) {
			
 
				-			fsnotify_get_mark(mark);
			
 
				-			fsnotify_destroy_mark_locked(mark, group);
			
 
				-			fsnotify_put_mark(mark);
			
 
				-		}
			
 
				+		if (mark->flags & flags)
			
 
				+			list_move(&mark->g_list, &to_free);
			
 
				 	}
			
 
				 	mutex_unlock(&group->mark_mutex);
			
 
				+
			
 
				+	while (1) {
			
 
				+		mutex_lock_nested(&group->mark_mutex, SINGLE_DEPTH_NESTING);
			
 
				+		if (list_empty(&to_free)) {
			
 
				+			mutex_unlock(&group->mark_mutex);
			
 
				+			break;
			
 
				+		}
			
 
				+		mark = list_first_entry(&to_free, struct fsnotify_mark, g_list);
			
 
				+		fsnotify_get_mark(mark);
			
 
				+		fsnotify_destroy_mark_locked(mark, group);
			
 
				+		mutex_unlock(&group->mark_mutex);
			
 
				+		fsnotify_put_mark(mark);
			
 
				+	}
			
 
				 }
			
 
				 
			
 
				 /*
			
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -685,7 +685,7 @@ static int ocfs2_direct_IO_zero_extend(struct ocfs2_super *osb,
 
				 
			
 
				 	if (p_cpos && !(ext_flags & OCFS2_EXT_UNWRITTEN)) {
			
 
				 		u64 s = i_size_read(inode);
			
 
				-		sector_t sector = (p_cpos << (osb->s_clustersize_bits - 9)) +
			
 
				+		sector_t sector = ((u64)p_cpos << (osb->s_clustersize_bits - 9)) +
			
 
				 			(do_div(s, osb->s_clustersize) >> 9);
			
 
				 
			
 
				 		ret = blkdev_issue_zeroout(osb->sb->s_bdev, sector,
			
@@ -910,7 +910,7 @@ static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb,
 
				 		BUG_ON(!p_cpos || (ext_flags & OCFS2_EXT_UNWRITTEN));
			
 
				 
			
 
				 		ret = blkdev_issue_zeroout(osb->sb->s_bdev,
			
 
				-				p_cpos << (osb->s_clustersize_bits - 9),
			
 
				+				(u64)p_cpos << (osb->s_clustersize_bits - 9),
			
 
				 				zero_len_head >> 9, GFP_NOFS, false);
			
 
				 		if (ret < 0)
			
 
				 			mlog_errno(ret);
			
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -4025,9 +4025,13 @@ static void ocfs2_downconvert_thread_do_work(struct ocfs2_super *osb)
 
				 	osb->dc_work_sequence = osb->dc_wake_sequence;
			
 
				 
			
 
				 	processed = osb->blocked_lock_count;
			
 
				-	while (processed) {
			
 
				-		BUG_ON(list_empty(&osb->blocked_lock_list));
			
 
				-
			
 
				+	/*
			
 
				+	 * blocked lock processing in this loop might call iput which can
			
 
				+	 * remove items off osb->blocked_lock_list. Downconvert up to
			
 
				+	 * 'processed' number of locks, but stop short if we had some
			
 
				+	 * removed in ocfs2_mark_lockres_freeing when downconverting.
			
 
				+	 */
			
 
				+	while (processed && !list_empty(&osb->blocked_lock_list)) {
			
 
				 		lockres = list_entry(osb->blocked_lock_list.next,
			
 
				 				     struct ocfs2_lock_res, l_blocked_list);
			
 
				 		list_del_init(&lockres->l_blocked_list);
			
--- a/fs/signalfd.c
+++ b/fs/signalfd.c
@@ -121,8 +121,9 @@ static int signalfd_copyinfo(struct signalfd_siginfo __user *uinfo,
 
				 		 * Other callers might not initialize the si_lsb field,
			
 
				 		 * so check explicitly for the right codes here.
			
 
				 		 */
			
 
				-		if (kinfo->si_code == BUS_MCEERR_AR ||
			
 
				-		    kinfo->si_code == BUS_MCEERR_AO)
			
 
				+		if (kinfo->si_signo == SIGBUS &&
			
 
				+		    (kinfo->si_code == BUS_MCEERR_AR ||
			
 
				+		     kinfo->si_code == BUS_MCEERR_AO))
			
 
				 			err |= __put_user((short) kinfo->si_addr_lsb,
			
 
				 					  &uinfo->ssi_addr_lsb);
			
 
				 #endif
			
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -55,7 +55,8 @@ struct vm_fault;
 
				 
			
 
				 extern void __init inode_init(void);
			
 
				 extern void __init inode_init_early(void);
			
 
				-extern void __init files_init(unsigned long);
			
 
				+extern void __init files_init(void);
			
 
				+extern void __init files_maxfiles_init(void);
			
 
				 
			
 
				 extern struct files_stat_struct files_stat;
			
 
				 extern unsigned long get_max_files(void);
			
@@ -2245,7 +2246,7 @@ extern int ioctl_preallocate(struct file *filp, void __user *argp);
 
				 
			
 
				 /* fs/dcache.c */
			
 
				 extern void __init vfs_caches_init_early(void);
			
 
				-extern void __init vfs_caches_init(unsigned long);
			
 
				+extern void __init vfs_caches_init(void);
			
 
				 
			
 
				 extern struct kmem_cache *names_cachep;
			
 
				 
			
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -631,15 +631,19 @@ static inline void ClearPageSlabPfmemalloc(struct page *page)
 
				 	 1 << PG_private | 1 << PG_private_2 | \
			
 
				 	 1 << PG_writeback | 1 << PG_reserved | \
			
 
				 	 1 << PG_slab	 | 1 << PG_swapcache | 1 << PG_active | \
			
 
				-	 1 << PG_unevictable | __PG_MLOCKED | __PG_HWPOISON | \
			
 
				+	 1 << PG_unevictable | __PG_MLOCKED | \
			
 
				 	 __PG_COMPOUND_LOCK)
			
 
				 
			
 
				 /*
			
 
				  * Flags checked when a page is prepped for return by the page allocator.
			
 
				- * Pages being prepped should not have any flags set.  It they are set,
			
 
				+ * Pages being prepped should not have these flags set.  If they are set,
			
 
				  * there has been a kernel bug or struct page corruption.
			
 
				+ *
			
 
				+ * __PG_HWPOISON is exceptional because it needs to be kept beyond page's
			
 
				+ * alloc-free cycle to prevent the page from being reused.
			
 
				  */
			
 
				-#define PAGE_FLAGS_CHECK_AT_PREP	((1 << NR_PAGEFLAGS) - 1)
			
 
				+#define PAGE_FLAGS_CHECK_AT_PREP	\
			
 
				+	(((1 << NR_PAGEFLAGS) - 1) & ~__PG_HWPOISON)
			
 
				 
			
 
				 #define PAGE_FLAGS_PRIVATE				\
			
 
				 	(1 << PG_private | 1 << PG_private_2)
			
--- a/init/main.c
+++ b/init/main.c
@@ -656,7 +656,7 @@ asmlinkage __visible void __init start_kernel(void)
 
				 	key_init();
			
 
				 	security_init();
			
 
				 	dbg_late_init();
			
 
				-	vfs_caches_init(totalram_pages);
			
 
				+	vfs_caches_init();
			
 
				 	signals_init();
			
 
				 	/* rootfs populating might need page-writeback */
			
 
				 	page_writeback_init();
			
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -142,7 +142,6 @@ static int msg_insert(struct msg_msg *msg, struct mqueue_inode_info *info)
 
				 		if (!leaf)
			
 
				 			return -ENOMEM;
			
 
				 		INIT_LIST_HEAD(&leaf->msg_list);
			
 
				-		info->qsize += sizeof(*leaf);
			
 
				 	}
			
 
				 	leaf->priority = msg->m_type;
			
 
				 	rb_link_node(&leaf->rb_node, parent, p);
			
@@ -187,7 +186,6 @@ try_again:
 
				 			     "lazy leaf delete!\n");
			
 
				 		rb_erase(&leaf->rb_node, &info->msg_tree);
			
 
				 		if (info->node_cache) {
			
 
				-			info->qsize -= sizeof(*leaf);
			
 
				 			kfree(leaf);
			
 
				 		} else {
			
 
				 			info->node_cache = leaf;
			
@@ -200,7 +198,6 @@ try_again:
 
				 		if (list_empty(&leaf->msg_list)) {
			
 
				 			rb_erase(&leaf->rb_node, &info->msg_tree);
			
 
				 			if (info->node_cache) {
			
 
				-				info->qsize -= sizeof(*leaf);
			
 
				 				kfree(leaf);
			
 
				 			} else {
			
 
				 				info->node_cache = leaf;
			
@@ -1034,7 +1031,6 @@ SYSCALL_DEFINE5(mq_timedsend, mqd_t, mqdes, const char __user *, u_msg_ptr,
 
				 		/* Save our speculative allocation into the cache */
			
 
				 		INIT_LIST_HEAD(&new_leaf->msg_list);
			
 
				 		info->node_cache = new_leaf;
			
 
				-		info->qsize += sizeof(*new_leaf);
			
 
				 		new_leaf = NULL;
			
 
				 	} else {
			
 
				 		kfree(new_leaf);
			
@@ -1142,7 +1138,6 @@ SYSCALL_DEFINE5(mq_timedreceive, mqd_t, mqdes, char __user *, u_msg_ptr,
 
				 		/* Save our speculative allocation into the cache */
			
 
				 		INIT_LIST_HEAD(&new_leaf->msg_list);
			
 
				 		info->node_cache = new_leaf;
			
 
				-		info->qsize += sizeof(*new_leaf);
			
 
				 	} else {
			
 
				 		kfree(new_leaf);
			
 
				 	}
			
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -545,7 +545,7 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
 
				 		if  ((shmflg & SHM_NORESERVE) &&
			
 
				 				sysctl_overcommit_memory != OVERCOMMIT_NEVER)
			
 
				 			acctflag = VM_NORESERVE;
			
 
				-		file = shmem_file_setup(name, size, acctflag);
			
 
				+		file = shmem_kernel_file_setup(name, size, acctflag);
			
 
				 	}
			
 
				 	error = PTR_ERR(file);
			
 
				 	if (IS_ERR(file))
			
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -97,6 +97,7 @@ bool kthread_should_park(void)
 
				 {
			
 
				 	return test_bit(KTHREAD_SHOULD_PARK, &to_kthread(current)->flags);
			
 
				 }
			
 
				+EXPORT_SYMBOL_GPL(kthread_should_park);
			
 
				 
			
 
				 /**
			
 
				  * kthread_freezable_should_stop - should this freezable kthread return now?
			
@@ -171,6 +172,7 @@ void kthread_parkme(void)
 
				 {
			
 
				 	__kthread_parkme(to_kthread(current));
			
 
				 }
			
 
				+EXPORT_SYMBOL_GPL(kthread_parkme);
			
 
				 
			
 
				 static int kthread(void *_create)
			
 
				 {
			
@@ -411,6 +413,7 @@ void kthread_unpark(struct task_struct *k)
 
				 	if (kthread)
			
 
				 		__kthread_unpark(k, kthread);
			
 
				 }
			
 
				+EXPORT_SYMBOL_GPL(kthread_unpark);
			
 
				 
			
 
				 /**
			
 
				  * kthread_park - park a thread created by kthread_create().
			
@@ -441,6 +444,7 @@ int kthread_park(struct task_struct *k)
 
				 	}
			
 
				 	return ret;
			
 
				 }
			
 
				+EXPORT_SYMBOL_GPL(kthread_park);
			
 
				 
			
 
				 /**
			
 
				  * kthread_stop - stop a thread created by kthread_create().
			
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -2748,12 +2748,15 @@ int copy_siginfo_to_user(siginfo_t __user *to, const siginfo_t *from)
 
				 		 * Other callers might not initialize the si_lsb field,
			
 
				 		 * so check explicitly for the right codes here.
			
 
				 		 */
			
 
				-		if (from->si_code == BUS_MCEERR_AR || from->si_code == BUS_MCEERR_AO)
			
 
				+		if (from->si_signo == SIGBUS &&
			
 
				+		    (from->si_code == BUS_MCEERR_AR || from->si_code == BUS_MCEERR_AO))
			
 
				 			err |= __put_user(from->si_addr_lsb, &to->si_addr_lsb);
			
 
				 #endif
			
 
				 #ifdef SEGV_BNDERR
			
 
				-		err |= __put_user(from->si_lower, &to->si_lower);
			
 
				-		err |= __put_user(from->si_upper, &to->si_upper);
			
 
				+		if (from->si_signo == SIGSEGV && from->si_code == SEGV_BNDERR) {
			
 
				+			err |= __put_user(from->si_lower, &to->si_lower);
			
 
				+			err |= __put_user(from->si_upper, &to->si_upper);
			
 
				+		}
			
 
				 #endif
			
 
				 		break;
			
 
				 	case __SI_CHLD:
			
@@ -3017,7 +3020,7 @@ COMPAT_SYSCALL_DEFINE3(rt_sigqueueinfo,
 
				 			int, sig,
			
 
				 			struct compat_siginfo __user *, uinfo)
			
 
				 {
			
 
				-	siginfo_t info;
			
 
				+	siginfo_t info = {};
			
 
				 	int ret = copy_siginfo_from_user32(&info, uinfo);
			
 
				 	if (unlikely(ret))
			
 
				 		return ret;
			
@@ -3061,7 +3064,7 @@ COMPAT_SYSCALL_DEFINE4(rt_tgsigqueueinfo,
 
				 			int, sig,
			
 
				 			struct compat_siginfo __user *, uinfo)
			
 
				 {
			
 
				-	siginfo_t info;
			
 
				+	siginfo_t info = {};
			
 
				 
			
 
				 	if (copy_siginfo_from_user32(&info, uinfo))
			
 
				 		return -EFAULT;
			
--- a/lib/iommu-common.c
+++ b/lib/iommu-common.c
@@ -119,7 +119,7 @@ unsigned long iommu_tbl_range_alloc(struct device *dev,
 
				 	unsigned long align_mask = 0;
			
 
				 
			
 
				 	if (align_order > 0)
			
 
				-		align_mask = 0xffffffffffffffffl >> (64 - align_order);
			
 
				+		align_mask = ~0ul >> (BITS_PER_LONG - align_order);
			
 
				 
			
 
				 	/* Sanity check */
			
 
				 	if (unlikely(npages == 0)) {
			
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1676,12 +1676,7 @@ static void __split_huge_page_refcount(struct page *page,
 
				 		/* after clearing PageTail the gup refcount can be released */
			
 
				 		smp_mb__after_atomic();
			
 
				 
			
 
				-		/*
			
 
				-		 * retain hwpoison flag of the poisoned tail page:
			
 
				-		 *   fix for the unsuitable process killed on Guest Machine(KVM)
			
 
				-		 *   by the memory-failure.
			
 
				-		 */
			
 
				-		page_tail->flags &= ~PAGE_FLAGS_CHECK_AT_PREP | __PG_HWPOISON;
			
 
				+		page_tail->flags &= ~PAGE_FLAGS_CHECK_AT_PREP;
			
 
				 		page_tail->flags |= (page->flags &
			
 
				 				     ((1L << PG_referenced) |
			
 
				 				      (1L << PG_swapbacked) |
			
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -909,6 +909,18 @@ int get_hwpoison_page(struct page *page)
 
				 	 * directly for tail pages.
			
 
				 	 */
			
 
				 	if (PageTransHuge(head)) {
			
 
				+		/*
			
 
				+		 * Non anonymous thp exists only at allocation/free time. We
			
 
				+		 * can't handle such a case correctly, so let's give it up.
			
 
				+		 * This should be better than triggering BUG_ON when kernel
			
 
				+		 * tries to touch the "partially handled" page.
			
 
				+		 */
			
 
				+		if (!PageAnon(head)) {
			
 
				+			pr_err("MCE: %#lx: non anonymous thp\n",
			
 
				+				page_to_pfn(page));
			
 
				+			return 0;
			
 
				+		}
			
 
				+
			
 
				 		if (get_page_unless_zero(head)) {
			
 
				 			if (PageTail(page))
			
 
				 				get_page(page);
			
@@ -1134,15 +1146,6 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
 
				 	}
			
 
				 
			
 
				 	if (!PageHuge(p) && PageTransHuge(hpage)) {
			
 
				-		if (!PageAnon(hpage)) {
			
 
				-			pr_err("MCE: %#lx: non anonymous thp\n", pfn);
			
 
				-			if (TestClearPageHWPoison(p))
			
 
				-				atomic_long_sub(nr_pages, &num_poisoned_pages);
			
 
				-			put_page(p);
			
 
				-			if (p != hpage)
			
 
				-				put_page(hpage);
			
 
				-			return -EBUSY;
			
 
				-		}
			
 
				 		if (unlikely(split_huge_page(hpage))) {
			
 
				 			pr_err("MCE: %#lx: thp split failed\n", pfn);
			
 
				 			if (TestClearPageHWPoison(p))
			
@@ -1209,9 +1212,9 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
 
				 	if (!PageHWPoison(p)) {
			
 
				 		printk(KERN_ERR "MCE %#lx: just unpoisoned\n", pfn);
			
 
				 		atomic_long_sub(nr_pages, &num_poisoned_pages);
			
 
				+		unlock_page(hpage);
			
 
				 		put_page(hpage);
			
 
				-		res = 0;
			
 
				-		goto out;
			
 
				+		return 0;
			
 
				 	}
			
 
				 	if (hwpoison_filter(p)) {
			
 
				 		if (TestClearPageHWPoison(p))
			
@@ -1656,6 +1659,8 @@ static int __soft_offline_page(struct page *page, int flags)
 
				 		inc_zone_page_state(page, NR_ISOLATED_ANON +
			
 
				 					page_is_file_cache(page));
			
 
				 		list_add(&page->lru, &pagelist);
			
 
				+		if (!TestSetPageHWPoison(page))
			
 
				+			atomic_long_inc(&num_poisoned_pages);
			
 
				 		ret = migrate_pages(&pagelist, new_page, NULL, MPOL_MF_MOVE_ALL,
			
 
				 					MIGRATE_SYNC, MR_MEMORY_FAILURE);
			
 
				 		if (ret) {
			
@@ -1670,9 +1675,8 @@ static int __soft_offline_page(struct page *page, int flags)
 
				 				pfn, ret, page->flags);
			
 
				 			if (ret > 0)
			
 
				 				ret = -EIO;
			
 
				-		} else {
			
 
				-			SetPageHWPoison(page);
			
 
				-			atomic_long_inc(&num_poisoned_pages);
			
 
				+			if (TestClearPageHWPoison(page))
			
 
				+				atomic_long_dec(&num_poisoned_pages);
			
 
				 		}
			
 
				 	} else {
			
 
				 		pr_info("soft offline: %#lx: isolation failed: %d, page count %d, type %lx\n",
			
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -446,7 +446,7 @@ static int __meminit __add_zone(struct zone *zone, unsigned long phys_start_pfn)
 
				 	int nr_pages = PAGES_PER_SECTION;
			
 
				 	int nid = pgdat->node_id;
			
 
				 	int zone_type;
			
 
				-	unsigned long flags;
			
 
				+	unsigned long flags, pfn;
			
 
				 	int ret;
			
 
				 
			
 
				 	zone_type = zone - pgdat->node_zones;
			
@@ -461,6 +461,14 @@ static int __meminit __add_zone(struct zone *zone, unsigned long phys_start_pfn)
 
				 	pgdat_resize_unlock(zone->zone_pgdat, &flags);
			
 
				 	memmap_init_zone(nr_pages, nid, zone_type,
			
 
				 			 phys_start_pfn, MEMMAP_HOTPLUG);
			
 
				+
			
 
				+	/* online_page_range is called later and expects pages reserved */
			
 
				+	for (pfn = phys_start_pfn; pfn < phys_start_pfn + nr_pages; pfn++) {
			
 
				+		if (!pfn_valid(pfn))
			
 
				+			continue;
			
 
				+
			
 
				+		SetPageReserved(pfn_to_page(pfn));
			
 
				+	}
			
 
				 	return 0;
			
 
				 }
			
 
				 
			
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -880,7 +880,8 @@ static int __unmap_and_move(struct page *page, struct page *newpage,
 
				 	/* Establish migration ptes or remove ptes */
			
 
				 	if (page_mapped(page)) {
			
 
				 		try_to_unmap(page,
			
 
				-			TTU_MIGRATION|TTU_IGNORE_MLOCK|TTU_IGNORE_ACCESS);
			
 
				+			TTU_MIGRATION|TTU_IGNORE_MLOCK|TTU_IGNORE_ACCESS|
			
 
				+			TTU_IGNORE_HWPOISON);
			
 
				 		page_was_mapped = 1;
			
 
				 	}
			
 
				 
			
@@ -950,7 +951,10 @@ out:
 
				 		list_del(&page->lru);
			
 
				 		dec_zone_page_state(page, NR_ISOLATED_ANON +
			
 
				 				page_is_file_cache(page));
			
 
				-		if (reason != MR_MEMORY_FAILURE)
			
 
				+		/* Soft-offlined page shouldn't go through lru cache list */
			
 
				+		if (reason == MR_MEMORY_FAILURE)
			
 
				+			put_page(page);
			
 
				+		else
			
 
				 			putback_lru_page(page);
			
 
				 	}
			
 
				 
			
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -2063,10 +2063,10 @@ static struct notifier_block ratelimit_nb = {
 
				  */
			
 
				 void __init page_writeback_init(void)
			
 
				 {
			
 
				+	BUG_ON(wb_domain_init(&global_wb_domain, GFP_KERNEL));
			
 
				+
			
 
				 	writeback_set_ratelimit();
			
 
				 	register_cpu_notifier(&ratelimit_nb);
			
 
				-
			
 
				-	BUG_ON(wb_domain_init(&global_wb_domain, GFP_KERNEL));
			
 
				 }
			
 
				 
			
 
				 /**
			
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -18,7 +18,6 @@
 
				 #include <linux/mm.h>
			
 
				 #include <linux/swap.h>
			
 
				 #include <linux/interrupt.h>
			
 
				-#include <linux/rwsem.h>
			
 
				 #include <linux/pagemap.h>
			
 
				 #include <linux/jiffies.h>
			
 
				 #include <linux/bootmem.h>
			
@@ -981,21 +980,21 @@ static void __init __free_pages_boot_core(struct page *page,
 
				 
			
 
				 #if defined(CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID) || \
			
 
				 	defined(CONFIG_HAVE_MEMBLOCK_NODE_MAP)
			
 
				-/* Only safe to use early in boot when initialisation is single-threaded */
			
 
				+
			
 
				 static struct mminit_pfnnid_cache early_pfnnid_cache __meminitdata;
			
 
				 
			
 
				 int __meminit early_pfn_to_nid(unsigned long pfn)
			
 
				 {
			
 
				+	static DEFINE_SPINLOCK(early_pfn_lock);
			
 
				 	int nid;
			
 
				 
			
 
				-	/* The system will behave unpredictably otherwise */
			
 
				-	BUG_ON(system_state != SYSTEM_BOOTING);
			
 
				-
			
 
				+	spin_lock(&early_pfn_lock);
			
 
				 	nid = __early_pfn_to_nid(pfn, &early_pfnnid_cache);
			
 
				-	if (nid >= 0)
			
 
				-		return nid;
			
 
				-	/* just returns 0 */
			
 
				-	return 0;
			
 
				+	if (nid < 0)
			
 
				+		nid = 0;
			
 
				+	spin_unlock(&early_pfn_lock);
			
 
				+
			
 
				+	return nid;
			
 
				 }
			
 
				 #endif
			
 
				 
			
@@ -1060,7 +1059,15 @@ static void __init deferred_free_range(struct page *page,
 
				 		__free_pages_boot_core(page, pfn, 0);
			
 
				 }
			
 
				 
			
 
				-static __initdata DECLARE_RWSEM(pgdat_init_rwsem);
			
 
				+/* Completion tracking for deferred_init_memmap() threads */
			
 
				+static atomic_t pgdat_init_n_undone __initdata;
			
 
				+static __initdata DECLARE_COMPLETION(pgdat_init_all_done_comp);
			
 
				+
			
 
				+static inline void __init pgdat_init_report_one_done(void)
			
 
				+{
			
 
				+	if (atomic_dec_and_test(&pgdat_init_n_undone))
			
 
				+		complete(&pgdat_init_all_done_comp);
			
 
				+}
			
 
				 
			
 
				 /* Initialise remaining memory on a node */
			
 
				 static int __init deferred_init_memmap(void *data)
			
@@ -1077,7 +1084,7 @@ static int __init deferred_init_memmap(void *data)
 
				 	const struct cpumask *cpumask = cpumask_of_node(pgdat->node_id);
			
 
				 
			
 
				 	if (first_init_pfn == ULONG_MAX) {
			
 
				-		up_read(&pgdat_init_rwsem);
			
 
				+		pgdat_init_report_one_done();
			
 
				 		return 0;
			
 
				 	}
			
 
				 
			
@@ -1177,7 +1184,8 @@ free_range:
 
				 
			
 
				 	pr_info("node %d initialised, %lu pages in %ums\n", nid, nr_pages,
			
 
				 					jiffies_to_msecs(jiffies - start));
			
 
				-	up_read(&pgdat_init_rwsem);
			
 
				+
			
 
				+	pgdat_init_report_one_done();
			
 
				 	return 0;
			
 
				 }
			
 
				 
			
@@ -1185,14 +1193,17 @@ void __init page_alloc_init_late(void)
 
				 {
			
 
				 	int nid;
			
 
				 
			
 
				+	/* There will be num_node_state(N_MEMORY) threads */
			
 
				+	atomic_set(&pgdat_init_n_undone, num_node_state(N_MEMORY));
			
 
				 	for_each_node_state(nid, N_MEMORY) {
			
 
				-		down_read(&pgdat_init_rwsem);
			
 
				 		kthread_run(deferred_init_memmap, NODE_DATA(nid), "pgdatinit%d", nid);
			
 
				 	}
			
 
				 
			
 
				 	/* Block until all are initialised */
			
 
				-	down_write(&pgdat_init_rwsem);
			
 
				-	up_write(&pgdat_init_rwsem);
			
 
				+	wait_for_completion(&pgdat_init_all_done_comp);
			
 
				+
			
 
				+	/* Reinit limits that are based on free pages after the kernel is up */
			
 
				+	files_maxfiles_init();
			
 
				 }
			
 
				 #endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */
			
 
				 
			
@@ -1285,6 +1296,10 @@ static inline int check_new_page(struct page *page)
 
				 		bad_reason = "non-NULL mapping";
			
 
				 	if (unlikely(atomic_read(&page->_count) != 0))
			
 
				 		bad_reason = "nonzero _count";
			
 
				+	if (unlikely(page->flags & __PG_HWPOISON)) {
			
 
				+		bad_reason = "HWPoisoned (hardware-corrupted)";
			
 
				+		bad_flags = __PG_HWPOISON;
			
 
				+	}
			
 
				 	if (unlikely(page->flags & PAGE_FLAGS_CHECK_AT_PREP)) {
			
 
				 		bad_reason = "PAGE_FLAGS_CHECK_AT_PREP flag set";
			
 
				 		bad_flags = PAGE_FLAGS_CHECK_AT_PREP;
			
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -3363,8 +3363,8 @@ put_path:
 
				  * shmem_kernel_file_setup - get an unlinked file living in tmpfs which must be
			
 
				  * 	kernel internal.  There will be NO LSM permission checks against the
			
 
				  * 	underlying inode.  So users of this interface must do LSM checks at a
			
 
				- * 	higher layer.  The one user is the big_key implementation.  LSM checks
			
 
				- * 	are provided at the key level rather than the inode level.
			
 
				+ *	higher layer.  The users are the big_key and shm implementations.  LSM
			
 
				+ *	checks are provided at the key or shm level rather than the inode.
			
 
				  * @name: name for dentry (to be seen in /proc/<pid>/maps
			
 
				  * @size: size to be set for the file
			
 
				  * @flags: VM_NORESERVE suppresses pre-accounting of the entire object size
			
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -37,8 +37,7 @@ struct kmem_cache *kmem_cache;
 
				 		SLAB_TRACE | SLAB_DESTROY_BY_RCU | SLAB_NOLEAKTRACE | \
			
 
				 		SLAB_FAILSLAB)
			
 
				 
			
 
				-#define SLAB_MERGE_SAME (SLAB_DEBUG_FREE | SLAB_RECLAIM_ACCOUNT | \
			
 
				-		SLAB_CACHE_DMA | SLAB_NOTRACK)
			
 
				+#define SLAB_MERGE_SAME (SLAB_RECLAIM_ACCOUNT | SLAB_CACHE_DMA | SLAB_NOTRACK)
			
 
				 
			
 
				 /*
			
 
				  * Merge control. If this is set then no merging of slab caches will occur.