@@ -871,7 +871,7 @@ update_stats_wait_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	    likely(wait_start > prev_wait_start))
 		wait_start -= prev_wait_start;
 
-	schedstat_set(se->statistics.wait_start, wait_start);
+	__schedstat_set(se->statistics.wait_start, wait_start);
 }
 
 static inline void
@@ -893,17 +893,17 @@ update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se)
 			 * time stamp can be adjusted to accumulate wait time
 			 * prior to migration.
 			 */
-			schedstat_set(se->statistics.wait_start, delta);
+			__schedstat_set(se->statistics.wait_start, delta);
 			return;
 		}
 		trace_sched_stat_wait(p, delta);
 	}
 
-	schedstat_set(se->statistics.wait_max,
+	__schedstat_set(se->statistics.wait_max,
 			max(schedstat_val(se->statistics.wait_max), delta));
-	schedstat_inc(se->statistics.wait_count);
-	schedstat_add(se->statistics.wait_sum, delta);
-	schedstat_set(se->statistics.wait_start, 0);
+	__schedstat_inc(se->statistics.wait_count);
+	__schedstat_add(se->statistics.wait_sum, delta);
+	__schedstat_set(se->statistics.wait_start, 0);
 }
 
 static inline void
@@ -928,10 +928,10 @@ update_stats_enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
 			delta = 0;
 
 		if (unlikely(delta > schedstat_val(se->statistics.sleep_max)))
-			schedstat_set(se->statistics.sleep_max, delta);
+			__schedstat_set(se->statistics.sleep_max, delta);
 
-		schedstat_set(se->statistics.sleep_start, 0);
-		schedstat_add(se->statistics.sum_sleep_runtime, delta);
+		__schedstat_set(se->statistics.sleep_start, 0);
+		__schedstat_add(se->statistics.sum_sleep_runtime, delta);
 
 		if (tsk) {
 			account_scheduler_latency(tsk, delta >> 10, 1);
@@ -945,15 +945,15 @@ update_stats_enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
 			delta = 0;
 
 		if (unlikely(delta > schedstat_val(se->statistics.block_max)))
-			schedstat_set(se->statistics.block_max, delta);
+			__schedstat_set(se->statistics.block_max, delta);
 
-		schedstat_set(se->statistics.block_start, 0);
-		schedstat_add(se->statistics.sum_sleep_runtime, delta);
+		__schedstat_set(se->statistics.block_start, 0);
+		__schedstat_add(se->statistics.sum_sleep_runtime, delta);
 
 		if (tsk) {
 			if (tsk->in_iowait) {
-				schedstat_add(se->statistics.iowait_sum, delta);
-				schedstat_inc(se->statistics.iowait_count);
+				__schedstat_add(se->statistics.iowait_sum, delta);
+				__schedstat_inc(se->statistics.iowait_count);
 				trace_sched_stat_iowait(tsk, delta);
 			}
 
@@ -1012,10 +1012,10 @@ update_stats_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 		struct task_struct *tsk = task_of(se);
 
 		if (tsk->state & TASK_INTERRUPTIBLE)
-			schedstat_set(se->statistics.sleep_start,
+			__schedstat_set(se->statistics.sleep_start,
 				      rq_clock(rq_of(cfs_rq)));
 		if (tsk->state & TASK_UNINTERRUPTIBLE)
-			schedstat_set(se->statistics.block_start,
+			__schedstat_set(se->statistics.block_start,
 				      rq_clock(rq_of(cfs_rq)));
 	}
 }
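
A note on the conversion above: each of the functions touched here already bails out early when schedstats are disabled, so the plain schedstat_*() wrappers would re-test schedstat_enabled() on every individual update. The double-underscore variants are the unguarded versions that rely on that single up-front check. A minimal userspace sketch of the same guarded/unguarded split — the names (stat_set, __stat_set, stats_enabled) and the struct below are illustrative stand-ins, not the kernel's definitions:

#include <stdio.h>
#include <stdbool.h>

static bool stats_enabled = true;	/* stand-in for the schedstats static key */

#define stat_enabled()		(stats_enabled)
#define __stat_set(var, val)	do { (var) = (val); } while (0)			/* unguarded */
#define stat_set(var, val)	do { if (stat_enabled()) (var) = (val); } while (0)	/* guarded */

struct wait_stats {
	unsigned long long wait_start;
	unsigned long long wait_max;
};

int main(void)
{
	struct wait_stats ws = { 0, 0 };

	if (!stat_enabled())		/* one check for the whole block ... */
		return 0;

	__stat_set(ws.wait_start, 100);	/* ... then unguarded updates, as in the hunks above */
	__stat_set(ws.wait_max, 250);

	printf("wait_start=%llu wait_max=%llu\n", ws.wait_start, ws.wait_max);
	return 0;
}
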
@@ -5692,27 +5692,31 @@ static int wake_wide(struct task_struct *p)
  *			  scheduling latency of the CPUs. This seems to work
  *			  for the overloaded case.
  */
-
-static bool
-wake_affine_idle(struct sched_domain *sd, struct task_struct *p,
-		 int this_cpu, int prev_cpu, int sync)
+static int
+wake_affine_idle(int this_cpu, int prev_cpu, int sync)
 {
 	/*
 	 * If this_cpu is idle, it implies the wakeup is from interrupt
 	 * context. Only allow the move if cache is shared. Otherwise an
 	 * interrupt intensive workload could force all tasks onto one
 	 * node depending on the IO topology or IRQ affinity settings.
+	 *
+	 * If the prev_cpu is idle and cache affine then avoid a migration.
+	 * There is no guarantee that the cache hot data from an interrupt
+	 * is more important than cache hot data on the prev_cpu and from
+	 * a cpufreq perspective, it's better to have higher utilisation
+	 * on one CPU.
 	 */
 	if (idle_cpu(this_cpu) && cpus_share_cache(this_cpu, prev_cpu))
-		return true;
+		return idle_cpu(prev_cpu) ? prev_cpu : this_cpu;
 
 	if (sync && cpu_rq(this_cpu)->nr_running == 1)
-		return true;
+		return this_cpu;
 
-	return false;
+	return nr_cpumask_bits;
 }
 
-static bool
+static int
 wake_affine_weight(struct sched_domain *sd, struct task_struct *p,
 		   int this_cpu, int prev_cpu, int sync)
 {
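
Net effect of the wake_affine_idle() change above: it now names a CPU instead of answering yes/no, and when the waking CPU and prev_cpu are cache affine and both idle it prefers prev_cpu so the task keeps its cache-hot data, per the added comment. nr_cpumask_bits works as a "no recommendation" value because it can never be a valid CPU id. A self-contained sketch of just that decision, with idle_cpu()/cpus_share_cache() replaced by toy stand-ins and NO_CPU playing the role of nr_cpumask_bits (everything below is illustrative, not kernel code):

#include <stdbool.h>
#include <stdio.h>

#define NR_CPUS	8
#define NO_CPU	NR_CPUS			/* plays the role of nr_cpumask_bits */

/* toy topology: CPUs 0-3 share one LLC, CPUs 4-7 another */
static bool cpus_share_cache(int a, int b) { return (a / 4) == (b / 4); }

static bool cpu_idle[NR_CPUS] = { [0] = true, [2] = true };
static bool idle_cpu(int cpu) { return cpu_idle[cpu]; }

/* mirrors the rewritten wake_affine_idle(), minus the sync/nr_running case */
static int pick_affine_idle(int this_cpu, int prev_cpu)
{
	if (idle_cpu(this_cpu) && cpus_share_cache(this_cpu, prev_cpu))
		return idle_cpu(prev_cpu) ? prev_cpu : this_cpu;
	return NO_CPU;
}

int main(void)
{
	printf("%d\n", pick_affine_idle(0, 2));	/* both idle, same LLC -> 2 (prev) */
	printf("%d\n", pick_affine_idle(0, 1));	/* prev busy -> 0 (this)           */
	printf("%d\n", pick_affine_idle(0, 5));	/* different LLC -> 8 (no pick)    */
	return 0;
}
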
@@ -5726,7 +5730,7 @@ wake_affine_weight(struct sched_domain *sd, struct task_struct *p,
 		unsigned long current_load = task_h_load(current);
 
 		if (current_load > this_eff_load)
-			return true;
+			return this_cpu;
 
 		this_eff_load -= current_load;
 	}
@@ -5743,28 +5747,28 @@ wake_affine_weight(struct sched_domain *sd, struct task_struct *p,
 	prev_eff_load *= 100 + (sd->imbalance_pct - 100) / 2;
 	prev_eff_load *= capacity_of(this_cpu);
 
-	return this_eff_load <= prev_eff_load;
+	return this_eff_load <= prev_eff_load ? this_cpu : nr_cpumask_bits;
 }
 
 static int wake_affine(struct sched_domain *sd, struct task_struct *p,
 		       int prev_cpu, int sync)
 {
 	int this_cpu = smp_processor_id();
-	bool affine = false;
+	int target = nr_cpumask_bits;
 
-	if (sched_feat(WA_IDLE) && !affine)
-		affine = wake_affine_idle(sd, p, this_cpu, prev_cpu, sync);
+	if (sched_feat(WA_IDLE))
+		target = wake_affine_idle(this_cpu, prev_cpu, sync);
 
-	if (sched_feat(WA_WEIGHT) && !affine)
-		affine = wake_affine_weight(sd, p, this_cpu, prev_cpu, sync);
+	if (sched_feat(WA_WEIGHT) && target == nr_cpumask_bits)
+		target = wake_affine_weight(sd, p, this_cpu, prev_cpu, sync);
 
 	schedstat_inc(p->se.statistics.nr_wakeups_affine_attempts);
-	if (affine) {
-		schedstat_inc(sd->ttwu_move_affine);
-		schedstat_inc(p->se.statistics.nr_wakeups_affine);
-	}
+	if (target == nr_cpumask_bits)
+		return prev_cpu;
 
-	return affine;
+	schedstat_inc(sd->ttwu_move_affine);
+	schedstat_inc(p->se.statistics.nr_wakeups_affine);
+	return target;
 }
 
 static inline unsigned long task_util(struct task_struct *p);
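
With both helpers returning a CPU id, wake_affine() itself becomes a small pipeline: try the idle heuristic if WA_IDLE is set, fall through to the weight heuristic only if nothing was recommended, convert a final nr_cpumask_bits into "stay on prev_cpu", and bump the affine schedstats only when a recommendation was made. A compilable sketch of that control flow with the two heuristics stubbed out (choose_wake_cpu, pick_by_*, NO_CPU and nr_affine_moves are made-up names for illustration only):

#include <stdio.h>
#include <stdbool.h>

#define NO_CPU	64	/* stand-in for nr_cpumask_bits: never a valid CPU id */

/* trivially stubbed heuristics; the real ones are wake_affine_idle/_weight */
static int pick_by_idleness(int this_cpu, int prev_cpu) { return NO_CPU; }
static int pick_by_load(int this_cpu, int prev_cpu)     { return this_cpu; }

static unsigned long nr_affine_moves;	/* stand-in for the ttwu_move_affine count */

/* mirrors the control flow of the rewritten wake_affine() */
static int choose_wake_cpu(int this_cpu, int prev_cpu, bool wa_idle, bool wa_weight)
{
	int target = NO_CPU;

	if (wa_idle)
		target = pick_by_idleness(this_cpu, prev_cpu);
	if (wa_weight && target == NO_CPU)
		target = pick_by_load(this_cpu, prev_cpu);

	if (target == NO_CPU)		/* no recommendation: stay on prev_cpu */
		return prev_cpu;

	nr_affine_moves++;		/* counted only when a heuristic recommended a CPU */
	return target;
}

int main(void)
{
	printf("%d\n", choose_wake_cpu(0, 3, true, false));	/* 3: idle heuristic declined */
	printf("%d\n", choose_wake_cpu(0, 3, true, true));	/* 0: weight heuristic picked this_cpu */
	printf("moves=%lu\n", nr_affine_moves);			/* 1 */
	return 0;
}
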
@@ -6193,7 +6197,7 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
 static int select_idle_sibling(struct task_struct *p, int prev, int target)
 {
 	struct sched_domain *sd;
-	int i;
+	int i, recent_used_cpu;
 
 	if (idle_cpu(target))
 		return target;
@@ -6204,6 +6208,21 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
 	if (prev != target && cpus_share_cache(prev, target) && idle_cpu(prev))
 		return prev;
 
+	/* Check a recently used CPU as a potential idle candidate */
+	recent_used_cpu = p->recent_used_cpu;
+	if (recent_used_cpu != prev &&
+	    recent_used_cpu != target &&
+	    cpus_share_cache(recent_used_cpu, target) &&
+	    idle_cpu(recent_used_cpu) &&
+	    cpumask_test_cpu(p->recent_used_cpu, &p->cpus_allowed)) {
+		/*
+		 * Replace recent_used_cpu with prev as it is a potential
+		 * candidate for the next wake.
+		 */
+		p->recent_used_cpu = prev;
+		return recent_used_cpu;
+	}
+
 	sd = rcu_dereference(per_cpu(sd_llc, target));
 	if (!sd)
 		return target;
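
The block added above gives select_idle_sibling() a third cheap candidate to probe before falling back to the LLC scan: a recently used CPU remembered per task (seeded from the waker's CPU in the select_task_rq_fair() hunk further down). Swapping recent_used_cpu with prev on a hit keeps two cache-affine candidates rotating for tasks that ping-pong between a pair of CPUs. A toy userspace model of that candidate order and rotation — it folds prev into the task struct and ignores cpus_share_cache()/cpus_allowed for brevity, so treat it purely as an illustration:

#include <stdio.h>
#include <stdbool.h>

#define NR_CPUS	4

static bool cpu_idle[NR_CPUS];
static bool idle_cpu(int cpu) { return cpu_idle[cpu]; }

struct task {
	int prev_cpu;		/* where the task last ran */
	int recent_used_cpu;	/* extra remembered candidate */
};

/* mirrors the candidate order: target, then prev, then the remembered CPU */
static int wake_cpu(struct task *p, int target)
{
	int recent = p->recent_used_cpu;

	if (idle_cpu(target))
		return target;
	if (p->prev_cpu != target && idle_cpu(p->prev_cpu))
		return p->prev_cpu;
	if (recent != p->prev_cpu && recent != target && idle_cpu(recent)) {
		p->recent_used_cpu = p->prev_cpu;	/* rotate the candidates */
		return recent;
	}
	return target;		/* the kernel would fall back to the LLC scan here */
}

int main(void)
{
	struct task p = { .prev_cpu = 1, .recent_used_cpu = 2 };
	int cpu;

	cpu_idle[2] = true;	/* target 0 and prev 1 are busy, remembered CPU 2 is idle */
	cpu = wake_cpu(&p, 0);
	printf("woken on %d, remembered %d\n", cpu, p.recent_used_cpu);	/* woken on 2, remembered 1 */
	return 0;
}
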
@@ -6357,8 +6376,7 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f
 		if (cpu == prev_cpu)
 			goto pick_cpu;
 
-		if (wake_affine(affine_sd, p, prev_cpu, sync))
-			new_cpu = cpu;
+		new_cpu = wake_affine(affine_sd, p, prev_cpu, sync);
 	}
 
 	if (sd && !(sd_flag & SD_BALANCE_FORK)) {
@@ -6372,9 +6390,12 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f
 
 	if (!sd) {
 pick_cpu:
-		if (sd_flag & SD_BALANCE_WAKE) /* XXX always ? */
+		if (sd_flag & SD_BALANCE_WAKE) { /* XXX always ? */
 			new_cpu = select_idle_sibling(p, prev_cpu, new_cpu);
 
+			if (want_affine)
+				current->recent_used_cpu = cpu;
+		}
 	} else {
 		new_cpu = find_idlest_cpu(sd, p, cpu, prev_cpu, sd_flag);
 	}