
powerpc, membarrier: Skip memory barrier in switch_mm()

Allow PowerPC to skip the full memory barrier in switch_mm(), and
only issue the barrier when scheduling into a task belonging to a
process that has registered to use private expedited membarrier commands.

Threads targeting the same VM but belonging to different thread groups
are a tricky case. This has a few consequences:

It turns out that we cannot rely on get_nr_threads(p) to count the
number of threads using a VM. We can instead use
(atomic_read(&mm->mm_users) == 1 && get_nr_threads(p) == 1)
to skip the synchronize_sched() in the case where the VM has only a
single user and that user has only a single thread.
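
For illustration only (not part of the patch): a minimal userspace sketch
of why get_nr_threads() alone is not enough. A task created with CLONE_VM
but without CLONE_THREAD shares the VM yet sits in its own thread group,
so each task reports a single thread while mm_users is 2. The names
child_fn and child_stack are made up for the example.

	#define _GNU_SOURCE
	#include <sched.h>
	#include <signal.h>
	#include <unistd.h>

	static char child_stack[64 * 1024];

	static int child_fn(void *arg)
	{
		pause();	/* keep sharing the VM indefinitely */
		return 0;
	}

	int main(void)
	{
		/*
		 * CLONE_VM without CLONE_THREAD: the child shares the
		 * address space but forms a thread group of its own.
		 */
		if (clone(child_fn, child_stack + sizeof(child_stack),
			  CLONE_VM | SIGCHLD, NULL) == -1)
			return 1;
		pause();
		return 0;
	}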

It also turns out that we cannot use for_each_thread() to set
thread flags in all threads using a VM, as it only iterates over the
thread group.

Therefore, test the membarrier state variable directly rather than
relying on thread flags. This means
membarrier_register_private_expedited() needs to set the
MEMBARRIER_STATE_PRIVATE_EXPEDITED flag, issue synchronize_sched(), and
only then set MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY, which allows
private expedited membarrier commands to succeed.
membarrier_arch_switch_mm() now tests for the
MEMBARRIER_STATE_PRIVATE_EXPEDITED flag.
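
Condensed, the two sides pair up as follows (this is only a summary of the
hunks below, not additional code):

	/* Registration (kernel/sched/membarrier.c): */
	atomic_or(MEMBARRIER_STATE_PRIVATE_EXPEDITED, &mm->membarrier_state);
	if (!(atomic_read(&mm->mm_users) == 1 && get_nr_threads(p) == 1))
		synchronize_sched();
	atomic_or(MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY, &mm->membarrier_state);

	/* Context switch (arch/powerpc/include/asm/membarrier.h): */
	if (prev && (atomic_read(&next->membarrier_state) &
		     MEMBARRIER_STATE_PRIVATE_EXPEDITED))
		smp_mb();	/* order rq->curr store vs. user-space accesses */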

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Alan Stern <stern@rowland.harvard.edu>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Andrea Parri <parri.andrea@gmail.com>
Cc: Andrew Hunter <ahh@google.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Avi Kivity <avi@scylladb.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Dave Watson <davejwatson@fb.com>
Cc: David Sehr <sehr@google.com>
Cc: Greg Hackmann <ghackmann@google.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Maged Michael <maged.michael@gmail.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Will Deacon <will.deacon@arm.com>
Cc: linux-api@vger.kernel.org
Cc: linux-arch@vger.kernel.org
Cc: linuxppc-dev@lists.ozlabs.org
Link: http://lkml.kernel.org/r/20180129202020.8515-3-mathieu.desnoyers@efficios.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Mathieu Desnoyers, 7 years ago
commit 3ccfebedd8

+ 1 - 0
MAINTAINERS

@@ -8944,6 +8944,7 @@ L:	linux-kernel@vger.kernel.org
 S:	Supported
 F:	kernel/sched/membarrier.c
 F:	include/uapi/linux/membarrier.h
+F:	arch/powerpc/include/asm/membarrier.h
 
 MEMORY MANAGEMENT
 L:	linux-mm@kvack.org

+ 1 - 0
arch/powerpc/Kconfig

@@ -140,6 +140,7 @@ config PPC
 	select ARCH_HAS_FORTIFY_SOURCE
 	select ARCH_HAS_GCOV_PROFILE_ALL
 	select ARCH_HAS_PMEM_API                if PPC64
+	select ARCH_HAS_MEMBARRIER_CALLBACKS
 	select ARCH_HAS_SCALED_CPUTIME		if VIRT_CPU_ACCOUNTING_NATIVE
 	select ARCH_HAS_SG_CHAIN
 	select ARCH_HAS_TICK_BROADCAST		if GENERIC_CLOCKEVENTS_BROADCAST

+ 26 - 0
arch/powerpc/include/asm/membarrier.h

@@ -0,0 +1,26 @@
+#ifndef _ASM_POWERPC_MEMBARRIER_H
+#define _ASM_POWERPC_MEMBARRIER_H
+
+static inline void membarrier_arch_switch_mm(struct mm_struct *prev,
+					     struct mm_struct *next,
+					     struct task_struct *tsk)
+{
+	/*
+	 * Only need the full barrier when switching between processes.
+	 * Barrier when switching from kernel to userspace is not
+	 * required here, given that it is implied by mmdrop(). Barrier
+	 * when switching from userspace to kernel is not needed after
+	 * store to rq->curr.
+	 */
+	if (likely(!(atomic_read(&next->membarrier_state) &
+		     MEMBARRIER_STATE_PRIVATE_EXPEDITED) || !prev))
+		return;
+
+	/*
+	 * The membarrier system call requires a full memory barrier
+	 * after storing to rq->curr, before going back to user-space.
+	 */
+	smp_mb();
+}
+
+#endif /* _ASM_POWERPC_MEMBARRIER_H */

+ 7 - 0
arch/powerpc/mm/mmu_context.c

@@ -12,6 +12,7 @@
 
 #include <linux/mm.h>
 #include <linux/cpu.h>
+#include <linux/sched/mm.h>
 
 #include <asm/mmu_context.h>
 
@@ -58,6 +59,10 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 		 *
 		 * On the read side the barrier is in pte_xchg(), which orders
 		 * the store to the PTE vs the load of mm_cpumask.
+		 *
+		 * This full barrier is needed by membarrier when switching
+		 * between processes after store to rq->curr, before user-space
+		 * memory accesses.
 		 */
 		smp_mb();
 
@@ -80,6 +85,8 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 
 	if (new_on_cpu)
 		radix_kvm_prefetch_workaround(next);
+	else
+		membarrier_arch_switch_mm(prev, next, tsk);
 
 	/*
 	 * The actual HW switching method differs between the various

+ 12 - 1
include/linux/sched/mm.h

@@ -215,14 +215,25 @@ static inline void memalloc_noreclaim_restore(unsigned int flags)
 #ifdef CONFIG_MEMBARRIER
 enum {
 	MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY	= (1U << 0),
-	MEMBARRIER_STATE_SWITCH_MM			= (1U << 1),
+	MEMBARRIER_STATE_PRIVATE_EXPEDITED		= (1U << 1),
 };
 
+#ifdef CONFIG_ARCH_HAS_MEMBARRIER_CALLBACKS
+#include <asm/membarrier.h>
+#endif
+
 static inline void membarrier_execve(struct task_struct *t)
 {
 	atomic_set(&t->mm->membarrier_state, 0);
 }
 #else
+#ifdef CONFIG_ARCH_HAS_MEMBARRIER_CALLBACKS
+static inline void membarrier_arch_switch_mm(struct mm_struct *prev,
+					     struct mm_struct *next,
+					     struct task_struct *tsk)
+{
+}
+#endif
 static inline void membarrier_execve(struct task_struct *t)
 {
 }

+ 3 - 0
init/Kconfig

@@ -1412,6 +1412,9 @@ config USERFAULTFD
 	  Enable the userfaultfd() system call that allows to intercept and
 	  handle page faults in userland.
 
+config ARCH_HAS_MEMBARRIER_CALLBACKS
+	bool
+
 config EMBEDDED
 	bool "Embedded system"
 	option allnoconfig_y

+ 0 - 10
kernel/sched/core.c

@@ -2698,16 +2698,6 @@ static struct rq *finish_task_switch(struct task_struct *prev)
 	prev_state = prev->state;
 	vtime_task_switch(prev);
 	perf_event_task_sched_in(prev, current);
-	/*
-	 * The membarrier system call requires a full memory barrier
-	 * after storing to rq->curr, before going back to user-space.
-	 *
-	 * TODO: This smp_mb__after_unlock_lock can go away if PPC end
-	 * up adding a full barrier to switch_mm(), or we should figure
-	 * out if a smp_mb__after_unlock_lock is really the proper API
-	 * to use.
-	 */
-	smp_mb__after_unlock_lock();
 	finish_task(prev);
 	finish_lock_switch(rq);
 	finish_arch_post_lock_switch();

+ 8 - 0
kernel/sched/membarrier.c

@@ -118,6 +118,14 @@ static void membarrier_register_private_expedited(void)
 	if (atomic_read(&mm->membarrier_state)
 			& MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY)
 		return;
+	atomic_or(MEMBARRIER_STATE_PRIVATE_EXPEDITED, &mm->membarrier_state);
+	if (!(atomic_read(&mm->mm_users) == 1 && get_nr_threads(p) == 1)) {
+		/*
+		 * Ensure all future scheduler executions will observe the
+		 * new thread flag state for this process.
+		 */
+		synchronize_sched();
+	}
 	atomic_or(MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY,
 			&mm->membarrier_state);
 }