7 years ago · c5f58bd58f
--- a/arch/powerpc/include/asm/membarrier.h
+++ b/arch/powerpc/include/asm/membarrier.h
@@ -13,7 +13,8 @@ static inline void membarrier_arch_switch_mm(struct mm_struct *prev,
 
				 	 * store to rq->curr.
			
 
				 	 */
			
 
				 	if (likely(!(atomic_read(&next->membarrier_state) &
			
 
				-		     MEMBARRIER_STATE_PRIVATE_EXPEDITED) || !prev))
			
 
				+		     (MEMBARRIER_STATE_PRIVATE_EXPEDITED |
			
 
				+		      MEMBARRIER_STATE_GLOBAL_EXPEDITED)) || !prev))
			
 
				 		return;
			
 
				 
			
 
				 	/*
			
--- a/include/linux/sched/mm.h
+++ b/include/linux/sched/mm.h
@@ -219,8 +219,10 @@ static inline void memalloc_noreclaim_restore(unsigned int flags)
 
				 
			
 
				 #ifdef CONFIG_MEMBARRIER
			
 
				 enum {
			
 
				-	MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY	= (1U << 0),
			
 
				-	MEMBARRIER_STATE_PRIVATE_EXPEDITED		= (1U << 1),
			
 
				+	MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY		= (1U << 0),
			
 
				+	MEMBARRIER_STATE_PRIVATE_EXPEDITED			= (1U << 1),
			
 
				+	MEMBARRIER_STATE_GLOBAL_EXPEDITED_READY			= (1U << 2),
			
 
				+	MEMBARRIER_STATE_GLOBAL_EXPEDITED			= (1U << 3),
			
 
				 };
			
 
				 
			
 
				 #ifdef CONFIG_ARCH_HAS_MEMBARRIER_CALLBACKS
			
--- a/include/uapi/linux/membarrier.h
+++ b/include/uapi/linux/membarrier.h
@@ -31,7 +31,7 @@
 
				  * enum membarrier_cmd - membarrier system call command
			
 
				  * @MEMBARRIER_CMD_QUERY:   Query the set of supported commands. It returns
			
 
				  *                          a bitmask of valid commands.
			
 
				- * @MEMBARRIER_CMD_SHARED:  Execute a memory barrier on all running threads.
			
 
				+ * @MEMBARRIER_CMD_GLOBAL:  Execute a memory barrier on all running threads.
			
 
				  *                          Upon return from system call, the caller thread
			
 
				  *                          is ensured that all running threads have passed
			
 
				  *                          through a state where all memory accesses to
			
@@ -40,6 +40,28 @@
 
				  *                          (non-running threads are de facto in such a
			
 
				  *                          state). This covers threads from all processes
			
 
				  *                          running on the system. This command returns 0.
			
 
				+ * @MEMBARRIER_CMD_GLOBAL_EXPEDITED:
			
 
				+ *                          Execute a memory barrier on all running threads
			
 
				+ *                          of all processes which previously registered
			
 
				+ *                          with MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED.
			
 
				+ *                          Upon return from system call, the caller thread
			
 
				+ *                          is ensured that all running threads have passed
			
 
				+ *                          through a state where all memory accesses to
			
 
				+ *                          user-space addresses match program order between
			
 
				+ *                          entry to and return from the system call
			
 
				+ *                          (non-running threads are de facto in such a
			
 
				+ *                          state). This only covers threads from processes
			
 
				+ *                          which registered with
			
 
				+ *                          MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED.
			
 
				+ *                          This command returns 0. Given that
			
 
				+ *                          registration is about the intent to receive
			
 
				+ *                          the barriers, it is valid to invoke
			
 
				+ *                          MEMBARRIER_CMD_GLOBAL_EXPEDITED from a
			
 
				+ *                          non-registered process.
			
 
				+ * @MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED:
			
 
				+ *                          Register the process intent to receive
			
 
				+ *                          MEMBARRIER_CMD_GLOBAL_EXPEDITED memory
			
 
				+ *                          barriers. Always returns 0.
			
 
				  * @MEMBARRIER_CMD_PRIVATE_EXPEDITED:
			
 
				  *                          Execute a memory barrier on each running
			
 
				  *                          thread belonging to the same process as the current
			
@@ -64,18 +86,24 @@
 
				  *                          Register the process intent to use
			
 
				  *                          MEMBARRIER_CMD_PRIVATE_EXPEDITED. Always
			
 
				  *                          returns 0.
			
 
				+ * @MEMBARRIER_CMD_SHARED:
			
 
				+ *                          Alias to MEMBARRIER_CMD_GLOBAL. Provided for
			
 
				+ *                          header backward compatibility.
			
 
				  *
			
 
				  * Command to be passed to the membarrier system call. The commands need to
			
 
				  * be a single bit each, except for MEMBARRIER_CMD_QUERY which is assigned to
			
 
				  * the value 0.
			
 
				  */
			
 
				 enum membarrier_cmd {
			
 
				-	MEMBARRIER_CMD_QUERY				= 0,
			
 
				-	MEMBARRIER_CMD_SHARED				= (1 << 0),
			
 
				-	/* reserved for MEMBARRIER_CMD_SHARED_EXPEDITED (1 << 1) */
			
 
				-	/* reserved for MEMBARRIER_CMD_PRIVATE (1 << 2) */
			
 
				-	MEMBARRIER_CMD_PRIVATE_EXPEDITED		= (1 << 3),
			
 
				-	MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED	= (1 << 4),
			
 
				+	MEMBARRIER_CMD_QUERY					= 0,
			
 
				+	MEMBARRIER_CMD_GLOBAL					= (1 << 0),
			
 
				+	MEMBARRIER_CMD_GLOBAL_EXPEDITED				= (1 << 1),
			
 
				+	MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED		= (1 << 2),
			
 
				+	MEMBARRIER_CMD_PRIVATE_EXPEDITED			= (1 << 3),
			
 
				+	MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED		= (1 << 4),
			
 
				+
			
 
				+	/* Alias for header backward compatibility. */
			
 
				+	MEMBARRIER_CMD_SHARED			= MEMBARRIER_CMD_GLOBAL,
			
 
				 };
			
 
				 
			
 
				 #endif /* _UAPI_LINUX_MEMBARRIER_H */
			
--- a/kernel/sched/membarrier.c
+++ b/kernel/sched/membarrier.c
@@ -27,7 +27,9 @@
 
				  * except MEMBARRIER_CMD_QUERY.
			
 
				  */
			
 
				 #define MEMBARRIER_CMD_BITMASK	\
			
 
				-	(MEMBARRIER_CMD_SHARED | MEMBARRIER_CMD_PRIVATE_EXPEDITED	\
			
 
				+	(MEMBARRIER_CMD_GLOBAL | MEMBARRIER_CMD_GLOBAL_EXPEDITED \
			
 
				+	| MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED \
			
 
				+	| MEMBARRIER_CMD_PRIVATE_EXPEDITED	\
			
 
				 	| MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED)
			
 
				 
			
 
				 static void ipi_mb(void *info)
			
@@ -35,6 +37,73 @@ static void ipi_mb(void *info)
 
				 	smp_mb();	/* IPIs should be serializing but paranoid. */
			
 
				 }
			
 
				 
			
 
				+static int membarrier_global_expedited(void)
			
 
				+{
			
 
				+	int cpu;
			
 
				+	bool fallback = false;
			
 
				+	cpumask_var_t tmpmask;
			
 
				+
			
 
				+	if (num_online_cpus() == 1)
			
 
				+		return 0;
			
 
				+
			
 
				+	/*
			
 
				+	 * Matches memory barriers around rq->curr modification in
			
 
				+	 * scheduler.
			
 
				+	 */
			
 
				+	smp_mb();	/* system call entry is not a mb. */
			
 
				+
			
 
				+	/*
			
 
				+	 * Expedited membarrier commands guarantee that they won't
			
 
				+	 * block, hence the GFP_NOWAIT allocation flag and fallback
			
 
				+	 * implementation.
			
 
				+	 */
			
 
				+	if (!zalloc_cpumask_var(&tmpmask, GFP_NOWAIT)) {
			
 
				+		/* Fallback for OOM. */
			
 
				+		fallback = true;
			
 
				+	}
			
 
				+
			
 
				+	cpus_read_lock();
			
 
				+	for_each_online_cpu(cpu) {
			
 
				+		struct task_struct *p;
			
 
				+
			
 
				+		/*
			
 
				+		 * Skipping the current CPU is OK even though we can be
			
 
				+		 * migrated at any point. The current CPU, at the point
			
 
				+		 * where we read raw_smp_processor_id(), is ensured to
			
 
				+		 * be in program order with respect to the caller
			
 
				+		 * thread. Therefore, we can skip this CPU from the
			
 
				+		 * iteration.
			
 
				+		 */
			
 
				+		if (cpu == raw_smp_processor_id())
			
 
				+			continue;
			
 
				+		rcu_read_lock();
			
 
				+		p = task_rcu_dereference(&cpu_rq(cpu)->curr);
			
 
				+		if (p && p->mm && (atomic_read(&p->mm->membarrier_state) &
			
 
				+				   MEMBARRIER_STATE_GLOBAL_EXPEDITED)) {
			
 
				+			if (!fallback)
			
 
				+				__cpumask_set_cpu(cpu, tmpmask);
			
 
				+			else
			
 
				+				smp_call_function_single(cpu, ipi_mb, NULL, 1);
			
 
				+		}
			
 
				+		rcu_read_unlock();
			
 
				+	}
			
 
				+	if (!fallback) {
			
 
				+		preempt_disable();
			
 
				+		smp_call_function_many(tmpmask, ipi_mb, NULL, 1);
			
 
				+		preempt_enable();
			
 
				+		free_cpumask_var(tmpmask);
			
 
				+	}
			
 
				+	cpus_read_unlock();
			
 
				+
			
 
				+	/*
			
 
				+	 * Memory barrier on the caller thread _after_ we finished
			
 
				+	 * waiting for the last IPI. Matches memory barriers around
			
 
				+	 * rq->curr modification in scheduler.
			
 
				+	 */
			
 
				+	smp_mb();	/* exit from system call is not a mb */
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				 static int membarrier_private_expedited(void)
			
 
				 {
			
 
				 	int cpu;
			
@@ -105,7 +174,38 @@ static int membarrier_private_expedited(void)
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				-static void membarrier_register_private_expedited(void)
			
 
				+static int membarrier_register_global_expedited(void)
			
 
				+{
			
 
				+	struct task_struct *p = current;
			
 
				+	struct mm_struct *mm = p->mm;
			
 
				+
			
 
				+	if (atomic_read(&mm->membarrier_state) &
			
 
				+	    MEMBARRIER_STATE_GLOBAL_EXPEDITED_READY)
			
 
				+		return 0;
			
 
				+	atomic_or(MEMBARRIER_STATE_GLOBAL_EXPEDITED, &mm->membarrier_state);
			
 
				+	if (atomic_read(&mm->mm_users) == 1 && get_nr_threads(p) == 1) {
			
 
				+		/*
			
 
				+		 * For single mm user, single threaded process, we can
			
 
				+		 * simply issue a memory barrier after setting
			
 
				+		 * MEMBARRIER_STATE_GLOBAL_EXPEDITED to guarantee that
			
 
				+		 * no memory access following registration is reordered
			
 
				+		 * before registration.
			
 
				+		 */
			
 
				+		smp_mb();
			
 
				+	} else {
			
 
				+		/*
			
 
				+		 * With multiple mm users or threads, we need to ensure all
			
 
				+		 * future scheduler executions will observe the new
			
 
				+		 * thread flag state for this mm.
			
 
				+		 */
			
 
				+		synchronize_sched();
			
 
				+	}
			
 
				+	atomic_or(MEMBARRIER_STATE_GLOBAL_EXPEDITED_READY,
			
 
				+		  &mm->membarrier_state);
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				+static int membarrier_register_private_expedited(void)
			
 
				 {
			
 
				 	struct task_struct *p = current;
			
 
				 	struct mm_struct *mm = p->mm;
			
@@ -117,7 +217,7 @@ static void membarrier_register_private_expedited(void)
 
				 	 */
			
 
				 	if (atomic_read(&mm->membarrier_state)
			
 
				 			& MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY)
			
 
				-		return;
			
 
				+		return 0;
			
 
				 	atomic_or(MEMBARRIER_STATE_PRIVATE_EXPEDITED, &mm->membarrier_state);
			
 
				 	if (!(atomic_read(&mm->mm_users) == 1 && get_nr_threads(p) == 1)) {
			
 
				 		/*
			
@@ -128,6 +228,7 @@ static void membarrier_register_private_expedited(void)
 
				 	}
			
 
				 	atomic_or(MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY,
			
 
				 			&mm->membarrier_state);
			
 
				+	return 0;
			
 
				 }
			
 
				 
			
 
				 /**
			
@@ -167,21 +268,24 @@ SYSCALL_DEFINE2(membarrier, int, cmd, int, flags)
 
				 		int cmd_mask = MEMBARRIER_CMD_BITMASK;
			
 
				 
			
 
				 		if (tick_nohz_full_enabled())
			
 
				-			cmd_mask &= ~MEMBARRIER_CMD_SHARED;
			
 
				+			cmd_mask &= ~MEMBARRIER_CMD_GLOBAL;
			
 
				 		return cmd_mask;
			
 
				 	}
			
 
				-	case MEMBARRIER_CMD_SHARED:
			
 
				-		/* MEMBARRIER_CMD_SHARED is not compatible with nohz_full. */
			
 
				+	case MEMBARRIER_CMD_GLOBAL:
			
 
				+		/* MEMBARRIER_CMD_GLOBAL is not compatible with nohz_full. */
			
 
				 		if (tick_nohz_full_enabled())
			
 
				 			return -EINVAL;
			
 
				 		if (num_online_cpus() > 1)
			
 
				 			synchronize_sched();
			
 
				 		return 0;
			
 
				+	case MEMBARRIER_CMD_GLOBAL_EXPEDITED:
			
 
				+		return membarrier_global_expedited();
			
 
				+	case MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED:
			
 
				+		return membarrier_register_global_expedited();
			
 
				 	case MEMBARRIER_CMD_PRIVATE_EXPEDITED:
			
 
				 		return membarrier_private_expedited();
			
 
				 	case MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED:
			
 
				-		membarrier_register_private_expedited();
			
 
				-		return 0;
			
 
				+		return membarrier_register_private_expedited();
			
 
				 	default:
			
 
				 		return -EINVAL;
			
 
				 	}