9 năm trước cách đây · 9aece75c13
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -4235,6 +4235,17 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 
				 			The default value of this parameter is determined by
			
 
				 			the config option CONFIG_WQ_POWER_EFFICIENT_DEFAULT.
			
 
				 
			
 
				+	workqueue.debug_force_rr_cpu
			
 
				+			Workqueue used to implicitly guarantee that work
			
 
				+			items queued without explicit CPU specified are put
			
 
				+			on the local CPU.  This guarantee is no longer true
			
 
				+			and, while local CPU is still preferred, work items
			
 
				+			may be put on foreign CPUs.  This debug option
			
 
				+			forces round-robin CPU selection to flush out
			
 
				+			usages which depend on the now broken guarantee.
			
 
				+			When enabled, memory and cache locality will be
			
 
				+			impacted.
			
 
				+
			
 
				 	x2apic_phys	[X86-64,APIC] Use x2apic physical mode instead of
			
 
				 			default x2apic cluster mode on platforms
			
 
				 			supporting x2apic.
			
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -311,6 +311,7 @@ enum {
 
				 
			
 
				 	__WQ_DRAINING		= 1 << 16, /* internal: workqueue is draining */
			
 
				 	__WQ_ORDERED		= 1 << 17, /* internal: workqueue is ordered */
			
 
				+	__WQ_LEGACY		= 1 << 18, /* internal: create*_workqueue() */
			
 
				 
			
 
				 	WQ_MAX_ACTIVE		= 512,	  /* I like 512, better ideas? */
			
 
				 	WQ_MAX_UNBOUND_PER_CPU	= 4,	  /* 4 * #cpus for unbound wq */
			
@@ -411,12 +412,12 @@ __alloc_workqueue_key(const char *fmt, unsigned int flags, int max_active,
 
				 	alloc_workqueue(fmt, WQ_UNBOUND | __WQ_ORDERED | (flags), 1, ##args)
			
 
				 
			
 
				 #define create_workqueue(name)						\
			
 
				-	alloc_workqueue("%s", WQ_MEM_RECLAIM, 1, (name))
			
 
				+	alloc_workqueue("%s", __WQ_LEGACY | WQ_MEM_RECLAIM, 1, (name))
			
 
				 #define create_freezable_workqueue(name)				\
			
 
				-	alloc_workqueue("%s", WQ_FREEZABLE | WQ_UNBOUND | WQ_MEM_RECLAIM, \
			
 
				-			1, (name))
			
 
				+	alloc_workqueue("%s", __WQ_LEGACY | WQ_FREEZABLE | WQ_UNBOUND |	\
			
 
				+			WQ_MEM_RECLAIM, 1, (name))
			
 
				 #define create_singlethread_workqueue(name)				\
			
 
				-	alloc_ordered_workqueue("%s", WQ_MEM_RECLAIM, name)
			
 
				+	alloc_ordered_workqueue("%s", __WQ_LEGACY | WQ_MEM_RECLAIM, name)
			
 
				 
			
 
				 extern void destroy_workqueue(struct workqueue_struct *wq);
			
 
				 
			
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -301,7 +301,23 @@ static DEFINE_SPINLOCK(wq_mayday_lock);	/* protects wq->maydays list */
 
				 static LIST_HEAD(workqueues);		/* PR: list of all workqueues */
			
 
				 static bool workqueue_freezing;		/* PL: have wqs started freezing? */
			
 
				 
			
 
				-static cpumask_var_t wq_unbound_cpumask; /* PL: low level cpumask for all unbound wqs */
			
 
				+/* PL: allowable cpus for unbound wqs and work items */
			
 
				+static cpumask_var_t wq_unbound_cpumask;
			
 
				+
			
 
				+/* CPU to which this CPU last round-robin scheduled unbound work */
			
 
				+static DEFINE_PER_CPU(int, wq_rr_cpu_last);
			
 
				+
			
 
				+/*
			
 
				+ * Local execution of unbound work items is no longer guaranteed.  The
			
 
				+ * following always forces round-robin CPU selection on unbound work items
			
 
				+ * to uncover usages which depend on local execution.
			
 
				+ */
			
 
				+#ifdef CONFIG_DEBUG_WQ_FORCE_RR_CPU
			
 
				+static bool wq_debug_force_rr_cpu = true;
			
 
				+#else
			
 
				+static bool wq_debug_force_rr_cpu = false;
			
 
				+#endif
			
 
				+module_param_named(debug_force_rr_cpu, wq_debug_force_rr_cpu, bool, 0644);
			
 
				 
			
 
				 /* the per-cpu worker pools */
			
 
				 static DEFINE_PER_CPU_SHARED_ALIGNED(struct worker_pool [NR_STD_WORKER_POOLS],
			
@@ -570,6 +586,16 @@ static struct pool_workqueue *unbound_pwq_by_node(struct workqueue_struct *wq,
 
				 						  int node)
			
 
				 {
			
 
				 	assert_rcu_or_wq_mutex_or_pool_mutex(wq);
			
 
				+
			
 
				+	/*
			
 
				+	 * XXX: @node can be NUMA_NO_NODE if CPU goes offline while a
			
 
				+	 * delayed item is pending.  The plan is to keep CPU -> NODE
			
 
				+	 * mapping valid and stable across CPU on/offlines.  Once that
			
 
				+	 * happens, this workaround can be removed.
			
 
				+	 */
			
 
				+	if (unlikely(node == NUMA_NO_NODE))
			
 
				+		return wq->dfl_pwq;
			
 
				+
			
 
				 	return rcu_dereference_raw(wq->numa_pwq_tbl[node]);
			
 
				 }
			
 
				 
			
@@ -1298,6 +1324,39 @@ static bool is_chained_work(struct workqueue_struct *wq)
 
				 	return worker && worker->current_pwq->wq == wq;
			
 
				 }
			
 
				 
			
 
				+/*
			
 
				+ * Pick the CPU for an unbound work item: @cpu itself if wq_unbound_cpumask
			
 
				+ * allows it and round-robin isn't forced, else the next allowed online CPU
			
 
				+ * (so sensitive tasks aren't perturbed); falls back to @cpu if none exists.
			
 
				+ */
			
 
				+static int wq_select_unbound_cpu(int cpu)
			
 
				+{
			
 
				+	static bool printed_dbg_warning;
			
 
				+	int new_cpu;
			
 
				+
			
 
				+	if (likely(!wq_debug_force_rr_cpu)) {
			
 
				+		if (cpumask_test_cpu(cpu, wq_unbound_cpumask))
			
 
				+			return cpu;
			
 
				+	} else if (!printed_dbg_warning) {
			
 
				+		pr_warn("workqueue: round-robin CPU selection forced, expect performance impact\n");
			
 
				+		printed_dbg_warning = true;
			
 
				+	}
			
 
				+
			
 
				+	if (cpumask_empty(wq_unbound_cpumask))
			
 
				+		return cpu;	/* no CPU is allowed: keep @cpu */
			
 
				+
			
 
				+	new_cpu = __this_cpu_read(wq_rr_cpu_last);
			
 
				+	new_cpu = cpumask_next_and(new_cpu, wq_unbound_cpumask, cpu_online_mask);
			
 
				+	if (unlikely(new_cpu >= nr_cpu_ids)) {	/* past the end: wrap */
			
 
				+		new_cpu = cpumask_first_and(wq_unbound_cpumask, cpu_online_mask);
			
 
				+		if (unlikely(new_cpu >= nr_cpu_ids))
			
 
				+			return cpu;	/* no allowed CPU is online */
			
 
				+	}
			
 
				+	__this_cpu_write(wq_rr_cpu_last, new_cpu);
			
 
				+
			
 
				+	return new_cpu;
			
 
				+}
			
 
				+
			
 
				 static void __queue_work(int cpu, struct workqueue_struct *wq,
			
 
				 			 struct work_struct *work)
			
 
				 {
			
@@ -1323,7 +1382,7 @@ static void __queue_work(int cpu, struct workqueue_struct *wq,
 
				 		return;
			
 
				 retry:
			
 
				 	if (req_cpu == WORK_CPU_UNBOUND)
			
 
				-		cpu = raw_smp_processor_id();
			
 
				+		cpu = wq_select_unbound_cpu(raw_smp_processor_id());
			
 
				 
			
 
				 	/* pwq which will be used unless @work is executing elsewhere */
			
 
				 	if (!(wq->flags & WQ_UNBOUND))
			
@@ -1464,13 +1523,13 @@ static void __queue_delayed_work(int cpu, struct workqueue_struct *wq,
 
				 	timer_stats_timer_set_start_info(&dwork->timer);
			
 
				 
			
 
				 	dwork->wq = wq;
			
 
				-	/* timer isn't guaranteed to run in this cpu, record earlier */
			
 
				-	if (cpu == WORK_CPU_UNBOUND)
			
 
				-		cpu = raw_smp_processor_id();
			
 
				 	dwork->cpu = cpu;
			
 
				 	timer->expires = jiffies + delay;
			
 
				 
			
 
				-	add_timer_on(timer, cpu);
			
 
				+	if (unlikely(cpu != WORK_CPU_UNBOUND))
			
 
				+		add_timer_on(timer, cpu);
			
 
				+	else
			
 
				+		add_timer(timer);
			
 
				 }
			
 
				 
			
 
				 /**
			
@@ -2355,7 +2414,8 @@ static void check_flush_dependency(struct workqueue_struct *target_wq,
 
				 	WARN_ONCE(current->flags & PF_MEMALLOC,
			
 
				 		  "workqueue: PF_MEMALLOC task %d(%s) is flushing !WQ_MEM_RECLAIM %s:%pf",
			
 
				 		  current->pid, current->comm, target_wq->name, target_func);
			
 
				-	WARN_ONCE(worker && (worker->current_pwq->wq->flags & WQ_MEM_RECLAIM),
			
 
				+	WARN_ONCE(worker && ((worker->current_pwq->wq->flags &
			
 
				+			      (WQ_MEM_RECLAIM | __WQ_LEGACY)) == WQ_MEM_RECLAIM),
			
 
				 		  "workqueue: WQ_MEM_RECLAIM %s:%pf is flushing !WQ_MEM_RECLAIM %s:%pf",
			
 
				 		  worker->current_pwq->wq->name, worker->current_func,
			
 
				 		  target_wq->name, target_func);
			
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1400,6 +1400,21 @@ config RCU_EQS_DEBUG
 
				 
			
 
				 endmenu # "RCU Debugging"
			
 
				 
			
 
				+config DEBUG_WQ_FORCE_RR_CPU
			
 
				+	bool "Force round-robin CPU selection for unbound work items"
			
 
				+	depends on DEBUG_KERNEL
			
 
				+	default n
			
 
				+	help
			
 
				+	  Workqueue used to implicitly guarantee that work items queued
			
 
				+	  without explicit CPU specified are put on the local CPU.  This
			
 
				+	  guarantee is no longer true and, while local CPU is still
			
 
				+	  preferred, work items may be put on foreign CPUs.  Kernel
			
 
				+	  parameter "workqueue.debug_force_rr_cpu" is added to force
			
 
				+	  round-robin CPU selection to flush out usages which depend on the
			
 
				+	  now broken guarantee.  This config option enables the debug
			
 
				+	  feature by default.  When enabled, memory and cache locality will
			
 
				+	  be impacted.
			
 
				+
			
 
				 config DEBUG_BLOCK_EXT_DEVT
			
 
				         bool "Force extended block device numbers and spread them"
			
 
				 	depends on DEBUG_KERNEL