9 anni fa · 0f8c790103
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -4140,6 +4140,15 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 
															 			or other driver-specific files in the
														
 
															 			Documentation/watchdog/ directory.
														
 
															+	workqueue.watchdog_thresh=
														
 
															+			If CONFIG_WQ_WATCHDOG is configured, workqueue can
														
 
															+			warn about stall conditions and dump internal state to
														
 
															+			help debugging.  0 disables workqueue stall
														
 
															+			detection; otherwise, it's the stall threshold
														
 
															+			duration in seconds.  The default value is 30 and
														
 
															+			it can be updated at runtime by writing to the
														
 
															+			corresponding sysfs file.
														
 
															+
														
 
															 	workqueue.disable_numa
														
 
															 			By default, all work items queued to unbound
														
 
															 			workqueues are affine to the NUMA nodes they're
														
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -377,6 +377,7 @@ extern void scheduler_tick(void);
 
															 extern void sched_show_task(struct task_struct *p);
														
 
															 #ifdef CONFIG_LOCKUP_DETECTOR
														
 
															+extern void touch_softlockup_watchdog_sched(void);
														
 
															 extern void touch_softlockup_watchdog(void);
														
 
															 extern void touch_softlockup_watchdog_sync(void);
														
 
															 extern void touch_all_softlockup_watchdogs(void);
														
@@ -387,6 +388,9 @@ extern unsigned int  softlockup_panic;
 
															 extern unsigned int  hardlockup_panic;
														
 
															 void lockup_detector_init(void);
														
 
															 #else
														
 
															+static inline void touch_softlockup_watchdog_sched(void)
														
 
															+{
														
 
															+}
														
 
															 static inline void touch_softlockup_watchdog(void)
														
 
															 {
														
 
															 }
														
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -618,4 +618,10 @@ static inline int workqueue_sysfs_register(struct workqueue_struct *wq)
 
															 { return 0; }
														
 
															 #endif	/* CONFIG_SYSFS */
														
 
															+#ifdef CONFIG_WQ_WATCHDOG
														
 
															+void wq_watchdog_touch(int cpu);
														
 
															+#else	/* CONFIG_WQ_WATCHDOG */
														
 
															+static inline void wq_watchdog_touch(int cpu) { }
														
 
															+#endif	/* CONFIG_WQ_WATCHDOG */
														
 
															+
														
 
															 #endif
														
--- a/kernel/sched/clock.c
+++ b/kernel/sched/clock.c
@@ -354,7 +354,7 @@ void sched_clock_idle_wakeup_event(u64 delta_ns)
 
															 		return;
														
 
															 	sched_clock_tick();
														
 
															-	touch_softlockup_watchdog();
														
 
															+	touch_softlockup_watchdog_sched();
														
 
															 }
														
 
															 EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event);
														
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -143,7 +143,7 @@ static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs)
 
															 	 * when we go busy again does not account too much ticks.
														
 
															 	 */
														
 
															 	if (ts->tick_stopped) {
														
 
															-		touch_softlockup_watchdog();
														
 
															+		touch_softlockup_watchdog_sched();
														
 
															 		if (is_idle_task(current))
														
 
															 			ts->idle_jiffies++;
														
 
															 	}
														
@@ -430,7 +430,7 @@ static void tick_nohz_update_jiffies(ktime_t now)
 
															 	tick_do_update_jiffies64(now);
														
 
															 	local_irq_restore(flags);
														
 
															-	touch_softlockup_watchdog();
														
 
															+	touch_softlockup_watchdog_sched();
														
 
															 }
														
 
															 /*
														
@@ -717,7 +717,7 @@ static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now, int
 
															 	update_cpu_load_nohz(active);
														
 
															 	calc_load_exit_idle();
														
 
															-	touch_softlockup_watchdog();
														
 
															+	touch_softlockup_watchdog_sched();
														
 
															 	/*
														
 
															 	 * Cancel the scheduled timer and restore the tick
														
 
															 	 */
														
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -20,6 +20,7 @@
 
															 #include <linux/smpboot.h>
														
 
															 #include <linux/sched/rt.h>
														
 
															 #include <linux/tick.h>
														
 
															+#include <linux/workqueue.h>
														
 
															 #include <asm/irq_regs.h>
														
 
															 #include <linux/kvm_para.h>
														
@@ -225,7 +226,15 @@ static void __touch_watchdog(void)
 
															 	__this_cpu_write(watchdog_touch_ts, get_timestamp());
														
 
															 }
														
 
															-void touch_softlockup_watchdog(void)
														
 
															+/**
														
 
															+ * touch_softlockup_watchdog_sched - touch watchdog on scheduler stalls
														
 
															+ *
														
 
															+ * Call when the scheduler may have stalled for legitimate reasons
														
 
															+ * preventing the watchdog task from executing - e.g. the scheduler
														
 
															+ * entering idle state.  This should only be used for scheduler events.
														
 
															+ * Use touch_softlockup_watchdog() for everything else.
														
 
															+ */
														
 
															+void touch_softlockup_watchdog_sched(void)
														
 
															 {
														
 
															 	/*
														
 
															 	 * Preemption can be enabled.  It doesn't matter which CPU's timestamp
														
@@ -233,6 +242,12 @@ void touch_softlockup_watchdog(void)
 
															 	 */
														
 
															 	raw_cpu_write(watchdog_touch_ts, 0);
														
 
															 }
														
 
															+
														
 
															+void touch_softlockup_watchdog(void)
														
 
															+{
														
 
															+	touch_softlockup_watchdog_sched();
														
 
															+	wq_watchdog_touch(raw_smp_processor_id());
														
 
															+}
														
 
															 EXPORT_SYMBOL(touch_softlockup_watchdog);
														
 
															 void touch_all_softlockup_watchdogs(void)
														
@@ -246,6 +261,7 @@ void touch_all_softlockup_watchdogs(void)
 
															 	 */
														
 
															 	for_each_watchdog_cpu(cpu)
														
 
															 		per_cpu(watchdog_touch_ts, cpu) = 0;
														
 
															+	wq_watchdog_touch(-1);
														
 
															 }
														
 
															 #ifdef CONFIG_HARDLOCKUP_DETECTOR
														
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -148,6 +148,8 @@ struct worker_pool {
 
															 	int			id;		/* I: pool ID */
														
 
															 	unsigned int		flags;		/* X: flags */
														
 
															+	unsigned long		watchdog_ts;	/* L: watchdog timestamp */
														
 
															+
														
 
															 	struct list_head	worklist;	/* L: list of pending works */
														
 
															 	int			nr_workers;	/* L: total number of workers */
														
@@ -1083,6 +1085,8 @@ static void pwq_activate_delayed_work(struct work_struct *work)
 
															 	struct pool_workqueue *pwq = get_work_pwq(work);
														
 
															 	trace_workqueue_activate_work(work);
														
 
															+	if (list_empty(&pwq->pool->worklist))
														
 
															+		pwq->pool->watchdog_ts = jiffies;
														
 
															 	move_linked_works(work, &pwq->pool->worklist, NULL);
														
 
															 	__clear_bit(WORK_STRUCT_DELAYED_BIT, work_data_bits(work));
														
 
															 	pwq->nr_active++;
														
@@ -1385,6 +1389,8 @@ retry:
 
															 		trace_workqueue_activate_work(work);
														
 
															 		pwq->nr_active++;
														
 
															 		worklist = &pwq->pool->worklist;
														
 
															+		if (list_empty(worklist))
														
 
															+			pwq->pool->watchdog_ts = jiffies;
														
 
															 	} else {
														
 
															 		work_flags |= WORK_STRUCT_DELAYED;
														
 
															 		worklist = &pwq->delayed_works;
														
@@ -2157,6 +2163,8 @@ recheck:
 
															 			list_first_entry(&pool->worklist,
														
 
															 					 struct work_struct, entry);
														
 
															+		pool->watchdog_ts = jiffies;
														
 
															+
														
 
															 		if (likely(!(*work_data_bits(work) & WORK_STRUCT_LINKED))) {
														
 
															 			/* optimization path, not strictly necessary */
														
 
															 			process_one_work(worker, work);
														
@@ -2240,6 +2248,7 @@ repeat:
 
															 					struct pool_workqueue, mayday_node);
														
 
															 		struct worker_pool *pool = pwq->pool;
														
 
															 		struct work_struct *work, *n;
														
 
															+		bool first = true;
														
 
															 		__set_current_state(TASK_RUNNING);
														
 
															 		list_del_init(&pwq->mayday_node);
														
@@ -2256,9 +2265,14 @@ repeat:
 
															 		 * process'em.
														
 
															 		 */
														
 
															 		WARN_ON_ONCE(!list_empty(scheduled));
														
 
															-		list_for_each_entry_safe(work, n, &pool->worklist, entry)
														
 
															-			if (get_work_pwq(work) == pwq)
														
 
															+		list_for_each_entry_safe(work, n, &pool->worklist, entry) {
														
 
															+			if (get_work_pwq(work) == pwq) {
														
 
															+				if (first)
														
 
															+					pool->watchdog_ts = jiffies;
														
 
															 				move_linked_works(work, scheduled, &n);
														
 
															+			}
														
 
															+			first = false;
														
 
															+		}
														
 
															 		if (!list_empty(scheduled)) {
														
 
															 			process_scheduled_works(rescuer);
														
@@ -2316,6 +2330,37 @@ repeat:
 
															 	goto repeat;
														
 
															 }
														
 
															+/**
														
 
															+ * check_flush_dependency - check for flush dependency sanity
														
 
															+ * @target_wq: workqueue being flushed
														
 
															+ * @target_work: work item being flushed (NULL for workqueue flushes)
														
 
															+ *
														
 
															+ * %current is trying to flush the whole @target_wq or @target_work on it.
														
 
															+ * If @target_wq doesn't have %WQ_MEM_RECLAIM, verify that %current is not
														
 
															+ * reclaiming memory or running on a workqueue which has
														
 
															+ * %WQ_MEM_RECLAIM as that can break forward-progress guarantee leading to
														
 
															+ * a deadlock.
														
 
															+ */
														
 
															+static void check_flush_dependency(struct workqueue_struct *target_wq,
														
 
															+				   struct work_struct *target_work)
														
 
															+{
														
 
															+	work_func_t target_func = target_work ? target_work->func : NULL;
														
 
															+	struct worker *worker;
														
 
															+
														
 
															+	if (target_wq->flags & WQ_MEM_RECLAIM)
														
 
															+		return;
														
 
															+
														
 
															+	worker = current_wq_worker();
														
 
															+
														
 
															+	WARN_ONCE(current->flags & PF_MEMALLOC,
														
 
															+		  "workqueue: PF_MEMALLOC task %d(%s) is flushing !WQ_MEM_RECLAIM %s:%pf",
														
 
															+		  current->pid, current->comm, target_wq->name, target_func);
														
 
															+	WARN_ONCE(worker && (worker->current_pwq->wq->flags & WQ_MEM_RECLAIM),
														
 
															+		  "workqueue: WQ_MEM_RECLAIM %s:%pf is flushing !WQ_MEM_RECLAIM %s:%pf",
														
 
															+		  worker->current_pwq->wq->name, worker->current_func,
														
 
															+		  target_wq->name, target_func);
														
 
															+}
														
 
															+
														
 
															 struct wq_barrier {
														
 
															 	struct work_struct	work;
														
 
															 	struct completion	done;
														
@@ -2525,6 +2570,8 @@ void flush_workqueue(struct workqueue_struct *wq)
 
															 		list_add_tail(&this_flusher.list, &wq->flusher_overflow);
														
 
															 	}
														
 
															+	check_flush_dependency(wq, NULL);
														
 
															+
														
 
															 	mutex_unlock(&wq->mutex);
														
 
															 	wait_for_completion(&this_flusher.done);
														
@@ -2697,6 +2744,8 @@ static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr)
 
															 		pwq = worker->current_pwq;
														
 
															 	}
														
 
															+	check_flush_dependency(pwq->wq, work);
														
 
															+
														
 
															 	insert_wq_barrier(pwq, barr, work, worker);
														
 
															 	spin_unlock_irq(&pool->lock);
														
@@ -3069,6 +3118,7 @@ static int init_worker_pool(struct worker_pool *pool)
 
															 	pool->cpu = -1;
														
 
															 	pool->node = NUMA_NO_NODE;
														
 
															 	pool->flags |= POOL_DISASSOCIATED;
														
 
															+	pool->watchdog_ts = jiffies;
														
 
															 	INIT_LIST_HEAD(&pool->worklist);
														
 
															 	INIT_LIST_HEAD(&pool->idle_list);
														
 
															 	hash_init(pool->busy_hash);
														
@@ -3601,7 +3651,6 @@ static int apply_workqueue_attrs_locked(struct workqueue_struct *wq,
 
															 					const struct workqueue_attrs *attrs)
														
 
															 {
														
 
															 	struct apply_wqattrs_ctx *ctx;
														
 
															-	int ret = -ENOMEM;
														
 
															 	/* only unbound workqueues can change attributes */
														
 
															 	if (WARN_ON(!(wq->flags & WQ_UNBOUND)))
														
@@ -3612,16 +3661,14 @@ static int apply_workqueue_attrs_locked(struct workqueue_struct *wq,
 
															 		return -EINVAL;
														
 
															 	ctx = apply_wqattrs_prepare(wq, attrs);
														
 
															+	if (!ctx)
														
 
															+		return -ENOMEM;
														
 
															 	/* the ctx has been prepared successfully, let's commit it */
														
 
															-	if (ctx) {
														
 
															-		apply_wqattrs_commit(ctx);
														
 
															-		ret = 0;
														
 
															-	}
														
 
															-
														
 
															+	apply_wqattrs_commit(ctx);
														
 
															 	apply_wqattrs_cleanup(ctx);
														
 
															-	return ret;
														
 
															+	return 0;
														
 
															 }
														
 
															 /**
														
@@ -4308,7 +4355,9 @@ void show_workqueue_state(void)
 
															 		pr_info("pool %d:", pool->id);
														
 
															 		pr_cont_pool_info(pool);
														
 
															-		pr_cont(" workers=%d", pool->nr_workers);
														
 
															+		pr_cont(" hung=%us workers=%d",
														
 
															+			jiffies_to_msecs(jiffies - pool->watchdog_ts) / 1000,
														
 
															+			pool->nr_workers);
														
 
															 		if (pool->manager)
														
 
															 			pr_cont(" manager: %d",
														
 
															 				task_pid_nr(pool->manager->task));
														
@@ -5167,6 +5216,154 @@ static void workqueue_sysfs_unregister(struct workqueue_struct *wq)
 
															 static void workqueue_sysfs_unregister(struct workqueue_struct *wq)	{ }
														
 
															 #endif	/* CONFIG_SYSFS */
														
 
															+/*
														
 
															+ * Workqueue watchdog.
														
 
															+ *
														
 
															+ * Stalls may be caused by various bugs - missing WQ_MEM_RECLAIM, illegal
														
 
															+ * flush dependency, a concurrency managed work item which stays RUNNING
														
 
															+ * indefinitely.  Workqueue stalls can be very difficult to debug as the
														
 
															+ * usual warning mechanisms don't trigger and internal workqueue state is
														
 
															+ * largely opaque.
														
 
															+ *
														
 
															+ * Workqueue watchdog monitors all worker pools periodically and dumps
														
 
															+ * state if some pools failed to make forward progress for a while where
														
 
															+ * forward progress is defined as the first item on ->worklist changing.
														
 
															+ *
														
 
															+ * This mechanism is controlled through the kernel parameter
														
 
															+ * "workqueue.watchdog_thresh" which can be updated at runtime through the
														
 
															+ * corresponding sysfs parameter file.
														
 
															+ */
														
 
															+#ifdef CONFIG_WQ_WATCHDOG
														
 
															+
														
 
															+static void wq_watchdog_timer_fn(unsigned long data);
														
 
															+
														
 
															+static unsigned long wq_watchdog_thresh = 30;
														
 
															+static struct timer_list wq_watchdog_timer =
														
 
															+	TIMER_DEFERRED_INITIALIZER(wq_watchdog_timer_fn, 0, 0);
														
 
															+
														
 
															+static unsigned long wq_watchdog_touched = INITIAL_JIFFIES;
														
 
															+static DEFINE_PER_CPU(unsigned long, wq_watchdog_touched_cpu) = INITIAL_JIFFIES;
														
 
															+
														
 
															+static void wq_watchdog_reset_touched(void)
														
 
															+{
														
 
															+	int cpu;
														
 
															+
														
 
															+	wq_watchdog_touched = jiffies;
														
 
															+	for_each_possible_cpu(cpu)
														
 
															+		per_cpu(wq_watchdog_touched_cpu, cpu) = jiffies;
														
 
															+}
														
 
															+
														
 
															+static void wq_watchdog_timer_fn(unsigned long data)
														
 
															+{
														
 
															+	unsigned long thresh = READ_ONCE(wq_watchdog_thresh) * HZ;
														
 
															+	bool lockup_detected = false;
														
 
															+	struct worker_pool *pool;
														
 
															+	int pi;
														
 
															+
														
 
															+	if (!thresh)
														
 
															+		return;
														
 
															+
														
 
															+	rcu_read_lock();
														
 
															+
														
 
															+	for_each_pool(pool, pi) {
														
 
															+		unsigned long pool_ts, touched, ts;
														
 
															+
														
 
															+		if (list_empty(&pool->worklist))
														
 
															+			continue;
														
 
															+
														
 
															+		/* get the latest of pool and touched timestamps */
														
 
															+		pool_ts = READ_ONCE(pool->watchdog_ts);
														
 
															+		touched = READ_ONCE(wq_watchdog_touched);
														
 
															+
														
 
															+		if (time_after(pool_ts, touched))
														
 
															+			ts = pool_ts;
														
 
															+		else
														
 
															+			ts = touched;
														
 
															+
														
 
															+		if (pool->cpu >= 0) {
														
 
															+			unsigned long cpu_touched =
														
 
															+				READ_ONCE(per_cpu(wq_watchdog_touched_cpu,
														
 
															+						  pool->cpu));
														
 
															+			if (time_after(cpu_touched, ts))
														
 
															+				ts = cpu_touched;
														
 
															+		}
														
 
															+
														
 
															+		/* did we stall? */
														
 
															+		if (time_after(jiffies, ts + thresh)) {
														
 
															+			lockup_detected = true;
														
 
															+			pr_emerg("BUG: workqueue lockup - pool");
														
 
															+			pr_cont_pool_info(pool);
														
 
															+			pr_cont(" stuck for %us!\n",
														
 
															+				jiffies_to_msecs(jiffies - pool_ts) / 1000);
														
 
															+		}
														
 
															+	}
														
 
															+
														
 
															+	rcu_read_unlock();
														
 
															+
														
 
															+	if (lockup_detected)
														
 
															+		show_workqueue_state();
														
 
															+
														
 
															+	wq_watchdog_reset_touched();
														
 
															+	mod_timer(&wq_watchdog_timer, jiffies + thresh);
														
 
															+}
														
 
															+
														
 
															+void wq_watchdog_touch(int cpu)
														
 
															+{
														
 
															+	if (cpu >= 0)
														
 
															+		per_cpu(wq_watchdog_touched_cpu, cpu) = jiffies;
														
 
															+	else
														
 
															+		wq_watchdog_touched = jiffies;
														
 
															+}
														
 
															+
														
 
															+static void wq_watchdog_set_thresh(unsigned long thresh)
														
 
															+{
														
 
															+	wq_watchdog_thresh = 0;
														
 
															+	del_timer_sync(&wq_watchdog_timer);
														
 
															+
														
 
															+	if (thresh) {
														
 
															+		wq_watchdog_thresh = thresh;
														
 
															+		wq_watchdog_reset_touched();
														
 
															+		mod_timer(&wq_watchdog_timer, jiffies + thresh * HZ);
														
 
															+	}
														
 
															+}
														
 
															+
														
 
															+static int wq_watchdog_param_set_thresh(const char *val,
														
 
															+					const struct kernel_param *kp)
														
 
															+{
														
 
															+	unsigned long thresh;
														
 
															+	int ret;
														
 
															+
														
 
															+	ret = kstrtoul(val, 0, &thresh);
														
 
															+	if (ret)
														
 
															+		return ret;
														
 
															+
														
 
															+	if (system_wq)
														
 
															+		wq_watchdog_set_thresh(thresh);
														
 
															+	else
														
 
															+		wq_watchdog_thresh = thresh;
														
 
															+
														
 
															+	return 0;
														
 
															+}
														
 
															+
														
 
															+static const struct kernel_param_ops wq_watchdog_thresh_ops = {
														
 
															+	.set	= wq_watchdog_param_set_thresh,
														
 
															+	.get	= param_get_ulong,
														
 
															+};
														
 
															+
														
 
															+module_param_cb(watchdog_thresh, &wq_watchdog_thresh_ops, &wq_watchdog_thresh,
														
 
															+		0644);
														
 
															+
														
 
															+static void wq_watchdog_init(void)
														
 
															+{
														
 
															+	wq_watchdog_set_thresh(wq_watchdog_thresh);
														
 
															+}
														
 
															+
														
 
															+#else	/* CONFIG_WQ_WATCHDOG */
														
 
															+
														
 
															+static inline void wq_watchdog_init(void) { }
														
 
															+
														
 
															+#endif	/* CONFIG_WQ_WATCHDOG */
														
 
															+
														
 
															 static void __init wq_numa_init(void)
														
 
															 {
														
 
															 	cpumask_var_t *tbl;
														
@@ -5290,6 +5487,9 @@ static int __init init_workqueues(void)
 
															 	       !system_unbound_wq || !system_freezable_wq ||
														
 
															 	       !system_power_efficient_wq ||
														
 
															 	       !system_freezable_power_efficient_wq);
														
 
															+
														
 
															+	wq_watchdog_init();
														
 
															+
														
 
															 	return 0;
														
 
															 }
														
 
															 early_initcall(init_workqueues);
														
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -812,6 +812,17 @@ config BOOTPARAM_HUNG_TASK_PANIC_VALUE
 
															 	default 0 if !BOOTPARAM_HUNG_TASK_PANIC
														
 
															 	default 1 if BOOTPARAM_HUNG_TASK_PANIC
														
 
															+config WQ_WATCHDOG
														
 
															+	bool "Detect Workqueue Stalls"
														
 
															+	depends on DEBUG_KERNEL
														
 
															+	help
														
 
															+	  Say Y here to enable stall detection on workqueues.  If a
														
 
															+	  worker pool doesn't make forward progress on a pending work
														
 
															+	  item for over a given amount of time, 30s by default, a
														
 
															+	  warning message is printed along with a dump of workqueue
														
 
															+	  state.  This can be configured through kernel parameter
														
 
															+	  "workqueue.watchdog_thresh" and its sysfs counterpart.
														
 
															+
														
 
															 endmenu # "Debug lockups and hangs"
														
 
															 config PANIC_ON_OOPS