|
@@ -148,6 +148,8 @@ struct worker_pool {
|
|
int id; /* I: pool ID */
|
|
int id; /* I: pool ID */
|
|
unsigned int flags; /* X: flags */
|
|
unsigned int flags; /* X: flags */
|
|
|
|
|
|
|
|
+ unsigned long watchdog_ts; /* L: watchdog timestamp */
|
|
|
|
+
|
|
struct list_head worklist; /* L: list of pending works */
|
|
struct list_head worklist; /* L: list of pending works */
|
|
int nr_workers; /* L: total number of workers */
|
|
int nr_workers; /* L: total number of workers */
|
|
|
|
|
|
@@ -1083,6 +1085,8 @@ static void pwq_activate_delayed_work(struct work_struct *work)
|
|
struct pool_workqueue *pwq = get_work_pwq(work);
|
|
struct pool_workqueue *pwq = get_work_pwq(work);
|
|
|
|
|
|
trace_workqueue_activate_work(work);
|
|
trace_workqueue_activate_work(work);
|
|
|
|
+ if (list_empty(&pwq->pool->worklist))
|
|
|
|
+ pwq->pool->watchdog_ts = jiffies;
|
|
move_linked_works(work, &pwq->pool->worklist, NULL);
|
|
move_linked_works(work, &pwq->pool->worklist, NULL);
|
|
__clear_bit(WORK_STRUCT_DELAYED_BIT, work_data_bits(work));
|
|
__clear_bit(WORK_STRUCT_DELAYED_BIT, work_data_bits(work));
|
|
pwq->nr_active++;
|
|
pwq->nr_active++;
|
|
@@ -1385,6 +1389,8 @@ retry:
|
|
trace_workqueue_activate_work(work);
|
|
trace_workqueue_activate_work(work);
|
|
pwq->nr_active++;
|
|
pwq->nr_active++;
|
|
worklist = &pwq->pool->worklist;
|
|
worklist = &pwq->pool->worklist;
|
|
|
|
+ if (list_empty(worklist))
|
|
|
|
+ pwq->pool->watchdog_ts = jiffies;
|
|
} else {
|
|
} else {
|
|
work_flags |= WORK_STRUCT_DELAYED;
|
|
work_flags |= WORK_STRUCT_DELAYED;
|
|
worklist = &pwq->delayed_works;
|
|
worklist = &pwq->delayed_works;
|
|
@@ -2157,6 +2163,8 @@ recheck:
|
|
list_first_entry(&pool->worklist,
|
|
list_first_entry(&pool->worklist,
|
|
struct work_struct, entry);
|
|
struct work_struct, entry);
|
|
|
|
|
|
|
|
+ pool->watchdog_ts = jiffies;
|
|
|
|
+
|
|
if (likely(!(*work_data_bits(work) & WORK_STRUCT_LINKED))) {
|
|
if (likely(!(*work_data_bits(work) & WORK_STRUCT_LINKED))) {
|
|
/* optimization path, not strictly necessary */
|
|
/* optimization path, not strictly necessary */
|
|
process_one_work(worker, work);
|
|
process_one_work(worker, work);
|
|
@@ -2240,6 +2248,7 @@ repeat:
|
|
struct pool_workqueue, mayday_node);
|
|
struct pool_workqueue, mayday_node);
|
|
struct worker_pool *pool = pwq->pool;
|
|
struct worker_pool *pool = pwq->pool;
|
|
struct work_struct *work, *n;
|
|
struct work_struct *work, *n;
|
|
|
|
+ bool first = true;
|
|
|
|
|
|
__set_current_state(TASK_RUNNING);
|
|
__set_current_state(TASK_RUNNING);
|
|
list_del_init(&pwq->mayday_node);
|
|
list_del_init(&pwq->mayday_node);
|
|
@@ -2256,9 +2265,14 @@ repeat:
|
|
* process'em.
|
|
* process'em.
|
|
*/
|
|
*/
|
|
WARN_ON_ONCE(!list_empty(scheduled));
|
|
WARN_ON_ONCE(!list_empty(scheduled));
|
|
- list_for_each_entry_safe(work, n, &pool->worklist, entry)
|
|
|
|
- if (get_work_pwq(work) == pwq)
|
|
|
|
|
|
+ list_for_each_entry_safe(work, n, &pool->worklist, entry) {
|
|
|
|
+ if (get_work_pwq(work) == pwq) {
|
|
|
|
+ if (first)
|
|
|
|
+ pool->watchdog_ts = jiffies;
|
|
move_linked_works(work, scheduled, &n);
|
|
move_linked_works(work, scheduled, &n);
|
|
|
|
+ }
|
|
|
|
+ first = false;
|
|
|
|
+ }
|
|
|
|
|
|
if (!list_empty(scheduled)) {
|
|
if (!list_empty(scheduled)) {
|
|
process_scheduled_works(rescuer);
|
|
process_scheduled_works(rescuer);
|
|
@@ -2316,6 +2330,37 @@ repeat:
|
|
goto repeat;
|
|
goto repeat;
|
|
}
|
|
}
|
|
|
|
|
|
|
|
+/**
+ * check_flush_dependency - sanity check the context a flush is issued from
+ * @target_wq: workqueue being flushed
+ * @target_work: work item being flushed (NULL when flushing the whole workqueue)
+ *
+ * Nothing is checked when @target_wq has %WQ_MEM_RECLAIM.  Otherwise warn
+ * once if %current has %PF_MEMALLOC set, and warn once if %current is a
+ * workqueue worker whose current work item belongs to a %WQ_MEM_RECLAIM
+ * workqueue.  Waiting on a !%WQ_MEM_RECLAIM workqueue from either context
+ * can break the forward-progress guarantee and lead to a deadlock.
+ */
+static void check_flush_dependency(struct workqueue_struct *target_wq,
+				   struct work_struct *target_work)
+{
+	work_func_t target_func = NULL;
+	struct worker *worker;
+
+	/* flushing a rescuer-backed workqueue is always fine */
+	if (target_wq->flags & WQ_MEM_RECLAIM)
+		return;
+
+	/* only used to make the warnings below more informative */
+	if (target_work)
+		target_func = target_work->func;
+
+	worker = current_wq_worker();
+
+	WARN_ONCE(current->flags & PF_MEMALLOC,
+		  "workqueue: PF_MEMALLOC task %d(%s) is flushing !WQ_MEM_RECLAIM %s:%pf",
+		  current->pid, current->comm, target_wq->name, target_func);
+
+	/* the message arguments are only evaluated when the condition holds */
+	WARN_ONCE(worker && (worker->current_pwq->wq->flags & WQ_MEM_RECLAIM),
+		  "workqueue: WQ_MEM_RECLAIM %s:%pf is flushing !WQ_MEM_RECLAIM %s:%pf",
+		  worker->current_pwq->wq->name, worker->current_func,
+		  target_wq->name, target_func);
+}
|
|
|
|
+
|
|
struct wq_barrier {
|
|
struct wq_barrier {
|
|
struct work_struct work;
|
|
struct work_struct work;
|
|
struct completion done;
|
|
struct completion done;
|
|
@@ -2525,6 +2570,8 @@ void flush_workqueue(struct workqueue_struct *wq)
|
|
list_add_tail(&this_flusher.list, &wq->flusher_overflow);
|
|
list_add_tail(&this_flusher.list, &wq->flusher_overflow);
|
|
}
|
|
}
|
|
|
|
|
|
|
|
+ check_flush_dependency(wq, NULL);
|
|
|
|
+
|
|
mutex_unlock(&wq->mutex);
|
|
mutex_unlock(&wq->mutex);
|
|
|
|
|
|
wait_for_completion(&this_flusher.done);
|
|
wait_for_completion(&this_flusher.done);
|
|
@@ -2697,6 +2744,8 @@ static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr)
|
|
pwq = worker->current_pwq;
|
|
pwq = worker->current_pwq;
|
|
}
|
|
}
|
|
|
|
|
|
|
|
+ check_flush_dependency(pwq->wq, work);
|
|
|
|
+
|
|
insert_wq_barrier(pwq, barr, work, worker);
|
|
insert_wq_barrier(pwq, barr, work, worker);
|
|
spin_unlock_irq(&pool->lock);
|
|
spin_unlock_irq(&pool->lock);
|
|
|
|
|
|
@@ -3069,6 +3118,7 @@ static int init_worker_pool(struct worker_pool *pool)
|
|
pool->cpu = -1;
|
|
pool->cpu = -1;
|
|
pool->node = NUMA_NO_NODE;
|
|
pool->node = NUMA_NO_NODE;
|
|
pool->flags |= POOL_DISASSOCIATED;
|
|
pool->flags |= POOL_DISASSOCIATED;
|
|
|
|
+ pool->watchdog_ts = jiffies;
|
|
INIT_LIST_HEAD(&pool->worklist);
|
|
INIT_LIST_HEAD(&pool->worklist);
|
|
INIT_LIST_HEAD(&pool->idle_list);
|
|
INIT_LIST_HEAD(&pool->idle_list);
|
|
hash_init(pool->busy_hash);
|
|
hash_init(pool->busy_hash);
|
|
@@ -3601,7 +3651,6 @@ static int apply_workqueue_attrs_locked(struct workqueue_struct *wq,
|
|
const struct workqueue_attrs *attrs)
|
|
const struct workqueue_attrs *attrs)
|
|
{
|
|
{
|
|
struct apply_wqattrs_ctx *ctx;
|
|
struct apply_wqattrs_ctx *ctx;
|
|
- int ret = -ENOMEM;
|
|
|
|
|
|
|
|
/* only unbound workqueues can change attributes */
|
|
/* only unbound workqueues can change attributes */
|
|
if (WARN_ON(!(wq->flags & WQ_UNBOUND)))
|
|
if (WARN_ON(!(wq->flags & WQ_UNBOUND)))
|
|
@@ -3612,16 +3661,14 @@ static int apply_workqueue_attrs_locked(struct workqueue_struct *wq,
|
|
return -EINVAL;
|
|
return -EINVAL;
|
|
|
|
|
|
ctx = apply_wqattrs_prepare(wq, attrs);
|
|
ctx = apply_wqattrs_prepare(wq, attrs);
|
|
|
|
+ if (!ctx)
|
|
|
|
+ return -ENOMEM;
|
|
|
|
|
|
/* the ctx has been prepared successfully, let's commit it */
|
|
/* the ctx has been prepared successfully, let's commit it */
|
|
- if (ctx) {
|
|
|
|
- apply_wqattrs_commit(ctx);
|
|
|
|
- ret = 0;
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
|
|
+ apply_wqattrs_commit(ctx);
|
|
apply_wqattrs_cleanup(ctx);
|
|
apply_wqattrs_cleanup(ctx);
|
|
|
|
|
|
- return ret;
|
|
|
|
|
|
+ return 0;
|
|
}
|
|
}
|
|
|
|
|
|
/**
|
|
/**
|
|
@@ -4308,7 +4355,9 @@ void show_workqueue_state(void)
|
|
|
|
|
|
pr_info("pool %d:", pool->id);
|
|
pr_info("pool %d:", pool->id);
|
|
pr_cont_pool_info(pool);
|
|
pr_cont_pool_info(pool);
|
|
- pr_cont(" workers=%d", pool->nr_workers);
|
|
|
|
|
|
+ pr_cont(" hung=%us workers=%d",
|
|
|
|
+ jiffies_to_msecs(jiffies - pool->watchdog_ts) / 1000,
|
|
|
|
+ pool->nr_workers);
|
|
if (pool->manager)
|
|
if (pool->manager)
|
|
pr_cont(" manager: %d",
|
|
pr_cont(" manager: %d",
|
|
task_pid_nr(pool->manager->task));
|
|
task_pid_nr(pool->manager->task));
|
|
@@ -5167,6 +5216,154 @@ static void workqueue_sysfs_unregister(struct workqueue_struct *wq)
|
|
static void workqueue_sysfs_unregister(struct workqueue_struct *wq) { }
|
|
static void workqueue_sysfs_unregister(struct workqueue_struct *wq) { }
|
|
#endif /* CONFIG_SYSFS */
|
|
#endif /* CONFIG_SYSFS */
|
|
|
|
|
|
|
|
+/*
|
|
|
|
+ * Workqueue watchdog.
|
|
|
|
+ *
|
|
|
|
+ * Stalls may be caused by various bugs - missing WQ_MEM_RECLAIM, illegal
|
|
|
|
+ * flush dependency, or a concurrency managed work item which stays RUNNING
|
|
|
|
+ * indefinitely. Workqueue stalls can be very difficult to debug as the
|
|
|
|
+ * usual warning mechanisms don't trigger and internal workqueue state is
|
|
|
|
+ * largely opaque.
|
|
|
|
+ *
|
|
|
|
+ * Workqueue watchdog monitors all worker pools periodically and dumps
|
|
|
|
+ * state if some pools failed to make forward progress for a while where
|
|
|
|
+ * forward progress is defined as the first item on ->worklist changing.
|
|
|
|
+ *
|
|
|
|
+ * This mechanism is controlled through the kernel parameter
|
|
|
|
+ * "workqueue.watchdog_thresh" which can be updated at runtime through the
|
|
|
|
+ * corresponding sysfs parameter file.
|
|
|
|
+ */
|
|
|
|
+#ifdef CONFIG_WQ_WATCHDOG
|
|
|
|
+
|
|
|
|
+static void wq_watchdog_timer_fn(unsigned long data);
|
|
|
|
+/* stall threshold in seconds (scaled by HZ where used); 0 turns the watchdog off */
|
|
|
|
+static unsigned long wq_watchdog_thresh = 30;
|
|
|
|
+static struct timer_list wq_watchdog_timer =
|
|
|
|
+ TIMER_DEFERRED_INITIALIZER(wq_watchdog_timer_fn, 0, 0);
|
|
|
|
+/* jiffies of the most recent global and per-CPU watchdog touch */
|
|
|
|
+static unsigned long wq_watchdog_touched = INITIAL_JIFFIES;
|
|
|
|
+static DEFINE_PER_CPU(unsigned long, wq_watchdog_touched_cpu) = INITIAL_JIFFIES;
|
|
|
|
+
|
|
|
|
+/*
+ * Restart the stall clock: stamp the global touch marker and the touch
+ * marker of every possible CPU with the current jiffies.
+ */
+static void wq_watchdog_reset_touched(void)
+{
+	int i;
+
+	wq_watchdog_touched = jiffies;
+
+	for_each_possible_cpu(i)
+		per_cpu(wq_watchdog_touched_cpu, i) = jiffies;
+}
|
|
|
|
+
|
|
|
|
+/*
+ * Watchdog timer callback.  Walks every worker pool under RCU and reports
+ * the ones whose pending work has not progressed for longer than the
+ * configured threshold, then dumps the workqueue state if any pool was
+ * reported, resets the touch markers and re-arms itself.
+ *
+ * @data is unused.
+ */
+static void wq_watchdog_timer_fn(unsigned long data)
+{
+	unsigned long thresh = READ_ONCE(wq_watchdog_thresh) * HZ;
+	struct worker_pool *pool;
+	bool stalled = false;
+	int pi;
+
+	/* a zero threshold means the watchdog is off - don't re-arm */
+	if (!thresh)
+		return;
+
+	rcu_read_lock();
+
+	for_each_pool(pool, pi) {
+		unsigned long pool_ts, touched, ts;
+
+		/* a pool with nothing pending cannot be stalled */
+		if (list_empty(&pool->worklist))
+			continue;
+
+		/* start from the later of the pool and global timestamps */
+		pool_ts = READ_ONCE(pool->watchdog_ts);
+		touched = READ_ONCE(wq_watchdog_touched);
+		ts = time_after(pool_ts, touched) ? pool_ts : touched;
+
+		/* per-cpu pools also honour the touch marker of their CPU */
+		if (pool->cpu >= 0) {
+			unsigned long cpu_touched =
+				READ_ONCE(per_cpu(wq_watchdog_touched_cpu,
+						  pool->cpu));
+
+			if (time_after(cpu_touched, ts))
+				ts = cpu_touched;
+		}
+
+		/* still within the threshold? */
+		if (!time_after(jiffies, ts + thresh))
+			continue;
+
+		stalled = true;
+		pr_emerg("BUG: workqueue lockup - pool");
+		pr_cont_pool_info(pool);
+		/* the reported duration is measured from the pool timestamp */
+		pr_cont(" stuck for %us!\n",
+			jiffies_to_msecs(jiffies - pool_ts) / 1000);
+	}
+
+	rcu_read_unlock();
+
+	if (stalled)
+		show_workqueue_state();
+
+	wq_watchdog_reset_touched();
+	mod_timer(&wq_watchdog_timer, jiffies + thresh);
+}
|
|
|
|
+
|
|
|
|
+/*
+ * Record a watchdog touch at the current jiffies.  A negative @cpu updates
+ * the global touch marker; otherwise only the marker of @cpu is updated.
+ */
+void wq_watchdog_touch(int cpu)
+{
+	if (cpu < 0) {
+		wq_watchdog_touched = jiffies;
+		return;
+	}
+
+	per_cpu(wq_watchdog_touched_cpu, cpu) = jiffies;
+}
|
|
|
|
+
|
|
|
|
+/*
+ * Switch the watchdog to @thresh seconds.  The timer is always stopped
+ * first and is only restarted when @thresh is non-zero.
+ */
+static void wq_watchdog_set_thresh(unsigned long thresh)
+{
+	/*
+	 * Zero the threshold before killing the timer: the timer callback
+	 * returns without re-arming when it reads a zero threshold.
+	 */
+	wq_watchdog_thresh = 0;
+	del_timer_sync(&wq_watchdog_timer);
+
+	if (!thresh)
+		return;
+
+	wq_watchdog_thresh = thresh;
+	wq_watchdog_reset_touched();
+	mod_timer(&wq_watchdog_timer, jiffies + thresh * HZ);
+}
|
|
|
|
+
|
|
|
|
+/*
+ * Parameter "set" handler for the watchdog threshold.  Parses @val as an
+ * unsigned long (base auto-detected) and applies it.  @kp is unused.
+ *
+ * Returns 0 on success or the kstrtoul() error code if @val cannot be
+ * parsed.
+ */
+static int wq_watchdog_param_set_thresh(const char *val,
+					const struct kernel_param *kp)
+{
+	unsigned long new_thresh;
+	int err;
+
+	err = kstrtoul(val, 0, &new_thresh);
+	if (err)
+		return err;
+
+	/*
+	 * Without system_wq just record the value; wq_watchdog_init() arms
+	 * the timer with whatever is stored at that point.
+	 * NOTE(review): presumably this is the early-boot case - confirm.
+	 */
+	if (!system_wq) {
+		wq_watchdog_thresh = new_thresh;
+		return 0;
+	}
+
+	wq_watchdog_set_thresh(new_thresh);
+	return 0;
+}
|
|
|
|
+/* parameter ops: writes go through wq_watchdog_param_set_thresh(), reads use param_get_ulong */
|
|
|
|
+static const struct kernel_param_ops wq_watchdog_thresh_ops = {
|
|
|
|
+ .set = wq_watchdog_param_set_thresh,
|
|
|
|
+ .get = param_get_ulong,
|
|
|
|
+};
|
|
|
|
+/* expose wq_watchdog_thresh as the "watchdog_thresh" parameter, mode 0644 */
|
|
|
|
+module_param_cb(watchdog_thresh, &wq_watchdog_thresh_ops, &wq_watchdog_thresh,
|
|
|
|
+ 0644);
|
|
|
|
+/* apply the threshold currently stored in wq_watchdog_thresh (arms the timer if non-zero) */
|
|
|
|
+static void wq_watchdog_init(void)
|
|
|
|
+{
|
|
|
|
+ wq_watchdog_set_thresh(wq_watchdog_thresh);
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+#else /* CONFIG_WQ_WATCHDOG */
|
|
|
|
+/* watchdog compiled out: initialization is a no-op */
|
|
|
|
+static inline void wq_watchdog_init(void) { }
|
|
|
|
+
|
|
|
|
+#endif /* CONFIG_WQ_WATCHDOG */
|
|
|
|
+
|
|
static void __init wq_numa_init(void)
|
|
static void __init wq_numa_init(void)
|
|
{
|
|
{
|
|
cpumask_var_t *tbl;
|
|
cpumask_var_t *tbl;
|
|
@@ -5290,6 +5487,9 @@ static int __init init_workqueues(void)
|
|
!system_unbound_wq || !system_freezable_wq ||
|
|
!system_unbound_wq || !system_freezable_wq ||
|
|
!system_power_efficient_wq ||
|
|
!system_power_efficient_wq ||
|
|
!system_freezable_power_efficient_wq);
|
|
!system_freezable_power_efficient_wq);
|
|
|
|
+
|
|
|
|
+ wq_watchdog_init();
|
|
|
|
+
|
|
return 0;
|
|
return 0;
|
|
}
|
|
}
|
|
early_initcall(init_workqueues);
|
|
early_initcall(init_workqueues);
|