
Merge branch 'for-4.13-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/wq

Pull workqueue fixes from Tejun Heo:
 "Two notable fixes.

   - While adding NUMA affinity support to unbound workqueues, the
     assumption that an unbound workqueue with max_active == 1 is
     ordered was broken.

     The plan was to use explicit alloc_ordered_workqueue() for those
     cases. Unfortunately, I forgot to update the documentation properly
     and we grew a handful of use cases which depend on that assumption.

     While we want to convert them to alloc_ordered_workqueue(), we
     don't really lose anything by enforcing ordered execution on
     unbound max_active == 1 workqueues and it doesn't make sense to
     risk subtle bugs. Restore the assumption.

   - Workqueue assumes that CPU <-> NUMA node mapping remains static.

     This is a general assumption - we don't have any synchronization
     mechanism around CPU <-> node mapping. Unfortunately, powerpc may
     change the mapping dynamically leading to crashes. Michael added a
     workaround so that we at least don't crash while powerpc hotplug
     code gets updated"

* 'for-4.13-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/wq:
  workqueue: Work around edge cases for calc of pool's cpumask
  workqueue: implicit ordered attribute should be overridable
  workqueue: restore WQ_UNBOUND/max_active==1 to be ordered
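
To make the first fix concrete, here is a minimal sketch of the two ways an ordered unbound workqueue can now be created. This is hypothetical driver code (the queue names and functions are illustrative, not part of this merge): one queue relies on the restored implicit ordering of WQ_UNBOUND with max_active == 1, the other uses alloc_ordered_workqueue(), the interface the message above recommends.

#include <linux/module.h>
#include <linux/workqueue.h>

/* Hypothetical example queues; these names do not appear in the merge. */
static struct workqueue_struct *example_implicit_wq;
static struct workqueue_struct *example_explicit_wq;

static int __init example_init(void)
{
	/*
	 * Implicitly ordered: unbound with max_active == 1.  With these
	 * fixes, __alloc_workqueue_key() sets __WQ_ORDERED for this
	 * combination, so work items again run one at a time, in order.
	 */
	example_implicit_wq = alloc_workqueue("example_implicit", WQ_UNBOUND, 1);

	/*
	 * Explicitly ordered: the preferred interface.  The macro now also
	 * sets __WQ_ORDERED_EXPLICIT, so later attempts to change
	 * max_active or apply new attributes are refused.
	 */
	example_explicit_wq = alloc_ordered_workqueue("example_explicit", 0);

	if (!example_implicit_wq || !example_explicit_wq) {
		if (example_implicit_wq)
			destroy_workqueue(example_implicit_wq);
		if (example_explicit_wq)
			destroy_workqueue(example_explicit_wq);
		return -ENOMEM;
	}
	return 0;
}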
Linus Torvalds 8 years ago
parent
commit
ff2620f778
2 changed files with 29 additions and 5 deletions
  1. 3 additions, 1 deletion
      include/linux/workqueue.h
  2. 26 additions, 4 deletions
      kernel/workqueue.c

+ 3 - 1
include/linux/workqueue.h

@@ -323,6 +323,7 @@ enum {
 
 	__WQ_DRAINING		= 1 << 16, /* internal: workqueue is draining */
 	__WQ_ORDERED		= 1 << 17, /* internal: workqueue is ordered */
 	__WQ_LEGACY		= 1 << 18, /* internal: create*_workqueue() */
+	__WQ_ORDERED_EXPLICIT	= 1 << 19, /* internal: alloc_ordered_workqueue() */
 
 	WQ_MAX_ACTIVE		= 512,	  /* I like 512, better ideas? */
@@ -422,7 +423,8 @@ __alloc_workqueue_key(const char *fmt, unsigned int flags, int max_active,
  * Pointer to the allocated workqueue on success, %NULL on failure.
  */
 #define alloc_ordered_workqueue(fmt, flags, args...)			\
-	alloc_workqueue(fmt, WQ_UNBOUND | __WQ_ORDERED | (flags), 1, ##args)
+	alloc_workqueue(fmt, WQ_UNBOUND | __WQ_ORDERED |		\
+			__WQ_ORDERED_EXPLICIT | (flags), 1, ##args)
 
 #define create_workqueue(name)						\
 	alloc_workqueue("%s", __WQ_LEGACY | WQ_MEM_RECLAIM, 1, (name))

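As a rough illustration of the header change above, a hypothetical driver call such as

	wq = alloc_ordered_workqueue("example_wq", WQ_MEM_RECLAIM);

now expands to approximately

	wq = alloc_workqueue("example_wq",
			     WQ_UNBOUND | __WQ_ORDERED | __WQ_ORDERED_EXPLICIT |
			     WQ_MEM_RECLAIM, 1);

which lets the workqueue core distinguish an explicitly ordered workqueue from one that is merely unbound with max_active == 1.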
+ 26 - 4
kernel/workqueue.c

@@ -3577,6 +3577,13 @@ static bool wq_calc_node_cpumask(const struct workqueue_attrs *attrs, int node,
 
 	/* yeap, return possible CPUs in @node that @attrs wants */
 	cpumask_and(cpumask, attrs->cpumask, wq_numa_possible_cpumask[node]);
+
+	if (cpumask_empty(cpumask)) {
+		pr_warn_once("WARNING: workqueue cpumask: online intersect > "
+				"possible intersect\n");
+		return false;
+	}
+
 	return !cpumask_equal(cpumask, attrs->cpumask);
 
 use_dfl:
@@ -3744,8 +3751,12 @@ static int apply_workqueue_attrs_locked(struct workqueue_struct *wq,
 		return -EINVAL;
 
 	/* creating multiple pwqs breaks ordering guarantee */
-	if (WARN_ON((wq->flags & __WQ_ORDERED) && !list_empty(&wq->pwqs)))
-		return -EINVAL;
+	if (!list_empty(&wq->pwqs)) {
+		if (WARN_ON(wq->flags & __WQ_ORDERED_EXPLICIT))
+			return -EINVAL;
+
+		wq->flags &= ~__WQ_ORDERED;
+	}
 
 	ctx = apply_wqattrs_prepare(wq, attrs);
 	if (!ctx)
@@ -3929,6 +3940,16 @@ struct workqueue_struct *__alloc_workqueue_key(const char *fmt,
 	struct workqueue_struct *wq;
 	struct pool_workqueue *pwq;
 
+	/*
+	 * Unbound && max_active == 1 used to imply ordered, which is no
+	 * longer the case on NUMA machines due to per-node pools.  While
+	 * alloc_ordered_workqueue() is the right way to create an ordered
+	 * workqueue, keep the previous behavior to avoid subtle breakages
+	 * on NUMA.
+	 */
+	if ((flags & WQ_UNBOUND) && max_active == 1)
+		flags |= __WQ_ORDERED;
+
 	/* see the comment above the definition of WQ_POWER_EFFICIENT */
 	if ((flags & WQ_POWER_EFFICIENT) && wq_power_efficient)
 		flags |= WQ_UNBOUND;
@@ -4119,13 +4140,14 @@ void workqueue_set_max_active(struct workqueue_struct *wq, int max_active)
 	struct pool_workqueue *pwq;
 
 	/* disallow meddling with max_active for ordered workqueues */
-	if (WARN_ON(wq->flags & __WQ_ORDERED))
+	if (WARN_ON(wq->flags & __WQ_ORDERED_EXPLICIT))
 		return;
 
 	max_active = wq_clamp_max_active(max_active, wq->flags, wq->name);
 
 	mutex_lock(&wq->mutex);
 
+	wq->flags &= ~__WQ_ORDERED;
 	wq->saved_max_active = max_active;
 
 	for_each_pwq(pwq, wq)
@@ -5253,7 +5275,7 @@ int workqueue_sysfs_register(struct workqueue_struct *wq)
 	 * attributes breaks ordering guarantee.  Disallow exposing ordered
 	 * workqueues.
 	 */
-	if (WARN_ON(wq->flags & __WQ_ORDERED))
+	if (WARN_ON(wq->flags & __WQ_ORDERED_EXPLICIT))
		return -EINVAL;
 
 	wq->wq_dev = wq_dev = kzalloc(sizeof(*wq_dev), GFP_KERNEL);
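
Taken together, the kernel/workqueue.c changes make the implicit ordered attribute overridable while keeping the explicit one locked down. A hypothetical caller (reusing the illustrative queues from the sketch above, which are not part of this merge) would now see:

	/*
	 * Allowed: the queue is only implicitly ordered, so __WQ_ORDERED is
	 * cleared and it becomes an ordinary unbound workqueue that may run
	 * up to four work items concurrently.
	 */
	workqueue_set_max_active(example_implicit_wq, 4);

	/*
	 * Refused: __WQ_ORDERED_EXPLICIT is set, so this trips the WARN_ON()
	 * and returns without changing the workqueue.
	 */
	workqueue_set_max_active(example_explicit_wq, 4);

apply_workqueue_attrs() and workqueue_sysfs_register() follow the same rule: only __WQ_ORDERED_EXPLICIT blocks them, and applying new attributes to an implicitly ordered queue simply clears __WQ_ORDERED.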