瀏覽代碼

Merge branch 'net_sched-fix-filter-chain-reference-counting'

Cong Wang says:

====================
net_sched: fix filter chain reference counting

This patchset fixes tc filter chain reference counting and nasty race
conditions with RCU callbacks. Please see each patch for details.

v3: Rebase on the latest -net
    Add code comment in patch 1
    Improve comment and changelog for patch 2
    Add patch 3

v2: Add patch 1
    Get rid of more ugly code in patch 2
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
David S. Miller 8 年之前
父節點
當前提交
63428fb6a1
共有 3 個文件被更改,包括 48 次插入34 次删除
  1. 0 2
      include/net/act_api.h
  2. 8 9
      net/sched/act_api.c
  3. 40 23
      net/sched/cls_api.c

+ 0 - 2
include/net/act_api.h

@@ -34,7 +34,6 @@ struct tc_action {
 	struct gnet_stats_queue		tcfa_qstats;
 	struct net_rate_estimator __rcu *tcfa_rate_est;
 	spinlock_t			tcfa_lock;
-	struct rcu_head			tcfa_rcu;
 	struct gnet_stats_basic_cpu __percpu *cpu_bstats;
 	struct gnet_stats_queue __percpu *cpu_qstats;
 	struct tc_cookie	*act_cookie;
@@ -50,7 +49,6 @@ struct tc_action {
 #define tcf_qstats	common.tcfa_qstats
 #define tcf_rate_est	common.tcfa_rate_est
 #define tcf_lock	common.tcfa_lock
-#define tcf_rcu		common.tcfa_rcu
 
 /* Update lastuse only if needed, to avoid dirtying a cache line.
  * We use a temp variable to avoid fetching jiffies twice.

+ 8 - 9
net/sched/act_api.c

@@ -53,10 +53,13 @@ static void tcf_action_goto_chain_exec(const struct tc_action *a,
 	res->goto_tp = rcu_dereference_bh(chain->filter_chain);
 }
 
-static void free_tcf(struct rcu_head *head)
+/* XXX: For standalone actions, we don't need a RCU grace period either, because
+ * actions are always connected to filters and filters are already destroyed in
+ * RCU callbacks, so after a RCU grace period actions are already disconnected
+ * from filters. Readers later can not find us.
+ */
+static void free_tcf(struct tc_action *p)
 {
-	struct tc_action *p = container_of(head, struct tc_action, tcfa_rcu);
-
 	free_percpu(p->cpu_bstats);
 	free_percpu(p->cpu_qstats);
 
@@ -76,11 +79,7 @@ static void tcf_idr_remove(struct tcf_idrinfo *idrinfo, struct tc_action *p)
 	idr_remove_ext(&idrinfo->action_idr, p->tcfa_index);
 	spin_unlock_bh(&idrinfo->lock);
 	gen_kill_estimator(&p->tcfa_rate_est);
-	/*
-	 * gen_estimator est_timer() might access p->tcfa_lock
-	 * or bstats, wait a RCU grace period before freeing p
-	 */
-	call_rcu(&p->tcfa_rcu, free_tcf);
+	free_tcf(p);
 }
 
 int __tcf_idr_release(struct tc_action *p, bool bind, bool strict)
@@ -259,7 +258,7 @@ void tcf_idr_cleanup(struct tc_action *a, struct nlattr *est)
 {
 	if (est)
 		gen_kill_estimator(&a->tcfa_rate_est);
-	call_rcu(&a->tcfa_rcu, free_tcf);
+	free_tcf(a);
 }
 EXPORT_SYMBOL(tcf_idr_cleanup);
 

+ 40 - 23
net/sched/cls_api.c

@@ -182,7 +182,7 @@ static struct tcf_chain *tcf_chain_create(struct tcf_block *block,
 	list_add_tail(&chain->list, &block->chain_list);
 	chain->block = block;
 	chain->index = chain_index;
-	chain->refcnt = 0;
+	chain->refcnt = 1;
 	return chain;
 }
 
@@ -194,21 +194,20 @@ static void tcf_chain_flush(struct tcf_chain *chain)
 		RCU_INIT_POINTER(*chain->p_filter_chain, NULL);
 	while ((tp = rtnl_dereference(chain->filter_chain)) != NULL) {
 		RCU_INIT_POINTER(chain->filter_chain, tp->next);
+		tcf_chain_put(chain);
 		tcf_proto_destroy(tp);
 	}
 }
 
 static void tcf_chain_destroy(struct tcf_chain *chain)
 {
-	/* May be already removed from the list by the previous call. */
-	if (!list_empty(&chain->list))
-		list_del_init(&chain->list);
+	list_del(&chain->list);
+	kfree(chain);
+}
 
-	/* There might still be a reference held when we got here from
-	 * tcf_block_put. Wait for the user to drop reference before free.
-	 */
-	if (!chain->refcnt)
-		kfree(chain);
+static void tcf_chain_hold(struct tcf_chain *chain)
+{
+	++chain->refcnt;
 }
 
 struct tcf_chain *tcf_chain_get(struct tcf_block *block, u32 chain_index,
@@ -217,24 +216,19 @@ struct tcf_chain *tcf_chain_get(struct tcf_block *block, u32 chain_index,
 	struct tcf_chain *chain;
 
 	list_for_each_entry(chain, &block->chain_list, list) {
-		if (chain->index == chain_index)
-			goto incref;
+		if (chain->index == chain_index) {
+			tcf_chain_hold(chain);
+			return chain;
+		}
 	}
-	chain = create ? tcf_chain_create(block, chain_index) : NULL;
 
-incref:
-	if (chain)
-		chain->refcnt++;
-	return chain;
+	return create ? tcf_chain_create(block, chain_index) : NULL;
 }
 EXPORT_SYMBOL(tcf_chain_get);
 
 void tcf_chain_put(struct tcf_chain *chain)
 {
-	/* Destroy unused chain, with exception of chain 0, which is the
-	 * default one and has to be always present.
-	 */
-	if (--chain->refcnt == 0 && !chain->filter_chain && chain->index != 0)
+	if (--chain->refcnt == 0)
 		tcf_chain_destroy(chain);
 }
 EXPORT_SYMBOL(tcf_chain_put);
@@ -279,10 +273,31 @@ void tcf_block_put(struct tcf_block *block)
 	if (!block)
 		return;
 
-	list_for_each_entry_safe(chain, tmp, &block->chain_list, list) {
+	/* XXX: Standalone actions are not allowed to jump to any chain, and
+	 * bound actions should be all removed after flushing. However,
+	 * filters are destroyed in RCU callbacks, we have to hold the chains
+	 * first, otherwise we would always race with RCU callbacks on this list
+	 * without proper locking.
+	 */
+
+	/* Wait for existing RCU callbacks to cool down. */
+	rcu_barrier();
+
+	/* Hold a refcnt for all chains, except 0, in case they are gone. */
+	list_for_each_entry(chain, &block->chain_list, list)
+		if (chain->index)
+			tcf_chain_hold(chain);
+
+	/* No race on the list, because no chain could be destroyed. */
+	list_for_each_entry(chain, &block->chain_list, list)
 		tcf_chain_flush(chain);
-		tcf_chain_destroy(chain);
-	}
+
+	/* Wait for RCU callbacks to release the reference count. */
+	rcu_barrier();
+
+	/* At this point, all the chains should have refcnt == 1. */
+	list_for_each_entry_safe(chain, tmp, &block->chain_list, list)
+		tcf_chain_put(chain);
 	kfree(block);
 }
 EXPORT_SYMBOL(tcf_block_put);
@@ -360,6 +375,7 @@ static void tcf_chain_tp_insert(struct tcf_chain *chain,
 		rcu_assign_pointer(*chain->p_filter_chain, tp);
 	RCU_INIT_POINTER(tp->next, tcf_chain_tp_prev(chain_info));
 	rcu_assign_pointer(*chain_info->pprev, tp);
+	tcf_chain_hold(chain);
 }
 
 static void tcf_chain_tp_remove(struct tcf_chain *chain,
@@ -371,6 +387,7 @@ static void tcf_chain_tp_remove(struct tcf_chain *chain,
 	if (chain->p_filter_chain && tp == chain->filter_chain)
 		RCU_INIT_POINTER(*chain->p_filter_chain, next);
 	RCU_INIT_POINTER(*chain_info->pprev, next);
+	tcf_chain_put(chain);
 }
 
 static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain,