
Merge branch 'pktgen'

Jesper Dangaard Brouer says:

====================
Optimizing pktgen for single CPU performance

This series focuses on optimizing "pktgen" for single CPU performance.

V2-series:
 - Removed some patches
 - Doc real reason for TX ring buffer filling up

NIC tuning for pktgen:
 http://netoptimizer.blogspot.dk/2014/06/pktgen-for-network-overload-testing.html

General overload setup according to:
 http://netoptimizer.blogspot.dk/2014/04/basic-tuning-for-network-overload.html

Hardware:
 System: CPU E5-2630
 NIC: Intel ixgbe/82599 chip

Testing done with net-next git tree on top of
 commit 6623b41944 ("Merge branch 'master' of...jkirsher/net-next")

Pktgen script exercising race condition:
 https://github.com/netoptimizer/network-testing/blob/master/pktgen/unit_test01_race_add_rem_device_loop.sh

Tool for measuring LOCK overhead:
 https://github.com/netoptimizer/network-testing/blob/master/src/overhead_cmpxchg.c
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
Committed by David S. Miller 11 years ago (merge commit edd79ca8b3)
2 changed files with 83 additions and 55 deletions
  Documentation/networking/pktgen.txt | +28 -0
  net/core/pktgen.c                   | +55 -55
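The overhead_cmpxchg.c tool linked in the commit message is the authoritative reference for the LOCK-overhead numbers. Purely as an illustration of what such a micro-benchmark looks like (this sketch is not that tool; the file name, loop count and output format are made up here), a minimal userspace timing loop around a LOCK-prefixed compare-and-swap could be:

/* lock_overhead_sketch.c -- illustrative only, not the referenced tool.
 * Times an uncontended locked cmpxchg (GCC __sync builtin) per operation.
 * Build: gcc -O2 lock_overhead_sketch.c -o lock_overhead_sketch
 */
#include <stdio.h>
#include <stdint.h>
#include <time.h>

#define LOOPS 100000000UL	/* arbitrary iteration count for the sketch */

static uint64_t now_ns(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return (uint64_t)ts.tv_sec * 1000000000UL + ts.tv_nsec;
}

int main(void)
{
	static volatile long var;
	uint64_t t0, t1;
	unsigned long i;

	t0 = now_ns();
	for (i = 0; i < LOOPS; i++)	/* LOCK CMPXCHG on x86 */
		__sync_val_compare_and_swap(&var, var, var + 1);
	t1 = now_ns();

	printf("cmpxchg: %.1f ns/op\n", (double)(t1 - t0) / LOOPS);
	return 0;
}

A loop like this only gives a rough per-operation cost to compare against pktgen's per-packet budget; it is the kind of number that motivates taking if_lock off the transmit hot path below.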

Documentation/networking/pktgen.txt  +28 -0

@@ -24,6 +24,34 @@ For monitoring and control pktgen creates:
         /proc/net/pktgen/ethX


+Tuning NIC for max performance
+==============================
+
+The default NIC settings are (likely) not tuned for pktgen's artificial
+overload type of benchmarking, as this could hurt the normal use-case.
+
+Specifically, increase the TX ring buffer in the NIC:
+ # ethtool -G ethX tx 1024
+
+A larger TX ring can improve pktgen's performance, while it can hurt
+in the general case, 1) because the TX ring buffer might get larger
+than the CPU's L1/L2 cache, and 2) because it allows more queueing in
+the NIC HW layer (which is bad for bufferbloat).
+
+One should be careful about concluding that packets/descriptors in the
+HW TX ring cause delay.  Drivers usually delay cleaning up the
+ring buffers (for various performance reasons), so packets stalling in
+the TX ring might just be waiting for cleanup.
+
+This cleanup issue is specifically the case for the ixgbe driver
+(Intel 82599 chip).  This driver combines TX+RX ring cleanups, and the
+cleanup interval is affected by the ethtool --coalesce setting of the
+"rx-usecs" parameter.
+
+For ixgbe use e.g. "30", resulting in approx 33K interrupts/sec (1/30*10^6):
+ # ethtool -C ethX rx-usecs 30
+
+
 Viewing threads
 ===============
 /proc/net/pktgen/kpktgend_0 
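The two ethtool commands documented above are the normal way to apply this tuning. As an aside for readers who want to check what they change programmatically, a small sketch (illustrative only, not part of the patch; the file name and usage are made up) can query the same ring parameters through the SIOCETHTOOL ioctl that the ethtool binary itself uses:

/* ringparam_check.c -- illustrative sketch, not part of this patch.
 * Reads the TX/RX ring sizes that "ethtool -G ethX tx 1024" adjusts,
 * via the ETHTOOL_GRINGPARAM ioctl.
 * Build: gcc -O2 ringparam_check.c -o ringparam_check
 * Usage: ./ringparam_check eth1
 */
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <linux/ethtool.h>
#include <linux/sockios.h>

int main(int argc, char **argv)
{
	struct ethtool_ringparam ring = { .cmd = ETHTOOL_GRINGPARAM };
	struct ifreq ifr;
	int fd;

	if (argc < 2) {
		fprintf(stderr, "usage: %s <ifname>\n", argv[0]);
		return 1;
	}
	fd = socket(AF_INET, SOCK_DGRAM, 0);
	if (fd < 0) {
		perror("socket");
		return 1;
	}
	memset(&ifr, 0, sizeof(ifr));
	strncpy(ifr.ifr_name, argv[1], IFNAMSIZ - 1);
	ifr.ifr_data = (void *)&ring;
	if (ioctl(fd, SIOCETHTOOL, &ifr) < 0) {
		perror("SIOCETHTOOL");
		close(fd);
		return 1;
	}
	printf("%s: TX ring %u (max %u), RX ring %u (max %u)\n",
	       argv[1], ring.tx_pending, ring.tx_max_pending,
	       ring.rx_pending, ring.rx_max_pending);
	close(fd);
	return 0;
}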

net/core/pktgen.c  +55 -55

@@ -69,8 +69,9 @@
  * for running devices in the if_list and sends packets until count is 0 it
  * also the thread checks the thread->control which is used for inter-process
  * communication. controlling process "posts" operations to the threads this
- * way. The if_lock should be possible to remove when add/rem_device is merged
- * into this too.
+ * way.
+ * The if_list is RCU protected, and the if_lock remains to protect updating
+ * of if_list, from "add_device" as it invoked from userspace (via proc write).
  *
  * By design there should only be *one* "controlling" process. In practice
  * multiple write accesses gives unpredictable result. Understood by "write"
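The comment added in the hunk above summarizes the new scheme: RCU for readers of if_list, if_lock only for the update side. As a condensed illustration of that reader/writer split (a kernel-style sketch with made-up names, not the actual pktgen code), the pattern looks like this:

/* rcu_list_sketch -- illustrative pattern only, not pktgen code.
 * Readers walk the list under rcu_read_lock(); the update side
 * serializes with a spinlock and frees entries via kfree_rcu().
 */
#include <linux/types.h>
#include <linux/list.h>
#include <linux/rculist.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

struct item {
	struct list_head list;
	struct rcu_head rcu;		/* for deferred freeing */
	int running;
};

static LIST_HEAD(item_list);
static DEFINE_SPINLOCK(item_lock);	/* protects updates of item_list only */

/* Reader: may run concurrently with add/remove, never blocks the writer */
static bool any_running(void)
{
	struct item *it;

	rcu_read_lock();
	list_for_each_entry_rcu(it, &item_list, list) {
		if (it->running) {
			rcu_read_unlock();
			return true;
		}
	}
	rcu_read_unlock();
	return false;
}

/* Writer: updates are still serialized by the spinlock */
static void add_item(struct item *it)
{
	spin_lock(&item_lock);
	list_add_rcu(&it->list, &item_list);
	spin_unlock(&item_lock);
}

static void remove_item(struct item *it)
{
	spin_lock(&item_lock);
	list_del_rcu(&it->list);
	spin_unlock(&item_lock);
	kfree_rcu(it, rcu);	/* freed once pre-existing readers are done */
}

The hunks below are this same pattern applied to pktgen's reader paths (pktgen_thread_show(), next_to_run(), pktgen_stop(), pktgen_find_dev()) and to the add/remove paths, which keep if_lock.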
@@ -208,7 +209,7 @@
 #define T_REMDEVALL   (1<<2)	/* Remove all devs */
 #define T_REMDEV      (1<<3)	/* Remove one dev */
 
-/* If lock -- can be removed after some work */
+/* If lock -- protects updating of if_list */
 #define   if_lock(t)           spin_lock(&(t->if_lock));
 #define   if_unlock(t)           spin_unlock(&(t->if_lock));
 
@@ -241,6 +242,7 @@ struct pktgen_dev {
 	struct proc_dir_entry *entry;	/* proc file */
 	struct pktgen_thread *pg_thread;/* the owner */
 	struct list_head list;		/* chaining in the thread's run-queue */
+	struct rcu_head	 rcu;		/* freed by RCU */
 
 	int running;		/* if false, the test will stop */
 
@@ -1737,14 +1739,14 @@ static int pktgen_thread_show(struct seq_file *seq, void *v)
 
 	seq_puts(seq, "Running: ");
 
-	if_lock(t);
-	list_for_each_entry(pkt_dev, &t->if_list, list)
+	rcu_read_lock();
+	list_for_each_entry_rcu(pkt_dev, &t->if_list, list)
 		if (pkt_dev->running)
 			seq_printf(seq, "%s ", pkt_dev->odevname);
 
 	seq_puts(seq, "\nStopped: ");
 
-	list_for_each_entry(pkt_dev, &t->if_list, list)
+	list_for_each_entry_rcu(pkt_dev, &t->if_list, list)
 		if (!pkt_dev->running)
 			seq_printf(seq, "%s ", pkt_dev->odevname);
 
@@ -1753,7 +1755,7 @@ static int pktgen_thread_show(struct seq_file *seq, void *v)
 	else
 		seq_puts(seq, "\nResult: NA\n");
 
-	if_unlock(t);
+	rcu_read_unlock();
 
 	return 0;
 }
@@ -1878,10 +1880,8 @@ static struct pktgen_dev *__pktgen_NN_threads(const struct pktgen_net *pn,
 		pkt_dev = pktgen_find_dev(t, ifname, exact);
 		if (pkt_dev) {
 			if (remove) {
-				if_lock(t);
 				pkt_dev->removal_mark = 1;
 				t->control |= T_REMDEV;
-				if_unlock(t);
 			}
 			break;
 		}
@@ -1931,7 +1931,8 @@ static void pktgen_change_name(const struct pktgen_net *pn, struct net_device *d
 	list_for_each_entry(t, &pn->pktgen_threads, th_list) {
 		struct pktgen_dev *pkt_dev;
 
-		list_for_each_entry(pkt_dev, &t->if_list, list) {
+		rcu_read_lock();
+		list_for_each_entry_rcu(pkt_dev, &t->if_list, list) {
 			if (pkt_dev->odev != dev)
 				continue;
 
@@ -1946,6 +1947,7 @@ static void pktgen_change_name(const struct pktgen_net *pn, struct net_device *d
 				       dev->name);
 			break;
 		}
+		rcu_read_unlock();
 	}
 }
 
@@ -2997,8 +2999,8 @@ static void pktgen_run(struct pktgen_thread *t)
 
 	func_enter();
 
-	if_lock(t);
-	list_for_each_entry(pkt_dev, &t->if_list, list) {
+	rcu_read_lock();
+	list_for_each_entry_rcu(pkt_dev, &t->if_list, list) {
 
 		/*
 		 * setup odev and create initial packet.
@@ -3007,18 +3009,18 @@ static void pktgen_run(struct pktgen_thread *t)
 
 		if (pkt_dev->odev) {
 			pktgen_clear_counters(pkt_dev);
-			pkt_dev->running = 1;	/* Cranke yeself! */
 			pkt_dev->skb = NULL;
 			pkt_dev->started_at = pkt_dev->next_tx = ktime_get();
 
 			set_pkt_overhead(pkt_dev);
 
 			strcpy(pkt_dev->result, "Starting");
+			pkt_dev->running = 1;	/* Cranke yeself! */
 			started++;
 		} else
 			strcpy(pkt_dev->result, "Error starting");
 	}
-	if_unlock(t);
+	rcu_read_unlock();
 	if (started)
 		t->control &= ~(T_STOP);
 }
@@ -3041,27 +3043,25 @@ static int thread_is_running(const struct pktgen_thread *t)
 {
 	const struct pktgen_dev *pkt_dev;
 
-	list_for_each_entry(pkt_dev, &t->if_list, list)
-		if (pkt_dev->running)
+	rcu_read_lock();
+	list_for_each_entry_rcu(pkt_dev, &t->if_list, list)
+		if (pkt_dev->running) {
+			rcu_read_unlock();
 			return 1;
+		}
+	rcu_read_unlock();
 	return 0;
 }
 
 static int pktgen_wait_thread_run(struct pktgen_thread *t)
 {
-	if_lock(t);
-
 	while (thread_is_running(t)) {
 
-		if_unlock(t);
-
 		msleep_interruptible(100);
 
 		if (signal_pending(current))
 			goto signal;
-		if_lock(t);
 	}
-	if_unlock(t);
 	return 1;
 signal:
 	return 0;
@@ -3166,10 +3166,10 @@ static int pktgen_stop_device(struct pktgen_dev *pkt_dev)
 		return -EINVAL;
 	}
 
+	pkt_dev->running = 0;
 	kfree_skb(pkt_dev->skb);
 	pkt_dev->skb = NULL;
 	pkt_dev->stopped_at = ktime_get();
-	pkt_dev->running = 0;
 
 	show_results(pkt_dev, nr_frags);
 
@@ -3180,9 +3180,8 @@ static struct pktgen_dev *next_to_run(struct pktgen_thread *t)
 {
 	struct pktgen_dev *pkt_dev, *best = NULL;
 
-	if_lock(t);
-
-	list_for_each_entry(pkt_dev, &t->if_list, list) {
+	rcu_read_lock();
+	list_for_each_entry_rcu(pkt_dev, &t->if_list, list) {
 		if (!pkt_dev->running)
 			continue;
 		if (best == NULL)
@@ -3190,7 +3189,8 @@ static struct pktgen_dev *next_to_run(struct pktgen_thread *t)
 		else if (ktime_compare(pkt_dev->next_tx, best->next_tx) < 0)
 			best = pkt_dev;
 	}
-	if_unlock(t);
+	rcu_read_unlock();
+
 	return best;
 }
 
@@ -3200,13 +3200,13 @@ static void pktgen_stop(struct pktgen_thread *t)
 
 	func_enter();
 
-	if_lock(t);
+	rcu_read_lock();
 
-	list_for_each_entry(pkt_dev, &t->if_list, list) {
+	list_for_each_entry_rcu(pkt_dev, &t->if_list, list) {
 		pktgen_stop_device(pkt_dev);
 	}
 
-	if_unlock(t);
+	rcu_read_unlock();
 }
 
 /*
@@ -3220,8 +3220,6 @@ static void pktgen_rem_one_if(struct pktgen_thread *t)
 
 	func_enter();
 
-	if_lock(t);
-
 	list_for_each_safe(q, n, &t->if_list) {
 		cur = list_entry(q, struct pktgen_dev, list);
 
@@ -3235,8 +3233,6 @@ static void pktgen_rem_one_if(struct pktgen_thread *t)
 
 		break;
 	}
-
-	if_unlock(t);
 }
 
 static void pktgen_rem_all_ifs(struct pktgen_thread *t)
@@ -3248,8 +3244,6 @@ static void pktgen_rem_all_ifs(struct pktgen_thread *t)
 
 	/* Remove all devices, free mem */
 
-	if_lock(t);
-
 	list_for_each_safe(q, n, &t->if_list) {
 		cur = list_entry(q, struct pktgen_dev, list);
 
@@ -3258,8 +3252,6 @@ static void pktgen_rem_all_ifs(struct pktgen_thread *t)
 
 		pktgen_remove_device(t, cur);
 	}
-
-	if_unlock(t);
 }
 
 static void pktgen_rem_thread(struct pktgen_thread *t)
@@ -3407,10 +3399,10 @@ static int pktgen_thread_worker(void *arg)
 
 	pr_debug("starting pktgen/%d:  pid=%d\n", cpu, task_pid_nr(current));
 
-	set_current_state(TASK_INTERRUPTIBLE);
-
 	set_freezable();
 
+	__set_current_state(TASK_RUNNING);
+
 	while (!kthread_should_stop()) {
 		pkt_dev = next_to_run(t);
 
@@ -3424,8 +3416,6 @@ static int pktgen_thread_worker(void *arg)
 			continue;
 		}
 
-		__set_current_state(TASK_RUNNING);
-
 		if (likely(pkt_dev)) {
 			pktgen_xmit(pkt_dev);
 
@@ -3456,9 +3446,8 @@ static int pktgen_thread_worker(void *arg)
 		}
 
 		try_to_freeze();
-
-		set_current_state(TASK_INTERRUPTIBLE);
 	}
+	set_current_state(TASK_INTERRUPTIBLE);
 
 	pr_debug("%s stopping all device\n", t->tsk->comm);
 	pktgen_stop(t);
@@ -3485,8 +3474,8 @@ static struct pktgen_dev *pktgen_find_dev(struct pktgen_thread *t,
 	struct pktgen_dev *p, *pkt_dev = NULL;
 	size_t len = strlen(ifname);
 
-	if_lock(t);
-	list_for_each_entry(p, &t->if_list, list)
+	rcu_read_lock();
+	list_for_each_entry_rcu(p, &t->if_list, list)
 		if (strncmp(p->odevname, ifname, len) == 0) {
 			if (p->odevname[len]) {
 				if (exact || p->odevname[len] != '@')
@@ -3496,7 +3485,7 @@ static struct pktgen_dev *pktgen_find_dev(struct pktgen_thread *t,
 			break;
 		}
 
-	if_unlock(t);
+	rcu_read_unlock();
 	pr_debug("find_dev(%s) returning %p\n", ifname, pkt_dev);
 	return pkt_dev;
 }
@@ -3510,6 +3499,12 @@ static int add_dev_to_thread(struct pktgen_thread *t,
 {
 	int rv = 0;
 
+	/* This function cannot be called concurrently, as its called
+	 * under pktgen_thread_lock mutex, but it can run from
+	 * userspace on another CPU than the kthread.  The if_lock()
+	 * is used here to sync with concurrent instances of
+	 * _rem_dev_from_if_list() invoked via kthread, which is also
+	 * updating the if_list */
 	if_lock(t);
 
 	if (pkt_dev->pg_thread) {
@@ -3518,9 +3513,9 @@ static int add_dev_to_thread(struct pktgen_thread *t,
 		goto out;
 	}
 
-	list_add(&pkt_dev->list, &t->if_list);
-	pkt_dev->pg_thread = t;
 	pkt_dev->running = 0;
+	pkt_dev->pg_thread = t;
+	list_add_rcu(&pkt_dev->list, &t->if_list);
 
 out:
 	if_unlock(t);
@@ -3675,11 +3670,13 @@ static void _rem_dev_from_if_list(struct pktgen_thread *t,
 	struct list_head *q, *n;
 	struct pktgen_dev *p;
 
+	if_lock(t);
 	list_for_each_safe(q, n, &t->if_list) {
 		p = list_entry(q, struct pktgen_dev, list);
 		if (p == pkt_dev)
-			list_del(&p->list);
+			list_del_rcu(&p->list);
 	}
+	if_unlock(t);
 }
 
 static int pktgen_remove_device(struct pktgen_thread *t,
@@ -3699,20 +3696,22 @@ static int pktgen_remove_device(struct pktgen_thread *t,
 		pkt_dev->odev = NULL;
 	}
 
-	/* And update the thread if_list */
-
-	_rem_dev_from_if_list(t, pkt_dev);
-
+	/* Remove proc before if_list entry, because add_device uses
+	 * list to determine if interface already exist, avoid race
+	 * with proc_create_data() */
 	if (pkt_dev->entry)
 		proc_remove(pkt_dev->entry);
 
+	/* And update the thread if_list */
+	_rem_dev_from_if_list(t, pkt_dev);
+
 #ifdef CONFIG_XFRM
 	free_SAs(pkt_dev);
 #endif
 	vfree(pkt_dev->flows);
 	if (pkt_dev->page)
 		put_page(pkt_dev->page);
-	kfree(pkt_dev);
+	kfree_rcu(pkt_dev, rcu);
 	return 0;
 }
 
@@ -3812,6 +3811,7 @@ static void __exit pg_cleanup(void)
 {
 	unregister_netdevice_notifier(&pktgen_notifier_block);
 	unregister_pernet_subsys(&pg_net_ops);
+	/* Don't need rcu_barrier() due to use of kfree_rcu() */
 }
 
 module_init(pg_init);