浏览代码

Merge branch 'ipvlan-mcast'

Mahesh Bandewar says:

====================
Multicast processing in IPvlan

Dan Willems pointed out that autoconf in IPvlan is broken because of the
way broadcast bit gets set. Since broadcast processing is a real performance
drain, the broadcast bit in multicast filter was only set when the interface
was configured with IPv4 address. In autoconf scenario, when there are
no addresses configured; this logic did not work and it wouldn't allow
DHCPv4 to work. The only way was to add protocol specific hacks to avoid
processing unnecessary broadcast burdon.

This jugglery could be avoided if these multicast / broadcast packets are taken
out of fast-path and are processed in a work-queue. This will enable us to add
broadcast bit in all multicast filters without any impact on performance of
the virtual device. This patch series just does that.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
David S. Miller 10 年之前
父节点
当前提交
1a376de8e3
共有 3 个文件被更改,包括 102 次插入66 次删除
  1. 5 0
      drivers/net/ipvlan/ipvlan.h
  2. 86 52
      drivers/net/ipvlan/ipvlan_core.c
  3. 11 14
      drivers/net/ipvlan/ipvlan_main.c

+ 5 - 0
drivers/net/ipvlan/ipvlan.h

@@ -39,6 +39,8 @@
 #define IPVLAN_MAC_FILTER_SIZE	(1 << IPVLAN_MAC_FILTER_BITS)
 #define IPVLAN_MAC_FILTER_SIZE	(1 << IPVLAN_MAC_FILTER_BITS)
 #define IPVLAN_MAC_FILTER_MASK	(IPVLAN_MAC_FILTER_SIZE - 1)
 #define IPVLAN_MAC_FILTER_MASK	(IPVLAN_MAC_FILTER_SIZE - 1)
 
 
+#define IPVLAN_QBACKLOG_LIMIT	1000
+
 typedef enum {
 typedef enum {
 	IPVL_IPV6 = 0,
 	IPVL_IPV6 = 0,
 	IPVL_ICMPV6,
 	IPVL_ICMPV6,
@@ -93,6 +95,8 @@ struct ipvl_port {
 	struct hlist_head	hlhead[IPVLAN_HASH_SIZE];
 	struct hlist_head	hlhead[IPVLAN_HASH_SIZE];
 	struct list_head	ipvlans;
 	struct list_head	ipvlans;
 	struct rcu_head		rcu;
 	struct rcu_head		rcu;
+	struct work_struct	wq;
+	struct sk_buff_head	backlog;
 	int			count;
 	int			count;
 	u16			mode;
 	u16			mode;
 };
 };
@@ -112,6 +116,7 @@ void ipvlan_set_port_mode(struct ipvl_port *port, u32 nval);
 void ipvlan_init_secret(void);
 void ipvlan_init_secret(void);
 unsigned int ipvlan_mac_hash(const unsigned char *addr);
 unsigned int ipvlan_mac_hash(const unsigned char *addr);
 rx_handler_result_t ipvlan_handle_frame(struct sk_buff **pskb);
 rx_handler_result_t ipvlan_handle_frame(struct sk_buff **pskb);
+void ipvlan_process_multicast(struct work_struct *work);
 int ipvlan_queue_xmit(struct sk_buff *skb, struct net_device *dev);
 int ipvlan_queue_xmit(struct sk_buff *skb, struct net_device *dev);
 void ipvlan_ht_addr_add(struct ipvl_dev *ipvlan, struct ipvl_addr *addr);
 void ipvlan_ht_addr_add(struct ipvl_dev *ipvlan, struct ipvl_addr *addr);
 struct ipvl_addr *ipvlan_find_addr(const struct ipvl_dev *ipvlan,
 struct ipvl_addr *ipvlan_find_addr(const struct ipvl_dev *ipvlan,

+ 86 - 52
drivers/net/ipvlan/ipvlan_core.c

@@ -189,62 +189,69 @@ unsigned int ipvlan_mac_hash(const unsigned char *addr)
 	return hash & IPVLAN_MAC_FILTER_MASK;
 	return hash & IPVLAN_MAC_FILTER_MASK;
 }
 }
 
 
-static void ipvlan_multicast_frame(struct ipvl_port *port, struct sk_buff *skb,
-				   const struct ipvl_dev *in_dev, bool local)
+void ipvlan_process_multicast(struct work_struct *work)
 {
 {
-	struct ethhdr *eth = eth_hdr(skb);
+	struct ipvl_port *port = container_of(work, struct ipvl_port, wq);
+	struct ethhdr *ethh;
 	struct ipvl_dev *ipvlan;
 	struct ipvl_dev *ipvlan;
-	struct sk_buff *nskb;
+	struct sk_buff *skb, *nskb;
+	struct sk_buff_head list;
 	unsigned int len;
 	unsigned int len;
 	unsigned int mac_hash;
 	unsigned int mac_hash;
 	int ret;
 	int ret;
+	u8 pkt_type;
+	bool hlocal, dlocal;
 
 
-	if (skb->protocol == htons(ETH_P_PAUSE))
-		return;
-
-	rcu_read_lock();
-	list_for_each_entry_rcu(ipvlan, &port->ipvlans, pnode) {
-		if (local && (ipvlan == in_dev))
-			continue;
+	__skb_queue_head_init(&list);
 
 
-		mac_hash = ipvlan_mac_hash(eth->h_dest);
-		if (!test_bit(mac_hash, ipvlan->mac_filters))
-			continue;
+	spin_lock_bh(&port->backlog.lock);
+	skb_queue_splice_tail_init(&port->backlog, &list);
+	spin_unlock_bh(&port->backlog.lock);
 
 
-		ret = NET_RX_DROP;
-		len = skb->len + ETH_HLEN;
-		nskb = skb_clone(skb, GFP_ATOMIC);
-		if (!nskb)
-			goto mcast_acct;
+	while ((skb = __skb_dequeue(&list)) != NULL) {
+		ethh = eth_hdr(skb);
+		hlocal = ether_addr_equal(ethh->h_source, port->dev->dev_addr);
+		mac_hash = ipvlan_mac_hash(ethh->h_dest);
 
 
-		if (ether_addr_equal(eth->h_dest, ipvlan->phy_dev->broadcast))
-			nskb->pkt_type = PACKET_BROADCAST;
+		if (ether_addr_equal(ethh->h_dest, port->dev->broadcast))
+			pkt_type = PACKET_BROADCAST;
 		else
 		else
-			nskb->pkt_type = PACKET_MULTICAST;
-
-		nskb->dev = ipvlan->dev;
-		if (local)
-			ret = dev_forward_skb(ipvlan->dev, nskb);
-		else
-			ret = netif_rx(nskb);
-mcast_acct:
-		ipvlan_count_rx(ipvlan, len, ret == NET_RX_SUCCESS, true);
-	}
-	rcu_read_unlock();
-
-	/* Locally generated? ...Forward a copy to the main-device as
-	 * well. On the RX side we'll ignore it (wont give it to any
-	 * of the virtual devices.
-	 */
-	if (local) {
-		nskb = skb_clone(skb, GFP_ATOMIC);
-		if (nskb) {
-			if (ether_addr_equal(eth->h_dest, port->dev->broadcast))
-				nskb->pkt_type = PACKET_BROADCAST;
+			pkt_type = PACKET_MULTICAST;
+
+		dlocal = false;
+		rcu_read_lock();
+		list_for_each_entry_rcu(ipvlan, &port->ipvlans, pnode) {
+			if (hlocal && (ipvlan->dev == skb->dev)) {
+				dlocal = true;
+				continue;
+			}
+			if (!test_bit(mac_hash, ipvlan->mac_filters))
+				continue;
+
+			ret = NET_RX_DROP;
+			len = skb->len + ETH_HLEN;
+			nskb = skb_clone(skb, GFP_ATOMIC);
+			if (!nskb)
+				goto acct;
+
+			nskb->pkt_type = pkt_type;
+			nskb->dev = ipvlan->dev;
+			if (hlocal)
+				ret = dev_forward_skb(ipvlan->dev, nskb);
 			else
 			else
-				nskb->pkt_type = PACKET_MULTICAST;
-
-			dev_forward_skb(port->dev, nskb);
+				ret = netif_rx(nskb);
+acct:
+			ipvlan_count_rx(ipvlan, len, ret == NET_RX_SUCCESS, true);
+		}
+		rcu_read_unlock();
+
+		if (dlocal) {
+			/* If the packet originated here, send it out. */
+			skb->dev = port->dev;
+			skb->pkt_type = pkt_type;
+			dev_queue_xmit(skb);
+		} else {
+			kfree_skb(skb);
 		}
 		}
 	}
 	}
 }
 }
@@ -446,6 +453,26 @@ out:
 	return ret;
 	return ret;
 }
 }
 
 
+static void ipvlan_multicast_enqueue(struct ipvl_port *port,
+				     struct sk_buff *skb)
+{
+	if (skb->protocol == htons(ETH_P_PAUSE)) {
+		kfree_skb(skb);
+		return;
+	}
+
+	spin_lock(&port->backlog.lock);
+	if (skb_queue_len(&port->backlog) < IPVLAN_QBACKLOG_LIMIT) {
+		__skb_queue_tail(&port->backlog, skb);
+		spin_unlock(&port->backlog.lock);
+		schedule_work(&port->wq);
+	} else {
+		spin_unlock(&port->backlog.lock);
+		atomic_long_inc(&skb->dev->rx_dropped);
+		kfree_skb(skb);
+	}
+}
+
 static int ipvlan_xmit_mode_l3(struct sk_buff *skb, struct net_device *dev)
 static int ipvlan_xmit_mode_l3(struct sk_buff *skb, struct net_device *dev)
 {
 {
 	const struct ipvl_dev *ipvlan = netdev_priv(dev);
 	const struct ipvl_dev *ipvlan = netdev_priv(dev);
@@ -493,11 +520,8 @@ static int ipvlan_xmit_mode_l2(struct sk_buff *skb, struct net_device *dev)
 		return dev_forward_skb(ipvlan->phy_dev, skb);
 		return dev_forward_skb(ipvlan->phy_dev, skb);
 
 
 	} else if (is_multicast_ether_addr(eth->h_dest)) {
 	} else if (is_multicast_ether_addr(eth->h_dest)) {
-		u8 ip_summed = skb->ip_summed;
-
-		skb->ip_summed = CHECKSUM_UNNECESSARY;
-		ipvlan_multicast_frame(ipvlan->port, skb, ipvlan, true);
-		skb->ip_summed = ip_summed;
+		ipvlan_multicast_enqueue(ipvlan->port, skb);
+		return NET_XMIT_SUCCESS;
 	}
 	}
 
 
 	skb->dev = ipvlan->phy_dev;
 	skb->dev = ipvlan->phy_dev;
@@ -581,8 +605,18 @@ static rx_handler_result_t ipvlan_handle_mode_l2(struct sk_buff **pskb,
 	int addr_type;
 	int addr_type;
 
 
 	if (is_multicast_ether_addr(eth->h_dest)) {
 	if (is_multicast_ether_addr(eth->h_dest)) {
-		if (ipvlan_external_frame(skb, port))
-			ipvlan_multicast_frame(port, skb, NULL, false);
+		if (ipvlan_external_frame(skb, port)) {
+			struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
+
+			/* External frames are queued for device local
+			 * distribution, but a copy is given to master
+			 * straight away to avoid sending duplicates later
+			 * when work-queue processes this frame. This is
+			 * achieved by returning RX_HANDLER_PASS.
+			 */
+			if (nskb)
+				ipvlan_multicast_enqueue(port, nskb);
+		}
 	} else {
 	} else {
 		struct ipvl_addr *addr;
 		struct ipvl_addr *addr;
 
 

+ 11 - 14
drivers/net/ipvlan/ipvlan_main.c

@@ -54,6 +54,9 @@ static int ipvlan_port_create(struct net_device *dev)
 	for (idx = 0; idx < IPVLAN_HASH_SIZE; idx++)
 	for (idx = 0; idx < IPVLAN_HASH_SIZE; idx++)
 		INIT_HLIST_HEAD(&port->hlhead[idx]);
 		INIT_HLIST_HEAD(&port->hlhead[idx]);
 
 
+	skb_queue_head_init(&port->backlog);
+	INIT_WORK(&port->wq, ipvlan_process_multicast);
+
 	err = netdev_rx_handler_register(dev, ipvlan_handle_frame, port);
 	err = netdev_rx_handler_register(dev, ipvlan_handle_frame, port);
 	if (err)
 	if (err)
 		goto err;
 		goto err;
@@ -72,6 +75,8 @@ static void ipvlan_port_destroy(struct net_device *dev)
 
 
 	dev->priv_flags &= ~IFF_IPVLAN_MASTER;
 	dev->priv_flags &= ~IFF_IPVLAN_MASTER;
 	netdev_rx_handler_unregister(dev);
 	netdev_rx_handler_unregister(dev);
+	cancel_work_sync(&port->wq);
+	__skb_queue_purge(&port->backlog);
 	kfree_rcu(port, rcu);
 	kfree_rcu(port, rcu);
 }
 }
 
 
@@ -213,17 +218,6 @@ static void ipvlan_change_rx_flags(struct net_device *dev, int change)
 		dev_set_allmulti(phy_dev, dev->flags & IFF_ALLMULTI? 1 : -1);
 		dev_set_allmulti(phy_dev, dev->flags & IFF_ALLMULTI? 1 : -1);
 }
 }
 
 
-static void ipvlan_set_broadcast_mac_filter(struct ipvl_dev *ipvlan, bool set)
-{
-	struct net_device *dev = ipvlan->dev;
-	unsigned int hashbit = ipvlan_mac_hash(dev->broadcast);
-
-	if (set && !test_bit(hashbit, ipvlan->mac_filters))
-		__set_bit(hashbit, ipvlan->mac_filters);
-	else if (!set && test_bit(hashbit, ipvlan->mac_filters))
-		__clear_bit(hashbit, ipvlan->mac_filters);
-}
-
 static void ipvlan_set_multicast_mac_filter(struct net_device *dev)
 static void ipvlan_set_multicast_mac_filter(struct net_device *dev)
 {
 {
 	struct ipvl_dev *ipvlan = netdev_priv(dev);
 	struct ipvl_dev *ipvlan = netdev_priv(dev);
@@ -238,6 +232,12 @@ static void ipvlan_set_multicast_mac_filter(struct net_device *dev)
 		netdev_for_each_mc_addr(ha, dev)
 		netdev_for_each_mc_addr(ha, dev)
 			__set_bit(ipvlan_mac_hash(ha->addr), mc_filters);
 			__set_bit(ipvlan_mac_hash(ha->addr), mc_filters);
 
 
+		/* Turn-on broadcast bit irrespective of address family,
+		 * since broadcast is deferred to a work-queue, hence no
+		 * impact on fast-path processing.
+		 */
+		__set_bit(ipvlan_mac_hash(dev->broadcast), mc_filters);
+
 		bitmap_copy(ipvlan->mac_filters, mc_filters,
 		bitmap_copy(ipvlan->mac_filters, mc_filters,
 			    IPVLAN_MAC_FILTER_SIZE);
 			    IPVLAN_MAC_FILTER_SIZE);
 	}
 	}
@@ -705,7 +705,6 @@ static int ipvlan_add_addr4(struct ipvl_dev *ipvlan, struct in_addr *ip4_addr)
 	 */
 	 */
 	if (netif_running(ipvlan->dev))
 	if (netif_running(ipvlan->dev))
 		ipvlan_ht_addr_add(ipvlan, addr);
 		ipvlan_ht_addr_add(ipvlan, addr);
-	ipvlan_set_broadcast_mac_filter(ipvlan, true);
 
 
 	return 0;
 	return 0;
 }
 }
@@ -722,8 +721,6 @@ static void ipvlan_del_addr4(struct ipvl_dev *ipvlan, struct in_addr *ip4_addr)
 	list_del(&addr->anode);
 	list_del(&addr->anode);
 	ipvlan->ipv4cnt--;
 	ipvlan->ipv4cnt--;
 	WARN_ON(ipvlan->ipv4cnt < 0);
 	WARN_ON(ipvlan->ipv4cnt < 0);
-	if (!ipvlan->ipv4cnt)
-	    ipvlan_set_broadcast_mac_filter(ipvlan, false);
 	kfree_rcu(addr, rcu);
 	kfree_rcu(addr, rcu);
 
 
 	return;
 	return;