Przeglądaj źródła

Merge branch 'r8152'

Hayes Wang says:

====================
r8152: tx/rx improvement

 - Select the suitable spin lock for each function.
 - Add additional check to reduce the spin lock.
 - Up the priority of the tx to avoid interrupted by rx.
 - Support rx checksum, large send, and IPv6 hw checksum.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
David S. Miller 11 lat temu
rodzic
commit
a5d5ff572c
1 zmienionych plików z 263 dodań i 60 usunięć
  1. 263 60
      drivers/net/usb/r8152.c

+ 263 - 60
drivers/net/usb/r8152.c

@@ -21,9 +21,10 @@
 #include <linux/list.h>
 #include <linux/ip.h>
 #include <linux/ipv6.h>
+#include <net/ip6_checksum.h>
 
 /* Version Information */
-#define DRIVER_VERSION "v1.05.0 (2014/02/18)"
+#define DRIVER_VERSION "v1.06.0 (2014/03/03)"
 #define DRIVER_AUTHOR "Realtek linux nic maintainers <nic_swsd@realtek.com>"
 #define DRIVER_DESC "Realtek RTL8152/RTL8153 Based USB Ethernet Adapters"
 #define MODULENAME "r8152"
@@ -447,6 +448,7 @@ enum rtl8152_flags {
 	RTL8152_LINK_CHG,
 	SELECTIVE_SUSPEND,
 	PHY_RESET,
+	SCHEDULE_TASKLET,
 };
 
 /* Define these values to match your device */
@@ -466,8 +468,18 @@ enum rtl8152_flags {
 struct rx_desc {
 	__le32 opts1;
 #define RX_LEN_MASK			0x7fff
+
 	__le32 opts2;
+#define RD_UDP_CS			(1 << 23)
+#define RD_TCP_CS			(1 << 22)
+#define RD_IPV6_CS			(1 << 20)
+#define RD_IPV4_CS			(1 << 19)
+
 	__le32 opts3;
+#define IPF				(1 << 23) /* IP checksum fail */
+#define UDPF				(1 << 22) /* UDP checksum fail */
+#define TCPF				(1 << 21) /* TCP checksum fail */
+
 	__le32 opts4;
 	__le32 opts5;
 	__le32 opts6;
@@ -477,13 +489,21 @@ struct tx_desc {
 	__le32 opts1;
 #define TX_FS			(1 << 31) /* First segment of a packet */
 #define TX_LS			(1 << 30) /* Final segment of a packet */
-#define TX_LEN_MASK		0x3ffff
+#define GTSENDV4		(1 << 28)
+#define GTSENDV6		(1 << 27)
+#define GTTCPHO_SHIFT		18
+#define GTTCPHO_MAX		0x7fU
+#define TX_LEN_MAX		0x3ffffU
 
 	__le32 opts2;
 #define UDP_CS			(1 << 31) /* Calculate UDP/IP checksum */
 #define TCP_CS			(1 << 30) /* Calculate TCP/IP checksum */
 #define IPV4_CS			(1 << 29) /* Calculate IPv4 checksum */
 #define IPV6_CS			(1 << 28) /* Calculate IPv6 checksum */
+#define MSS_SHIFT		17
+#define MSS_MAX			0x7ffU
+#define TCPHO_SHIFT		17
+#define TCPHO_MAX		0x7ffU
 };
 
 struct r8152;
@@ -550,12 +570,21 @@ enum rtl_version {
 	RTL_VER_MAX
 };
 
+enum tx_csum_stat {
+	TX_CSUM_SUCCESS = 0,
+	TX_CSUM_TSO,
+	TX_CSUM_NONE
+};
+
 /* Maximum number of multicast addresses to filter (vs. Rx-all-multicast).
  * The RTL chips use a 64 element hash table based on the Ethernet CRC.
  */
 static const int multicast_filter_limit = 32;
 static unsigned int rx_buf_sz = 16384;
 
+#define RTL_LIMITED_TSO_SIZE	(rx_buf_sz - sizeof(struct tx_desc) - \
+				 VLAN_ETH_HLEN - VLAN_HLEN)
+
 static
 int get_registers(struct r8152 *tp, u16 value, u16 index, u16 size, void *data)
 {
@@ -963,7 +992,6 @@ static int rtl8152_set_mac_address(struct net_device *netdev, void *p)
 static void read_bulk_callback(struct urb *urb)
 {
 	struct net_device *netdev;
-	unsigned long flags;
 	int status = urb->status;
 	struct rx_agg *agg;
 	struct r8152 *tp;
@@ -997,9 +1025,9 @@ static void read_bulk_callback(struct urb *urb)
 		if (urb->actual_length < ETH_ZLEN)
 			break;
 
-		spin_lock_irqsave(&tp->rx_lock, flags);
+		spin_lock(&tp->rx_lock);
 		list_add_tail(&agg->list, &tp->rx_done);
-		spin_unlock_irqrestore(&tp->rx_lock, flags);
+		spin_unlock(&tp->rx_lock);
 		tasklet_schedule(&tp->tl);
 		return;
 	case -ESHUTDOWN:
@@ -1022,9 +1050,9 @@ static void read_bulk_callback(struct urb *urb)
 	if (result == -ENODEV) {
 		netif_device_detach(tp->netdev);
 	} else if (result) {
-		spin_lock_irqsave(&tp->rx_lock, flags);
+		spin_lock(&tp->rx_lock);
 		list_add_tail(&agg->list, &tp->rx_done);
-		spin_unlock_irqrestore(&tp->rx_lock, flags);
+		spin_unlock(&tp->rx_lock);
 		tasklet_schedule(&tp->tl);
 	}
 }
@@ -1033,7 +1061,6 @@ static void write_bulk_callback(struct urb *urb)
 {
 	struct net_device_stats *stats;
 	struct net_device *netdev;
-	unsigned long flags;
 	struct tx_agg *agg;
 	struct r8152 *tp;
 	int status = urb->status;
@@ -1057,9 +1084,9 @@ static void write_bulk_callback(struct urb *urb)
 		stats->tx_bytes += agg->skb_len;
 	}
 
-	spin_lock_irqsave(&tp->tx_lock, flags);
+	spin_lock(&tp->tx_lock);
 	list_add_tail(&agg->list, &tp->tx_free);
-	spin_unlock_irqrestore(&tp->tx_lock, flags);
+	spin_unlock(&tp->tx_lock);
 
 	usb_autopm_put_interface_async(tp->intf);
 
@@ -1073,7 +1100,7 @@ static void write_bulk_callback(struct urb *urb)
 		return;
 
 	if (!skb_queue_empty(&tp->tx_queue))
-		schedule_delayed_work(&tp->schedule, 0);
+		tasklet_schedule(&tp->tl);
 }
 
 static void intr_callback(struct urb *urb)
@@ -1268,6 +1295,9 @@ static struct tx_agg *r8152_get_tx_agg(struct r8152 *tp)
 	struct tx_agg *agg = NULL;
 	unsigned long flags;
 
+	if (list_empty(&tp->tx_free))
+		return NULL;
+
 	spin_lock_irqsave(&tp->tx_lock, flags);
 	if (!list_empty(&tp->tx_free)) {
 		struct list_head *cursor;
@@ -1281,24 +1311,130 @@ static struct tx_agg *r8152_get_tx_agg(struct r8152 *tp)
 	return agg;
 }
 
-static void
-r8152_tx_csum(struct r8152 *tp, struct tx_desc *desc, struct sk_buff *skb)
+static inline __be16 get_protocol(struct sk_buff *skb)
 {
-	memset(desc, 0, sizeof(*desc));
+	__be16 protocol;
 
-	desc->opts1 = cpu_to_le32((skb->len & TX_LEN_MASK) | TX_FS | TX_LS);
+	if (skb->protocol == htons(ETH_P_8021Q))
+		protocol = vlan_eth_hdr(skb)->h_vlan_encapsulated_proto;
+	else
+		protocol = skb->protocol;
+
+	return protocol;
+}
+
+/*
+ * r8152_csum_workaround()
+ * The hw limites the value the transport offset. When the offset is out of the
+ * range, calculate the checksum by sw.
+ */
+static void r8152_csum_workaround(struct r8152 *tp, struct sk_buff *skb,
+				  struct sk_buff_head *list)
+{
+	if (skb_shinfo(skb)->gso_size) {
+		netdev_features_t features = tp->netdev->features;
+		struct sk_buff_head seg_list;
+		struct sk_buff *segs, *nskb;
+
+		features &= ~(NETIF_F_IP_CSUM | NETIF_F_SG | NETIF_F_TSO);
+		segs = skb_gso_segment(skb, features);
+		if (IS_ERR(segs) || !segs)
+			goto drop;
+
+		__skb_queue_head_init(&seg_list);
+
+		do {
+			nskb = segs;
+			segs = segs->next;
+			nskb->next = NULL;
+			__skb_queue_tail(&seg_list, nskb);
+		} while (segs);
+
+		skb_queue_splice(&seg_list, list);
+		dev_kfree_skb(skb);
+	} else if (skb->ip_summed == CHECKSUM_PARTIAL) {
+		if (skb_checksum_help(skb) < 0)
+			goto drop;
+
+		__skb_queue_head(list, skb);
+	} else {
+		struct net_device_stats *stats;
+
+drop:
+		stats = &tp->netdev->stats;
+		stats->tx_dropped++;
+		dev_kfree_skb(skb);
+	}
+}
+
+/*
+ * msdn_giant_send_check()
+ * According to the document of microsoft, the TCP Pseudo Header excludes the
+ * packet length for IPv6 TCP large packets.
+ */
+static int msdn_giant_send_check(struct sk_buff *skb)
+{
+	const struct ipv6hdr *ipv6h;
+	struct tcphdr *th;
+
+	ipv6h = ipv6_hdr(skb);
+	th = tcp_hdr(skb);
+
+	th->check = 0;
+	th->check = ~tcp_v6_check(0, &ipv6h->saddr, &ipv6h->daddr, 0);
+
+	return 0;
+}
+
+static int r8152_tx_csum(struct r8152 *tp, struct tx_desc *desc,
+			 struct sk_buff *skb, u32 len, u32 transport_offset)
+{
+	u32 mss = skb_shinfo(skb)->gso_size;
+	u32 opts1, opts2 = 0;
+	int ret = TX_CSUM_SUCCESS;
+
+	WARN_ON_ONCE(len > TX_LEN_MAX);
+
+	opts1 = len | TX_FS | TX_LS;
+
+	if (mss) {
+		if (transport_offset > GTTCPHO_MAX) {
+			netif_warn(tp, tx_err, tp->netdev,
+				   "Invalid transport offset 0x%x for TSO\n",
+				   transport_offset);
+			ret = TX_CSUM_TSO;
+			goto unavailable;
+		}
+
+		switch (get_protocol(skb)) {
+		case htons(ETH_P_IP):
+			opts1 |= GTSENDV4;
+			break;
+
+		case htons(ETH_P_IPV6):
+			opts1 |= GTSENDV6;
+			msdn_giant_send_check(skb);
+			break;
 
-	if (skb->ip_summed == CHECKSUM_PARTIAL) {
-		__be16 protocol;
+		default:
+			WARN_ON_ONCE(1);
+			break;
+		}
+
+		opts1 |= transport_offset << GTTCPHO_SHIFT;
+		opts2 |= min(mss, MSS_MAX) << MSS_SHIFT;
+	} else if (skb->ip_summed == CHECKSUM_PARTIAL) {
 		u8 ip_protocol;
-		u32 opts2 = 0;
 
-		if (skb->protocol == htons(ETH_P_8021Q))
-			protocol = vlan_eth_hdr(skb)->h_vlan_encapsulated_proto;
-		else
-			protocol = skb->protocol;
+		if (transport_offset > TCPHO_MAX) {
+			netif_warn(tp, tx_err, tp->netdev,
+				   "Invalid transport offset 0x%x\n",
+				   transport_offset);
+			ret = TX_CSUM_NONE;
+			goto unavailable;
+		}
 
-		switch (protocol) {
+		switch (get_protocol(skb)) {
 		case htons(ETH_P_IP):
 			opts2 |= IPV4_CS;
 			ip_protocol = ip_hdr(skb)->protocol;
@@ -1314,30 +1450,33 @@ r8152_tx_csum(struct r8152 *tp, struct tx_desc *desc, struct sk_buff *skb)
 			break;
 		}
 
-		if (ip_protocol == IPPROTO_TCP) {
+		if (ip_protocol == IPPROTO_TCP)
 			opts2 |= TCP_CS;
-			opts2 |= (skb_transport_offset(skb) & 0x7fff) << 17;
-		} else if (ip_protocol == IPPROTO_UDP) {
+		else if (ip_protocol == IPPROTO_UDP)
 			opts2 |= UDP_CS;
-		} else {
+		else
 			WARN_ON_ONCE(1);
-		}
 
-		desc->opts2 = cpu_to_le32(opts2);
+		opts2 |= transport_offset << TCPHO_SHIFT;
 	}
+
+	desc->opts2 = cpu_to_le32(opts2);
+	desc->opts1 = cpu_to_le32(opts1);
+
+unavailable:
+	return ret;
 }
 
 static int r8152_tx_agg_fill(struct r8152 *tp, struct tx_agg *agg)
 {
 	struct sk_buff_head skb_head, *tx_queue = &tp->tx_queue;
-	unsigned long flags;
 	int remain, ret;
 	u8 *tx_data;
 
 	__skb_queue_head_init(&skb_head);
-	spin_lock_irqsave(&tx_queue->lock, flags);
+	spin_lock(&tx_queue->lock);
 	skb_queue_splice_init(tx_queue, &skb_head);
-	spin_unlock_irqrestore(&tx_queue->lock, flags);
+	spin_unlock(&tx_queue->lock);
 
 	tx_data = agg->head;
 	agg->skb_num = agg->skb_len = 0;
@@ -1347,47 +1486,65 @@ static int r8152_tx_agg_fill(struct r8152 *tp, struct tx_agg *agg)
 		struct tx_desc *tx_desc;
 		struct sk_buff *skb;
 		unsigned int len;
+		u32 offset;
 
 		skb = __skb_dequeue(&skb_head);
 		if (!skb)
 			break;
 
-		remain -= sizeof(*tx_desc);
-		len = skb->len;
-		if (remain < len) {
+		len = skb->len + sizeof(*tx_desc);
+
+		if (len > remain) {
 			__skb_queue_head(&skb_head, skb);
 			break;
 		}
 
 		tx_data = tx_agg_align(tx_data);
 		tx_desc = (struct tx_desc *)tx_data;
+
+		offset = (u32)skb_transport_offset(skb);
+
+		if (r8152_tx_csum(tp, tx_desc, skb, skb->len, offset)) {
+			r8152_csum_workaround(tp, skb, &skb_head);
+			continue;
+		}
+
 		tx_data += sizeof(*tx_desc);
 
-		r8152_tx_csum(tp, tx_desc, skb);
-		memcpy(tx_data, skb->data, len);
-		agg->skb_num++;
+		len = skb->len;
+		if (skb_copy_bits(skb, 0, tx_data, len) < 0) {
+			struct net_device_stats *stats = &tp->netdev->stats;
+
+			stats->tx_dropped++;
+			dev_kfree_skb_any(skb);
+			tx_data -= sizeof(*tx_desc);
+			continue;
+		}
+
+		tx_data += len;
 		agg->skb_len += len;
+		agg->skb_num++;
+
 		dev_kfree_skb_any(skb);
 
-		tx_data += len;
 		remain = rx_buf_sz - (int)(tx_agg_align(tx_data) - agg->head);
 	}
 
 	if (!skb_queue_empty(&skb_head)) {
-		spin_lock_irqsave(&tx_queue->lock, flags);
+		spin_lock(&tx_queue->lock);
 		skb_queue_splice(&skb_head, tx_queue);
-		spin_unlock_irqrestore(&tx_queue->lock, flags);
+		spin_unlock(&tx_queue->lock);
 	}
 
-	netif_tx_lock_bh(tp->netdev);
+	netif_tx_lock(tp->netdev);
 
 	if (netif_queue_stopped(tp->netdev) &&
 	    skb_queue_len(&tp->tx_queue) < tp->tx_qlen)
 		netif_wake_queue(tp->netdev);
 
-	netif_tx_unlock_bh(tp->netdev);
+	netif_tx_unlock(tp->netdev);
 
-	ret = usb_autopm_get_interface(tp->intf);
+	ret = usb_autopm_get_interface_async(tp->intf);
 	if (ret < 0)
 		goto out_tx_fill;
 
@@ -1395,14 +1552,45 @@ static int r8152_tx_agg_fill(struct r8152 *tp, struct tx_agg *agg)
 			  agg->head, (int)(tx_data - (u8 *)agg->head),
 			  (usb_complete_t)write_bulk_callback, agg);
 
-	ret = usb_submit_urb(agg->urb, GFP_KERNEL);
+	ret = usb_submit_urb(agg->urb, GFP_ATOMIC);
 	if (ret < 0)
-		usb_autopm_put_interface(tp->intf);
+		usb_autopm_put_interface_async(tp->intf);
 
 out_tx_fill:
 	return ret;
 }
 
+static u8 r8152_rx_csum(struct r8152 *tp, struct rx_desc *rx_desc)
+{
+	u8 checksum = CHECKSUM_NONE;
+	u32 opts2, opts3;
+
+	if (tp->version == RTL_VER_01)
+		goto return_result;
+
+	opts2 = le32_to_cpu(rx_desc->opts2);
+	opts3 = le32_to_cpu(rx_desc->opts3);
+
+	if (opts2 & RD_IPV4_CS) {
+		if (opts3 & IPF)
+			checksum = CHECKSUM_NONE;
+		else if ((opts2 & RD_UDP_CS) && (opts3 & UDPF))
+			checksum = CHECKSUM_NONE;
+		else if ((opts2 & RD_TCP_CS) && (opts3 & TCPF))
+			checksum = CHECKSUM_NONE;
+		else
+			checksum = CHECKSUM_UNNECESSARY;
+	} else if (RD_IPV6_CS) {
+		if ((opts2 & RD_UDP_CS) && !(opts3 & UDPF))
+			checksum = CHECKSUM_UNNECESSARY;
+		else if ((opts2 & RD_TCP_CS) && !(opts3 & TCPF))
+			checksum = CHECKSUM_UNNECESSARY;
+	}
+
+return_result:
+	return checksum;
+}
+
 static void rx_bottom(struct r8152 *tp)
 {
 	unsigned long flags;
@@ -1455,8 +1643,10 @@ static void rx_bottom(struct r8152 *tp)
 			skb = netdev_alloc_skb_ip_align(netdev, pkt_len);
 			if (!skb) {
 				stats->rx_dropped++;
-				break;
+				goto find_next_rx;
 			}
+
+			skb->ip_summed = r8152_rx_csum(tp, rx_desc);
 			memcpy(skb->data, rx_data, pkt_len);
 			skb_put(skb, pkt_len);
 			skb->protocol = eth_type_trans(skb, netdev);
@@ -1464,6 +1654,7 @@ static void rx_bottom(struct r8152 *tp)
 			stats->rx_packets++;
 			stats->rx_bytes += pkt_len;
 
+find_next_rx:
 			rx_data = rx_agg_align(rx_data + pkt_len + CRC_SIZE);
 			rx_desc = (struct rx_desc *)rx_data;
 			len_used = (int)(rx_data - (u8 *)agg->head);
@@ -1535,6 +1726,7 @@ static void bottom_half(unsigned long data)
 		return;
 
 	rx_bottom(tp);
+	tx_bottom(tp);
 }
 
 static
@@ -1551,16 +1743,15 @@ static void rtl_drop_queued_tx(struct r8152 *tp)
 {
 	struct net_device_stats *stats = &tp->netdev->stats;
 	struct sk_buff_head skb_head, *tx_queue = &tp->tx_queue;
-	unsigned long flags;
 	struct sk_buff *skb;
 
 	if (skb_queue_empty(tx_queue))
 		return;
 
 	__skb_queue_head_init(&skb_head);
-	spin_lock_irqsave(&tx_queue->lock, flags);
+	spin_lock_bh(&tx_queue->lock);
 	skb_queue_splice_init(tx_queue, &skb_head);
-	spin_unlock_irqrestore(&tx_queue->lock, flags);
+	spin_unlock_bh(&tx_queue->lock);
 
 	while ((skb = __skb_dequeue(&skb_head))) {
 		dev_kfree_skb(skb);
@@ -1631,7 +1822,7 @@ static void _rtl8152_set_rx_mode(struct net_device *netdev)
 }
 
 static netdev_tx_t rtl8152_start_xmit(struct sk_buff *skb,
-					    struct net_device *netdev)
+					struct net_device *netdev)
 {
 	struct r8152 *tp = netdev_priv(netdev);
 
@@ -1639,13 +1830,17 @@ static netdev_tx_t rtl8152_start_xmit(struct sk_buff *skb,
 
 	skb_queue_tail(&tp->tx_queue, skb);
 
-	if (list_empty(&tp->tx_free) &&
-	    skb_queue_len(&tp->tx_queue) > tp->tx_qlen)
+	if (!list_empty(&tp->tx_free)) {
+		if (test_bit(SELECTIVE_SUSPEND, &tp->flags)) {
+			set_bit(SCHEDULE_TASKLET, &tp->flags);
+			schedule_delayed_work(&tp->schedule, 0);
+		} else {
+			usb_mark_last_busy(tp->udev);
+			tasklet_schedule(&tp->tl);
+		}
+	} else if (skb_queue_len(&tp->tx_queue) > tp->tx_qlen)
 		netif_stop_queue(netdev);
 
-	if (!list_empty(&tp->tx_free))
-		schedule_delayed_work(&tp->schedule, 0);
-
 	return NETDEV_TX_OK;
 }
 
@@ -2524,8 +2719,11 @@ static void rtl_work_func_t(struct work_struct *work)
 	if (test_bit(RTL8152_SET_RX_MODE, &tp->flags))
 		_rtl8152_set_rx_mode(tp->netdev);
 
-	if (tp->speed & LINK_STATUS)
-		tx_bottom(tp);
+	if (test_bit(SCHEDULE_TASKLET, &tp->flags) &&
+	    (tp->speed & LINK_STATUS)) {
+		clear_bit(SCHEDULE_TASKLET, &tp->flags);
+		tasklet_schedule(&tp->tl);
+	}
 
 	if (test_bit(PHY_RESET, &tp->flags))
 		rtl_phy_reset(tp);
@@ -3094,10 +3292,15 @@ static int rtl8152_probe(struct usb_interface *intf,
 	netdev->netdev_ops = &rtl8152_netdev_ops;
 	netdev->watchdog_timeo = RTL8152_TX_TIMEOUT;
 
-	netdev->features |= NETIF_F_IP_CSUM;
-	netdev->hw_features = NETIF_F_IP_CSUM;
+	netdev->features |= NETIF_F_RXCSUM | NETIF_F_IP_CSUM | NETIF_F_SG |
+			    NETIF_F_TSO | NETIF_F_FRAGLIST | NETIF_F_IPV6_CSUM |
+			    NETIF_F_TSO6;
+	netdev->hw_features = NETIF_F_RXCSUM | NETIF_F_IP_CSUM | NETIF_F_SG |
+			      NETIF_F_TSO | NETIF_F_FRAGLIST |
+			      NETIF_F_IPV6_CSUM | NETIF_F_TSO6;
 
 	SET_ETHTOOL_OPS(netdev, &ops);
+	netif_set_gso_max_size(netdev, RTL_LIMITED_TSO_SIZE);
 
 	tp->mii.dev = netdev;
 	tp->mii.mdio_read = read_mii_word;