
Merge branch 'hyperv-next'

K. Y. Srinivasan says:

====================
Drivers: net: hyperv: Enable various offloads

This patch set enables both checksum and segmentation offload.
As part of this effort I have enabled scatter-gather I/O as well.

In version 2 of these patches, I addressed comments from David Miller and
Dan Carpenter.

In this version I have addressed the latest comments from David Miller.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
David S. Miller, 11 years ago
parent
commit
e3ca64948b
3 files changed, 504 insertions and 124 deletions
  1. drivers/net/hyperv/hyperv_net.h  +138 -7
  2. drivers/net/hyperv/netvsc_drv.c  +283 -50
  3. drivers/net/hyperv/rndis_filter.c  +83 -67

drivers/net/hyperv/hyperv_net.h  +138 -7

@@ -30,6 +30,7 @@
 
 /* Fwd declaration */
 struct hv_netvsc_packet;
+struct ndis_tcp_ip_checksum_info;
 
 /* Represent the xfer page packet which contains 1 or more netvsc packet */
 struct xferpage_packet {
@@ -73,7 +74,7 @@ struct hv_netvsc_packet {
 	} completion;
 
 	/* This points to the memory after page_buf */
-	void *extension;
+	struct rndis_message *rndis_msg;
 
 	u32 total_data_buflen;
 	/* Points to the send/receive buffer where the ethernet frame is */
@@ -117,7 +118,8 @@ int netvsc_send(struct hv_device *device,
 void netvsc_linkstatus_callback(struct hv_device *device_obj,
 				unsigned int status);
 int netvsc_recv_callback(struct hv_device *device_obj,
-			struct hv_netvsc_packet *packet);
+			struct hv_netvsc_packet *packet,
+			struct ndis_tcp_ip_checksum_info *csum_info);
 int rndis_filter_open(struct hv_device *dev);
 int rndis_filter_close(struct hv_device *dev);
 int rndis_filter_device_add(struct hv_device *dev,
@@ -126,11 +128,6 @@ void rndis_filter_device_remove(struct hv_device *dev);
 int rndis_filter_receive(struct hv_device *dev,
 			struct hv_netvsc_packet *pkt);
 
-
-
-int rndis_filter_send(struct hv_device *dev,
-			struct hv_netvsc_packet *pkt);
-
 int rndis_filter_set_packet_filter(struct rndis_device *dev, u32 new_filter);
 int rndis_filter_set_device_mac(struct hv_device *hdev, char *mac);
 
@@ -726,9 +723,133 @@ struct ndis_pkt_8021q_info {
 	};
 };
 
+struct ndis_oject_header {
+	u8 type;
+	u8 revision;
+	u16 size;
+};
+
+#define NDIS_OBJECT_TYPE_DEFAULT	0x80
+#define NDIS_OFFLOAD_PARAMETERS_REVISION_3 3
+#define NDIS_OFFLOAD_PARAMETERS_NO_CHANGE 0
+#define NDIS_OFFLOAD_PARAMETERS_LSOV2_DISABLED 1
+#define NDIS_OFFLOAD_PARAMETERS_LSOV2_ENABLED  2
+#define NDIS_OFFLOAD_PARAMETERS_LSOV1_ENABLED  2
+#define NDIS_OFFLOAD_PARAMETERS_RSC_DISABLED 1
+#define NDIS_OFFLOAD_PARAMETERS_RSC_ENABLED 2
+#define NDIS_OFFLOAD_PARAMETERS_TX_RX_DISABLED 1
+#define NDIS_OFFLOAD_PARAMETERS_TX_ENABLED_RX_DISABLED 2
+#define NDIS_OFFLOAD_PARAMETERS_RX_ENABLED_TX_DISABLED 3
+#define NDIS_OFFLOAD_PARAMETERS_TX_RX_ENABLED 4
+
+#define NDIS_TCP_LARGE_SEND_OFFLOAD_V2_TYPE	1
+#define NDIS_TCP_LARGE_SEND_OFFLOAD_IPV4	0
+#define NDIS_TCP_LARGE_SEND_OFFLOAD_IPV6	1
+
+/*
+ * New offload OIDs for NDIS 6
+ */
+#define OID_TCP_OFFLOAD_CURRENT_CONFIG 0xFC01020B /* query only */
+#define OID_TCP_OFFLOAD_PARAMETERS 0xFC01020C		/* set only */
+#define OID_TCP_OFFLOAD_HARDWARE_CAPABILITIES 0xFC01020D/* query only */
+#define OID_TCP_CONNECTION_OFFLOAD_CURRENT_CONFIG 0xFC01020E /* query only */
+#define OID_TCP_CONNECTION_OFFLOAD_HARDWARE_CAPABILITIES 0xFC01020F /* query */
+#define OID_OFFLOAD_ENCAPSULATION 0x0101010A /* set/query */
+
+struct ndis_offload_params {
+	struct ndis_oject_header header;
+	u8 ip_v4_csum;
+	u8 tcp_ip_v4_csum;
+	u8 udp_ip_v4_csum;
+	u8 tcp_ip_v6_csum;
+	u8 udp_ip_v6_csum;
+	u8 lso_v1;
+	u8 ip_sec_v1;
+	u8 lso_v2_ipv4;
+	u8 lso_v2_ipv6;
+	u8 tcp_connection_ip_v4;
+	u8 tcp_connection_ip_v6;
+	u32 flags;
+	u8 ip_sec_v2;
+	u8 ip_sec_v2_ip_v4;
+	struct {
+		u8 rsc_ip_v4;
+		u8 rsc_ip_v6;
+	};
+	struct {
+		u8 encapsulated_packet_task_offload;
+		u8 encapsulation_types;
+	};
+};
+
+struct ndis_tcp_ip_checksum_info {
+	union {
+		struct {
+			u32 is_ipv4:1;
+			u32 is_ipv6:1;
+			u32 tcp_checksum:1;
+			u32 udp_checksum:1;
+			u32 ip_header_checksum:1;
+			u32 reserved:11;
+			u32 tcp_header_offset:10;
+		} transmit;
+		struct {
+			u32 tcp_checksum_failed:1;
+			u32 udp_checksum_failed:1;
+			u32 ip_checksum_failed:1;
+			u32 tcp_checksum_succeeded:1;
+			u32 udp_checksum_succeeded:1;
+			u32 ip_checksum_succeeded:1;
+			u32 loopback:1;
+			u32 tcp_checksum_value_invalid:1;
+			u32 ip_checksum_value_invalid:1;
+		} receive;
+		u32  value;
+	};
+};
+
+struct ndis_tcp_lso_info {
+	union {
+		struct {
+			u32 unused:30;
+			u32 type:1;
+			u32 reserved2:1;
+		} transmit;
+		struct {
+			u32 mss:20;
+			u32 tcp_header_offset:10;
+			u32 type:1;
+			u32 reserved2:1;
+		} lso_v1_transmit;
+		struct {
+			u32 tcp_payload:30;
+			u32 type:1;
+			u32 reserved2:1;
+		} lso_v1_transmit_complete;
+		struct {
+			u32 mss:20;
+			u32 tcp_header_offset:10;
+			u32 type:1;
+			u32 ip_version:1;
+		} lso_v2_transmit;
+		struct {
+			u32 reserved:30;
+			u32 type:1;
+			u32 reserved2:1;
+		} lso_v2_transmit_complete;
+		u32  value;
+	};
+};
+
 #define NDIS_VLAN_PPI_SIZE (sizeof(struct rndis_per_packet_info) + \
 		sizeof(struct ndis_pkt_8021q_info))
 
+#define NDIS_CSUM_PPI_SIZE (sizeof(struct rndis_per_packet_info) + \
+		sizeof(struct ndis_tcp_ip_checksum_info))
+
+#define NDIS_LSO_PPI_SIZE (sizeof(struct rndis_per_packet_info) + \
+		sizeof(struct ndis_tcp_lso_info))
+
 /* Format of Information buffer passed in a SetRequest for the OID */
 /* OID_GEN_RNDIS_CONFIG_PARAMETER. */
 struct rndis_config_parameter_info {
@@ -954,6 +1075,16 @@ struct rndis_message {
 #define NDIS_PACKET_TYPE_FUNCTIONAL	0x00000400
 #define NDIS_PACKET_TYPE_MAC_FRAME	0x00000800
 
+#define INFO_IPV4       2
+#define INFO_IPV6       4
+#define INFO_TCP        2
+#define INFO_UDP        4
+
+#define TRANSPORT_INFO_NOT_IP   0
+#define TRANSPORT_INFO_IPV4_TCP ((INFO_IPV4 << 16) | INFO_TCP)
+#define TRANSPORT_INFO_IPV4_UDP ((INFO_IPV4 << 16) | INFO_UDP)
+#define TRANSPORT_INFO_IPV6_TCP ((INFO_IPV6 << 16) | INFO_TCP)
+#define TRANSPORT_INFO_IPV6_UDP ((INFO_IPV6 << 16) | INFO_UDP)
 
 
 #endif /* _HYPERV_NET_H */
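
As an aside on the TRANSPORT_INFO_* values added above: they pack the IP-version flag (INFO_IPV4/INFO_IPV6) into the upper 16 bits and the transport flag (INFO_TCP/INFO_UDP) into the lower 16 bits, which is how netvsc_start_xmit() tests them later in this series. A standalone sketch of that encoding follows; it is illustrative only and not part of the patch (the decode() helper is invented for the example).

	#include <stdint.h>
	#include <stdio.h>

	#define INFO_IPV4	2
	#define INFO_IPV6	4
	#define INFO_TCP	2
	#define INFO_UDP	4

	#define TRANSPORT_INFO_IPV4_TCP ((INFO_IPV4 << 16) | INFO_TCP)
	#define TRANSPORT_INFO_IPV6_UDP ((INFO_IPV6 << 16) | INFO_UDP)

	/* Mirrors the checks in netvsc_start_xmit(): the upper half selects
	 * the IP version, the lower half selects TCP vs. UDP.
	 */
	static void decode(uint32_t info)
	{
		printf("%s %s\n",
		       (info & (INFO_IPV4 << 16)) ? "IPv4" : "IPv6",
		       (info & INFO_TCP) ? "TCP" : "UDP");
	}

	int main(void)
	{
		decode(TRANSPORT_INFO_IPV4_TCP);	/* prints "IPv4 TCP" */
		decode(TRANSPORT_INFO_IPV6_UDP);	/* prints "IPv6 UDP" */
		return 0;
	}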

drivers/net/hyperv/netvsc_drv.c  +283 -50

@@ -128,6 +128,27 @@ static int netvsc_close(struct net_device *net)
 	return ret;
 }
 
+static void *init_ppi_data(struct rndis_message *msg, u32 ppi_size,
+				int pkt_type)
+{
+	struct rndis_packet *rndis_pkt;
+	struct rndis_per_packet_info *ppi;
+
+	rndis_pkt = &msg->msg.pkt;
+	rndis_pkt->data_offset += ppi_size;
+
+	ppi = (struct rndis_per_packet_info *)((void *)rndis_pkt +
+		rndis_pkt->per_pkt_info_offset + rndis_pkt->per_pkt_info_len);
+
+	ppi->size = ppi_size;
+	ppi->type = pkt_type;
+	ppi->ppi_offset = sizeof(struct rndis_per_packet_info);
+
+	rndis_pkt->per_pkt_info_len += ppi_size;
+
+	return ppi;
+}
+
 static void netvsc_xmit_completion(void *context)
 {
 	struct hv_netvsc_packet *packet = (struct hv_netvsc_packet *)context;
@@ -140,21 +161,163 @@ static void netvsc_xmit_completion(void *context)
 		dev_kfree_skb_any(skb);
 }
 
+static u32 fill_pg_buf(struct page *page, u32 offset, u32 len,
+			struct hv_page_buffer *pb)
+{
+	int j = 0;
+
+	/* Deal with compound pages by ignoring the unused part
+	 * of the page.
+	 */
+	page += (offset >> PAGE_SHIFT);
+	offset &= ~PAGE_MASK;
+
+	while (len > 0) {
+		unsigned long bytes;
+
+		bytes = PAGE_SIZE - offset;
+		if (bytes > len)
+			bytes = len;
+		pb[j].pfn = page_to_pfn(page);
+		pb[j].offset = offset;
+		pb[j].len = bytes;
+
+		offset += bytes;
+		len -= bytes;
+
+		if (offset == PAGE_SIZE && len) {
+			page++;
+			offset = 0;
+			j++;
+		}
+	}
+
+	return j + 1;
+}
+
+static u32 init_page_array(void *hdr, u32 len, struct sk_buff *skb,
+			   struct hv_page_buffer *pb)
+{
+	u32 slots_used = 0;
+	char *data = skb->data;
+	int frags = skb_shinfo(skb)->nr_frags;
+	int i;
+
+	/* The packet is laid out thus:
+	 * 1. hdr
+	 * 2. skb linear data
+	 * 3. skb fragment data
+	 */
+	if (hdr != NULL)
+		slots_used += fill_pg_buf(virt_to_page(hdr),
+					offset_in_page(hdr),
+					len, &pb[slots_used]);
+
+	slots_used += fill_pg_buf(virt_to_page(data),
+				offset_in_page(data),
+				skb_headlen(skb), &pb[slots_used]);
+
+	for (i = 0; i < frags; i++) {
+		skb_frag_t *frag = skb_shinfo(skb)->frags + i;
+
+		slots_used += fill_pg_buf(skb_frag_page(frag),
+					frag->page_offset,
+					skb_frag_size(frag), &pb[slots_used]);
+	}
+	return slots_used;
+}
+
+static int count_skb_frag_slots(struct sk_buff *skb)
+{
+	int i, frags = skb_shinfo(skb)->nr_frags;
+	int pages = 0;
+
+	for (i = 0; i < frags; i++) {
+		skb_frag_t *frag = skb_shinfo(skb)->frags + i;
+		unsigned long size = skb_frag_size(frag);
+		unsigned long offset = frag->page_offset;
+
+		/* Skip unused frames from start of page */
+		offset &= ~PAGE_MASK;
+		pages += PFN_UP(offset + size);
+	}
+	return pages;
+}
+
+static int netvsc_get_slots(struct sk_buff *skb)
+{
+	char *data = skb->data;
+	unsigned int offset = offset_in_page(data);
+	unsigned int len = skb_headlen(skb);
+	int slots;
+	int frag_slots;
+
+	slots = DIV_ROUND_UP(offset + len, PAGE_SIZE);
+	frag_slots = count_skb_frag_slots(skb);
+	return slots + frag_slots;
+}
+
+static u32 get_net_transport_info(struct sk_buff *skb, u32 *trans_off)
+{
+	u32 ret_val = TRANSPORT_INFO_NOT_IP;
+
+	if ((eth_hdr(skb)->h_proto != htons(ETH_P_IP)) &&
+		(eth_hdr(skb)->h_proto != htons(ETH_P_IPV6))) {
+		goto not_ip;
+	}
+
+	*trans_off = skb_transport_offset(skb);
+
+	if ((eth_hdr(skb)->h_proto == htons(ETH_P_IP))) {
+		struct iphdr *iphdr = ip_hdr(skb);
+
+		if (iphdr->protocol == IPPROTO_TCP)
+			ret_val = TRANSPORT_INFO_IPV4_TCP;
+		else if (iphdr->protocol == IPPROTO_UDP)
+			ret_val = TRANSPORT_INFO_IPV4_UDP;
+	} else {
+		if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP)
+			ret_val = TRANSPORT_INFO_IPV6_TCP;
+		else if (ipv6_hdr(skb)->nexthdr == IPPROTO_UDP)
+			ret_val = TRANSPORT_INFO_IPV6_UDP;
+	}
+
+not_ip:
+	return ret_val;
+}
+
 static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)
 {
 	struct net_device_context *net_device_ctx = netdev_priv(net);
 	struct hv_netvsc_packet *packet;
 	int ret;
-	unsigned int i, num_pages, npg_data;
-
-	/* Add multipages for skb->data and additional 2 for RNDIS */
-	npg_data = (((unsigned long)skb->data + skb_headlen(skb) - 1)
-		>> PAGE_SHIFT) - ((unsigned long)skb->data >> PAGE_SHIFT) + 1;
-	num_pages = skb_shinfo(skb)->nr_frags + npg_data + 2;
+	unsigned int num_data_pgs;
+	struct rndis_message *rndis_msg;
+	struct rndis_packet *rndis_pkt;
+	u32 rndis_msg_size;
+	bool isvlan;
+	struct rndis_per_packet_info *ppi;
+	struct ndis_tcp_ip_checksum_info *csum_info;
+	struct ndis_tcp_lso_info *lso_info;
+	int  hdr_offset;
+	u32 net_trans_info;
+
+
+	/* We will need at most two pages to describe the rndis
+	 * header. We can only transmit MAX_PAGE_BUFFER_COUNT number
+	 * of pages in a single packet.
+	 */
+	num_data_pgs = netvsc_get_slots(skb) + 2;
+	if (num_data_pgs > MAX_PAGE_BUFFER_COUNT) {
+		netdev_err(net, "Packet too big: %u\n", skb->len);
+		dev_kfree_skb(skb);
+		net->stats.tx_dropped++;
+		return NETDEV_TX_OK;
+	}
 
 	/* Allocate a netvsc packet based on # of frags. */
 	packet = kzalloc(sizeof(struct hv_netvsc_packet) +
-			 (num_pages * sizeof(struct hv_page_buffer)) +
+			 (num_data_pgs * sizeof(struct hv_page_buffer)) +
 			 sizeof(struct rndis_message) +
 			 NDIS_VLAN_PPI_SIZE, GFP_ATOMIC);
 	if (!packet) {
@@ -168,53 +331,111 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)
 
 	packet->vlan_tci = skb->vlan_tci;
 
-	packet->extension = (void *)(unsigned long)packet +
+	packet->is_data_pkt = true;
+	packet->total_data_buflen = skb->len;
+
+	packet->rndis_msg = (struct rndis_message *)((unsigned long)packet +
 				sizeof(struct hv_netvsc_packet) +
-				    (num_pages * sizeof(struct hv_page_buffer));
+				(num_data_pgs * sizeof(struct hv_page_buffer)));
+
+	/* Set the completion routine */
+	packet->completion.send.send_completion = netvsc_xmit_completion;
+	packet->completion.send.send_completion_ctx = packet;
+	packet->completion.send.send_completion_tid = (unsigned long)skb;
 
-	/* If the rndis msg goes beyond 1 page, we will add 1 later */
-	packet->page_buf_cnt = num_pages - 1;
+	isvlan = packet->vlan_tci & VLAN_TAG_PRESENT;
+
+	/* Add the rndis header */
+	rndis_msg = packet->rndis_msg;
+	rndis_msg->ndis_msg_type = RNDIS_MSG_PACKET;
+	rndis_msg->msg_len = packet->total_data_buflen;
+	rndis_pkt = &rndis_msg->msg.pkt;
+	rndis_pkt->data_offset = sizeof(struct rndis_packet);
+	rndis_pkt->data_len = packet->total_data_buflen;
+	rndis_pkt->per_pkt_info_offset = sizeof(struct rndis_packet);
+
+	rndis_msg_size = RNDIS_MESSAGE_SIZE(struct rndis_packet);
+
+	if (isvlan) {
+		struct ndis_pkt_8021q_info *vlan;
+
+		rndis_msg_size += NDIS_VLAN_PPI_SIZE;
+		ppi = init_ppi_data(rndis_msg, NDIS_VLAN_PPI_SIZE,
+					IEEE_8021Q_INFO);
+		vlan = (struct ndis_pkt_8021q_info *)((void *)ppi +
+						ppi->ppi_offset);
+		vlan->vlanid = packet->vlan_tci & VLAN_VID_MASK;
+		vlan->pri = (packet->vlan_tci & VLAN_PRIO_MASK) >>
+				VLAN_PRIO_SHIFT;
+	}
 
-	/* Initialize it from the skb */
-	packet->total_data_buflen = skb->len;
+	net_trans_info = get_net_transport_info(skb, &hdr_offset);
+	if (net_trans_info == TRANSPORT_INFO_NOT_IP)
+		goto do_send;
+
+	/*
+	 * Setup the sendside checksum offload only if this is not a
+	 * GSO packet.
+	 */
+	if (skb_is_gso(skb))
+		goto do_lso;
+
+	rndis_msg_size += NDIS_CSUM_PPI_SIZE;
+	ppi = init_ppi_data(rndis_msg, NDIS_CSUM_PPI_SIZE,
+			    TCPIP_CHKSUM_PKTINFO);
+
+	csum_info = (struct ndis_tcp_ip_checksum_info *)((void *)ppi +
+			ppi->ppi_offset);
 
-	/* Start filling in the page buffers starting after RNDIS buffer. */
-	packet->page_buf[1].pfn = virt_to_phys(skb->data) >> PAGE_SHIFT;
-	packet->page_buf[1].offset
-		= (unsigned long)skb->data & (PAGE_SIZE - 1);
-	if (npg_data == 1)
-		packet->page_buf[1].len = skb_headlen(skb);
+	if (net_trans_info & (INFO_IPV4 << 16))
+		csum_info->transmit.is_ipv4 = 1;
 	else
-		packet->page_buf[1].len = PAGE_SIZE
-			- packet->page_buf[1].offset;
-
-	for (i = 2; i <= npg_data; i++) {
-		packet->page_buf[i].pfn = virt_to_phys(skb->data
-			+ PAGE_SIZE * (i-1)) >> PAGE_SHIFT;
-		packet->page_buf[i].offset = 0;
-		packet->page_buf[i].len = PAGE_SIZE;
+		csum_info->transmit.is_ipv6 = 1;
+
+	if (net_trans_info & INFO_TCP) {
+		csum_info->transmit.tcp_checksum = 1;
+		csum_info->transmit.tcp_header_offset = hdr_offset;
+	} else if (net_trans_info & INFO_UDP) {
+		csum_info->transmit.udp_checksum = 1;
 	}
-	if (npg_data > 1)
-		packet->page_buf[npg_data].len = (((unsigned long)skb->data
-			+ skb_headlen(skb) - 1) & (PAGE_SIZE - 1)) + 1;
-
-	/* Additional fragments are after SKB data */
-	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
-		const skb_frag_t *f = &skb_shinfo(skb)->frags[i];
-
-		packet->page_buf[i+npg_data+1].pfn =
-			page_to_pfn(skb_frag_page(f));
-		packet->page_buf[i+npg_data+1].offset = f->page_offset;
-		packet->page_buf[i+npg_data+1].len = skb_frag_size(f);
+	goto do_send;
+
+do_lso:
+	rndis_msg_size += NDIS_LSO_PPI_SIZE;
+	ppi = init_ppi_data(rndis_msg, NDIS_LSO_PPI_SIZE,
+			    TCP_LARGESEND_PKTINFO);
+
+	lso_info = (struct ndis_tcp_lso_info *)((void *)ppi +
+			ppi->ppi_offset);
+
+	lso_info->lso_v2_transmit.type = NDIS_TCP_LARGE_SEND_OFFLOAD_V2_TYPE;
+	if (net_trans_info & (INFO_IPV4 << 16)) {
+		lso_info->lso_v2_transmit.ip_version =
+			NDIS_TCP_LARGE_SEND_OFFLOAD_IPV4;
+		ip_hdr(skb)->tot_len = 0;
+		ip_hdr(skb)->check = 0;
+		tcp_hdr(skb)->check =
+		~csum_tcpudp_magic(ip_hdr(skb)->saddr,
+				   ip_hdr(skb)->daddr, 0, IPPROTO_TCP, 0);
+	} else {
+		lso_info->lso_v2_transmit.ip_version =
+			NDIS_TCP_LARGE_SEND_OFFLOAD_IPV6;
+		ipv6_hdr(skb)->payload_len = 0;
+		tcp_hdr(skb)->check =
+		~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
+				&ipv6_hdr(skb)->daddr, 0, IPPROTO_TCP, 0);
 	}
+	lso_info->lso_v2_transmit.tcp_header_offset = hdr_offset;
+	lso_info->lso_v2_transmit.mss = skb_shinfo(skb)->gso_size;
 
-	/* Set the completion routine */
-	packet->completion.send.send_completion = netvsc_xmit_completion;
-	packet->completion.send.send_completion_ctx = packet;
-	packet->completion.send.send_completion_tid = (unsigned long)skb;
+do_send:
+	/* Start filling in the page buffers with the rndis hdr */
+	rndis_msg->msg_len += rndis_msg_size;
+	packet->page_buf_cnt = init_page_array(rndis_msg, rndis_msg_size,
+					skb, &packet->page_buf[0]);
+
+	ret = netvsc_send(net_device_ctx->device_ctx, packet);
 
-	ret = rndis_filter_send(net_device_ctx->device_ctx,
-				  packet);
 	if (ret == 0) {
 		net->stats.tx_bytes += skb->len;
 		net->stats.tx_packets++;
@@ -264,7 +485,8 @@ void netvsc_linkstatus_callback(struct hv_device *device_obj,
  * "wire" on the specified device.
  */
 int netvsc_recv_callback(struct hv_device *device_obj,
-				struct hv_netvsc_packet *packet)
+				struct hv_netvsc_packet *packet,
+				struct ndis_tcp_ip_checksum_info *csum_info)
 {
 	struct net_device *net;
 	struct sk_buff *skb;
@@ -291,7 +513,17 @@ int netvsc_recv_callback(struct hv_device *device_obj,
 		packet->total_data_buflen);
 
 	skb->protocol = eth_type_trans(skb, net);
-	skb->ip_summed = CHECKSUM_NONE;
+	if (csum_info) {
+		/* We only look at the IP checksum here.
+		 * Should we be dropping the packet if checksum
+		 * failed? How do we deal with other checksums - TCP/UDP?
+		 */
+		if (csum_info->receive.ip_checksum_succeeded)
+			skb->ip_summed = CHECKSUM_UNNECESSARY;
+		else
+			skb->ip_summed = CHECKSUM_NONE;
+	}
+
 	if (packet->vlan_tci & VLAN_TAG_PRESENT)
 		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
 				       packet->vlan_tci);
@@ -450,9 +682,10 @@ static int netvsc_probe(struct hv_device *dev,
 
 	net->netdev_ops = &device_ops;
 
-	/* TODO: Add GSO and Checksum offload */
-	net->hw_features = 0;
-	net->features = NETIF_F_HW_VLAN_CTAG_TX;
+	net->hw_features = NETIF_F_RXCSUM | NETIF_F_SG | NETIF_F_IP_CSUM |
+				NETIF_F_TSO;
+	net->features = NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_SG | NETIF_F_RXCSUM |
+			NETIF_F_IP_CSUM | NETIF_F_TSO;
 
 	SET_ETHTOOL_OPS(net, &ethtool_ops);
 	SET_NETDEV_DEV(net, &dev->device);
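
The slot accounting introduced above (netvsc_get_slots() together with count_skb_frag_slots()) simply rounds each buffer up to the number of pages it touches, and netvsc_start_xmit() then reserves two extra page buffers for the RNDIS header. A standalone worked example with hypothetical sizes, assuming 4 KiB pages; illustrative only and not part of the patch:

	#include <stdio.h>

	#define PAGE_SIZE	4096
	#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))
	#define PFN_UP(x)	DIV_ROUND_UP(x, PAGE_SIZE)

	int main(void)
	{
		unsigned int lin_off = 3000, lin_len = 6000;	/* linear data */
		unsigned int frag_off = 2000, frag_len = 5000;	/* one fragment */
		unsigned int slots;

		/* Same arithmetic as netvsc_get_slots()/count_skb_frag_slots():
		 * 3 slots for the linear area, 2 for the fragment.
		 */
		slots = DIV_ROUND_UP(lin_off + lin_len, PAGE_SIZE) +
			PFN_UP(frag_off + frag_len);

		/* Plus the 2 slots netvsc_start_xmit() reserves for the
		 * RNDIS header.
		 */
		printf("page buffers needed: %u\n", slots + 2);	/* prints 7 */
		return 0;
	}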

drivers/net/hyperv/rndis_filter.c  +83 -67

@@ -350,6 +350,7 @@ static void rndis_filter_receive_data(struct rndis_device *dev,
 	struct rndis_packet *rndis_pkt;
 	u32 data_offset;
 	struct ndis_pkt_8021q_info *vlan;
+	struct ndis_tcp_ip_checksum_info *csum_info;
 
 	rndis_pkt = &msg->msg.pkt;
 
@@ -388,7 +389,8 @@ static void rndis_filter_receive_data(struct rndis_device *dev,
 		pkt->vlan_tci = 0;
 	}
 
-	netvsc_recv_callback(dev->net_dev->dev, pkt);
+	csum_info = rndis_get_ppi(rndis_pkt, TCPIP_CHKSUM_PKTINFO);
+	netvsc_recv_callback(dev->net_dev->dev, pkt, csum_info);
 }
 
 int rndis_filter_receive(struct hv_device *dev,
@@ -607,6 +609,61 @@ cleanup:
 	return ret;
 }
 
+int rndis_filter_set_offload_params(struct hv_device *hdev,
+				struct ndis_offload_params *req_offloads)
+{
+	struct netvsc_device *nvdev = hv_get_drvdata(hdev);
+	struct rndis_device *rdev = nvdev->extension;
+	struct net_device *ndev = nvdev->ndev;
+	struct rndis_request *request;
+	struct rndis_set_request *set;
+	struct ndis_offload_params *offload_params;
+	struct rndis_set_complete *set_complete;
+	u32 extlen = sizeof(struct ndis_offload_params);
+	int ret, t;
+
+	request = get_rndis_request(rdev, RNDIS_MSG_SET,
+		RNDIS_MESSAGE_SIZE(struct rndis_set_request) + extlen);
+	if (!request)
+		return -ENOMEM;
+
+	set = &request->request_msg.msg.set_req;
+	set->oid = OID_TCP_OFFLOAD_PARAMETERS;
+	set->info_buflen = extlen;
+	set->info_buf_offset = sizeof(struct rndis_set_request);
+	set->dev_vc_handle = 0;
+
+	offload_params = (struct ndis_offload_params *)((ulong)set +
+				set->info_buf_offset);
+	*offload_params = *req_offloads;
+	offload_params->header.type = NDIS_OBJECT_TYPE_DEFAULT;
+	offload_params->header.revision = NDIS_OFFLOAD_PARAMETERS_REVISION_3;
+	offload_params->header.size = extlen;
+
+	ret = rndis_filter_send_request(rdev, request);
+	if (ret != 0)
+		goto cleanup;
+
+	t = wait_for_completion_timeout(&request->wait_event, 5*HZ);
+	if (t == 0) {
+		netdev_err(ndev, "timeout before we got aOFFLOAD set response...\n");
+		/* can't put_rndis_request, since we may still receive a
+		 * send-completion.
+		 */
+		return -EBUSY;
+	} else {
+		set_complete = &request->response_msg.msg.set_complete;
+		if (set_complete->status != RNDIS_STATUS_SUCCESS) {
+			netdev_err(ndev, "Fail to set MAC on host side:0x%x\n",
+				   set_complete->status);
+			ret = -EINVAL;
+		}
+	}
+
+cleanup:
+	put_rndis_request(rdev, request);
+	return ret;
+}
 
 static int rndis_filter_query_device_link_status(struct rndis_device *dev)
 {
@@ -807,6 +864,7 @@ int rndis_filter_device_add(struct hv_device *dev,
 	struct netvsc_device *net_device;
 	struct rndis_device *rndis_device;
 	struct netvsc_device_info *device_info = additional_info;
+	struct ndis_offload_params offloads;
 
 	rndis_device = get_rndis_device();
 	if (!rndis_device)
@@ -846,6 +904,26 @@ int rndis_filter_device_add(struct hv_device *dev,
 
 	memcpy(device_info->mac_adr, rndis_device->hw_mac_adr, ETH_ALEN);
 
+	/* Turn on the offloads; the host supports all of the relevant
+	 * offloads.
+	 */
+	memset(&offloads, 0, sizeof(struct ndis_offload_params));
+	/* A value of zero means "no change"; now turn on what we
+	 * want.
+	 */
+	offloads.ip_v4_csum = NDIS_OFFLOAD_PARAMETERS_TX_RX_ENABLED;
+	offloads.tcp_ip_v4_csum = NDIS_OFFLOAD_PARAMETERS_TX_RX_ENABLED;
+	offloads.udp_ip_v4_csum = NDIS_OFFLOAD_PARAMETERS_TX_RX_ENABLED;
+	offloads.tcp_ip_v6_csum = NDIS_OFFLOAD_PARAMETERS_TX_RX_ENABLED;
+	offloads.udp_ip_v6_csum = NDIS_OFFLOAD_PARAMETERS_TX_RX_ENABLED;
+	offloads.lso_v2_ipv4 = NDIS_OFFLOAD_PARAMETERS_LSOV2_ENABLED;
+
+
+	ret = rndis_filter_set_offload_params(dev, &offloads);
+	if (ret)
+		goto err_dev_remv;
+
+
 	rndis_filter_query_device_link_status(rndis_device);
 
 	device_info->link_state = rndis_device->link_state;
@@ -855,6 +933,10 @@ int rndis_filter_device_add(struct hv_device *dev,
 		 device_info->link_state ? "down" : "up");
 
 	return ret;
+
+err_dev_remv:
+	rndis_filter_device_remove(dev);
+	return ret;
 }
 
 void rndis_filter_device_remove(struct hv_device *dev)
@@ -891,69 +973,3 @@ int rndis_filter_close(struct hv_device *dev)
 
 	return rndis_filter_close_device(nvdev->extension);
 }
-
-int rndis_filter_send(struct hv_device *dev,
-			     struct hv_netvsc_packet *pkt)
-{
-	struct rndis_message *rndis_msg;
-	struct rndis_packet *rndis_pkt;
-	u32 rndis_msg_size;
-	bool isvlan = pkt->vlan_tci & VLAN_TAG_PRESENT;
-
-	/* Add the rndis header */
-	rndis_msg = (struct rndis_message *)pkt->extension;
-
-	rndis_msg_size = RNDIS_MESSAGE_SIZE(struct rndis_packet);
-	if (isvlan)
-		rndis_msg_size += NDIS_VLAN_PPI_SIZE;
-
-	rndis_msg->ndis_msg_type = RNDIS_MSG_PACKET;
-	rndis_msg->msg_len = pkt->total_data_buflen +
-				      rndis_msg_size;
-
-	rndis_pkt = &rndis_msg->msg.pkt;
-	rndis_pkt->data_offset = sizeof(struct rndis_packet);
-	if (isvlan)
-		rndis_pkt->data_offset += NDIS_VLAN_PPI_SIZE;
-	rndis_pkt->data_len = pkt->total_data_buflen;
-
-	if (isvlan) {
-		struct rndis_per_packet_info *ppi;
-		struct ndis_pkt_8021q_info *vlan;
-
-		rndis_pkt->per_pkt_info_offset = sizeof(struct rndis_packet);
-		rndis_pkt->per_pkt_info_len = NDIS_VLAN_PPI_SIZE;
-
-		ppi = (struct rndis_per_packet_info *)((ulong)rndis_pkt +
-			rndis_pkt->per_pkt_info_offset);
-		ppi->size = NDIS_VLAN_PPI_SIZE;
-		ppi->type = IEEE_8021Q_INFO;
-		ppi->ppi_offset = sizeof(struct rndis_per_packet_info);
-
-		vlan = (struct ndis_pkt_8021q_info *)((ulong)ppi +
-			ppi->ppi_offset);
-		vlan->vlanid = pkt->vlan_tci & VLAN_VID_MASK;
-		vlan->pri = (pkt->vlan_tci & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
-	}
-
-	pkt->is_data_pkt = true;
-	pkt->page_buf[0].pfn = virt_to_phys(rndis_msg) >> PAGE_SHIFT;
-	pkt->page_buf[0].offset =
-			(unsigned long)rndis_msg & (PAGE_SIZE-1);
-	pkt->page_buf[0].len = rndis_msg_size;
-
-	/* Add one page_buf if the rndis msg goes beyond page boundary */
-	if (pkt->page_buf[0].offset + rndis_msg_size > PAGE_SIZE) {
-		int i;
-		for (i = pkt->page_buf_cnt; i > 1; i--)
-			pkt->page_buf[i] = pkt->page_buf[i-1];
-		pkt->page_buf_cnt++;
-		pkt->page_buf[0].len = PAGE_SIZE - pkt->page_buf[0].offset;
-		pkt->page_buf[1].pfn = virt_to_phys((void *)((ulong)
-			rndis_msg + pkt->page_buf[0].len)) >> PAGE_SHIFT;
-		pkt->page_buf[1].offset = 0;
-		pkt->page_buf[1].len = rndis_msg_size - pkt->page_buf[0].len;
-	}
-
-	return netvsc_send(dev, pkt);
-}
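
On the receive side, rndis_filter_receive_data() now pulls the TCPIP_CHKSUM_PKTINFO per-packet info out of the RNDIS message and netvsc_recv_callback() keys CHECKSUM_UNNECESSARY off a single bit of it. A standalone sketch of that decision, reusing the receive bitfield from hyperv_net.h (illustrative only; C bitfield ordering is implementation-defined, so this mirrors the driver's intent rather than a guaranteed wire layout):

	#include <stdint.h>
	#include <stdio.h>

	/* Trimmed-down stand-in for struct ndis_tcp_ip_checksum_info. */
	struct csum_info_sketch {
		union {
			struct {
				uint32_t tcp_checksum_failed:1;
				uint32_t udp_checksum_failed:1;
				uint32_t ip_checksum_failed:1;
				uint32_t tcp_checksum_succeeded:1;
				uint32_t udp_checksum_succeeded:1;
				uint32_t ip_checksum_succeeded:1;
				/* remaining receive bits omitted */
			} receive;
			uint32_t value;
		};
	};

	int main(void)
	{
		struct csum_info_sketch info = { .value = 0 };

		info.receive.ip_checksum_succeeded = 1;

		/* Same decision netvsc_recv_callback() makes: trust the host's
		 * IP checksum verification, otherwise leave the skb unverified.
		 */
		printf("%s\n", info.receive.ip_checksum_succeeded ?
		       "CHECKSUM_UNNECESSARY" : "CHECKSUM_NONE");
		return 0;
	}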