Browse Source

Merge branch 'tstamp-next'

Willem de Bruijn says:

====================
timestamping updates

The main goal for this patchset is to allow correlating timestamps
with the egress interface. Also introduce a warning, as discussed
previously, and update the tests to verify the new feature.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
David S. Miller 10 years ago
parent
commit
aae68bc6f6

+ 25 - 8
Documentation/networking/timestamping.txt

@@ -122,7 +122,7 @@ SOF_TIMESTAMPING_RAW_HARDWARE:
 
 
 1.3.3 Timestamp Options
 1.3.3 Timestamp Options
 
 
-The interface supports one option
+The interface supports the options
 
 
 SOF_TIMESTAMPING_OPT_ID:
 SOF_TIMESTAMPING_OPT_ID:
 
 
@@ -130,19 +130,36 @@ SOF_TIMESTAMPING_OPT_ID:
   have multiple concurrent timestamping requests outstanding. Packets
   have multiple concurrent timestamping requests outstanding. Packets
   can be reordered in the transmit path, for instance in the packet
   can be reordered in the transmit path, for instance in the packet
   scheduler. In that case timestamps will be queued onto the error
   scheduler. In that case timestamps will be queued onto the error
-  queue out of order from the original send() calls. This option
-  embeds a counter that is incremented at send() time, to order
-  timestamps within a flow.
+  queue out of order from the original send() calls. It is then not
+  always possible to uniquely match timestamps to the original send()
+  calls based on timestamp order or payload inspection alone.
+
+  This option associates each packet at send() with a unique
+  identifier and returns that along with the timestamp. The identifier
+  is derived from a per-socket u32 counter (that wraps). For datagram
+  sockets, the counter increments with each sent packet. For stream
+  sockets, it increments with every byte.
+
+  The counter starts at zero. It is initialized the first time that
+  the socket option is enabled. It is reset each time the option is
+  enabled after having been disabled. Resetting the counter does not
+  change the identifiers of existing packets in the system.
 
 
   This option is implemented only for transmit timestamps. There, the
   This option is implemented only for transmit timestamps. There, the
   timestamp is always looped along with a struct sock_extended_err.
   timestamp is always looped along with a struct sock_extended_err.
   The option modifies field ee_data to pass an id that is unique
   The option modifies field ee_data to pass an id that is unique
   among all possibly concurrently outstanding timestamp requests for
   among all possibly concurrently outstanding timestamp requests for
-  that socket. In practice, it is a monotonically increasing u32
-  (that wraps).
+  that socket.
+
+
+SOF_TIMESTAMPING_OPT_CMSG:
 
 
-  In datagram sockets, the counter increments on each send call. In
-  stream sockets, it increments with every byte.
+  Support recv() cmsg for all timestamped packets. Control messages
+  are already supported unconditionally on all packets with receive
+  timestamps and on IPv6 packets with transmit timestamps. This option
+  extends them to IPv4 packets with transmit timestamps. One use case
+  is to correlate packets with their egress device, by enabling socket
+  option IP_PKTINFO simultaneously.
 
 
 
 
 1.4 Bytestream Timestamps
 1.4 Bytestream Timestamps

+ 78 - 12
Documentation/networking/timestamping/txtimestamp.c

@@ -46,6 +46,7 @@
 #include <netpacket/packet.h>
 #include <netpacket/packet.h>
 #include <poll.h>
 #include <poll.h>
 #include <stdarg.h>
 #include <stdarg.h>
+#include <stdbool.h>
 #include <stdint.h>
 #include <stdint.h>
 #include <stdio.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <stdlib.h>
@@ -58,6 +59,14 @@
 #include <time.h>
 #include <time.h>
 #include <unistd.h>
 #include <unistd.h>
 
 
+/* ugly hack to work around netinet/in.h and linux/ipv6.h conflicts */
+#ifndef in6_pktinfo
+struct in6_pktinfo {
+	struct in6_addr	ipi6_addr;
+	int		ipi6_ifindex;
+};
+#endif
+
 /* command line parameters */
 /* command line parameters */
 static int cfg_proto = SOCK_STREAM;
 static int cfg_proto = SOCK_STREAM;
 static int cfg_ipproto = IPPROTO_TCP;
 static int cfg_ipproto = IPPROTO_TCP;
@@ -65,6 +74,8 @@ static int cfg_num_pkts = 4;
 static int do_ipv4 = 1;
 static int do_ipv4 = 1;
 static int do_ipv6 = 1;
 static int do_ipv6 = 1;
 static int cfg_payload_len = 10;
 static int cfg_payload_len = 10;
+static bool cfg_show_payload;
+static bool cfg_do_pktinfo;
 static uint16_t dest_port = 9000;
 static uint16_t dest_port = 9000;
 
 
 static struct sockaddr_in daddr;
 static struct sockaddr_in daddr;
@@ -131,6 +142,30 @@ static void print_timestamp(struct scm_timestamping *tss, int tstype,
 	__print_timestamp(tsname, &tss->ts[0], tskey, payload_len);
 	__print_timestamp(tsname, &tss->ts[0], tskey, payload_len);
 }
 }
 
 
+/* TODO: convert to check_and_print payload once API is stable */
+static void print_payload(char *data, int len)
+{
+	int i;
+
+	if (len > 70)
+		len = 70;
+
+	fprintf(stderr, "payload: ");
+	for (i = 0; i < len; i++)
+		fprintf(stderr, "%02hhx ", data[i]);
+	fprintf(stderr, "\n");
+}
+
+static void print_pktinfo(int family, int ifindex, void *saddr, void *daddr)
+{
+	char sa[INET6_ADDRSTRLEN], da[INET6_ADDRSTRLEN];
+
+	fprintf(stderr, "         pktinfo: ifindex=%u src=%s dst=%s\n",
+		ifindex,
+		saddr ? inet_ntop(family, saddr, sa, sizeof(sa)) : "unknown",
+		daddr ? inet_ntop(family, daddr, da, sizeof(da)) : "unknown");
+}
+
 static void __poll(int fd)
 static void __poll(int fd)
 {
 {
 	struct pollfd pollfd;
 	struct pollfd pollfd;
@@ -156,10 +191,9 @@ static void __recv_errmsg_cmsg(struct msghdr *msg, int payload_len)
 		    cm->cmsg_type == SCM_TIMESTAMPING) {
 		    cm->cmsg_type == SCM_TIMESTAMPING) {
 			tss = (void *) CMSG_DATA(cm);
 			tss = (void *) CMSG_DATA(cm);
 		} else if ((cm->cmsg_level == SOL_IP &&
 		} else if ((cm->cmsg_level == SOL_IP &&
-		     cm->cmsg_type == IP_RECVERR) ||
-		    (cm->cmsg_level == SOL_IPV6 &&
-		     cm->cmsg_type == IPV6_RECVERR)) {
-
+			    cm->cmsg_type == IP_RECVERR) ||
+			   (cm->cmsg_level == SOL_IPV6 &&
+			    cm->cmsg_type == IPV6_RECVERR)) {
 			serr = (void *) CMSG_DATA(cm);
 			serr = (void *) CMSG_DATA(cm);
 			if (serr->ee_errno != ENOMSG ||
 			if (serr->ee_errno != ENOMSG ||
 			    serr->ee_origin != SO_EE_ORIGIN_TIMESTAMPING) {
 			    serr->ee_origin != SO_EE_ORIGIN_TIMESTAMPING) {
@@ -168,6 +202,16 @@ static void __recv_errmsg_cmsg(struct msghdr *msg, int payload_len)
 						serr->ee_origin);
 						serr->ee_origin);
 				serr = NULL;
 				serr = NULL;
 			}
 			}
+		} else if (cm->cmsg_level == SOL_IP &&
+			   cm->cmsg_type == IP_PKTINFO) {
+			struct in_pktinfo *info = (void *) CMSG_DATA(cm);
+			print_pktinfo(AF_INET, info->ipi_ifindex,
+				      &info->ipi_spec_dst, &info->ipi_addr);
+		} else if (cm->cmsg_level == SOL_IPV6 &&
+			   cm->cmsg_type == IPV6_PKTINFO) {
+			struct in6_pktinfo *info6 = (void *) CMSG_DATA(cm);
+			print_pktinfo(AF_INET6, info6->ipi6_ifindex,
+				      NULL, &info6->ipi6_addr);
 		} else
 		} else
 			fprintf(stderr, "unknown cmsg %d,%d\n",
 			fprintf(stderr, "unknown cmsg %d,%d\n",
 					cm->cmsg_level, cm->cmsg_type);
 					cm->cmsg_level, cm->cmsg_type);
@@ -206,7 +250,11 @@ static int recv_errmsg(int fd)
 	if (ret == -1 && errno != EAGAIN)
 	if (ret == -1 && errno != EAGAIN)
 		error(1, errno, "recvmsg");
 		error(1, errno, "recvmsg");
 
 
-	__recv_errmsg_cmsg(&msg, ret);
+	if (ret > 0) {
+		__recv_errmsg_cmsg(&msg, ret);
+		if (cfg_show_payload)
+			print_payload(data, cfg_payload_len);
+	}
 
 
 	free(data);
 	free(data);
 	return ret == -1;
 	return ret == -1;
@@ -215,9 +263,9 @@ static int recv_errmsg(int fd)
 static void do_test(int family, unsigned int opt)
 static void do_test(int family, unsigned int opt)
 {
 {
 	char *buf;
 	char *buf;
-	int fd, i, val, total_len;
+	int fd, i, val = 1, total_len;
 
 
-	if (family == IPPROTO_IPV6 && cfg_proto != SOCK_STREAM) {
+	if (family == AF_INET6 && cfg_proto != SOCK_STREAM) {
 		/* due to lack of checksum generation code */
 		/* due to lack of checksum generation code */
 		fprintf(stderr, "test: skipping datagram over IPv6\n");
 		fprintf(stderr, "test: skipping datagram over IPv6\n");
 		return;
 		return;
@@ -239,7 +287,6 @@ static void do_test(int family, unsigned int opt)
 		error(1, errno, "socket");
 		error(1, errno, "socket");
 
 
 	if (cfg_proto == SOCK_STREAM) {
 	if (cfg_proto == SOCK_STREAM) {
-		val = 1;
 		if (setsockopt(fd, IPPROTO_TCP, TCP_NODELAY,
 		if (setsockopt(fd, IPPROTO_TCP, TCP_NODELAY,
 			       (char*) &val, sizeof(val)))
 			       (char*) &val, sizeof(val)))
 			error(1, 0, "setsockopt no nagle");
 			error(1, 0, "setsockopt no nagle");
@@ -253,7 +300,20 @@ static void do_test(int family, unsigned int opt)
 		}
 		}
 	}
 	}
 
 
+	if (cfg_do_pktinfo) {
+		if (family == AF_INET6) {
+			if (setsockopt(fd, SOL_IPV6, IPV6_RECVPKTINFO,
+				       &val, sizeof(val)))
+				error(1, errno, "setsockopt pktinfo ipv6");
+		} else {
+			if (setsockopt(fd, SOL_IP, IP_PKTINFO,
+				       &val, sizeof(val)))
+				error(1, errno, "setsockopt pktinfo ipv4");
+		}
+	}
+
 	opt |= SOF_TIMESTAMPING_SOFTWARE |
 	opt |= SOF_TIMESTAMPING_SOFTWARE |
+	       SOF_TIMESTAMPING_OPT_CMSG |
 	       SOF_TIMESTAMPING_OPT_ID;
 	       SOF_TIMESTAMPING_OPT_ID;
 	if (setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING,
 	if (setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING,
 		       (char *) &opt, sizeof(opt)))
 		       (char *) &opt, sizeof(opt)))
@@ -262,8 +322,6 @@ static void do_test(int family, unsigned int opt)
 	for (i = 0; i < cfg_num_pkts; i++) {
 	for (i = 0; i < cfg_num_pkts; i++) {
 		memset(&ts_prev, 0, sizeof(ts_prev));
 		memset(&ts_prev, 0, sizeof(ts_prev));
 		memset(buf, 'a' + i, total_len);
 		memset(buf, 'a' + i, total_len);
-		buf[total_len - 2] = '\n';
-		buf[total_len - 1] = '\0';
 
 
 		if (cfg_proto == SOCK_RAW) {
 		if (cfg_proto == SOCK_RAW) {
 			struct udphdr *udph;
 			struct udphdr *udph;
@@ -324,11 +382,13 @@ static void __attribute__((noreturn)) usage(const char *filepath)
 			"  -4:   only IPv4\n"
 			"  -4:   only IPv4\n"
 			"  -6:   only IPv6\n"
 			"  -6:   only IPv6\n"
 			"  -h:   show this message\n"
 			"  -h:   show this message\n"
+			"  -I:   request PKTINFO\n"
 			"  -l N: send N bytes at a time\n"
 			"  -l N: send N bytes at a time\n"
 			"  -r:   use raw\n"
 			"  -r:   use raw\n"
 			"  -R:   use raw (IP_HDRINCL)\n"
 			"  -R:   use raw (IP_HDRINCL)\n"
 			"  -p N: connect to port N\n"
 			"  -p N: connect to port N\n"
-			"  -u:   use udp\n",
+			"  -u:   use udp\n"
+			"  -x:   show payload (up to 70 bytes)\n",
 			filepath);
 			filepath);
 	exit(1);
 	exit(1);
 }
 }
@@ -338,7 +398,7 @@ static void parse_opt(int argc, char **argv)
 	int proto_count = 0;
 	int proto_count = 0;
 	char c;
 	char c;
 
 
-	while ((c = getopt(argc, argv, "46hl:p:rRu")) != -1) {
+	while ((c = getopt(argc, argv, "46hIl:p:rRux")) != -1) {
 		switch (c) {
 		switch (c) {
 		case '4':
 		case '4':
 			do_ipv6 = 0;
 			do_ipv6 = 0;
@@ -346,6 +406,9 @@ static void parse_opt(int argc, char **argv)
 		case '6':
 		case '6':
 			do_ipv4 = 0;
 			do_ipv4 = 0;
 			break;
 			break;
+		case 'I':
+			cfg_do_pktinfo = true;
+			break;
 		case 'r':
 		case 'r':
 			proto_count++;
 			proto_count++;
 			cfg_proto = SOCK_RAW;
 			cfg_proto = SOCK_RAW;
@@ -367,6 +430,9 @@ static void parse_opt(int argc, char **argv)
 		case 'p':
 		case 'p':
 			dest_port = strtoul(optarg, NULL, 10);
 			dest_port = strtoul(optarg, NULL, 10);
 			break;
 			break;
+		case 'x':
+			cfg_show_payload = true;
+			break;
 		case 'h':
 		case 'h':
 		default:
 		default:
 			usage(argv[0]);
 			usage(argv[0]);

+ 2 - 1
include/uapi/linux/net_tstamp.h

@@ -23,8 +23,9 @@ enum {
 	SOF_TIMESTAMPING_OPT_ID = (1<<7),
 	SOF_TIMESTAMPING_OPT_ID = (1<<7),
 	SOF_TIMESTAMPING_TX_SCHED = (1<<8),
 	SOF_TIMESTAMPING_TX_SCHED = (1<<8),
 	SOF_TIMESTAMPING_TX_ACK = (1<<9),
 	SOF_TIMESTAMPING_TX_ACK = (1<<9),
+	SOF_TIMESTAMPING_OPT_CMSG = (1<<10),
 
 
-	SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_TX_ACK,
+	SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_OPT_CMSG,
 	SOF_TIMESTAMPING_MASK = (SOF_TIMESTAMPING_LAST - 1) |
 	SOF_TIMESTAMPING_MASK = (SOF_TIMESTAMPING_LAST - 1) |
 				 SOF_TIMESTAMPING_LAST
 				 SOF_TIMESTAMPING_LAST
 };
 };

+ 22 - 2
net/ipv4/ip_sockglue.c

@@ -399,6 +399,22 @@ void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 port, u32 inf
 		kfree_skb(skb);
 		kfree_skb(skb);
 }
 }
 
 
+static bool ipv4_pktinfo_prepare_errqueue(const struct sock *sk,
+					  const struct sk_buff *skb,
+					  int ee_origin)
+{
+	struct in_pktinfo *info = PKTINFO_SKB_CB(skb);
+
+	if ((ee_origin != SO_EE_ORIGIN_TIMESTAMPING) ||
+	    (!(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_CMSG)) ||
+	    (!skb->dev))
+		return false;
+
+	info->ipi_spec_dst.s_addr = ip_hdr(skb)->saddr;
+	info->ipi_ifindex = skb->dev->ifindex;
+	return true;
+}
+
 /*
 /*
  *	Handle MSG_ERRQUEUE
  *	Handle MSG_ERRQUEUE
  */
  */
@@ -414,6 +430,8 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
 	int err;
 	int err;
 	int copied;
 	int copied;
 
 
+	WARN_ON_ONCE(sk->sk_family == AF_INET6);
+
 	err = -EAGAIN;
 	err = -EAGAIN;
 	skb = sock_dequeue_err_skb(sk);
 	skb = sock_dequeue_err_skb(sk);
 	if (skb == NULL)
 	if (skb == NULL)
@@ -444,7 +462,9 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
 	memcpy(&errhdr.ee, &serr->ee, sizeof(struct sock_extended_err));
 	memcpy(&errhdr.ee, &serr->ee, sizeof(struct sock_extended_err));
 	sin = &errhdr.offender;
 	sin = &errhdr.offender;
 	sin->sin_family = AF_UNSPEC;
 	sin->sin_family = AF_UNSPEC;
-	if (serr->ee.ee_origin == SO_EE_ORIGIN_ICMP) {
+
+	if (serr->ee.ee_origin == SO_EE_ORIGIN_ICMP ||
+	    ipv4_pktinfo_prepare_errqueue(sk, skb, serr->ee.ee_origin)) {
 		struct inet_sock *inet = inet_sk(sk);
 		struct inet_sock *inet = inet_sk(sk);
 
 
 		sin->sin_family = AF_INET;
 		sin->sin_family = AF_INET;
@@ -1049,7 +1069,7 @@ e_inval:
 }
 }
 
 
 /**
 /**
- * ipv4_pktinfo_prepare - transfert some info from rtable to skb
+ * ipv4_pktinfo_prepare - transfer some info from rtable to skb
  * @sk: socket
  * @sk: socket
  * @skb: buffer
  * @skb: buffer
  *
  *

+ 19 - 2
net/ipv6/datagram.c

@@ -325,6 +325,16 @@ void ipv6_local_rxpmtu(struct sock *sk, struct flowi6 *fl6, u32 mtu)
 	kfree_skb(skb);
 	kfree_skb(skb);
 }
 }
 
 
+static void ip6_datagram_prepare_pktinfo_errqueue(struct sk_buff *skb)
+{
+	int ifindex = skb->dev ? skb->dev->ifindex : -1;
+
+	if (skb->protocol == htons(ETH_P_IPV6))
+		IP6CB(skb)->iif = ifindex;
+	else
+		PKTINFO_SKB_CB(skb)->ipi_ifindex = ifindex;
+}
+
 /*
 /*
  *	Handle MSG_ERRQUEUE
  *	Handle MSG_ERRQUEUE
  */
  */
@@ -388,8 +398,12 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
 		sin->sin6_family = AF_INET6;
 		sin->sin6_family = AF_INET6;
 		sin->sin6_flowinfo = 0;
 		sin->sin6_flowinfo = 0;
 		sin->sin6_port = 0;
 		sin->sin6_port = 0;
-		if (np->rxopt.all)
+		if (np->rxopt.all) {
+			if (serr->ee.ee_origin != SO_EE_ORIGIN_ICMP &&
+			    serr->ee.ee_origin != SO_EE_ORIGIN_ICMP6)
+				ip6_datagram_prepare_pktinfo_errqueue(skb);
 			ip6_datagram_recv_common_ctl(sk, msg, skb);
 			ip6_datagram_recv_common_ctl(sk, msg, skb);
+		}
 		if (skb->protocol == htons(ETH_P_IPV6)) {
 		if (skb->protocol == htons(ETH_P_IPV6)) {
 			sin->sin6_addr = ipv6_hdr(skb)->saddr;
 			sin->sin6_addr = ipv6_hdr(skb)->saddr;
 			if (np->rxopt.all)
 			if (np->rxopt.all)
@@ -491,7 +505,10 @@ void ip6_datagram_recv_common_ctl(struct sock *sk, struct msghdr *msg,
 			ipv6_addr_set_v4mapped(ip_hdr(skb)->daddr,
 			ipv6_addr_set_v4mapped(ip_hdr(skb)->daddr,
 					       &src_info.ipi6_addr);
 					       &src_info.ipi6_addr);
 		}
 		}
-		put_cmsg(msg, SOL_IPV6, IPV6_PKTINFO, sizeof(src_info), &src_info);
+
+		if (src_info.ipi6_ifindex >= 0)
+			put_cmsg(msg, SOL_IPV6, IPV6_PKTINFO,
+				 sizeof(src_info), &src_info);
 	}
 	}
 }
 }