
Merge branch 'so_incoming_cpu'

Eric Dumazet says:

====================
net: SO_INCOMING_CPU support

The SO_INCOMING_CPU socket option (read via getsockopt()) provides
an alternative to RPS/RFS for high-performance servers using
multi-queue NICs.

TCP should use sk_mark_napi_id() for established sockets only.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
David S. Miller 10 years ago
parent
commit
b00394c007
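
As context for reviewers, a minimal userspace sketch of how a server might consume the new option; the fallback #define mirrors the asm-generic value added below, and the helper is illustrative, not part of this series:

#include <stdio.h>
#include <sys/socket.h>

#ifndef SO_INCOMING_CPU
#define SO_INCOMING_CPU 49	/* asm-generic value added by this series */
#endif

/* Ask the kernel which CPU last processed incoming packets for @fd. */
static int socket_incoming_cpu(int fd)
{
	int cpu = -1;
	socklen_t len = sizeof(cpu);

	if (getsockopt(fd, SOL_SOCKET, SO_INCOMING_CPU, &cpu, &len) < 0) {
		perror("getsockopt(SO_INCOMING_CPU)");
		return -1;
	}
	return cpu;
}

A server running one worker thread per RX queue can compare this value against the worker's own CPU after accept() and hand the socket to the matching worker when they differ, which is the RPS/RFS alternative the changelog describes.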

+ 2 - 0
arch/alpha/include/uapi/asm/socket.h

@@ -87,4 +87,6 @@
 
 #define SO_BPF_EXTENSIONS	48
 
+#define SO_INCOMING_CPU		49
+
 #endif /* _UAPI_ASM_SOCKET_H */

+ 2 - 0
arch/avr32/include/uapi/asm/socket.h

@@ -80,4 +80,6 @@
 
 #define SO_BPF_EXTENSIONS	48
 
+#define SO_INCOMING_CPU		49
+
 #endif /* _UAPI__ASM_AVR32_SOCKET_H */

+ 2 - 0
arch/cris/include/uapi/asm/socket.h

@@ -82,6 +82,8 @@
 
 #define SO_BPF_EXTENSIONS	48
 
+#define SO_INCOMING_CPU		49
+
 #endif /* _ASM_SOCKET_H */
 
 

+ 2 - 0
arch/frv/include/uapi/asm/socket.h

@@ -80,5 +80,7 @@
 
 #define SO_BPF_EXTENSIONS	48
 
+#define SO_INCOMING_CPU		49
+
 #endif /* _ASM_SOCKET_H */
 

+ 2 - 0
arch/ia64/include/uapi/asm/socket.h

@@ -89,4 +89,6 @@
 
 #define SO_BPF_EXTENSIONS	48
 
+#define SO_INCOMING_CPU		49
+
 #endif /* _ASM_IA64_SOCKET_H */

+ 2 - 0
arch/m32r/include/uapi/asm/socket.h

@@ -80,4 +80,6 @@
 
 #define SO_BPF_EXTENSIONS	48
 
+#define SO_INCOMING_CPU		49
+
 #endif /* _ASM_M32R_SOCKET_H */

+ 2 - 0
arch/mips/include/uapi/asm/socket.h

@@ -98,4 +98,6 @@
 
 #define SO_BPF_EXTENSIONS	48
 
+#define SO_INCOMING_CPU		49
+
 #endif /* _UAPI_ASM_SOCKET_H */

+ 2 - 0
arch/mn10300/include/uapi/asm/socket.h

@@ -80,4 +80,6 @@
 
 #define SO_BPF_EXTENSIONS	48
 
+#define SO_INCOMING_CPU		49
+
 #endif /* _ASM_SOCKET_H */

+ 2 - 0
arch/parisc/include/uapi/asm/socket.h

@@ -79,4 +79,6 @@
 
 #define SO_BPF_EXTENSIONS	0x4029
 
+#define SO_INCOMING_CPU		0x402A
+
 #endif /* _UAPI_ASM_SOCKET_H */

+ 2 - 0
arch/powerpc/include/uapi/asm/socket.h

@@ -87,4 +87,6 @@
 
 #define SO_BPF_EXTENSIONS	48
 
+#define SO_INCOMING_CPU		49
+
 #endif	/* _ASM_POWERPC_SOCKET_H */

+ 2 - 0
arch/s390/include/uapi/asm/socket.h

@@ -86,4 +86,6 @@
 
 #define SO_BPF_EXTENSIONS	48
 
+#define SO_INCOMING_CPU		49
+
 #endif /* _ASM_SOCKET_H */

+ 2 - 0
arch/sparc/include/uapi/asm/socket.h

@@ -76,6 +76,8 @@
 
 #define SO_BPF_EXTENSIONS	0x0032
 
+#define SO_INCOMING_CPU		0x0033
+
 /* Security levels - as per NRL IPv6 - don't actually do anything */
 #define SO_SECURITY_AUTHENTICATION		0x5001
 #define SO_SECURITY_ENCRYPTION_TRANSPORT	0x5002

+ 2 - 0
arch/xtensa/include/uapi/asm/socket.h

@@ -91,4 +91,6 @@
 
 #define SO_BPF_EXTENSIONS	48
 
+#define SO_INCOMING_CPU		49
+
 #endif	/* _XTENSA_SOCKET_H */

+ 12 - 0
include/net/sock.h

@@ -273,6 +273,7 @@ struct cg_proto;
   *	@sk_rcvtimeo: %SO_RCVTIMEO setting
   *	@sk_sndtimeo: %SO_SNDTIMEO setting
   *	@sk_rxhash: flow hash received from netif layer
+  *	@sk_incoming_cpu: record cpu processing incoming packets
   *	@sk_txhash: computed flow hash for use on transmit
   *	@sk_filter: socket filtering instructions
   *	@sk_protinfo: private area, net family specific, when not using slab
@@ -350,6 +351,12 @@ struct sock {
 #ifdef CONFIG_RPS
 	__u32			sk_rxhash;
 #endif
+	u16			sk_incoming_cpu;
+	/* 16bit hole
+	 * Warning: sk_incoming_cpu can be set from softirq,
+	 * Do not use this hole without fully understanding possible issues.
+	 */
+
 	__u32			sk_txhash;
 #ifdef CONFIG_NET_RX_BUSY_POLL
 	unsigned int		sk_napi_id;
@@ -833,6 +840,11 @@ static inline int sk_backlog_rcv(struct sock *sk, struct sk_buff *skb)
 	return sk->sk_backlog_rcv(sk, skb);
 }
 
+static inline void sk_incoming_cpu_update(struct sock *sk)
+{
+	sk->sk_incoming_cpu = raw_smp_processor_id();
+}
+
 static inline void sock_rps_record_flow_hash(__u32 hash)
 {
 #ifdef CONFIG_RPS

+ 2 - 0
include/uapi/asm-generic/socket.h

@@ -82,4 +82,6 @@
 
 #define SO_BPF_EXTENSIONS	48
 
+#define SO_INCOMING_CPU		49
+
 #endif /* __ASM_GENERIC_SOCKET_H */

+ 5 - 0
net/core/sock.c

@@ -1213,6 +1213,10 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
 		v.val = sk->sk_max_pacing_rate;
 		break;
 
+	case SO_INCOMING_CPU:
+		v.val = sk->sk_incoming_cpu;
+		break;
+
 	default:
 		return -ENOPROTOOPT;
 	}
@@ -1517,6 +1521,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
 
 		newsk->sk_err	   = 0;
 		newsk->sk_priority = 0;
+		newsk->sk_incoming_cpu = raw_smp_processor_id();
 		/*
 		 * Before updating sk_refcnt, we must commit prior changes to memory
 		 * (Documentation/RCU/rculist_nulls.txt for details)

+ 3 - 1
net/ipv4/tcp_ipv4.c

@@ -1429,6 +1429,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
 		struct dst_entry *dst = sk->sk_rx_dst;
 
 		sock_rps_save_rxhash(sk, skb);
+		sk_mark_napi_id(sk, skb);
 		if (dst) {
 			if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
 			    dst->ops->check(dst, 0) == NULL) {
@@ -1450,6 +1451,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
 
 		if (nsk != sk) {
 			sock_rps_save_rxhash(nsk, skb);
+			sk_mark_napi_id(nsk, skb);
 			if (tcp_child_process(sk, nsk, skb)) {
 				rsk = nsk;
 				goto reset;
@@ -1661,7 +1663,7 @@ process:
 	if (sk_filter(sk, skb))
 		goto discard_and_relse;
 
-	sk_mark_napi_id(sk, skb);
+	sk_incoming_cpu_update(sk);
 	skb->dev = NULL;
 
 	bh_lock_sock_nested(sk);

+ 1 - 0
net/ipv4/udp.c

@@ -1445,6 +1445,7 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 	if (inet_sk(sk)->inet_daddr) {
 		sock_rps_save_rxhash(sk, skb);
 		sk_mark_napi_id(sk, skb);
+		sk_incoming_cpu_update(sk);
 	}
 
 	rc = sock_queue_rcv_skb(sk, skb);

+ 3 - 1
net/ipv6/tcp_ipv6.c

@@ -1293,6 +1293,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
 		struct dst_entry *dst = sk->sk_rx_dst;
 
 		sock_rps_save_rxhash(sk, skb);
+		sk_mark_napi_id(sk, skb);
 		if (dst) {
 			if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
 			    dst->ops->check(dst, np->rx_dst_cookie) == NULL) {
@@ -1322,6 +1323,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
 		 */
 		if (nsk != sk) {
 			sock_rps_save_rxhash(nsk, skb);
+			sk_mark_napi_id(nsk, skb);
 			if (tcp_child_process(sk, nsk, skb))
 				goto reset;
 			if (opt_skb)
@@ -1454,7 +1456,7 @@ process:
 	if (sk_filter(sk, skb))
 		goto discard_and_relse;
 
-	sk_mark_napi_id(sk, skb);
+	sk_incoming_cpu_update(sk);
 	skb->dev = NULL;
 
 	bh_lock_sock_nested(sk);

+ 1 - 0
net/ipv6/udp.c

@@ -577,6 +577,7 @@ static int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 	if (!ipv6_addr_any(&sk->sk_v6_daddr)) {
 		sock_rps_save_rxhash(sk, skb);
 		sk_mark_napi_id(sk, skb);
+		sk_incoming_cpu_update(sk);
 	}
 
 	rc = sock_queue_rcv_skb(sk, skb);

+ 3 - 2
net/sctp/ulpqueue.c

@@ -205,9 +205,10 @@ int sctp_ulpq_tail_event(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event)
 	if (sock_flag(sk, SOCK_DEAD) || (sk->sk_shutdown & RCV_SHUTDOWN))
 		goto out_free;
 
-	if (!sctp_ulpevent_is_notification(event))
+	if (!sctp_ulpevent_is_notification(event)) {
 		sk_mark_napi_id(sk, skb);
-
+		sk_incoming_cpu_update(sk);
+	}
 	/* Check if the user wishes to receive this event.  */
 	if (!sctp_ulpevent_is_enabled(event, &sctp_sk(sk)->subscribe))
 		goto out_free;