Эх сурвалжийг харах

Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next

Pablo Neira Ayuso says:

====================
netfilter/IPVS updates for net-next

The following patchset contains Netfilter updates for your net-next tree,
they are:

* Add full port randomization support. Some crazy researchers found a way
  to reconstruct the secure ephemeral ports that are allocated in random mode
  by sending off-path bursts of UDP packets to overrun the socket buffer of
  the DNS resolver to trigger retransmissions; if the DNS resolution done
  by a client then takes longer than usual, they conclude
  that the port that received the burst of UDP packets is the one that was
  opened. It seems a rather aggressive method to me, but it seems to work for
  them. As a result, Daniel Borkmann and Hannes Frederic Sowa came up with a
  new NAT mode to fully randomize ports using prandom.

* Add a new classifier to x_tables based on the socket net_cls set via
  cgroups. This includes two patches to prepare the field as requested by
  Zefan Li. Also from Daniel Borkmann.

* Use prandom instead of get_random_bytes in several locations of the
  netfilter code, from Florian Westphal.

* Allow the use of CTA_MARK_MASK in ctnetlink when mangling the conntrack
  mark, also from Florian Westphal.

* Fix compilation warning due to unused variable in IPVS, from Geert
  Uytterhoeven.

* Add support for UID/GID via nfnetlink_queue, from Valentina Giusti.

* Add IPComp extension to x_tables, from Fan Du.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
David S. Miller 11 жил өмнө
parent
commit
855404efae
42 өөрчлөгдсөн 487 нэмэгдсэн , 274 устгасан
  1. 5 0
      Documentation/cgroups/net_cls.txt
  2. 2 2
      include/linux/cgroup_subsys.h
  3. 1 1
      include/linux/netdevice.h
  4. 0 1
      include/linux/netfilter/ipset/ip_set.h
  5. 12 28
      include/net/cls_cgroup.h
  6. 0 2
      include/net/netfilter/ipv4/nf_conntrack_ipv4.h
  7. 0 1
      include/net/netfilter/nf_conntrack_l3proto.h
  8. 17 16
      include/net/netns/conntrack.h
  9. 6 12
      include/net/netprio_cgroup.h
  10. 1 1
      include/net/sock.h
  11. 2 0
      include/uapi/linux/netfilter/Kbuild
  12. 8 4
      include/uapi/linux/netfilter/nf_nat.h
  13. 4 1
      include/uapi/linux/netfilter/nfnetlink_queue.h
  14. 11 0
      include/uapi/linux/netfilter/xt_cgroup.h
  15. 16 0
      include/uapi/linux/netfilter/xt_ipcomp.h
  16. 9 2
      net/Kconfig
  17. 2 1
      net/core/Makefile
  18. 1 1
      net/core/dev.c
  19. 120 0
      net/core/netclassid_cgroup.c
  20. 1 13
      net/core/sock.c
  21. 0 6
      net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
  22. 19 0
      net/netfilter/Kconfig
  23. 2 0
      net/netfilter/Makefile
  24. 0 28
      net/netfilter/ipset/ip_set_core.c
  25. 4 1
      net/netfilter/ipvs/ip_vs_sync.c
  26. 0 15
      net/netfilter/nf_conntrack_core.c
  27. 10 2
      net/netfilter/nf_conntrack_netlink.c
  28. 0 6
      net/netfilter/nf_conntrack_proto.c
  29. 2 2
      net/netfilter/nf_nat_core.c
  30. 6 4
      net/netfilter/nf_nat_proto_common.c
  31. 0 8
      net/netfilter/nfnetlink_log.c
  32. 34 0
      net/netfilter/nfnetlink_queue_core.c
  33. 1 1
      net/netfilter/nft_hash.c
  34. 3 1
      net/netfilter/xt_CT.c
  35. 1 1
      net/netfilter/xt_RATEEST.c
  36. 71 0
      net/netfilter/xt_cgroup.c
  37. 1 1
      net/netfilter/xt_connlimit.c
  38. 1 1
      net/netfilter/xt_hashlimit.c
  39. 111 0
      net/netfilter/xt_ipcomp.c
  40. 1 1
      net/netfilter/xt_recent.c
  41. 1 0
      net/sched/Kconfig
  42. 1 110
      net/sched/cls_cgroup.c

+ 5 - 0
Documentation/cgroups/net_cls.txt

@@ -6,6 +6,8 @@ tag network packets with a class identifier (classid).
 
 The Traffic Controller (tc) can be used to assign
 different priorities to packets from different cgroups.
+Also, Netfilter (iptables) can use this tag to perform
+actions on such packets.
 
 Creating a net_cls cgroups instance creates a net_cls.classid file.
 This net_cls.classid value is initialized to 0.
@@ -32,3 +34,6 @@ tc class add dev eth0 parent 10: classid 10:1 htb rate 40mbit
  - creating traffic class 10:1
 
 tc filter add dev eth0 parent 10: protocol ip prio 10 handle 1: cgroup
+
+configuring iptables, basic example:
+iptables -A OUTPUT -m cgroup ! --cgroup 0x100001 -j DROP

+ 2 - 2
include/linux/cgroup_subsys.h

@@ -31,7 +31,7 @@ SUBSYS(devices)
 SUBSYS(freezer)
 #endif
 
-#if IS_SUBSYS_ENABLED(CONFIG_NET_CLS_CGROUP)
+#if IS_SUBSYS_ENABLED(CONFIG_CGROUP_NET_CLASSID)
 SUBSYS(net_cls)
 #endif
 
@@ -43,7 +43,7 @@ SUBSYS(blkio)
 SUBSYS(perf)
 #endif
 
-#if IS_SUBSYS_ENABLED(CONFIG_NETPRIO_CGROUP)
+#if IS_SUBSYS_ENABLED(CONFIG_CGROUP_NET_PRIO)
 SUBSYS(net_prio)
 #endif
 

+ 1 - 1
include/linux/netdevice.h

@@ -1444,7 +1444,7 @@ struct net_device {
 	/* max exchange id for FCoE LRO by ddp */
 	unsigned int		fcoe_ddp_xid;
 #endif
-#if IS_ENABLED(CONFIG_NETPRIO_CGROUP)
+#if IS_ENABLED(CONFIG_CGROUP_NET_PRIO)
 	struct netprio_map __rcu *priomap;
 #endif
 	/* phy device may attach itself for hardware timestamping */

+ 0 - 1
include/linux/netfilter/ipset/ip_set.h

@@ -331,7 +331,6 @@ extern ip_set_id_t ip_set_get_byname(struct net *net,
 				     const char *name, struct ip_set **set);
 extern void ip_set_put_byindex(struct net *net, ip_set_id_t index);
 extern const char *ip_set_name_byindex(struct net *net, ip_set_id_t index);
-extern ip_set_id_t ip_set_nfnl_get(struct net *net, const char *name);
 extern ip_set_id_t ip_set_nfnl_get_byindex(struct net *net, ip_set_id_t index);
 extern void ip_set_nfnl_put(struct net *net, ip_set_id_t index);
 

+ 12 - 28
include/net/cls_cgroup.h

@@ -16,17 +16,16 @@
 #include <linux/cgroup.h>
 #include <linux/hardirq.h>
 #include <linux/rcupdate.h>
+#include <net/sock.h>
 
-#if IS_ENABLED(CONFIG_NET_CLS_CGROUP)
-struct cgroup_cls_state
-{
+#ifdef CONFIG_CGROUP_NET_CLASSID
+struct cgroup_cls_state {
 	struct cgroup_subsys_state css;
 	u32 classid;
 };
 
-void sock_update_classid(struct sock *sk);
+struct cgroup_cls_state *task_cls_state(struct task_struct *p);
 
-#if IS_BUILTIN(CONFIG_NET_CLS_CGROUP)
 static inline u32 task_cls_classid(struct task_struct *p)
 {
 	u32 classid;
@@ -41,33 +40,18 @@ static inline u32 task_cls_classid(struct task_struct *p)
 
 	return classid;
 }
-#elif IS_MODULE(CONFIG_NET_CLS_CGROUP)
-static inline u32 task_cls_classid(struct task_struct *p)
-{
-	struct cgroup_subsys_state *css;
-	u32 classid = 0;
-
-	if (in_interrupt())
-		return 0;
-
-	rcu_read_lock();
-	css = task_css(p, net_cls_subsys_id);
-	if (css)
-		classid = container_of(css,
-				       struct cgroup_cls_state, css)->classid;
-	rcu_read_unlock();
 
-	return classid;
-}
-#endif
-#else /* !CGROUP_NET_CLS_CGROUP */
 static inline void sock_update_classid(struct sock *sk)
 {
-}
+	u32 classid;
 
-static inline u32 task_cls_classid(struct task_struct *p)
+	classid = task_cls_classid(current);
+	if (classid != sk->sk_classid)
+		sk->sk_classid = classid;
+}
+#else /* !CONFIG_CGROUP_NET_CLASSID */
+static inline void sock_update_classid(struct sock *sk)
 {
-	return 0;
 }
-#endif /* CGROUP_NET_CLS_CGROUP */
+#endif /* CONFIG_CGROUP_NET_CLASSID */
 #endif  /* _NET_CLS_CGROUP_H */

+ 0 - 2
include/net/netfilter/ipv4/nf_conntrack_ipv4.h

@@ -19,6 +19,4 @@ extern struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp;
 int nf_conntrack_ipv4_compat_init(void);
 void nf_conntrack_ipv4_compat_fini(void);
 
-void need_ipv4_conntrack(void);
-
 #endif /*_NF_CONNTRACK_IPV4_H*/

+ 0 - 1
include/net/netfilter/nf_conntrack_l3proto.h

@@ -87,7 +87,6 @@ int nf_ct_l3proto_register(struct nf_conntrack_l3proto *proto);
 void nf_ct_l3proto_unregister(struct nf_conntrack_l3proto *proto);
 
 struct nf_conntrack_l3proto *nf_ct_l3proto_find_get(u_int16_t l3proto);
-void nf_ct_l3proto_put(struct nf_conntrack_l3proto *p);
 
 /* Existing built-in protocols */
 extern struct nf_conntrack_l3proto nf_conntrack_l3proto_generic;

+ 17 - 16
include/net/netns/conntrack.h

@@ -65,6 +65,23 @@ struct nf_ip_net {
 struct netns_ct {
 	atomic_t		count;
 	unsigned int		expect_count;
+#ifdef CONFIG_SYSCTL
+	struct ctl_table_header	*sysctl_header;
+	struct ctl_table_header	*acct_sysctl_header;
+	struct ctl_table_header	*tstamp_sysctl_header;
+	struct ctl_table_header	*event_sysctl_header;
+	struct ctl_table_header	*helper_sysctl_header;
+#endif
+	char			*slabname;
+	unsigned int		sysctl_log_invalid; /* Log invalid packets */
+	unsigned int		sysctl_events_retry_timeout;
+	int			sysctl_events;
+	int			sysctl_acct;
+	int			sysctl_auto_assign_helper;
+	bool			auto_assign_helper_warned;
+	int			sysctl_tstamp;
+	int			sysctl_checksum;
+
 	unsigned int		htable_size;
 	struct kmem_cache	*nf_conntrack_cachep;
 	struct hlist_nulls_head	*hash;
@@ -75,14 +92,6 @@ struct netns_ct {
 	struct ip_conntrack_stat __percpu *stat;
 	struct nf_ct_event_notifier __rcu *nf_conntrack_event_cb;
 	struct nf_exp_event_notifier __rcu *nf_expect_event_cb;
-	int			sysctl_events;
-	unsigned int		sysctl_events_retry_timeout;
-	int			sysctl_acct;
-	int			sysctl_tstamp;
-	int			sysctl_checksum;
-	unsigned int		sysctl_log_invalid; /* Log invalid packets */
-	int			sysctl_auto_assign_helper;
-	bool			auto_assign_helper_warned;
 	struct nf_ip_net	nf_ct_proto;
 #if defined(CONFIG_NF_CONNTRACK_LABELS)
 	unsigned int		labels_used;
@@ -92,13 +101,5 @@ struct netns_ct {
 	struct hlist_head	*nat_bysource;
 	unsigned int		nat_htable_size;
 #endif
-#ifdef CONFIG_SYSCTL
-	struct ctl_table_header	*sysctl_header;
-	struct ctl_table_header	*acct_sysctl_header;
-	struct ctl_table_header	*tstamp_sysctl_header;
-	struct ctl_table_header	*event_sysctl_header;
-	struct ctl_table_header	*helper_sysctl_header;
-#endif
-	char			*slabname;
 };
 #endif

+ 6 - 12
include/net/netprio_cgroup.h

@@ -13,12 +13,12 @@
 
 #ifndef _NETPRIO_CGROUP_H
 #define _NETPRIO_CGROUP_H
+
 #include <linux/cgroup.h>
 #include <linux/hardirq.h>
 #include <linux/rcupdate.h>
 
-
-#if IS_ENABLED(CONFIG_NETPRIO_CGROUP)
+#if IS_ENABLED(CONFIG_CGROUP_NET_PRIO)
 struct netprio_map {
 	struct rcu_head rcu;
 	u32 priomap_len;
@@ -27,8 +27,7 @@ struct netprio_map {
 
 void sock_update_netprioidx(struct sock *sk);
 
-#if IS_BUILTIN(CONFIG_NETPRIO_CGROUP)
-
+#if IS_BUILTIN(CONFIG_CGROUP_NET_PRIO)
 static inline u32 task_netprioidx(struct task_struct *p)
 {
 	struct cgroup_subsys_state *css;
@@ -40,9 +39,7 @@ static inline u32 task_netprioidx(struct task_struct *p)
 	rcu_read_unlock();
 	return idx;
 }
-
-#elif IS_MODULE(CONFIG_NETPRIO_CGROUP)
-
+#elif IS_MODULE(CONFIG_CGROUP_NET_PRIO)
 static inline u32 task_netprioidx(struct task_struct *p)
 {
 	struct cgroup_subsys_state *css;
@@ -56,9 +53,7 @@ static inline u32 task_netprioidx(struct task_struct *p)
 	return idx;
 }
 #endif
-
-#else /* !CONFIG_NETPRIO_CGROUP */
-
+#else /* !CONFIG_CGROUP_NET_PRIO */
 static inline u32 task_netprioidx(struct task_struct *p)
 {
 	return 0;
@@ -66,6 +61,5 @@ static inline u32 task_netprioidx(struct task_struct *p)
 
 #define sock_update_netprioidx(sk)
 
-#endif /* CONFIG_NETPRIO_CGROUP */
-
+#endif /* CONFIG_CGROUP_NET_PRIO */
 #endif  /* _NET_CLS_CGROUP_H */

+ 1 - 1
include/net/sock.h

@@ -395,7 +395,7 @@ struct sock {
 	unsigned short		sk_ack_backlog;
 	unsigned short		sk_max_ack_backlog;
 	__u32			sk_priority;
-#if IS_ENABLED(CONFIG_NETPRIO_CGROUP)
+#if IS_ENABLED(CONFIG_CGROUP_NET_PRIO)
 	__u32			sk_cgrp_prioidx;
 #endif
 	struct pid		*sk_peer_pid;

+ 2 - 0
include/uapi/linux/netfilter/Kbuild

@@ -39,6 +39,7 @@ header-y += xt_TEE.h
 header-y += xt_TPROXY.h
 header-y += xt_addrtype.h
 header-y += xt_bpf.h
+header-y += xt_cgroup.h
 header-y += xt_cluster.h
 header-y += xt_comment.h
 header-y += xt_connbytes.h
@@ -54,6 +55,7 @@ header-y += xt_ecn.h
 header-y += xt_esp.h
 header-y += xt_hashlimit.h
 header-y += xt_helper.h
+header-y += xt_ipcomp.h
 header-y += xt_iprange.h
 header-y += xt_ipvs.h
 header-y += xt_length.h

+ 8 - 4
include/uapi/linux/netfilter/nf_nat.h

@@ -4,10 +4,14 @@
 #include <linux/netfilter.h>
 #include <linux/netfilter/nf_conntrack_tuple_common.h>
 
-#define NF_NAT_RANGE_MAP_IPS		1
-#define NF_NAT_RANGE_PROTO_SPECIFIED	2
-#define NF_NAT_RANGE_PROTO_RANDOM	4
-#define NF_NAT_RANGE_PERSISTENT		8
+#define NF_NAT_RANGE_MAP_IPS			(1 << 0)
+#define NF_NAT_RANGE_PROTO_SPECIFIED		(1 << 1)
+#define NF_NAT_RANGE_PROTO_RANDOM		(1 << 2)
+#define NF_NAT_RANGE_PERSISTENT			(1 << 3)
+#define NF_NAT_RANGE_PROTO_RANDOM_FULLY		(1 << 4)
+
+#define NF_NAT_RANGE_PROTO_RANDOM_ALL		\
+	(NF_NAT_RANGE_PROTO_RANDOM | NF_NAT_RANGE_PROTO_RANDOM_FULLY)
 
 struct nf_nat_ipv4_range {
 	unsigned int			flags;

+ 4 - 1
include/uapi/linux/netfilter/nfnetlink_queue.h

@@ -47,6 +47,8 @@ enum nfqnl_attr_type {
 	NFQA_CAP_LEN,			/* __u32 length of captured packet */
 	NFQA_SKB_INFO,			/* __u32 skb meta information */
 	NFQA_EXP,			/* nf_conntrack_netlink.h */
+	NFQA_UID,			/* __u32 sk uid */
+	NFQA_GID,			/* __u32 sk gid */
 
 	__NFQA_MAX
 };
@@ -99,7 +101,8 @@ enum nfqnl_attr_config {
 #define NFQA_CFG_F_FAIL_OPEN			(1 << 0)
 #define NFQA_CFG_F_CONNTRACK			(1 << 1)
 #define NFQA_CFG_F_GSO				(1 << 2)
-#define NFQA_CFG_F_MAX				(1 << 3)
+#define NFQA_CFG_F_UID_GID			(1 << 3)
+#define NFQA_CFG_F_MAX				(1 << 4)
 
 /* flags for NFQA_SKB_INFO */
 /* packet appears to have wrong checksums, but they are ok */

+ 11 - 0
include/uapi/linux/netfilter/xt_cgroup.h

@@ -0,0 +1,11 @@
+#ifndef _UAPI_XT_CGROUP_H
+#define _UAPI_XT_CGROUP_H
+
+#include <linux/types.h>
+
+struct xt_cgroup_info {
+	__u32 id;
+	__u32 invert;
+};
+
+#endif /* _UAPI_XT_CGROUP_H */

+ 16 - 0
include/uapi/linux/netfilter/xt_ipcomp.h

@@ -0,0 +1,16 @@
+#ifndef _XT_IPCOMP_H
+#define _XT_IPCOMP_H
+
+#include <linux/types.h>
+
+struct xt_ipcomp {
+	__u32 spis[2];	/* Security Parameter Index */
+	__u8 invflags;	/* Inverse flags */
+	__u8 hdrres;	/* Test of the Reserved Field */
+};
+
+/* Values for "invflags" field in struct xt_ipcomp. */
+#define XT_IPCOMP_INV_SPI	0x01	/* Invert the sense of spi. */
+#define XT_IPCOMP_INV_MASK	0x01	/* All possible flags. */
+
+#endif /*_XT_IPCOMP_H*/

+ 9 - 2
net/Kconfig

@@ -238,12 +238,19 @@ config XPS
 	depends on SMP
 	default y
 
-config NETPRIO_CGROUP
+config CGROUP_NET_PRIO
 	tristate "Network priority cgroup"
 	depends on CGROUPS
 	---help---
 	  Cgroup subsystem for use in assigning processes to network priorities on
-	  a per-interface basis
+	  a per-interface basis.
+
+config CGROUP_NET_CLASSID
+	boolean "Network classid cgroup"
+	depends on CGROUPS
+	---help---
+	  Cgroup subsystem for use as general purpose socket classid marker that is
+	  being used in cls_cgroup and for netfilter matching.
 
 config NET_RX_BUSY_POLL
 	boolean

+ 2 - 1
net/core/Makefile

@@ -21,4 +21,5 @@ obj-$(CONFIG_FIB_RULES) += fib_rules.o
 obj-$(CONFIG_TRACEPOINTS) += net-traces.o
 obj-$(CONFIG_NET_DROP_MONITOR) += drop_monitor.o
 obj-$(CONFIG_NETWORK_PHY_TIMESTAMPING) += timestamping.o
-obj-$(CONFIG_NETPRIO_CGROUP) += netprio_cgroup.o
+obj-$(CONFIG_CGROUP_NET_PRIO) += netprio_cgroup.o
+obj-$(CONFIG_CGROUP_NET_CLASSID) += netclassid_cgroup.o

+ 1 - 1
net/core/dev.c

@@ -2741,7 +2741,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
 	return rc;
 }
 
-#if IS_ENABLED(CONFIG_NETPRIO_CGROUP)
+#if IS_ENABLED(CONFIG_CGROUP_NET_PRIO)
 static void skb_update_prio(struct sk_buff *skb)
 {
 	struct netprio_map *map = rcu_dereference_bh(skb->dev->priomap);

+ 120 - 0
net/core/netclassid_cgroup.c

@@ -0,0 +1,120 @@
+/*
+ * net/core/netclassid_cgroup.c	Classid Cgroupfs Handling
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ * Authors:	Thomas Graf <tgraf@suug.ch>
+ */
+
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/cgroup.h>
+#include <linux/fdtable.h>
+#include <net/cls_cgroup.h>
+#include <net/sock.h>
+
+static inline struct cgroup_cls_state *css_cls_state(struct cgroup_subsys_state *css)
+{
+	return css ? container_of(css, struct cgroup_cls_state, css) : NULL;
+}
+
+struct cgroup_cls_state *task_cls_state(struct task_struct *p)
+{
+	return css_cls_state(task_css(p, net_cls_subsys_id));
+}
+EXPORT_SYMBOL_GPL(task_cls_state);
+
+static struct cgroup_subsys_state *
+cgrp_css_alloc(struct cgroup_subsys_state *parent_css)
+{
+	struct cgroup_cls_state *cs;
+
+	cs = kzalloc(sizeof(*cs), GFP_KERNEL);
+	if (!cs)
+		return ERR_PTR(-ENOMEM);
+
+	return &cs->css;
+}
+
+static int cgrp_css_online(struct cgroup_subsys_state *css)
+{
+	struct cgroup_cls_state *cs = css_cls_state(css);
+	struct cgroup_cls_state *parent = css_cls_state(css_parent(css));
+
+	if (parent)
+		cs->classid = parent->classid;
+
+	return 0;
+}
+
+static void cgrp_css_free(struct cgroup_subsys_state *css)
+{
+	kfree(css_cls_state(css));
+}
+
+static int update_classid(const void *v, struct file *file, unsigned n)
+{
+	int err;
+	struct socket *sock = sock_from_file(file, &err);
+
+	if (sock)
+		sock->sk->sk_classid = (u32)(unsigned long)v;
+
+	return 0;
+}
+
+static void cgrp_attach(struct cgroup_subsys_state *css,
+			struct cgroup_taskset *tset)
+{
+	struct cgroup_cls_state *cs = css_cls_state(css);
+	void *v = (void *)(unsigned long)cs->classid;
+	struct task_struct *p;
+
+	cgroup_taskset_for_each(p, css, tset) {
+		task_lock(p);
+		iterate_fd(p->files, 0, update_classid, v);
+		task_unlock(p);
+	}
+}
+
+static u64 read_classid(struct cgroup_subsys_state *css, struct cftype *cft)
+{
+	return css_cls_state(css)->classid;
+}
+
+static int write_classid(struct cgroup_subsys_state *css, struct cftype *cft,
+			 u64 value)
+{
+	css_cls_state(css)->classid = (u32) value;
+
+	return 0;
+}
+
+static struct cftype ss_files[] = {
+	{
+		.name		= "classid",
+		.read_u64	= read_classid,
+		.write_u64	= write_classid,
+	},
+	{ }	/* terminate */
+};
+
+struct cgroup_subsys net_cls_subsys = {
+	.name			= "net_cls",
+	.css_alloc		= cgrp_css_alloc,
+	.css_online		= cgrp_css_online,
+	.css_free		= cgrp_css_free,
+	.attach			= cgrp_attach,
+	.subsys_id		= net_cls_subsys_id,
+	.base_cftypes		= ss_files,
+	.module			= THIS_MODULE,
+};
+
+static int __init init_netclassid_cgroup(void)
+{
+	return cgroup_load_subsys(&net_cls_subsys);
+}
+__initcall(init_netclassid_cgroup);

+ 1 - 13
net/core/sock.c

@@ -1307,19 +1307,7 @@ static void sk_prot_free(struct proto *prot, struct sock *sk)
 	module_put(owner);
 }
 
-#if IS_ENABLED(CONFIG_NET_CLS_CGROUP)
-void sock_update_classid(struct sock *sk)
-{
-	u32 classid;
-
-	classid = task_cls_classid(current);
-	if (classid != sk->sk_classid)
-		sk->sk_classid = classid;
-}
-EXPORT_SYMBOL(sock_update_classid);
-#endif
-
-#if IS_ENABLED(CONFIG_NETPRIO_CGROUP)
+#if IS_ENABLED(CONFIG_CGROUP_NET_PRIO)
 void sock_update_netprioidx(struct sock *sk)
 {
 	if (in_interrupt())

+ 0 - 6
net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c

@@ -548,9 +548,3 @@ static void __exit nf_conntrack_l3proto_ipv4_fini(void)
 
 module_init(nf_conntrack_l3proto_ipv4_init);
 module_exit(nf_conntrack_l3proto_ipv4_fini);
-
-void need_ipv4_conntrack(void)
-{
-	return;
-}
-EXPORT_SYMBOL_GPL(need_ipv4_conntrack);

+ 19 - 0
net/netfilter/Kconfig

@@ -858,6 +858,16 @@ config NETFILTER_XT_MATCH_BPF
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
+config NETFILTER_XT_MATCH_CGROUP
+	tristate '"control group" match support'
+	depends on NETFILTER_ADVANCED
+	depends on CGROUPS
+	select CGROUP_NET_CLASSID
+	---help---
+	Socket/process control group matching allows you to match locally
+	generated packets based on which net_cls control group processes
+	belong to.
+
 config NETFILTER_XT_MATCH_CLUSTER
 	tristate '"cluster" match support'
 	depends on NF_CONNTRACK
@@ -1035,6 +1045,15 @@ config NETFILTER_XT_MATCH_HL
 	in the IPv6 header, or the time-to-live field in the IPv4
 	header of the packet.
 
+config NETFILTER_XT_MATCH_IPCOMP
+	tristate '"ipcomp" match support'
+	depends on NETFILTER_ADVANCED
+	help
+	  This match extension allows you to match a range of CPIs (16 bits)
+	  inside the IPComp header of IPsec packets.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
 config NETFILTER_XT_MATCH_IPRANGE
 	tristate '"iprange" address range match support'
 	depends on NETFILTER_ADVANCED

+ 2 - 0
net/netfilter/Makefile

@@ -133,6 +133,7 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_ESP) += xt_esp.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_HASHLIMIT) += xt_hashlimit.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_HELPER) += xt_helper.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_HL) += xt_hl.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_IPCOMP) += xt_ipcomp.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_IPRANGE) += xt_iprange.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_IPVS) += xt_ipvs.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_LENGTH) += xt_length.o
@@ -142,6 +143,7 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_MULTIPORT) += xt_multiport.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_NFACCT) += xt_nfacct.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_OSF) += xt_osf.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_OWNER) += xt_owner.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_CGROUP) += xt_cgroup.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_PHYSDEV) += xt_physdev.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_PKTTYPE) += xt_pkttype.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_POLICY) += xt_policy.o

+ 0 - 28
net/netfilter/ipset/ip_set_core.c

@@ -624,34 +624,6 @@ EXPORT_SYMBOL_GPL(ip_set_name_byindex);
  * call nfnl_lock for us.
  */
 
-/*
- * Find set by name, reference it once. The reference makes sure the
- * thing pointed to, does not go away under our feet.
- *
- * The nfnl mutex is used in the function.
- */
-ip_set_id_t
-ip_set_nfnl_get(struct net *net, const char *name)
-{
-	ip_set_id_t i, index = IPSET_INVALID_ID;
-	struct ip_set *s;
-	struct ip_set_net *inst = ip_set_pernet(net);
-
-	nfnl_lock(NFNL_SUBSYS_IPSET);
-	for (i = 0; i < inst->ip_set_max; i++) {
-		s = nfnl_set(inst, i);
-		if (s != NULL && STREQ(s->name, name)) {
-			__ip_set_get(s);
-			index = i;
-			break;
-		}
-	}
-	nfnl_unlock(NFNL_SUBSYS_IPSET);
-
-	return index;
-}
-EXPORT_SYMBOL_GPL(ip_set_nfnl_get);
-
 /*
  * Find set by index, reference it once. The reference makes sure the
  * thing pointed to, does not go away under our feet.

+ 4 - 1
net/netfilter/ipvs/ip_vs_sync.c

@@ -1637,7 +1637,10 @@ static int sync_thread_master(void *data)
 			continue;
 		}
 		while (ip_vs_send_sync_msg(tinfo->sock, sb->mesg) < 0) {
-			int ret = __wait_event_interruptible(*sk_sleep(sk),
+			/* (Ab)use interruptible sleep to avoid increasing
+			 * the load avg.
+			 */
+			__wait_event_interruptible(*sk_sleep(sk),
 						   sock_writeable(sk) ||
 						   kthread_should_stop());
 			if (unlikely(kthread_should_stop()))

+ 0 - 15
net/netfilter/nf_conntrack_core.c

@@ -60,12 +60,6 @@ int (*nfnetlink_parse_nat_setup_hook)(struct nf_conn *ct,
 				      const struct nlattr *attr) __read_mostly;
 EXPORT_SYMBOL_GPL(nfnetlink_parse_nat_setup_hook);
 
-int (*nf_nat_seq_adjust_hook)(struct sk_buff *skb,
-			      struct nf_conn *ct,
-			      enum ip_conntrack_info ctinfo,
-			      unsigned int protoff);
-EXPORT_SYMBOL_GPL(nf_nat_seq_adjust_hook);
-
 DEFINE_SPINLOCK(nf_conntrack_lock);
 EXPORT_SYMBOL_GPL(nf_conntrack_lock);
 
@@ -361,15 +355,6 @@ begin:
 	return NULL;
 }
 
-struct nf_conntrack_tuple_hash *
-__nf_conntrack_find(struct net *net, u16 zone,
-		    const struct nf_conntrack_tuple *tuple)
-{
-	return ____nf_conntrack_find(net, zone, tuple,
-				     hash_conntrack_raw(tuple, zone));
-}
-EXPORT_SYMBOL_GPL(__nf_conntrack_find);
-
 /* Find a connection corresponding to a tuple. */
 static struct nf_conntrack_tuple_hash *
 __nf_conntrack_find_get(struct net *net, u16 zone,

+ 10 - 2
net/netfilter/nf_conntrack_netlink.c

@@ -2118,8 +2118,16 @@ ctnetlink_nfqueue_parse_ct(const struct nlattr *cda[], struct nf_conn *ct)
 			return err;
 	}
 #if defined(CONFIG_NF_CONNTRACK_MARK)
-	if (cda[CTA_MARK])
-		ct->mark = ntohl(nla_get_be32(cda[CTA_MARK]));
+	if (cda[CTA_MARK]) {
+		u32 mask = 0, mark, newmark;
+		if (cda[CTA_MARK_MASK])
+			mask = ~ntohl(nla_get_be32(cda[CTA_MARK_MASK]));
+
+		mark = ntohl(nla_get_be32(cda[CTA_MARK]));
+		newmark = (ct->mark & mask) ^ mark;
+		if (newmark != ct->mark)
+			ct->mark = newmark;
+	}
 #endif
 	return 0;
 }

+ 0 - 6
net/netfilter/nf_conntrack_proto.c

@@ -92,12 +92,6 @@ nf_ct_l3proto_find_get(u_int16_t l3proto)
 }
 EXPORT_SYMBOL_GPL(nf_ct_l3proto_find_get);
 
-void nf_ct_l3proto_put(struct nf_conntrack_l3proto *p)
-{
-	module_put(p->me);
-}
-EXPORT_SYMBOL_GPL(nf_ct_l3proto_put);
-
 int
 nf_ct_l3proto_try_module_get(unsigned short l3proto)
 {

+ 2 - 2
net/netfilter/nf_nat_core.c

@@ -315,7 +315,7 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple,
 	 * manips not an issue.
 	 */
 	if (maniptype == NF_NAT_MANIP_SRC &&
-	    !(range->flags & NF_NAT_RANGE_PROTO_RANDOM)) {
+	    !(range->flags & NF_NAT_RANGE_PROTO_RANDOM_ALL)) {
 		/* try the original tuple first */
 		if (in_range(l3proto, l4proto, orig_tuple, range)) {
 			if (!nf_nat_used_tuple(orig_tuple, ct)) {
@@ -339,7 +339,7 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple,
 	 */
 
 	/* Only bother mapping if it's not already in range and unique */
-	if (!(range->flags & NF_NAT_RANGE_PROTO_RANDOM)) {
+	if (!(range->flags & NF_NAT_RANGE_PROTO_RANDOM_ALL)) {
 		if (range->flags & NF_NAT_RANGE_PROTO_SPECIFIED) {
 			if (l4proto->in_range(tuple, maniptype,
 					      &range->min_proto,

+ 6 - 4
net/netfilter/nf_nat_proto_common.c

@@ -74,22 +74,24 @@ void nf_nat_l4proto_unique_tuple(const struct nf_nat_l3proto *l3proto,
 		range_size = ntohs(range->max_proto.all) - min + 1;
 	}
 
-	if (range->flags & NF_NAT_RANGE_PROTO_RANDOM)
+	if (range->flags & NF_NAT_RANGE_PROTO_RANDOM) {
 		off = l3proto->secure_port(tuple, maniptype == NF_NAT_MANIP_SRC
 						  ? tuple->dst.u.all
 						  : tuple->src.u.all);
-	else
+	} else if (range->flags & NF_NAT_RANGE_PROTO_RANDOM_FULLY) {
+		off = prandom_u32();
+	} else {
 		off = *rover;
+	}
 
 	for (i = 0; ; ++off) {
 		*portptr = htons(min + off % range_size);
 		if (++i != range_size && nf_nat_used_tuple(tuple, ct))
 			continue;
-		if (!(range->flags & NF_NAT_RANGE_PROTO_RANDOM))
+		if (!(range->flags & NF_NAT_RANGE_PROTO_RANDOM_ALL))
 			*rover = off;
 		return;
 	}
-	return;
 }
 EXPORT_SYMBOL_GPL(nf_nat_l4proto_unique_tuple);
 

+ 0 - 8
net/netfilter/nfnetlink_log.c

@@ -28,8 +28,6 @@
 #include <linux/proc_fs.h>
 #include <linux/security.h>
 #include <linux/list.h>
-#include <linux/jhash.h>
-#include <linux/random.h>
 #include <linux/slab.h>
 #include <net/sock.h>
 #include <net/netfilter/nf_log.h>
@@ -75,7 +73,6 @@ struct nfulnl_instance {
 };
 
 #define INSTANCE_BUCKETS	16
-static unsigned int hash_init;
 
 static int nfnl_log_net_id __read_mostly;
 
@@ -1066,11 +1063,6 @@ static int __init nfnetlink_log_init(void)
 {
 	int status = -ENOMEM;
 
-	/* it's not really all that important to have a random value, so
-	 * we can do this from the init function, even if there hasn't
-	 * been that much entropy yet */
-	get_random_bytes(&hash_init, sizeof(hash_init));
-
 	netlink_register_notifier(&nfulnl_rtnl_notifier);
 	status = nfnetlink_subsys_register(&nfulnl_subsys);
 	if (status < 0) {

+ 34 - 0
net/netfilter/nfnetlink_queue_core.c

@@ -297,6 +297,31 @@ nfqnl_put_packet_info(struct sk_buff *nlskb, struct sk_buff *packet,
 	return flags ? nla_put_be32(nlskb, NFQA_SKB_INFO, htonl(flags)) : 0;
 }
 
+static int nfqnl_put_sk_uidgid(struct sk_buff *skb, struct sock *sk)
+{
+	const struct cred *cred;
+
+	if (sk->sk_state == TCP_TIME_WAIT)
+		return 0;
+
+	read_lock_bh(&sk->sk_callback_lock);
+	if (sk->sk_socket && sk->sk_socket->file) {
+		cred = sk->sk_socket->file->f_cred;
+		if (nla_put_be32(skb, NFQA_UID,
+		    htonl(from_kuid_munged(&init_user_ns, cred->fsuid))))
+			goto nla_put_failure;
+		if (nla_put_be32(skb, NFQA_GID,
+		    htonl(from_kgid_munged(&init_user_ns, cred->fsgid))))
+			goto nla_put_failure;
+	}
+	read_unlock_bh(&sk->sk_callback_lock);
+	return 0;
+
+nla_put_failure:
+	read_unlock_bh(&sk->sk_callback_lock);
+	return -1;
+}
+
 static struct sk_buff *
 nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
 			   struct nf_queue_entry *entry,
@@ -372,6 +397,11 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
 	if (queue->flags & NFQA_CFG_F_CONNTRACK)
 		ct = nfqnl_ct_get(entskb, &size, &ctinfo);
 
+	if (queue->flags & NFQA_CFG_F_UID_GID) {
+		size +=  (nla_total_size(sizeof(u_int32_t))	/* uid */
+			+ nla_total_size(sizeof(u_int32_t)));	/* gid */
+	}
+
 	skb = nfnetlink_alloc_skb(net, size, queue->peer_portid,
 				  GFP_ATOMIC);
 	if (!skb)
@@ -484,6 +514,10 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
 			goto nla_put_failure;
 	}
 
+	if ((queue->flags & NFQA_CFG_F_UID_GID) && entskb->sk &&
+	    nfqnl_put_sk_uidgid(skb, entskb->sk) < 0)
+		goto nla_put_failure;
+
 	if (ct && nfqnl_ct_put(skb, ct, ctinfo) < 0)
 		goto nla_put_failure;
 

+ 1 - 1
net/netfilter/nft_hash.c

@@ -164,7 +164,7 @@ static int nft_hash_init(const struct nft_set *set,
 	unsigned int cnt, i;
 
 	if (unlikely(!nft_hash_rnd_initted)) {
-		get_random_bytes(&nft_hash_rnd, 4);
+		nft_hash_rnd = prandom_u32();
 		nft_hash_rnd_initted = true;
 	}
 

+ 3 - 1
net/netfilter/xt_CT.c

@@ -211,8 +211,10 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par,
 	ret = 0;
 	if ((info->ct_events || info->exp_events) &&
 	    !nf_ct_ecache_ext_add(ct, info->ct_events, info->exp_events,
-				  GFP_KERNEL))
+				  GFP_KERNEL)) {
+		ret = -EINVAL;
 		goto err3;
+	}
 
 	if (info->helper[0]) {
 		ret = xt_ct_set_helper(ct, info->helper, par);

+ 1 - 1
net/netfilter/xt_RATEEST.c

@@ -100,7 +100,7 @@ static int xt_rateest_tg_checkentry(const struct xt_tgchk_param *par)
 	int ret;
 
 	if (unlikely(!rnd_inited)) {
-		get_random_bytes(&jhash_rnd, sizeof(jhash_rnd));
+		jhash_rnd = prandom_u32();
 		rnd_inited = true;
 	}
 

+ 71 - 0
net/netfilter/xt_cgroup.c

@@ -0,0 +1,71 @@
+/*
+ * Xtables module to match the process control group.
+ *
+ * Might be used to implement individual "per-application" firewall
+ * policies in contrast to global policies based on control groups.
+ * Matching is based upon processes tagged to net_cls' classid marker.
+ *
+ * (C) 2013 Daniel Borkmann <dborkman@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/skbuff.h>
+#include <linux/module.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_cgroup.h>
+#include <net/sock.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Daniel Borkmann <dborkman@redhat.com>");
+MODULE_DESCRIPTION("Xtables: process control group matching");
+MODULE_ALIAS("ipt_cgroup");
+MODULE_ALIAS("ip6t_cgroup");
+
+static int cgroup_mt_check(const struct xt_mtchk_param *par)
+{
+	struct xt_cgroup_info *info = par->matchinfo;
+
+	if (info->invert & ~1)
+		return -EINVAL;
+
+	return info->id ? 0 : -EINVAL;
+}
+
+static bool
+cgroup_mt(const struct sk_buff *skb, struct xt_action_param *par)
+{
+	const struct xt_cgroup_info *info = par->matchinfo;
+
+	if (skb->sk == NULL)
+		return false;
+
+	return (info->id == skb->sk->sk_classid) ^ info->invert;
+}
+
+static struct xt_match cgroup_mt_reg __read_mostly = {
+	.name       = "cgroup",
+	.revision   = 0,
+	.family     = NFPROTO_UNSPEC,
+	.checkentry = cgroup_mt_check,
+	.match      = cgroup_mt,
+	.matchsize  = sizeof(struct xt_cgroup_info),
+	.me         = THIS_MODULE,
+	.hooks      = (1 << NF_INET_LOCAL_OUT) |
+		      (1 << NF_INET_POST_ROUTING),
+};
+
+static int __init cgroup_mt_init(void)
+{
+	return xt_register_match(&cgroup_mt_reg);
+}
+
+static void __exit cgroup_mt_exit(void)
+{
+	xt_unregister_match(&cgroup_mt_reg);
+}
+
+module_init(cgroup_mt_init);
+module_exit(cgroup_mt_exit);

+ 1 - 1
net/netfilter/xt_connlimit.c

@@ -229,7 +229,7 @@ static int connlimit_mt_check(const struct xt_mtchk_param *par)
 		u_int32_t rand;
 
 		do {
-			get_random_bytes(&rand, sizeof(rand));
+			rand = prandom_u32();
 		} while (!rand);
 		cmpxchg(&connlimit_rnd, 0, rand);
 	}

+ 1 - 1
net/netfilter/xt_hashlimit.c

@@ -177,7 +177,7 @@ dsthash_alloc_init(struct xt_hashlimit_htable *ht,
 	/* initialize hash with random val at the time we allocate
 	 * the first hashtable entry */
 	if (unlikely(!ht->rnd_initialized)) {
-		get_random_bytes(&ht->rnd, sizeof(ht->rnd));
+		ht->rnd = prandom_u32();
 		ht->rnd_initialized = true;
 	}
 

+ 111 - 0
net/netfilter/xt_ipcomp.c

@@ -0,0 +1,111 @@
+/*  Kernel module to match IPComp parameters for IPv4 and IPv6
+ *
+ *  Copyright (C) 2013 WindRiver
+ *
+ *  Author:
+ *  Fan Du <fan.du@windriver.com>
+ *
+ *  Based on:
+ *  net/netfilter/xt_esp.c
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/in.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+
+#include <linux/netfilter/xt_ipcomp.h>
+#include <linux/netfilter/x_tables.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Fan Du <fan.du@windriver.com>");
+MODULE_DESCRIPTION("Xtables: IPv4/6 IPsec-IPComp SPI match");
+
+/*
+ * Returns true when spi lies in the inclusive range [min, max]; the
+ * outcome is flipped when invert is set.  Both the comparison and its
+ * result are reported through pr_debug().
+ */
+static inline bool
+spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, bool invert)
+{
+	bool in_range, verdict;
+
+	pr_debug("spi_match:%c 0x%x <= 0x%x <= 0x%x\n",
+		 invert ? '!' : ' ', min, spi, max);
+
+	in_range = (min <= spi && spi <= max);
+	verdict = (in_range != invert);
+
+	pr_debug(" result %s\n", verdict ? "PASS" : "FAILED");
+	return verdict;
+}
+
+/*
+ * Match callback for the "ipcomp" match.
+ *
+ * Reads the IPComp header at par->thoff and checks whether its CPI falls
+ * within the rule's [spis[0], spis[1]] range, honouring the
+ * XT_IPCOMP_INV_SPI inversion flag.
+ *
+ * Returns false for fragments (par->fragoff != 0).  If the header cannot
+ * be read, par->hotdrop is set so the packet is dropped, and false is
+ * returned.
+ */
+static bool comp_mt(const struct sk_buff *skb, struct xt_action_param *par)
+{
+	struct ip_comp_hdr _comphdr;
+	const struct ip_comp_hdr *chdr;
+	const struct xt_ipcomp *compinfo = par->matchinfo;
+
+	/* Must not be a fragment. */
+	if (par->fragoff != 0)
+		return false;
+
+	chdr = skb_header_pointer(skb, par->thoff, sizeof(_comphdr), &_comphdr);
+	if (chdr == NULL) {
+		/* We've been asked to examine this packet, and we
+		 * can't.  Hence, no choice but to drop.
+		 */
+		pr_debug("Dropping evil IPComp tinygram.\n");
+		par->hotdrop = true;
+		return false;
+	}
+
+	/* The CPI is a 16-bit network-order field, so convert it with
+	 * ntohs().  The former ntohl(cpi << 16) only produced the right
+	 * value on little-endian hosts; on big-endian it left the CPI
+	 * shifted into the upper half of the word.
+	 */
+	return spi_match(compinfo->spis[0], compinfo->spis[1],
+			 ntohs(chdr->cpi),
+			 !!(compinfo->invflags & XT_IPCOMP_INV_SPI));
+}
+
+/*
+ * checkentry callback for the "ipcomp" match.
+ *
+ * Returns 0 when the rule uses only inversion flags covered by
+ * XT_IPCOMP_INV_MASK; otherwise logs the offending flags and returns
+ * -EINVAL.
+ */
+static int comp_mt_check(const struct xt_mtchk_param *par)
+{
+	const struct xt_ipcomp *cfg = par->matchinfo;
+
+	/* Only known invflags may be specified */
+	if (!(cfg->invflags & ~XT_IPCOMP_INV_MASK))
+		return 0;
+
+	pr_err("unknown flags %X\n", cfg->invflags);
+	return -EINVAL;
+}
+
+/*
+ * Registration records for the "ipcomp" match: one entry per address
+ * family (IPv4, IPv6), both bound to IPPROTO_COMP and sharing the same
+ * callbacks.
+ */
+static struct xt_match comp_mt_reg[] __read_mostly = {
+	{
+		.name		= "ipcomp",
+		.family		= NFPROTO_IPV4,
+		.proto		= IPPROTO_COMP,
+		.matchsize	= sizeof(struct xt_ipcomp),
+		.checkentry	= comp_mt_check,
+		.match		= comp_mt,
+		.me		= THIS_MODULE,
+	},
+	{
+		.name		= "ipcomp",
+		.family		= NFPROTO_IPV6,
+		.proto		= IPPROTO_COMP,
+		.matchsize	= sizeof(struct xt_ipcomp),
+		.checkentry	= comp_mt_check,
+		.match		= comp_mt,
+		.me		= THIS_MODULE,
+	},
+};
+
+/* Module load: register every entry of comp_mt_reg; returns the registration result. */
+static int __init comp_mt_init(void)
+{
+	int err;
+
+	err = xt_register_matches(comp_mt_reg, ARRAY_SIZE(comp_mt_reg));
+	return err;
+}
+
+/* Module unload: drop the registrations made by comp_mt_init(). */
+static void __exit comp_mt_exit(void)
+{
+	xt_unregister_matches(comp_mt_reg, ARRAY_SIZE(comp_mt_reg));
+}
+
+module_init(comp_mt_init);
+module_exit(comp_mt_exit);

+ 1 - 1
net/netfilter/xt_recent.c

@@ -334,7 +334,7 @@ static int recent_mt_check(const struct xt_mtchk_param *par,
 	size_t sz;
 
 	if (unlikely(!hash_rnd_inited)) {
-		get_random_bytes(&hash_rnd, sizeof(hash_rnd));
+		hash_rnd = prandom_u32();
 		hash_rnd_inited = true;
 	}
 	if (info->check_set & ~XT_RECENT_VALID_FLAGS) {

+ 1 - 0
net/sched/Kconfig

@@ -444,6 +444,7 @@ config NET_CLS_FLOW
 config NET_CLS_CGROUP
 	tristate "Control Group Classifier"
 	select NET_CLS
+	select CGROUP_NET_CLASSID
 	depends on CGROUPS
 	---help---
 	  Say Y here if you want to classify packets based on the control

+ 1 - 110
net/sched/cls_cgroup.c

@@ -11,109 +11,13 @@
 
 #include <linux/module.h>
 #include <linux/slab.h>
-#include <linux/types.h>
-#include <linux/string.h>
-#include <linux/errno.h>
 #include <linux/skbuff.h>
-#include <linux/cgroup.h>
 #include <linux/rcupdate.h>
-#include <linux/fdtable.h>
 #include <net/rtnetlink.h>
 #include <net/pkt_cls.h>
 #include <net/sock.h>
 #include <net/cls_cgroup.h>
 
-static inline struct cgroup_cls_state *css_cls_state(struct cgroup_subsys_state *css)
-{
-	return css ? container_of(css, struct cgroup_cls_state, css) : NULL;
-}
-
-static inline struct cgroup_cls_state *task_cls_state(struct task_struct *p)
-{
-	return css_cls_state(task_css(p, net_cls_subsys_id));
-}
-
-static struct cgroup_subsys_state *
-cgrp_css_alloc(struct cgroup_subsys_state *parent_css)
-{
-	struct cgroup_cls_state *cs;
-
-	cs = kzalloc(sizeof(*cs), GFP_KERNEL);
-	if (!cs)
-		return ERR_PTR(-ENOMEM);
-	return &cs->css;
-}
-
-static int cgrp_css_online(struct cgroup_subsys_state *css)
-{
-	struct cgroup_cls_state *cs = css_cls_state(css);
-	struct cgroup_cls_state *parent = css_cls_state(css_parent(css));
-
-	if (parent)
-		cs->classid = parent->classid;
-	return 0;
-}
-
-static void cgrp_css_free(struct cgroup_subsys_state *css)
-{
-	kfree(css_cls_state(css));
-}
-
-static int update_classid(const void *v, struct file *file, unsigned n)
-{
-	int err;
-	struct socket *sock = sock_from_file(file, &err);
-	if (sock)
-		sock->sk->sk_classid = (u32)(unsigned long)v;
-	return 0;
-}
-
-static void cgrp_attach(struct cgroup_subsys_state *css,
-			struct cgroup_taskset *tset)
-{
-	struct task_struct *p;
-	struct cgroup_cls_state *cs = css_cls_state(css);
-	void *v = (void *)(unsigned long)cs->classid;
-
-	cgroup_taskset_for_each(p, css, tset) {
-		task_lock(p);
-		iterate_fd(p->files, 0, update_classid, v);
-		task_unlock(p);
-	}
-}
-
-static u64 read_classid(struct cgroup_subsys_state *css, struct cftype *cft)
-{
-	return css_cls_state(css)->classid;
-}
-
-static int write_classid(struct cgroup_subsys_state *css, struct cftype *cft,
-			 u64 value)
-{
-	css_cls_state(css)->classid = (u32) value;
-	return 0;
-}
-
-static struct cftype ss_files[] = {
-	{
-		.name = "classid",
-		.read_u64 = read_classid,
-		.write_u64 = write_classid,
-	},
-	{ }	/* terminate */
-};
-
-struct cgroup_subsys net_cls_subsys = {
-	.name		= "net_cls",
-	.css_alloc	= cgrp_css_alloc,
-	.css_online	= cgrp_css_online,
-	.css_free	= cgrp_css_free,
-	.attach		= cgrp_attach,
-	.subsys_id	= net_cls_subsys_id,
-	.base_cftypes	= ss_files,
-	.module		= THIS_MODULE,
-};
-
 struct cls_cgroup_head {
 	u32			handle;
 	struct tcf_exts		exts;
@@ -305,25 +209,12 @@ static struct tcf_proto_ops cls_cgroup_ops __read_mostly = {
 
 static int __init init_cgroup_cls(void)
 {
-	int ret;
-
-	ret = cgroup_load_subsys(&net_cls_subsys);
-	if (ret)
-		goto out;
-
-	ret = register_tcf_proto_ops(&cls_cgroup_ops);
-	if (ret)
-		cgroup_unload_subsys(&net_cls_subsys);
-
-out:
-	return ret;
+	return register_tcf_proto_ops(&cls_cgroup_ops);
 }
 
 static void __exit exit_cgroup_cls(void)
 {
 	unregister_tcf_proto_ops(&cls_cgroup_ops);
-
-	cgroup_unload_subsys(&net_cls_subsys);
 }
 
 module_init(init_cgroup_cls);