Browse Source

Merge branch 'nfp-ttl-tos-geneve'

Simon Horman says:

====================
nfp: flower: tunnel TTL & TOS, and Geneve options set & match support

this series contains updates for the TC Flower classifier
and the offload facility for it in the NFP driver.

* Patches 1 & 2: update the NFP driver to allow offload
  of matching and setting tunnel ToS/TTL of flows using the TC Flower
  classifier and tun_key action

* Patches 3 & 4: enhance the flow dissector and TC Flower classifier
  to allow match on Geneve options

* Patch 5 & 6: update the NFP driver to allow offload of
  matching and setting Geneve options of flows using the TC Flower
  classifier and tun_key action
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
David S. Miller 7 years ago
parent
commit
d5082b27ed

+ 115 - 24
drivers/net/ethernet/netronome/nfp/flower/action.c

@@ -32,6 +32,7 @@
  */
 
 #include <linux/bitfield.h>
+#include <net/geneve.h>
 #include <net/pkt_cls.h>
 #include <net/switchdev.h>
 #include <net/tc_act/tc_csum.h>
@@ -45,7 +46,15 @@
 #include "main.h"
 #include "../nfp_net_repr.h"
 
-#define NFP_FL_SUPPORTED_IPV4_UDP_TUN_FLAGS	(TUNNEL_CSUM | TUNNEL_KEY)
+/* The kernel versions of TUNNEL_* are not ABI and therefore vulnerable
+ * to change. Such changes will break our FW ABI.
+ */
+#define NFP_FL_TUNNEL_CSUM			cpu_to_be16(0x01)
+#define NFP_FL_TUNNEL_KEY			cpu_to_be16(0x04)
+#define NFP_FL_TUNNEL_GENEVE_OPT		cpu_to_be16(0x0800)
+#define NFP_FL_SUPPORTED_IPV4_UDP_TUN_FLAGS	(NFP_FL_TUNNEL_CSUM | \
+						 NFP_FL_TUNNEL_KEY | \
+						 NFP_FL_TUNNEL_GENEVE_OPT)
 
 static void nfp_fl_pop_vlan(struct nfp_fl_pop_vlan *pop_vlan)
 {
@@ -229,7 +238,71 @@ static struct nfp_fl_pre_tunnel *nfp_fl_pre_tunnel(char *act_data, int act_len)
 }
 
 static int
-nfp_fl_set_ipv4_udp_tun(struct nfp_fl_set_ipv4_udp_tun *set_tun,
+nfp_fl_push_geneve_options(struct nfp_fl_payload *nfp_fl, int *list_len,
+			   const struct tc_action *action)
+{
+	struct ip_tunnel_info *ip_tun = tcf_tunnel_info(action);
+	int opt_len, opt_cnt, act_start, tot_push_len;
+	u8 *src = ip_tunnel_info_opts(ip_tun);
+
+	/* We need to populate the options in reverse order for HW.
+	 * Therefore we go through the options, calculating the
+	 * number of options and the total size, then we populate
+	 * them in reverse order in the action list.
+	 */
+	opt_cnt = 0;
+	tot_push_len = 0;
+	opt_len = ip_tun->options_len;
+	while (opt_len > 0) {
+		struct geneve_opt *opt = (struct geneve_opt *)src;
+
+		opt_cnt++;
+		if (opt_cnt > NFP_FL_MAX_GENEVE_OPT_CNT)
+			return -EOPNOTSUPP;
+
+		tot_push_len += sizeof(struct nfp_fl_push_geneve) +
+			       opt->length * 4;
+		if (tot_push_len > NFP_FL_MAX_GENEVE_OPT_ACT)
+			return -EOPNOTSUPP;
+
+		opt_len -= sizeof(struct geneve_opt) + opt->length * 4;
+		src += sizeof(struct geneve_opt) + opt->length * 4;
+	}
+
+	if (*list_len + tot_push_len > NFP_FL_MAX_A_SIZ)
+		return -EOPNOTSUPP;
+
+	act_start = *list_len;
+	*list_len += tot_push_len;
+	src = ip_tunnel_info_opts(ip_tun);
+	while (opt_cnt) {
+		struct geneve_opt *opt = (struct geneve_opt *)src;
+		struct nfp_fl_push_geneve *push;
+		size_t act_size, len;
+
+		opt_cnt--;
+		act_size = sizeof(struct nfp_fl_push_geneve) + opt->length * 4;
+		tot_push_len -= act_size;
+		len = act_start + tot_push_len;
+
+		push = (struct nfp_fl_push_geneve *)&nfp_fl->action_data[len];
+		push->head.jump_id = NFP_FL_ACTION_OPCODE_PUSH_GENEVE;
+		push->head.len_lw = act_size >> NFP_FL_LW_SIZ;
+		push->reserved = 0;
+		push->class = opt->opt_class;
+		push->type = opt->type;
+		push->length = opt->length;
+		memcpy(&push->opt_data, opt->opt_data, opt->length * 4);
+
+		src += sizeof(struct geneve_opt) + opt->length * 4;
+	}
+
+	return 0;
+}
+
+static int
+nfp_fl_set_ipv4_udp_tun(struct nfp_app *app,
+			struct nfp_fl_set_ipv4_udp_tun *set_tun,
 			const struct tc_action *action,
 			struct nfp_fl_pre_tunnel *pre_tun,
 			enum nfp_flower_tun_type tun_type,
@@ -237,19 +310,19 @@ nfp_fl_set_ipv4_udp_tun(struct nfp_fl_set_ipv4_udp_tun *set_tun,
 {
 	size_t act_size = sizeof(struct nfp_fl_set_ipv4_udp_tun);
 	struct ip_tunnel_info *ip_tun = tcf_tunnel_info(action);
+	struct nfp_flower_priv *priv = app->priv;
 	u32 tmp_set_ip_tun_type_index = 0;
-	struct flowi4 flow = {};
 	/* Currently support one pre-tunnel so index is always 0. */
 	int pretun_idx = 0;
-	struct rtable *rt;
-	struct net *net;
-	int err;
 
-	if (ip_tun->options_len)
+	BUILD_BUG_ON(NFP_FL_TUNNEL_CSUM != TUNNEL_CSUM ||
+		     NFP_FL_TUNNEL_KEY	!= TUNNEL_KEY ||
+		     NFP_FL_TUNNEL_GENEVE_OPT != TUNNEL_GENEVE_OPT);
+	if (ip_tun->options_len &&
+	    (tun_type != NFP_FL_TUNNEL_GENEVE ||
+	    !(priv->flower_ext_feats & NFP_FL_FEATS_GENEVE_OPT)))
 		return -EOPNOTSUPP;
 
-	net = dev_net(netdev);
-
 	set_tun->head.jump_id = NFP_FL_ACTION_OPCODE_SET_IPV4_TUNNEL;
 	set_tun->head.len_lw = act_size >> NFP_FL_LW_SIZ;
 
@@ -261,28 +334,42 @@ nfp_fl_set_ipv4_udp_tun(struct nfp_fl_set_ipv4_udp_tun *set_tun,
 	set_tun->tun_type_index = cpu_to_be32(tmp_set_ip_tun_type_index);
 	set_tun->tun_id = ip_tun->key.tun_id;
 
-	/* Do a route lookup to determine ttl - if fails then use default.
-	 * Note that CONFIG_INET is a requirement of CONFIG_NET_SWITCHDEV so
-	 * must be defined here.
-	 */
-	flow.daddr = ip_tun->key.u.ipv4.dst;
-	flow.flowi4_proto = IPPROTO_UDP;
-	rt = ip_route_output_key(net, &flow);
-	err = PTR_ERR_OR_ZERO(rt);
-	if (!err) {
-		set_tun->ttl = ip4_dst_hoplimit(&rt->dst);
-		ip_rt_put(rt);
+	if (ip_tun->key.ttl) {
+		set_tun->ttl = ip_tun->key.ttl;
 	} else {
-		set_tun->ttl = net->ipv4.sysctl_ip_default_ttl;
+		struct net *net = dev_net(netdev);
+		struct flowi4 flow = {};
+		struct rtable *rt;
+		int err;
+
+		/* Do a route lookup to determine ttl - if fails then use
+		 * default. Note that CONFIG_INET is a requirement of
+		 * CONFIG_NET_SWITCHDEV so must be defined here.
+		 */
+		flow.daddr = ip_tun->key.u.ipv4.dst;
+		flow.flowi4_proto = IPPROTO_UDP;
+		rt = ip_route_output_key(net, &flow);
+		err = PTR_ERR_OR_ZERO(rt);
+		if (!err) {
+			set_tun->ttl = ip4_dst_hoplimit(&rt->dst);
+			ip_rt_put(rt);
+		} else {
+			set_tun->ttl = net->ipv4.sysctl_ip_default_ttl;
+		}
 	}
 
 	set_tun->tos = ip_tun->key.tos;
 
-	if (!(ip_tun->key.tun_flags & TUNNEL_KEY) ||
+	if (!(ip_tun->key.tun_flags & NFP_FL_TUNNEL_KEY) ||
 	    ip_tun->key.tun_flags & ~NFP_FL_SUPPORTED_IPV4_UDP_TUN_FLAGS)
 		return -EOPNOTSUPP;
 	set_tun->tun_flags = ip_tun->key.tun_flags;
 
+	if (tun_type == NFP_FL_TUNNEL_GENEVE) {
+		set_tun->tun_proto = htons(ETH_P_TEB);
+		set_tun->tun_len = ip_tun->options_len / 4;
+	}
+
 	/* Complete pre_tunnel action. */
 	pre_tun->ipv4_dst = ip_tun->key.u.ipv4.dst;
 
@@ -671,9 +758,13 @@ nfp_flower_loop_action(struct nfp_app *app, const struct tc_action *a,
 		nfp_fl->meta.shortcut = cpu_to_be32(NFP_FL_SC_ACT_NULL);
 		*a_len += sizeof(struct nfp_fl_pre_tunnel);
 
+		err = nfp_fl_push_geneve_options(nfp_fl, a_len, a);
+		if (err)
+			return err;
+
 		set_tun = (void *)&nfp_fl->action_data[*a_len];
-		err = nfp_fl_set_ipv4_udp_tun(set_tun, a, pre_tun, *tun_type,
-					      netdev);
+		err = nfp_fl_set_ipv4_udp_tun(app, set_tun, a, pre_tun,
+					      *tun_type, netdev);
 		if (err)
 			return err;
 		*a_len += sizeof(struct nfp_fl_set_ipv4_udp_tun);

+ 30 - 3
drivers/net/ethernet/netronome/nfp/flower/cmsg.h

@@ -37,6 +37,7 @@
 #include <linux/bitfield.h>
 #include <linux/skbuff.h>
 #include <linux/types.h>
+#include <net/geneve.h>
 
 #include "../nfp_app.h"
 #include "../nfpcore/nfp_cpp.h"
@@ -51,6 +52,7 @@
 #define NFP_FLOWER_LAYER_VXLAN		BIT(7)
 
 #define NFP_FLOWER_LAYER2_GENEVE	BIT(5)
+#define NFP_FLOWER_LAYER2_GENEVE_OP	BIT(6)
 
 #define NFP_FLOWER_MASK_VLAN_PRIO	GENMASK(15, 13)
 #define NFP_FLOWER_MASK_VLAN_CFI	BIT(12)
@@ -81,6 +83,11 @@
 #define NFP_FL_MAX_A_SIZ		1216
 #define NFP_FL_LW_SIZ			2
 
+/* Maximum allowed geneve options */
+#define NFP_FL_MAX_GENEVE_OPT_ACT	32
+#define NFP_FL_MAX_GENEVE_OPT_CNT	64
+#define NFP_FL_MAX_GENEVE_OPT_KEY	32
+
 /* Action opcodes */
 #define NFP_FL_ACTION_OPCODE_OUTPUT		0
 #define NFP_FL_ACTION_OPCODE_PUSH_VLAN		1
@@ -94,6 +101,7 @@
 #define NFP_FL_ACTION_OPCODE_SET_TCP		15
 #define NFP_FL_ACTION_OPCODE_PRE_LAG		16
 #define NFP_FL_ACTION_OPCODE_PRE_TUNNEL		17
+#define NFP_FL_ACTION_OPCODE_PUSH_GENEVE	26
 #define NFP_FL_ACTION_OPCODE_NUM		32
 
 #define NFP_FL_OUT_FLAGS_LAST		BIT(15)
@@ -206,7 +214,19 @@ struct nfp_fl_set_ipv4_udp_tun {
 	__be16 tun_flags;
 	u8 ttl;
 	u8 tos;
-	__be32 extra[2];
+	__be32 extra;
+	u8 tun_len;
+	u8 res2;
+	__be16 tun_proto;
+};
+
+struct nfp_fl_push_geneve {
+	struct nfp_fl_act_head head;
+	__be16 reserved;
+	__be16 class;
+	u8 type;
+	u8 length;
+	u8 opt_data[];
 };
 
 /* Metadata with L2 (1W/4B)
@@ -346,7 +366,7 @@ struct nfp_flower_ipv6 {
  * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  * |                         ipv4_addr_dst                         |
  * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- * |                            Reserved                           |
+ * |           Reserved            |      tos      |      ttl      |
  * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  * |                            Reserved                           |
  * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
@@ -356,10 +376,17 @@ struct nfp_flower_ipv6 {
 struct nfp_flower_ipv4_udp_tun {
 	__be32 ip_src;
 	__be32 ip_dst;
-	__be32 reserved[2];
+	__be16 reserved1;
+	u8 tos;
+	u8 ttl;
+	__be32 reserved2;
 	__be32 tun_id;
 };
 
+struct nfp_flower_geneve_options {
+	u8 data[NFP_FL_MAX_GENEVE_OPT_KEY];
+};
+
 #define NFP_FL_TUN_VNI_OFFSET 8
 
 /* The base header for a control message packet.

+ 1 - 0
drivers/net/ethernet/netronome/nfp/flower/main.h

@@ -69,6 +69,7 @@ struct nfp_app;
 /* Extra features bitmap. */
 #define NFP_FL_FEATS_GENEVE		BIT(0)
 #define NFP_FL_NBI_MTU_SETTING		BIT(1)
+#define NFP_FL_FEATS_GENEVE_OPT		BIT(2)
 #define NFP_FL_FEATS_LAG		BIT(31)
 
 struct nfp_fl_mask_id {

+ 34 - 0
drivers/net/ethernet/netronome/nfp/flower/match.c

@@ -262,6 +262,21 @@ nfp_flower_compile_ipv6(struct nfp_flower_ipv6 *frame,
 	nfp_flower_compile_ip_ext(&frame->ip_ext, flow, mask_version);
 }
 
+static int
+nfp_flower_compile_geneve_opt(void *key_buf, struct tc_cls_flower_offload *flow,
+			      bool mask_version)
+{
+	struct fl_flow_key *target = mask_version ? flow->mask : flow->key;
+	struct flow_dissector_key_enc_opts *opts;
+
+	opts = skb_flow_dissector_target(flow->dissector,
+					 FLOW_DISSECTOR_KEY_ENC_OPTS,
+					 target);
+	memcpy(key_buf, opts->data, opts->len);
+
+	return 0;
+}
+
 static void
 nfp_flower_compile_ipv4_udp_tun(struct nfp_flower_ipv4_udp_tun *frame,
 				struct tc_cls_flower_offload *flow,
@@ -270,6 +285,7 @@ nfp_flower_compile_ipv4_udp_tun(struct nfp_flower_ipv4_udp_tun *frame,
 	struct fl_flow_key *target = mask_version ? flow->mask : flow->key;
 	struct flow_dissector_key_ipv4_addrs *tun_ips;
 	struct flow_dissector_key_keyid *vni;
+	struct flow_dissector_key_ip *ip;
 
 	memset(frame, 0, sizeof(struct nfp_flower_ipv4_udp_tun));
 
@@ -293,6 +309,14 @@ nfp_flower_compile_ipv4_udp_tun(struct nfp_flower_ipv4_udp_tun *frame,
 		frame->ip_src = tun_ips->src;
 		frame->ip_dst = tun_ips->dst;
 	}
+
+	if (dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_ENC_IP)) {
+		ip = skb_flow_dissector_target(flow->dissector,
+					       FLOW_DISSECTOR_KEY_ENC_IP,
+					       target);
+		frame->tos = ip->tos;
+		frame->ttl = ip->ttl;
+	}
 }
 
 int nfp_flower_compile_flow_match(struct tc_cls_flower_offload *flow,
@@ -415,6 +439,16 @@ int nfp_flower_compile_flow_match(struct tc_cls_flower_offload *flow,
 			nfp_flow->nfp_tun_ipv4_addr = tun_dst;
 			nfp_tunnel_add_ipv4_off(netdev_repr->app, tun_dst);
 		}
+
+		if (key_ls->key_layer_two & NFP_FLOWER_LAYER2_GENEVE_OP) {
+			err = nfp_flower_compile_geneve_opt(ext, flow, false);
+			if (err)
+				return err;
+
+			err = nfp_flower_compile_geneve_opt(msk, flow, true);
+			if (err)
+				return err;
+		}
 	}
 
 	return 0;

+ 41 - 1
drivers/net/ethernet/netronome/nfp/flower/offload.c

@@ -66,6 +66,8 @@
 	 BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) | \
 	 BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL) | \
 	 BIT(FLOW_DISSECTOR_KEY_ENC_PORTS) | \
+	 BIT(FLOW_DISSECTOR_KEY_ENC_OPTS) | \
+	 BIT(FLOW_DISSECTOR_KEY_ENC_IP) | \
 	 BIT(FLOW_DISSECTOR_KEY_MPLS) | \
 	 BIT(FLOW_DISSECTOR_KEY_IP))
 
@@ -74,7 +76,9 @@
 	 BIT(FLOW_DISSECTOR_KEY_ENC_KEYID) | \
 	 BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) | \
 	 BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) | \
-	 BIT(FLOW_DISSECTOR_KEY_ENC_PORTS))
+	 BIT(FLOW_DISSECTOR_KEY_ENC_OPTS) | \
+	 BIT(FLOW_DISSECTOR_KEY_ENC_PORTS) | \
+	 BIT(FLOW_DISSECTOR_KEY_ENC_IP))
 
 #define NFP_FLOWER_WHITELIST_TUN_DISSECTOR_R \
 	(BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL) | \
@@ -138,6 +142,21 @@ static bool nfp_flower_check_higher_than_mac(struct tc_cls_flower_offload *f)
 		dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ICMP);
 }
 
+static int
+nfp_flower_calc_opt_layer(struct flow_dissector_key_enc_opts *enc_opts,
+			  u32 *key_layer_two, int *key_size)
+{
+	if (enc_opts->len > NFP_FL_MAX_GENEVE_OPT_KEY)
+		return -EOPNOTSUPP;
+
+	if (enc_opts->len > 0) {
+		*key_layer_two |= NFP_FLOWER_LAYER2_GENEVE_OP;
+		*key_size += sizeof(struct nfp_flower_geneve_options);
+	}
+
+	return 0;
+}
+
 static int
 nfp_flower_calculate_key_layers(struct nfp_app *app,
 				struct nfp_fl_key_ls *ret_key_ls,
@@ -151,6 +170,7 @@ nfp_flower_calculate_key_layers(struct nfp_app *app,
 	u32 key_layer_two;
 	u8 key_layer;
 	int key_size;
+	int err;
 
 	if (flow->dissector->used_keys & ~NFP_FLOWER_WHITELIST_DISSECTOR)
 		return -EOPNOTSUPP;
@@ -176,6 +196,7 @@ nfp_flower_calculate_key_layers(struct nfp_app *app,
 			       FLOW_DISSECTOR_KEY_ENC_CONTROL)) {
 		struct flow_dissector_key_ipv4_addrs *mask_ipv4 = NULL;
 		struct flow_dissector_key_ports *mask_enc_ports = NULL;
+		struct flow_dissector_key_enc_opts *enc_op = NULL;
 		struct flow_dissector_key_ports *enc_ports = NULL;
 		struct flow_dissector_key_control *mask_enc_ctl =
 			skb_flow_dissector_target(flow->dissector,
@@ -212,11 +233,21 @@ nfp_flower_calculate_key_layers(struct nfp_app *app,
 		if (mask_enc_ports->dst != cpu_to_be16(~0))
 			return -EOPNOTSUPP;
 
+		if (dissector_uses_key(flow->dissector,
+				       FLOW_DISSECTOR_KEY_ENC_OPTS)) {
+			enc_op = skb_flow_dissector_target(flow->dissector,
+							   FLOW_DISSECTOR_KEY_ENC_OPTS,
+							   flow->key);
+		}
+
 		switch (enc_ports->dst) {
 		case htons(NFP_FL_VXLAN_PORT):
 			*tun_type = NFP_FL_TUNNEL_VXLAN;
 			key_layer |= NFP_FLOWER_LAYER_VXLAN;
 			key_size += sizeof(struct nfp_flower_ipv4_udp_tun);
+
+			if (enc_op)
+				return -EOPNOTSUPP;
 			break;
 		case htons(NFP_FL_GENEVE_PORT):
 			if (!(priv->flower_ext_feats & NFP_FL_FEATS_GENEVE))
@@ -226,6 +257,15 @@ nfp_flower_calculate_key_layers(struct nfp_app *app,
 			key_size += sizeof(struct nfp_flower_ext_meta);
 			key_layer_two |= NFP_FLOWER_LAYER2_GENEVE;
 			key_size += sizeof(struct nfp_flower_ipv4_udp_tun);
+
+			if (!enc_op)
+				break;
+			if (!(priv->flower_ext_feats & NFP_FL_FEATS_GENEVE_OPT))
+				return -EOPNOTSUPP;
+			err = nfp_flower_calc_opt_layer(enc_op, &key_layer_two,
+							&key_size);
+			if (err)
+				return err;
 			break;
 		default:
 			return -EOPNOTSUPP;

+ 17 - 0
include/net/flow_dissector.h

@@ -57,6 +57,21 @@ struct flow_dissector_key_mpls {
 		mpls_label:20;
 };
 
+#define FLOW_DIS_TUN_OPTS_MAX 255
+/**
+ * struct flow_dissector_key_enc_opts:
+ * @data: tunnel option data
+ * @len: length of tunnel option data
+ * @dst_opt_type: tunnel option type
+ */
+struct flow_dissector_key_enc_opts {
+	u8 data[FLOW_DIS_TUN_OPTS_MAX];	/* Using IP_TUNNEL_OPTS_MAX is desired
+					 * here but seems difficult to #include
+					 */
+	u8 len;
+	__be16 dst_opt_type;
+};
+
 struct flow_dissector_key_keyid {
 	__be32	keyid;
 };
@@ -208,6 +223,8 @@ enum flow_dissector_key_id {
 	FLOW_DISSECTOR_KEY_IP, /* struct flow_dissector_key_ip */
 	FLOW_DISSECTOR_KEY_CVLAN, /* struct flow_dissector_key_flow_vlan */
 	FLOW_DISSECTOR_KEY_ENC_IP, /* struct flow_dissector_key_ip */
+	FLOW_DISSECTOR_KEY_ENC_OPTS, /* struct flow_dissector_key_enc_opts */
+
 	FLOW_DISSECTOR_KEY_MAX,
 };
 

+ 26 - 0
include/uapi/linux/pkt_cls.h

@@ -480,11 +480,37 @@ enum {
 	TCA_FLOWER_KEY_ENC_IP_TTL,	/* u8 */
 	TCA_FLOWER_KEY_ENC_IP_TTL_MASK,	/* u8 */
 
+	TCA_FLOWER_KEY_ENC_OPTS,
+	TCA_FLOWER_KEY_ENC_OPTS_MASK,
+
 	__TCA_FLOWER_MAX,
 };
 
 #define TCA_FLOWER_MAX (__TCA_FLOWER_MAX - 1)
 
+enum {
+	TCA_FLOWER_KEY_ENC_OPTS_UNSPEC,
+	TCA_FLOWER_KEY_ENC_OPTS_GENEVE, /* Nested
+					 * TCA_FLOWER_KEY_ENC_OPT_GENEVE_
+					 * attributes
+					 */
+	__TCA_FLOWER_KEY_ENC_OPTS_MAX,
+};
+
+#define TCA_FLOWER_KEY_ENC_OPTS_MAX (__TCA_FLOWER_KEY_ENC_OPTS_MAX - 1)
+
+enum {
+	TCA_FLOWER_KEY_ENC_OPT_GENEVE_UNSPEC,
+	TCA_FLOWER_KEY_ENC_OPT_GENEVE_CLASS,            /* u16 */
+	TCA_FLOWER_KEY_ENC_OPT_GENEVE_TYPE,             /* u8 */
+	TCA_FLOWER_KEY_ENC_OPT_GENEVE_DATA,             /* 4 to 128 bytes */
+
+	__TCA_FLOWER_KEY_ENC_OPT_GENEVE_MAX,
+};
+
+#define TCA_FLOWER_KEY_ENC_OPT_GENEVE_MAX \
+		(__TCA_FLOWER_KEY_ENC_OPT_GENEVE_MAX - 1)
+
 enum {
 	TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT = (1 << 0),
 	TCA_FLOWER_KEY_FLAGS_FRAG_IS_FIRST = (1 << 1),

+ 18 - 1
net/core/flow_dissector.c

@@ -154,7 +154,9 @@ skb_flow_dissect_tunnel_info(const struct sk_buff *skb,
 	    !dissector_uses_key(flow_dissector,
 				FLOW_DISSECTOR_KEY_ENC_PORTS) &&
 	    !dissector_uses_key(flow_dissector,
-				FLOW_DISSECTOR_KEY_ENC_IP))
+				FLOW_DISSECTOR_KEY_ENC_IP) &&
+	    !dissector_uses_key(flow_dissector,
+				FLOW_DISSECTOR_KEY_ENC_OPTS))
 		return;
 
 	info = skb_tunnel_info(skb);
@@ -224,6 +226,21 @@ skb_flow_dissect_tunnel_info(const struct sk_buff *skb,
 		ip->tos = key->tos;
 		ip->ttl = key->ttl;
 	}
+
+	if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ENC_OPTS)) {
+		struct flow_dissector_key_enc_opts *enc_opt;
+
+		enc_opt = skb_flow_dissector_target(flow_dissector,
+						    FLOW_DISSECTOR_KEY_ENC_OPTS,
+						    target_container);
+
+		if (info->options_len) {
+			enc_opt->len = info->options_len;
+			ip_tunnel_info_opts_get(enc_opt->data, info);
+			enc_opt->dst_opt_type = info->key.tun_flags &
+						TUNNEL_OPTIONS_PRESENT;
+		}
+	}
 }
 EXPORT_SYMBOL(skb_flow_dissect_tunnel_info);
 

+ 243 - 1
net/sched/cls_flower.c

@@ -24,6 +24,7 @@
 #include <net/pkt_cls.h>
 #include <net/ip.h>
 #include <net/flow_dissector.h>
+#include <net/geneve.h>
 
 #include <net/dst.h>
 #include <net/dst_metadata.h>
@@ -53,6 +54,7 @@ struct fl_flow_key {
 	struct flow_dissector_key_tcp tcp;
 	struct flow_dissector_key_ip ip;
 	struct flow_dissector_key_ip enc_ip;
+	struct flow_dissector_key_enc_opts enc_opts;
 } __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */
 
 struct fl_flow_mask_range {
@@ -482,6 +484,21 @@ static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = {
 	[TCA_FLOWER_KEY_ENC_IP_TOS_MASK] = { .type = NLA_U8 },
 	[TCA_FLOWER_KEY_ENC_IP_TTL]	 = { .type = NLA_U8 },
 	[TCA_FLOWER_KEY_ENC_IP_TTL_MASK] = { .type = NLA_U8 },
+	[TCA_FLOWER_KEY_ENC_OPTS]	= { .type = NLA_NESTED },
+	[TCA_FLOWER_KEY_ENC_OPTS_MASK]	= { .type = NLA_NESTED },
+};
+
+static const struct nla_policy
+enc_opts_policy[TCA_FLOWER_KEY_ENC_OPTS_MAX + 1] = {
+	[TCA_FLOWER_KEY_ENC_OPTS_GENEVE]        = { .type = NLA_NESTED },
+};
+
+static const struct nla_policy
+geneve_opt_policy[TCA_FLOWER_KEY_ENC_OPT_GENEVE_MAX + 1] = {
+	[TCA_FLOWER_KEY_ENC_OPT_GENEVE_CLASS]      = { .type = NLA_U16 },
+	[TCA_FLOWER_KEY_ENC_OPT_GENEVE_TYPE]       = { .type = NLA_U8 },
+	[TCA_FLOWER_KEY_ENC_OPT_GENEVE_DATA]       = { .type = NLA_BINARY,
+						       .len = 128 },
 };
 
 static void fl_set_key_val(struct nlattr **tb,
@@ -603,6 +620,145 @@ static void fl_set_key_ip(struct nlattr **tb, bool encap,
 	fl_set_key_val(tb, &key->ttl, ttl_key, &mask->ttl, ttl_mask, sizeof(key->ttl));
 }
 
+static int fl_set_geneve_opt(const struct nlattr *nla, struct fl_flow_key *key,
+			     int depth, int option_len,
+			     struct netlink_ext_ack *extack)
+{
+	struct nlattr *tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_MAX + 1];
+	struct nlattr *class = NULL, *type = NULL, *data = NULL;
+	struct geneve_opt *opt;
+	int err, data_len = 0;
+
+	if (option_len > sizeof(struct geneve_opt))
+		data_len = option_len - sizeof(struct geneve_opt);
+
+	opt = (struct geneve_opt *)&key->enc_opts.data[key->enc_opts.len];
+	memset(opt, 0xff, option_len);
+	opt->length = data_len / 4;
+	opt->r1 = 0;
+	opt->r2 = 0;
+	opt->r3 = 0;
+
+	/* If no mask has been prodived we assume an exact match. */
+	if (!depth)
+		return sizeof(struct geneve_opt) + data_len;
+
+	if (nla_type(nla) != TCA_FLOWER_KEY_ENC_OPTS_GENEVE) {
+		NL_SET_ERR_MSG(extack, "Non-geneve option type for mask");
+		return -EINVAL;
+	}
+
+	err = nla_parse_nested(tb, TCA_FLOWER_KEY_ENC_OPT_GENEVE_MAX,
+			       nla, geneve_opt_policy, extack);
+	if (err < 0)
+		return err;
+
+	/* We are not allowed to omit any of CLASS, TYPE or DATA
+	 * fields from the key.
+	 */
+	if (!option_len &&
+	    (!tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_CLASS] ||
+	     !tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_TYPE] ||
+	     !tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_DATA])) {
+		NL_SET_ERR_MSG(extack, "Missing tunnel key geneve option class, type or data");
+		return -EINVAL;
+	}
+
+	/* Omitting any of CLASS, TYPE or DATA fields is allowed
+	 * for the mask.
+	 */
+	if (tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_DATA]) {
+		int new_len = key->enc_opts.len;
+
+		data = tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_DATA];
+		data_len = nla_len(data);
+		if (data_len < 4) {
+			NL_SET_ERR_MSG(extack, "Tunnel key geneve option data is less than 4 bytes long");
+			return -ERANGE;
+		}
+		if (data_len % 4) {
+			NL_SET_ERR_MSG(extack, "Tunnel key geneve option data is not a multiple of 4 bytes long");
+			return -ERANGE;
+		}
+
+		new_len += sizeof(struct geneve_opt) + data_len;
+		BUILD_BUG_ON(FLOW_DIS_TUN_OPTS_MAX != IP_TUNNEL_OPTS_MAX);
+		if (new_len > FLOW_DIS_TUN_OPTS_MAX) {
+			NL_SET_ERR_MSG(extack, "Tunnel options exceeds max size");
+			return -ERANGE;
+		}
+		opt->length = data_len / 4;
+		memcpy(opt->opt_data, nla_data(data), data_len);
+	}
+
+	if (tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_CLASS]) {
+		class = tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_CLASS];
+		opt->opt_class = nla_get_be16(class);
+	}
+
+	if (tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_TYPE]) {
+		type = tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_TYPE];
+		opt->type = nla_get_u8(type);
+	}
+
+	return sizeof(struct geneve_opt) + data_len;
+}
+
+static int fl_set_enc_opt(struct nlattr **tb, struct fl_flow_key *key,
+			  struct fl_flow_key *mask,
+			  struct netlink_ext_ack *extack)
+{
+	const struct nlattr *nla_enc_key, *nla_opt_key, *nla_opt_msk = NULL;
+	int option_len, key_depth, msk_depth = 0;
+
+	nla_enc_key = nla_data(tb[TCA_FLOWER_KEY_ENC_OPTS]);
+
+	if (tb[TCA_FLOWER_KEY_ENC_OPTS_MASK]) {
+		nla_opt_msk = nla_data(tb[TCA_FLOWER_KEY_ENC_OPTS_MASK]);
+		msk_depth = nla_len(tb[TCA_FLOWER_KEY_ENC_OPTS_MASK]);
+	}
+
+	nla_for_each_attr(nla_opt_key, nla_enc_key,
+			  nla_len(tb[TCA_FLOWER_KEY_ENC_OPTS]), key_depth) {
+		switch (nla_type(nla_opt_key)) {
+		case TCA_FLOWER_KEY_ENC_OPTS_GENEVE:
+			option_len = 0;
+			key->enc_opts.dst_opt_type = TUNNEL_GENEVE_OPT;
+			option_len = fl_set_geneve_opt(nla_opt_key, key,
+						       key_depth, option_len,
+						       extack);
+			if (option_len < 0)
+				return option_len;
+
+			key->enc_opts.len += option_len;
+			/* At the same time we need to parse through the mask
+			 * in order to verify exact and mask attribute lengths.
+			 */
+			mask->enc_opts.dst_opt_type = TUNNEL_GENEVE_OPT;
+			option_len = fl_set_geneve_opt(nla_opt_msk, mask,
+						       msk_depth, option_len,
+						       extack);
+			if (option_len < 0)
+				return option_len;
+
+			mask->enc_opts.len += option_len;
+			if (key->enc_opts.len != mask->enc_opts.len) {
+				NL_SET_ERR_MSG(extack, "Key and mask miss aligned");
+				return -EINVAL;
+			}
+
+			if (msk_depth)
+				nla_opt_msk = nla_next(nla_opt_msk, &msk_depth);
+			break;
+		default:
+			NL_SET_ERR_MSG(extack, "Unknown tunnel option type");
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
 static int fl_set_key(struct net *net, struct nlattr **tb,
 		      struct fl_flow_key *key, struct fl_flow_key *mask,
 		      struct netlink_ext_ack *extack)
@@ -799,6 +955,12 @@ static int fl_set_key(struct net *net, struct nlattr **tb,
 
 	fl_set_key_ip(tb, true, &key->enc_ip, &mask->enc_ip);
 
+	if (tb[TCA_FLOWER_KEY_ENC_OPTS]) {
+		ret = fl_set_enc_opt(tb, key, mask, extack);
+		if (ret)
+			return ret;
+	}
+
 	if (tb[TCA_FLOWER_KEY_FLAGS])
 		ret = fl_set_key_flags(tb, &key->control.flags, &mask->control.flags);
 
@@ -894,6 +1056,8 @@ static void fl_init_dissector(struct flow_dissector *dissector,
 			     FLOW_DISSECTOR_KEY_ENC_PORTS, enc_tp);
 	FL_KEY_SET_IF_MASKED(mask, keys, cnt,
 			     FLOW_DISSECTOR_KEY_ENC_IP, enc_ip);
+	FL_KEY_SET_IF_MASKED(mask, keys, cnt,
+			     FLOW_DISSECTOR_KEY_ENC_OPTS, enc_opts);
 
 	skb_flow_dissector_init(dissector, keys, cnt);
 }
@@ -1414,6 +1578,83 @@ static int fl_dump_key_flags(struct sk_buff *skb, u32 flags_key, u32 flags_mask)
 	return nla_put(skb, TCA_FLOWER_KEY_FLAGS_MASK, 4, &_mask);
 }
 
+static int fl_dump_key_geneve_opt(struct sk_buff *skb,
+				  struct flow_dissector_key_enc_opts *enc_opts)
+{
+	struct geneve_opt *opt;
+	struct nlattr *nest;
+	int opt_off = 0;
+
+	nest = nla_nest_start(skb, TCA_FLOWER_KEY_ENC_OPTS_GENEVE);
+	if (!nest)
+		goto nla_put_failure;
+
+	while (enc_opts->len > opt_off) {
+		opt = (struct geneve_opt *)&enc_opts->data[opt_off];
+
+		if (nla_put_be16(skb, TCA_FLOWER_KEY_ENC_OPT_GENEVE_CLASS,
+				 opt->opt_class))
+			goto nla_put_failure;
+		if (nla_put_u8(skb, TCA_FLOWER_KEY_ENC_OPT_GENEVE_TYPE,
+			       opt->type))
+			goto nla_put_failure;
+		if (nla_put(skb, TCA_FLOWER_KEY_ENC_OPT_GENEVE_DATA,
+			    opt->length * 4, opt->opt_data))
+			goto nla_put_failure;
+
+		opt_off += sizeof(struct geneve_opt) + opt->length * 4;
+	}
+	nla_nest_end(skb, nest);
+	return 0;
+
+nla_put_failure:
+	nla_nest_cancel(skb, nest);
+	return -EMSGSIZE;
+}
+
+static int fl_dump_key_options(struct sk_buff *skb, int enc_opt_type,
+			       struct flow_dissector_key_enc_opts *enc_opts)
+{
+	struct nlattr *nest;
+	int err;
+
+	if (!enc_opts->len)
+		return 0;
+
+	nest = nla_nest_start(skb, enc_opt_type);
+	if (!nest)
+		goto nla_put_failure;
+
+	switch (enc_opts->dst_opt_type) {
+	case TUNNEL_GENEVE_OPT:
+		err = fl_dump_key_geneve_opt(skb, enc_opts);
+		if (err)
+			goto nla_put_failure;
+		break;
+	default:
+		goto nla_put_failure;
+	}
+	nla_nest_end(skb, nest);
+	return 0;
+
+nla_put_failure:
+	nla_nest_cancel(skb, nest);
+	return -EMSGSIZE;
+}
+
+static int fl_dump_key_enc_opt(struct sk_buff *skb,
+			       struct flow_dissector_key_enc_opts *key_opts,
+			       struct flow_dissector_key_enc_opts *msk_opts)
+{
+	int err;
+
+	err = fl_dump_key_options(skb, TCA_FLOWER_KEY_ENC_OPTS, key_opts);
+	if (err)
+		return err;
+
+	return fl_dump_key_options(skb, TCA_FLOWER_KEY_ENC_OPTS_MASK, msk_opts);
+}
+
 static int fl_dump_key(struct sk_buff *skb, struct net *net,
 		       struct fl_flow_key *key, struct fl_flow_key *mask)
 {
@@ -1594,7 +1835,8 @@ static int fl_dump_key(struct sk_buff *skb, struct net *net,
 			    &mask->enc_tp.dst,
 			    TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK,
 			    sizeof(key->enc_tp.dst)) ||
-	    fl_dump_key_ip(skb, true, &key->enc_ip, &mask->enc_ip))
+	    fl_dump_key_ip(skb, true, &key->enc_ip, &mask->enc_ip) ||
+	    fl_dump_key_enc_opt(skb, &key->enc_opts, &mask->enc_opts))
 		goto nla_put_failure;
 
 	if (fl_dump_key_flags(skb, key->control.flags, mask->control.flags))