bpf_flow.c 8.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373
  1. // SPDX-License-Identifier: GPL-2.0
  2. #include <limits.h>
  3. #include <stddef.h>
  4. #include <stdbool.h>
  5. #include <string.h>
  6. #include <linux/pkt_cls.h>
  7. #include <linux/bpf.h>
  8. #include <linux/in.h>
  9. #include <linux/if_ether.h>
  10. #include <linux/icmp.h>
  11. #include <linux/ip.h>
  12. #include <linux/ipv6.h>
  13. #include <linux/tcp.h>
  14. #include <linux/udp.h>
  15. #include <linux/if_packet.h>
  16. #include <sys/socket.h>
  17. #include <linux/if_tunnel.h>
  18. #include <linux/mpls.h>
  19. #include "bpf_helpers.h"
  20. #include "bpf_endian.h"
  21. int _version SEC("version") = 1;
  22. #define PROG(F) SEC(#F) int bpf_func_##F
  /* Identifiers of the BPF programs reached through tail calls; each value is
   * the index passed to bpf_tail_call(). The enumerator spelling is also what
   * PROG() turns into the program name, and that name is limited to 16
   * characters: after the terminating character and the bpf_func_ prefix only
   * 6 remain, and anything beyond that is cropped.
   */
  enum {
      IP     = 0,
      IPV6   = 1,
      IPV6OP = 2, /* Destination/Hop-by-Hop Options IPv6 Extension header */
      IPV6FR = 3, /* Fragmentation IPv6 Extension Header */
      MPLS   = 4,
      VLAN   = 5,
  };
  /* Host-order masks for the fragment fields; the code wraps them in
   * bpf_htons() before testing a frag_off value taken from the packet.
   */
  #define IP_MF      (1 << 13)          /* 0x2000 */
  #define IP_OFFSET  ((1 << 13) - 1)    /* 0x1FFF */
  #define IP6_MF     (1 << 0)           /* 0x0001 */
  #define IP6_OFFSET (0xFFFF & ~0x7)    /* 0xFFF8 */
  39. struct vlan_hdr {
  40. __be16 h_vlan_TCI;
  41. __be16 h_vlan_encapsulated_proto;
  42. };
  43. struct gre_hdr {
  44. __be16 flags;
  45. __be16 proto;
  46. };
  47. struct frag_hdr {
  48. __u8 nexthdr;
  49. __u8 reserved;
  50. __be16 frag_off;
  51. __be32 identification;
  52. };
  53. struct bpf_map_def SEC("maps") jmp_table = {
  54. .type = BPF_MAP_TYPE_PROG_ARRAY,
  55. .key_size = sizeof(__u32),
  56. .value_size = sizeof(__u32),
  57. .max_entries = 8
  58. };
  59. static __always_inline void *bpf_flow_dissect_get_header(struct __sk_buff *skb,
  60. __u16 hdr_size,
  61. void *buffer)
  62. {
  63. void *data_end = (void *)(long)skb->data_end;
  64. void *data = (void *)(long)skb->data;
  65. __u16 nhoff = skb->flow_keys->nhoff;
  66. __u8 *hdr;
  67. /* Verifies this variable offset does not overflow */
  68. if (nhoff > (USHRT_MAX - hdr_size))
  69. return NULL;
  70. hdr = data + nhoff;
  71. if (hdr + hdr_size <= data_end)
  72. return hdr;
  73. if (bpf_skb_load_bytes(skb, nhoff, buffer, hdr_size))
  74. return NULL;
  75. return buffer;
  76. }
  77. /* Dispatches on ETHERTYPE */
  78. static __always_inline int parse_eth_proto(struct __sk_buff *skb, __be16 proto)
  79. {
  80. struct bpf_flow_keys *keys = skb->flow_keys;
  81. keys->n_proto = proto;
  82. switch (proto) {
  83. case bpf_htons(ETH_P_IP):
  84. bpf_tail_call(skb, &jmp_table, IP);
  85. break;
  86. case bpf_htons(ETH_P_IPV6):
  87. bpf_tail_call(skb, &jmp_table, IPV6);
  88. break;
  89. case bpf_htons(ETH_P_MPLS_MC):
  90. case bpf_htons(ETH_P_MPLS_UC):
  91. bpf_tail_call(skb, &jmp_table, MPLS);
  92. break;
  93. case bpf_htons(ETH_P_8021Q):
  94. case bpf_htons(ETH_P_8021AD):
  95. bpf_tail_call(skb, &jmp_table, VLAN);
  96. break;
  97. default:
  98. /* Protocol not supported */
  99. return BPF_DROP;
  100. }
  101. return BPF_DROP;
  102. }
  103. SEC("dissect")
  104. int _dissect(struct __sk_buff *skb)
  105. {
  106. if (!skb->vlan_present)
  107. return parse_eth_proto(skb, skb->protocol);
  108. else
  109. return parse_eth_proto(skb, skb->vlan_proto);
  110. }
  111. /* Parses on IPPROTO_* */
  112. static __always_inline int parse_ip_proto(struct __sk_buff *skb, __u8 proto)
  113. {
  114. struct bpf_flow_keys *keys = skb->flow_keys;
  115. void *data_end = (void *)(long)skb->data_end;
  116. struct icmphdr *icmp, _icmp;
  117. struct gre_hdr *gre, _gre;
  118. struct ethhdr *eth, _eth;
  119. struct tcphdr *tcp, _tcp;
  120. struct udphdr *udp, _udp;
  121. keys->ip_proto = proto;
  122. switch (proto) {
  123. case IPPROTO_ICMP:
  124. icmp = bpf_flow_dissect_get_header(skb, sizeof(*icmp), &_icmp);
  125. if (!icmp)
  126. return BPF_DROP;
  127. return BPF_OK;
  128. case IPPROTO_IPIP:
  129. keys->is_encap = true;
  130. return parse_eth_proto(skb, bpf_htons(ETH_P_IP));
  131. case IPPROTO_IPV6:
  132. keys->is_encap = true;
  133. return parse_eth_proto(skb, bpf_htons(ETH_P_IPV6));
  134. case IPPROTO_GRE:
  135. gre = bpf_flow_dissect_get_header(skb, sizeof(*gre), &_gre);
  136. if (!gre)
  137. return BPF_DROP;
  138. if (bpf_htons(gre->flags & GRE_VERSION))
  139. /* Only inspect standard GRE packets with version 0 */
  140. return BPF_OK;
  141. keys->nhoff += sizeof(*gre); /* Step over GRE Flags and Proto */
  142. if (GRE_IS_CSUM(gre->flags))
  143. keys->nhoff += 4; /* Step over chksum and Padding */
  144. if (GRE_IS_KEY(gre->flags))
  145. keys->nhoff += 4; /* Step over key */
  146. if (GRE_IS_SEQ(gre->flags))
  147. keys->nhoff += 4; /* Step over sequence number */
  148. keys->is_encap = true;
  149. if (gre->proto == bpf_htons(ETH_P_TEB)) {
  150. eth = bpf_flow_dissect_get_header(skb, sizeof(*eth),
  151. &_eth);
  152. if (!eth)
  153. return BPF_DROP;
  154. keys->nhoff += sizeof(*eth);
  155. return parse_eth_proto(skb, eth->h_proto);
  156. } else {
  157. return parse_eth_proto(skb, gre->proto);
  158. }
  159. case IPPROTO_TCP:
  160. tcp = bpf_flow_dissect_get_header(skb, sizeof(*tcp), &_tcp);
  161. if (!tcp)
  162. return BPF_DROP;
  163. if (tcp->doff < 5)
  164. return BPF_DROP;
  165. if ((__u8 *)tcp + (tcp->doff << 2) > data_end)
  166. return BPF_DROP;
  167. keys->thoff = keys->nhoff;
  168. keys->sport = tcp->source;
  169. keys->dport = tcp->dest;
  170. return BPF_OK;
  171. case IPPROTO_UDP:
  172. case IPPROTO_UDPLITE:
  173. udp = bpf_flow_dissect_get_header(skb, sizeof(*udp), &_udp);
  174. if (!udp)
  175. return BPF_DROP;
  176. keys->thoff = keys->nhoff;
  177. keys->sport = udp->source;
  178. keys->dport = udp->dest;
  179. return BPF_OK;
  180. default:
  181. return BPF_DROP;
  182. }
  183. return BPF_DROP;
  184. }
  185. static __always_inline int parse_ipv6_proto(struct __sk_buff *skb, __u8 nexthdr)
  186. {
  187. struct bpf_flow_keys *keys = skb->flow_keys;
  188. keys->ip_proto = nexthdr;
  189. switch (nexthdr) {
  190. case IPPROTO_HOPOPTS:
  191. case IPPROTO_DSTOPTS:
  192. bpf_tail_call(skb, &jmp_table, IPV6OP);
  193. break;
  194. case IPPROTO_FRAGMENT:
  195. bpf_tail_call(skb, &jmp_table, IPV6FR);
  196. break;
  197. default:
  198. return parse_ip_proto(skb, nexthdr);
  199. }
  200. return BPF_DROP;
  201. }
  202. PROG(IP)(struct __sk_buff *skb)
  203. {
  204. void *data_end = (void *)(long)skb->data_end;
  205. struct bpf_flow_keys *keys = skb->flow_keys;
  206. void *data = (void *)(long)skb->data;
  207. struct iphdr *iph, _iph;
  208. bool done = false;
  209. iph = bpf_flow_dissect_get_header(skb, sizeof(*iph), &_iph);
  210. if (!iph)
  211. return BPF_DROP;
  212. /* IP header cannot be smaller than 20 bytes */
  213. if (iph->ihl < 5)
  214. return BPF_DROP;
  215. keys->addr_proto = ETH_P_IP;
  216. keys->ipv4_src = iph->saddr;
  217. keys->ipv4_dst = iph->daddr;
  218. keys->nhoff += iph->ihl << 2;
  219. if (data + keys->nhoff > data_end)
  220. return BPF_DROP;
  221. if (iph->frag_off & bpf_htons(IP_MF | IP_OFFSET)) {
  222. keys->is_frag = true;
  223. if (iph->frag_off & bpf_htons(IP_OFFSET))
  224. /* From second fragment on, packets do not have headers
  225. * we can parse.
  226. */
  227. done = true;
  228. else
  229. keys->is_first_frag = true;
  230. }
  231. if (done)
  232. return BPF_OK;
  233. return parse_ip_proto(skb, iph->protocol);
  234. }
  235. PROG(IPV6)(struct __sk_buff *skb)
  236. {
  237. struct bpf_flow_keys *keys = skb->flow_keys;
  238. struct ipv6hdr *ip6h, _ip6h;
  239. ip6h = bpf_flow_dissect_get_header(skb, sizeof(*ip6h), &_ip6h);
  240. if (!ip6h)
  241. return BPF_DROP;
  242. keys->addr_proto = ETH_P_IPV6;
  243. memcpy(&keys->ipv6_src, &ip6h->saddr, 2*sizeof(ip6h->saddr));
  244. keys->nhoff += sizeof(struct ipv6hdr);
  245. return parse_ipv6_proto(skb, ip6h->nexthdr);
  246. }
  247. PROG(IPV6OP)(struct __sk_buff *skb)
  248. {
  249. struct ipv6_opt_hdr *ip6h, _ip6h;
  250. ip6h = bpf_flow_dissect_get_header(skb, sizeof(*ip6h), &_ip6h);
  251. if (!ip6h)
  252. return BPF_DROP;
  253. /* hlen is in 8-octets and does not include the first 8 bytes
  254. * of the header
  255. */
  256. skb->flow_keys->nhoff += (1 + ip6h->hdrlen) << 3;
  257. return parse_ipv6_proto(skb, ip6h->nexthdr);
  258. }
  259. PROG(IPV6FR)(struct __sk_buff *skb)
  260. {
  261. struct bpf_flow_keys *keys = skb->flow_keys;
  262. struct frag_hdr *fragh, _fragh;
  263. fragh = bpf_flow_dissect_get_header(skb, sizeof(*fragh), &_fragh);
  264. if (!fragh)
  265. return BPF_DROP;
  266. keys->nhoff += sizeof(*fragh);
  267. keys->is_frag = true;
  268. if (!(fragh->frag_off & bpf_htons(IP6_OFFSET)))
  269. keys->is_first_frag = true;
  270. return parse_ipv6_proto(skb, fragh->nexthdr);
  271. }
  272. PROG(MPLS)(struct __sk_buff *skb)
  273. {
  274. struct mpls_label *mpls, _mpls;
  275. mpls = bpf_flow_dissect_get_header(skb, sizeof(*mpls), &_mpls);
  276. if (!mpls)
  277. return BPF_DROP;
  278. return BPF_OK;
  279. }
  280. PROG(VLAN)(struct __sk_buff *skb)
  281. {
  282. struct bpf_flow_keys *keys = skb->flow_keys;
  283. struct vlan_hdr *vlan, _vlan;
  284. __be16 proto;
  285. /* Peek back to see if single or double-tagging */
  286. if (bpf_skb_load_bytes(skb, keys->nhoff - sizeof(proto), &proto,
  287. sizeof(proto)))
  288. return BPF_DROP;
  289. /* Account for double-tagging */
  290. if (proto == bpf_htons(ETH_P_8021AD)) {
  291. vlan = bpf_flow_dissect_get_header(skb, sizeof(*vlan), &_vlan);
  292. if (!vlan)
  293. return BPF_DROP;
  294. if (vlan->h_vlan_encapsulated_proto != bpf_htons(ETH_P_8021Q))
  295. return BPF_DROP;
  296. keys->nhoff += sizeof(*vlan);
  297. }
  298. vlan = bpf_flow_dissect_get_header(skb, sizeof(*vlan), &_vlan);
  299. if (!vlan)
  300. return BPF_DROP;
  301. keys->nhoff += sizeof(*vlan);
  302. /* Only allow 8021AD + 8021Q double tagging and no triple tagging.*/
  303. if (vlan->h_vlan_encapsulated_proto == bpf_htons(ETH_P_8021AD) ||
  304. vlan->h_vlan_encapsulated_proto == bpf_htons(ETH_P_8021Q))
  305. return BPF_DROP;
  306. return parse_eth_proto(skb, vlan->h_vlan_encapsulated_proto);
  307. }
  308. char __license[] SEC("license") = "GPL";