lwtunnel.c 8.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415
  1. /*
  2. * lwtunnel Infrastructure for light weight tunnels like mpls
  3. *
  4. * Authors: Roopa Prabhu, <roopa@cumulusnetworks.com>
  5. *
  6. * This program is free software; you can redistribute it and/or
  7. * modify it under the terms of the GNU General Public License
  8. * as published by the Free Software Foundation; either version
  9. * 2 of the License, or (at your option) any later version.
  10. *
  11. */
  12. #include <linux/capability.h>
  13. #include <linux/module.h>
  14. #include <linux/types.h>
  15. #include <linux/kernel.h>
  16. #include <linux/slab.h>
  17. #include <linux/uaccess.h>
  18. #include <linux/skbuff.h>
  19. #include <linux/netdevice.h>
  20. #include <linux/lwtunnel.h>
  21. #include <linux/in.h>
  22. #include <linux/init.h>
  23. #include <linux/err.h>
  24. #include <net/lwtunnel.h>
  25. #include <net/rtnetlink.h>
  26. #include <net/ip6_fib.h>
  27. #include <net/nexthop.h>
  28. #ifdef CONFIG_MODULES
  29. static const char *lwtunnel_encap_str(enum lwtunnel_encap_types encap_type)
  30. {
  31. /* Only lwt encaps implemented without using an interface for
  32. * the encap need to return a string here.
  33. */
  34. switch (encap_type) {
  35. case LWTUNNEL_ENCAP_MPLS:
  36. return "MPLS";
  37. case LWTUNNEL_ENCAP_ILA:
  38. return "ILA";
  39. case LWTUNNEL_ENCAP_SEG6:
  40. return "SEG6";
  41. case LWTUNNEL_ENCAP_BPF:
  42. return "BPF";
  43. case LWTUNNEL_ENCAP_IP6:
  44. case LWTUNNEL_ENCAP_IP:
  45. case LWTUNNEL_ENCAP_NONE:
  46. case __LWTUNNEL_ENCAP_MAX:
  47. /* should not have got here */
  48. WARN_ON(1);
  49. break;
  50. }
  51. return NULL;
  52. }
  53. #endif /* CONFIG_MODULES */
  54. struct lwtunnel_state *lwtunnel_state_alloc(int encap_len)
  55. {
  56. struct lwtunnel_state *lws;
  57. lws = kzalloc(sizeof(*lws) + encap_len, GFP_ATOMIC);
  58. return lws;
  59. }
  60. EXPORT_SYMBOL(lwtunnel_state_alloc);
  61. static const struct lwtunnel_encap_ops __rcu *
  62. lwtun_encaps[LWTUNNEL_ENCAP_MAX + 1] __read_mostly;
  63. int lwtunnel_encap_add_ops(const struct lwtunnel_encap_ops *ops,
  64. unsigned int num)
  65. {
  66. if (num > LWTUNNEL_ENCAP_MAX)
  67. return -ERANGE;
  68. return !cmpxchg((const struct lwtunnel_encap_ops **)
  69. &lwtun_encaps[num],
  70. NULL, ops) ? 0 : -1;
  71. }
  72. EXPORT_SYMBOL(lwtunnel_encap_add_ops);
  73. int lwtunnel_encap_del_ops(const struct lwtunnel_encap_ops *ops,
  74. unsigned int encap_type)
  75. {
  76. int ret;
  77. if (encap_type == LWTUNNEL_ENCAP_NONE ||
  78. encap_type > LWTUNNEL_ENCAP_MAX)
  79. return -ERANGE;
  80. ret = (cmpxchg((const struct lwtunnel_encap_ops **)
  81. &lwtun_encaps[encap_type],
  82. ops, NULL) == ops) ? 0 : -1;
  83. synchronize_net();
  84. return ret;
  85. }
  86. EXPORT_SYMBOL(lwtunnel_encap_del_ops);
  87. int lwtunnel_build_state(u16 encap_type,
  88. struct nlattr *encap, unsigned int family,
  89. const void *cfg, struct lwtunnel_state **lws,
  90. struct netlink_ext_ack *extack)
  91. {
  92. const struct lwtunnel_encap_ops *ops;
  93. bool found = false;
  94. int ret = -EINVAL;
  95. if (encap_type == LWTUNNEL_ENCAP_NONE ||
  96. encap_type > LWTUNNEL_ENCAP_MAX) {
  97. NL_SET_ERR_MSG_ATTR(extack, encap,
  98. "Unknown LWT encapsulation type");
  99. return ret;
  100. }
  101. ret = -EOPNOTSUPP;
  102. rcu_read_lock();
  103. ops = rcu_dereference(lwtun_encaps[encap_type]);
  104. if (likely(ops && ops->build_state && try_module_get(ops->owner))) {
  105. found = true;
  106. ret = ops->build_state(encap, family, cfg, lws, extack);
  107. if (ret)
  108. module_put(ops->owner);
  109. }
  110. rcu_read_unlock();
  111. /* don't rely on -EOPNOTSUPP to detect match as build_state
  112. * handlers could return it
  113. */
  114. if (!found) {
  115. NL_SET_ERR_MSG_ATTR(extack, encap,
  116. "LWT encapsulation type not supported");
  117. }
  118. return ret;
  119. }
  120. EXPORT_SYMBOL(lwtunnel_build_state);
  121. int lwtunnel_valid_encap_type(u16 encap_type, struct netlink_ext_ack *extack)
  122. {
  123. const struct lwtunnel_encap_ops *ops;
  124. int ret = -EINVAL;
  125. if (encap_type == LWTUNNEL_ENCAP_NONE ||
  126. encap_type > LWTUNNEL_ENCAP_MAX) {
  127. NL_SET_ERR_MSG(extack, "Unknown lwt encapsulation type");
  128. return ret;
  129. }
  130. rcu_read_lock();
  131. ops = rcu_dereference(lwtun_encaps[encap_type]);
  132. rcu_read_unlock();
  133. #ifdef CONFIG_MODULES
  134. if (!ops) {
  135. const char *encap_type_str = lwtunnel_encap_str(encap_type);
  136. if (encap_type_str) {
  137. __rtnl_unlock();
  138. request_module("rtnl-lwt-%s", encap_type_str);
  139. rtnl_lock();
  140. rcu_read_lock();
  141. ops = rcu_dereference(lwtun_encaps[encap_type]);
  142. rcu_read_unlock();
  143. }
  144. }
  145. #endif
  146. ret = ops ? 0 : -EOPNOTSUPP;
  147. if (ret < 0)
  148. NL_SET_ERR_MSG(extack, "lwt encapsulation type not supported");
  149. return ret;
  150. }
  151. EXPORT_SYMBOL(lwtunnel_valid_encap_type);
  152. int lwtunnel_valid_encap_type_attr(struct nlattr *attr, int remaining,
  153. struct netlink_ext_ack *extack)
  154. {
  155. struct rtnexthop *rtnh = (struct rtnexthop *)attr;
  156. struct nlattr *nla_entype;
  157. struct nlattr *attrs;
  158. u16 encap_type;
  159. int attrlen;
  160. while (rtnh_ok(rtnh, remaining)) {
  161. attrlen = rtnh_attrlen(rtnh);
  162. if (attrlen > 0) {
  163. attrs = rtnh_attrs(rtnh);
  164. nla_entype = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
  165. if (nla_entype) {
  166. encap_type = nla_get_u16(nla_entype);
  167. if (lwtunnel_valid_encap_type(encap_type,
  168. extack) != 0)
  169. return -EOPNOTSUPP;
  170. }
  171. }
  172. rtnh = rtnh_next(rtnh, &remaining);
  173. }
  174. return 0;
  175. }
  176. EXPORT_SYMBOL(lwtunnel_valid_encap_type_attr);
  177. void lwtstate_free(struct lwtunnel_state *lws)
  178. {
  179. const struct lwtunnel_encap_ops *ops = lwtun_encaps[lws->type];
  180. if (ops->destroy_state) {
  181. ops->destroy_state(lws);
  182. kfree_rcu(lws, rcu);
  183. } else {
  184. kfree(lws);
  185. }
  186. module_put(ops->owner);
  187. }
  188. EXPORT_SYMBOL(lwtstate_free);
  189. int lwtunnel_fill_encap(struct sk_buff *skb, struct lwtunnel_state *lwtstate)
  190. {
  191. const struct lwtunnel_encap_ops *ops;
  192. struct nlattr *nest;
  193. int ret;
  194. if (!lwtstate)
  195. return 0;
  196. if (lwtstate->type == LWTUNNEL_ENCAP_NONE ||
  197. lwtstate->type > LWTUNNEL_ENCAP_MAX)
  198. return 0;
  199. nest = nla_nest_start(skb, RTA_ENCAP);
  200. if (!nest)
  201. return -EMSGSIZE;
  202. ret = -EOPNOTSUPP;
  203. rcu_read_lock();
  204. ops = rcu_dereference(lwtun_encaps[lwtstate->type]);
  205. if (likely(ops && ops->fill_encap))
  206. ret = ops->fill_encap(skb, lwtstate);
  207. rcu_read_unlock();
  208. if (ret)
  209. goto nla_put_failure;
  210. nla_nest_end(skb, nest);
  211. ret = nla_put_u16(skb, RTA_ENCAP_TYPE, lwtstate->type);
  212. if (ret)
  213. goto nla_put_failure;
  214. return 0;
  215. nla_put_failure:
  216. nla_nest_cancel(skb, nest);
  217. return (ret == -EOPNOTSUPP ? 0 : ret);
  218. }
  219. EXPORT_SYMBOL(lwtunnel_fill_encap);
  220. int lwtunnel_get_encap_size(struct lwtunnel_state *lwtstate)
  221. {
  222. const struct lwtunnel_encap_ops *ops;
  223. int ret = 0;
  224. if (!lwtstate)
  225. return 0;
  226. if (lwtstate->type == LWTUNNEL_ENCAP_NONE ||
  227. lwtstate->type > LWTUNNEL_ENCAP_MAX)
  228. return 0;
  229. rcu_read_lock();
  230. ops = rcu_dereference(lwtun_encaps[lwtstate->type]);
  231. if (likely(ops && ops->get_encap_size))
  232. ret = nla_total_size(ops->get_encap_size(lwtstate));
  233. rcu_read_unlock();
  234. return ret;
  235. }
  236. EXPORT_SYMBOL(lwtunnel_get_encap_size);
  237. int lwtunnel_cmp_encap(struct lwtunnel_state *a, struct lwtunnel_state *b)
  238. {
  239. const struct lwtunnel_encap_ops *ops;
  240. int ret = 0;
  241. if (!a && !b)
  242. return 0;
  243. if (!a || !b)
  244. return 1;
  245. if (a->type != b->type)
  246. return 1;
  247. if (a->type == LWTUNNEL_ENCAP_NONE ||
  248. a->type > LWTUNNEL_ENCAP_MAX)
  249. return 0;
  250. rcu_read_lock();
  251. ops = rcu_dereference(lwtun_encaps[a->type]);
  252. if (likely(ops && ops->cmp_encap))
  253. ret = ops->cmp_encap(a, b);
  254. rcu_read_unlock();
  255. return ret;
  256. }
  257. EXPORT_SYMBOL(lwtunnel_cmp_encap);
  258. int lwtunnel_output(struct net *net, struct sock *sk, struct sk_buff *skb)
  259. {
  260. struct dst_entry *dst = skb_dst(skb);
  261. const struct lwtunnel_encap_ops *ops;
  262. struct lwtunnel_state *lwtstate;
  263. int ret = -EINVAL;
  264. if (!dst)
  265. goto drop;
  266. lwtstate = dst->lwtstate;
  267. if (lwtstate->type == LWTUNNEL_ENCAP_NONE ||
  268. lwtstate->type > LWTUNNEL_ENCAP_MAX)
  269. return 0;
  270. ret = -EOPNOTSUPP;
  271. rcu_read_lock();
  272. ops = rcu_dereference(lwtun_encaps[lwtstate->type]);
  273. if (likely(ops && ops->output))
  274. ret = ops->output(net, sk, skb);
  275. rcu_read_unlock();
  276. if (ret == -EOPNOTSUPP)
  277. goto drop;
  278. return ret;
  279. drop:
  280. kfree_skb(skb);
  281. return ret;
  282. }
  283. EXPORT_SYMBOL(lwtunnel_output);
  284. int lwtunnel_xmit(struct sk_buff *skb)
  285. {
  286. struct dst_entry *dst = skb_dst(skb);
  287. const struct lwtunnel_encap_ops *ops;
  288. struct lwtunnel_state *lwtstate;
  289. int ret = -EINVAL;
  290. if (!dst)
  291. goto drop;
  292. lwtstate = dst->lwtstate;
  293. if (lwtstate->type == LWTUNNEL_ENCAP_NONE ||
  294. lwtstate->type > LWTUNNEL_ENCAP_MAX)
  295. return 0;
  296. ret = -EOPNOTSUPP;
  297. rcu_read_lock();
  298. ops = rcu_dereference(lwtun_encaps[lwtstate->type]);
  299. if (likely(ops && ops->xmit))
  300. ret = ops->xmit(skb);
  301. rcu_read_unlock();
  302. if (ret == -EOPNOTSUPP)
  303. goto drop;
  304. return ret;
  305. drop:
  306. kfree_skb(skb);
  307. return ret;
  308. }
  309. EXPORT_SYMBOL(lwtunnel_xmit);
  310. int lwtunnel_input(struct sk_buff *skb)
  311. {
  312. struct dst_entry *dst = skb_dst(skb);
  313. const struct lwtunnel_encap_ops *ops;
  314. struct lwtunnel_state *lwtstate;
  315. int ret = -EINVAL;
  316. if (!dst)
  317. goto drop;
  318. lwtstate = dst->lwtstate;
  319. if (lwtstate->type == LWTUNNEL_ENCAP_NONE ||
  320. lwtstate->type > LWTUNNEL_ENCAP_MAX)
  321. return 0;
  322. ret = -EOPNOTSUPP;
  323. rcu_read_lock();
  324. ops = rcu_dereference(lwtun_encaps[lwtstate->type]);
  325. if (likely(ops && ops->input))
  326. ret = ops->input(skb);
  327. rcu_read_unlock();
  328. if (ret == -EOPNOTSUPP)
  329. goto drop;
  330. return ret;
  331. drop:
  332. kfree_skb(skb);
  333. return ret;
  334. }
  335. EXPORT_SYMBOL(lwtunnel_input);