vport-netdev.c 9.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427
  1. /*
  2. * Copyright (c) 2007-2012 Nicira, Inc.
  3. *
  4. * This program is free software; you can redistribute it and/or
  5. * modify it under the terms of version 2 of the GNU General Public
  6. * License as published by the Free Software Foundation.
  7. *
  8. * This program is distributed in the hope that it will be useful, but
  9. * WITHOUT ANY WARRANTY; without even the implied warranty of
  10. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  11. * General Public License for more details.
  12. *
  13. * You should have received a copy of the GNU General Public License
  14. * along with this program; if not, write to the Free Software
  15. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  16. * 02110-1301, USA
  17. */
  18. #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  19. #include <linux/if_arp.h>
  20. #include <linux/if_bridge.h>
  21. #include <linux/if_vlan.h>
  22. #include <linux/kernel.h>
  23. #include <linux/llc.h>
  24. #include <linux/rtnetlink.h>
  25. #include <linux/skbuff.h>
  26. #include <linux/openvswitch.h>
  27. #include <net/udp.h>
  28. #include <net/ip_tunnels.h>
  29. #include <net/rtnetlink.h>
  30. #include <net/vxlan.h>
  31. #include "datapath.h"
  32. #include "vport.h"
  33. #include "vport-internal_dev.h"
  34. #include "vport-netdev.h"
  35. static struct vport_ops ovs_netdev_vport_ops;
  36. /* Must be called with rcu_read_lock. */
  37. static void netdev_port_receive(struct vport *vport, struct sk_buff *skb)
  38. {
  39. if (unlikely(!vport))
  40. goto error;
  41. if (unlikely(skb_warn_if_lro(skb)))
  42. goto error;
  43. /* Make our own copy of the packet. Otherwise we will mangle the
  44. * packet for anyone who came before us (e.g. tcpdump via AF_PACKET).
  45. */
  46. skb = skb_share_check(skb, GFP_ATOMIC);
  47. if (unlikely(!skb))
  48. return;
  49. skb_push(skb, ETH_HLEN);
  50. ovs_skb_postpush_rcsum(skb, skb->data, ETH_HLEN);
  51. ovs_vport_receive(vport, skb, skb_tunnel_info(skb, AF_INET));
  52. return;
  53. error:
  54. kfree_skb(skb);
  55. }
  56. /* Called with rcu_read_lock and bottom-halves disabled. */
  57. static rx_handler_result_t netdev_frame_hook(struct sk_buff **pskb)
  58. {
  59. struct sk_buff *skb = *pskb;
  60. struct vport *vport;
  61. if (unlikely(skb->pkt_type == PACKET_LOOPBACK))
  62. return RX_HANDLER_PASS;
  63. vport = ovs_netdev_get_vport(skb->dev);
  64. netdev_port_receive(vport, skb);
  65. return RX_HANDLER_CONSUMED;
  66. }
  67. static struct net_device *get_dpdev(const struct datapath *dp)
  68. {
  69. struct vport *local;
  70. local = ovs_vport_ovsl(dp, OVSP_LOCAL);
  71. BUG_ON(!local);
  72. return local->dev;
  73. }
  74. static struct vport *netdev_link(struct vport *vport, const char *name)
  75. {
  76. int err;
  77. vport->dev = dev_get_by_name(ovs_dp_get_net(vport->dp), name);
  78. if (!vport->dev) {
  79. err = -ENODEV;
  80. goto error_free_vport;
  81. }
  82. if (vport->dev->flags & IFF_LOOPBACK ||
  83. vport->dev->type != ARPHRD_ETHER ||
  84. ovs_is_internal_dev(vport->dev)) {
  85. err = -EINVAL;
  86. goto error_put;
  87. }
  88. rtnl_lock();
  89. err = netdev_master_upper_dev_link(vport->dev,
  90. get_dpdev(vport->dp));
  91. if (err)
  92. goto error_unlock;
  93. err = netdev_rx_handler_register(vport->dev, netdev_frame_hook,
  94. vport);
  95. if (err)
  96. goto error_master_upper_dev_unlink;
  97. dev_disable_lro(vport->dev);
  98. dev_set_promiscuity(vport->dev, 1);
  99. vport->dev->priv_flags |= IFF_OVS_DATAPATH;
  100. rtnl_unlock();
  101. return vport;
  102. error_master_upper_dev_unlink:
  103. netdev_upper_dev_unlink(vport->dev, get_dpdev(vport->dp));
  104. error_unlock:
  105. rtnl_unlock();
  106. error_put:
  107. dev_put(vport->dev);
  108. error_free_vport:
  109. ovs_vport_free(vport);
  110. return ERR_PTR(err);
  111. }
  112. static struct vport *netdev_create(const struct vport_parms *parms)
  113. {
  114. struct vport *vport;
  115. vport = ovs_vport_alloc(0, &ovs_netdev_vport_ops, parms);
  116. if (IS_ERR(vport))
  117. return vport;
  118. return netdev_link(vport, parms->name);
  119. }
  120. static void free_port_rcu(struct rcu_head *rcu)
  121. {
  122. struct vport *vport = container_of(rcu, struct vport, rcu);
  123. if (vport->dev)
  124. dev_put(vport->dev);
  125. ovs_vport_free(vport);
  126. }
  127. void ovs_netdev_detach_dev(struct vport *vport)
  128. {
  129. ASSERT_RTNL();
  130. vport->dev->priv_flags &= ~IFF_OVS_DATAPATH;
  131. netdev_rx_handler_unregister(vport->dev);
  132. netdev_upper_dev_unlink(vport->dev,
  133. netdev_master_upper_dev_get(vport->dev));
  134. dev_set_promiscuity(vport->dev, -1);
  135. }
  136. static void netdev_destroy(struct vport *vport)
  137. {
  138. rtnl_lock();
  139. if (vport->dev->priv_flags & IFF_OVS_DATAPATH)
  140. ovs_netdev_detach_dev(vport);
  141. rtnl_unlock();
  142. call_rcu(&vport->rcu, free_port_rcu);
  143. }
  144. static unsigned int packet_length(const struct sk_buff *skb)
  145. {
  146. unsigned int length = skb->len - ETH_HLEN;
  147. if (skb->protocol == htons(ETH_P_8021Q))
  148. length -= VLAN_HLEN;
  149. return length;
  150. }
  151. static int netdev_send(struct vport *vport, struct sk_buff *skb)
  152. {
  153. int mtu = vport->dev->mtu;
  154. int len;
  155. if (unlikely(packet_length(skb) > mtu && !skb_is_gso(skb))) {
  156. net_warn_ratelimited("%s: dropped over-mtu packet: %d > %d\n",
  157. vport->dev->name,
  158. packet_length(skb), mtu);
  159. goto drop;
  160. }
  161. skb->dev = vport->dev;
  162. len = skb->len;
  163. dev_queue_xmit(skb);
  164. return len;
  165. drop:
  166. kfree_skb(skb);
  167. return 0;
  168. }
  169. /* Returns null if this device is not attached to a datapath. */
  170. struct vport *ovs_netdev_get_vport(struct net_device *dev)
  171. {
  172. if (likely(dev->priv_flags & IFF_OVS_DATAPATH))
  173. return (struct vport *)
  174. rcu_dereference_rtnl(dev->rx_handler_data);
  175. else
  176. return NULL;
  177. }
  178. static struct vport_ops ovs_netdev_vport_ops = {
  179. .type = OVS_VPORT_TYPE_NETDEV,
  180. .create = netdev_create,
  181. .destroy = netdev_destroy,
  182. .send = netdev_send,
  183. };
  184. /* Compat code for old userspace. */
  185. #if IS_ENABLED(CONFIG_VXLAN)
  186. static struct vport_ops ovs_vxlan_netdev_vport_ops;
  187. static int vxlan_get_options(const struct vport *vport, struct sk_buff *skb)
  188. {
  189. struct vxlan_dev *vxlan = netdev_priv(vport->dev);
  190. __be16 dst_port = vxlan->cfg.dst_port;
  191. if (nla_put_u16(skb, OVS_TUNNEL_ATTR_DST_PORT, ntohs(dst_port)))
  192. return -EMSGSIZE;
  193. if (vxlan->flags & VXLAN_F_GBP) {
  194. struct nlattr *exts;
  195. exts = nla_nest_start(skb, OVS_TUNNEL_ATTR_EXTENSION);
  196. if (!exts)
  197. return -EMSGSIZE;
  198. if (vxlan->flags & VXLAN_F_GBP &&
  199. nla_put_flag(skb, OVS_VXLAN_EXT_GBP))
  200. return -EMSGSIZE;
  201. nla_nest_end(skb, exts);
  202. }
  203. return 0;
  204. }
  205. static const struct nla_policy exts_policy[OVS_VXLAN_EXT_MAX + 1] = {
  206. [OVS_VXLAN_EXT_GBP] = { .type = NLA_FLAG, },
  207. };
  208. static int vxlan_configure_exts(struct vport *vport, struct nlattr *attr,
  209. struct vxlan_config *conf)
  210. {
  211. struct nlattr *exts[OVS_VXLAN_EXT_MAX + 1];
  212. int err;
  213. if (nla_len(attr) < sizeof(struct nlattr))
  214. return -EINVAL;
  215. err = nla_parse_nested(exts, OVS_VXLAN_EXT_MAX, attr, exts_policy);
  216. if (err < 0)
  217. return err;
  218. if (exts[OVS_VXLAN_EXT_GBP])
  219. conf->flags |= VXLAN_F_GBP;
  220. return 0;
  221. }
  222. static struct vport *vxlan_tnl_create(const struct vport_parms *parms)
  223. {
  224. struct net *net = ovs_dp_get_net(parms->dp);
  225. struct nlattr *options = parms->options;
  226. struct net_device *dev;
  227. struct vport *vport;
  228. struct nlattr *a;
  229. int err;
  230. struct vxlan_config conf = {
  231. .no_share = true,
  232. .flags = VXLAN_F_FLOW_BASED | VXLAN_F_COLLECT_METADATA,
  233. };
  234. if (!options) {
  235. err = -EINVAL;
  236. goto error;
  237. }
  238. a = nla_find_nested(options, OVS_TUNNEL_ATTR_DST_PORT);
  239. if (a && nla_len(a) == sizeof(u16)) {
  240. conf.dst_port = htons(nla_get_u16(a));
  241. } else {
  242. /* Require destination port from userspace. */
  243. err = -EINVAL;
  244. goto error;
  245. }
  246. vport = ovs_vport_alloc(0, &ovs_vxlan_netdev_vport_ops, parms);
  247. if (IS_ERR(vport))
  248. return vport;
  249. a = nla_find_nested(options, OVS_TUNNEL_ATTR_EXTENSION);
  250. if (a) {
  251. err = vxlan_configure_exts(vport, a, &conf);
  252. if (err) {
  253. ovs_vport_free(vport);
  254. goto error;
  255. }
  256. }
  257. rtnl_lock();
  258. dev = vxlan_dev_create(net, parms->name, NET_NAME_USER, &conf);
  259. if (IS_ERR(dev)) {
  260. rtnl_unlock();
  261. ovs_vport_free(vport);
  262. return ERR_CAST(dev);
  263. }
  264. dev_change_flags(dev, dev->flags | IFF_UP);
  265. rtnl_unlock();
  266. return vport;
  267. error:
  268. return ERR_PTR(err);
  269. }
  270. static struct vport *vxlan_create(const struct vport_parms *parms)
  271. {
  272. struct vport *vport;
  273. vport = vxlan_tnl_create(parms);
  274. if (IS_ERR(vport))
  275. return vport;
  276. return netdev_link(vport, parms->name);
  277. }
  278. static void vxlan_destroy(struct vport *vport)
  279. {
  280. rtnl_lock();
  281. if (vport->dev->priv_flags & IFF_OVS_DATAPATH)
  282. ovs_netdev_detach_dev(vport);
  283. /* Early release so we can unregister the device */
  284. dev_put(vport->dev);
  285. rtnl_delete_link(vport->dev);
  286. vport->dev = NULL;
  287. rtnl_unlock();
  288. call_rcu(&vport->rcu, free_port_rcu);
  289. }
  290. static int vxlan_get_egress_tun_info(struct vport *vport, struct sk_buff *skb,
  291. struct ip_tunnel_info *egress_tun_info)
  292. {
  293. struct vxlan_dev *vxlan = netdev_priv(vport->dev);
  294. struct net *net = ovs_dp_get_net(vport->dp);
  295. __be16 dst_port = vxlan_dev_dst_port(vxlan);
  296. __be16 src_port;
  297. int port_min;
  298. int port_max;
  299. inet_get_local_port_range(net, &port_min, &port_max);
  300. src_port = udp_flow_src_port(net, skb, 0, 0, true);
  301. return ovs_tunnel_get_egress_info(egress_tun_info, net,
  302. OVS_CB(skb)->egress_tun_info,
  303. IPPROTO_UDP, skb->mark,
  304. src_port, dst_port);
  305. }
  306. static struct vport_ops ovs_vxlan_netdev_vport_ops = {
  307. .type = OVS_VPORT_TYPE_VXLAN,
  308. .create = vxlan_create,
  309. .destroy = vxlan_destroy,
  310. .get_options = vxlan_get_options,
  311. .send = netdev_send,
  312. .get_egress_tun_info = vxlan_get_egress_tun_info,
  313. };
  314. static int vxlan_compat_init(void)
  315. {
  316. return ovs_vport_ops_register(&ovs_vxlan_netdev_vport_ops);
  317. }
  318. static void vxlan_compat_exit(void)
  319. {
  320. ovs_vport_ops_unregister(&ovs_vxlan_netdev_vport_ops);
  321. }
  322. #else
  /* Stub used when CONFIG_VXLAN is not enabled: there is nothing to
   * register, so initialisation always succeeds.
   */
  static int vxlan_compat_init(void)
  {
  	const int ok = 0;

  	return ok;
  }
  /* Stub used when CONFIG_VXLAN is not enabled: nothing was registered,
   * so there is nothing to undo.
   */
  static void vxlan_compat_exit(void)
  {
  	return;
  }
  330. #endif
  331. int __init ovs_netdev_init(void)
  332. {
  333. int err;
  334. err = ovs_vport_ops_register(&ovs_netdev_vport_ops);
  335. if (err)
  336. return err;
  337. err = vxlan_compat_init();
  338. if (err)
  339. vxlan_compat_exit();
  340. return err;
  341. }
  342. void ovs_netdev_exit(void)
  343. {
  344. ovs_vport_ops_unregister(&ovs_netdev_vport_ops);
  345. vxlan_compat_exit();
  346. }