macvtap.c
#include <linux/etherdevice.h>
#include <linux/if_macvlan.h>
#include <linux/if_vlan.h>
#include <linux/interrupt.h>
#include <linux/nsproxy.h>
#include <linux/compat.h>
#include <linux/if_tun.h>
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/cache.h>
#include <linux/sched.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/wait.h>
#include <linux/cdev.h>
#include <linux/idr.h>
#include <linux/fs.h>
#include <linux/uio.h>

#include <net/net_namespace.h>
#include <net/rtnetlink.h>
#include <net/sock.h>
#include <linux/virtio_net.h>
#include <linux/skb_array.h>
/*
 * A macvtap queue is the central object of this driver; it connects
 * an open character device to a macvlan interface. There can be
 * multiple queues on one interface, which map back to queues
 * implemented in hardware on the underlying device.
 *
 * macvtap_proto is used to allocate queues through the sock allocation
 * mechanism.
 */
struct macvtap_queue {
	struct sock sk;
	struct socket sock;
	struct socket_wq wq;
	int vnet_hdr_sz;
	struct macvlan_dev __rcu *vlan;
	struct file *file;
	unsigned int flags;
	u16 queue_index;
	bool enabled;
	struct list_head next;
	struct skb_array skb_array;
};
#define MACVTAP_FEATURES (IFF_VNET_HDR | IFF_MULTI_QUEUE)

#define MACVTAP_VNET_LE 0x80000000
#define MACVTAP_VNET_BE 0x40000000
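
/* MACVTAP_VNET_LE and MACVTAP_VNET_BE are private bits kept in q->flags,
 * above the IFF_* values; they select the byte order of the virtio_net
 * header exchanged with user space. The helpers below honour them.
 */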
#ifdef CONFIG_TUN_VNET_CROSS_LE
static inline bool macvtap_legacy_is_little_endian(struct macvtap_queue *q)
{
	return q->flags & MACVTAP_VNET_BE ? false :
		virtio_legacy_is_little_endian();
}

static long macvtap_get_vnet_be(struct macvtap_queue *q, int __user *sp)
{
	int s = !!(q->flags & MACVTAP_VNET_BE);

	if (put_user(s, sp))
		return -EFAULT;

	return 0;
}

static long macvtap_set_vnet_be(struct macvtap_queue *q, int __user *sp)
{
	int s;

	if (get_user(s, sp))
		return -EFAULT;

	if (s)
		q->flags |= MACVTAP_VNET_BE;
	else
		q->flags &= ~MACVTAP_VNET_BE;

	return 0;
}
#else
static inline bool macvtap_legacy_is_little_endian(struct macvtap_queue *q)
{
	return virtio_legacy_is_little_endian();
}

static long macvtap_get_vnet_be(struct macvtap_queue *q, int __user *argp)
{
	return -EINVAL;
}

static long macvtap_set_vnet_be(struct macvtap_queue *q, int __user *argp)
{
	return -EINVAL;
}
#endif /* CONFIG_TUN_VNET_CROSS_LE */
static inline bool macvtap_is_little_endian(struct macvtap_queue *q)
{
	return q->flags & MACVTAP_VNET_LE ||
		macvtap_legacy_is_little_endian(q);
}

static inline u16 macvtap16_to_cpu(struct macvtap_queue *q, __virtio16 val)
{
	return __virtio16_to_cpu(macvtap_is_little_endian(q), val);
}

static inline __virtio16 cpu_to_macvtap16(struct macvtap_queue *q, u16 val)
{
	return __cpu_to_virtio16(macvtap_is_little_endian(q), val);
}
static struct proto macvtap_proto = {
	.name = "macvtap",
	.owner = THIS_MODULE,
	.obj_size = sizeof(struct macvtap_queue),
};
/*
 * Variables for dealing with macvtap device numbers.
 */
static dev_t macvtap_major;
#define MACVTAP_NUM_DEVS (1U << MINORBITS)
static DEFINE_MUTEX(minor_lock);
static DEFINE_IDR(minor_idr);

#define GOODCOPY_LEN 128
static const void *macvtap_net_namespace(struct device *d)
{
	struct net_device *dev = to_net_dev(d->parent);
	return dev_net(dev);
}

static struct class macvtap_class = {
	.name = "macvtap",
	.owner = THIS_MODULE,
	.ns_type = &net_ns_type_operations,
	.namespace = macvtap_net_namespace,
};

static struct cdev macvtap_cdev;

static const struct proto_ops macvtap_socket_ops;
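
/* TUN_OFFLOADS is the default set of user-visible offloads (see
 * macvtap_newlink); RX_OFFLOADS are the lower-device aggregation features
 * toggled by set_offload(); TAP_FEATURES is the baseline feature mask used
 * when segmenting frames in macvtap_handle_frame().
 */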
#define TUN_OFFLOADS (NETIF_F_HW_CSUM | NETIF_F_TSO_ECN | NETIF_F_TSO | \
		      NETIF_F_TSO6 | NETIF_F_UFO)
#define RX_OFFLOADS (NETIF_F_GRO | NETIF_F_LRO)
#define TAP_FEATURES (NETIF_F_GSO | NETIF_F_SG | NETIF_F_FRAGLIST)
static struct macvlan_dev *macvtap_get_vlan_rcu(const struct net_device *dev)
{
	return rcu_dereference(dev->rx_handler_data);
}
/*
 * RCU usage:
 * The macvtap_queue and the macvlan_dev are loosely coupled, the
 * pointers from one to the other can only be read while rcu_read_lock
 * or rtnl is held.
 *
 * Both the file and the macvlan_dev hold a reference on the macvtap_queue
 * through sock_hold(&q->sk). When the macvlan_dev goes away first,
 * q->vlan becomes inaccessible. When the file gets closed,
 * macvtap_get_queue() fails.
 *
 * There may still be references to the struct sock inside of the
 * queue from outbound SKBs, but these never reference back to the
 * file or the dev. The data structure is freed through __sk_free
 * when both our references and any pending SKBs are gone.
 */
static int macvtap_enable_queue(struct net_device *dev, struct file *file,
				struct macvtap_queue *q)
{
	struct macvlan_dev *vlan = netdev_priv(dev);
	int err = -EINVAL;

	ASSERT_RTNL();

	if (q->enabled)
		goto out;

	err = 0;
	rcu_assign_pointer(vlan->taps[vlan->numvtaps], q);
	q->queue_index = vlan->numvtaps;
	q->enabled = true;

	vlan->numvtaps++;
out:
	return err;
}
/* Requires RTNL */
static int macvtap_set_queue(struct net_device *dev, struct file *file,
			     struct macvtap_queue *q)
{
	struct macvlan_dev *vlan = netdev_priv(dev);

	if (vlan->numqueues == MAX_MACVTAP_QUEUES)
		return -EBUSY;

	rcu_assign_pointer(q->vlan, vlan);
	rcu_assign_pointer(vlan->taps[vlan->numvtaps], q);
	sock_hold(&q->sk);

	q->file = file;
	q->queue_index = vlan->numvtaps;
	q->enabled = true;
	file->private_data = q;
	list_add_tail(&q->next, &vlan->queue_list);

	vlan->numvtaps++;
	vlan->numqueues++;

	return 0;
}
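
/* Disabling a queue removes it from the taps[] lookup array without
 * detaching it from its file: the last enabled queue is moved into the
 * freed slot so that taps[0..numvtaps-1] stays dense for
 * macvtap_get_queue().
 */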
static int macvtap_disable_queue(struct macvtap_queue *q)
{
	struct macvlan_dev *vlan;
	struct macvtap_queue *nq;

	ASSERT_RTNL();
	if (!q->enabled)
		return -EINVAL;

	vlan = rtnl_dereference(q->vlan);

	if (vlan) {
		int index = q->queue_index;
		BUG_ON(index >= vlan->numvtaps);
		nq = rtnl_dereference(vlan->taps[vlan->numvtaps - 1]);
		nq->queue_index = index;

		rcu_assign_pointer(vlan->taps[index], nq);
		RCU_INIT_POINTER(vlan->taps[vlan->numvtaps - 1], NULL);
		q->enabled = false;

		vlan->numvtaps--;
	}

	return 0;
}
/*
 * The file owning the queue got closed, give up both
 * the reference that the file holds as well as the
 * one from the macvlan_dev if that still exists.
 *
 * Holding the RTNL lock makes sure that we don't get
 * to the queue again after destroying it.
 */
static void macvtap_put_queue(struct macvtap_queue *q)
{
	struct macvlan_dev *vlan;

	rtnl_lock();
	vlan = rtnl_dereference(q->vlan);

	if (vlan) {
		if (q->enabled)
			BUG_ON(macvtap_disable_queue(q));

		vlan->numqueues--;
		RCU_INIT_POINTER(q->vlan, NULL);
		sock_put(&q->sk);
		list_del_init(&q->next);
	}

	rtnl_unlock();

	synchronize_rcu();
	skb_array_cleanup(&q->skb_array);
	sock_put(&q->sk);
}
/*
 * Select a queue based on the rxq of the device on which this packet
 * arrived. If the incoming device is not mq, calculate a flow hash
 * to select a queue. If all fails, find the first available queue.
 * Cache vlan->numvtaps since it can become zero during the execution
 * of this function.
 */
static struct macvtap_queue *macvtap_get_queue(struct net_device *dev,
					       struct sk_buff *skb)
{
	struct macvlan_dev *vlan = netdev_priv(dev);
	struct macvtap_queue *tap = NULL;
	/* Access to taps array is protected by rcu, but access to numvtaps
	 * isn't. Below we use it to lookup a queue, but treat it as a hint
	 * and validate that the result isn't NULL - in case we are
	 * racing against queue removal.
	 */
	int numvtaps = ACCESS_ONCE(vlan->numvtaps);
	__u32 rxq;

	if (!numvtaps)
		goto out;

	if (numvtaps == 1)
		goto single;

	/* Check if we can use flow to select a queue */
	rxq = skb_get_hash(skb);
	if (rxq) {
		tap = rcu_dereference(vlan->taps[rxq % numvtaps]);
		goto out;
	}

	if (likely(skb_rx_queue_recorded(skb))) {
		rxq = skb_get_rx_queue(skb);

		while (unlikely(rxq >= numvtaps))
			rxq -= numvtaps;

		tap = rcu_dereference(vlan->taps[rxq]);
		goto out;
	}

single:
	tap = rcu_dereference(vlan->taps[0]);
out:
	return tap;
}
/*
 * The net_device is going away, give up the reference
 * that it holds on all queues and safely set the pointer
 * from the queues to NULL.
 */
static void macvtap_del_queues(struct net_device *dev)
{
	struct macvlan_dev *vlan = netdev_priv(dev);
	struct macvtap_queue *q, *tmp;

	ASSERT_RTNL();
	list_for_each_entry_safe(q, tmp, &vlan->queue_list, next) {
		list_del_init(&q->next);
		RCU_INIT_POINTER(q->vlan, NULL);
		if (q->enabled)
			vlan->numvtaps--;
		vlan->numqueues--;
		sock_put(&q->sk);
	}
	BUG_ON(vlan->numvtaps);
	BUG_ON(vlan->numqueues);
	/* guarantee that any future macvtap_set_queue will fail */
	vlan->numvtaps = MAX_MACVTAP_QUEUES;
}
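
/* rx handler, called from netif_receive_skb() under rcu_read_lock(): pick a
 * queue, segment the skb with __skb_gso_segment() if the user-visible
 * offloads require it, and push the result into that queue's skb_array,
 * waking any reader blocked in macvtap_do_read().
 */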
static rx_handler_result_t macvtap_handle_frame(struct sk_buff **pskb)
{
	struct sk_buff *skb = *pskb;
	struct net_device *dev = skb->dev;
	struct macvlan_dev *vlan;
	struct macvtap_queue *q;
	netdev_features_t features = TAP_FEATURES;

	vlan = macvtap_get_vlan_rcu(dev);
	if (!vlan)
		return RX_HANDLER_PASS;

	q = macvtap_get_queue(dev, skb);
	if (!q)
		return RX_HANDLER_PASS;

	if (__skb_array_full(&q->skb_array))
		goto drop;

	skb_push(skb, ETH_HLEN);

	/* Apply the forward feature mask so that we perform segmentation
	 * according to the user's wishes. This only works if VNET_HDR is
	 * enabled.
	 */
	if (q->flags & IFF_VNET_HDR)
		features |= vlan->tap_features;
	if (netif_needs_gso(skb, features)) {
		struct sk_buff *segs = __skb_gso_segment(skb, features, false);

		if (IS_ERR(segs))
			goto drop;

		if (!segs) {
			if (skb_array_produce(&q->skb_array, skb))
				goto drop;
			goto wake_up;
		}

		consume_skb(skb);
		while (segs) {
			struct sk_buff *nskb = segs->next;

			segs->next = NULL;
			if (skb_array_produce(&q->skb_array, segs)) {
				kfree_skb(segs);
				kfree_skb_list(nskb);
				break;
			}
			segs = nskb;
		}
	} else {
		/* If we receive a partial checksum and the tap side
		 * doesn't support checksum offload, compute the checksum.
		 * Note: it doesn't matter which checksum feature to
		 * check, we either support them all or none.
		 */
		if (skb->ip_summed == CHECKSUM_PARTIAL &&
		    !(features & NETIF_F_CSUM_MASK) &&
		    skb_checksum_help(skb))
			goto drop;
		if (skb_array_produce(&q->skb_array, skb))
			goto drop;
	}

wake_up:
	wake_up_interruptible_poll(sk_sleep(&q->sk), POLLIN | POLLRDNORM | POLLRDBAND);
	return RX_HANDLER_CONSUMED;

drop:
	/* Count errors/drops only here, thus don't care about args. */
	macvlan_count_rx(vlan, 0, 0, 0);
	kfree_skb(skb);
	return RX_HANDLER_CONSUMED;
}
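
/* Character device minors are handed out from an IDR so that each macvtap
 * link gets a stable /dev/tapN node; minor 0 is never allocated and doubles
 * as "no minor assigned" in macvtap_free_minor() and the netdev notifier.
 */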
static int macvtap_get_minor(struct macvlan_dev *vlan)
{
	int retval = -ENOMEM;

	mutex_lock(&minor_lock);
	retval = idr_alloc(&minor_idr, vlan, 1, MACVTAP_NUM_DEVS, GFP_KERNEL);
	if (retval >= 0) {
		vlan->minor = retval;
	} else if (retval == -ENOSPC) {
		printk(KERN_ERR "too many macvtap devices\n");
		retval = -EINVAL;
	}
	mutex_unlock(&minor_lock);
	return retval < 0 ? retval : 0;
}

static void macvtap_free_minor(struct macvlan_dev *vlan)
{
	mutex_lock(&minor_lock);
	if (vlan->minor) {
		idr_remove(&minor_idr, vlan->minor);
		vlan->minor = 0;
	}
	mutex_unlock(&minor_lock);
}

static struct net_device *dev_get_by_macvtap_minor(int minor)
{
	struct net_device *dev = NULL;
	struct macvlan_dev *vlan;

	mutex_lock(&minor_lock);
	vlan = idr_find(&minor_idr, minor);
	if (vlan) {
		dev = vlan->dev;
		dev_hold(dev);
	}
	mutex_unlock(&minor_lock);
	return dev;
}
static int macvtap_newlink(struct net *src_net,
			   struct net_device *dev,
			   struct nlattr *tb[],
			   struct nlattr *data[])
{
	struct macvlan_dev *vlan = netdev_priv(dev);
	int err;

	INIT_LIST_HEAD(&vlan->queue_list);

	/* Since macvlan supports all offloads by default, make
	 * tap support all offloads also.
	 */
	vlan->tap_features = TUN_OFFLOADS;

	err = netdev_rx_handler_register(dev, macvtap_handle_frame, vlan);
	if (err)
		return err;

	/* Don't put anything that may fail after macvlan_common_newlink
	 * because we can't undo what it does.
	 */
	return macvlan_common_newlink(src_net, dev, tb, data);
}

static void macvtap_dellink(struct net_device *dev,
			    struct list_head *head)
{
	netdev_rx_handler_unregister(dev);
	macvtap_del_queues(dev);
	macvlan_dellink(dev, head);
}

static void macvtap_setup(struct net_device *dev)
{
	macvlan_common_setup(dev);
	dev->tx_queue_len = TUN_READQ_SIZE;
}

static struct rtnl_link_ops macvtap_link_ops __read_mostly = {
	.kind		= "macvtap",
	.setup		= macvtap_setup,
	.newlink	= macvtap_newlink,
	.dellink	= macvtap_dellink,
};
static void macvtap_sock_write_space(struct sock *sk)
{
	wait_queue_head_t *wqueue;

	if (!sock_writeable(sk) ||
	    !test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &sk->sk_socket->flags))
		return;

	wqueue = sk_sleep(sk);
	if (wqueue && waitqueue_active(wqueue))
		wake_up_interruptible_poll(wqueue, POLLOUT | POLLWRNORM | POLLWRBAND);
}

static void macvtap_sock_destruct(struct sock *sk)
{
	struct macvtap_queue *q = container_of(sk, struct macvtap_queue, sk);
	struct sk_buff *skb;

	while ((skb = skb_array_consume(&q->skb_array)) != NULL)
		kfree_skb(skb);
}
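
/* open() on /dev/tapN: look up the macvlan device for this minor, allocate
 * a queue backed by a struct sock, size its skb_array to the device's
 * tx_queue_len, and attach it to the device as a new queue.
 */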
static int macvtap_open(struct inode *inode, struct file *file)
{
	struct net *net = current->nsproxy->net_ns;
	struct net_device *dev;
	struct macvtap_queue *q;
	int err = -ENODEV;

	rtnl_lock();
	dev = dev_get_by_macvtap_minor(iminor(inode));
	if (!dev)
		goto err;

	err = -ENOMEM;
	q = (struct macvtap_queue *)sk_alloc(net, AF_UNSPEC, GFP_KERNEL,
					     &macvtap_proto, 0);
	if (!q)
		goto err;

	RCU_INIT_POINTER(q->sock.wq, &q->wq);
	init_waitqueue_head(&q->wq.wait);
	q->sock.type = SOCK_RAW;
	q->sock.state = SS_CONNECTED;
	q->sock.file = file;
	q->sock.ops = &macvtap_socket_ops;
	sock_init_data(&q->sock, &q->sk);
	q->sk.sk_write_space = macvtap_sock_write_space;
	q->sk.sk_destruct = macvtap_sock_destruct;
	q->flags = IFF_VNET_HDR | IFF_NO_PI | IFF_TAP;
	q->vnet_hdr_sz = sizeof(struct virtio_net_hdr);

	/*
	 * so far only KVM virtio_net uses macvtap, enable zero copy between
	 * guest kernel and host kernel when lower device supports zerocopy
	 *
	 * The macvlan supports zerocopy iff the lower device supports zero
	 * copy so we don't have to look at the lower device directly.
	 */
	if ((dev->features & NETIF_F_HIGHDMA) && (dev->features & NETIF_F_SG))
		sock_set_flag(&q->sk, SOCK_ZEROCOPY);

	err = -ENOMEM;
	if (skb_array_init(&q->skb_array, dev->tx_queue_len, GFP_KERNEL))
		goto err_array;

	err = macvtap_set_queue(dev, file, q);
	if (err)
		goto err_queue;

	dev_put(dev);

	rtnl_unlock();
	return err;

err_queue:
	skb_array_cleanup(&q->skb_array);
err_array:
	sock_put(&q->sk);
err:
	if (dev)
		dev_put(dev);

	rtnl_unlock();
	return err;
}
static int macvtap_release(struct inode *inode, struct file *file)
{
	struct macvtap_queue *q = file->private_data;
	macvtap_put_queue(q);
	return 0;
}

static unsigned int macvtap_poll(struct file *file, poll_table *wait)
{
	struct macvtap_queue *q = file->private_data;
	unsigned int mask = POLLERR;

	if (!q)
		goto out;

	mask = 0;
	poll_wait(file, &q->wq.wait, wait);

	if (!skb_array_empty(&q->skb_array))
		mask |= POLLIN | POLLRDNORM;

	if (sock_writeable(&q->sk) ||
	    (!test_and_set_bit(SOCKWQ_ASYNC_NOSPACE, &q->sock.flags) &&
	     sock_writeable(&q->sk)))
		mask |= POLLOUT | POLLWRNORM;

out:
	return mask;
}
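
/* Allocate an skb with a linear part of at least @linear bytes and the rest
 * of @len in paged fragments; small packets (under a page) are kept fully
 * linear. @prepad leaves headroom for the hard header.
 */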
static inline struct sk_buff *macvtap_alloc_skb(struct sock *sk, size_t prepad,
						size_t len, size_t linear,
						int noblock, int *err)
{
	struct sk_buff *skb;

	/* Under a page? Don't bother with paged skb. */
	if (prepad + len < PAGE_SIZE || !linear)
		linear = len;

	skb = sock_alloc_send_pskb(sk, prepad + linear, len - linear, noblock,
				   err, 0);
	if (!skb)
		return NULL;

	skb_reserve(skb, prepad);
	skb_put(skb, linear);
	skb->data_len = len - linear;
	skb->len += len - linear;

	return skb;
}

/* Neighbour code has some assumptions on HH_DATA_MOD alignment */
#define MACVTAP_RESERVE HH_DATA_OFF(ETH_HLEN)
/* Get packet from user space buffer */
static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m,
				struct iov_iter *from, int noblock)
{
	int good_linear = SKB_MAX_HEAD(MACVTAP_RESERVE);
	struct sk_buff *skb;
	struct macvlan_dev *vlan;
	unsigned long total_len = iov_iter_count(from);
	unsigned long len = total_len;
	int err;
	struct virtio_net_hdr vnet_hdr = { 0 };
	int vnet_hdr_len = 0;
	int copylen = 0;
	int depth;
	bool zerocopy = false;
	size_t linear;
	ssize_t n;

	if (q->flags & IFF_VNET_HDR) {
		vnet_hdr_len = q->vnet_hdr_sz;

		err = -EINVAL;
		if (len < vnet_hdr_len)
			goto err;
		len -= vnet_hdr_len;

		err = -EFAULT;
		n = copy_from_iter(&vnet_hdr, sizeof(vnet_hdr), from);
		if (n != sizeof(vnet_hdr))
			goto err;
		iov_iter_advance(from, vnet_hdr_len - sizeof(vnet_hdr));
		if ((vnet_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) &&
		    macvtap16_to_cpu(q, vnet_hdr.csum_start) +
		    macvtap16_to_cpu(q, vnet_hdr.csum_offset) + 2 >
			     macvtap16_to_cpu(q, vnet_hdr.hdr_len))
			vnet_hdr.hdr_len = cpu_to_macvtap16(q,
				 macvtap16_to_cpu(q, vnet_hdr.csum_start) +
				 macvtap16_to_cpu(q, vnet_hdr.csum_offset) + 2);
		err = -EINVAL;
		if (macvtap16_to_cpu(q, vnet_hdr.hdr_len) > len)
			goto err;
	}

	err = -EINVAL;
	if (unlikely(len < ETH_HLEN))
		goto err;

	if (m && m->msg_control && sock_flag(&q->sk, SOCK_ZEROCOPY)) {
		struct iov_iter i;

		copylen = vnet_hdr.hdr_len ?
			macvtap16_to_cpu(q, vnet_hdr.hdr_len) : GOODCOPY_LEN;
		if (copylen > good_linear)
			copylen = good_linear;
		else if (copylen < ETH_HLEN)
			copylen = ETH_HLEN;
		linear = copylen;
		i = *from;
		iov_iter_advance(&i, copylen);
		if (iov_iter_npages(&i, INT_MAX) <= MAX_SKB_FRAGS)
			zerocopy = true;
	}

	if (!zerocopy) {
		copylen = len;
		linear = macvtap16_to_cpu(q, vnet_hdr.hdr_len);
		if (linear > good_linear)
			linear = good_linear;
		else if (linear < ETH_HLEN)
			linear = ETH_HLEN;
	}

	skb = macvtap_alloc_skb(&q->sk, MACVTAP_RESERVE, copylen,
				linear, noblock, &err);
	if (!skb)
		goto err;

	if (zerocopy)
		err = zerocopy_sg_from_iter(skb, from);
	else {
		err = skb_copy_datagram_from_iter(skb, 0, from, len);
		if (!err && m && m->msg_control) {
			struct ubuf_info *uarg = m->msg_control;
			uarg->callback(uarg, false);
		}
	}

	if (err)
		goto err_kfree;

	skb_set_network_header(skb, ETH_HLEN);
	skb_reset_mac_header(skb);
	skb->protocol = eth_hdr(skb)->h_proto;

	if (vnet_hdr_len) {
		err = virtio_net_hdr_to_skb(skb, &vnet_hdr,
					    macvtap_is_little_endian(q));
		if (err)
			goto err_kfree;
	}

	skb_probe_transport_header(skb, ETH_HLEN);

	/* Move network header to the right position for VLAN tagged packets */
	if ((skb->protocol == htons(ETH_P_8021Q) ||
	     skb->protocol == htons(ETH_P_8021AD)) &&
	    __vlan_get_protocol(skb, skb->protocol, &depth) != 0)
		skb_set_network_header(skb, depth);

	rcu_read_lock();
	vlan = rcu_dereference(q->vlan);
	/* copy skb_ubuf_info for callback when skb has no error */
	if (zerocopy) {
		skb_shinfo(skb)->destructor_arg = m->msg_control;
		skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
		skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG;
	}
	if (vlan) {
		skb->dev = vlan->dev;
		dev_queue_xmit(skb);
	} else {
		kfree_skb(skb);
	}
	rcu_read_unlock();

	return total_len;

err_kfree:
	kfree_skb(skb);

err:
	rcu_read_lock();
	vlan = rcu_dereference(q->vlan);
	if (vlan)
		this_cpu_inc(vlan->pcpu_stats->tx_dropped);
	rcu_read_unlock();

	return err;
}
static ssize_t macvtap_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
	struct file *file = iocb->ki_filp;
	struct macvtap_queue *q = file->private_data;

	return macvtap_get_user(q, NULL, from, file->f_flags & O_NONBLOCK);
}
/* Put packet to the user space buffer */
static ssize_t macvtap_put_user(struct macvtap_queue *q,
				const struct sk_buff *skb,
				struct iov_iter *iter)
{
	int ret;
	int vnet_hdr_len = 0;
	int vlan_offset = 0;
	int total;

	if (q->flags & IFF_VNET_HDR) {
		struct virtio_net_hdr vnet_hdr;
		vnet_hdr_len = q->vnet_hdr_sz;
		if (iov_iter_count(iter) < vnet_hdr_len)
			return -EINVAL;

		ret = virtio_net_hdr_from_skb(skb, &vnet_hdr,
					      macvtap_is_little_endian(q));
		if (ret)
			BUG();

		if (copy_to_iter(&vnet_hdr, sizeof(vnet_hdr), iter) !=
		    sizeof(vnet_hdr))
			return -EFAULT;

		iov_iter_advance(iter, vnet_hdr_len - sizeof(vnet_hdr));
	}
	total = vnet_hdr_len;
	total += skb->len;

	if (skb_vlan_tag_present(skb)) {
		struct {
			__be16 h_vlan_proto;
			__be16 h_vlan_TCI;
		} veth;
		veth.h_vlan_proto = skb->vlan_proto;
		veth.h_vlan_TCI = htons(skb_vlan_tag_get(skb));

		vlan_offset = offsetof(struct vlan_ethhdr, h_vlan_proto);
		total += VLAN_HLEN;

		ret = skb_copy_datagram_iter(skb, 0, iter, vlan_offset);
		if (ret || !iov_iter_count(iter))
			goto done;

		ret = copy_to_iter(&veth, sizeof(veth), iter);
		if (ret != sizeof(veth) || !iov_iter_count(iter))
			goto done;
	}

	ret = skb_copy_datagram_iter(skb, vlan_offset, iter,
				     skb->len - vlan_offset);

done:
	return ret ? ret : total;
}
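
/* Blocking read: wait on the queue's wait queue until an skb shows up in
 * the skb_array, honouring O_NONBLOCK and pending signals, then copy it
 * out with macvtap_put_user().
 */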
static ssize_t macvtap_do_read(struct macvtap_queue *q,
			       struct iov_iter *to,
			       int noblock)
{
	DEFINE_WAIT(wait);
	struct sk_buff *skb;
	ssize_t ret = 0;

	if (!iov_iter_count(to))
		return 0;

	while (1) {
		if (!noblock)
			prepare_to_wait(sk_sleep(&q->sk), &wait,
					TASK_INTERRUPTIBLE);

		/* Read frames from the queue */
		skb = skb_array_consume(&q->skb_array);
		if (skb)
			break;
		if (noblock) {
			ret = -EAGAIN;
			break;
		}
		if (signal_pending(current)) {
			ret = -ERESTARTSYS;
			break;
		}
		/* Nothing to read, let's sleep */
		schedule();
	}
	if (!noblock)
		finish_wait(sk_sleep(&q->sk), &wait);

	if (skb) {
		ret = macvtap_put_user(q, skb, to);
		if (unlikely(ret < 0))
			kfree_skb(skb);
		else
			consume_skb(skb);
	}
	return ret;
}
static ssize_t macvtap_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
	struct file *file = iocb->ki_filp;
	struct macvtap_queue *q = file->private_data;
	ssize_t len = iov_iter_count(to), ret;

	ret = macvtap_do_read(q, to, file->f_flags & O_NONBLOCK);
	ret = min_t(ssize_t, ret, len);
	if (ret > 0)
		iocb->ki_pos = ret;
	return ret;
}

static struct macvlan_dev *macvtap_get_vlan(struct macvtap_queue *q)
{
	struct macvlan_dev *vlan;

	ASSERT_RTNL();
	vlan = rtnl_dereference(q->vlan);
	if (vlan)
		dev_hold(vlan->dev);

	return vlan;
}

static void macvtap_put_vlan(struct macvlan_dev *vlan)
{
	dev_put(vlan->dev);
}

static int macvtap_ioctl_set_queue(struct file *file, unsigned int flags)
{
	struct macvtap_queue *q = file->private_data;
	struct macvlan_dev *vlan;
	int ret;

	vlan = macvtap_get_vlan(q);
	if (!vlan)
		return -EINVAL;

	if (flags & IFF_ATTACH_QUEUE)
		ret = macvtap_enable_queue(vlan->dev, file, q);
	else if (flags & IFF_DETACH_QUEUE)
		ret = macvtap_disable_queue(q);
	else
		ret = -EINVAL;

	macvtap_put_vlan(vlan);
	return ret;
}
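
/* Translate the TUN_F_* bits from TUNSETOFFLOAD into netdev feature bits,
 * store them in vlan->tap_features, and toggle the matching GRO/LRO
 * features on the macvlan device so that user space only receives TSO
 * frames it asked for.
 */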
static int set_offload(struct macvtap_queue *q, unsigned long arg)
{
	struct macvlan_dev *vlan;
	netdev_features_t features;
	netdev_features_t feature_mask = 0;

	vlan = rtnl_dereference(q->vlan);
	if (!vlan)
		return -ENOLINK;

	features = vlan->dev->features;

	if (arg & TUN_F_CSUM) {
		feature_mask = NETIF_F_HW_CSUM;

		if (arg & (TUN_F_TSO4 | TUN_F_TSO6)) {
			if (arg & TUN_F_TSO_ECN)
				feature_mask |= NETIF_F_TSO_ECN;
			if (arg & TUN_F_TSO4)
				feature_mask |= NETIF_F_TSO;
			if (arg & TUN_F_TSO6)
				feature_mask |= NETIF_F_TSO6;
		}

		if (arg & TUN_F_UFO)
			feature_mask |= NETIF_F_UFO;
	}

	/* tun/tap driver inverts the usage for TSO offloads, where
	 * setting the TSO bit means that the userspace wants to
	 * accept TSO frames and turning it off means that user space
	 * does not support TSO.
	 * For macvtap, we have to invert it to mean the same thing.
	 * When user space turns off TSO, we turn off GSO/LRO so that
	 * user-space will not receive TSO frames.
	 */
	if (feature_mask & (NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_UFO))
		features |= RX_OFFLOADS;
	else
		features &= ~RX_OFFLOADS;

	/* tap_features are the same as features on tun/tap and
	 * reflect user expectations.
	 */
	vlan->tap_features = feature_mask;
	vlan->set_features = features;
	netdev_update_features(vlan->dev);

	return 0;
}
/*
 * provide compatibility with generic tun/tap interface
 */
static long macvtap_ioctl(struct file *file, unsigned int cmd,
			  unsigned long arg)
{
	struct macvtap_queue *q = file->private_data;
	struct macvlan_dev *vlan;
	void __user *argp = (void __user *)arg;
	struct ifreq __user *ifr = argp;
	unsigned int __user *up = argp;
	unsigned short u;
	int __user *sp = argp;
	struct sockaddr sa;
	int s;
	int ret;

	switch (cmd) {
	case TUNSETIFF:
		/* ignore the name, just look at flags */
		if (get_user(u, &ifr->ifr_flags))
			return -EFAULT;

		ret = 0;
		if ((u & ~MACVTAP_FEATURES) != (IFF_NO_PI | IFF_TAP))
			ret = -EINVAL;
		else
			q->flags = (q->flags & ~MACVTAP_FEATURES) | u;

		return ret;

	case TUNGETIFF:
		rtnl_lock();
		vlan = macvtap_get_vlan(q);
		if (!vlan) {
			rtnl_unlock();
			return -ENOLINK;
		}

		ret = 0;
		u = q->flags;
		if (copy_to_user(&ifr->ifr_name, vlan->dev->name, IFNAMSIZ) ||
		    put_user(u, &ifr->ifr_flags))
			ret = -EFAULT;
		macvtap_put_vlan(vlan);
		rtnl_unlock();
		return ret;

	case TUNSETQUEUE:
		if (get_user(u, &ifr->ifr_flags))
			return -EFAULT;
		rtnl_lock();
		ret = macvtap_ioctl_set_queue(file, u);
		rtnl_unlock();
		return ret;

	case TUNGETFEATURES:
		if (put_user(IFF_TAP | IFF_NO_PI | MACVTAP_FEATURES, up))
			return -EFAULT;
		return 0;

	case TUNSETSNDBUF:
		if (get_user(s, sp))
			return -EFAULT;

		q->sk.sk_sndbuf = s;
		return 0;

	case TUNGETVNETHDRSZ:
		s = q->vnet_hdr_sz;
		if (put_user(s, sp))
			return -EFAULT;
		return 0;

	case TUNSETVNETHDRSZ:
		if (get_user(s, sp))
			return -EFAULT;
		if (s < (int)sizeof(struct virtio_net_hdr))
			return -EINVAL;

		q->vnet_hdr_sz = s;
		return 0;

	case TUNGETVNETLE:
		s = !!(q->flags & MACVTAP_VNET_LE);
		if (put_user(s, sp))
			return -EFAULT;
		return 0;

	case TUNSETVNETLE:
		if (get_user(s, sp))
			return -EFAULT;
		if (s)
			q->flags |= MACVTAP_VNET_LE;
		else
			q->flags &= ~MACVTAP_VNET_LE;
		return 0;

	case TUNGETVNETBE:
		return macvtap_get_vnet_be(q, sp);

	case TUNSETVNETBE:
		return macvtap_set_vnet_be(q, sp);

	case TUNSETOFFLOAD:
		/* let the user check for future flags */
		if (arg & ~(TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 |
			    TUN_F_TSO_ECN | TUN_F_UFO))
			return -EINVAL;

		rtnl_lock();
		ret = set_offload(q, arg);
		rtnl_unlock();
		return ret;

	case SIOCGIFHWADDR:
		rtnl_lock();
		vlan = macvtap_get_vlan(q);
		if (!vlan) {
			rtnl_unlock();
			return -ENOLINK;
		}
		ret = 0;
		u = vlan->dev->type;
		if (copy_to_user(&ifr->ifr_name, vlan->dev->name, IFNAMSIZ) ||
		    copy_to_user(&ifr->ifr_hwaddr.sa_data, vlan->dev->dev_addr, ETH_ALEN) ||
		    put_user(u, &ifr->ifr_hwaddr.sa_family))
			ret = -EFAULT;
		macvtap_put_vlan(vlan);
		rtnl_unlock();
		return ret;

	case SIOCSIFHWADDR:
		if (copy_from_user(&sa, &ifr->ifr_hwaddr, sizeof(sa)))
			return -EFAULT;
		rtnl_lock();
		vlan = macvtap_get_vlan(q);
		if (!vlan) {
			rtnl_unlock();
			return -ENOLINK;
		}
		ret = dev_set_mac_address(vlan->dev, &sa);
		macvtap_put_vlan(vlan);
		rtnl_unlock();
		return ret;

	default:
		return -EINVAL;
	}
}
#ifdef CONFIG_COMPAT
static long macvtap_compat_ioctl(struct file *file, unsigned int cmd,
				 unsigned long arg)
{
	return macvtap_ioctl(file, cmd, (unsigned long)compat_ptr(arg));
}
#endif

static const struct file_operations macvtap_fops = {
	.owner		= THIS_MODULE,
	.open		= macvtap_open,
	.release	= macvtap_release,
	.read_iter	= macvtap_read_iter,
	.write_iter	= macvtap_write_iter,
	.poll		= macvtap_poll,
	.llseek		= no_llseek,
	.unlocked_ioctl	= macvtap_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl	= macvtap_compat_ioctl,
#endif
};
static int macvtap_sendmsg(struct socket *sock, struct msghdr *m,
			   size_t total_len)
{
	struct macvtap_queue *q = container_of(sock, struct macvtap_queue, sock);
	return macvtap_get_user(q, m, &m->msg_iter, m->msg_flags & MSG_DONTWAIT);
}

static int macvtap_recvmsg(struct socket *sock, struct msghdr *m,
			   size_t total_len, int flags)
{
	struct macvtap_queue *q = container_of(sock, struct macvtap_queue, sock);
	int ret;

	if (flags & ~(MSG_DONTWAIT|MSG_TRUNC))
		return -EINVAL;

	ret = macvtap_do_read(q, &m->msg_iter, flags & MSG_DONTWAIT);
	if (ret > total_len) {
		m->msg_flags |= MSG_TRUNC;
		ret = flags & MSG_TRUNC ? ret : total_len;
	}
	return ret;
}

static int macvtap_peek_len(struct socket *sock)
{
	struct macvtap_queue *q = container_of(sock, struct macvtap_queue,
					       sock);
	return skb_array_peek_len(&q->skb_array);
}

/* Ops structure to mimic raw sockets with tun */
static const struct proto_ops macvtap_socket_ops = {
	.sendmsg = macvtap_sendmsg,
	.recvmsg = macvtap_recvmsg,
	.peek_len = macvtap_peek_len,
};
/* Get an underlying socket object from tun file. Returns error unless file is
 * attached to a device. The returned object works like a packet socket, it
 * can be used for sock_sendmsg/sock_recvmsg. The caller is responsible for
 * holding a reference to the file for as long as the socket is in use. */
struct socket *macvtap_get_socket(struct file *file)
{
	struct macvtap_queue *q;
	if (file->f_op != &macvtap_fops)
		return ERR_PTR(-EINVAL);
	q = file->private_data;
	if (!q)
		return ERR_PTR(-EBADFD);
	return &q->sock;
}
EXPORT_SYMBOL_GPL(macvtap_get_socket);
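
/* Called on NETDEV_CHANGE_TX_QUEUE_LEN: resize the skb_array of every
 * attached queue to the device's new tx_queue_len in one batched operation.
 */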
static int macvtap_queue_resize(struct macvlan_dev *vlan)
{
	struct net_device *dev = vlan->dev;
	struct macvtap_queue *q;
	struct skb_array **arrays;
	int n = vlan->numqueues;
	int ret, i = 0;

	arrays = kmalloc(sizeof *arrays * n, GFP_KERNEL);
	if (!arrays)
		return -ENOMEM;

	list_for_each_entry(q, &vlan->queue_list, next)
		arrays[i++] = &q->skb_array;

	ret = skb_array_resize_multiple(arrays, n,
					dev->tx_queue_len, GFP_KERNEL);

	kfree(arrays);
	return ret;
}
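
/* Netdevice notifier: create the /dev/tapN character device node when a
 * macvtap link registers, tear it down on unregister, and resize the
 * per-queue rings when the tx queue length changes.
 */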
static int macvtap_device_event(struct notifier_block *unused,
				unsigned long event, void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
	struct macvlan_dev *vlan;
	struct device *classdev;
	dev_t devt;
	int err;
	char tap_name[IFNAMSIZ];

	if (dev->rtnl_link_ops != &macvtap_link_ops)
		return NOTIFY_DONE;

	snprintf(tap_name, IFNAMSIZ, "tap%d", dev->ifindex);
	vlan = netdev_priv(dev);

	switch (event) {
	case NETDEV_REGISTER:
		/* Create the device node here after the network device has
		 * been registered but before register_netdevice has
		 * finished running.
		 */
		err = macvtap_get_minor(vlan);
		if (err)
			return notifier_from_errno(err);

		devt = MKDEV(MAJOR(macvtap_major), vlan->minor);
		classdev = device_create(&macvtap_class, &dev->dev, devt,
					 dev, tap_name);
		if (IS_ERR(classdev)) {
			macvtap_free_minor(vlan);
			return notifier_from_errno(PTR_ERR(classdev));
		}
		err = sysfs_create_link(&dev->dev.kobj, &classdev->kobj,
					tap_name);
		if (err)
			return notifier_from_errno(err);
		break;
	case NETDEV_UNREGISTER:
		/* vlan->minor == 0 if NETDEV_REGISTER above failed */
		if (vlan->minor == 0)
			break;
		sysfs_remove_link(&dev->dev.kobj, tap_name);
		devt = MKDEV(MAJOR(macvtap_major), vlan->minor);
		device_destroy(&macvtap_class, devt);
		macvtap_free_minor(vlan);
		break;
	case NETDEV_CHANGE_TX_QUEUE_LEN:
		if (macvtap_queue_resize(vlan))
			return NOTIFY_BAD;
		break;
	}

	return NOTIFY_DONE;
}
static struct notifier_block macvtap_notifier_block __read_mostly = {
	.notifier_call	= macvtap_device_event,
};
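
/* Module init: reserve the character device region, then register the cdev,
 * the device class, the netdevice notifier and finally the rtnl link ops;
 * the out5..out1 labels unwind these steps in reverse order on failure.
 */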
static int macvtap_init(void)
{
	int err;

	err = alloc_chrdev_region(&macvtap_major, 0,
				  MACVTAP_NUM_DEVS, "macvtap");
	if (err)
		goto out1;

	cdev_init(&macvtap_cdev, &macvtap_fops);
	err = cdev_add(&macvtap_cdev, macvtap_major, MACVTAP_NUM_DEVS);
	if (err)
		goto out2;

	err = class_register(&macvtap_class);
	if (err)
		goto out3;

	err = register_netdevice_notifier(&macvtap_notifier_block);
	if (err)
		goto out4;

	err = macvlan_link_register(&macvtap_link_ops);
	if (err)
		goto out5;

	return 0;

out5:
	unregister_netdevice_notifier(&macvtap_notifier_block);
out4:
	class_unregister(&macvtap_class);
out3:
	cdev_del(&macvtap_cdev);
out2:
	unregister_chrdev_region(macvtap_major, MACVTAP_NUM_DEVS);
out1:
	return err;
}
module_init(macvtap_init);

static void macvtap_exit(void)
{
	rtnl_link_unregister(&macvtap_link_ops);
	unregister_netdevice_notifier(&macvtap_notifier_block);
	class_unregister(&macvtap_class);
	cdev_del(&macvtap_cdev);
	unregister_chrdev_region(macvtap_major, MACVTAP_NUM_DEVS);
	idr_destroy(&minor_idr);
}
module_exit(macvtap_exit);

MODULE_ALIAS_RTNL_LINK("macvtap");
MODULE_AUTHOR("Arnd Bergmann <arnd@arndb.de>");
MODULE_LICENSE("GPL");