#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/workqueue.h>
#include <linux/rtnetlink.h>
#include <linux/cache.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/delay.h>
#include <linux/sched.h>
#include <linux/idr.h>
#include <linux/rculist.h>
#include <linux/nsproxy.h>
#include <linux/fs.h>
#include <linux/proc_ns.h>
#include <linux/file.h>
#include <linux/export.h>
#include <linux/user_namespace.h>
#include <linux/net_namespace.h>
#include <net/sock.h>
#include <net/netlink.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
/*
 *	Our network namespace constructor/destructor lists
 */

static LIST_HEAD(pernet_list);
static struct list_head *first_device = &pernet_list;
DEFINE_MUTEX(net_mutex);

LIST_HEAD(net_namespace_list);
EXPORT_SYMBOL_GPL(net_namespace_list);

struct net init_net = {
	.dev_base_head = LIST_HEAD_INIT(init_net.dev_base_head),
};
EXPORT_SYMBOL(init_net);

static bool init_net_initialized;

#define MIN_PERNET_OPS_ID	\
	((sizeof(struct net_generic) + sizeof(void *) - 1) / sizeof(void *))

#define INITIAL_NET_GEN_PTRS	13 /* +1 for len +2 for rcu_head */

static unsigned int max_gen_ptrs = INITIAL_NET_GEN_PTRS;

static struct net_generic *net_alloc_generic(void)
{
	struct net_generic *ng;
	unsigned int generic_size = offsetof(struct net_generic, ptr[max_gen_ptrs]);

	ng = kzalloc(generic_size, GFP_KERNEL);
	if (ng)
		ng->s.len = max_gen_ptrs;

	return ng;
}
static int net_assign_generic(struct net *net, unsigned int id, void *data)
{
	struct net_generic *ng, *old_ng;

	BUG_ON(!mutex_is_locked(&net_mutex));
	BUG_ON(id < MIN_PERNET_OPS_ID);

	old_ng = rcu_dereference_protected(net->gen,
					   lockdep_is_held(&net_mutex));
	if (old_ng->s.len > id) {
		old_ng->ptr[id] = data;
		return 0;
	}

	ng = net_alloc_generic();
	if (ng == NULL)
		return -ENOMEM;

	/*
	 * Some synchronisation notes:
	 *
	 * net_generic() walks the net->gen array inside an RCU read-side
	 * section. Besides, once set, the net->gen->ptr[x] pointer never
	 * changes (see rules in netns/generic.h).
	 *
	 * That said, we simply duplicate this array and schedule
	 * the old copy for kfree after a grace period.
	 */

	memcpy(&ng->ptr[MIN_PERNET_OPS_ID], &old_ng->ptr[MIN_PERNET_OPS_ID],
	       (old_ng->s.len - MIN_PERNET_OPS_ID) * sizeof(void *));
	ng->ptr[id] = data;

	rcu_assign_pointer(net->gen, ng);
	kfree_rcu(old_ng, s.rcu);
	return 0;
}
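
/*
 * Reader-side sketch (illustrative; the "foo" names are hypothetical and not
 * part of this file). Because ptr[id] never changes once set, a subsystem can
 * fetch its per-namespace data with net_generic() at any time and keep using
 * the pointer for as long as it holds a reference on the netns:
 *
 *	struct foo_net {
 *		int some_state;
 *	};
 *	static unsigned int foo_net_id;
 *
 *	static struct foo_net *foo_pernet(struct net *net)
 *	{
 *		return net_generic(net, foo_net_id);
 *	}
 */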
static int ops_init(const struct pernet_operations *ops, struct net *net)
{
	int err = -ENOMEM;
	void *data = NULL;

	if (ops->id && ops->size) {
		data = kzalloc(ops->size, GFP_KERNEL);
		if (!data)
			goto out;

		err = net_assign_generic(net, *ops->id, data);
		if (err)
			goto cleanup;
	}
	err = 0;
	if (ops->init)
		err = ops->init(net);
	if (!err)
		return 0;

cleanup:
	kfree(data);

out:
	return err;
}

static void ops_free(const struct pernet_operations *ops, struct net *net)
{
	if (ops->id && ops->size) {
		kfree(net_generic(net, *ops->id));
	}
}

static void ops_exit_list(const struct pernet_operations *ops,
			  struct list_head *net_exit_list)
{
	struct net *net;
	if (ops->exit) {
		list_for_each_entry(net, net_exit_list, exit_list)
			ops->exit(net);
	}
	if (ops->exit_batch)
		ops->exit_batch(net_exit_list);
}

static void ops_free_list(const struct pernet_operations *ops,
			  struct list_head *net_exit_list)
{
	struct net *net;
	if (ops->size && ops->id) {
		list_for_each_entry(net, net_exit_list, exit_list)
			ops_free(ops, net);
	}
}
/* should be called with nsid_lock held */
static int alloc_netid(struct net *net, struct net *peer, int reqid)
{
	int min = 0, max = 0;

	if (reqid >= 0) {
		min = reqid;
		max = reqid + 1;
	}

	return idr_alloc(&net->netns_ids, peer, min, max, GFP_ATOMIC);
}
/* This function is used by idr_for_each(). If net is equal to peer, the
 * function returns the id so that idr_for_each() stops. Because we cannot
 * return the id 0 (idr_for_each() would not stop), we return the magic value
 * NET_ID_ZERO (-1) for it.
 */
#define NET_ID_ZERO -1
static int net_eq_idr(int id, void *net, void *peer)
{
	if (net_eq(net, peer))
		return id ? : NET_ID_ZERO;
	return 0;
}
/* Should be called with nsid_lock held. If a new id is assigned, the bool
 * alloc is set to true, so the caller knows it must notify the new id via
 * rtnl.
 */
static int __peernet2id_alloc(struct net *net, struct net *peer, bool *alloc)
{
	int id = idr_for_each(&net->netns_ids, net_eq_idr, peer);
	bool alloc_it = *alloc;

	*alloc = false;

	/* Magic value for id 0. */
	if (id == NET_ID_ZERO)
		return 0;
	if (id > 0)
		return id;

	if (alloc_it) {
		id = alloc_netid(net, peer, -1);
		*alloc = true;
		return id >= 0 ? id : NETNSA_NSID_NOT_ASSIGNED;
	}

	return NETNSA_NSID_NOT_ASSIGNED;
}
/* should be called with nsid_lock held */
static int __peernet2id(struct net *net, struct net *peer)
{
	bool no = false;

	return __peernet2id_alloc(net, peer, &no);
}

static void rtnl_net_notifyid(struct net *net, int cmd, int id);
/* This function returns the id of a peer netns. If no id is assigned, one will
 * be allocated and returned.
 */
int peernet2id_alloc(struct net *net, struct net *peer)
{
	unsigned long flags;
	bool alloc;
	int id;

	if (atomic_read(&net->count) == 0)
		return NETNSA_NSID_NOT_ASSIGNED;
	spin_lock_irqsave(&net->nsid_lock, flags);
	alloc = atomic_read(&peer->count) == 0 ? false : true;
	id = __peernet2id_alloc(net, peer, &alloc);
	spin_unlock_irqrestore(&net->nsid_lock, flags);
	if (alloc && id >= 0)
		rtnl_net_notifyid(net, RTM_NEWNSID, id);
	return id;
}
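
/*
 * Caller sketch (illustrative): rtnetlink uses this when describing a device
 * whose link partner lives in another namespace, so userspace can resolve the
 * peer through an nsid attribute such as IFLA_LINK_NETNSID:
 *
 *	int nsid = peernet2id_alloc(dev_net(dev), link_net);
 *
 *	if (nla_put_s32(skb, IFLA_LINK_NETNSID, nsid))
 *		goto nla_put_failure;
 */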
/* This function returns, if assigned, the id of a peer netns. */
int peernet2id(struct net *net, struct net *peer)
{
	unsigned long flags;
	int id;

	spin_lock_irqsave(&net->nsid_lock, flags);
	id = __peernet2id(net, peer);
	spin_unlock_irqrestore(&net->nsid_lock, flags);
	return id;
}
EXPORT_SYMBOL(peernet2id);

/* This function returns true if the peer netns has an id assigned into the
 * current netns.
 */
bool peernet_has_id(struct net *net, struct net *peer)
{
	return peernet2id(net, peer) >= 0;
}
struct net *get_net_ns_by_id(struct net *net, int id)
{
	unsigned long flags;
	struct net *peer;

	if (id < 0)
		return NULL;

	rcu_read_lock();
	spin_lock_irqsave(&net->nsid_lock, flags);
	peer = idr_find(&net->netns_ids, id);
	if (peer)
		get_net(peer);
	spin_unlock_irqrestore(&net->nsid_lock, flags);
	rcu_read_unlock();

	return peer;
}
/*
 * setup_net runs the initializers for the network namespace object.
 */
static __net_init int setup_net(struct net *net, struct user_namespace *user_ns)
{
	/* Must be called with net_mutex held */
	const struct pernet_operations *ops, *saved_ops;
	int error = 0;
	LIST_HEAD(net_exit_list);

	atomic_set(&net->count, 1);
	atomic_set(&net->passive, 1);
	net->dev_base_seq = 1;
	net->user_ns = user_ns;
	idr_init(&net->netns_ids);
	spin_lock_init(&net->nsid_lock);

	list_for_each_entry(ops, &pernet_list, list) {
		error = ops_init(ops, net);
		if (error < 0)
			goto out_undo;
	}
out:
	return error;

out_undo:
	/* Walk through the list backwards calling the exit functions
	 * for the pernet modules whose init functions did not fail.
	 */
	list_add(&net->exit_list, &net_exit_list);
	saved_ops = ops;
	list_for_each_entry_continue_reverse(ops, &pernet_list, list)
		ops_exit_list(ops, &net_exit_list);

	ops = saved_ops;
	list_for_each_entry_continue_reverse(ops, &pernet_list, list)
		ops_free_list(ops, &net_exit_list);

	rcu_barrier();
	goto out;
}
#ifdef CONFIG_NET_NS
static struct ucounts *inc_net_namespaces(struct user_namespace *ns)
{
	return inc_ucount(ns, current_euid(), UCOUNT_NET_NAMESPACES);
}

static void dec_net_namespaces(struct ucounts *ucounts)
{
	dec_ucount(ucounts, UCOUNT_NET_NAMESPACES);
}

static struct kmem_cache *net_cachep;
static struct workqueue_struct *netns_wq;

static struct net *net_alloc(void)
{
	struct net *net = NULL;
	struct net_generic *ng;

	ng = net_alloc_generic();
	if (!ng)
		goto out;

	net = kmem_cache_zalloc(net_cachep, GFP_KERNEL);
	if (!net)
		goto out_free;

	rcu_assign_pointer(net->gen, ng);
out:
	return net;

out_free:
	kfree(ng);
	goto out;
}

static void net_free(struct net *net)
{
	kfree(rcu_access_pointer(net->gen));
	kmem_cache_free(net_cachep, net);
}

void net_drop_ns(void *p)
{
	struct net *ns = p;
	if (ns && atomic_dec_and_test(&ns->passive))
		net_free(ns);
}
struct net *copy_net_ns(unsigned long flags,
			struct user_namespace *user_ns, struct net *old_net)
{
	struct ucounts *ucounts;
	struct net *net;
	int rv;

	if (!(flags & CLONE_NEWNET))
		return get_net(old_net);

	ucounts = inc_net_namespaces(user_ns);
	if (!ucounts)
		return ERR_PTR(-ENOSPC);

	net = net_alloc();
	if (!net) {
		dec_net_namespaces(ucounts);
		return ERR_PTR(-ENOMEM);
	}

	get_user_ns(user_ns);

	rv = mutex_lock_killable(&net_mutex);
	if (rv < 0) {
		net_free(net);
		dec_net_namespaces(ucounts);
		put_user_ns(user_ns);
		return ERR_PTR(rv);
	}

	net->ucounts = ucounts;
	rv = setup_net(net, user_ns);
	if (rv == 0) {
		rtnl_lock();
		list_add_tail_rcu(&net->list, &net_namespace_list);
		rtnl_unlock();
	}
	mutex_unlock(&net_mutex);
	if (rv < 0) {
		dec_net_namespaces(ucounts);
		put_user_ns(user_ns);
		net_drop_ns(net);
		return ERR_PTR(rv);
	}
	return net;
}
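
/*
 * Userspace sketch of what drives this path (illustrative): unsharing the
 * network namespace ends up in copy_net_ns() via copy_namespaces():
 *
 *	#define _GNU_SOURCE
 *	#include <sched.h>
 *
 *	if (unshare(CLONE_NEWNET) < 0)
 *		perror("unshare");
 *	// the task now runs in a freshly set-up netns (loopback only)
 */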
static DEFINE_SPINLOCK(cleanup_list_lock);
static LIST_HEAD(cleanup_list);  /* Must hold cleanup_list_lock to touch */

static void cleanup_net(struct work_struct *work)
{
	const struct pernet_operations *ops;
	struct net *net, *tmp;
	struct list_head net_kill_list;
	LIST_HEAD(net_exit_list);

	/* Atomically snapshot the list of namespaces to clean up */
	spin_lock_irq(&cleanup_list_lock);
	list_replace_init(&cleanup_list, &net_kill_list);
	spin_unlock_irq(&cleanup_list_lock);

	mutex_lock(&net_mutex);

	/* Don't let anyone else find us. */
	rtnl_lock();
	list_for_each_entry(net, &net_kill_list, cleanup_list) {
		list_del_rcu(&net->list);
		list_add_tail(&net->exit_list, &net_exit_list);
		for_each_net(tmp) {
			int id;

			spin_lock_irq(&tmp->nsid_lock);
			id = __peernet2id(tmp, net);
			if (id >= 0)
				idr_remove(&tmp->netns_ids, id);
			spin_unlock_irq(&tmp->nsid_lock);
			if (id >= 0)
				rtnl_net_notifyid(tmp, RTM_DELNSID, id);
		}
		spin_lock_irq(&net->nsid_lock);
		idr_destroy(&net->netns_ids);
		spin_unlock_irq(&net->nsid_lock);
	}
	rtnl_unlock();

	/*
	 * Another CPU might be rcu-iterating the list, wait for it.
	 * This needs to be before calling the exit() notifiers, so
	 * the rcu_barrier() below isn't sufficient alone.
	 */
	synchronize_rcu();

	/* Run all of the network namespace exit methods */
	list_for_each_entry_reverse(ops, &pernet_list, list)
		ops_exit_list(ops, &net_exit_list);

	/* Free the net generic variables */
	list_for_each_entry_reverse(ops, &pernet_list, list)
		ops_free_list(ops, &net_exit_list);

	mutex_unlock(&net_mutex);

	/* Ensure there are no outstanding rcu callbacks using this
	 * network namespace.
	 */
	rcu_barrier();

	/* Finally it is safe to free my network namespace structure */
	list_for_each_entry_safe(net, tmp, &net_exit_list, exit_list) {
		list_del_init(&net->exit_list);
		dec_net_namespaces(net->ucounts);
		put_user_ns(net->user_ns);
		net_drop_ns(net);
	}
}
static DECLARE_WORK(net_cleanup_work, cleanup_net);

void __put_net(struct net *net)
{
	/* Clean up the network namespace in process context */
	unsigned long flags;

	spin_lock_irqsave(&cleanup_list_lock, flags);
	list_add(&net->cleanup_list, &cleanup_list);
	spin_unlock_irqrestore(&cleanup_list_lock, flags);

	queue_work(netns_wq, &net_cleanup_work);
}
EXPORT_SYMBOL_GPL(__put_net);
struct net *get_net_ns_by_fd(int fd)
{
	struct file *file;
	struct ns_common *ns;
	struct net *net;

	file = proc_ns_fget(fd);
	if (IS_ERR(file))
		return ERR_CAST(file);

	ns = get_proc_ns(file_inode(file));
	if (ns->ops == &netns_operations)
		net = get_net(container_of(ns, struct net, ns));
	else
		net = ERR_PTR(-EINVAL);

	fput(file);
	return net;
}

#else
struct net *get_net_ns_by_fd(int fd)
{
	return ERR_PTR(-EINVAL);
}
#endif
EXPORT_SYMBOL_GPL(get_net_ns_by_fd);
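
/*
 * Userspace sketch (illustrative): the fd handed to get_net_ns_by_fd() is
 * typically a /proc/<pid>/ns/net file, e.g.
 *
 *	int fd = open("/proc/self/ns/net", O_RDONLY);
 *
 * which proc_ns_fget() then resolves back to the owning struct net.
 */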
struct net *get_net_ns_by_pid(pid_t pid)
{
	struct task_struct *tsk;
	struct net *net;

	/* Look up the network namespace */
	net = ERR_PTR(-ESRCH);
	rcu_read_lock();
	tsk = find_task_by_vpid(pid);
	if (tsk) {
		struct nsproxy *nsproxy;
		task_lock(tsk);
		nsproxy = tsk->nsproxy;
		if (nsproxy)
			net = get_net(nsproxy->net_ns);
		task_unlock(tsk);
	}
	rcu_read_unlock();
	return net;
}
EXPORT_SYMBOL_GPL(get_net_ns_by_pid);
static __net_init int net_ns_net_init(struct net *net)
{
#ifdef CONFIG_NET_NS
	net->ns.ops = &netns_operations;
#endif
	return ns_alloc_inum(&net->ns);
}

static __net_exit void net_ns_net_exit(struct net *net)
{
	ns_free_inum(&net->ns);
}

static struct pernet_operations __net_initdata net_ns_ops = {
	.init = net_ns_net_init,
	.exit = net_ns_net_exit,
};

static const struct nla_policy rtnl_net_policy[NETNSA_MAX + 1] = {
	[NETNSA_NONE]	= { .type = NLA_UNSPEC },
	[NETNSA_NSID]	= { .type = NLA_S32 },
	[NETNSA_PID]	= { .type = NLA_U32 },
	[NETNSA_FD]	= { .type = NLA_U32 },
};
static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh)
{
	struct net *net = sock_net(skb->sk);
	struct nlattr *tb[NETNSA_MAX + 1];
	unsigned long flags;
	struct net *peer;
	int nsid, err;

	err = nlmsg_parse(nlh, sizeof(struct rtgenmsg), tb, NETNSA_MAX,
			  rtnl_net_policy);
	if (err < 0)
		return err;
	if (!tb[NETNSA_NSID])
		return -EINVAL;
	nsid = nla_get_s32(tb[NETNSA_NSID]);

	if (tb[NETNSA_PID])
		peer = get_net_ns_by_pid(nla_get_u32(tb[NETNSA_PID]));
	else if (tb[NETNSA_FD])
		peer = get_net_ns_by_fd(nla_get_u32(tb[NETNSA_FD]));
	else
		return -EINVAL;
	if (IS_ERR(peer))
		return PTR_ERR(peer);

	spin_lock_irqsave(&net->nsid_lock, flags);
	if (__peernet2id(net, peer) >= 0) {
		spin_unlock_irqrestore(&net->nsid_lock, flags);
		err = -EEXIST;
		goto out;
	}

	err = alloc_netid(net, peer, nsid);
	spin_unlock_irqrestore(&net->nsid_lock, flags);
	if (err >= 0) {
		rtnl_net_notifyid(net, RTM_NEWNSID, err);
		err = 0;
	}
out:
	put_net(peer);
	return err;
}
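
/*
 * Userspace sketch (illustrative): iproute2 drives this handler with
 *
 *	ip netns set <name> <nsid>
 *
 * which sends an RTM_NEWNSID message carrying NETNSA_FD (an fd on the target
 * namespace) plus the requested NETNSA_NSID.
 */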
static int rtnl_net_get_size(void)
{
	return NLMSG_ALIGN(sizeof(struct rtgenmsg))
	       + nla_total_size(sizeof(s32)) /* NETNSA_NSID */
	       ;
}

static int rtnl_net_fill(struct sk_buff *skb, u32 portid, u32 seq, int flags,
			 int cmd, struct net *net, int nsid)
{
	struct nlmsghdr *nlh;
	struct rtgenmsg *rth;

	nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rth), flags);
	if (!nlh)
		return -EMSGSIZE;

	rth = nlmsg_data(nlh);
	rth->rtgen_family = AF_UNSPEC;

	if (nla_put_s32(skb, NETNSA_NSID, nsid))
		goto nla_put_failure;

	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
static int rtnl_net_getid(struct sk_buff *skb, struct nlmsghdr *nlh)
{
	struct net *net = sock_net(skb->sk);
	struct nlattr *tb[NETNSA_MAX + 1];
	struct sk_buff *msg;
	struct net *peer;
	int err, id;

	err = nlmsg_parse(nlh, sizeof(struct rtgenmsg), tb, NETNSA_MAX,
			  rtnl_net_policy);
	if (err < 0)
		return err;
	if (tb[NETNSA_PID])
		peer = get_net_ns_by_pid(nla_get_u32(tb[NETNSA_PID]));
	else if (tb[NETNSA_FD])
		peer = get_net_ns_by_fd(nla_get_u32(tb[NETNSA_FD]));
	else
		return -EINVAL;

	if (IS_ERR(peer))
		return PTR_ERR(peer);

	msg = nlmsg_new(rtnl_net_get_size(), GFP_KERNEL);
	if (!msg) {
		err = -ENOMEM;
		goto out;
	}

	id = peernet2id(net, peer);
	err = rtnl_net_fill(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0,
			    RTM_NEWNSID, net, id);
	if (err < 0)
		goto err_out;

	err = rtnl_unicast(msg, net, NETLINK_CB(skb).portid);
	goto out;

err_out:
	nlmsg_free(msg);
out:
	put_net(peer);
	return err;
}
struct rtnl_net_dump_cb {
	struct net *net;
	struct sk_buff *skb;
	struct netlink_callback *cb;
	int idx;
	int s_idx;
};

static int rtnl_net_dumpid_one(int id, void *peer, void *data)
{
	struct rtnl_net_dump_cb *net_cb = (struct rtnl_net_dump_cb *)data;
	int ret;

	if (net_cb->idx < net_cb->s_idx)
		goto cont;

	ret = rtnl_net_fill(net_cb->skb, NETLINK_CB(net_cb->cb->skb).portid,
			    net_cb->cb->nlh->nlmsg_seq, NLM_F_MULTI,
			    RTM_NEWNSID, net_cb->net, id);
	if (ret < 0)
		return ret;

cont:
	net_cb->idx++;
	return 0;
}

static int rtnl_net_dumpid(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	struct rtnl_net_dump_cb net_cb = {
		.net = net,
		.skb = skb,
		.cb = cb,
		.idx = 0,
		.s_idx = cb->args[0],
	};
	unsigned long flags;

	spin_lock_irqsave(&net->nsid_lock, flags);
	idr_for_each(&net->netns_ids, rtnl_net_dumpid_one, &net_cb);
	spin_unlock_irqrestore(&net->nsid_lock, flags);

	cb->args[0] = net_cb.idx;
	return skb->len;
}
static void rtnl_net_notifyid(struct net *net, int cmd, int id)
{
	struct sk_buff *msg;
	int err = -ENOMEM;

	msg = nlmsg_new(rtnl_net_get_size(), GFP_KERNEL);
	if (!msg)
		goto out;

	err = rtnl_net_fill(msg, 0, 0, 0, cmd, net, id);
	if (err < 0)
		goto err_out;

	rtnl_notify(msg, net, 0, RTNLGRP_NSID, NULL, 0);
	return;

err_out:
	nlmsg_free(msg);
out:
	rtnl_set_sk_err(net, RTNLGRP_NSID, err);
}
static int __init net_ns_init(void)
{
	struct net_generic *ng;

#ifdef CONFIG_NET_NS
	net_cachep = kmem_cache_create("net_namespace", sizeof(struct net),
				       SMP_CACHE_BYTES,
				       SLAB_PANIC, NULL);

	/* Create workqueue for cleanup */
	netns_wq = create_singlethread_workqueue("netns");
	if (!netns_wq)
		panic("Could not create netns workq");
#endif

	ng = net_alloc_generic();
	if (!ng)
		panic("Could not allocate generic netns");

	rcu_assign_pointer(init_net.gen, ng);

	mutex_lock(&net_mutex);
	if (setup_net(&init_net, &init_user_ns))
		panic("Could not setup the initial network namespace");

	init_net_initialized = true;

	rtnl_lock();
	list_add_tail_rcu(&init_net.list, &net_namespace_list);
	rtnl_unlock();

	mutex_unlock(&net_mutex);

	register_pernet_subsys(&net_ns_ops);

	rtnl_register(PF_UNSPEC, RTM_NEWNSID, rtnl_net_newid, NULL, NULL);
	rtnl_register(PF_UNSPEC, RTM_GETNSID, rtnl_net_getid, rtnl_net_dumpid,
		      NULL);

	return 0;
}

pure_initcall(net_ns_init);
#ifdef CONFIG_NET_NS
static int __register_pernet_operations(struct list_head *list,
					struct pernet_operations *ops)
{
	struct net *net;
	int error;
	LIST_HEAD(net_exit_list);

	list_add_tail(&ops->list, list);
	if (ops->init || (ops->id && ops->size)) {
		for_each_net(net) {
			error = ops_init(ops, net);
			if (error)
				goto out_undo;
			list_add_tail(&net->exit_list, &net_exit_list);
		}
	}
	return 0;
out_undo:
	/* If I have an error, clean up all namespaces I initialized */
	list_del(&ops->list);
	ops_exit_list(ops, &net_exit_list);
	ops_free_list(ops, &net_exit_list);
	return error;
}
static void __unregister_pernet_operations(struct pernet_operations *ops)
{
	struct net *net;
	LIST_HEAD(net_exit_list);

	list_del(&ops->list);
	for_each_net(net)
		list_add_tail(&net->exit_list, &net_exit_list);
	ops_exit_list(ops, &net_exit_list);
	ops_free_list(ops, &net_exit_list);
}

#else

static int __register_pernet_operations(struct list_head *list,
					struct pernet_operations *ops)
{
	if (!init_net_initialized) {
		list_add_tail(&ops->list, list);
		return 0;
	}

	return ops_init(ops, &init_net);
}

static void __unregister_pernet_operations(struct pernet_operations *ops)
{
	if (!init_net_initialized) {
		list_del(&ops->list);
	} else {
		LIST_HEAD(net_exit_list);
		list_add(&init_net.exit_list, &net_exit_list);
		ops_exit_list(ops, &net_exit_list);
		ops_free_list(ops, &net_exit_list);
	}
}

#endif /* CONFIG_NET_NS */
static DEFINE_IDA(net_generic_ids);

static int register_pernet_operations(struct list_head *list,
				      struct pernet_operations *ops)
{
	int error;

	if (ops->id) {
again:
		error = ida_get_new_above(&net_generic_ids, MIN_PERNET_OPS_ID, ops->id);
		if (error < 0) {
			if (error == -EAGAIN) {
				ida_pre_get(&net_generic_ids, GFP_KERNEL);
				goto again;
			}
			return error;
		}
		max_gen_ptrs = max(max_gen_ptrs, *ops->id + 1);
	}
	error = __register_pernet_operations(list, ops);
	if (error) {
		rcu_barrier();
		if (ops->id)
			ida_remove(&net_generic_ids, *ops->id);
	}

	return error;
}

static void unregister_pernet_operations(struct pernet_operations *ops)
{
	__unregister_pernet_operations(ops);
	rcu_barrier();
	if (ops->id)
		ida_remove(&net_generic_ids, *ops->id);
}
/**
 * register_pernet_subsys - register a network namespace subsystem
 * @ops: pernet operations structure for the subsystem
 *
 * Register a subsystem which has init and exit functions
 * that are called when network namespaces are created and
 * destroyed respectively.
 *
 * When registered, all network namespace init functions are
 * called for every existing network namespace, allowing kernel
 * modules to have a race-free view of the set of network namespaces.
 *
 * When a new network namespace is created all of the init
 * methods are called in the order in which they were registered.
 *
 * When a network namespace is destroyed all of the exit methods
 * are called in the reverse of the order with which they were
 * registered.
 */
int register_pernet_subsys(struct pernet_operations *ops)
{
	int error;
	mutex_lock(&net_mutex);
	error = register_pernet_operations(first_device, ops);
	mutex_unlock(&net_mutex);
	return error;
}
EXPORT_SYMBOL_GPL(register_pernet_subsys);

/**
 * unregister_pernet_subsys - unregister a network namespace subsystem
 * @ops: pernet operations structure to manipulate
 *
 * Remove the pernet operations structure from the list to be
 * used when network namespaces are created or destroyed. In
 * addition, run the exit method for all existing network
 * namespaces.
 */
void unregister_pernet_subsys(struct pernet_operations *ops)
{
	mutex_lock(&net_mutex);
	unregister_pernet_operations(ops);
	mutex_unlock(&net_mutex);
}
EXPORT_SYMBOL_GPL(unregister_pernet_subsys);
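
/*
 * Usage sketch (illustrative; the "foo" subsystem is hypothetical). Setting
 * both .id and .size makes ops_init() allocate a zeroed per-namespace blob
 * and publish it via net_assign_generic() before .init runs, so the init
 * hook can immediately retrieve it with net_generic():
 *
 *	struct foo_net {
 *		struct list_head entries;
 *	};
 *	static unsigned int foo_net_id;
 *
 *	static __net_init int foo_net_init(struct net *net)
 *	{
 *		struct foo_net *fn = net_generic(net, foo_net_id);
 *
 *		INIT_LIST_HEAD(&fn->entries);
 *		return 0;
 *	}
 *
 *	static __net_exit void foo_net_exit(struct net *net)
 *	{
 *		// per-net teardown goes here; the foo_net blob itself
 *		// is freed later by ops_free()
 *	}
 *
 *	static struct pernet_operations foo_net_ops = {
 *		.init = foo_net_init,
 *		.exit = foo_net_exit,
 *		.id   = &foo_net_id,
 *		.size = sizeof(struct foo_net),
 *	};
 *
 *	static int __init foo_init(void)
 *	{
 *		return register_pernet_subsys(&foo_net_ops);
 *	}
 *
 *	static void __exit foo_exit(void)
 *	{
 *		unregister_pernet_subsys(&foo_net_ops);
 *	}
 */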
/**
 * register_pernet_device - register a network namespace device
 * @ops: pernet operations structure for the subsystem
 *
 * Register a device which has init and exit functions
 * that are called when network namespaces are created and
 * destroyed respectively.
 *
 * When registered, all network namespace init functions are
 * called for every existing network namespace, allowing kernel
 * modules to have a race-free view of the set of network namespaces.
 *
 * When a new network namespace is created all of the init
 * methods are called in the order in which they were registered.
 *
 * When a network namespace is destroyed all of the exit methods
 * are called in the reverse of the order with which they were
 * registered.
 */
int register_pernet_device(struct pernet_operations *ops)
{
	int error;
	mutex_lock(&net_mutex);
	error = register_pernet_operations(&pernet_list, ops);
	if (!error && (first_device == &pernet_list))
		first_device = &ops->list;
	mutex_unlock(&net_mutex);
	return error;
}
EXPORT_SYMBOL_GPL(register_pernet_device);

/**
 * unregister_pernet_device - unregister a network namespace netdevice
 * @ops: pernet operations structure to manipulate
 *
 * Remove the pernet operations structure from the list to be
 * used when network namespaces are created or destroyed. In
 * addition, run the exit method for all existing network
 * namespaces.
 */
void unregister_pernet_device(struct pernet_operations *ops)
{
	mutex_lock(&net_mutex);
	if (&ops->list == first_device)
		first_device = first_device->next;
	unregister_pernet_operations(ops);
	mutex_unlock(&net_mutex);
}
EXPORT_SYMBOL_GPL(unregister_pernet_device);
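
/*
 * Usage sketch (illustrative; modelled on how drivers such as the loopback
 * device register per-namespace setup). Subsystems are inserted before
 * first_device and devices are appended after it, so device init hooks run
 * after all subsystem init hooks, and their exit hooks run first on teardown:
 *
 *	static __net_init int loopback_net_init(struct net *net)
 *	{
 *		// create and register this namespace's loopback device
 *		return 0;
 *	}
 *
 *	static struct pernet_operations loopback_net_ops = {
 *		.init = loopback_net_init,
 *	};
 *
 *	register_pernet_device(&loopback_net_ops);
 */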
#ifdef CONFIG_NET_NS
static struct ns_common *netns_get(struct task_struct *task)
{
	struct net *net = NULL;
	struct nsproxy *nsproxy;

	task_lock(task);
	nsproxy = task->nsproxy;
	if (nsproxy)
		net = get_net(nsproxy->net_ns);
	task_unlock(task);

	return net ? &net->ns : NULL;
}

static inline struct net *to_net_ns(struct ns_common *ns)
{
	return container_of(ns, struct net, ns);
}

static void netns_put(struct ns_common *ns)
{
	put_net(to_net_ns(ns));
}

static int netns_install(struct nsproxy *nsproxy, struct ns_common *ns)
{
	struct net *net = to_net_ns(ns);

	if (!ns_capable(net->user_ns, CAP_SYS_ADMIN) ||
	    !ns_capable(current_user_ns(), CAP_SYS_ADMIN))
		return -EPERM;

	put_net(nsproxy->net_ns);
	nsproxy->net_ns = get_net(net);
	return 0;
}

static struct user_namespace *netns_owner(struct ns_common *ns)
{
	return to_net_ns(ns)->user_ns;
}

const struct proc_ns_operations netns_operations = {
	.name		= "net",
	.type		= CLONE_NEWNET,
	.get		= netns_get,
	.put		= netns_put,
	.install	= netns_install,
	.owner		= netns_owner,
};
#endif
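
/*
 * Userspace sketch (illustrative): netns_install() above is what setns(2)
 * reaches when handed a network-namespace fd:
 *
 *	#define _GNU_SOURCE
 *	#include <fcntl.h>
 *	#include <sched.h>
 *
 *	int fd = open("/proc/1/ns/net", O_RDONLY);
 *	if (fd >= 0 && setns(fd, CLONE_NEWNET) == 0)
 *		;	// current task now runs in init's netns
 */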