net_namespace.c

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/workqueue.h>
#include <linux/rtnetlink.h>
#include <linux/cache.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/delay.h>
#include <linux/sched.h>
#include <linux/idr.h>
#include <linux/rculist.h>
#include <linux/nsproxy.h>
#include <linux/fs.h>
#include <linux/proc_ns.h>
#include <linux/file.h>
#include <linux/export.h>
#include <linux/user_namespace.h>
#include <linux/net_namespace.h>
#include <linux/sched/task.h>

#include <net/sock.h>
#include <net/netlink.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>

/*
 * Our network namespace constructor/destructor lists
 */

static LIST_HEAD(pernet_list);
static struct list_head *first_device = &pernet_list;
DEFINE_MUTEX(net_mutex);

LIST_HEAD(net_namespace_list);
EXPORT_SYMBOL_GPL(net_namespace_list);

struct net init_net = {
        .dev_base_head = LIST_HEAD_INIT(init_net.dev_base_head),
};
EXPORT_SYMBOL(init_net);

static bool init_net_initialized;

#define MIN_PERNET_OPS_ID       \
        ((sizeof(struct net_generic) + sizeof(void *) - 1) / sizeof(void *))

#define INITIAL_NET_GEN_PTRS    13 /* +1 for len +2 for rcu_head */

static unsigned int max_gen_ptrs = INITIAL_NET_GEN_PTRS;

static struct net_generic *net_alloc_generic(void)
{
        struct net_generic *ng;
        unsigned int generic_size = offsetof(struct net_generic, ptr[max_gen_ptrs]);

        ng = kzalloc(generic_size, GFP_KERNEL);
        if (ng)
                ng->s.len = max_gen_ptrs;

        return ng;
}
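
/*
 * Illustration (editor's sketch, assuming the union layout of struct
 * net_generic in <net/netns/generic.h>, where ptr[] overlays the
 * s.len/s.rcu header): MIN_PERNET_OPS_ID rounds sizeof(struct
 * net_generic) up to whole pointer slots, so the first valid id simply
 * skips the header. On a typical 64-bit build that is
 *
 *      (4 [len] + 4 [pad] + 16 [rcu_head] + 7) / 8 = 3
 *
 * reserved slots, matching the "+1 for len +2 for rcu_head" note above.
 */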

static int net_assign_generic(struct net *net, unsigned int id, void *data)
{
        struct net_generic *ng, *old_ng;

        BUG_ON(!mutex_is_locked(&net_mutex));
        BUG_ON(id < MIN_PERNET_OPS_ID);

        old_ng = rcu_dereference_protected(net->gen,
                                           lockdep_is_held(&net_mutex));
        if (old_ng->s.len > id) {
                old_ng->ptr[id] = data;
                return 0;
        }

        ng = net_alloc_generic();
        if (ng == NULL)
                return -ENOMEM;

        /*
         * Some synchronisation notes:
         *
         * net_generic() explores the net->gen array inside an RCU
         * read-side section. Besides, once set, the net->gen->ptr[x]
         * pointer never changes (see rules in netns/generic.h).
         *
         * That said, we simply duplicate this array and schedule
         * the old copy for kfree after a grace period.
         */

        memcpy(&ng->ptr[MIN_PERNET_OPS_ID], &old_ng->ptr[MIN_PERNET_OPS_ID],
               (old_ng->s.len - MIN_PERNET_OPS_ID) * sizeof(void *));
        ng->ptr[id] = data;

        rcu_assign_pointer(net->gen, ng);
        kfree_rcu(old_ng, s.rcu);
        return 0;
}
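
/*
 * Example (editor's illustrative sketch, not part of this file): a
 * subsystem that registered a pernet id reads its slot locklessly via
 * net_generic(), which dereferences net->gen under RCU. The id variable
 * "foo_net_id" and struct "foo_net" are hypothetical:
 *
 *      struct foo_net { int counter; };
 *      static unsigned int foo_net_id;
 *
 *      static void foo_touch(struct net *net)
 *      {
 *              struct foo_net *fn = net_generic(net, foo_net_id);
 *
 *              fn->counter++;  // slot pointer is stable once assigned
 *      }
 */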

static int ops_init(const struct pernet_operations *ops, struct net *net)
{
        int err = -ENOMEM;
        void *data = NULL;

        if (ops->id && ops->size) {
                data = kzalloc(ops->size, GFP_KERNEL);
                if (!data)
                        goto out;

                err = net_assign_generic(net, *ops->id, data);
                if (err)
                        goto cleanup;
        }
        err = 0;
        if (ops->init)
                err = ops->init(net);
        if (!err)
                return 0;

cleanup:
        kfree(data);

out:
        return err;
}

static void ops_free(const struct pernet_operations *ops, struct net *net)
{
        if (ops->id && ops->size)
                kfree(net_generic(net, *ops->id));
}

static void ops_exit_list(const struct pernet_operations *ops,
                          struct list_head *net_exit_list)
{
        struct net *net;

        if (ops->exit) {
                list_for_each_entry(net, net_exit_list, exit_list)
                        ops->exit(net);
        }
        if (ops->exit_batch)
                ops->exit_batch(net_exit_list);
}

static void ops_free_list(const struct pernet_operations *ops,
                          struct list_head *net_exit_list)
{
        struct net *net;

        if (ops->size && ops->id) {
                list_for_each_entry(net, net_exit_list, exit_list)
                        ops_free(ops, net);
        }
}

/* should be called with nsid_lock held */
static int alloc_netid(struct net *net, struct net *peer, int reqid)
{
        int min = 0, max = 0;

        if (reqid >= 0) {
                min = reqid;
                max = reqid + 1;
        }

        return idr_alloc(&net->netns_ids, peer, min, max, GFP_ATOMIC);
}

/* This function is used by idr_for_each(). If net is equal to peer, the
 * function returns the id so that idr_for_each() stops. Because we cannot
 * return the id 0 (idr_for_each() would not stop), we return the magic value
 * NET_ID_ZERO (-1) for it.
 */
#define NET_ID_ZERO -1
static int net_eq_idr(int id, void *net, void *peer)
{
        if (net_eq(net, peer))
                return id ? : NET_ID_ZERO;
        return 0;
}

/* Should be called with nsid_lock held. If a new id is assigned, the bool alloc
 * is set to true, thus the caller knows that the new id must be notified via
 * rtnl.
 */
static int __peernet2id_alloc(struct net *net, struct net *peer, bool *alloc)
{
        int id = idr_for_each(&net->netns_ids, net_eq_idr, peer);
        bool alloc_it = *alloc;

        *alloc = false;

        /* Magic value for id 0. */
        if (id == NET_ID_ZERO)
                return 0;
        if (id > 0)
                return id;

        if (alloc_it) {
                id = alloc_netid(net, peer, -1);
                *alloc = true;
                return id >= 0 ? id : NETNSA_NSID_NOT_ASSIGNED;
        }

        return NETNSA_NSID_NOT_ASSIGNED;
}

/* should be called with nsid_lock held */
static int __peernet2id(struct net *net, struct net *peer)
{
        bool no = false;

        return __peernet2id_alloc(net, peer, &no);
}

static void rtnl_net_notifyid(struct net *net, int cmd, int id);

/* This function returns the id of a peer netns. If no id is assigned, one will
 * be allocated and returned.
 */
int peernet2id_alloc(struct net *net, struct net *peer)
{
        bool alloc;
        int id;

        if (atomic_read(&net->count) == 0)
                return NETNSA_NSID_NOT_ASSIGNED;

        spin_lock_bh(&net->nsid_lock);
        alloc = atomic_read(&peer->count) == 0 ? false : true;
        id = __peernet2id_alloc(net, peer, &alloc);
        spin_unlock_bh(&net->nsid_lock);
        if (alloc && id >= 0)
                rtnl_net_notifyid(net, RTM_NEWNSID, id);
        return id;
}

/* This function returns, if assigned, the id of a peer netns. */
int peernet2id(struct net *net, struct net *peer)
{
        int id;

        spin_lock_bh(&net->nsid_lock);
        id = __peernet2id(net, peer);
        spin_unlock_bh(&net->nsid_lock);
        return id;
}
EXPORT_SYMBOL(peernet2id);

/* This function returns true if the peer netns has an id assigned into the
 * current netns.
 */
bool peernet_has_id(struct net *net, struct net *peer)
{
        return peernet2id(net, peer) >= 0;
}

struct net *get_net_ns_by_id(struct net *net, int id)
{
        struct net *peer;

        if (id < 0)
                return NULL;

        rcu_read_lock();
        spin_lock_bh(&net->nsid_lock);
        peer = idr_find(&net->netns_ids, id);
        if (peer)
                get_net(peer);
        spin_unlock_bh(&net->nsid_lock);
        rcu_read_unlock();

        return peer;
}
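
/*
 * Example (editor's illustrative sketch, not part of this file): a caller
 * that resolved an nsid, e.g. one received in an RTM_NEWNSID notification,
 * looks the peer up and must drop the reference when done:
 *
 *      struct net *peer = get_net_ns_by_id(net, nsid);
 *
 *      if (peer) {
 *              // ... use peer ...
 *              put_net(peer);
 *      }
 */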

/*
 * setup_net runs the initializers for the network namespace object.
 */
static __net_init int setup_net(struct net *net, struct user_namespace *user_ns)
{
        /* Must be called with net_mutex held */
        const struct pernet_operations *ops, *saved_ops;
        int error = 0;
        LIST_HEAD(net_exit_list);

        atomic_set(&net->count, 1);
        atomic_set(&net->passive, 1);
        net->dev_base_seq = 1;
        net->user_ns = user_ns;
        idr_init(&net->netns_ids);
        spin_lock_init(&net->nsid_lock);

        list_for_each_entry(ops, &pernet_list, list) {
                error = ops_init(ops, net);
                if (error < 0)
                        goto out_undo;
        }
out:
        return error;

out_undo:
        /* Walk through the list backwards calling the exit functions
         * for the pernet modules whose init functions did not fail.
         */
        list_add(&net->exit_list, &net_exit_list);
        saved_ops = ops;
        list_for_each_entry_continue_reverse(ops, &pernet_list, list)
                ops_exit_list(ops, &net_exit_list);

        ops = saved_ops;
        list_for_each_entry_continue_reverse(ops, &pernet_list, list)
                ops_free_list(ops, &net_exit_list);

        rcu_barrier();
        goto out;
}

#ifdef CONFIG_NET_NS
static struct ucounts *inc_net_namespaces(struct user_namespace *ns)
{
        return inc_ucount(ns, current_euid(), UCOUNT_NET_NAMESPACES);
}

static void dec_net_namespaces(struct ucounts *ucounts)
{
        dec_ucount(ucounts, UCOUNT_NET_NAMESPACES);
}

static struct kmem_cache *net_cachep;
static struct workqueue_struct *netns_wq;

static struct net *net_alloc(void)
{
        struct net *net = NULL;
        struct net_generic *ng;

        ng = net_alloc_generic();
        if (!ng)
                goto out;

        net = kmem_cache_zalloc(net_cachep, GFP_KERNEL);
        if (!net)
                goto out_free;

        rcu_assign_pointer(net->gen, ng);
out:
        return net;

out_free:
        kfree(ng);
        goto out;
}

static void net_free(struct net *net)
{
        kfree(rcu_access_pointer(net->gen));
        kmem_cache_free(net_cachep, net);
}

void net_drop_ns(void *p)
{
        struct net *ns = p;

        if (ns && atomic_dec_and_test(&ns->passive))
                net_free(ns);
}

struct net *copy_net_ns(unsigned long flags,
                        struct user_namespace *user_ns, struct net *old_net)
{
        struct ucounts *ucounts;
        struct net *net;
        int rv;

        if (!(flags & CLONE_NEWNET))
                return get_net(old_net);

        ucounts = inc_net_namespaces(user_ns);
        if (!ucounts)
                return ERR_PTR(-ENOSPC);

        net = net_alloc();
        if (!net) {
                dec_net_namespaces(ucounts);
                return ERR_PTR(-ENOMEM);
        }

        get_user_ns(user_ns);

        rv = mutex_lock_killable(&net_mutex);
        if (rv < 0) {
                net_free(net);
                dec_net_namespaces(ucounts);
                put_user_ns(user_ns);
                return ERR_PTR(rv);
        }

        net->ucounts = ucounts;
        rv = setup_net(net, user_ns);
        if (rv == 0) {
                rtnl_lock();
                list_add_tail_rcu(&net->list, &net_namespace_list);
                rtnl_unlock();
        }
        mutex_unlock(&net_mutex);
        if (rv < 0) {
                dec_net_namespaces(ucounts);
                put_user_ns(user_ns);
                net_drop_ns(net);
                return ERR_PTR(rv);
        }
        return net;
}
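
/*
 * Example (editor's illustrative sketch, not part of this file):
 * copy_net_ns() is what ultimately runs when userspace asks for a fresh
 * network namespace, e.g. via unshare(2) or clone(2) with CLONE_NEWNET:
 *
 *      #define _GNU_SOURCE
 *      #include <sched.h>
 *
 *      if (unshare(CLONE_NEWNET) == -1)
 *              perror("unshare");  // e.g. ENOSPC if the ucount limit is hit
 */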

static DEFINE_SPINLOCK(cleanup_list_lock);
static LIST_HEAD(cleanup_list);  /* Must hold cleanup_list_lock to touch */

static void cleanup_net(struct work_struct *work)
{
        const struct pernet_operations *ops;
        struct net *net, *tmp;
        struct list_head net_kill_list;
        LIST_HEAD(net_exit_list);

        /* Atomically snapshot the list of namespaces to clean up */
        spin_lock_irq(&cleanup_list_lock);
        list_replace_init(&cleanup_list, &net_kill_list);
        spin_unlock_irq(&cleanup_list_lock);

        mutex_lock(&net_mutex);

        /* Don't let anyone else find us. */
        rtnl_lock();
        list_for_each_entry(net, &net_kill_list, cleanup_list) {
                list_del_rcu(&net->list);
                list_add_tail(&net->exit_list, &net_exit_list);
                for_each_net(tmp) {
                        int id;

                        spin_lock_bh(&tmp->nsid_lock);
                        id = __peernet2id(tmp, net);
                        if (id >= 0)
                                idr_remove(&tmp->netns_ids, id);
                        spin_unlock_bh(&tmp->nsid_lock);
                        if (id >= 0)
                                rtnl_net_notifyid(tmp, RTM_DELNSID, id);
                }
                spin_lock_bh(&net->nsid_lock);
                idr_destroy(&net->netns_ids);
                spin_unlock_bh(&net->nsid_lock);
        }
        rtnl_unlock();

        /*
         * Another CPU might be rcu-iterating the list, wait for it.
         * This needs to be before calling the exit() notifiers, so
         * the rcu_barrier() below isn't sufficient alone.
         */
        synchronize_rcu();

        /* Run all of the network namespace exit methods */
        list_for_each_entry_reverse(ops, &pernet_list, list)
                ops_exit_list(ops, &net_exit_list);

        /* Free the net generic variables */
        list_for_each_entry_reverse(ops, &pernet_list, list)
                ops_free_list(ops, &net_exit_list);

        mutex_unlock(&net_mutex);

        /* Ensure there are no outstanding rcu callbacks using this
         * network namespace.
         */
        rcu_barrier();

        /* Finally it is safe to free my network namespace structure */
        list_for_each_entry_safe(net, tmp, &net_exit_list, exit_list) {
                list_del_init(&net->exit_list);
                dec_net_namespaces(net->ucounts);
                put_user_ns(net->user_ns);
                net_drop_ns(net);
        }
}

static DECLARE_WORK(net_cleanup_work, cleanup_net);

void __put_net(struct net *net)
{
        /* Cleanup the network namespace in process context */
        unsigned long flags;

        spin_lock_irqsave(&cleanup_list_lock, flags);
        list_add(&net->cleanup_list, &cleanup_list);
        spin_unlock_irqrestore(&cleanup_list_lock, flags);

        queue_work(netns_wq, &net_cleanup_work);
}
EXPORT_SYMBOL_GPL(__put_net);

struct net *get_net_ns_by_fd(int fd)
{
        struct file *file;
        struct ns_common *ns;
        struct net *net;

        file = proc_ns_fget(fd);
        if (IS_ERR(file))
                return ERR_CAST(file);

        ns = get_proc_ns(file_inode(file));
        if (ns->ops == &netns_operations)
                net = get_net(container_of(ns, struct net, ns));
        else
                net = ERR_PTR(-EINVAL);

        fput(file);
        return net;
}
#else
struct net *get_net_ns_by_fd(int fd)
{
        return ERR_PTR(-EINVAL);
}
#endif
EXPORT_SYMBOL_GPL(get_net_ns_by_fd);
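
/*
 * Example (editor's illustrative sketch, not part of this file): the fd
 * passed to get_net_ns_by_fd() is one that userspace obtained by opening
 * a netns file, typically /proc/<pid>/ns/net or a bind-mounted entry
 * under /var/run/netns/; the path below is hypothetical:
 *
 *      int fd = open("/proc/self/ns/net", O_RDONLY);
 *      // pass fd down, e.g. as the NETNSA_FD attribute of an
 *      // RTM_NEWNSID/RTM_GETNSID request handled later in this file
 */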

struct net *get_net_ns_by_pid(pid_t pid)
{
        struct task_struct *tsk;
        struct net *net;

        /* Lookup the network namespace */
        net = ERR_PTR(-ESRCH);
        rcu_read_lock();
        tsk = find_task_by_vpid(pid);
        if (tsk) {
                struct nsproxy *nsproxy;

                task_lock(tsk);
                nsproxy = tsk->nsproxy;
                if (nsproxy)
                        net = get_net(nsproxy->net_ns);
                task_unlock(tsk);
        }
        rcu_read_unlock();
        return net;
}
EXPORT_SYMBOL_GPL(get_net_ns_by_pid);

static __net_init int net_ns_net_init(struct net *net)
{
#ifdef CONFIG_NET_NS
        net->ns.ops = &netns_operations;
#endif
        return ns_alloc_inum(&net->ns);
}

static __net_exit void net_ns_net_exit(struct net *net)
{
        ns_free_inum(&net->ns);
}

static struct pernet_operations __net_initdata net_ns_ops = {
        .init = net_ns_net_init,
        .exit = net_ns_net_exit,
};

static const struct nla_policy rtnl_net_policy[NETNSA_MAX + 1] = {
        [NETNSA_NONE]   = { .type = NLA_UNSPEC },
        [NETNSA_NSID]   = { .type = NLA_S32 },
        [NETNSA_PID]    = { .type = NLA_U32 },
        [NETNSA_FD]     = { .type = NLA_U32 },
};
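
/*
 * Example (editor's illustrative sketch, not part of this file): a minimal
 * RTM_NEWNSID request body is a struct rtgenmsg followed by attributes
 * matching this policy, which is understood to be what iproute2's
 * "ip netns set <name> <nsid>" sends:
 *
 *      struct rtgenmsg rtg = { .rtgen_family = AF_UNSPEC };
 *      // attributes in the nlmsghdr payload:
 *      //   NETNSA_FD   -> u32 fd open on the peer's netns file
 *      //   NETNSA_NSID -> s32 requested id (-1 lets the kernel pick)
 */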

static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh)
{
        struct net *net = sock_net(skb->sk);
        struct nlattr *tb[NETNSA_MAX + 1];
        struct net *peer;
        int nsid, err;

        err = nlmsg_parse(nlh, sizeof(struct rtgenmsg), tb, NETNSA_MAX,
                          rtnl_net_policy, NULL);
        if (err < 0)
                return err;
        if (!tb[NETNSA_NSID])
                return -EINVAL;
        nsid = nla_get_s32(tb[NETNSA_NSID]);

        if (tb[NETNSA_PID])
                peer = get_net_ns_by_pid(nla_get_u32(tb[NETNSA_PID]));
        else if (tb[NETNSA_FD])
                peer = get_net_ns_by_fd(nla_get_u32(tb[NETNSA_FD]));
        else
                return -EINVAL;
        if (IS_ERR(peer))
                return PTR_ERR(peer);

        spin_lock_bh(&net->nsid_lock);
        if (__peernet2id(net, peer) >= 0) {
                spin_unlock_bh(&net->nsid_lock);
                err = -EEXIST;
                goto out;
        }

        err = alloc_netid(net, peer, nsid);
        spin_unlock_bh(&net->nsid_lock);
        if (err >= 0) {
                rtnl_net_notifyid(net, RTM_NEWNSID, err);
                err = 0;
        }
out:
        put_net(peer);
        return err;
}

static int rtnl_net_get_size(void)
{
        return NLMSG_ALIGN(sizeof(struct rtgenmsg))
               + nla_total_size(sizeof(s32)) /* NETNSA_NSID */
               ;
}

static int rtnl_net_fill(struct sk_buff *skb, u32 portid, u32 seq, int flags,
                         int cmd, struct net *net, int nsid)
{
        struct nlmsghdr *nlh;
        struct rtgenmsg *rth;

        nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rth), flags);
        if (!nlh)
                return -EMSGSIZE;

        rth = nlmsg_data(nlh);
        rth->rtgen_family = AF_UNSPEC;

        if (nla_put_s32(skb, NETNSA_NSID, nsid))
                goto nla_put_failure;

        nlmsg_end(skb, nlh);
        return 0;

nla_put_failure:
        nlmsg_cancel(skb, nlh);
        return -EMSGSIZE;
}

static int rtnl_net_getid(struct sk_buff *skb, struct nlmsghdr *nlh)
{
        struct net *net = sock_net(skb->sk);
        struct nlattr *tb[NETNSA_MAX + 1];
        struct sk_buff *msg;
        struct net *peer;
        int err, id;

        err = nlmsg_parse(nlh, sizeof(struct rtgenmsg), tb, NETNSA_MAX,
                          rtnl_net_policy, NULL);
        if (err < 0)
                return err;
        if (tb[NETNSA_PID])
                peer = get_net_ns_by_pid(nla_get_u32(tb[NETNSA_PID]));
        else if (tb[NETNSA_FD])
                peer = get_net_ns_by_fd(nla_get_u32(tb[NETNSA_FD]));
        else
                return -EINVAL;

        if (IS_ERR(peer))
                return PTR_ERR(peer);

        msg = nlmsg_new(rtnl_net_get_size(), GFP_KERNEL);
        if (!msg) {
                err = -ENOMEM;
                goto out;
        }

        id = peernet2id(net, peer);
        err = rtnl_net_fill(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0,
                            RTM_NEWNSID, net, id);
        if (err < 0)
                goto err_out;

        err = rtnl_unicast(msg, net, NETLINK_CB(skb).portid);
        goto out;

err_out:
        nlmsg_free(msg);
out:
        put_net(peer);
        return err;
}

struct rtnl_net_dump_cb {
        struct net *net;
        struct sk_buff *skb;
        struct netlink_callback *cb;
        int idx;
        int s_idx;
};

static int rtnl_net_dumpid_one(int id, void *peer, void *data)
{
        struct rtnl_net_dump_cb *net_cb = (struct rtnl_net_dump_cb *)data;
        int ret;

        if (net_cb->idx < net_cb->s_idx)
                goto cont;

        ret = rtnl_net_fill(net_cb->skb, NETLINK_CB(net_cb->cb->skb).portid,
                            net_cb->cb->nlh->nlmsg_seq, NLM_F_MULTI,
                            RTM_NEWNSID, net_cb->net, id);
        if (ret < 0)
                return ret;

cont:
        net_cb->idx++;
        return 0;
}

static int rtnl_net_dumpid(struct sk_buff *skb, struct netlink_callback *cb)
{
        struct net *net = sock_net(skb->sk);
        struct rtnl_net_dump_cb net_cb = {
                .net = net,
                .skb = skb,
                .cb = cb,
                .idx = 0,
                .s_idx = cb->args[0],
        };

        spin_lock_bh(&net->nsid_lock);
        idr_for_each(&net->netns_ids, rtnl_net_dumpid_one, &net_cb);
        spin_unlock_bh(&net->nsid_lock);

        cb->args[0] = net_cb.idx;
        return skb->len;
}

static void rtnl_net_notifyid(struct net *net, int cmd, int id)
{
        struct sk_buff *msg;
        int err = -ENOMEM;

        msg = nlmsg_new(rtnl_net_get_size(), GFP_KERNEL);
        if (!msg)
                goto out;

        err = rtnl_net_fill(msg, 0, 0, 0, cmd, net, id);
        if (err < 0)
                goto err_out;

        rtnl_notify(msg, net, 0, RTNLGRP_NSID, NULL, 0);
        return;

err_out:
        nlmsg_free(msg);
out:
        rtnl_set_sk_err(net, RTNLGRP_NSID, err);
}

static int __init net_ns_init(void)
{
        struct net_generic *ng;

#ifdef CONFIG_NET_NS
        net_cachep = kmem_cache_create("net_namespace", sizeof(struct net),
                                       SMP_CACHE_BYTES,
                                       SLAB_PANIC, NULL);

        /* Create workqueue for cleanup */
        netns_wq = create_singlethread_workqueue("netns");
        if (!netns_wq)
                panic("Could not create netns workq");
#endif

        ng = net_alloc_generic();
        if (!ng)
                panic("Could not allocate generic netns");

        rcu_assign_pointer(init_net.gen, ng);

        mutex_lock(&net_mutex);
        if (setup_net(&init_net, &init_user_ns))
                panic("Could not setup the initial network namespace");

        init_net_initialized = true;

        rtnl_lock();
        list_add_tail_rcu(&init_net.list, &net_namespace_list);
        rtnl_unlock();

        mutex_unlock(&net_mutex);

        register_pernet_subsys(&net_ns_ops);

        rtnl_register(PF_UNSPEC, RTM_NEWNSID, rtnl_net_newid, NULL, NULL);
        rtnl_register(PF_UNSPEC, RTM_GETNSID, rtnl_net_getid, rtnl_net_dumpid,
                      NULL);

        return 0;
}

pure_initcall(net_ns_init);

#ifdef CONFIG_NET_NS
static int __register_pernet_operations(struct list_head *list,
                                        struct pernet_operations *ops)
{
        struct net *net;
        int error;
        LIST_HEAD(net_exit_list);

        list_add_tail(&ops->list, list);
        if (ops->init || (ops->id && ops->size)) {
                for_each_net(net) {
                        error = ops_init(ops, net);
                        if (error)
                                goto out_undo;
                        list_add_tail(&net->exit_list, &net_exit_list);
                }
        }
        return 0;

out_undo:
        /* If I have an error, clean up all namespaces I initialized */
        list_del(&ops->list);
        ops_exit_list(ops, &net_exit_list);
        ops_free_list(ops, &net_exit_list);
        return error;
}

static void __unregister_pernet_operations(struct pernet_operations *ops)
{
        struct net *net;
        LIST_HEAD(net_exit_list);

        list_del(&ops->list);
        for_each_net(net)
                list_add_tail(&net->exit_list, &net_exit_list);
        ops_exit_list(ops, &net_exit_list);
        ops_free_list(ops, &net_exit_list);
}

#else

static int __register_pernet_operations(struct list_head *list,
                                        struct pernet_operations *ops)
{
        if (!init_net_initialized) {
                list_add_tail(&ops->list, list);
                return 0;
        }

        return ops_init(ops, &init_net);
}

static void __unregister_pernet_operations(struct pernet_operations *ops)
{
        if (!init_net_initialized) {
                list_del(&ops->list);
        } else {
                LIST_HEAD(net_exit_list);

                list_add(&init_net.exit_list, &net_exit_list);
                ops_exit_list(ops, &net_exit_list);
                ops_free_list(ops, &net_exit_list);
        }
}

#endif /* CONFIG_NET_NS */

static DEFINE_IDA(net_generic_ids);

static int register_pernet_operations(struct list_head *list,
                                      struct pernet_operations *ops)
{
        int error;

        if (ops->id) {
again:
                error = ida_get_new_above(&net_generic_ids,
                                          MIN_PERNET_OPS_ID, ops->id);
                if (error < 0) {
                        if (error == -EAGAIN) {
                                ida_pre_get(&net_generic_ids, GFP_KERNEL);
                                goto again;
                        }
                        return error;
                }
                max_gen_ptrs = max(max_gen_ptrs, *ops->id + 1);
        }
        error = __register_pernet_operations(list, ops);
        if (error) {
                rcu_barrier();
                if (ops->id)
                        ida_remove(&net_generic_ids, *ops->id);
        }

        return error;
}

static void unregister_pernet_operations(struct pernet_operations *ops)
{
        __unregister_pernet_operations(ops);
        rcu_barrier();
        if (ops->id)
                ida_remove(&net_generic_ids, *ops->id);
}

/**
 * register_pernet_subsys - register a network namespace subsystem
 * @ops: pernet operations structure for the subsystem
 *
 * Register a subsystem which has init and exit functions
 * that are called when network namespaces are created and
 * destroyed respectively.
 *
 * When registered, all network namespace init functions are
 * called for every existing network namespace, allowing kernel
 * modules to have a race-free view of the set of network namespaces.
 *
 * When a new network namespace is created all of the init
 * methods are called in the order in which they were registered.
 *
 * When a network namespace is destroyed all of the exit methods
 * are called in the reverse of the order in which they were
 * registered.
 */
int register_pernet_subsys(struct pernet_operations *ops)
{
        int error;

        mutex_lock(&net_mutex);
        error = register_pernet_operations(first_device, ops);
        mutex_unlock(&net_mutex);
        return error;
}
EXPORT_SYMBOL_GPL(register_pernet_subsys);
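
/*
 * Example (editor's illustrative sketch, not part of this file): a minimal
 * pernet subsystem with per-namespace state. The names "foo_net",
 * "foo_net_id" and "foo_net_ops" are hypothetical. Because .id and .size
 * are both set, ops_init() above kzalloc's a struct foo_net for each
 * namespace and parks it in the net_generic array before .init runs, and
 * ops_free() releases it on namespace teardown:
 *
 *      struct foo_net {
 *              int counter;
 *      };
 *
 *      static unsigned int foo_net_id;
 *
 *      static int __net_init foo_net_init(struct net *net)
 *      {
 *              struct foo_net *fn = net_generic(net, foo_net_id);
 *
 *              fn->counter = 0;        // already zeroed by kzalloc
 *              return 0;
 *      }
 *
 *      static void __net_exit foo_net_exit(struct net *net)
 *      {
 *              // the per-net struct itself is freed by ops_free()
 *      }
 *
 *      static struct pernet_operations foo_net_ops = {
 *              .init = foo_net_init,
 *              .exit = foo_net_exit,
 *              .id   = &foo_net_id,
 *              .size = sizeof(struct foo_net),
 *      };
 *
 *      // typically from a module_init() handler:
 *      //      register_pernet_subsys(&foo_net_ops);
 */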

/**
 * unregister_pernet_subsys - unregister a network namespace subsystem
 * @ops: pernet operations structure to manipulate
 *
 * Remove the pernet operations structure from the list to be
 * used when network namespaces are created or destroyed. In
 * addition run the exit method for all existing network
 * namespaces.
 */
void unregister_pernet_subsys(struct pernet_operations *ops)
{
        mutex_lock(&net_mutex);
        unregister_pernet_operations(ops);
        mutex_unlock(&net_mutex);
}
EXPORT_SYMBOL_GPL(unregister_pernet_subsys);

/**
 * register_pernet_device - register a network namespace device
 * @ops: pernet operations structure for the subsystem
 *
 * Register a device which has init and exit functions
 * that are called when network namespaces are created and
 * destroyed respectively.
 *
 * When registered, all network namespace init functions are
 * called for every existing network namespace, allowing kernel
 * modules to have a race-free view of the set of network namespaces.
 *
 * When a new network namespace is created all of the init
 * methods are called in the order in which they were registered.
 *
 * When a network namespace is destroyed all of the exit methods
 * are called in the reverse of the order in which they were
 * registered.
 */
int register_pernet_device(struct pernet_operations *ops)
{
        int error;

        mutex_lock(&net_mutex);
        error = register_pernet_operations(&pernet_list, ops);
        if (!error && (first_device == &pernet_list))
                first_device = &ops->list;
        mutex_unlock(&net_mutex);
        return error;
}
EXPORT_SYMBOL_GPL(register_pernet_device);

/**
 * unregister_pernet_device - unregister a network namespace netdevice
 * @ops: pernet operations structure to manipulate
 *
 * Remove the pernet operations structure from the list to be
 * used when network namespaces are created or destroyed. In
 * addition run the exit method for all existing network
 * namespaces.
 */
void unregister_pernet_device(struct pernet_operations *ops)
{
        mutex_lock(&net_mutex);
        if (&ops->list == first_device)
                first_device = first_device->next;
        unregister_pernet_operations(ops);
        mutex_unlock(&net_mutex);
}
EXPORT_SYMBOL_GPL(unregister_pernet_device);

#ifdef CONFIG_NET_NS
static struct ns_common *netns_get(struct task_struct *task)
{
        struct net *net = NULL;
        struct nsproxy *nsproxy;

        task_lock(task);
        nsproxy = task->nsproxy;
        if (nsproxy)
                net = get_net(nsproxy->net_ns);
        task_unlock(task);

        return net ? &net->ns : NULL;
}

static inline struct net *to_net_ns(struct ns_common *ns)
{
        return container_of(ns, struct net, ns);
}

static void netns_put(struct ns_common *ns)
{
        put_net(to_net_ns(ns));
}

static int netns_install(struct nsproxy *nsproxy, struct ns_common *ns)
{
        struct net *net = to_net_ns(ns);

        if (!ns_capable(net->user_ns, CAP_SYS_ADMIN) ||
            !ns_capable(current_user_ns(), CAP_SYS_ADMIN))
                return -EPERM;

        put_net(nsproxy->net_ns);
        nsproxy->net_ns = get_net(net);
        return 0;
}
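
/*
 * Example (editor's illustrative sketch, not part of this file):
 * netns_install() is reached via the setns(2) syscall when userspace
 * switches network namespaces; the path below is hypothetical:
 *
 *      int fd = open("/var/run/netns/blue", O_RDONLY);
 *
 *      if (setns(fd, CLONE_NEWNET) == -1)
 *              perror("setns");  // e.g. EPERM without CAP_SYS_ADMIN
 */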

static struct user_namespace *netns_owner(struct ns_common *ns)
{
        return to_net_ns(ns)->user_ns;
}

const struct proc_ns_operations netns_operations = {
        .name           = "net",
        .type           = CLONE_NEWNET,
        .get            = netns_get,
        .put            = netns_put,
        .install        = netns_install,
        .owner          = netns_owner,
};
#endif