user_namespace.c 27 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096
  1. /*
  2. * This program is free software; you can redistribute it and/or
  3. * modify it under the terms of the GNU General Public License as
  4. * published by the Free Software Foundation, version 2 of the
  5. * License.
  6. */
  7. #include <linux/export.h>
  8. #include <linux/nsproxy.h>
  9. #include <linux/slab.h>
  10. #include <linux/sched/signal.h>
  11. #include <linux/user_namespace.h>
  12. #include <linux/proc_ns.h>
  13. #include <linux/highuid.h>
  14. #include <linux/cred.h>
  15. #include <linux/securebits.h>
  16. #include <linux/keyctl.h>
  17. #include <linux/key-type.h>
  18. #include <keys/user-type.h>
  19. #include <linux/seq_file.h>
  20. #include <linux/fs.h>
  21. #include <linux/uaccess.h>
  22. #include <linux/ctype.h>
  23. #include <linux/projid.h>
  24. #include <linux/fs_struct.h>
/* Slab cache from which struct user_namespace objects are allocated. */
static struct kmem_cache *user_ns_cachep __read_mostly;
/* Serializes id-map writes and updates to the per-namespace flags. */
static DEFINE_MUTEX(userns_state_mutex);

/* Forward declarations for helpers that are defined later in this file. */
static bool new_idmap_permitted(const struct file *file,
				struct user_namespace *ns, int cap_setid,
				struct uid_gid_map *map);
static void free_user_ns(struct work_struct *work);
  31. static struct ucounts *inc_user_namespaces(struct user_namespace *ns, kuid_t uid)
  32. {
  33. return inc_ucount(ns, uid, UCOUNT_USER_NAMESPACES);
  34. }
  35. static void dec_user_namespaces(struct ucounts *ucounts)
  36. {
  37. return dec_ucount(ucounts, UCOUNT_USER_NAMESPACES);
  38. }
  39. static void set_cred_user_ns(struct cred *cred, struct user_namespace *user_ns)
  40. {
  41. /* Start with the same capabilities as init but useless for doing
  42. * anything as the capabilities are bound to the new user namespace.
  43. */
  44. cred->securebits = SECUREBITS_DEFAULT;
  45. cred->cap_inheritable = CAP_EMPTY_SET;
  46. cred->cap_permitted = CAP_FULL_SET;
  47. cred->cap_effective = CAP_FULL_SET;
  48. cred->cap_ambient = CAP_EMPTY_SET;
  49. cred->cap_bset = CAP_FULL_SET;
  50. #ifdef CONFIG_KEYS
  51. key_put(cred->request_key_auth);
  52. cred->request_key_auth = NULL;
  53. #endif
  54. /* tgcred will be cleared in our caller bc CLONE_THREAD won't be set */
  55. cred->user_ns = user_ns;
  56. }
  57. /*
  58. * Create a new user namespace, deriving the creator from the user in the
  59. * passed credentials, and replacing that user with the new root user for the
  60. * new namespace.
  61. *
  62. * This is called by copy_creds(), which will finish setting the target task's
  63. * credentials.
  64. */
  65. int create_user_ns(struct cred *new)
  66. {
  67. struct user_namespace *ns, *parent_ns = new->user_ns;
  68. kuid_t owner = new->euid;
  69. kgid_t group = new->egid;
  70. struct ucounts *ucounts;
  71. int ret, i;
  72. ret = -ENOSPC;
  73. if (parent_ns->level > 32)
  74. goto fail;
  75. ucounts = inc_user_namespaces(parent_ns, owner);
  76. if (!ucounts)
  77. goto fail;
  78. /*
  79. * Verify that we can not violate the policy of which files
  80. * may be accessed that is specified by the root directory,
  81. * by verifing that the root directory is at the root of the
  82. * mount namespace which allows all files to be accessed.
  83. */
  84. ret = -EPERM;
  85. if (current_chrooted())
  86. goto fail_dec;
  87. /* The creator needs a mapping in the parent user namespace
  88. * or else we won't be able to reasonably tell userspace who
  89. * created a user_namespace.
  90. */
  91. ret = -EPERM;
  92. if (!kuid_has_mapping(parent_ns, owner) ||
  93. !kgid_has_mapping(parent_ns, group))
  94. goto fail_dec;
  95. ret = -ENOMEM;
  96. ns = kmem_cache_zalloc(user_ns_cachep, GFP_KERNEL);
  97. if (!ns)
  98. goto fail_dec;
  99. ret = ns_alloc_inum(&ns->ns);
  100. if (ret)
  101. goto fail_free;
  102. ns->ns.ops = &userns_operations;
  103. atomic_set(&ns->count, 1);
  104. /* Leave the new->user_ns reference with the new user namespace. */
  105. ns->parent = parent_ns;
  106. ns->level = parent_ns->level + 1;
  107. ns->owner = owner;
  108. ns->group = group;
  109. INIT_WORK(&ns->work, free_user_ns);
  110. for (i = 0; i < UCOUNT_COUNTS; i++) {
  111. ns->ucount_max[i] = INT_MAX;
  112. }
  113. ns->ucounts = ucounts;
  114. /* Inherit USERNS_SETGROUPS_ALLOWED from our parent */
  115. mutex_lock(&userns_state_mutex);
  116. ns->flags = parent_ns->flags;
  117. mutex_unlock(&userns_state_mutex);
  118. #ifdef CONFIG_PERSISTENT_KEYRINGS
  119. init_rwsem(&ns->persistent_keyring_register_sem);
  120. #endif
  121. ret = -ENOMEM;
  122. if (!setup_userns_sysctls(ns))
  123. goto fail_keyring;
  124. set_cred_user_ns(new, ns);
  125. return 0;
  126. fail_keyring:
  127. #ifdef CONFIG_PERSISTENT_KEYRINGS
  128. key_put(ns->persistent_keyring_register);
  129. #endif
  130. ns_free_inum(&ns->ns);
  131. fail_free:
  132. kmem_cache_free(user_ns_cachep, ns);
  133. fail_dec:
  134. dec_user_namespaces(ucounts);
  135. fail:
  136. return ret;
  137. }
  138. int unshare_userns(unsigned long unshare_flags, struct cred **new_cred)
  139. {
  140. struct cred *cred;
  141. int err = -ENOMEM;
  142. if (!(unshare_flags & CLONE_NEWUSER))
  143. return 0;
  144. cred = prepare_creds();
  145. if (cred) {
  146. err = create_user_ns(cred);
  147. if (err)
  148. put_cred(cred);
  149. else
  150. *new_cred = cred;
  151. }
  152. return err;
  153. }
  154. static void free_user_ns(struct work_struct *work)
  155. {
  156. struct user_namespace *parent, *ns =
  157. container_of(work, struct user_namespace, work);
  158. do {
  159. struct ucounts *ucounts = ns->ucounts;
  160. parent = ns->parent;
  161. retire_userns_sysctls(ns);
  162. #ifdef CONFIG_PERSISTENT_KEYRINGS
  163. key_put(ns->persistent_keyring_register);
  164. #endif
  165. ns_free_inum(&ns->ns);
  166. kmem_cache_free(user_ns_cachep, ns);
  167. dec_user_namespaces(ucounts);
  168. ns = parent;
  169. } while (atomic_dec_and_test(&parent->count));
  170. }
  171. void __put_user_ns(struct user_namespace *ns)
  172. {
  173. schedule_work(&ns->work);
  174. }
  175. EXPORT_SYMBOL(__put_user_ns);
  176. static u32 map_id_range_down(struct uid_gid_map *map, u32 id, u32 count)
  177. {
  178. unsigned idx, extents;
  179. u32 first, last, id2;
  180. id2 = id + count - 1;
  181. /* Find the matching extent */
  182. extents = map->nr_extents;
  183. smp_rmb();
  184. for (idx = 0; idx < extents; idx++) {
  185. first = map->extent[idx].first;
  186. last = first + map->extent[idx].count - 1;
  187. if (id >= first && id <= last &&
  188. (id2 >= first && id2 <= last))
  189. break;
  190. }
  191. /* Map the id or note failure */
  192. if (idx < extents)
  193. id = (id - first) + map->extent[idx].lower_first;
  194. else
  195. id = (u32) -1;
  196. return id;
  197. }
  198. static u32 map_id_down(struct uid_gid_map *map, u32 id)
  199. {
  200. unsigned idx, extents;
  201. u32 first, last;
  202. /* Find the matching extent */
  203. extents = map->nr_extents;
  204. smp_rmb();
  205. for (idx = 0; idx < extents; idx++) {
  206. first = map->extent[idx].first;
  207. last = first + map->extent[idx].count - 1;
  208. if (id >= first && id <= last)
  209. break;
  210. }
  211. /* Map the id or note failure */
  212. if (idx < extents)
  213. id = (id - first) + map->extent[idx].lower_first;
  214. else
  215. id = (u32) -1;
  216. return id;
  217. }
  218. static u32 map_id_up(struct uid_gid_map *map, u32 id)
  219. {
  220. unsigned idx, extents;
  221. u32 first, last;
  222. /* Find the matching extent */
  223. extents = map->nr_extents;
  224. smp_rmb();
  225. for (idx = 0; idx < extents; idx++) {
  226. first = map->extent[idx].lower_first;
  227. last = first + map->extent[idx].count - 1;
  228. if (id >= first && id <= last)
  229. break;
  230. }
  231. /* Map the id or note failure */
  232. if (idx < extents)
  233. id = (id - first) + map->extent[idx].first;
  234. else
  235. id = (u32) -1;
  236. return id;
  237. }
  238. /**
  239. * make_kuid - Map a user-namespace uid pair into a kuid.
  240. * @ns: User namespace that the uid is in
  241. * @uid: User identifier
  242. *
  243. * Maps a user-namespace uid pair into a kernel internal kuid,
  244. * and returns that kuid.
  245. *
  246. * When there is no mapping defined for the user-namespace uid
  247. * pair INVALID_UID is returned. Callers are expected to test
  248. * for and handle INVALID_UID being returned. INVALID_UID
  249. * may be tested for using uid_valid().
  250. */
  251. kuid_t make_kuid(struct user_namespace *ns, uid_t uid)
  252. {
  253. /* Map the uid to a global kernel uid */
  254. return KUIDT_INIT(map_id_down(&ns->uid_map, uid));
  255. }
  256. EXPORT_SYMBOL(make_kuid);
  257. /**
  258. * from_kuid - Create a uid from a kuid user-namespace pair.
  259. * @targ: The user namespace we want a uid in.
  260. * @kuid: The kernel internal uid to start with.
  261. *
  262. * Map @kuid into the user-namespace specified by @targ and
  263. * return the resulting uid.
  264. *
  265. * There is always a mapping into the initial user_namespace.
  266. *
  267. * If @kuid has no mapping in @targ (uid_t)-1 is returned.
  268. */
  269. uid_t from_kuid(struct user_namespace *targ, kuid_t kuid)
  270. {
  271. /* Map the uid from a global kernel uid */
  272. return map_id_up(&targ->uid_map, __kuid_val(kuid));
  273. }
  274. EXPORT_SYMBOL(from_kuid);
  275. /**
  276. * from_kuid_munged - Create a uid from a kuid user-namespace pair.
  277. * @targ: The user namespace we want a uid in.
  278. * @kuid: The kernel internal uid to start with.
  279. *
  280. * Map @kuid into the user-namespace specified by @targ and
  281. * return the resulting uid.
  282. *
  283. * There is always a mapping into the initial user_namespace.
  284. *
  285. * Unlike from_kuid from_kuid_munged never fails and always
  286. * returns a valid uid. This makes from_kuid_munged appropriate
  287. * for use in syscalls like stat and getuid where failing the
  288. * system call and failing to provide a valid uid are not an
  289. * options.
  290. *
  291. * If @kuid has no mapping in @targ overflowuid is returned.
  292. */
  293. uid_t from_kuid_munged(struct user_namespace *targ, kuid_t kuid)
  294. {
  295. uid_t uid;
  296. uid = from_kuid(targ, kuid);
  297. if (uid == (uid_t) -1)
  298. uid = overflowuid;
  299. return uid;
  300. }
  301. EXPORT_SYMBOL(from_kuid_munged);
  302. /**
  303. * make_kgid - Map a user-namespace gid pair into a kgid.
  304. * @ns: User namespace that the gid is in
  305. * @gid: group identifier
  306. *
  307. * Maps a user-namespace gid pair into a kernel internal kgid,
  308. * and returns that kgid.
  309. *
  310. * When there is no mapping defined for the user-namespace gid
  311. * pair INVALID_GID is returned. Callers are expected to test
  312. * for and handle INVALID_GID being returned. INVALID_GID may be
  313. * tested for using gid_valid().
  314. */
  315. kgid_t make_kgid(struct user_namespace *ns, gid_t gid)
  316. {
  317. /* Map the gid to a global kernel gid */
  318. return KGIDT_INIT(map_id_down(&ns->gid_map, gid));
  319. }
  320. EXPORT_SYMBOL(make_kgid);
  321. /**
  322. * from_kgid - Create a gid from a kgid user-namespace pair.
  323. * @targ: The user namespace we want a gid in.
  324. * @kgid: The kernel internal gid to start with.
  325. *
  326. * Map @kgid into the user-namespace specified by @targ and
  327. * return the resulting gid.
  328. *
  329. * There is always a mapping into the initial user_namespace.
  330. *
  331. * If @kgid has no mapping in @targ (gid_t)-1 is returned.
  332. */
  333. gid_t from_kgid(struct user_namespace *targ, kgid_t kgid)
  334. {
  335. /* Map the gid from a global kernel gid */
  336. return map_id_up(&targ->gid_map, __kgid_val(kgid));
  337. }
  338. EXPORT_SYMBOL(from_kgid);
  339. /**
  340. * from_kgid_munged - Create a gid from a kgid user-namespace pair.
  341. * @targ: The user namespace we want a gid in.
  342. * @kgid: The kernel internal gid to start with.
  343. *
  344. * Map @kgid into the user-namespace specified by @targ and
  345. * return the resulting gid.
  346. *
  347. * There is always a mapping into the initial user_namespace.
  348. *
  349. * Unlike from_kgid from_kgid_munged never fails and always
  350. * returns a valid gid. This makes from_kgid_munged appropriate
  351. * for use in syscalls like stat and getgid where failing the
  352. * system call and failing to provide a valid gid are not options.
  353. *
  354. * If @kgid has no mapping in @targ overflowgid is returned.
  355. */
  356. gid_t from_kgid_munged(struct user_namespace *targ, kgid_t kgid)
  357. {
  358. gid_t gid;
  359. gid = from_kgid(targ, kgid);
  360. if (gid == (gid_t) -1)
  361. gid = overflowgid;
  362. return gid;
  363. }
  364. EXPORT_SYMBOL(from_kgid_munged);
  365. /**
  366. * make_kprojid - Map a user-namespace projid pair into a kprojid.
  367. * @ns: User namespace that the projid is in
  368. * @projid: Project identifier
  369. *
  370. * Maps a user-namespace uid pair into a kernel internal kuid,
  371. * and returns that kuid.
  372. *
  373. * When there is no mapping defined for the user-namespace projid
  374. * pair INVALID_PROJID is returned. Callers are expected to test
  375. * for and handle handle INVALID_PROJID being returned. INVALID_PROJID
  376. * may be tested for using projid_valid().
  377. */
  378. kprojid_t make_kprojid(struct user_namespace *ns, projid_t projid)
  379. {
  380. /* Map the uid to a global kernel uid */
  381. return KPROJIDT_INIT(map_id_down(&ns->projid_map, projid));
  382. }
  383. EXPORT_SYMBOL(make_kprojid);
  384. /**
  385. * from_kprojid - Create a projid from a kprojid user-namespace pair.
  386. * @targ: The user namespace we want a projid in.
  387. * @kprojid: The kernel internal project identifier to start with.
  388. *
  389. * Map @kprojid into the user-namespace specified by @targ and
  390. * return the resulting projid.
  391. *
  392. * There is always a mapping into the initial user_namespace.
  393. *
  394. * If @kprojid has no mapping in @targ (projid_t)-1 is returned.
  395. */
  396. projid_t from_kprojid(struct user_namespace *targ, kprojid_t kprojid)
  397. {
  398. /* Map the uid from a global kernel uid */
  399. return map_id_up(&targ->projid_map, __kprojid_val(kprojid));
  400. }
  401. EXPORT_SYMBOL(from_kprojid);
  402. /**
  403. * from_kprojid_munged - Create a projiid from a kprojid user-namespace pair.
  404. * @targ: The user namespace we want a projid in.
  405. * @kprojid: The kernel internal projid to start with.
  406. *
  407. * Map @kprojid into the user-namespace specified by @targ and
  408. * return the resulting projid.
  409. *
  410. * There is always a mapping into the initial user_namespace.
  411. *
  412. * Unlike from_kprojid from_kprojid_munged never fails and always
  413. * returns a valid projid. This makes from_kprojid_munged
  414. * appropriate for use in syscalls like stat and where
  415. * failing the system call and failing to provide a valid projid are
  416. * not an options.
  417. *
  418. * If @kprojid has no mapping in @targ OVERFLOW_PROJID is returned.
  419. */
  420. projid_t from_kprojid_munged(struct user_namespace *targ, kprojid_t kprojid)
  421. {
  422. projid_t projid;
  423. projid = from_kprojid(targ, kprojid);
  424. if (projid == (projid_t) -1)
  425. projid = OVERFLOW_PROJID;
  426. return projid;
  427. }
  428. EXPORT_SYMBOL(from_kprojid_munged);
  429. static int uid_m_show(struct seq_file *seq, void *v)
  430. {
  431. struct user_namespace *ns = seq->private;
  432. struct uid_gid_extent *extent = v;
  433. struct user_namespace *lower_ns;
  434. uid_t lower;
  435. lower_ns = seq_user_ns(seq);
  436. if ((lower_ns == ns) && lower_ns->parent)
  437. lower_ns = lower_ns->parent;
  438. lower = from_kuid(lower_ns, KUIDT_INIT(extent->lower_first));
  439. seq_printf(seq, "%10u %10u %10u\n",
  440. extent->first,
  441. lower,
  442. extent->count);
  443. return 0;
  444. }
  445. static int gid_m_show(struct seq_file *seq, void *v)
  446. {
  447. struct user_namespace *ns = seq->private;
  448. struct uid_gid_extent *extent = v;
  449. struct user_namespace *lower_ns;
  450. gid_t lower;
  451. lower_ns = seq_user_ns(seq);
  452. if ((lower_ns == ns) && lower_ns->parent)
  453. lower_ns = lower_ns->parent;
  454. lower = from_kgid(lower_ns, KGIDT_INIT(extent->lower_first));
  455. seq_printf(seq, "%10u %10u %10u\n",
  456. extent->first,
  457. lower,
  458. extent->count);
  459. return 0;
  460. }
  461. static int projid_m_show(struct seq_file *seq, void *v)
  462. {
  463. struct user_namespace *ns = seq->private;
  464. struct uid_gid_extent *extent = v;
  465. struct user_namespace *lower_ns;
  466. projid_t lower;
  467. lower_ns = seq_user_ns(seq);
  468. if ((lower_ns == ns) && lower_ns->parent)
  469. lower_ns = lower_ns->parent;
  470. lower = from_kprojid(lower_ns, KPROJIDT_INIT(extent->lower_first));
  471. seq_printf(seq, "%10u %10u %10u\n",
  472. extent->first,
  473. lower,
  474. extent->count);
  475. return 0;
  476. }
  477. static void *m_start(struct seq_file *seq, loff_t *ppos,
  478. struct uid_gid_map *map)
  479. {
  480. struct uid_gid_extent *extent = NULL;
  481. loff_t pos = *ppos;
  482. if (pos < map->nr_extents)
  483. extent = &map->extent[pos];
  484. return extent;
  485. }
  486. static void *uid_m_start(struct seq_file *seq, loff_t *ppos)
  487. {
  488. struct user_namespace *ns = seq->private;
  489. return m_start(seq, ppos, &ns->uid_map);
  490. }
  491. static void *gid_m_start(struct seq_file *seq, loff_t *ppos)
  492. {
  493. struct user_namespace *ns = seq->private;
  494. return m_start(seq, ppos, &ns->gid_map);
  495. }
  496. static void *projid_m_start(struct seq_file *seq, loff_t *ppos)
  497. {
  498. struct user_namespace *ns = seq->private;
  499. return m_start(seq, ppos, &ns->projid_map);
  500. }
  501. static void *m_next(struct seq_file *seq, void *v, loff_t *pos)
  502. {
  503. (*pos)++;
  504. return seq->op->start(seq, pos);
  505. }
  506. static void m_stop(struct seq_file *seq, void *v)
  507. {
  508. return;
  509. }
  510. const struct seq_operations proc_uid_seq_operations = {
  511. .start = uid_m_start,
  512. .stop = m_stop,
  513. .next = m_next,
  514. .show = uid_m_show,
  515. };
  516. const struct seq_operations proc_gid_seq_operations = {
  517. .start = gid_m_start,
  518. .stop = m_stop,
  519. .next = m_next,
  520. .show = gid_m_show,
  521. };
  522. const struct seq_operations proc_projid_seq_operations = {
  523. .start = projid_m_start,
  524. .stop = m_stop,
  525. .next = m_next,
  526. .show = projid_m_show,
  527. };
  528. static bool mappings_overlap(struct uid_gid_map *new_map,
  529. struct uid_gid_extent *extent)
  530. {
  531. u32 upper_first, lower_first, upper_last, lower_last;
  532. unsigned idx;
  533. upper_first = extent->first;
  534. lower_first = extent->lower_first;
  535. upper_last = upper_first + extent->count - 1;
  536. lower_last = lower_first + extent->count - 1;
  537. for (idx = 0; idx < new_map->nr_extents; idx++) {
  538. u32 prev_upper_first, prev_lower_first;
  539. u32 prev_upper_last, prev_lower_last;
  540. struct uid_gid_extent *prev;
  541. prev = &new_map->extent[idx];
  542. prev_upper_first = prev->first;
  543. prev_lower_first = prev->lower_first;
  544. prev_upper_last = prev_upper_first + prev->count - 1;
  545. prev_lower_last = prev_lower_first + prev->count - 1;
  546. /* Does the upper range intersect a previous extent? */
  547. if ((prev_upper_first <= upper_last) &&
  548. (prev_upper_last >= upper_first))
  549. return true;
  550. /* Does the lower range intersect a previous extent? */
  551. if ((prev_lower_first <= lower_last) &&
  552. (prev_lower_last >= lower_first))
  553. return true;
  554. }
  555. return false;
  556. }
/*
 * Parse an id map written by userspace and install it into @map.
 *
 * @file:       file being written; its seq_file private data is the
 *              user namespace the map belongs to.
 * @buf:        user buffer holding lines of "<first> <lower_first> <count>".
 * @count:      number of bytes in @buf; must be smaller than PAGE_SIZE.
 * @ppos:       file position; must be 0 on entry, set to @count on success.
 * @cap_setid:  capability tied to this kind of map, or an invalid
 *              capability value when no capability is required.
 * @map:        map to fill in; it may only be written once.
 * @parent_map: map used to translate the lower ids given by userspace.
 *
 * Returns @count on success.  Returns -EPERM when the map was already
 * written, a required capability is missing, the mapping is not permitted
 * or a lower range cannot be translated through @parent_map; -EINVAL for
 * a bad position, oversized write or malformed/overlapping input; or the
 * error from memdup_user_nul().
 */
static ssize_t map_write(struct file *file, const char __user *buf,
			 size_t count, loff_t *ppos,
			 int cap_setid,
			 struct uid_gid_map *map,
			 struct uid_gid_map *parent_map)
{
	struct seq_file *seq = file->private_data;
	struct user_namespace *ns = seq->private;
	struct uid_gid_map new_map;
	unsigned idx;
	struct uid_gid_extent *extent = NULL;
	char *kbuf = NULL, *pos, *next_line;
	ssize_t ret = -EINVAL;

	/*
	 * The userns_state_mutex serializes all writes to any given map.
	 *
	 * Any map is only ever written once.
	 *
	 * An id map fits within 1 cache line on most architectures.
	 *
	 * On read nothing needs to be done unless you are on an
	 * architecture with a crazy cache coherency model like alpha.
	 *
	 * There is a one time data dependency between reading the
	 * count of the extents and the values of the extents.  The
	 * desired behavior is to see the values of the extents that
	 * were written before the count of the extents.
	 *
	 * To achieve this smp_wmb() is used to guarantee the write
	 * order and smp_rmb() guarantees that we don't have crazy
	 * architectures returning stale data.
	 */
	mutex_lock(&userns_state_mutex);

	ret = -EPERM;
	/* Only allow one successful write to the map */
	if (map->nr_extents != 0)
		goto out;

	/*
	 * Adjusting namespace settings requires capabilities on the target.
	 */
	if (cap_valid(cap_setid) && !file_ns_capable(file, ns, CAP_SYS_ADMIN))
		goto out;

	/* Only allow < page size writes at the beginning of the file */
	ret = -EINVAL;
	if ((*ppos != 0) || (count >= PAGE_SIZE))
		goto out;

	/* Slurp in the user data */
	kbuf = memdup_user_nul(buf, count);
	if (IS_ERR(kbuf)) {
		ret = PTR_ERR(kbuf);
		/* Clear the error pointer so the kfree() at out: is safe. */
		kbuf = NULL;
		goto out;
	}

	/* Parse the user data, one extent per line */
	ret = -EINVAL;
	pos = kbuf;
	new_map.nr_extents = 0;
	for (; pos; pos = next_line) {
		extent = &new_map.extent[new_map.nr_extents];

		/* Find the end of line and ensure I don't look past it */
		next_line = strchr(pos, '\n');
		if (next_line) {
			*next_line = '\0';
			next_line++;
			/* A trailing newline does not start another line. */
			if (*next_line == '\0')
				next_line = NULL;
		}

		/*
		 * NOTE(review): simple_strtoul() yields an unsigned long that
		 * is stored into the extent fields; confirm that values wider
		 * than those fields are handled as intended.
		 */
		pos = skip_spaces(pos);
		extent->first = simple_strtoul(pos, &pos, 10);
		if (!isspace(*pos))
			goto out;

		pos = skip_spaces(pos);
		extent->lower_first = simple_strtoul(pos, &pos, 10);
		if (!isspace(*pos))
			goto out;

		/* The last field may be followed by the end of the line. */
		pos = skip_spaces(pos);
		extent->count = simple_strtoul(pos, &pos, 10);
		if (*pos && !isspace(*pos))
			goto out;

		/* Verify there is not trailing junk on the line */
		pos = skip_spaces(pos);
		if (*pos != '\0')
			goto out;

		/* Verify we have been given valid starting values */
		if ((extent->first == (u32) -1) ||
		    (extent->lower_first == (u32) -1))
			goto out;

		/* Verify count is not zero and does not cause the
		 * extent to wrap
		 */
		if ((extent->first + extent->count) <= extent->first)
			goto out;
		if ((extent->lower_first + extent->count) <=
		     extent->lower_first)
			goto out;

		/* Do the ranges in extent overlap any previous extents? */
		if (mappings_overlap(&new_map, extent))
			goto out;

		new_map.nr_extents++;

		/* Fail if the file contains too many extents */
		if ((new_map.nr_extents == UID_GID_MAP_MAX_EXTENTS) &&
		    (next_line != NULL))
			goto out;
	}
	/* Be very certain the new map actually exists */
	if (new_map.nr_extents == 0)
		goto out;

	ret = -EPERM;
	/* Validate the user is allowed to use user id's mapped to. */
	if (!new_idmap_permitted(file, ns, cap_setid, &new_map))
		goto out;

	/* Map the lower ids from the parent user namespace to the
	 * kernel global id space.
	 */
	for (idx = 0; idx < new_map.nr_extents; idx++) {
		u32 lower_first;
		extent = &new_map.extent[idx];

		lower_first = map_id_range_down(parent_map,
						extent->lower_first,
						extent->count);

		/* Fail if we can not map the specified extent to
		 * the kernel global id space.
		 */
		if (lower_first == (u32) -1)
			goto out;

		extent->lower_first = lower_first;
	}

	/* Install the map: publish the extents before the extent count */
	memcpy(map->extent, new_map.extent,
		new_map.nr_extents*sizeof(new_map.extent[0]));
	smp_wmb();
	map->nr_extents = new_map.nr_extents;

	*ppos = count;
	ret = count;
out:
	mutex_unlock(&userns_state_mutex);
	kfree(kbuf);
	return ret;
}
  696. ssize_t proc_uid_map_write(struct file *file, const char __user *buf,
  697. size_t size, loff_t *ppos)
  698. {
  699. struct seq_file *seq = file->private_data;
  700. struct user_namespace *ns = seq->private;
  701. struct user_namespace *seq_ns = seq_user_ns(seq);
  702. if (!ns->parent)
  703. return -EPERM;
  704. if ((seq_ns != ns) && (seq_ns != ns->parent))
  705. return -EPERM;
  706. return map_write(file, buf, size, ppos, CAP_SETUID,
  707. &ns->uid_map, &ns->parent->uid_map);
  708. }
  709. ssize_t proc_gid_map_write(struct file *file, const char __user *buf,
  710. size_t size, loff_t *ppos)
  711. {
  712. struct seq_file *seq = file->private_data;
  713. struct user_namespace *ns = seq->private;
  714. struct user_namespace *seq_ns = seq_user_ns(seq);
  715. if (!ns->parent)
  716. return -EPERM;
  717. if ((seq_ns != ns) && (seq_ns != ns->parent))
  718. return -EPERM;
  719. return map_write(file, buf, size, ppos, CAP_SETGID,
  720. &ns->gid_map, &ns->parent->gid_map);
  721. }
  722. ssize_t proc_projid_map_write(struct file *file, const char __user *buf,
  723. size_t size, loff_t *ppos)
  724. {
  725. struct seq_file *seq = file->private_data;
  726. struct user_namespace *ns = seq->private;
  727. struct user_namespace *seq_ns = seq_user_ns(seq);
  728. if (!ns->parent)
  729. return -EPERM;
  730. if ((seq_ns != ns) && (seq_ns != ns->parent))
  731. return -EPERM;
  732. /* Anyone can set any valid project id no capability needed */
  733. return map_write(file, buf, size, ppos, -1,
  734. &ns->projid_map, &ns->parent->projid_map);
  735. }
  736. static bool new_idmap_permitted(const struct file *file,
  737. struct user_namespace *ns, int cap_setid,
  738. struct uid_gid_map *new_map)
  739. {
  740. const struct cred *cred = file->f_cred;
  741. /* Don't allow mappings that would allow anything that wouldn't
  742. * be allowed without the establishment of unprivileged mappings.
  743. */
  744. if ((new_map->nr_extents == 1) && (new_map->extent[0].count == 1) &&
  745. uid_eq(ns->owner, cred->euid)) {
  746. u32 id = new_map->extent[0].lower_first;
  747. if (cap_setid == CAP_SETUID) {
  748. kuid_t uid = make_kuid(ns->parent, id);
  749. if (uid_eq(uid, cred->euid))
  750. return true;
  751. } else if (cap_setid == CAP_SETGID) {
  752. kgid_t gid = make_kgid(ns->parent, id);
  753. if (!(ns->flags & USERNS_SETGROUPS_ALLOWED) &&
  754. gid_eq(gid, cred->egid))
  755. return true;
  756. }
  757. }
  758. /* Allow anyone to set a mapping that doesn't require privilege */
  759. if (!cap_valid(cap_setid))
  760. return true;
  761. /* Allow the specified ids if we have the appropriate capability
  762. * (CAP_SETUID or CAP_SETGID) over the parent user namespace.
  763. * And the opener of the id file also had the approprpiate capability.
  764. */
  765. if (ns_capable(ns->parent, cap_setid) &&
  766. file_ns_capable(file, ns->parent, cap_setid))
  767. return true;
  768. return false;
  769. }
  770. int proc_setgroups_show(struct seq_file *seq, void *v)
  771. {
  772. struct user_namespace *ns = seq->private;
  773. unsigned long userns_flags = ACCESS_ONCE(ns->flags);
  774. seq_printf(seq, "%s\n",
  775. (userns_flags & USERNS_SETGROUPS_ALLOWED) ?
  776. "allow" : "deny");
  777. return 0;
  778. }
  779. ssize_t proc_setgroups_write(struct file *file, const char __user *buf,
  780. size_t count, loff_t *ppos)
  781. {
  782. struct seq_file *seq = file->private_data;
  783. struct user_namespace *ns = seq->private;
  784. char kbuf[8], *pos;
  785. bool setgroups_allowed;
  786. ssize_t ret;
  787. /* Only allow a very narrow range of strings to be written */
  788. ret = -EINVAL;
  789. if ((*ppos != 0) || (count >= sizeof(kbuf)))
  790. goto out;
  791. /* What was written? */
  792. ret = -EFAULT;
  793. if (copy_from_user(kbuf, buf, count))
  794. goto out;
  795. kbuf[count] = '\0';
  796. pos = kbuf;
  797. /* What is being requested? */
  798. ret = -EINVAL;
  799. if (strncmp(pos, "allow", 5) == 0) {
  800. pos += 5;
  801. setgroups_allowed = true;
  802. }
  803. else if (strncmp(pos, "deny", 4) == 0) {
  804. pos += 4;
  805. setgroups_allowed = false;
  806. }
  807. else
  808. goto out;
  809. /* Verify there is not trailing junk on the line */
  810. pos = skip_spaces(pos);
  811. if (*pos != '\0')
  812. goto out;
  813. ret = -EPERM;
  814. mutex_lock(&userns_state_mutex);
  815. if (setgroups_allowed) {
  816. /* Enabling setgroups after setgroups has been disabled
  817. * is not allowed.
  818. */
  819. if (!(ns->flags & USERNS_SETGROUPS_ALLOWED))
  820. goto out_unlock;
  821. } else {
  822. /* Permanently disabling setgroups after setgroups has
  823. * been enabled by writing the gid_map is not allowed.
  824. */
  825. if (ns->gid_map.nr_extents != 0)
  826. goto out_unlock;
  827. ns->flags &= ~USERNS_SETGROUPS_ALLOWED;
  828. }
  829. mutex_unlock(&userns_state_mutex);
  830. /* Report a successful write */
  831. *ppos = count;
  832. ret = count;
  833. out:
  834. return ret;
  835. out_unlock:
  836. mutex_unlock(&userns_state_mutex);
  837. goto out;
  838. }
  839. bool userns_may_setgroups(const struct user_namespace *ns)
  840. {
  841. bool allowed;
  842. mutex_lock(&userns_state_mutex);
  843. /* It is not safe to use setgroups until a gid mapping in
  844. * the user namespace has been established.
  845. */
  846. allowed = ns->gid_map.nr_extents != 0;
  847. /* Is setgroups allowed? */
  848. allowed = allowed && (ns->flags & USERNS_SETGROUPS_ALLOWED);
  849. mutex_unlock(&userns_state_mutex);
  850. return allowed;
  851. }
  852. /*
  853. * Returns true if @child is the same namespace or a descendant of
  854. * @ancestor.
  855. */
  856. bool in_userns(const struct user_namespace *ancestor,
  857. const struct user_namespace *child)
  858. {
  859. const struct user_namespace *ns;
  860. for (ns = child; ns->level > ancestor->level; ns = ns->parent)
  861. ;
  862. return (ns == ancestor);
  863. }
  864. bool current_in_userns(const struct user_namespace *target_ns)
  865. {
  866. return in_userns(target_ns, current_user_ns());
  867. }
  868. static inline struct user_namespace *to_user_ns(struct ns_common *ns)
  869. {
  870. return container_of(ns, struct user_namespace, ns);
  871. }
  872. static struct ns_common *userns_get(struct task_struct *task)
  873. {
  874. struct user_namespace *user_ns;
  875. rcu_read_lock();
  876. user_ns = get_user_ns(__task_cred(task)->user_ns);
  877. rcu_read_unlock();
  878. return user_ns ? &user_ns->ns : NULL;
  879. }
  /* ->put handler: hand the user namespace containing @ns to put_user_ns(). */
  static void userns_put(struct ns_common *ns)
  {
          struct user_namespace *user_ns = to_user_ns(ns);

          put_user_ns(user_ns);
  }
  884. static int userns_install(struct nsproxy *nsproxy, struct ns_common *ns)
  885. {
  886. struct user_namespace *user_ns = to_user_ns(ns);
  887. struct cred *cred;
  888. /* Don't allow gaining capabilities by reentering
  889. * the same user namespace.
  890. */
  891. if (user_ns == current_user_ns())
  892. return -EINVAL;
  893. /* Tasks that share a thread group must share a user namespace */
  894. if (!thread_group_empty(current))
  895. return -EINVAL;
  896. if (current->fs->users != 1)
  897. return -EINVAL;
  898. if (!ns_capable(user_ns, CAP_SYS_ADMIN))
  899. return -EPERM;
  900. cred = prepare_creds();
  901. if (!cred)
  902. return -ENOMEM;
  903. put_user_ns(cred->user_ns);
  904. set_cred_user_ns(cred, get_user_ns(user_ns));
  905. return commit_creds(cred);
  906. }
  907. struct ns_common *ns_get_owner(struct ns_common *ns)
  908. {
  909. struct user_namespace *my_user_ns = current_user_ns();
  910. struct user_namespace *owner, *p;
  911. /* See if the owner is in the current user namespace */
  912. owner = p = ns->ops->owner(ns);
  913. for (;;) {
  914. if (!p)
  915. return ERR_PTR(-EPERM);
  916. if (p == my_user_ns)
  917. break;
  918. p = p->parent;
  919. }
  920. return &get_user_ns(owner)->ns;
  921. }
  922. static struct user_namespace *userns_owner(struct ns_common *ns)
  923. {
  924. return to_user_ns(ns)->parent;
  925. }
  926. const struct proc_ns_operations userns_operations = {
  927. .name = "user",
  928. .type = CLONE_NEWUSER,
  929. .get = userns_get,
  930. .put = userns_put,
  931. .install = userns_install,
  932. .owner = userns_owner,
  933. .get_parent = ns_get_owner,
  934. };
  935. static __init int user_namespaces_init(void)
  936. {
  937. user_ns_cachep = KMEM_CACHE(user_namespace, SLAB_PANIC);
  938. return 0;
  939. }
  940. subsys_initcall(user_namespaces_init);