  1. /*
  2. * linux/kernel/sys.c
  3. *
  4. * Copyright (C) 1991, 1992 Linus Torvalds
  5. */
  6. #include <linux/export.h>
  7. #include <linux/mm.h>
  8. #include <linux/utsname.h>
  9. #include <linux/mman.h>
  10. #include <linux/reboot.h>
  11. #include <linux/prctl.h>
  12. #include <linux/highuid.h>
  13. #include <linux/fs.h>
  14. #include <linux/kmod.h>
  15. #include <linux/perf_event.h>
  16. #include <linux/resource.h>
  17. #include <linux/kernel.h>
  18. #include <linux/workqueue.h>
  19. #include <linux/capability.h>
  20. #include <linux/device.h>
  21. #include <linux/key.h>
  22. #include <linux/times.h>
  23. #include <linux/posix-timers.h>
  24. #include <linux/security.h>
  25. #include <linux/dcookies.h>
  26. #include <linux/suspend.h>
  27. #include <linux/tty.h>
  28. #include <linux/signal.h>
  29. #include <linux/cn_proc.h>
  30. #include <linux/getcpu.h>
  31. #include <linux/task_io_accounting_ops.h>
  32. #include <linux/seccomp.h>
  33. #include <linux/cpu.h>
  34. #include <linux/personality.h>
  35. #include <linux/ptrace.h>
  36. #include <linux/fs_struct.h>
  37. #include <linux/file.h>
  38. #include <linux/mount.h>
  39. #include <linux/gfp.h>
  40. #include <linux/syscore_ops.h>
  41. #include <linux/version.h>
  42. #include <linux/ctype.h>
  43. #include <linux/compat.h>
  44. #include <linux/syscalls.h>
  45. #include <linux/kprobes.h>
  46. #include <linux/user_namespace.h>
  47. #include <linux/binfmts.h>
  48. #include <linux/sched.h>
  49. #include <linux/rcupdate.h>
  50. #include <linux/uidgid.h>
  51. #include <linux/cred.h>
  52. #include <linux/kmsg_dump.h>
  53. /* Move somewhere else to avoid recompiling? */
  54. #include <generated/utsrelease.h>
  55. #include <asm/uaccess.h>
  56. #include <asm/io.h>
  57. #include <asm/unistd.h>
  58. #ifndef SET_UNALIGN_CTL
  59. # define SET_UNALIGN_CTL(a, b) (-EINVAL)
  60. #endif
  61. #ifndef GET_UNALIGN_CTL
  62. # define GET_UNALIGN_CTL(a, b) (-EINVAL)
  63. #endif
  64. #ifndef SET_FPEMU_CTL
  65. # define SET_FPEMU_CTL(a, b) (-EINVAL)
  66. #endif
  67. #ifndef GET_FPEMU_CTL
  68. # define GET_FPEMU_CTL(a, b) (-EINVAL)
  69. #endif
  70. #ifndef SET_FPEXC_CTL
  71. # define SET_FPEXC_CTL(a, b) (-EINVAL)
  72. #endif
  73. #ifndef GET_FPEXC_CTL
  74. # define GET_FPEXC_CTL(a, b) (-EINVAL)
  75. #endif
  76. #ifndef GET_ENDIAN
  77. # define GET_ENDIAN(a, b) (-EINVAL)
  78. #endif
  79. #ifndef SET_ENDIAN
  80. # define SET_ENDIAN(a, b) (-EINVAL)
  81. #endif
  82. #ifndef GET_TSC_CTL
  83. # define GET_TSC_CTL(a) (-EINVAL)
  84. #endif
  85. #ifndef SET_TSC_CTL
  86. # define SET_TSC_CTL(a) (-EINVAL)
  87. #endif
  88. /*
  89. * this is where the system-wide overflow UID and GID are defined, for
  90. * architectures that now have 32-bit UID/GID but didn't in the past
  91. */
  92. int overflowuid = DEFAULT_OVERFLOWUID;
  93. int overflowgid = DEFAULT_OVERFLOWGID;
  94. EXPORT_SYMBOL(overflowuid);
  95. EXPORT_SYMBOL(overflowgid);
  96. /*
  97. * the same as above, but for filesystems which can only store a 16-bit
  98. * UID and GID. as such, this is needed on all architectures
  99. */
  100. int fs_overflowuid = DEFAULT_FS_OVERFLOWUID;
  101. int fs_overflowgid = DEFAULT_FS_OVERFLOWUID;
  102. EXPORT_SYMBOL(fs_overflowuid);
  103. EXPORT_SYMBOL(fs_overflowgid);
  104. /*
  105. * Returns true if current's euid is same as p's uid or euid,
  106. * or has CAP_SYS_NICE to p's user_ns.
  107. *
  108. * Called with rcu_read_lock, creds are safe
  109. */
  110. static bool set_one_prio_perm(struct task_struct *p)
  111. {
  112. const struct cred *cred = current_cred(), *pcred = __task_cred(p);
  113. if (uid_eq(pcred->uid, cred->euid) ||
  114. uid_eq(pcred->euid, cred->euid))
  115. return true;
  116. if (ns_capable(pcred->user_ns, CAP_SYS_NICE))
  117. return true;
  118. return false;
  119. }
/*
 * set the priority of a task
 * - the caller must hold the RCU read lock
 *
 * @p:       task whose nice value is to be set
 * @niceval: new nice value (callers clamp to [MIN_NICE, MAX_NICE] first)
 * @error:   error status accumulated by the caller across multiple tasks;
 *           -ESRCH is the caller's "nothing matched yet" sentinel
 *
 * Returns the updated error status: -EPERM / -EACCES / an LSM error on
 * failure for this task, or 0 once at least one task has been updated.
 */
static int set_one_prio(struct task_struct *p, int niceval, int error)
{
	int no_nice;

	/* Caller must be owner-equivalent or CAP_SYS_NICE in p's ns. */
	if (!set_one_prio_perm(p)) {
		error = -EPERM;
		goto out;
	}
	/* Raising priority (lowering nice) needs RLIMIT_NICE headroom. */
	if (niceval < task_nice(p) && !can_nice(p, niceval)) {
		error = -EACCES;
		goto out;
	}
	no_nice = security_task_setnice(p, niceval);
	if (no_nice) {
		error = no_nice;
		goto out;
	}
	/* We are about to succeed for this task: clear the -ESRCH sentinel. */
	if (error == -ESRCH)
		error = 0;
	set_user_nice(p, niceval);
out:
	return error;
}
/*
 * setpriority(2): set the nice value of a process (PRIO_PROCESS), a
 * process group (PRIO_PGRP), or all processes of a user (PRIO_USER),
 * selected by @which/@who; @who == 0 means "the caller".
 *
 * Returns 0 on success, -EINVAL for a bad @which, -ESRCH when nothing
 * matched, or the last per-task error from set_one_prio().
 */
SYSCALL_DEFINE3(setpriority, int, which, int, who, int, niceval)
{
	struct task_struct *g, *p;
	struct user_struct *user;
	const struct cred *cred = current_cred();
	int error = -EINVAL;
	struct pid *pgrp;
	kuid_t uid;

	if (which > PRIO_USER || which < PRIO_PROCESS)
		goto out;

	/* normalize: avoid signed division (rounding problems) */
	error = -ESRCH;
	if (niceval < MIN_NICE)
		niceval = MIN_NICE;
	if (niceval > MAX_NICE)
		niceval = MAX_NICE;

	rcu_read_lock();
	read_lock(&tasklist_lock);
	switch (which) {
	case PRIO_PROCESS:
		/* who == 0: operate on the calling task itself */
		if (who)
			p = find_task_by_vpid(who);
		else
			p = current;
		if (p)
			error = set_one_prio(p, niceval, error);
		break;
	case PRIO_PGRP:
		if (who)
			pgrp = find_vpid(who);
		else
			pgrp = task_pgrp(current);
		do_each_pid_thread(pgrp, PIDTYPE_PGID, p) {
			error = set_one_prio(p, niceval, error);
		} while_each_pid_thread(pgrp, PIDTYPE_PGID, p);
		break;
	case PRIO_USER:
		uid = make_kuid(cred->user_ns, who);
		user = cred->user;
		if (!who)
			uid = cred->uid;
		else if (!uid_eq(uid, cred->uid)) {
			/* find_user() takes a reference, dropped below */
			user = find_user(uid);
			if (!user)
				goto out_unlock;	/* No processes for this user */
		}
		do_each_thread(g, p) {
			if (uid_eq(task_uid(p), uid))
				error = set_one_prio(p, niceval, error);
		} while_each_thread(g, p);
		if (!uid_eq(uid, cred->uid))
			free_uid(user);		/* For find_user() */
		break;
	}
out_unlock:
	read_unlock(&tasklist_lock);
	rcu_read_unlock();
out:
	return error;
}
/*
 * Ugh. To avoid negative return values, "getpriority()" will
 * not return the normal nice-value, but a negated value that
 * has been offset by 20 (ie it returns 40..1 instead of -20..19)
 * to stay compatible.
 *
 * For PRIO_PGRP/PRIO_USER the result is the highest (most favorable)
 * priority of any matching task; -ESRCH if nothing matched.
 */
SYSCALL_DEFINE2(getpriority, int, which, int, who)
{
	struct task_struct *g, *p;
	struct user_struct *user;
	const struct cred *cred = current_cred();
	long niceval, retval = -ESRCH;
	struct pid *pgrp;
	kuid_t uid;

	if (which > PRIO_USER || which < PRIO_PROCESS)
		return -EINVAL;

	rcu_read_lock();
	read_lock(&tasklist_lock);
	switch (which) {
	case PRIO_PROCESS:
		/* who == 0: query the calling task itself */
		if (who)
			p = find_task_by_vpid(who);
		else
			p = current;
		if (p) {
			/* nice [-20,19] -> rlimit-style [40,1] */
			niceval = nice_to_rlimit(task_nice(p));
			if (niceval > retval)
				retval = niceval;
		}
		break;
	case PRIO_PGRP:
		if (who)
			pgrp = find_vpid(who);
		else
			pgrp = task_pgrp(current);
		do_each_pid_thread(pgrp, PIDTYPE_PGID, p) {
			niceval = nice_to_rlimit(task_nice(p));
			if (niceval > retval)
				retval = niceval;
		} while_each_pid_thread(pgrp, PIDTYPE_PGID, p);
		break;
	case PRIO_USER:
		uid = make_kuid(cred->user_ns, who);
		user = cred->user;
		if (!who)
			uid = cred->uid;
		else if (!uid_eq(uid, cred->uid)) {
			/* find_user() takes a reference, dropped below */
			user = find_user(uid);
			if (!user)
				goto out_unlock;	/* No processes for this user */
		}
		do_each_thread(g, p) {
			if (uid_eq(task_uid(p), uid)) {
				niceval = nice_to_rlimit(task_nice(p));
				if (niceval > retval)
					retval = niceval;
			}
		} while_each_thread(g, p);
		if (!uid_eq(uid, cred->uid))
			free_uid(user);		/* for find_user() */
		break;
	}
out_unlock:
	read_unlock(&tasklist_lock);
	rcu_read_unlock();

	return retval;
}
/*
 * Unprivileged users may change the real gid to the effective gid
 * or vice versa. (BSD-style)
 *
 * If you set the real gid at all, or set the effective gid to a value not
 * equal to the real gid, then the saved gid is set to the new effective gid.
 *
 * This makes it possible for a setgid program to completely drop its
 * privileges, which is often a useful assertion to make when you are doing
 * a security audit over a program.
 *
 * The general idea is that a program which uses just setregid() will be
 * 100% compatible with BSD. A program which uses just setgid() will be
 * 100% compatible with POSIX with saved IDs.
 *
 * SMP: There are not races, the GIDs are checked only by filesystem
 * operations (as far as semantic preservation is concerned).
 */
SYSCALL_DEFINE2(setregid, gid_t, rgid, gid_t, egid)
{
	struct user_namespace *ns = current_user_ns();
	const struct cred *old;
	struct cred *new;
	int retval;
	kgid_t krgid, kegid;

	krgid = make_kgid(ns, rgid);
	kegid = make_kgid(ns, egid);

	/* (gid_t)-1 means "leave unchanged"; anything else must map into ns */
	if ((rgid != (gid_t) -1) && !gid_valid(krgid))
		return -EINVAL;
	if ((egid != (gid_t) -1) && !gid_valid(kegid))
		return -EINVAL;

	new = prepare_creds();
	if (!new)
		return -ENOMEM;
	old = current_cred();

	retval = -EPERM;
	if (rgid != (gid_t) -1) {
		/* rgid may be set to the old real or effective gid, or anything
		 * with CAP_SETGID */
		if (gid_eq(old->gid, krgid) ||
		    gid_eq(old->egid, krgid) ||
		    ns_capable(old->user_ns, CAP_SETGID))
			new->gid = krgid;
		else
			goto error;
	}
	if (egid != (gid_t) -1) {
		/* egid may additionally be restored from the saved gid */
		if (gid_eq(old->gid, kegid) ||
		    gid_eq(old->egid, kegid) ||
		    gid_eq(old->sgid, kegid) ||
		    ns_capable(old->user_ns, CAP_SETGID))
			new->egid = kegid;
		else
			goto error;
	}

	/* per the comment above: changing rgid, or egid away from rgid,
	 * also updates the saved gid */
	if (rgid != (gid_t) -1 ||
	    (egid != (gid_t) -1 && !gid_eq(kegid, old->gid)))
		new->sgid = new->egid;
	new->fsgid = new->egid;

	return commit_creds(new);

error:
	abort_creds(new);
	return retval;
}
/*
 * setgid() is implemented like SysV w/ SAVED_IDS
 *
 * SMP: Same implicit races as above.
 */
SYSCALL_DEFINE1(setgid, gid_t, gid)
{
	struct user_namespace *ns = current_user_ns();
	const struct cred *old;
	struct cred *new;
	int retval;
	kgid_t kgid;

	kgid = make_kgid(ns, gid);
	if (!kgid_valid_comment_placeholder) /* see note below */
		;
	if (!gid_valid(kgid))
		return -EINVAL;

	new = prepare_creds();
	if (!new)
		return -ENOMEM;
	old = current_cred();

	retval = -EPERM;
	/* CAP_SETGID sets all four gids; otherwise only egid/fsgid may
	 * change, and only to the current real or saved gid. */
	if (ns_capable(old->user_ns, CAP_SETGID))
		new->gid = new->egid = new->sgid = new->fsgid = kgid;
	else if (gid_eq(kgid, old->gid) || gid_eq(kgid, old->sgid))
		new->egid = new->fsgid = kgid;
	else
		goto error;

	return commit_creds(new);

error:
	abort_creds(new);
	return retval;
}
/*
 * change the user struct in a credentials set to match the new UID
 *
 * Looks up (or allocates) the user_struct for new->uid, swaps it into
 * @new, and drops the reference on the old one. Returns 0 on success,
 * -EAGAIN if the user_struct could not be allocated.
 */
static int set_user(struct cred *new)
{
	struct user_struct *new_user;

	new_user = alloc_uid(new->uid);
	if (!new_user)
		return -EAGAIN;

	/*
	 * We don't fail in case of NPROC limit excess here because too many
	 * poorly written programs don't check set*uid() return code, assuming
	 * it never fails if called by root. We may still enforce NPROC limit
	 * for programs doing set*uid()+execve() by harmlessly deferring the
	 * failure to the execve() stage.
	 */
	if (atomic_read(&new_user->processes) >= rlimit(RLIMIT_NPROC) &&
			new_user != INIT_USER)
		current->flags |= PF_NPROC_EXCEEDED;
	else
		current->flags &= ~PF_NPROC_EXCEEDED;

	/* drop the reference held on the previous user_struct */
	free_uid(new->user);
	new->user = new_user;
	return 0;
}
/*
 * Unprivileged users may change the real uid to the effective uid
 * or vice versa. (BSD-style)
 *
 * If you set the real uid at all, or set the effective uid to a value not
 * equal to the real uid, then the saved uid is set to the new effective uid.
 *
 * This makes it possible for a setuid program to completely drop its
 * privileges, which is often a useful assertion to make when you are doing
 * a security audit over a program.
 *
 * The general idea is that a program which uses just setreuid() will be
 * 100% compatible with BSD. A program which uses just setuid() will be
 * 100% compatible with POSIX with saved IDs.
 */
SYSCALL_DEFINE2(setreuid, uid_t, ruid, uid_t, euid)
{
	struct user_namespace *ns = current_user_ns();
	const struct cred *old;
	struct cred *new;
	int retval;
	kuid_t kruid, keuid;

	kruid = make_kuid(ns, ruid);
	keuid = make_kuid(ns, euid);

	/* (uid_t)-1 means "leave unchanged"; anything else must map into ns */
	if ((ruid != (uid_t) -1) && !uid_valid(kruid))
		return -EINVAL;
	if ((euid != (uid_t) -1) && !uid_valid(keuid))
		return -EINVAL;

	new = prepare_creds();
	if (!new)
		return -ENOMEM;
	old = current_cred();

	retval = -EPERM;
	if (ruid != (uid_t) -1) {
		/* ruid may become the old real or effective uid, or anything
		 * with CAP_SETUID */
		new->uid = kruid;
		if (!uid_eq(old->uid, kruid) &&
		    !uid_eq(old->euid, kruid) &&
		    !ns_capable(old->user_ns, CAP_SETUID))
			goto error;
	}
	if (euid != (uid_t) -1) {
		/* euid may additionally be restored from the saved uid */
		new->euid = keuid;
		if (!uid_eq(old->uid, keuid) &&
		    !uid_eq(old->euid, keuid) &&
		    !uid_eq(old->suid, keuid) &&
		    !ns_capable(old->user_ns, CAP_SETUID))
			goto error;
	}

	/* real uid changed: swap in the matching user_struct */
	if (!uid_eq(new->uid, old->uid)) {
		retval = set_user(new);
		if (retval < 0)
			goto error;
	}
	/* per the comment above: changing ruid, or euid away from ruid,
	 * also updates the saved uid */
	if (ruid != (uid_t) -1 ||
	    (euid != (uid_t) -1 && !uid_eq(keuid, old->uid)))
		new->suid = new->euid;
	new->fsuid = new->euid;

	retval = security_task_fix_setuid(new, old, LSM_SETID_RE);
	if (retval < 0)
		goto error;

	return commit_creds(new);

error:
	abort_creds(new);
	return retval;
}
/*
 * setuid() is implemented like SysV with SAVED_IDS
 *
 * Note that SAVED_ID's is deficient in that a setuid root program
 * like sendmail, for example, cannot set its uid to be a normal
 * user and then switch back, because if you're root, setuid() sets
 * the saved uid too. If you don't like this, blame the bright people
 * in the POSIX committee and/or USG. Note that the BSD-style setreuid()
 * will allow a root program to temporarily drop privileges and be able to
 * regain them by swapping the real and effective uid.
 */
SYSCALL_DEFINE1(setuid, uid_t, uid)
{
	struct user_namespace *ns = current_user_ns();
	const struct cred *old;
	struct cred *new;
	int retval;
	kuid_t kuid;

	kuid = make_kuid(ns, uid);
	if (!uid_valid(kuid))
		return -EINVAL;

	new = prepare_creds();
	if (!new)
		return -ENOMEM;
	old = current_cred();

	retval = -EPERM;
	if (ns_capable(old->user_ns, CAP_SETUID)) {
		/* privileged: set real and saved uid too (see comment above) */
		new->suid = new->uid = kuid;
		if (!uid_eq(kuid, old->uid)) {
			retval = set_user(new);
			if (retval < 0)
				goto error;
		}
	} else if (!uid_eq(kuid, old->uid) && !uid_eq(kuid, new->suid)) {
		/* unprivileged: only the real or saved uid is allowed */
		goto error;
	}

	new->fsuid = new->euid = kuid;

	retval = security_task_fix_setuid(new, old, LSM_SETID_ID);
	if (retval < 0)
		goto error;

	return commit_creds(new);

error:
	abort_creds(new);
	return retval;
}
/*
 * This function implements a generic ability to update ruid, euid,
 * and suid. This allows you to implement the 4.4 compatible seteuid().
 *
 * Each of ruid/euid/suid may be (uid_t)-1 to mean "leave unchanged".
 * Without CAP_SETUID, each new id must equal one of the current
 * real, effective, or saved uids.
 */
SYSCALL_DEFINE3(setresuid, uid_t, ruid, uid_t, euid, uid_t, suid)
{
	struct user_namespace *ns = current_user_ns();
	const struct cred *old;
	struct cred *new;
	int retval;
	kuid_t kruid, keuid, ksuid;

	kruid = make_kuid(ns, ruid);
	keuid = make_kuid(ns, euid);
	ksuid = make_kuid(ns, suid);

	if ((ruid != (uid_t) -1) && !uid_valid(kruid))
		return -EINVAL;
	if ((euid != (uid_t) -1) && !uid_valid(keuid))
		return -EINVAL;
	if ((suid != (uid_t) -1) && !uid_valid(ksuid))
		return -EINVAL;

	new = prepare_creds();
	if (!new)
		return -ENOMEM;
	old = current_cred();

	retval = -EPERM;
	if (!ns_capable(old->user_ns, CAP_SETUID)) {
		/* unprivileged: every requested id must already be one of
		 * the caller's real/effective/saved uids */
		if (ruid != (uid_t) -1 && !uid_eq(kruid, old->uid) &&
		    !uid_eq(kruid, old->euid) && !uid_eq(kruid, old->suid))
			goto error;
		if (euid != (uid_t) -1 && !uid_eq(keuid, old->uid) &&
		    !uid_eq(keuid, old->euid) && !uid_eq(keuid, old->suid))
			goto error;
		if (suid != (uid_t) -1 && !uid_eq(ksuid, old->uid) &&
		    !uid_eq(ksuid, old->euid) && !uid_eq(ksuid, old->suid))
			goto error;
	}

	if (ruid != (uid_t) -1) {
		new->uid = kruid;
		if (!uid_eq(kruid, old->uid)) {
			/* real uid changed: swap in the matching user_struct */
			retval = set_user(new);
			if (retval < 0)
				goto error;
		}
	}
	if (euid != (uid_t) -1)
		new->euid = keuid;
	if (suid != (uid_t) -1)
		new->suid = ksuid;
	new->fsuid = new->euid;

	retval = security_task_fix_setuid(new, old, LSM_SETID_RES);
	if (retval < 0)
		goto error;

	return commit_creds(new);

error:
	abort_creds(new);
	return retval;
}
  558. SYSCALL_DEFINE3(getresuid, uid_t __user *, ruidp, uid_t __user *, euidp, uid_t __user *, suidp)
  559. {
  560. const struct cred *cred = current_cred();
  561. int retval;
  562. uid_t ruid, euid, suid;
  563. ruid = from_kuid_munged(cred->user_ns, cred->uid);
  564. euid = from_kuid_munged(cred->user_ns, cred->euid);
  565. suid = from_kuid_munged(cred->user_ns, cred->suid);
  566. retval = put_user(ruid, ruidp);
  567. if (!retval) {
  568. retval = put_user(euid, euidp);
  569. if (!retval)
  570. return put_user(suid, suidp);
  571. }
  572. return retval;
  573. }
/*
 * Same as above, but for rgid, egid, sgid.
 *
 * Each of rgid/egid/sgid may be (gid_t)-1 to mean "leave unchanged".
 * Without CAP_SETGID, each new id must equal one of the current
 * real, effective, or saved gids.
 */
SYSCALL_DEFINE3(setresgid, gid_t, rgid, gid_t, egid, gid_t, sgid)
{
	struct user_namespace *ns = current_user_ns();
	const struct cred *old;
	struct cred *new;
	int retval;
	kgid_t krgid, kegid, ksgid;

	krgid = make_kgid(ns, rgid);
	kegid = make_kgid(ns, egid);
	ksgid = make_kgid(ns, sgid);

	if ((rgid != (gid_t) -1) && !gid_valid(krgid))
		return -EINVAL;
	if ((egid != (gid_t) -1) && !gid_valid(kegid))
		return -EINVAL;
	if ((sgid != (gid_t) -1) && !gid_valid(ksgid))
		return -EINVAL;

	new = prepare_creds();
	if (!new)
		return -ENOMEM;
	old = current_cred();

	retval = -EPERM;
	if (!ns_capable(old->user_ns, CAP_SETGID)) {
		/* unprivileged: every requested id must already be one of
		 * the caller's real/effective/saved gids */
		if (rgid != (gid_t) -1 && !gid_eq(krgid, old->gid) &&
		    !gid_eq(krgid, old->egid) && !gid_eq(krgid, old->sgid))
			goto error;
		if (egid != (gid_t) -1 && !gid_eq(kegid, old->gid) &&
		    !gid_eq(kegid, old->egid) && !gid_eq(kegid, old->sgid))
			goto error;
		if (sgid != (gid_t) -1 && !gid_eq(ksgid, old->gid) &&
		    !gid_eq(ksgid, old->egid) && !gid_eq(ksgid, old->sgid))
			goto error;
	}

	if (rgid != (gid_t) -1)
		new->gid = krgid;
	if (egid != (gid_t) -1)
		new->egid = kegid;
	if (sgid != (gid_t) -1)
		new->sgid = ksgid;
	new->fsgid = new->egid;

	return commit_creds(new);

error:
	abort_creds(new);
	return retval;
}
  621. SYSCALL_DEFINE3(getresgid, gid_t __user *, rgidp, gid_t __user *, egidp, gid_t __user *, sgidp)
  622. {
  623. const struct cred *cred = current_cred();
  624. int retval;
  625. gid_t rgid, egid, sgid;
  626. rgid = from_kgid_munged(cred->user_ns, cred->gid);
  627. egid = from_kgid_munged(cred->user_ns, cred->egid);
  628. sgid = from_kgid_munged(cred->user_ns, cred->sgid);
  629. retval = put_user(rgid, rgidp);
  630. if (!retval) {
  631. retval = put_user(egid, egidp);
  632. if (!retval)
  633. retval = put_user(sgid, sgidp);
  634. }
  635. return retval;
  636. }
/*
 * "setfsuid()" sets the fsuid - the uid used for filesystem checks. This
 * is used for "access()" and for the NFS daemon (letting nfsd stay at
 * whatever uid it wants to). It normally shadows "euid", except when
 * explicitly set by setfsuid() or for access..
 *
 * Returns the PREVIOUS fsuid in every case, including failure; callers
 * detect failure by comparing against the value they asked for.
 */
SYSCALL_DEFINE1(setfsuid, uid_t, uid)
{
	const struct cred *old;
	struct cred *new;
	uid_t old_fsuid;
	kuid_t kuid;

	old = current_cred();
	old_fsuid = from_kuid_munged(old->user_ns, old->fsuid);

	kuid = make_kuid(old->user_ns, uid);
	if (!uid_valid(kuid))
		return old_fsuid;

	new = prepare_creds();
	if (!new)
		return old_fsuid;

	/* new fsuid must match one of the current uids, or CAP_SETUID */
	if (uid_eq(kuid, old->uid)  || uid_eq(kuid, old->euid) ||
	    uid_eq(kuid, old->suid) || uid_eq(kuid, old->fsuid) ||
	    ns_capable(old->user_ns, CAP_SETUID)) {
		if (!uid_eq(kuid, old->fsuid)) {
			new->fsuid = kuid;
			/* LSM may still veto; on veto we fall through to abort */
			if (security_task_fix_setuid(new, old, LSM_SETID_FS) == 0)
				goto change_okay;
		}
	}

	abort_creds(new);
	return old_fsuid;

change_okay:
	commit_creds(new);
	return old_fsuid;
}
/*
 * Samma på svenska.. ("Same thing in Swedish" - the gid counterpart of
 * setfsuid() above.)
 *
 * Returns the PREVIOUS fsgid in every case, including failure.
 * Note: unlike setfsuid(), no LSM hook is consulted here.
 */
SYSCALL_DEFINE1(setfsgid, gid_t, gid)
{
	const struct cred *old;
	struct cred *new;
	gid_t old_fsgid;
	kgid_t kgid;

	old = current_cred();
	old_fsgid = from_kgid_munged(old->user_ns, old->fsgid);

	kgid = make_kgid(old->user_ns, gid);
	if (!gid_valid(kgid))
		return old_fsgid;

	new = prepare_creds();
	if (!new)
		return old_fsgid;

	/* new fsgid must match one of the current gids, or CAP_SETGID */
	if (gid_eq(kgid, old->gid)  || gid_eq(kgid, old->egid) ||
	    gid_eq(kgid, old->sgid) || gid_eq(kgid, old->fsgid) ||
	    ns_capable(old->user_ns, CAP_SETGID)) {
		if (!gid_eq(kgid, old->fsgid)) {
			new->fsgid = kgid;
			goto change_okay;
		}
	}

	abort_creds(new);
	return old_fsgid;

change_okay:
	commit_creds(new);
	return old_fsgid;
}
  703. /**
  704. * sys_getpid - return the thread group id of the current process
  705. *
  706. * Note, despite the name, this returns the tgid not the pid. The tgid and
  707. * the pid are identical unless CLONE_THREAD was specified on clone() in
  708. * which case the tgid is the same in all threads of the same group.
  709. *
  710. * This is SMP safe as current->tgid does not change.
  711. */
  712. SYSCALL_DEFINE0(getpid)
  713. {
  714. return task_tgid_vnr(current);
  715. }
  716. /* Thread ID - the internal kernel "pid" */
  717. SYSCALL_DEFINE0(gettid)
  718. {
  719. return task_pid_vnr(current);
  720. }
  721. /*
  722. * Accessing ->real_parent is not SMP-safe, it could
  723. * change from under us. However, we can use a stale
  724. * value of ->real_parent under rcu_read_lock(), see
  725. * release_task()->call_rcu(delayed_put_task_struct).
  726. */
  727. SYSCALL_DEFINE0(getppid)
  728. {
  729. int pid;
  730. rcu_read_lock();
  731. pid = task_tgid_vnr(rcu_dereference(current->real_parent));
  732. rcu_read_unlock();
  733. return pid;
  734. }
  735. SYSCALL_DEFINE0(getuid)
  736. {
  737. /* Only we change this so SMP safe */
  738. return from_kuid_munged(current_user_ns(), current_uid());
  739. }
  740. SYSCALL_DEFINE0(geteuid)
  741. {
  742. /* Only we change this so SMP safe */
  743. return from_kuid_munged(current_user_ns(), current_euid());
  744. }
  745. SYSCALL_DEFINE0(getgid)
  746. {
  747. /* Only we change this so SMP safe */
  748. return from_kgid_munged(current_user_ns(), current_gid());
  749. }
  750. SYSCALL_DEFINE0(getegid)
  751. {
  752. /* Only we change this so SMP safe */
  753. return from_kgid_munged(current_user_ns(), current_egid());
  754. }
/*
 * Fill @tms with the cpu times of the calling process: group-wide
 * user/system time plus the totals accumulated from reaped children.
 */
void do_sys_times(struct tms *tms)
{
	cputime_t tgutime, tgstime, cutime, cstime;

	thread_group_cputime_adjusted(current, &tgutime, &tgstime);
	/* cutime/cstime are read locklessly; word-sized reads are atomic,
	 * see the locking discussion above k_getrusage() */
	cutime = current->signal->cutime;
	cstime = current->signal->cstime;
	tms->tms_utime = cputime_to_clock_t(tgutime);
	tms->tms_stime = cputime_to_clock_t(tgstime);
	tms->tms_cutime = cputime_to_clock_t(cutime);
	tms->tms_cstime = cputime_to_clock_t(cstime);
}
SYSCALL_DEFINE1(times, struct tms __user *, tbuf)
{
	/* A NULL tbuf is legal: the caller only wants the return value. */
	if (tbuf) {
		struct tms tmp;

		do_sys_times(&tmp);
		if (copy_to_user(tbuf, &tmp, sizeof(struct tms)))
			return -EFAULT;
	}
	/*
	 * The return value (clock ticks since boot) can look like a
	 * negative errno value; tell the syscall exit path not to treat
	 * it as a failure.
	 */
	force_successful_syscall_return();
	return (long) jiffies_64_to_clock_t(get_jiffies_64());
}
  777. /*
  778. * This needs some heavy checking ...
  779. * I just haven't the stomach for it. I also don't fully
  780. * understand sessions/pgrp etc. Let somebody who does explain it.
  781. *
  782. * OK, I think I have the protection semantics right.... this is really
  783. * only important on a multi-user system anyway, to make sure one user
  784. * can't send a signal to a process owned by another. -TYT, 12/12/91
  785. *
  786. * !PF_FORKNOEXEC check to conform completely to POSIX.
  787. */
SYSCALL_DEFINE2(setpgid, pid_t, pid, pid_t, pgid)
{
	struct task_struct *p;
	struct task_struct *group_leader = current->group_leader;
	struct pid *pgrp;
	int err;

	/* pid == 0 means the calling process; pgid == 0 means "same as pid" */
	if (!pid)
		pid = task_pid_vnr(group_leader);
	if (!pgid)
		pgid = pid;
	if (pgid < 0)
		return -EINVAL;
	rcu_read_lock();

	/* From this point forward we keep holding onto the tasklist lock
	 * so that our parent does not change from under us. -DaveM
	 */
	write_lock_irq(&tasklist_lock);

	err = -ESRCH;
	p = find_task_by_vpid(pid);
	if (!p)
		goto out;

	/* only whole processes (thread group leaders) can be moved */
	err = -EINVAL;
	if (!thread_group_leader(p))
		goto out;

	if (same_thread_group(p->real_parent, group_leader)) {
		/* moving one of our children: it must be in our session
		 * and must not have exec'ed yet (POSIX) */
		err = -EPERM;
		if (task_session(p) != task_session(group_leader))
			goto out;
		err = -EACCES;
		if (!(p->flags & PF_FORKNOEXEC))
			goto out;
	} else {
		/* otherwise we may only move ourselves */
		err = -ESRCH;
		if (p != group_leader)
			goto out;
	}

	/* a session leader may never change its process group */
	err = -EPERM;
	if (p->signal->leader)
		goto out;

	pgrp = task_pid(p);
	if (pgid != pid) {
		struct task_struct *g;

		/* joining an existing group: it must have a member in
		 * our session */
		pgrp = find_vpid(pgid);
		g = pid_task(pgrp, PIDTYPE_PGID);
		if (!g || task_session(g) != task_session(group_leader))
			goto out;
	}

	err = security_task_setpgid(p, pgid);
	if (err)
		goto out;

	if (task_pgrp(p) != pgrp)
		change_pid(p, PIDTYPE_PGID, pgrp);

	err = 0;
out:
	/* All paths lead to here, thus we are safe. -DaveM */
	write_unlock_irq(&tasklist_lock);
	rcu_read_unlock();
	return err;
}
/*
 * Return the process group id of @pid (or of the caller when pid == 0),
 * translated into the caller's pid namespace.
 */
SYSCALL_DEFINE1(getpgid, pid_t, pid)
{
	struct task_struct *p;
	struct pid *grp;
	int retval;

	rcu_read_lock();
	if (!pid)
		grp = task_pgrp(current);
	else {
		retval = -ESRCH;
		p = find_task_by_vpid(pid);
		if (!p)
			goto out;
		grp = task_pgrp(p);
		if (!grp)
			goto out;

		retval = security_task_getpgid(p);
		if (retval)
			goto out;
	}
	retval = pid_vnr(grp);
out:
	rcu_read_unlock();
	return retval;
}
#ifdef __ARCH_WANT_SYS_GETPGRP
SYSCALL_DEFINE0(getpgrp)
{
	/* Equivalent to getpgid(0): the caller's own process group. */
	return sys_getpgid(0);
}
#endif
/*
 * Return the session id of @pid (or of the caller when pid == 0),
 * translated into the caller's pid namespace.
 */
SYSCALL_DEFINE1(getsid, pid_t, pid)
{
	struct task_struct *p;
	struct pid *sid;
	int retval;

	rcu_read_lock();
	if (!pid)
		sid = task_session(current);
	else {
		retval = -ESRCH;
		p = find_task_by_vpid(pid);
		if (!p)
			goto out;
		sid = task_session(p);
		if (!sid)
			goto out;

		retval = security_task_getsid(p);
		if (retval)
			goto out;
	}
	retval = pid_vnr(sid);
out:
	rcu_read_unlock();
	return retval;
}
/*
 * Move the caller's group leader into the session and process group
 * identified by @pid.  Called from setsid() with tasklist_lock held
 * for writing.
 */
static void set_special_pids(struct pid *pid)
{
	struct task_struct *curr = current->group_leader;

	if (task_session(curr) != pid)
		change_pid(curr, PIDTYPE_SID, pid);

	if (task_pgrp(curr) != pid)
		change_pid(curr, PIDTYPE_PGID, pid);
}
/*
 * Create a new session with the calling process as leader.  The new
 * session id and process group id both equal the caller's (group
 * leader's) pid.  Returns the new session id, or -EPERM.
 */
SYSCALL_DEFINE0(setsid)
{
	struct task_struct *group_leader = current->group_leader;
	struct pid *sid = task_pid(group_leader);
	pid_t session = pid_vnr(sid);
	int err = -EPERM;

	write_lock_irq(&tasklist_lock);
	/* Fail if I am already a session leader */
	if (group_leader->signal->leader)
		goto out;

	/* Fail if a process group id already exists that equals the
	 * proposed session id.
	 */
	if (pid_task(sid, PIDTYPE_PGID))
		goto out;

	group_leader->signal->leader = 1;
	set_special_pids(sid);

	/* a new session starts with no controlling terminal */
	proc_clear_tty(group_leader);

	err = session;
out:
	write_unlock_irq(&tasklist_lock);
	if (err > 0) {
		/* notify listeners / attach autogroup outside the lock */
		proc_sid_connector(group_leader);
		sched_autogroup_create_attach(group_leader);
	}
	return err;
}
/* Protects the uts namespace data (hostname, domainname, ...). */
DECLARE_RWSEM(uts_sem);

#ifdef COMPAT_UTS_MACHINE
/*
 * For tasks running with the PER_LINUX32 personality, overwrite the
 * machine field of an already-copied-out utsname with the compat
 * machine string.  Evaluates non-zero if copy_to_user() faults.
 */
#define override_architecture(name) \
	(personality(current->personality) == PER_LINUX32 && \
	 copy_to_user(name->machine, COMPAT_UTS_MACHINE, \
		      sizeof(COMPAT_UTS_MACHINE)))
#else
#define override_architecture(name)	0
#endif
  947. /*
  948. * Work around broken programs that cannot handle "Linux 3.0".
  949. * Instead we map 3.x to 2.6.40+x, so e.g. 3.0 would be 2.6.40
  950. */
/*
 * If the task requested the UNAME26 personality, rewrite the release
 * string already copied to userspace at @release (buffer of @len bytes)
 * as "2.6.<minor+40><tail>".  Returns non-zero on copy_to_user() fault.
 */
static int override_release(char __user *release, size_t len)
{
	int ret = 0;

	if (current->personality & UNAME26) {
		const char *rest = UTS_RELEASE;
		char buf[65] = { 0 };
		int ndots = 0;
		unsigned v;
		size_t copy;

		/* advance @rest past "<major>.<minor>": stop at the third
		 * dot or the first character that is neither digit nor dot
		 * (e.g. the "-rc" or "-flavour" suffix) */
		while (*rest) {
			if (*rest == '.' && ++ndots >= 3)
				break;
			if (!isdigit(*rest) && *rest != '.')
				break;
			rest++;
		}
		/* fake minor = real minor + 40, so 3.0 reads as 2.6.40 */
		v = ((LINUX_VERSION_CODE >> 8) & 0xff) + 40;
		/* never write more than the caller's buffer (or ours) */
		copy = clamp_t(size_t, len, 1, sizeof(buf));
		copy = scnprintf(buf, copy, "2.6.%u%s", v, rest);
		/* +1 to include the NUL; scnprintf guarantees copy < len */
		ret = copy_to_user(release, buf, copy + 1);
	}
	return ret;
}
SYSCALL_DEFINE1(newuname, struct new_utsname __user *, name)
{
	int errno = 0;

	down_read(&uts_sem);
	if (copy_to_user(name, utsname(), sizeof *name))
		errno = -EFAULT;
	up_read(&uts_sem);

	/* post-process the copied-out buffer for legacy personalities */
	if (!errno && override_release(name->release, sizeof(name->release)))
		errno = -EFAULT;
	if (!errno && override_architecture(name))
		errno = -EFAULT;
	return errno;
}
  987. #ifdef __ARCH_WANT_SYS_OLD_UNAME
  988. /*
  989. * Old cruft
  990. */
SYSCALL_DEFINE1(uname, struct old_utsname __user *, name)
{
	int error = 0;

	if (!name)
		return -EFAULT;

	down_read(&uts_sem);
	if (copy_to_user(name, utsname(), sizeof(*name)))
		error = -EFAULT;
	up_read(&uts_sem);

	/* post-process the copied-out buffer for legacy personalities */
	if (!error && override_release(name->release, sizeof(name->release)))
		error = -EFAULT;
	if (!error && override_architecture(name))
		error = -EFAULT;
	return error;
}
SYSCALL_DEFINE1(olduname, struct oldold_utsname __user *, name)
{
	int error;

	if (!name)
		return -EFAULT;

	if (!access_ok(VERIFY_WRITE, name, sizeof(struct oldold_utsname)))
		return -EFAULT;

	down_read(&uts_sem);
	/* The oldold ABI uses short fixed-width fields: copy each field
	 * truncated to __OLD_UTS_LEN and NUL-terminate it explicitly.
	 * __copy_to_user/__put_user are safe after the access_ok() above. */
	error = __copy_to_user(&name->sysname, &utsname()->sysname,
			       __OLD_UTS_LEN);
	error |= __put_user(0, name->sysname + __OLD_UTS_LEN);
	error |= __copy_to_user(&name->nodename, &utsname()->nodename,
				__OLD_UTS_LEN);
	error |= __put_user(0, name->nodename + __OLD_UTS_LEN);
	error |= __copy_to_user(&name->release, &utsname()->release,
				__OLD_UTS_LEN);
	error |= __put_user(0, name->release + __OLD_UTS_LEN);
	error |= __copy_to_user(&name->version, &utsname()->version,
				__OLD_UTS_LEN);
	error |= __put_user(0, name->version + __OLD_UTS_LEN);
	error |= __copy_to_user(&name->machine, &utsname()->machine,
				__OLD_UTS_LEN);
	error |= __put_user(0, name->machine + __OLD_UTS_LEN);
	up_read(&uts_sem);

	/* post-process the copied-out buffer for legacy personalities */
	if (!error && override_architecture(name))
		error = -EFAULT;
	if (!error && override_release(name->release, sizeof(name->release)))
		error = -EFAULT;
	return error ? -EFAULT : 0;
}
  1036. #endif
SYSCALL_DEFINE2(sethostname, char __user *, name, int, len)
{
	int errno;
	char tmp[__NEW_UTS_LEN];

	/* the hostname is per uts namespace; need admin rights over it */
	if (!ns_capable(current->nsproxy->uts_ns->user_ns, CAP_SYS_ADMIN))
		return -EPERM;

	if (len < 0 || len > __NEW_UTS_LEN)
		return -EINVAL;
	down_write(&uts_sem);
	errno = -EFAULT;
	/* NOTE(review): copy_from_user is done while holding uts_sem;
	 * a fault here keeps the semaphore held across the fault path */
	if (!copy_from_user(tmp, name, len)) {
		struct new_utsname *u = utsname();

		/* the name need not be NUL-terminated by the caller;
		 * zero the remainder of the field ourselves */
		memcpy(u->nodename, tmp, len);
		memset(u->nodename + len, 0, sizeof(u->nodename) - len);
		errno = 0;
		uts_proc_notify(UTS_PROC_HOSTNAME);
	}
	up_write(&uts_sem);
	return errno;
}
#ifdef __ARCH_WANT_SYS_GETHOSTNAME
SYSCALL_DEFINE2(gethostname, char __user *, name, int, len)
{
	int i, errno;
	struct new_utsname *u;

	if (len < 0)
		return -EINVAL;
	down_read(&uts_sem);
	u = utsname();
	/* copy the NUL too if it fits; if len is too small the result is
	 * silently truncated and may lack a terminator */
	i = 1 + strlen(u->nodename);
	if (i > len)
		i = len;
	errno = 0;
	if (copy_to_user(name, u->nodename, i))
		errno = -EFAULT;
	up_read(&uts_sem);
	return errno;
}
#endif
  1076. /*
  1077. * Only setdomainname; getdomainname can be implemented by calling
  1078. * uname()
  1079. */
SYSCALL_DEFINE2(setdomainname, char __user *, name, int, len)
{
	int errno;
	char tmp[__NEW_UTS_LEN];

	/* the domainname is per uts namespace; need admin rights over it */
	if (!ns_capable(current->nsproxy->uts_ns->user_ns, CAP_SYS_ADMIN))
		return -EPERM;
	if (len < 0 || len > __NEW_UTS_LEN)
		return -EINVAL;

	down_write(&uts_sem);
	errno = -EFAULT;
	if (!copy_from_user(tmp, name, len)) {
		struct new_utsname *u = utsname();

		/* the name need not be NUL-terminated by the caller;
		 * zero the remainder of the field ourselves */
		memcpy(u->domainname, tmp, len);
		memset(u->domainname + len, 0, sizeof(u->domainname) - len);
		errno = 0;
		uts_proc_notify(UTS_PROC_DOMAINNAME);
	}
	up_write(&uts_sem);
	return errno;
}
  1100. SYSCALL_DEFINE2(getrlimit, unsigned int, resource, struct rlimit __user *, rlim)
  1101. {
  1102. struct rlimit value;
  1103. int ret;
  1104. ret = do_prlimit(current, resource, NULL, &value);
  1105. if (!ret)
  1106. ret = copy_to_user(rlim, &value, sizeof(*rlim)) ? -EFAULT : 0;
  1107. return ret;
  1108. }
#ifdef __ARCH_WANT_SYS_OLD_GETRLIMIT

/*
 * Back compatibility for getrlimit. Needed for some apps.
 */
SYSCALL_DEFINE2(old_getrlimit, unsigned int, resource,
		struct rlimit __user *, rlim)
{
	struct rlimit x;

	if (resource >= RLIM_NLIMITS)
		return -EINVAL;

	task_lock(current->group_leader);
	x = current->signal->rlim[resource];
	task_unlock(current->group_leader);
	/* the old ABI cannot represent values above 0x7FFFFFFF, so clamp
	 * both limits (this also maps RLIM_INFINITY down) */
	if (x.rlim_cur > 0x7FFFFFFF)
		x.rlim_cur = 0x7FFFFFFF;
	if (x.rlim_max > 0x7FFFFFFF)
		x.rlim_max = 0x7FFFFFFF;
	return copy_to_user(rlim, &x, sizeof(x)) ? -EFAULT : 0;
}
#endif
/*
 * On 32-bit kernels any value that does not fit in an unsigned long is
 * treated as infinity; on 64-bit only the exact RLIM64_INFINITY
 * sentinel is.
 */
static inline bool rlim64_is_infinity(__u64 rlim64)
{
#if BITS_PER_LONG < 64
	return rlim64 >= ULONG_MAX;
#else
	return rlim64 == RLIM64_INFINITY;
#endif
}
  1137. static void rlim_to_rlim64(const struct rlimit *rlim, struct rlimit64 *rlim64)
  1138. {
  1139. if (rlim->rlim_cur == RLIM_INFINITY)
  1140. rlim64->rlim_cur = RLIM64_INFINITY;
  1141. else
  1142. rlim64->rlim_cur = rlim->rlim_cur;
  1143. if (rlim->rlim_max == RLIM_INFINITY)
  1144. rlim64->rlim_max = RLIM64_INFINITY;
  1145. else
  1146. rlim64->rlim_max = rlim->rlim_max;
  1147. }
  1148. static void rlim64_to_rlim(const struct rlimit64 *rlim64, struct rlimit *rlim)
  1149. {
  1150. if (rlim64_is_infinity(rlim64->rlim_cur))
  1151. rlim->rlim_cur = RLIM_INFINITY;
  1152. else
  1153. rlim->rlim_cur = (unsigned long)rlim64->rlim_cur;
  1154. if (rlim64_is_infinity(rlim64->rlim_max))
  1155. rlim->rlim_max = RLIM_INFINITY;
  1156. else
  1157. rlim->rlim_max = (unsigned long)rlim64->rlim_max;
  1158. }
  1159. /* make sure you are allowed to change @tsk limits before calling this */
/*
 * do_prlimit - read and/or write one resource limit of a task
 * @tsk:      target task
 * @resource: RLIMIT_* index
 * @new_rlim: limit to install, or NULL to only read
 * @old_rlim: where to store the previous limit, or NULL
 *
 * make sure you are allowed to change @tsk limits before calling this
 */
int do_prlimit(struct task_struct *tsk, unsigned int resource,
		struct rlimit *new_rlim, struct rlimit *old_rlim)
{
	struct rlimit *rlim;
	int retval = 0;

	if (resource >= RLIM_NLIMITS)
		return -EINVAL;
	if (new_rlim) {
		if (new_rlim->rlim_cur > new_rlim->rlim_max)
			return -EINVAL;
		if (resource == RLIMIT_NOFILE &&
				new_rlim->rlim_max > sysctl_nr_open)
			return -EPERM;
	}

	/* protect tsk->signal and tsk->sighand from disappearing */
	read_lock(&tasklist_lock);
	if (!tsk->sighand) {
		retval = -ESRCH;
		goto out;
	}

	rlim = tsk->signal->rlim + resource;
	task_lock(tsk->group_leader);
	if (new_rlim) {
		/* Keep the capable check against init_user_ns until
		   cgroups can contain all limits */
		if (new_rlim->rlim_max > rlim->rlim_max &&
				!capable(CAP_SYS_RESOURCE))
			retval = -EPERM;
		if (!retval)
			retval = security_task_setrlimit(tsk->group_leader,
					resource, new_rlim);
		if (resource == RLIMIT_CPU && new_rlim->rlim_cur == 0) {
			/*
			 * The caller is asking for an immediate RLIMIT_CPU
			 * expiry. But we use the zero value to mean "it was
			 * never set". So let's cheat and make it one second
			 * instead
			 */
			new_rlim->rlim_cur = 1;
		}
	}
	if (!retval) {
		if (old_rlim)
			*old_rlim = *rlim;
		if (new_rlim)
			*rlim = *new_rlim;
	}
	task_unlock(tsk->group_leader);

	/*
	 * RLIMIT_CPU handling. Note that the kernel fails to return an error
	 * code if it rejected the user's attempt to set RLIMIT_CPU. This is a
	 * very long-standing error, and fixing it now risks breakage of
	 * applications, so we live with it
	 */
	if (!retval && new_rlim && resource == RLIMIT_CPU &&
			new_rlim->rlim_cur != RLIM_INFINITY)
		update_rlimit_cpu(tsk, new_rlim->rlim_cur);
out:
	read_unlock(&tasklist_lock);
	return retval;
}
  1221. /* rcu lock must be held */
/*
 * May the caller read/modify @task's resource limits?
 * rcu lock must be held
 */
static int check_prlimit_permission(struct task_struct *task)
{
	const struct cred *cred = current_cred(), *tcred;

	/* operating on yourself is always allowed */
	if (current == task)
		return 0;

	tcred = __task_cred(task);
	/* our real ids must match all of the target's real, effective and
	 * saved ids */
	if (uid_eq(cred->uid, tcred->euid) &&
	    uid_eq(cred->uid, tcred->suid) &&
	    uid_eq(cred->uid, tcred->uid) &&
	    gid_eq(cred->gid, tcred->egid) &&
	    gid_eq(cred->gid, tcred->sgid) &&
	    gid_eq(cred->gid, tcred->gid))
		return 0;

	/* otherwise CAP_SYS_RESOURCE over the target's user ns suffices */
	if (ns_capable(tcred->user_ns, CAP_SYS_RESOURCE))
		return 0;

	return -EPERM;
}
SYSCALL_DEFINE4(prlimit64, pid_t, pid, unsigned int, resource,
		const struct rlimit64 __user *, new_rlim,
		struct rlimit64 __user *, old_rlim)
{
	struct rlimit64 old64, new64;
	struct rlimit old, new;
	struct task_struct *tsk;
	int ret;

	if (new_rlim) {
		if (copy_from_user(&new64, new_rlim, sizeof(new64)))
			return -EFAULT;
		rlim64_to_rlim(&new64, &new);
	}

	rcu_read_lock();
	/* pid == 0 means the calling process */
	tsk = pid ? find_task_by_vpid(pid) : current;
	if (!tsk) {
		rcu_read_unlock();
		return -ESRCH;
	}
	ret = check_prlimit_permission(tsk);
	if (ret) {
		rcu_read_unlock();
		return ret;
	}
	/* pin the task so it cannot go away once we drop the rcu lock */
	get_task_struct(tsk);
	rcu_read_unlock();

	ret = do_prlimit(tsk, resource, new_rlim ? &new : NULL,
			old_rlim ? &old : NULL);

	if (!ret && old_rlim) {
		rlim_to_rlim64(&old, &old64);
		if (copy_to_user(old_rlim, &old64, sizeof(old64)))
			ret = -EFAULT;
	}

	put_task_struct(tsk);
	return ret;
}
  1275. SYSCALL_DEFINE2(setrlimit, unsigned int, resource, struct rlimit __user *, rlim)
  1276. {
  1277. struct rlimit new_rlim;
  1278. if (copy_from_user(&new_rlim, rlim, sizeof(*rlim)))
  1279. return -EFAULT;
  1280. return do_prlimit(current, resource, &new_rlim, NULL);
  1281. }
  1282. /*
  1283. * It would make sense to put struct rusage in the task_struct,
  1284. * except that would make the task_struct be *really big*. After
  1285. * task_struct gets moved into malloc'ed memory, it would
  1286. * make sense to do this. It will make moving the rest of the information
  1287. * a lot simpler! (Which we're not doing right now because we're not
  1288. * measuring them yet).
  1289. *
  1290. * When sampling multiple threads for RUSAGE_SELF, under SMP we might have
  1291. * races with threads incrementing their own counters. But since word
  1292. * reads are atomic, we either get new values or old values and we don't
  1293. * care which for the sums. We always take the siglock to protect reading
  1294. * the c* fields from p->signal from races with exit.c updating those
  1295. * fields when reaping, so a sample either gets all the additions of a
  1296. * given child after it's reaped, or none so this sample is before reaping.
  1297. *
  1298. * Locking:
 * We need to take the siglock for CHILDREN, SELF and BOTH
 * for the cases current multithreaded, non-current single threaded
 * non-current multithreaded. Thread traversal is now safe with
 * the siglock held.
 * Strictly speaking, we do not need to take the siglock if we are current and
  1304. * single threaded, as no one else can take our signal_struct away, no one
  1305. * else can reap the children to update signal->c* counters, and no one else
  1306. * can race with the signal-> fields. If we do not take any lock, the
  1307. * signal-> fields could be read out of order while another thread was just
  1308. * exiting. So we should place a read memory barrier when we avoid the lock.
  1309. * On the writer side, write memory barrier is implied in __exit_signal
  1310. * as __exit_signal releases the siglock spinlock after updating the signal->
  1311. * fields. But we don't do this yet to keep things simple.
  1312. *
  1313. */
/* Add one thread's context-switch, page-fault and block-I/O counters
 * to the running totals in @r. */
static void accumulate_thread_rusage(struct task_struct *t, struct rusage *r)
{
	r->ru_nvcsw += t->nvcsw;
	r->ru_nivcsw += t->nivcsw;
	r->ru_minflt += t->min_flt;
	r->ru_majflt += t->maj_flt;
	r->ru_inblock += task_io_get_inblock(t);
	r->ru_oublock += task_io_get_oublock(t);
}
/*
 * Gather resource usage for @p into @r according to @who (RUSAGE_SELF,
 * RUSAGE_CHILDREN, RUSAGE_BOTH or RUSAGE_THREAD).  See the locking
 * discussion in the comment block above.
 */
static void k_getrusage(struct task_struct *p, int who, struct rusage *r)
{
	struct task_struct *t;
	unsigned long flags;
	cputime_t tgutime, tgstime, utime, stime;
	unsigned long maxrss = 0;

	memset((char *)r, 0, sizeof (*r));
	utime = stime = 0;

	if (who == RUSAGE_THREAD) {
		/* single thread only: no siglock / thread walk needed */
		task_cputime_adjusted(current, &utime, &stime);
		accumulate_thread_rusage(p, r);
		maxrss = p->signal->maxrss;
		goto out;
	}

	if (!lock_task_sighand(p, &flags))
		return;

	switch (who) {
	case RUSAGE_BOTH:
	case RUSAGE_CHILDREN:
		/* totals accumulated from already-reaped children */
		utime = p->signal->cutime;
		stime = p->signal->cstime;
		r->ru_nvcsw = p->signal->cnvcsw;
		r->ru_nivcsw = p->signal->cnivcsw;
		r->ru_minflt = p->signal->cmin_flt;
		r->ru_majflt = p->signal->cmaj_flt;
		r->ru_inblock = p->signal->cinblock;
		r->ru_oublock = p->signal->coublock;
		maxrss = p->signal->cmaxrss;

		if (who == RUSAGE_CHILDREN)
			break;
		/* fall through: RUSAGE_BOTH also adds our own usage */

	case RUSAGE_SELF:
		thread_group_cputime_adjusted(p, &tgutime, &tgstime);
		utime += tgutime;
		stime += tgstime;
		/* counters of threads that already exited live in signal */
		r->ru_nvcsw += p->signal->nvcsw;
		r->ru_nivcsw += p->signal->nivcsw;
		r->ru_minflt += p->signal->min_flt;
		r->ru_majflt += p->signal->maj_flt;
		r->ru_inblock += p->signal->inblock;
		r->ru_oublock += p->signal->oublock;
		if (maxrss < p->signal->maxrss)
			maxrss = p->signal->maxrss;
		/* add every live thread's own counters */
		t = p;
		do {
			accumulate_thread_rusage(t, r);
		} while_each_thread(p, t);
		break;

	default:
		BUG();
	}
	unlock_task_sighand(p, &flags);

out:
	cputime_to_timeval(utime, &r->ru_utime);
	cputime_to_timeval(stime, &r->ru_stime);

	if (who != RUSAGE_CHILDREN) {
		struct mm_struct *mm = get_task_mm(p);

		if (mm) {
			setmax_mm_hiwater_rss(&maxrss, mm);
			mmput(mm);
		}
	}
	r->ru_maxrss = maxrss * (PAGE_SIZE / 1024); /* convert pages to KBs */
}
  1386. int getrusage(struct task_struct *p, int who, struct rusage __user *ru)
  1387. {
  1388. struct rusage r;
  1389. k_getrusage(p, who, &r);
  1390. return copy_to_user(ru, &r, sizeof(r)) ? -EFAULT : 0;
  1391. }
  1392. SYSCALL_DEFINE2(getrusage, int, who, struct rusage __user *, ru)
  1393. {
  1394. if (who != RUSAGE_SELF && who != RUSAGE_CHILDREN &&
  1395. who != RUSAGE_THREAD)
  1396. return -EINVAL;
  1397. return getrusage(current, who, ru);
  1398. }
#ifdef CONFIG_COMPAT
COMPAT_SYSCALL_DEFINE2(getrusage, int, who, struct compat_rusage __user *, ru)
{
	struct rusage r;

	if (who != RUSAGE_SELF && who != RUSAGE_CHILDREN &&
	    who != RUSAGE_THREAD)
		return -EINVAL;

	/* gather natively, then translate to the compat (32-bit) layout */
	k_getrusage(current, who, &r);
	return put_compat_rusage(&r, ru);
}
#endif
  1410. SYSCALL_DEFINE1(umask, int, mask)
  1411. {
  1412. mask = xchg(&current->fs->umask, mask & S_IRWXUGO);
  1413. return mask;
  1414. }
/*
 * Replace @mm's /proc/pid/exe symlink target with the file behind @fd.
 * Caller must hold mm->mmap_sem (asserted below).
 */
static int prctl_set_mm_exe_file_locked(struct mm_struct *mm, unsigned int fd)
{
	struct fd exe;
	struct inode *inode;
	int err;

	VM_BUG_ON_MM(!rwsem_is_locked(&mm->mmap_sem), mm);

	exe = fdget(fd);
	if (!exe.file)
		return -EBADF;

	inode = file_inode(exe.file);

	/*
	 * Because the original mm->exe_file points to executable file, make
	 * sure that this one is executable as well, to avoid breaking an
	 * overall picture.
	 */
	err = -EACCES;
	if (!S_ISREG(inode->i_mode) ||
	    exe.file->f_path.mnt->mnt_flags & MNT_NOEXEC)
		goto exit;

	err = inode_permission(inode, MAY_EXEC);
	if (err)
		goto exit;

	/*
	 * Forbid mm->exe_file change if old file still mapped.
	 */
	err = -EBUSY;
	if (mm->exe_file) {
		struct vm_area_struct *vma;

		for (vma = mm->mmap; vma; vma = vma->vm_next)
			if (vma->vm_file &&
			    path_equal(&vma->vm_file->f_path,
				       &mm->exe_file->f_path))
				goto exit;
	}

	/*
	 * The symlink can be changed only once, just to disallow arbitrary
	 * transitions malicious software might bring in. This means one
	 * could make a snapshot over all processes running and monitor
	 * /proc/pid/exe changes to notice unusual activity if needed.
	 */
	err = -EPERM;
	if (test_and_set_bit(MMF_EXE_FILE_CHANGED, &mm->flags))
		goto exit;

	err = 0;
	set_mm_exe_file(mm, exe.file);	/* this grabs a reference to exe.file */
exit:
	fdput(exe);
	return err;
}
  1464. #ifdef CONFIG_CHECKPOINT_RESTORE
  1465. /*
  1466. * WARNING: we don't require any capability here so be very careful
  1467. * in what is allowed for modification from userspace.
  1468. */
/*
 * Sanity-check a userspace-supplied prctl_mm_map before it is applied
 * to current->mm.
 *
 * WARNING: we don't require any capability here so be very careful
 * in what is allowed for modification from userspace.
 */
static int validate_prctl_map(struct prctl_mm_map *prctl_map)
{
	unsigned long mmap_max_addr = TASK_SIZE;
	struct mm_struct *mm = current->mm;
	int error = -EINVAL, i;

	static const unsigned char offsets[] = {
		offsetof(struct prctl_mm_map, start_code),
		offsetof(struct prctl_mm_map, end_code),
		offsetof(struct prctl_mm_map, start_data),
		offsetof(struct prctl_mm_map, end_data),
		offsetof(struct prctl_mm_map, start_brk),
		offsetof(struct prctl_mm_map, brk),
		offsetof(struct prctl_mm_map, start_stack),
		offsetof(struct prctl_mm_map, arg_start),
		offsetof(struct prctl_mm_map, arg_end),
		offsetof(struct prctl_mm_map, env_start),
		offsetof(struct prctl_mm_map, env_end),
	};

	/*
	 * Make sure the members are not somewhere outside
	 * of allowed address space.
	 */
	for (i = 0; i < ARRAY_SIZE(offsets); i++) {
		u64 val = *(u64 *)((char *)prctl_map + offsets[i]);

		if ((unsigned long)val >= mmap_max_addr ||
		    (unsigned long)val < mmap_min_addr)
			goto out;
	}

	/*
	 * Make sure the pairs are ordered.
	 */
#define __prctl_check_order(__m1, __op, __m2)				\
	((unsigned long)prctl_map->__m1 __op				\
	 (unsigned long)prctl_map->__m2) ? 0 : -EINVAL
	error  = __prctl_check_order(start_code, <, end_code);
	error |= __prctl_check_order(start_data, <, end_data);
	error |= __prctl_check_order(start_brk, <=, brk);
	error |= __prctl_check_order(arg_start, <=, arg_end);
	error |= __prctl_check_order(env_start, <=, env_end);
	if (error)
		goto out;
#undef __prctl_check_order

	error = -EINVAL;

	/*
	 * @brk should be after @end_data in traditional maps.
	 */
	if (prctl_map->start_brk <= prctl_map->end_data ||
	    prctl_map->brk <= prctl_map->end_data)
		goto out;

	/*
	 * Neither we should allow to override limits if they set.
	 */
	if (check_data_rlimit(rlimit(RLIMIT_DATA), prctl_map->brk,
			      prctl_map->start_brk, prctl_map->end_data,
			      prctl_map->start_data))
		goto out;

	/*
	 * Someone is trying to cheat the auxv vector.
	 */
	if (prctl_map->auxv_size) {
		if (!prctl_map->auxv || prctl_map->auxv_size > sizeof(mm->saved_auxv))
			goto out;
	}

	/*
	 * Finally, make sure the caller has the rights to
	 * change /proc/pid/exe link: only local root should
	 * be allowed to.
	 */
	if (prctl_map->exe_fd != (u32)-1) {
		struct user_namespace *ns = current_user_ns();
		const struct cred *cred = current_cred();

		if (!uid_eq(cred->uid, make_kuid(ns, 0)) ||
		    !gid_eq(cred->gid, make_kgid(ns, 0)))
			goto out;
	}

	error = 0;
out:
	return error;
}
/*
 * Handle PR_SET_MM_MAP / PR_SET_MM_MAP_SIZE: install a whole set of mm
 * layout fields (and optionally the exe file and saved auxv) in one go.
 * @addr points to a userspace struct prctl_mm_map of @data_size bytes.
 */
static int prctl_set_mm_map(int opt, const void __user *addr, unsigned long data_size)
{
	struct prctl_mm_map prctl_map = { .exe_fd = (u32)-1, };
	unsigned long user_auxv[AT_VECTOR_SIZE];
	struct mm_struct *mm = current->mm;
	int error;

	BUILD_BUG_ON(sizeof(user_auxv) != sizeof(mm->saved_auxv));
	BUILD_BUG_ON(sizeof(struct prctl_mm_map) > 256);

	/* PR_SET_MM_MAP_SIZE just reports the expected struct size so
	 * userspace can detect ABI mismatches */
	if (opt == PR_SET_MM_MAP_SIZE)
		return put_user((unsigned int)sizeof(prctl_map),
				(unsigned int __user *)addr);

	if (data_size != sizeof(prctl_map))
		return -EINVAL;

	if (copy_from_user(&prctl_map, addr, sizeof(prctl_map)))
		return -EFAULT;

	error = validate_prctl_map(&prctl_map);
	if (error)
		return error;

	if (prctl_map.auxv_size) {
		/* copy the new auxv into a zeroed local buffer first */
		memset(user_auxv, 0, sizeof(user_auxv));
		if (copy_from_user(user_auxv,
				   (const void __user *)prctl_map.auxv,
				   prctl_map.auxv_size))
			return -EFAULT;

		/* Last entry must be AT_NULL as specification requires */
		user_auxv[AT_VECTOR_SIZE - 2] = AT_NULL;
		user_auxv[AT_VECTOR_SIZE - 1] = AT_NULL;
	}

	/* exe file change needs mmap_sem held for write; the remaining
	 * field updates only need it for read, so downgrade afterwards
	 * (note the matching up_read at the end) */
	down_write(&mm->mmap_sem);
	if (prctl_map.exe_fd != (u32)-1)
		error = prctl_set_mm_exe_file_locked(mm, prctl_map.exe_fd);
	downgrade_write(&mm->mmap_sem);
	if (error)
		goto out;

	/*
	 * We don't validate if these members are pointing to
	 * real present VMAs because application may have correspond
	 * VMAs already unmapped and kernel uses these members for statistics
	 * output in procfs mostly, except
	 *
	 *  - @start_brk/@brk which are used in do_brk but kernel lookups
	 *    for VMAs when updating these members so anything wrong written
	 *    here cause kernel to swear at userspace program but won't lead
	 *    to any problem in kernel itself
	 */
	mm->start_code	= prctl_map.start_code;
	mm->end_code	= prctl_map.end_code;
	mm->start_data	= prctl_map.start_data;
	mm->end_data	= prctl_map.end_data;
	mm->start_brk	= prctl_map.start_brk;
	mm->brk		= prctl_map.brk;
	mm->start_stack	= prctl_map.start_stack;
	mm->arg_start	= prctl_map.arg_start;
	mm->arg_end	= prctl_map.arg_end;
	mm->env_start	= prctl_map.env_start;
	mm->env_end	= prctl_map.env_end;

	/*
	 * Note this update of @saved_auxv is lockless thus
	 * if someone reads this member in procfs while we're
	 * updating -- it may get partly updated results. It's
	 * known and acceptable trade off: we leave it as is to
	 * not introduce additional locks here making the kernel
	 * more complex.
	 */
	if (prctl_map.auxv_size)
		memcpy(mm->saved_auxv, user_auxv, sizeof(user_auxv));

	error = 0;
out:
	up_read(&mm->mmap_sem);
	return error;
}
  1619. #endif /* CONFIG_CHECKPOINT_RESTORE */
/*
 * prctl_set_mm - handle PR_SET_MM_* prctl options: let a sufficiently
 * privileged task (CAP_SYS_RESOURCE) rewrite the procfs-visible layout
 * fields of its own mm (code/data/brk/stack/arg/env boundaries, exe
 * file, saved auxv).  Used primarily by checkpoint/restore tooling.
 *
 * @opt:  PR_SET_MM_* sub-command selecting which field to set
 * @addr: new value (an address for most options; a struct pointer for
 *        PR_SET_MM_MAP, an fd for PR_SET_MM_EXE_FILE, a user buffer
 *        for PR_SET_MM_AUXV)
 * @arg4: payload size for AUXV/MAP/MAP_SIZE; must be 0 otherwise
 * @arg5: must always be 0
 *
 * Returns 0 on success, -EINVAL/-EPERM/-EFAULT on failure.
 */
static int prctl_set_mm(int opt, unsigned long addr,
			unsigned long arg4, unsigned long arg5)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	int error;

	/* arg5 is never used; arg4 only carries data for AUXV/MAP/MAP_SIZE. */
	if (arg5 || (arg4 && (opt != PR_SET_MM_AUXV &&
			      opt != PR_SET_MM_MAP &&
			      opt != PR_SET_MM_MAP_SIZE)))
		return -EINVAL;

#ifdef CONFIG_CHECKPOINT_RESTORE
	/* Batched variant: one userspace struct sets every field at once. */
	if (opt == PR_SET_MM_MAP || opt == PR_SET_MM_MAP_SIZE)
		return prctl_set_mm_map(opt, (const void __user *)addr, arg4);
#endif

	if (!capable(CAP_SYS_RESOURCE))
		return -EPERM;

	if (opt == PR_SET_MM_EXE_FILE) {
		/* Here @addr is an fd, not an address — hence the cast. */
		down_write(&mm->mmap_sem);
		error = prctl_set_mm_exe_file_locked(mm, (unsigned int)addr);
		up_write(&mm->mmap_sem);
		return error;
	}

	/* All remaining options take a userspace address as @addr. */
	if (addr >= TASK_SIZE || addr < mmap_min_addr)
		return -EINVAL;

	error = -EINVAL;

	/*
	 * NOTE(review): the mm fields below are written while holding
	 * mmap_sem only for read; presumably tolerable because they feed
	 * procfs output rather than VMA lookups — confirm against the
	 * current locking rules before relying on this.
	 */
	down_read(&mm->mmap_sem);
	vma = find_vma(mm, addr);

	switch (opt) {
	case PR_SET_MM_START_CODE:
		mm->start_code = addr;
		break;
	case PR_SET_MM_END_CODE:
		mm->end_code = addr;
		break;
	case PR_SET_MM_START_DATA:
		mm->start_data = addr;
		break;
	case PR_SET_MM_END_DATA:
		mm->end_data = addr;
		break;

	case PR_SET_MM_START_BRK:
		/* brk must live above the data segment ... */
		if (addr <= mm->end_data)
			goto out;

		/* ... and the resulting brk span must fit RLIMIT_DATA. */
		if (check_data_rlimit(rlimit(RLIMIT_DATA), mm->brk, addr,
				      mm->end_data, mm->start_data))
			goto out;

		mm->start_brk = addr;
		break;

	case PR_SET_MM_BRK:
		if (addr <= mm->end_data)
			goto out;

		if (check_data_rlimit(rlimit(RLIMIT_DATA), addr, mm->start_brk,
				      mm->end_data, mm->start_data))
			goto out;

		mm->brk = addr;
		break;

	/*
	 * If command line arguments and environment
	 * are placed somewhere else on stack, we can
	 * set them up here, ARG_START/END to setup
	 * command line arguments and ENV_START/END
	 * for environment.
	 */
	case PR_SET_MM_START_STACK:
	case PR_SET_MM_ARG_START:
	case PR_SET_MM_ARG_END:
	case PR_SET_MM_ENV_START:
	case PR_SET_MM_ENV_END:
		/* These must point into an existing mapping. */
		if (!vma) {
			error = -EFAULT;
			goto out;
		}
		if (opt == PR_SET_MM_START_STACK)
			mm->start_stack = addr;
		else if (opt == PR_SET_MM_ARG_START)
			mm->arg_start = addr;
		else if (opt == PR_SET_MM_ARG_END)
			mm->arg_end = addr;
		else if (opt == PR_SET_MM_ENV_START)
			mm->env_start = addr;
		else if (opt == PR_SET_MM_ENV_END)
			mm->env_end = addr;
		break;

	/*
	 * This doesn't move auxiliary vector itself
	 * since it's pinned to mm_struct, but allow
	 * to fill vector with new values. It's up
	 * to a caller to provide sane values here
	 * otherwise user space tools which use this
	 * vector might be unhappy.
	 */
	case PR_SET_MM_AUXV: {
		unsigned long user_auxv[AT_VECTOR_SIZE];

		if (arg4 > sizeof(user_auxv))
			goto out;

		/* Drop mmap_sem before touching user memory (may fault). */
		up_read(&mm->mmap_sem);

		if (copy_from_user(user_auxv, (const void __user *)addr, arg4))
			return -EFAULT;

		/* Make sure the last entry is always AT_NULL */
		user_auxv[AT_VECTOR_SIZE - 2] = 0;
		user_auxv[AT_VECTOR_SIZE - 1] = 0;

		BUILD_BUG_ON(sizeof(user_auxv) != sizeof(mm->saved_auxv));

		task_lock(current);
		memcpy(mm->saved_auxv, user_auxv, arg4);
		task_unlock(current);
		/* mmap_sem was already released above. */
		return 0;
	}
	default:
		goto out;
	}

	error = 0;
out:
	up_read(&mm->mmap_sem);
	return error;
}
  1735. #ifdef CONFIG_CHECKPOINT_RESTORE
  1736. static int prctl_get_tid_address(struct task_struct *me, int __user **tid_addr)
  1737. {
  1738. return put_user(me->clear_child_tid, tid_addr);
  1739. }
  1740. #else
  1741. static int prctl_get_tid_address(struct task_struct *me, int __user **tid_addr)
  1742. {
  1743. return -EINVAL;
  1744. }
  1745. #endif
/*
 * prctl(2) - operations on the calling process/thread.  Dispatches on
 * @option; the meaning of @arg2..@arg5 depends on the option.  Unused
 * trailing arguments are generally required to be zero.  Returns 0 or
 * a positive result on success, a negative errno on failure.
 */
SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
		unsigned long, arg4, unsigned long, arg5)
{
	struct task_struct *me = current;
	unsigned char comm[sizeof(me->comm)];
	long error;

	/* Give the security module first refusal; -ENOSYS means "not handled". */
	error = security_task_prctl(option, arg2, arg3, arg4, arg5);
	if (error != -ENOSYS)
		return error;

	error = 0;
	switch (option) {
	case PR_SET_PDEATHSIG:
		if (!valid_signal(arg2)) {
			error = -EINVAL;
			break;
		}
		me->pdeath_signal = arg2;
		break;
	case PR_GET_PDEATHSIG:
		error = put_user(me->pdeath_signal, (int __user *)arg2);
		break;
	case PR_GET_DUMPABLE:
		error = get_dumpable(me->mm);
		break;
	case PR_SET_DUMPABLE:
		/* Only DISABLE and USER may be set directly from userspace. */
		if (arg2 != SUID_DUMP_DISABLE && arg2 != SUID_DUMP_USER) {
			error = -EINVAL;
			break;
		}
		set_dumpable(me->mm, arg2);
		break;

	/* Arch-provided controls; default to -EINVAL where unsupported. */
	case PR_SET_UNALIGN:
		error = SET_UNALIGN_CTL(me, arg2);
		break;
	case PR_GET_UNALIGN:
		error = GET_UNALIGN_CTL(me, arg2);
		break;
	case PR_SET_FPEMU:
		error = SET_FPEMU_CTL(me, arg2);
		break;
	case PR_GET_FPEMU:
		error = GET_FPEMU_CTL(me, arg2);
		break;
	case PR_SET_FPEXC:
		error = SET_FPEXC_CTL(me, arg2);
		break;
	case PR_GET_FPEXC:
		error = GET_FPEXC_CTL(me, arg2);
		break;

	/* Statistical process timing is the only mode supported. */
	case PR_GET_TIMING:
		error = PR_TIMING_STATISTICAL;
		break;
	case PR_SET_TIMING:
		if (arg2 != PR_TIMING_STATISTICAL)
			error = -EINVAL;
		break;

	case PR_SET_NAME:
		/* Reserve the final byte as the NUL terminator. */
		comm[sizeof(me->comm) - 1] = 0;
		if (strncpy_from_user(comm, (char __user *)arg2,
				      sizeof(me->comm) - 1) < 0)
			return -EFAULT;
		set_task_comm(me, comm);
		proc_comm_connector(me);
		break;
	case PR_GET_NAME:
		get_task_comm(comm, me);
		if (copy_to_user((char __user *)arg2, comm, sizeof(comm)))
			return -EFAULT;
		break;
	case PR_GET_ENDIAN:
		error = GET_ENDIAN(me, arg2);
		break;
	case PR_SET_ENDIAN:
		error = SET_ENDIAN(me, arg2);
		break;
	case PR_GET_SECCOMP:
		error = prctl_get_seccomp();
		break;
	case PR_SET_SECCOMP:
		error = prctl_set_seccomp(arg2, (char __user *)arg3);
		break;
	case PR_GET_TSC:
		error = GET_TSC_CTL(arg2);
		break;
	case PR_SET_TSC:
		error = SET_TSC_CTL(arg2);
		break;
	case PR_TASK_PERF_EVENTS_DISABLE:
		error = perf_event_task_disable();
		break;
	case PR_TASK_PERF_EVENTS_ENABLE:
		error = perf_event_task_enable();
		break;
	case PR_GET_TIMERSLACK:
		error = current->timer_slack_ns;
		break;
	case PR_SET_TIMERSLACK:
		/*
		 * arg2 is unsigned, so "<= 0" only matches 0, which
		 * resets the slack to the per-task default.
		 */
		if (arg2 <= 0)
			current->timer_slack_ns =
					current->default_timer_slack_ns;
		else
			current->timer_slack_ns = arg2;
		break;
	case PR_MCE_KILL:
		/* Set this task's memory-failure (MCE) kill policy. */
		if (arg4 | arg5)
			return -EINVAL;
		switch (arg2) {
		case PR_MCE_KILL_CLEAR:
			if (arg3 != 0)
				return -EINVAL;
			current->flags &= ~PF_MCE_PROCESS;
			break;
		case PR_MCE_KILL_SET:
			current->flags |= PF_MCE_PROCESS;
			if (arg3 == PR_MCE_KILL_EARLY)
				current->flags |= PF_MCE_EARLY;
			else if (arg3 == PR_MCE_KILL_LATE)
				current->flags &= ~PF_MCE_EARLY;
			else if (arg3 == PR_MCE_KILL_DEFAULT)
				current->flags &=
						~(PF_MCE_EARLY|PF_MCE_PROCESS);
			else
				return -EINVAL;
			break;
		default:
			return -EINVAL;
		}
		break;
	case PR_MCE_KILL_GET:
		if (arg2 | arg3 | arg4 | arg5)
			return -EINVAL;
		if (current->flags & PF_MCE_PROCESS)
			error = (current->flags & PF_MCE_EARLY) ?
				PR_MCE_KILL_EARLY : PR_MCE_KILL_LATE;
		else
			error = PR_MCE_KILL_DEFAULT;
		break;
	case PR_SET_MM:
		error = prctl_set_mm(arg2, arg3, arg4, arg5);
		break;
	case PR_GET_TID_ADDRESS:
		error = prctl_get_tid_address(me, (int __user **)arg2);
		break;
	case PR_SET_CHILD_SUBREAPER:
		me->signal->is_child_subreaper = !!arg2;
		break;
	case PR_GET_CHILD_SUBREAPER:
		error = put_user(me->signal->is_child_subreaper,
				 (int __user *)arg2);
		break;
	case PR_SET_NO_NEW_PRIVS:
		/* One-way switch: can be set, never cleared. */
		if (arg2 != 1 || arg3 || arg4 || arg5)
			return -EINVAL;

		task_set_no_new_privs(current);
		break;
	case PR_GET_NO_NEW_PRIVS:
		if (arg2 || arg3 || arg4 || arg5)
			return -EINVAL;
		return task_no_new_privs(current) ? 1 : 0;
	case PR_GET_THP_DISABLE:
		if (arg2 || arg3 || arg4 || arg5)
			return -EINVAL;
		error = !!(me->mm->def_flags & VM_NOHUGEPAGE);
		break;
	case PR_SET_THP_DISABLE:
		if (arg3 || arg4 || arg5)
			return -EINVAL;
		/* def_flags changes affect future mappings only. */
		down_write(&me->mm->mmap_sem);
		if (arg2)
			me->mm->def_flags |= VM_NOHUGEPAGE;
		else
			me->mm->def_flags &= ~VM_NOHUGEPAGE;
		up_write(&me->mm->mmap_sem);
		break;
	default:
		error = -EINVAL;
		break;
	}
	return error;
}
  1926. SYSCALL_DEFINE3(getcpu, unsigned __user *, cpup, unsigned __user *, nodep,
  1927. struct getcpu_cache __user *, unused)
  1928. {
  1929. int err = 0;
  1930. int cpu = raw_smp_processor_id();
  1931. if (cpup)
  1932. err |= put_user(cpu, cpup);
  1933. if (nodep)
  1934. err |= put_user(cpu_to_node(cpu), nodep);
  1935. return err ? -EFAULT : 0;
  1936. }
  1937. /**
  1938. * do_sysinfo - fill in sysinfo struct
  1939. * @info: pointer to buffer to fill
  1940. */
  1941. static int do_sysinfo(struct sysinfo *info)
  1942. {
  1943. unsigned long mem_total, sav_total;
  1944. unsigned int mem_unit, bitcount;
  1945. struct timespec tp;
  1946. memset(info, 0, sizeof(struct sysinfo));
  1947. get_monotonic_boottime(&tp);
  1948. info->uptime = tp.tv_sec + (tp.tv_nsec ? 1 : 0);
  1949. get_avenrun(info->loads, 0, SI_LOAD_SHIFT - FSHIFT);
  1950. info->procs = nr_threads;
  1951. si_meminfo(info);
  1952. si_swapinfo(info);
  1953. /*
  1954. * If the sum of all the available memory (i.e. ram + swap)
  1955. * is less than can be stored in a 32 bit unsigned long then
  1956. * we can be binary compatible with 2.2.x kernels. If not,
  1957. * well, in that case 2.2.x was broken anyways...
  1958. *
  1959. * -Erik Andersen <andersee@debian.org>
  1960. */
  1961. mem_total = info->totalram + info->totalswap;
  1962. if (mem_total < info->totalram || mem_total < info->totalswap)
  1963. goto out;
  1964. bitcount = 0;
  1965. mem_unit = info->mem_unit;
  1966. while (mem_unit > 1) {
  1967. bitcount++;
  1968. mem_unit >>= 1;
  1969. sav_total = mem_total;
  1970. mem_total <<= 1;
  1971. if (mem_total < sav_total)
  1972. goto out;
  1973. }
  1974. /*
  1975. * If mem_total did not overflow, multiply all memory values by
  1976. * info->mem_unit and set it to 1. This leaves things compatible
  1977. * with 2.2.x, and also retains compatibility with earlier 2.4.x
  1978. * kernels...
  1979. */
  1980. info->mem_unit = 1;
  1981. info->totalram <<= bitcount;
  1982. info->freeram <<= bitcount;
  1983. info->sharedram <<= bitcount;
  1984. info->bufferram <<= bitcount;
  1985. info->totalswap <<= bitcount;
  1986. info->freeswap <<= bitcount;
  1987. info->totalhigh <<= bitcount;
  1988. info->freehigh <<= bitcount;
  1989. out:
  1990. return 0;
  1991. }
  1992. SYSCALL_DEFINE1(sysinfo, struct sysinfo __user *, info)
  1993. {
  1994. struct sysinfo val;
  1995. do_sysinfo(&val);
  1996. if (copy_to_user(info, &val, sizeof(struct sysinfo)))
  1997. return -EFAULT;
  1998. return 0;
  1999. }
#ifdef CONFIG_COMPAT
/*
 * 32-bit userspace layout of struct sysinfo.  Field order, widths and
 * padding are ABI — they must match what 32-bit libc expects exactly.
 */
struct compat_sysinfo {
	s32 uptime;		/* seconds since boot */
	u32 loads[3];		/* 1/5/15 minute load averages, fixed point */
	u32 totalram;		/* total usable RAM (in mem_unit units) */
	u32 freeram;		/* available RAM */
	u32 sharedram;		/* shared memory */
	u32 bufferram;		/* memory used by buffers */
	u32 totalswap;		/* total swap space */
	u32 freeswap;		/* free swap space */
	u16 procs;		/* number of current processes */
	u16 pad;		/* explicit alignment padding */
	u32 totalhigh;		/* total high memory */
	u32 freehigh;		/* free high memory */
	u32 mem_unit;		/* unit size of the memory fields, in bytes */
	char _f[20-2*sizeof(u32)-sizeof(int)];	/* padding reserved by the ABI */
};
/*
 * Compat sysinfo(2): gather the native struct sysinfo, rescale it so
 * every counter fits in 32 bits, and copy it field-by-field into the
 * 32-bit struct compat_sysinfo layout.
 */
COMPAT_SYSCALL_DEFINE1(sysinfo, struct compat_sysinfo __user *, info)
{
	struct sysinfo s;

	do_sysinfo(&s);

	/* Check to see if any memory value is too large for 32-bit and scale
	 *  down if needed
	 */
	if (upper_32_bits(s.totalram) || upper_32_bits(s.totalswap)) {
		int bitcount = 0;

		/* Grow mem_unit to PAGE_SIZE, shrinking the counters to match. */
		while (s.mem_unit < PAGE_SIZE) {
			s.mem_unit <<= 1;
			bitcount++;
		}

		s.totalram >>= bitcount;
		s.freeram >>= bitcount;
		s.sharedram >>= bitcount;
		s.bufferram >>= bitcount;
		s.totalswap >>= bitcount;
		s.freeswap >>= bitcount;
		s.totalhigh >>= bitcount;
		s.freehigh >>= bitcount;
	}

	/*
	 * One access_ok() check up front, then unchecked __put_user for
	 * each field.  The || chain short-circuits: no user memory is
	 * written if the destination fails validation.
	 */
	if (!access_ok(VERIFY_WRITE, info, sizeof(struct compat_sysinfo)) ||
	    __put_user(s.uptime, &info->uptime) ||
	    __put_user(s.loads[0], &info->loads[0]) ||
	    __put_user(s.loads[1], &info->loads[1]) ||
	    __put_user(s.loads[2], &info->loads[2]) ||
	    __put_user(s.totalram, &info->totalram) ||
	    __put_user(s.freeram, &info->freeram) ||
	    __put_user(s.sharedram, &info->sharedram) ||
	    __put_user(s.bufferram, &info->bufferram) ||
	    __put_user(s.totalswap, &info->totalswap) ||
	    __put_user(s.freeswap, &info->freeswap) ||
	    __put_user(s.procs, &info->procs) ||
	    __put_user(s.totalhigh, &info->totalhigh) ||
	    __put_user(s.freehigh, &info->freehigh) ||
	    __put_user(s.mem_unit, &info->mem_unit))
		return -EFAULT;

	return 0;
}
#endif /* CONFIG_COMPAT */