// SPDX-License-Identifier: GPL-2.0
/*
 * This file contains the procedures for the handling of select and poll
 *
 * Created for Linux based loosely upon Mathius Lattner's minix
 * patches by Peter MacDonald. Heavily edited by Linus.
 *
 *  4 February 1994
 *     COFF/ELF binary emulation. If the process has the STICKY_TIMEOUTS
 *     flag set in its personality we do *not* modify the given timeout
 *     parameter to reflect time remaining.
 *
 *  24 January 2000
 *     Changed sys_poll()/do_poll() to use PAGE_SIZE chunk-based allocation
 *     of fds to overcome nfds < 16390 descriptors limit (Tigran Aivazian).
 */

#include <linux/kernel.h>
#include <linux/sched/signal.h>
#include <linux/sched/rt.h>
#include <linux/syscalls.h>
#include <linux/export.h>
#include <linux/slab.h>
#include <linux/poll.h>
#include <linux/personality.h> /* for STICKY_TIMEOUTS */
#include <linux/file.h>
#include <linux/fdtable.h>
#include <linux/fs.h>
#include <linux/rcupdate.h>
#include <linux/hrtimer.h>
#include <linux/freezer.h>
#include <net/busy_poll.h>
#include <linux/vmalloc.h>

#include <linux/uaccess.h>
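
/*
 * Dispatch a poll query to the file's own method. Legacy drivers supply
 * ->poll(); newer ones supply ->get_poll_head()/->poll_mask(), in which
 * case we queue ourselves on the returned wait queue head (only when the
 * caller actually wants to wait, i.e. pt->_qproc is set) and then ask the
 * driver for the currently-ready event mask.
 */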
__poll_t vfs_poll(struct file *file, struct poll_table_struct *pt)
{
        if (file->f_op->poll) {
                return file->f_op->poll(file, pt);
        } else if (file_has_poll_mask(file)) {
                unsigned int events = poll_requested_events(pt);
                struct wait_queue_head *head;

                if (pt && pt->_qproc) {
                        head = file->f_op->get_poll_head(file, events);
                        if (!head)
                                return DEFAULT_POLLMASK;
                        if (IS_ERR(head))
                                return EPOLLERR;
                        pt->_qproc(file, head, pt);
                }

                return file->f_op->poll_mask(file, events);
        } else {
                return DEFAULT_POLLMASK;
        }
}
EXPORT_SYMBOL_GPL(vfs_poll);

/*
 * Estimate expected accuracy in ns from a timeval.
 *
 * After quite a bit of churning around, we've settled on
 * a simple thing of taking 0.1% of the timeout as the
 * slack, with a cap of 100 msec.
 * "nice" tasks get a 0.5% slack instead.
 *
 * Consider this comment an open invitation to come up with even
 * better solutions..
 */

#define MAX_SLACK       (100 * NSEC_PER_MSEC)

static long __estimate_accuracy(struct timespec64 *tv)
{
        long slack;
        int divfactor = 1000;

        if (tv->tv_sec < 0)
                return 0;

        if (task_nice(current) > 0)
                divfactor = divfactor / 5;

        if (tv->tv_sec > MAX_SLACK / (NSEC_PER_SEC/divfactor))
                return MAX_SLACK;

        slack = tv->tv_nsec / divfactor;
        slack += tv->tv_sec * (NSEC_PER_SEC/divfactor);

        if (slack > MAX_SLACK)
                return MAX_SLACK;

        return slack;
}

u64 select_estimate_accuracy(struct timespec64 *tv)
{
        u64 ret;
        struct timespec64 now;

        /*
         * Realtime tasks get a slack of 0 for obvious reasons.
         */

        if (rt_task(current))
                return 0;

        ktime_get_ts64(&now);
        now = timespec64_sub(*tv, now);
        ret = __estimate_accuracy(&now);
        if (ret < current->timer_slack_ns)
                return current->timer_slack_ns;
        return ret;
}
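
/*
 * Wait-queue entries for a poller are stored first in a small inline
 * array inside struct poll_wqueues; overflow spills into page-sized
 * poll_table_page chunks chained through ->next.
 */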
struct poll_table_page {
        struct poll_table_page * next;
        struct poll_table_entry * entry;
        struct poll_table_entry entries[0];
};

#define POLL_TABLE_FULL(table) \
        ((unsigned long)((table)->entry+1) > PAGE_SIZE + (unsigned long)(table))

/*
 * Ok, Peter made a complicated, but straightforward multiple_wait() function.
 * I have rewritten this, taking some shortcuts: This code may not be easy to
 * follow, but it should be free of race-conditions, and it's practical. If you
 * understand what I'm doing here, then you understand how the linux
 * sleep/wakeup mechanism works.
 *
 * Two very simple procedures, poll_wait() and poll_freewait() do all the
 * work. poll_wait() is an inline-function defined in <linux/poll.h>,
 * as all select/poll functions have to call it to add an entry to the
 * poll table.
 */
static void __pollwait(struct file *filp, wait_queue_head_t *wait_address,
                       poll_table *p);

void poll_initwait(struct poll_wqueues *pwq)
{
        init_poll_funcptr(&pwq->pt, __pollwait);
        pwq->polling_task = current;
        pwq->triggered = 0;
        pwq->error = 0;
        pwq->table = NULL;
        pwq->inline_index = 0;
}
EXPORT_SYMBOL(poll_initwait);

static void free_poll_entry(struct poll_table_entry *entry)
{
        remove_wait_queue(entry->wait_address, &entry->wait);
        fput(entry->filp);
}

void poll_freewait(struct poll_wqueues *pwq)
{
        struct poll_table_page * p = pwq->table;
        int i;

        for (i = 0; i < pwq->inline_index; i++)
                free_poll_entry(pwq->inline_entries + i);
        while (p) {
                struct poll_table_entry * entry;
                struct poll_table_page *old;

                entry = p->entry;
                do {
                        entry--;
                        free_poll_entry(entry);
                } while (entry > p->entries);
                old = p;
                p = p->next;
                free_page((unsigned long) old);
        }
}
EXPORT_SYMBOL(poll_freewait);
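
/*
 * Hand out the next free poll_table_entry: first from the inline array,
 * then from the current page, allocating a fresh page when the current
 * one is full. Returns NULL (and records -ENOMEM) if allocation fails.
 */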
static struct poll_table_entry *poll_get_entry(struct poll_wqueues *p)
{
        struct poll_table_page *table = p->table;

        if (p->inline_index < N_INLINE_POLL_ENTRIES)
                return p->inline_entries + p->inline_index++;

        if (!table || POLL_TABLE_FULL(table)) {
                struct poll_table_page *new_table;

                new_table = (struct poll_table_page *) __get_free_page(GFP_KERNEL);
                if (!new_table) {
                        p->error = -ENOMEM;
                        return NULL;
                }
                new_table->entry = new_table->entries;
                new_table->next = table;
                p->table = new_table;
                table = new_table;
        }

        return table->entry++;
}

static int __pollwake(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
{
        struct poll_wqueues *pwq = wait->private;
        DECLARE_WAITQUEUE(dummy_wait, pwq->polling_task);

        /*
         * Although this function is called under waitqueue lock, LOCK
         * doesn't imply write barrier and the users expect write
         * barrier semantics on wakeup functions. The following
         * smp_wmb() is equivalent to smp_wmb() in try_to_wake_up()
         * and is paired with smp_store_mb() in poll_schedule_timeout.
         */
        smp_wmb();
        pwq->triggered = 1;

        /*
         * Perform the default wake up operation using a dummy
         * waitqueue.
         *
         * TODO: This is hacky but there currently is no interface to
         * pass in @sync. @sync is scheduled to be removed and once
         * that happens, wake_up_process() can be used directly.
         */
        return default_wake_function(&dummy_wait, mode, sync, key);
}
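
/*
 * Filter wakeups: only wake the polling task when the event that fired
 * intersects the events this entry registered interest in (entry->key).
 */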
static int pollwake(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
{
        struct poll_table_entry *entry;

        entry = container_of(wait, struct poll_table_entry, wait);
        if (key && !(key_to_poll(key) & entry->key))
                return 0;
        return __pollwake(wait, mode, sync, key);
}

/* Add a new entry */
static void __pollwait(struct file *filp, wait_queue_head_t *wait_address,
                       poll_table *p)
{
        struct poll_wqueues *pwq = container_of(p, struct poll_wqueues, pt);
        struct poll_table_entry *entry = poll_get_entry(pwq);
        if (!entry)
                return;
        entry->filp = get_file(filp);
        entry->wait_address = wait_address;
        entry->key = p->_key;
        init_waitqueue_func_entry(&entry->wait, pollwake);
        entry->wait.private = pwq;
        add_wait_queue(wait_address, &entry->wait);
}

static int poll_schedule_timeout(struct poll_wqueues *pwq, int state,
                                 ktime_t *expires, unsigned long slack)
{
        int rc = -EINTR;

        set_current_state(state);
        if (!pwq->triggered)
                rc = schedule_hrtimeout_range(expires, slack, HRTIMER_MODE_ABS);
        __set_current_state(TASK_RUNNING);

        /*
         * Prepare for the next iteration.
         *
         * The following smp_store_mb() serves two purposes. First, it's
         * the counterpart rmb of the wmb in pollwake() such that data
         * written before wake up is always visible after wake up.
         * Second, the full barrier guarantees that triggered clearing
         * doesn't pass event check of the next iteration. Note that
         * this problem doesn't exist for the first iteration as
         * add_wait_queue() has full barrier semantics.
         */
        smp_store_mb(pwq->triggered, 0);

        return rc;
}

/**
 * poll_select_set_timeout - helper function to setup the timeout value
 * @to:		pointer to timespec64 variable for the final timeout
 * @sec:	seconds (from user space)
 * @nsec:	nanoseconds (from user space)
 *
 * Note, we do not use a timespec for the user space value here. That
 * way we can use the function for timeval and compat interfaces as well.
 *
 * Returns -EINVAL if sec/nsec are not normalized. Otherwise 0.
 */
int poll_select_set_timeout(struct timespec64 *to, time64_t sec, long nsec)
{
        struct timespec64 ts = {.tv_sec = sec, .tv_nsec = nsec};

        if (!timespec64_valid(&ts))
                return -EINVAL;

        /* Optimize for the zero timeout value here */
        if (!sec && !nsec) {
                to->tv_sec = to->tv_nsec = 0;
        } else {
                ktime_get_ts64(to);
                *to = timespec64_add_safe(*to, ts);
        }
        return 0;
}
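
/*
 * Write the unslept remainder of the timeout back to userspace (as a
 * timeval or a timespec, per @timeval), unless the task's personality
 * has STICKY_TIMEOUTS set or the timeout was zero to begin with.
 */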
static int poll_select_copy_remaining(struct timespec64 *end_time,
                                      void __user *p,
                                      int timeval, int ret)
{
        struct timespec64 rts;
        struct timeval rtv;

        if (!p)
                return ret;

        if (current->personality & STICKY_TIMEOUTS)
                goto sticky;

        /* No update for zero timeout */
        if (!end_time->tv_sec && !end_time->tv_nsec)
                return ret;

        ktime_get_ts64(&rts);
        rts = timespec64_sub(*end_time, rts);
        if (rts.tv_sec < 0)
                rts.tv_sec = rts.tv_nsec = 0;

        if (timeval) {
                if (sizeof(rtv) > sizeof(rtv.tv_sec) + sizeof(rtv.tv_usec))
                        memset(&rtv, 0, sizeof(rtv));
                rtv.tv_sec = rts.tv_sec;
                rtv.tv_usec = rts.tv_nsec / NSEC_PER_USEC;

                if (!copy_to_user(p, &rtv, sizeof(rtv)))
                        return ret;

        } else if (!put_timespec64(&rts, p))
                return ret;

        /*
         * If an application puts its timeval in read-only memory, we
         * don't want the Linux-specific update to the timeval to
         * cause a fault after the select has completed
         * successfully. However, because we're not updating the
         * timeval, we can't restart the system call.
         */

sticky:
        if (ret == -ERESTARTNOHAND)
                ret = -EINTR;
        return ret;
}

/*
 * Scalable version of the fd_set.
 */

typedef struct {
        unsigned long *in, *out, *ex;
        unsigned long *res_in, *res_out, *res_ex;
} fd_set_bits;

/*
 * How many longwords for "nr" bits?
 */
#define FDS_BITPERLONG  (8*sizeof(long))
#define FDS_LONGS(nr)   (((nr)+FDS_BITPERLONG-1)/FDS_BITPERLONG)
#define FDS_BYTES(nr)   (FDS_LONGS(nr)*sizeof(long))

/*
 * We do a VERIFY_WRITE here even though we are only reading this time:
 * we'll write to it eventually..
 *
 * Use "unsigned long" accesses to let user-mode fd_set's be long-aligned.
 */
static inline
int get_fd_set(unsigned long nr, void __user *ufdset, unsigned long *fdset)
{
        nr = FDS_BYTES(nr);
        if (ufdset)
                return copy_from_user(fdset, ufdset, nr) ? -EFAULT : 0;

        memset(fdset, 0, nr);
        return 0;
}

static inline unsigned long __must_check
set_fd_set(unsigned long nr, void __user *ufdset, unsigned long *fdset)
{
        if (ufdset)
                return __copy_to_user(ufdset, fdset, FDS_BYTES(nr));
        return 0;
}

static inline
void zero_fd_set(unsigned long nr, unsigned long *fdset)
{
        memset(fdset, 0, FDS_BYTES(nr));
}

#define FDS_IN(fds, n)          (fds->in + n)
#define FDS_OUT(fds, n)         (fds->out + n)
#define FDS_EX(fds, n)          (fds->ex + n)

#define BITS(fds, n)    (*FDS_IN(fds, n)|*FDS_OUT(fds, n)|*FDS_EX(fds, n))
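
/*
 * Verify that every fd requested in the three input bitmaps is actually
 * open (returning -EBADF otherwise) and compute one past the highest
 * requested fd, so the main loop only scans words that can matter.
 */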
static int max_select_fd(unsigned long n, fd_set_bits *fds)
{
        unsigned long *open_fds;
        unsigned long set;
        int max;
        struct fdtable *fdt;

        /* handle last in-complete long-word first */
        set = ~(~0UL << (n & (BITS_PER_LONG-1)));
        n /= BITS_PER_LONG;
        fdt = files_fdtable(current->files);
        open_fds = fdt->open_fds + n;
        max = 0;
        if (set) {
                set &= BITS(fds, n);
                if (set) {
                        if (!(set & ~*open_fds))
                                goto get_max;
                        return -EBADF;
                }
        }
        while (n) {
                open_fds--;
                n--;
                set = BITS(fds, n);
                if (!set)
                        continue;
                if (set & ~*open_fds)
                        return -EBADF;
                if (max)
                        continue;
get_max:
                do {
                        max++;
                        set >>= 1;
                } while (set);
                max += n * BITS_PER_LONG;
        }

        return max;
}

#define POLLIN_SET (EPOLLRDNORM | EPOLLRDBAND | EPOLLIN | EPOLLHUP | EPOLLERR)
#define POLLOUT_SET (EPOLLWRBAND | EPOLLWRNORM | EPOLLOUT | EPOLLERR)
#define POLLEX_SET (EPOLLPRI)

static inline void wait_key_set(poll_table *wait, unsigned long in,
                                unsigned long out, unsigned long bit,
                                __poll_t ll_flag)
{
        wait->_key = POLLEX_SET | ll_flag;
        if (in & bit)
                wait->_key |= POLLIN_SET;
        if (out & bit)
                wait->_key |= POLLOUT_SET;
}
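
/*
 * Core select loop: poll every fd named in the bitmaps, record ready
 * ones in the res_* bitmaps, and sleep (with the estimated slack) until
 * something is ready, the timeout expires or a signal arrives.
 * Wait-queue registration only happens on the first pass; after that
 * wait->_qproc is cleared so repeat polls don't re-register.
 */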
static int do_select(int n, fd_set_bits *fds, struct timespec64 *end_time)
{
        ktime_t expire, *to = NULL;
        struct poll_wqueues table;
        poll_table *wait;
        int retval, i, timed_out = 0;
        u64 slack = 0;
        __poll_t busy_flag = net_busy_loop_on() ? POLL_BUSY_LOOP : 0;
        unsigned long busy_start = 0;

        rcu_read_lock();
        retval = max_select_fd(n, fds);
        rcu_read_unlock();

        if (retval < 0)
                return retval;
        n = retval;

        poll_initwait(&table);
        wait = &table.pt;
        if (end_time && !end_time->tv_sec && !end_time->tv_nsec) {
                wait->_qproc = NULL;
                timed_out = 1;
        }

        if (end_time && !timed_out)
                slack = select_estimate_accuracy(end_time);

        retval = 0;
        for (;;) {
                unsigned long *rinp, *routp, *rexp, *inp, *outp, *exp;
                bool can_busy_loop = false;

                inp = fds->in; outp = fds->out; exp = fds->ex;
                rinp = fds->res_in; routp = fds->res_out; rexp = fds->res_ex;

                for (i = 0; i < n; ++rinp, ++routp, ++rexp) {
                        unsigned long in, out, ex, all_bits, bit = 1, j;
                        unsigned long res_in = 0, res_out = 0, res_ex = 0;
                        __poll_t mask;

                        in = *inp++; out = *outp++; ex = *exp++;
                        all_bits = in | out | ex;
                        if (all_bits == 0) {
                                i += BITS_PER_LONG;
                                continue;
                        }

                        for (j = 0; j < BITS_PER_LONG; ++j, ++i, bit <<= 1) {
                                struct fd f;
                                if (i >= n)
                                        break;
                                if (!(bit & all_bits))
                                        continue;
                                f = fdget(i);
                                if (f.file) {
                                        wait_key_set(wait, in, out, bit,
                                                     busy_flag);
                                        mask = vfs_poll(f.file, wait);

                                        fdput(f);
                                        if ((mask & POLLIN_SET) && (in & bit)) {
                                                res_in |= bit;
                                                retval++;
                                                wait->_qproc = NULL;
                                        }
                                        if ((mask & POLLOUT_SET) && (out & bit)) {
                                                res_out |= bit;
                                                retval++;
                                                wait->_qproc = NULL;
                                        }
                                        if ((mask & POLLEX_SET) && (ex & bit)) {
                                                res_ex |= bit;
                                                retval++;
                                                wait->_qproc = NULL;
                                        }
                                        /* got something, stop busy polling */
                                        if (retval) {
                                                can_busy_loop = false;
                                                busy_flag = 0;

                                        /*
                                         * only remember a returned
                                         * POLL_BUSY_LOOP if we asked for it
                                         */
                                        } else if (busy_flag & mask)
                                                can_busy_loop = true;

                                }
                        }
                        if (res_in)
                                *rinp = res_in;
                        if (res_out)
                                *routp = res_out;
                        if (res_ex)
                                *rexp = res_ex;
                        cond_resched();
                }
                wait->_qproc = NULL;
                if (retval || timed_out || signal_pending(current))
                        break;
                if (table.error) {
                        retval = table.error;
                        break;
                }

                /* only if found POLL_BUSY_LOOP sockets && not out of time */
                if (can_busy_loop && !need_resched()) {
                        if (!busy_start) {
                                busy_start = busy_loop_current_time();
                                continue;
                        }
                        if (!busy_loop_timeout(busy_start))
                                continue;
                }
                busy_flag = 0;

                /*
                 * If this is the first loop and we have a timeout
                 * given, then we convert to ktime_t and set the to
                 * pointer to the expiry value.
                 */
                if (end_time && !to) {
                        expire = timespec64_to_ktime(*end_time);
                        to = &expire;
                }

                if (!poll_schedule_timeout(&table, TASK_INTERRUPTIBLE,
                                           to, slack))
                        timed_out = 1;
        }

        poll_freewait(&table);

        return retval;
}

/*
 * We can actually return ERESTARTSYS instead of EINTR, but I'd
 * like to be certain this leads to no problems. So I return
 * EINTR just for safety.
 *
 * Update: ERESTARTSYS breaks at least the xview clock binary, so
 * I'm trying ERESTARTNOHAND which restarts only when you want to.
 */
int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp,
                    fd_set __user *exp, struct timespec64 *end_time)
{
        fd_set_bits fds;
        void *bits;
        int ret, max_fds;
        size_t size, alloc_size;
        struct fdtable *fdt;
        /* Allocate small arguments on the stack to save memory and be faster */
        long stack_fds[SELECT_STACK_ALLOC/sizeof(long)];

        ret = -EINVAL;
        if (n < 0)
                goto out_nofds;

        /* max_fds can increase, so grab it once to avoid race */
        rcu_read_lock();
        fdt = files_fdtable(current->files);
        max_fds = fdt->max_fds;
        rcu_read_unlock();
        if (n > max_fds)
                n = max_fds;

        /*
         * We need 6 bitmaps (in/out/ex for both incoming and outgoing),
         * since we used fdset we need to allocate memory in units of
         * long-words.
         */
        size = FDS_BYTES(n);
        bits = stack_fds;
        if (size > sizeof(stack_fds) / 6) {
                /* Not enough space in on-stack array; must use kmalloc */
                ret = -ENOMEM;
                if (size > (SIZE_MAX / 6))
                        goto out_nofds;

                alloc_size = 6 * size;
                bits = kvmalloc(alloc_size, GFP_KERNEL);
                if (!bits)
                        goto out_nofds;
        }
        fds.in      = bits;
        fds.out     = bits +   size;
        fds.ex      = bits + 2*size;
        fds.res_in  = bits + 3*size;
        fds.res_out = bits + 4*size;
        fds.res_ex  = bits + 5*size;

        if ((ret = get_fd_set(n, inp, fds.in)) ||
            (ret = get_fd_set(n, outp, fds.out)) ||
            (ret = get_fd_set(n, exp, fds.ex)))
                goto out;
        zero_fd_set(n, fds.res_in);
        zero_fd_set(n, fds.res_out);
        zero_fd_set(n, fds.res_ex);

        ret = do_select(n, &fds, end_time);

        if (ret < 0)
                goto out;
        if (!ret) {
                ret = -ERESTARTNOHAND;
                if (signal_pending(current))
                        goto out;
                ret = 0;
        }

        if (set_fd_set(n, inp, fds.res_in) ||
            set_fd_set(n, outp, fds.res_out) ||
            set_fd_set(n, exp, fds.res_ex))
                ret = -EFAULT;

out:
        if (bits != stack_fds)
                kvfree(bits);
out_nofds:
        return ret;
}

static int kern_select(int n, fd_set __user *inp, fd_set __user *outp,
                       fd_set __user *exp, struct timeval __user *tvp)
{
        struct timespec64 end_time, *to = NULL;
        struct timeval tv;
        int ret;

        if (tvp) {
                if (copy_from_user(&tv, tvp, sizeof(tv)))
                        return -EFAULT;

                to = &end_time;
                if (poll_select_set_timeout(to,
                                tv.tv_sec + (tv.tv_usec / USEC_PER_SEC),
                                (tv.tv_usec % USEC_PER_SEC) * NSEC_PER_USEC))
                        return -EINVAL;
        }

        ret = core_sys_select(n, inp, outp, exp, to);
        ret = poll_select_copy_remaining(&end_time, tvp, 1, ret);

        return ret;
}

SYSCALL_DEFINE5(select, int, n, fd_set __user *, inp, fd_set __user *, outp,
                fd_set __user *, exp, struct timeval __user *, tvp)
{
        return kern_select(n, inp, outp, exp, tvp);
}
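
/*
 * pselect: like select, but atomically installs a temporary signal mask
 * for the duration of the wait. On -ERESTARTNOHAND the original mask is
 * parked in current->saved_sigmask so signal delivery sees the temporary
 * mask first; otherwise it is restored immediately.
 */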
static long do_pselect(int n, fd_set __user *inp, fd_set __user *outp,
                       fd_set __user *exp, struct timespec __user *tsp,
                       const sigset_t __user *sigmask, size_t sigsetsize)
{
        sigset_t ksigmask, sigsaved;
        struct timespec64 ts, end_time, *to = NULL;
        int ret;

        if (tsp) {
                if (get_timespec64(&ts, tsp))
                        return -EFAULT;

                to = &end_time;
                if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec))
                        return -EINVAL;
        }

        if (sigmask) {
                /* XXX: Don't preclude handling different sized sigset_t's. */
                if (sigsetsize != sizeof(sigset_t))
                        return -EINVAL;
                if (copy_from_user(&ksigmask, sigmask, sizeof(ksigmask)))
                        return -EFAULT;

                sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP));
                sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
        }

        ret = core_sys_select(n, inp, outp, exp, to);
        ret = poll_select_copy_remaining(&end_time, tsp, 0, ret);

        if (ret == -ERESTARTNOHAND) {
                /*
                 * Don't restore the signal mask yet. Let do_signal() deliver
                 * the signal on the way back to userspace, before the signal
                 * mask is restored.
                 */
                if (sigmask) {
                        memcpy(&current->saved_sigmask, &sigsaved,
                               sizeof(sigsaved));
                        set_restore_sigmask();
                }
        } else if (sigmask)
                sigprocmask(SIG_SETMASK, &sigsaved, NULL);

        return ret;
}

/*
 * Most architectures can't handle 7-argument syscalls. So we provide a
 * 6-argument version where the sixth argument is a pointer to a structure
 * which has a pointer to the sigset_t itself followed by a size_t containing
 * the sigset size.
 */
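/*
 * I.e. userspace passes something shaped like:
 *
 *	struct {
 *		const sigset_t __user	*sigmask;
 *		size_t			 sigsetsize;
 *	};
 *
 * (the field names here are illustrative; the ABI is just a pointer
 * followed by a size)
 */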
SYSCALL_DEFINE6(pselect6, int, n, fd_set __user *, inp, fd_set __user *, outp,
                fd_set __user *, exp, struct timespec __user *, tsp,
                void __user *, sig)
{
        size_t sigsetsize = 0;
        sigset_t __user *up = NULL;

        if (sig) {
                if (!access_ok(VERIFY_READ, sig, sizeof(void *)+sizeof(size_t))
                    || __get_user(up, (sigset_t __user * __user *)sig)
                    || __get_user(sigsetsize,
                                  (size_t __user *)(sig+sizeof(void *))))
                        return -EFAULT;
        }

        return do_pselect(n, inp, outp, exp, tsp, up, sigsetsize);
}

#ifdef __ARCH_WANT_SYS_OLD_SELECT
struct sel_arg_struct {
        unsigned long n;
        fd_set __user *inp, *outp, *exp;
        struct timeval __user *tvp;
};

SYSCALL_DEFINE1(old_select, struct sel_arg_struct __user *, arg)
{
        struct sel_arg_struct a;

        if (copy_from_user(&a, arg, sizeof(a)))
                return -EFAULT;
        return kern_select(a.n, a.inp, a.outp, a.exp, a.tvp);
}
#endif

struct poll_list {
        struct poll_list *next;
        int len;
        struct pollfd entries[0];
};

#define POLLFD_PER_PAGE  ((PAGE_SIZE-sizeof(struct poll_list)) / sizeof(struct pollfd))

/*
 * Fish for pollable events on the pollfd->fd file descriptor. We're only
 * interested in events matching the pollfd->events mask, and the result
 * matching that mask is both recorded in pollfd->revents and returned. The
 * pwait poll_table will be used by the fd-provided poll handler for waiting,
 * if pwait->_qproc is non-NULL.
 */
static inline __poll_t do_pollfd(struct pollfd *pollfd, poll_table *pwait,
                                 bool *can_busy_poll,
                                 __poll_t busy_flag)
{
        int fd = pollfd->fd;
        __poll_t mask = 0, filter;
        struct fd f;

        if (fd < 0)
                goto out;
        mask = EPOLLNVAL;
        f = fdget(fd);
        if (!f.file)
                goto out;

        /* userland u16 ->events contains POLL... bitmap */
        filter = demangle_poll(pollfd->events) | EPOLLERR | EPOLLHUP;
        pwait->_key = filter | busy_flag;
        mask = vfs_poll(f.file, pwait);
        if (mask & busy_flag)
                *can_busy_poll = true;
        mask &= filter;         /* Mask out unneeded events. */
        fdput(f);

out:
        /* ... and so does ->revents */
        pollfd->revents = mangle_poll(mask);

        return mask;
}
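
/*
 * Core poll loop: walk the chain of poll_list chunks, querying every
 * pollfd on each pass, then sleep until an event, a signal, or the
 * timeout ends the wait. As in do_select(), pt->_qproc is cleared after
 * the first pass so waiters are registered only once.
 */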
static int do_poll(struct poll_list *list, struct poll_wqueues *wait,
                   struct timespec64 *end_time)
{
        poll_table* pt = &wait->pt;
        ktime_t expire, *to = NULL;
        int timed_out = 0, count = 0;
        u64 slack = 0;
        __poll_t busy_flag = net_busy_loop_on() ? POLL_BUSY_LOOP : 0;
        unsigned long busy_start = 0;

        /* Optimise the no-wait case */
        if (end_time && !end_time->tv_sec && !end_time->tv_nsec) {
                pt->_qproc = NULL;
                timed_out = 1;
        }

        if (end_time && !timed_out)
                slack = select_estimate_accuracy(end_time);

        for (;;) {
                struct poll_list *walk;
                bool can_busy_loop = false;

                for (walk = list; walk != NULL; walk = walk->next) {
                        struct pollfd * pfd, * pfd_end;

                        pfd = walk->entries;
                        pfd_end = pfd + walk->len;
                        for (; pfd != pfd_end; pfd++) {
                                /*
                                 * Fish for events. If we found one, record it
                                 * and kill poll_table->_qproc, so we don't
                                 * needlessly register any other waiters after
                                 * this. They'll get immediately deregistered
                                 * when we break out and return.
                                 */
                                if (do_pollfd(pfd, pt, &can_busy_loop,
                                              busy_flag)) {
                                        count++;
                                        pt->_qproc = NULL;
                                        /* found something, stop busy polling */
                                        busy_flag = 0;
                                        can_busy_loop = false;
                                }
                        }
                }
                /*
                 * All waiters have already been registered, so don't provide
                 * a poll_table->_qproc to them on the next loop iteration.
                 */
                pt->_qproc = NULL;
                if (!count) {
                        count = wait->error;
                        if (signal_pending(current))
                                count = -EINTR;
                }
                if (count || timed_out)
                        break;

                /* only if found POLL_BUSY_LOOP sockets && not out of time */
                if (can_busy_loop && !need_resched()) {
                        if (!busy_start) {
                                busy_start = busy_loop_current_time();
                                continue;
                        }
                        if (!busy_loop_timeout(busy_start))
                                continue;
                }
                busy_flag = 0;

                /*
                 * If this is the first loop and we have a timeout
                 * given, then we convert to ktime_t and set the to
                 * pointer to the expiry value.
                 */
                if (end_time && !to) {
                        expire = timespec64_to_ktime(*end_time);
                        to = &expire;
                }

                if (!poll_schedule_timeout(wait, TASK_INTERRUPTIBLE, to, slack))
                        timed_out = 1;
        }
        return count;
}
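
/*
 * Copy the pollfd array in from userspace (stack first, then kmalloc'd
 * POLLFD_PER_PAGE-sized chunks for large nfds), run do_poll(), and copy
 * the resulting revents back out.
 */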
#define N_STACK_PPS ((sizeof(stack_pps) - sizeof(struct poll_list))  / \
                        sizeof(struct pollfd))

static int do_sys_poll(struct pollfd __user *ufds, unsigned int nfds,
                       struct timespec64 *end_time)
{
        struct poll_wqueues table;
        int err = -EFAULT, fdcount, len, size;
        /* Allocate small arguments on the stack to save memory and be
           faster - use long to make sure the buffer is aligned properly
           on 64 bit archs to avoid unaligned access */
        long stack_pps[POLL_STACK_ALLOC/sizeof(long)];
        struct poll_list *const head = (struct poll_list *)stack_pps;
        struct poll_list *walk = head;
        unsigned long todo = nfds;

        if (nfds > rlimit(RLIMIT_NOFILE))
                return -EINVAL;

        len = min_t(unsigned int, nfds, N_STACK_PPS);
        for (;;) {
                walk->next = NULL;
                walk->len = len;
                if (!len)
                        break;

                if (copy_from_user(walk->entries, ufds + nfds-todo,
                                   sizeof(struct pollfd) * walk->len))
                        goto out_fds;

                todo -= walk->len;
                if (!todo)
                        break;

                len = min(todo, POLLFD_PER_PAGE);
                size = sizeof(struct poll_list) + sizeof(struct pollfd) * len;
                walk = walk->next = kmalloc(size, GFP_KERNEL);
                if (!walk) {
                        err = -ENOMEM;
                        goto out_fds;
                }
        }

        poll_initwait(&table);
        fdcount = do_poll(head, &table, end_time);
        poll_freewait(&table);

        for (walk = head; walk; walk = walk->next) {
                struct pollfd *fds = walk->entries;
                int j;

                for (j = 0; j < walk->len; j++, ufds++)
                        if (__put_user(fds[j].revents, &ufds->revents))
                                goto out_fds;
        }

        err = fdcount;
out_fds:
        walk = head->next;
        while (walk) {
                struct poll_list *pos = walk;

                walk = walk->next;
                kfree(pos);
        }

        return err;
}
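
/*
 * Restart handler for poll(): the restart block stashed the user
 * pointer, nfds and absolute timeout so the interrupted syscall can be
 * transparently re-entered.
 */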
static long do_restart_poll(struct restart_block *restart_block)
{
        struct pollfd __user *ufds = restart_block->poll.ufds;
        int nfds = restart_block->poll.nfds;
        struct timespec64 *to = NULL, end_time;
        int ret;

        if (restart_block->poll.has_timeout) {
                end_time.tv_sec = restart_block->poll.tv_sec;
                end_time.tv_nsec = restart_block->poll.tv_nsec;
                to = &end_time;
        }

        ret = do_sys_poll(ufds, nfds, to);

        if (ret == -EINTR) {
                restart_block->fn = do_restart_poll;
                ret = -ERESTART_RESTARTBLOCK;
        }
        return ret;
}

SYSCALL_DEFINE3(poll, struct pollfd __user *, ufds, unsigned int, nfds,
                int, timeout_msecs)
{
        struct timespec64 end_time, *to = NULL;
        int ret;

        if (timeout_msecs >= 0) {
                to = &end_time;
                poll_select_set_timeout(to, timeout_msecs / MSEC_PER_SEC,
                        NSEC_PER_MSEC * (timeout_msecs % MSEC_PER_SEC));
        }

        ret = do_sys_poll(ufds, nfds, to);

        if (ret == -EINTR) {
                struct restart_block *restart_block;

                restart_block = &current->restart_block;
                restart_block->fn = do_restart_poll;
                restart_block->poll.ufds = ufds;
                restart_block->poll.nfds = nfds;

                if (timeout_msecs >= 0) {
                        restart_block->poll.tv_sec = end_time.tv_sec;
                        restart_block->poll.tv_nsec = end_time.tv_nsec;
                        restart_block->poll.has_timeout = 1;
                } else
                        restart_block->poll.has_timeout = 0;

                ret = -ERESTART_RESTARTBLOCK;
        }
        return ret;
}

SYSCALL_DEFINE5(ppoll, struct pollfd __user *, ufds, unsigned int, nfds,
                struct timespec __user *, tsp, const sigset_t __user *, sigmask,
                size_t, sigsetsize)
{
        sigset_t ksigmask, sigsaved;
        struct timespec64 ts, end_time, *to = NULL;
        int ret;

        if (tsp) {
                if (get_timespec64(&ts, tsp))
                        return -EFAULT;

                to = &end_time;
                if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec))
                        return -EINVAL;
        }

        if (sigmask) {
                /* XXX: Don't preclude handling different sized sigset_t's. */
                if (sigsetsize != sizeof(sigset_t))
                        return -EINVAL;
                if (copy_from_user(&ksigmask, sigmask, sizeof(ksigmask)))
                        return -EFAULT;

                sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP));
                sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
        }

        ret = do_sys_poll(ufds, nfds, to);

        /* We can restart this syscall, usually */
        if (ret == -EINTR) {
                /*
                 * Don't restore the signal mask yet. Let do_signal() deliver
                 * the signal on the way back to userspace, before the signal
                 * mask is restored.
                 */
                if (sigmask) {
                        memcpy(&current->saved_sigmask, &sigsaved,
                               sizeof(sigsaved));
                        set_restore_sigmask();
                }
                ret = -ERESTARTNOHAND;
        } else if (sigmask)
                sigprocmask(SIG_SETMASK, &sigsaved, NULL);

        ret = poll_select_copy_remaining(&end_time, tsp, 0, ret);

        return ret;
}

#ifdef CONFIG_COMPAT
#define __COMPAT_NFDBITS       (8 * sizeof(compat_ulong_t))

static
int compat_poll_select_copy_remaining(struct timespec64 *end_time, void __user *p,
                                      int timeval, int ret)
{
        struct timespec64 ts;

        if (!p)
                return ret;

        if (current->personality & STICKY_TIMEOUTS)
                goto sticky;

        /* No update for zero timeout */
        if (!end_time->tv_sec && !end_time->tv_nsec)
                return ret;

        ktime_get_ts64(&ts);
        ts = timespec64_sub(*end_time, ts);
        if (ts.tv_sec < 0)
                ts.tv_sec = ts.tv_nsec = 0;

        if (timeval) {
                struct compat_timeval rtv;

                rtv.tv_sec = ts.tv_sec;
                rtv.tv_usec = ts.tv_nsec / NSEC_PER_USEC;

                if (!copy_to_user(p, &rtv, sizeof(rtv)))
                        return ret;
        } else {
                if (!compat_put_timespec64(&ts, p))
                        return ret;
        }
        /*
         * If an application puts its timeval in read-only memory, we
         * don't want the Linux-specific update to the timeval to
         * cause a fault after the select has completed
         * successfully. However, because we're not updating the
         * timeval, we can't restart the system call.
         */

sticky:
        if (ret == -ERESTARTNOHAND)
                ret = -EINTR;
        return ret;
}

/*
 * Ooo, nasty. We need here to frob 32-bit unsigned longs to
 * 64-bit unsigned longs.
 */
static
int compat_get_fd_set(unsigned long nr, compat_ulong_t __user *ufdset,
                      unsigned long *fdset)
{
        if (ufdset) {
                return compat_get_bitmap(fdset, ufdset, nr);
        } else {
                zero_fd_set(nr, fdset);
                return 0;
        }
}

static
int compat_set_fd_set(unsigned long nr, compat_ulong_t __user *ufdset,
                      unsigned long *fdset)
{
        if (!ufdset)
                return 0;
        return compat_put_bitmap(ufdset, fdset, nr);
}

/*
 * This is a virtual copy of sys_select from fs/select.c and probably
 * should be compared to it from time to time
 */

/*
 * We can actually return ERESTARTSYS instead of EINTR, but I'd
 * like to be certain this leads to no problems. So I return
 * EINTR just for safety.
 *
 * Update: ERESTARTSYS breaks at least the xview clock binary, so
 * I'm trying ERESTARTNOHAND which restarts only when you want to.
 */
static int compat_core_sys_select(int n, compat_ulong_t __user *inp,
        compat_ulong_t __user *outp, compat_ulong_t __user *exp,
        struct timespec64 *end_time)
{
        fd_set_bits fds;
        void *bits;
        int size, max_fds, ret = -EINVAL;
        struct fdtable *fdt;
        long stack_fds[SELECT_STACK_ALLOC/sizeof(long)];

        if (n < 0)
                goto out_nofds;

        /* max_fds can increase, so grab it once to avoid race */
        rcu_read_lock();
        fdt = files_fdtable(current->files);
        max_fds = fdt->max_fds;
        rcu_read_unlock();
        if (n > max_fds)
                n = max_fds;

        /*
         * We need 6 bitmaps (in/out/ex for both incoming and outgoing),
         * since we used fdset we need to allocate memory in units of
         * long-words.
         */
        size = FDS_BYTES(n);
        bits = stack_fds;
        if (size > sizeof(stack_fds) / 6) {
                bits = kmalloc_array(6, size, GFP_KERNEL);
                ret = -ENOMEM;
                if (!bits)
                        goto out_nofds;
        }
        fds.in      = (unsigned long *)  bits;
        fds.out     = (unsigned long *) (bits +   size);
        fds.ex      = (unsigned long *) (bits + 2*size);
        fds.res_in  = (unsigned long *) (bits + 3*size);
        fds.res_out = (unsigned long *) (bits + 4*size);
        fds.res_ex  = (unsigned long *) (bits + 5*size);

        if ((ret = compat_get_fd_set(n, inp, fds.in)) ||
            (ret = compat_get_fd_set(n, outp, fds.out)) ||
            (ret = compat_get_fd_set(n, exp, fds.ex)))
                goto out;
        zero_fd_set(n, fds.res_in);
        zero_fd_set(n, fds.res_out);
        zero_fd_set(n, fds.res_ex);

        ret = do_select(n, &fds, end_time);

        if (ret < 0)
                goto out;
        if (!ret) {
                ret = -ERESTARTNOHAND;
                if (signal_pending(current))
                        goto out;
                ret = 0;
        }

        if (compat_set_fd_set(n, inp, fds.res_in) ||
            compat_set_fd_set(n, outp, fds.res_out) ||
            compat_set_fd_set(n, exp, fds.res_ex))
                ret = -EFAULT;
out:
        if (bits != stack_fds)
                kfree(bits);
out_nofds:
        return ret;
}

static int do_compat_select(int n, compat_ulong_t __user *inp,
        compat_ulong_t __user *outp, compat_ulong_t __user *exp,
        struct compat_timeval __user *tvp)
{
        struct timespec64 end_time, *to = NULL;
        struct compat_timeval tv;
        int ret;

        if (tvp) {
                if (copy_from_user(&tv, tvp, sizeof(tv)))
                        return -EFAULT;

                to = &end_time;
                if (poll_select_set_timeout(to,
                                tv.tv_sec + (tv.tv_usec / USEC_PER_SEC),
                                (tv.tv_usec % USEC_PER_SEC) * NSEC_PER_USEC))
                        return -EINVAL;
        }

        ret = compat_core_sys_select(n, inp, outp, exp, to);
        ret = compat_poll_select_copy_remaining(&end_time, tvp, 1, ret);

        return ret;
}

COMPAT_SYSCALL_DEFINE5(select, int, n, compat_ulong_t __user *, inp,
        compat_ulong_t __user *, outp, compat_ulong_t __user *, exp,
        struct compat_timeval __user *, tvp)
{
        return do_compat_select(n, inp, outp, exp, tvp);
}

struct compat_sel_arg_struct {
        compat_ulong_t n;
        compat_uptr_t inp;
        compat_uptr_t outp;
        compat_uptr_t exp;
        compat_uptr_t tvp;
};

COMPAT_SYSCALL_DEFINE1(old_select, struct compat_sel_arg_struct __user *, arg)
{
        struct compat_sel_arg_struct a;

        if (copy_from_user(&a, arg, sizeof(a)))
                return -EFAULT;
        return do_compat_select(a.n, compat_ptr(a.inp), compat_ptr(a.outp),
                                compat_ptr(a.exp), compat_ptr(a.tvp));
}

static long do_compat_pselect(int n, compat_ulong_t __user *inp,
        compat_ulong_t __user *outp, compat_ulong_t __user *exp,
        struct compat_timespec __user *tsp, compat_sigset_t __user *sigmask,
        compat_size_t sigsetsize)
{
        sigset_t ksigmask, sigsaved;
        struct timespec64 ts, end_time, *to = NULL;
        int ret;

        if (tsp) {
                if (compat_get_timespec64(&ts, tsp))
                        return -EFAULT;

                to = &end_time;
                if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec))
                        return -EINVAL;
        }

        if (sigmask) {
                if (sigsetsize != sizeof(compat_sigset_t))
                        return -EINVAL;
                if (get_compat_sigset(&ksigmask, sigmask))
                        return -EFAULT;

                sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP));
                sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
        }

        ret = compat_core_sys_select(n, inp, outp, exp, to);
        ret = compat_poll_select_copy_remaining(&end_time, tsp, 0, ret);

        if (ret == -ERESTARTNOHAND) {
                /*
                 * Don't restore the signal mask yet. Let do_signal() deliver
                 * the signal on the way back to userspace, before the signal
                 * mask is restored.
                 */
                if (sigmask) {
                        memcpy(&current->saved_sigmask, &sigsaved,
                               sizeof(sigsaved));
                        set_restore_sigmask();
                }
        } else if (sigmask)
                sigprocmask(SIG_SETMASK, &sigsaved, NULL);

        return ret;
}

COMPAT_SYSCALL_DEFINE6(pselect6, int, n, compat_ulong_t __user *, inp,
        compat_ulong_t __user *, outp, compat_ulong_t __user *, exp,
        struct compat_timespec __user *, tsp, void __user *, sig)
{
        compat_size_t sigsetsize = 0;
        compat_uptr_t up = 0;

        if (sig) {
                if (!access_ok(VERIFY_READ, sig,
                               sizeof(compat_uptr_t)+sizeof(compat_size_t)) ||
                    __get_user(up, (compat_uptr_t __user *)sig) ||
                    __get_user(sigsetsize,
                               (compat_size_t __user *)(sig+sizeof(up))))
                        return -EFAULT;
        }
        return do_compat_pselect(n, inp, outp, exp, tsp, compat_ptr(up),
                                 sigsetsize);
}

COMPAT_SYSCALL_DEFINE5(ppoll, struct pollfd __user *, ufds,
        unsigned int, nfds, struct compat_timespec __user *, tsp,
        const compat_sigset_t __user *, sigmask, compat_size_t, sigsetsize)
{
        sigset_t ksigmask, sigsaved;
        struct timespec64 ts, end_time, *to = NULL;
        int ret;

        if (tsp) {
                if (compat_get_timespec64(&ts, tsp))
                        return -EFAULT;

                to = &end_time;
                if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec))
                        return -EINVAL;
        }

        if (sigmask) {
                if (sigsetsize != sizeof(compat_sigset_t))
                        return -EINVAL;
                if (get_compat_sigset(&ksigmask, sigmask))
                        return -EFAULT;

                sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP));
                sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
        }

        ret = do_sys_poll(ufds, nfds, to);

        /* We can restart this syscall, usually */
        if (ret == -EINTR) {
                /*
                 * Don't restore the signal mask yet. Let do_signal() deliver
                 * the signal on the way back to userspace, before the signal
                 * mask is restored.
                 */
                if (sigmask) {
                        memcpy(&current->saved_sigmask, &sigsaved,
                               sizeof(sigsaved));
                        set_restore_sigmask();
                }
                ret = -ERESTARTNOHAND;
        } else if (sigmask)
                sigprocmask(SIG_SETMASK, &sigsaved, NULL);

        ret = compat_poll_select_copy_remaining(&end_time, tsp, 0, ret);

        return ret;
}
#endif