LCOV - code coverage report
Current view: top level - fs - select.c (source / functions) Hit Total Coverage
Test: landlock.info Lines: 367 626 58.6 %
Date: 2021-04-22 12:43:58 Functions: 26 48 54.2 %

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0
       2             : /*
       3             :  * This file contains the procedures for the handling of select and poll
       4             :  *
       5             :  * Created for Linux based loosely upon Mathius Lattner's minix
       6             :  * patches by Peter MacDonald. Heavily edited by Linus.
       7             :  *
       8             :  *  4 February 1994
       9             :  *     COFF/ELF binary emulation. If the process has the STICKY_TIMEOUTS
      10             :  *     flag set in its personality we do *not* modify the given timeout
      11             :  *     parameter to reflect time remaining.
      12             :  *
      13             :  *  24 January 2000
      14             :  *     Changed sys_poll()/do_poll() to use PAGE_SIZE chunk-based allocation 
      15             :  *     of fds to overcome nfds < 16390 descriptors limit (Tigran Aivazian).
      16             :  */
      17             : 
      18             : #include <linux/kernel.h>
      19             : #include <linux/sched/signal.h>
      20             : #include <linux/sched/rt.h>
      21             : #include <linux/syscalls.h>
      22             : #include <linux/export.h>
      23             : #include <linux/slab.h>
      24             : #include <linux/poll.h>
      25             : #include <linux/personality.h> /* for STICKY_TIMEOUTS */
      26             : #include <linux/file.h>
      27             : #include <linux/fdtable.h>
      28             : #include <linux/fs.h>
      29             : #include <linux/rcupdate.h>
      30             : #include <linux/hrtimer.h>
      31             : #include <linux/freezer.h>
      32             : #include <net/busy_poll.h>
      33             : #include <linux/vmalloc.h>
      34             : 
      35             : #include <linux/uaccess.h>
      36             : 
      37             : 
      38             : /*
      39             :  * Estimate expected accuracy in ns from a timeval.
      40             :  *
      41             :  * After quite a bit of churning around, we've settled on
      42             :  * a simple thing of taking 0.1% of the timeout as the
      43             :  * slack, with a cap of 100 msec.
      44             :  * "nice" tasks get a 0.5% slack instead.
      45             :  *
      46             :  * Consider this comment an open invitation to come up with even
      47             :  * better solutions..
      48             :  */
      49             : 
      50             : #define MAX_SLACK       (100 * NSEC_PER_MSEC)  /* ceiling for the computed slack: 100 ms, in ns */
      51             : 
      52         303 : static long __estimate_accuracy(struct timespec64 *tv)
      53             : {
                               /*
                                * @tv is the time remaining until the timeout (see the caller,
                                * select_estimate_accuracy()).  The result is that time divided
                                * by divfactor, in nanoseconds, clamped to MAX_SLACK.
                                * A negative tv_sec gives 0.
                                */
      54         303 :         long slack;
      55         303 :         int divfactor = 1000;
      56             : 
      57         303 :         if (tv->tv_sec < 0)
      58             :                 return 0;
      59             : 
                               /* positive nice value: divide by 200 instead of 1000 (0.5% vs 0.1%) */
      60         303 :         if (task_nice(current) > 0)
      61           2 :                 divfactor = divfactor / 5;
      62             : 
                               /*
                                * tv_sec alone already exceeds the cap here, so return the cap
                                * without doing the multiplication below.
                                */
      63         303 :         if (tv->tv_sec > MAX_SLACK / (NSEC_PER_SEC/divfactor))
      64             :                 return MAX_SLACK;
      65             : 
      66         296 :         slack = tv->tv_nsec / divfactor;
      67         296 :         slack += tv->tv_sec * (NSEC_PER_SEC/divfactor);
      68             : 
      69         296 :         if (slack > MAX_SLACK)
      70             :                 return MAX_SLACK;
      71             : 
      72             :         return slack;
      73             : }
      74             : 
      75         303 : u64 select_estimate_accuracy(struct timespec64 *tv)
      76             : {
                               /*
                                * @tv is an absolute end time on the ktime_get_ts64() clock.
                                * Returns the timer slack, in ns, to use when sleeping until @tv:
                                * 0 for realtime tasks, otherwise the larger of the estimate for
                                * the remaining time and current->timer_slack_ns.
                                */
      77         303 :         u64 ret;
      78         303 :         struct timespec64 now;
      79             : 
      80             :         /*
      81             :          * Realtime tasks get a slack of 0 for obvious reasons.
      82             :          */
      83             : 
      84         303 :         if (rt_task(current))
      85             :                 return 0;
      86             : 
      87         303 :         ktime_get_ts64(&now);
                               /* 'now' is reused to hold the time remaining until *tv */
      88         303 :         now = timespec64_sub(*tv, now);
      89         303 :         ret = __estimate_accuracy(&now);
                               /* never go below the task's own timer slack */
      90         303 :         if (ret < current->timer_slack_ns)
      91           0 :                 return current->timer_slack_ns;
      92             :         return ret;
      93             : }
      94             : 
      95             : 
      96             : 
      97             : struct poll_table_page {
                               /*
                                * One page of overflow poll entries.  poll_get_entry() allocates
                                * these with __get_free_page() once the inline entries of
                                * struct poll_wqueues are used up.
                                */
      98             :         struct poll_table_page * next;     /* next page in the list headed by poll_wqueues.table */
      99             :         struct poll_table_entry * entry;   /* first unused slot in entries[] */
     100             :         struct poll_table_entry entries[]; /* entry storage filling the rest of the page */
     101             : };
     102             : 
     103             : #define POLL_TABLE_FULL(table) \
     104             :         ((unsigned long)((table)->entry+1) > PAGE_SIZE + (unsigned long)(table)) /* true when one more entry would end past PAGE_SIZE bytes from the start of the page */
     105             : 
     106             : /*
     107             :  * Ok, Peter made a complicated, but straightforward multiple_wait() function.
     108             :  * I have rewritten this, taking some shortcuts: This code may not be easy to
     109             :  * follow, but it should be free of race-conditions, and it's practical. If you
     110             :  * understand what I'm doing here, then you understand how the linux
     111             :  * sleep/wakeup mechanism works.
     112             :  *
     113             :  * Two very simple procedures, poll_wait() and poll_freewait() make all the
     114             :  * work.  poll_wait() is an inline-function defined in <linux/poll.h>,
     115             :  * as all select/poll functions have to call it to add an entry to the
     116             :  * poll table.
     117             :  */
     118             : static void __pollwait(struct file *filp, wait_queue_head_t *wait_address,
     119             :                        poll_table *p);  /* forward declaration: poll_initwait() refers to it before the definition */
     120             : 
     121        1150 : void poll_initwait(struct poll_wqueues *pwq)
     122             : {
                               /*
                                * Put @pwq into its initial state: __pollwait() becomes the
                                * poll_table callback, the current task is recorded as the
                                * polling task, and no event, error, overflow page or inline
                                * entry is recorded yet.
                                */
     123        1150 :         init_poll_funcptr(&pwq->pt, __pollwait);
     124        1150 :         pwq->polling_task = current;
     125        1150 :         pwq->triggered = 0;
     126        1150 :         pwq->error = 0;
     127        1150 :         pwq->table = NULL;
     128        1150 :         pwq->inline_index = 0;
     129           0 : }
     130             : EXPORT_SYMBOL(poll_initwait);
     131             : 
     132        1473 : static void free_poll_entry(struct poll_table_entry *entry)
     133             : {
                               /*
                                * Reverse of what __pollwait() set up for one entry: take the
                                * wait entry off its wait queue, then drop the file reference
                                * obtained there with get_file().
                                */
     134        1473 :         remove_wait_queue(entry->wait_address, &entry->wait);
     135        1473 :         fput(entry->filp);
     136        1473 : }
     137             : 
     138        1142 : void poll_freewait(struct poll_wqueues *pwq)
     139             : {
                               /*
                                * Release every entry registered through @pwq: first the inline
                                * entries [0, inline_index), then each overflow page together
                                * with the entries stored in it.
                                */
     140        1142 :         struct poll_table_page * p = pwq->table;
     141        1142 :         int i;
     142        2615 :         for (i = 0; i < pwq->inline_index; i++)
     143        1473 :                 free_poll_entry(pwq->inline_entries + i);
     144        1142 :         while (p) {
     145           0 :                 struct poll_table_entry * entry;
     146           0 :                 struct poll_table_page *old;
     147             : 
                                       /*
                                        * p->entry points one past the last used slot; walk
                                        * back down to entries[0].  poll_get_entry() hands out
                                        * a slot right after allocating a page, so a page on
                                        * the list has at least one used slot.
                                        */
     148           0 :                 entry = p->entry;
     149           0 :                 do {
     150           0 :                         entry--;
     151           0 :                         free_poll_entry(entry);
     152           0 :                 } while (entry > p->entries);
                                       /* read the link before the page is freed */
     153           0 :                 old = p;
     154           0 :                 p = p->next;
     155           0 :                 free_page((unsigned long) old);
     156             :         }
     157        1142 : }
     158             : EXPORT_SYMBOL(poll_freewait);
     159             : 
     160        1488 : static struct poll_table_entry *poll_get_entry(struct poll_wqueues *p)
     161             : {
                               /*
                                * Return an unused poll_table_entry belonging to @p, or NULL
                                * with p->error set to -ENOMEM when a new page cannot be
                                * allocated.  Inline entries are handed out first; after that
                                * entries come from the page at the head of p->table.
                                */
     162        1488 :         struct poll_table_page *table = p->table;
     163             : 
     164        1488 :         if (p->inline_index < N_INLINE_POLL_ENTRIES)
     165        1488 :                 return p->inline_entries + p->inline_index++;
     166             : 
                               /* no page yet, or the head page is full: push a fresh page onto the list head */
     167           0 :         if (!table || POLL_TABLE_FULL(table)) {
     168           0 :                 struct poll_table_page *new_table;
     169             : 
     170           0 :                 new_table = (struct poll_table_page *) __get_free_page(GFP_KERNEL);
     171           0 :                 if (!new_table) {
     172           0 :                         p->error = -ENOMEM;
     173           0 :                         return NULL;
     174             :                 }
     175           0 :                 new_table->entry = new_table->entries;
     176           0 :                 new_table->next = table;
     177           0 :                 p->table = new_table;
     178           0 :                 table = new_table;
     179             :         }
     180             : 
                               /* hand out the current free slot and advance to the next one */
     181           0 :         return table->entry++;
     182             : }
     183             : 
     184         608 : static int __pollwake(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
     185             : {
                               /*
                                * Mark the owning poll_wqueues as triggered and wake its polling
                                * task.  wait->private is the poll_wqueues pointer stored by
                                * __pollwait().  Returns the result of default_wake_function().
                                */
     186         608 :         struct poll_wqueues *pwq = wait->private;
     187         608 :         DECLARE_WAITQUEUE(dummy_wait, pwq->polling_task);
     188             : 
     189             :         /*
     190             :          * Although this function is called under waitqueue lock, LOCK
     191             :          * doesn't imply write barrier and the users expect write
     192             :          * barrier semantics on wakeup functions.  The following
     193             :          * smp_wmb() is equivalent to smp_wmb() in try_to_wake_up()
     194             :          * and is paired with smp_store_mb() in poll_schedule_timeout.
     195             :          */
     196         608 :         smp_wmb();
     197         608 :         pwq->triggered = 1;
     198             : 
     199             :         /*
     200             :          * Perform the default wake up operation using a dummy
     201             :          * waitqueue.
     202             :          *
     203             :          * TODO: This is hacky but there currently is no interface to
     204             :          * pass in @sync.  @sync is scheduled to be removed and once
     205             :          * that happens, wake_up_process() can be used directly.
     206             :          */
     207         608 :         return default_wake_function(&dummy_wait, mode, sync, key);
     208             : }
     209             : 
     210         800 : static int pollwake(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
     211             : {
                               /*
                                * Wake callback installed on every entry by __pollwait().
                                * A non-NULL @key that shares no event bit with entry->key is
                                * ignored and 0 is returned; otherwise (including a NULL @key)
                                * the wakeup is passed on to __pollwake().
                                */
     212         800 :         struct poll_table_entry *entry;
     213             : 
     214         800 :         entry = container_of(wait, struct poll_table_entry, wait);
     215         800 :         if (key && !(key_to_poll(key) & entry->key))
     216             :                 return 0;
     217         608 :         return __pollwake(wait, mode, sync, key);
     218             : }
     219             : 
     220             : /* Add a new entry */
     221        1488 : static void __pollwait(struct file *filp, wait_queue_head_t *wait_address,
     222             :                                 poll_table *p)
     223             : {
                               /*
                                * poll_table callback set up by poll_initwait(): register the
                                * wait queue @wait_address for @filp in the poll_wqueues that
                                * embeds @p, with pollwake() as the wake function.
                                */
     224        1488 :         struct poll_wqueues *pwq = container_of(p, struct poll_wqueues, pt);
     225        1488 :         struct poll_table_entry *entry = poll_get_entry(pwq);
                               /* no entry available; poll_get_entry() has stored the error in pwq->error */
     226        1488 :         if (!entry)
     227             :                 return;
                               /* this file reference is dropped again by free_poll_entry() */
     228        1488 :         entry->filp = get_file(filp);
     229        1488 :         entry->wait_address = wait_address;
                               /* events of interest, checked against the wakeup key in pollwake() */
     230        1488 :         entry->key = p->_key;
     231        1488 :         init_waitqueue_func_entry(&entry->wait, pollwake);
     232        1488 :         entry->wait.private = pwq;
     233        1488 :         add_wait_queue(wait_address, &entry->wait);
     234             : }
     235             : 
     236         574 : static int poll_schedule_timeout(struct poll_wqueues *pwq, int state,
     237             :                           ktime_t *expires, unsigned long slack)
     238             : {
                               /*
                                * Sleep in task state @state unless pwq->triggered is already
                                * set.  @expires is passed to schedule_hrtimeout_range() as an
                                * absolute expiry (HRTIMER_MODE_ABS) together with @slack.
                                *
                                * Returns -EINTR when no sleep was needed because triggered was
                                * already set, otherwise whatever schedule_hrtimeout_range()
                                * returned.  pwq->triggered is cleared before returning.
                                */
     239         574 :         int rc = -EINTR;
     240             : 
     241         574 :         set_current_state(state);
     242         574 :         if (!pwq->triggered)
     243         574 :                 rc = schedule_hrtimeout_range(expires, slack, HRTIMER_MODE_ABS);
     244         566 :         __set_current_state(TASK_RUNNING);
     245             : 
     246             :         /*
     247             :          * Prepare for the next iteration.
     248             :          *
     249             :          * The following smp_store_mb() serves two purposes.  First, it's
     250             :          * the counterpart rmb of the wmb in pollwake() such that data
     251             :          * written before wake up is always visible after wake up.
     252             :          * Second, the full barrier guarantees that triggered clearing
     253             :          * doesn't pass event check of the next iteration.  Note that
     254             :          * this problem doesn't exist for the first iteration as
     255             :          * add_wait_queue() has full barrier semantics.
     256             :          */
     257         566 :         smp_store_mb(pwq->triggered, 0);
     258             : 
     259         566 :         return rc;
     260             : }
     261             : 
     262             : /**
     263             :  * poll_select_set_timeout - helper function to setup the timeout value
     264             :  * @to:         pointer to timespec64 variable for the final timeout
     265             :  * @sec:        seconds (from user space)
     266             :  * @nsec:       nanoseconds (from user space)
     267             :  *
     268             :  * Note, we do not use a timespec for the user space value here. That
     269             :  * way we can use the function for timeval and compat interfaces as well.
     270             :  *
     271             :  * Returns -EINVAL if sec/nsec are not normalized. Otherwise 0.
     272             :  */
     273         268 : int poll_select_set_timeout(struct timespec64 *to, time64_t sec, long nsec)
     274             : {
     275         268 :         struct timespec64 ts = {.tv_sec = sec, .tv_nsec = nsec};
     276             : 
     277         268 :         if (!timespec64_valid(&ts))
     278             :                 return -EINVAL;
     279             : 
     280             :         /* Optimize for the zero timeout value here */
                               /*
                                * A zero timeout is stored as {0, 0} without reading the clock;
                                * do_select() and poll_select_finish() test for exactly that
                                * value.
                                */
     281         268 :         if (!sec && !nsec) {
     282          43 :                 to->tv_sec = to->tv_nsec = 0;
     283             :         } else {
                                       /* turn the relative timeout into an absolute end time: now + ts */
     284         225 :                 ktime_get_ts64(to);
     285         225 :                 *to = timespec64_add_safe(*to, ts);
     286             :         }
     287             :         return 0;
     288             : }
     289             : 
     290             : enum poll_time_type {
                               /* Selects how poll_select_finish() writes the remaining time to user space. */
     291             :         PT_TIMEVAL = 0,         /* as struct __kernel_old_timeval */
     292             :         PT_OLD_TIMEVAL = 1,     /* as struct old_timeval32 */
     293             :         PT_TIMESPEC = 2,        /* via put_timespec64() */
     294             :         PT_OLD_TIMESPEC = 3,    /* via put_old_timespec32() */
     295             : };
     296             : 
     297         777 : static int poll_select_finish(struct timespec64 *end_time,
     298             :                               void __user *p,
     299             :                               enum poll_time_type pt_type, int ret)
     300             : {
                               /*
                                * Common epilogue: calls restore_saved_sigmask_unless() with
                                * (ret == -ERESTARTNOHAND) and, when the caller supplied a user
                                * timeout pointer @p, writes the time remaining until @end_time
                                * back to it in the layout selected by @pt_type.
                                *
                                * Returns @ret unchanged when there is nothing to write or the
                                * write succeeds.  When the write is skipped (STICKY_TIMEOUTS)
                                * or fails, -ERESTARTNOHAND is turned into -EINTR.
                                */
     301         777 :         struct timespec64 rts;
     302             : 
     303         777 :         restore_saved_sigmask_unless(ret == -ERESTARTNOHAND);
     304             : 
     305         777 :         if (!p)
     306             :                 return ret;
     307             : 
     308         211 :         if (current->personality & STICKY_TIMEOUTS)
     309           0 :                 goto sticky;
     310             : 
     311             :         /* No update for zero timeout */
     312         211 :         if (!end_time->tv_sec && !end_time->tv_nsec)
     313             :                 return ret;
     314             : 
                               /* remaining time = end_time - now, clamped to zero once the end time has passed */
     315         210 :         ktime_get_ts64(&rts);
     316         210 :         rts = timespec64_sub(*end_time, rts);
     317         210 :         if (rts.tv_sec < 0)
     318           2 :                 rts.tv_sec = rts.tv_nsec = 0;
     319             : 
     320             : 
     321         210 :         switch (pt_type) {
     322             :         case PT_TIMEVAL:
     323             :                 {
     324           7 :                         struct __kernel_old_timeval rtv;
     325             : 
                                               /*
                                                * If the struct is larger than its two members,
                                                * zero it first so the padding is cleared before
                                                * the whole struct is copied out.
                                                */
     326           7 :                         if (sizeof(rtv) > sizeof(rtv.tv_sec) + sizeof(rtv.tv_usec))
     327             :                                 memset(&rtv, 0, sizeof(rtv));
     328           7 :                         rtv.tv_sec = rts.tv_sec;
     329           7 :                         rtv.tv_usec = rts.tv_nsec / NSEC_PER_USEC;
     330           7 :                         if (!copy_to_user(p, &rtv, sizeof(rtv)))
     331           7 :                                 return ret;
     332             :                 }
     333           0 :                 break;
     334           0 :         case PT_OLD_TIMEVAL:
     335             :                 {
     336           0 :                         struct old_timeval32 rtv;
     337             : 
     338           0 :                         rtv.tv_sec = rts.tv_sec;
     339           0 :                         rtv.tv_usec = rts.tv_nsec / NSEC_PER_USEC;
     340           0 :                         if (!copy_to_user(p, &rtv, sizeof(rtv)))
     341           0 :                                 return ret;
     342             :                 }
     343           0 :                 break;
     344         203 :         case PT_TIMESPEC:
     345         203 :                 if (!put_timespec64(&rts, p))
     346             :                         return ret;
     347             :                 break;
     348           0 :         case PT_OLD_TIMESPEC:
     349           0 :                 if (!put_old_timespec32(&rts, p))
     350             :                         return ret;
     351             :                 break;
     352           0 :         default:
     353           0 :                 BUG();
     354             :         }
                               /* reached only when the write to user space failed */
     355             :         /*
     356             :          * If an application puts its timeval in read-only memory, we
     357             :          * don't want the Linux-specific update to the timeval to
     358             :          * cause a fault after the select has completed
     359             :          * successfully. However, because we're not updating the
     360             :          * timeval, we can't restart the system call.
     361             :          */
     362             : 
     363           0 : sticky:
     364           0 :         if (ret == -ERESTARTNOHAND)
     365           0 :                 ret = -EINTR;
     366             :         return ret;
     367             : }
     368             : 
     369             : /*
     370             :  * Scalable version of the fd_set.
     371             :  */
     372             : 
     373             : typedef struct {
     374             :         unsigned long *in, *out, *ex;              /* requested bitmaps; do_select() pairs them with POLLIN_SET, POLLOUT_SET and POLLEX_SET */
     375             :         unsigned long *res_in, *res_out, *res_ex;  /* result bitmaps written by do_select() */
     376             : } fd_set_bits;
     377             : 
     378             : /*
     379             :  * How many longwords for "nr" bits?
     380             :  */
     381             : #define FDS_BITPERLONG  (8*sizeof(long))  /* number of bits in one long */
     382             : #define FDS_LONGS(nr)   (((nr)+FDS_BITPERLONG-1)/FDS_BITPERLONG)  /* longs needed to hold nr bits, rounded up */
     383             : #define FDS_BYTES(nr)   (FDS_LONGS(nr)*sizeof(long))  /* the same amount expressed in bytes */
     384             : 
     385             : /*
     386             :  * Use "unsigned long" accesses to let user-mode fd_set's be long-aligned.
     387             :  */
     388             : static inline
     389        1734 : int get_fd_set(unsigned long nr, void __user *ufdset, unsigned long *fdset)
     390             : {
                               /*
                                * Fill @fdset with the FDS_BYTES(@nr) bytes of the user bitmap
                                * @ufdset, or with zeroes when @ufdset is NULL.
                                * Returns 0, or -EFAULT when copy_from_user() reports a failure.
                                */
     391        1734 :         nr = FDS_BYTES(nr);
     392        1734 :         if (ufdset)
     393        2218 :                 return copy_from_user(fdset, ufdset, nr) ? -EFAULT : 0;
     394             : 
     395         625 :         memset(fdset, 0, nr);
     396         625 :         return 0;
     397             : }
     398             : 
     399             : static inline unsigned long __must_check
     400        1704 : set_fd_set(unsigned long nr, void __user *ufdset, unsigned long *fdset)
     401             : {
                               /*
                                * Copy the FDS_BYTES(@nr) bytes of @fdset out to the user bitmap
                                * @ufdset.  Does nothing and returns 0 when @ufdset is NULL;
                                * otherwise returns the value of __copy_to_user() (presumably
                                * non-zero on a fault -- confirm against its definition).
                                */
     402        1704 :         if (ufdset)
     403        2200 :                 return __copy_to_user(ufdset, fdset, FDS_BYTES(nr));
     404             :         return 0;
     405             : }
     406             : 
     407             : static inline
     408        1734 : void zero_fd_set(unsigned long nr, unsigned long *fdset)
     409             : {
                               /* clear the FDS_BYTES(@nr) bytes of the kernel-side bitmap @fdset */
     410        1734 :         memset(fdset, 0, FDS_BYTES(nr));
     411        1734 : }
     412             : 
     413             : #define FDS_IN(fds, n)          (fds->in + n)   /* address of word n of the "in" bitmap */
     414             : #define FDS_OUT(fds, n)         (fds->out + n)  /* address of word n of the "out" bitmap */
     415             : #define FDS_EX(fds, n)          (fds->ex + n)   /* address of word n of the "ex" bitmap */
     416             : 
                       /*
                        * OR of word n across the in, out and ex bitmaps.
                        * NOTE(review): fds and n are not parenthesized in these macros; the
                        * uses in max_select_fd() pass plain identifiers.
                        */
     417             : #define BITS(fds, n)    (*FDS_IN(fds, n)|*FDS_OUT(fds, n)|*FDS_EX(fds, n))
     418             : 
     419         578 : static int max_select_fd(unsigned long n, fd_set_bits *fds)
     420             : {
                               /*
                                * Check the first @n bits of the three requested bitmaps against
                                * the open-fd bitmap of current->files and locate the highest
                                * requested descriptor.  Returns that descriptor + 1 (0 when no
                                * bit is requested), or -EBADF when a requested bit is not set
                                * in open_fds.  do_select() calls this between rcu_read_lock()
                                * and rcu_read_unlock().
                                */
     421         578 :         unsigned long *open_fds;
     422         578 :         unsigned long set;
     423         578 :         int max;
     424         578 :         struct fdtable *fdt;
     425             : 
     426             :         /* handle last incomplete long-word first */
                               /* set = mask of the low (n % BITS_PER_LONG) bits; 0 when n is a multiple of BITS_PER_LONG */
     427         578 :         set = ~(~0UL << (n & (BITS_PER_LONG-1)));
     428         578 :         n /= BITS_PER_LONG;
     429         578 :         fdt = files_fdtable(current->files);
     430         578 :         open_fds = fdt->open_fds + n;
     431         578 :         max = 0;
     432         578 :         if (set) {
     433         578 :                 set &= BITS(fds, n);
     434         578 :                 if (set) {
                                               /*
                                                * All requested bits are open: jump into the loop
                                                * body below to compute max from this partial
                                                * word; the loop then goes on to the lower words.
                                                */
     435         575 :                         if (!(set & ~*open_fds))
     436         575 :                                 goto get_max;
     437             :                         return -EBADF;
     438             :                 }
     439             :         }
                               /* walk the remaining full words from the highest down to word 0 */
     440         578 :         while (n) {
     441           0 :                 open_fds--;
     442           0 :                 n--;
     443           0 :                 set = BITS(fds, n);
     444           0 :                 if (!set)
     445           0 :                         continue;
     446           0 :                 if (set & ~*open_fds)
     447             :                         return -EBADF;
                                       /* max was already found in a higher word; lower words are only validated */
     448           0 :                 if (max)
     449           0 :                         continue;
     450           0 : get_max:
                                       /* count up to the highest set bit of 'set', then add this word's bit offset */
     451        7457 :                 do {
     452        7457 :                         max++;
     453        7457 :                         set >>= 1;
     454        7457 :                 } while (set);
     455         575 :                 max += n * BITS_PER_LONG;
     456             :         }
     457             : 
     458             :         return max;
     459             : }
     460             : 
     461             : #define POLLIN_SET (EPOLLRDNORM | EPOLLRDBAND | EPOLLIN | EPOLLHUP | EPOLLERR)  /* events that mark an fd ready in the "in" set */
     462             : #define POLLOUT_SET (EPOLLWRBAND | EPOLLWRNORM | EPOLLOUT | EPOLLERR)  /* events that mark an fd ready in the "out" set */
     463             : #define POLLEX_SET (EPOLLPRI)  /* events that mark an fd ready in the "ex" set */
     464             : 
     465        2618 : static inline void wait_key_set(poll_table *wait, unsigned long in,
     466             :                                 unsigned long out, unsigned long bit,
     467             :                                 __poll_t ll_flag)
     468             : {
                               /*
                                * Set wait->_key to the events of interest for the descriptor
                                * selected by @bit: always POLLEX_SET and @ll_flag, plus
                                * POLLIN_SET when @bit is set in @in and POLLOUT_SET when it is
                                * set in @out.
                                */
     469        2618 :         wait->_key = POLLEX_SET | ll_flag;
     470        2618 :         if (in & bit)
     471        2590 :                 wait->_key |= POLLIN_SET;
     472        2618 :         if (out & bit)
     473         403 :                 wait->_key |= POLLOUT_SET;
     474             : }
     475             : 
     476         578 : static int do_select(int n, fd_set_bits *fds, struct timespec64 *end_time)
     477             : {
     478         578 :         ktime_t expire, *to = NULL;
     479         578 :         struct poll_wqueues table;
     480         578 :         poll_table *wait;
     481         578 :         int retval, i, timed_out = 0;
     482         578 :         u64 slack = 0;
     483         578 :         __poll_t busy_flag = net_busy_loop_on() ? POLL_BUSY_LOOP : 0;
     484         578 :         unsigned long busy_start = 0;
     485             : 
     486         578 :         rcu_read_lock();
     487         578 :         retval = max_select_fd(n, fds);
     488         578 :         rcu_read_unlock();
     489             : 
     490         578 :         if (retval < 0)
     491             :                 return retval;
     492         578 :         n = retval;
     493             : 
     494         578 :         poll_initwait(&table);
     495         578 :         wait = &table.pt;
     496         578 :         if (end_time && !end_time->tv_sec && !end_time->tv_nsec) {
     497           1 :                 wait->_qproc = NULL;
     498           1 :                 timed_out = 1;
     499             :         }
     500             : 
     501         578 :         if (end_time && !timed_out)
     502           9 :                 slack = select_estimate_accuracy(end_time);
     503             : 
     504         578 :         retval = 0;
     505         722 :         for (;;) {
     506         722 :                 unsigned long *rinp, *routp, *rexp, *inp, *outp, *exp;
     507         722 :                 bool can_busy_loop = false;
     508             : 
     509         722 :                 inp = fds->in; outp = fds->out; exp = fds->ex;
     510         722 :                 rinp = fds->res_in; routp = fds->res_out; rexp = fds->res_ex;
     511             : 
     512        1439 :                 for (i = 0; i < n; ++rinp, ++routp, ++rexp) {
     513         717 :                         unsigned long in, out, ex, all_bits, bit = 1, j;
     514         717 :                         unsigned long res_in = 0, res_out = 0, res_ex = 0;
     515         717 :                         __poll_t mask;
     516             : 
     517         717 :                         in = *inp++; out = *outp++; ex = *exp++;
     518         717 :                         all_bits = in | out | ex;
     519         717 :                         if (all_bits == 0) {
     520           0 :                                 i += BITS_PER_LONG;
     521           0 :                                 continue;
     522             :                         }
     523             : 
     524        9774 :                         for (j = 0; j < BITS_PER_LONG; ++j, ++i, bit <<= 1) {
     525        9774 :                                 struct fd f;
     526        9774 :                                 if (i >= n)
     527             :                                         break;
     528        9057 :                                 if (!(bit & all_bits))
     529        6439 :                                         continue;
     530        2618 :                                 f = fdget(i);
     531        2618 :                                 if (f.file) {
     532        2618 :                                         wait_key_set(wait, in, out, bit,
     533             :                                                      busy_flag);
     534        2618 :                                         mask = vfs_poll(f.file, wait);
     535             : 
     536        2618 :                                         fdput(f);
     537        2618 :                                         if ((mask & POLLIN_SET) && (in & bit)) {
     538         454 :                                                 res_in |= bit;
     539         454 :                                                 retval++;
     540         454 :                                                 wait->_qproc = NULL;
     541             :                                         }
     542        2618 :                                         if ((mask & POLLOUT_SET) && (out & bit)) {
     543         403 :                                                 res_out |= bit;
     544         403 :                                                 retval++;
     545         403 :                                                 wait->_qproc = NULL;
     546             :                                         }
     547        2618 :                                         if ((mask & POLLEX_SET) && (ex & bit)) {
     548           0 :                                                 res_ex |= bit;
     549           0 :                                                 retval++;
     550           0 :                                                 wait->_qproc = NULL;
     551             :                                         }
     552             :                                         /* got something, stop busy polling */
     553        2618 :                                         if (retval) {
     554             :                                                 can_busy_loop = false;
     555             :                                                 busy_flag = 0;
     556             : 
     557             :                                         /*
     558             :                                          * only remember a returned
     559             :                                          * POLL_BUSY_LOOP if we asked for it
     560             :                                          */
     561         704 :                                         } else if (busy_flag & mask)
     562           0 :                                                 can_busy_loop = true;
     563             : 
     564             :                                 }
     565             :                         }
     566         717 :                         if (res_in)
     567         449 :                                 *rinp = res_in;
     568         717 :                         if (res_out)
     569         400 :                                 *routp = res_out;
     570         717 :                         if (res_ex)
     571           0 :                                 *rexp = res_ex;
     572         717 :                         cond_resched();
     573             :                 }
     574         722 :                 wait->_qproc = NULL;
     575         722 :                 if (retval || timed_out || signal_pending(current))
     576             :                         break;
     577         149 :                 if (table.error) {
     578             :                         retval = table.error;
     579             :                         break;
     580             :                 }
     581             : 
     582             :                 /* only if found POLL_BUSY_LOOP sockets && not out of time */
     583         149 :                 if (can_busy_loop && !need_resched()) {
     584           0 :                         if (!busy_start) {
     585           0 :                                 busy_start = busy_loop_current_time();
     586           0 :                                 continue;
     587             :                         }
     588           0 :                         if (!busy_loop_timeout(busy_start))
     589           0 :                                 continue;
     590             :                 }
     591         149 :                 busy_flag = 0;
     592             : 
     593             :                 /*
     594             :                  * If this is the first loop and we have a timeout
     595             :                  * given, then we convert to ktime_t and set the 'to'
     596             :                  * pointer to the expiry value.
     597             :                  */
     598         149 :                 if (end_time && !to) {
     599           7 :                         expire = timespec64_to_ktime(*end_time);
     600           7 :                         to = &expire;
     601             :                 }
     602             : 
     603         149 :                 if (!poll_schedule_timeout(&table, TASK_INTERRUPTIBLE,
     604             :                                            to, slack))
     605           1 :                         timed_out = 1;
     606             :         }
     607             : 
     608         573 :         poll_freewait(&table);
     609             : 
     610         573 :         return retval;
     611             : }
     612             : 
     613             : /*
     614             :  * We can actually return ERESTARTSYS instead of EINTR, but I'd
     615             :  * like to be certain this leads to no problems. So I return
     616             :  * EINTR just for safety.
     617             :  *
     618             :  * Update: ERESTARTSYS breaks at least the xview clock binary, so
     619             :  * I'm trying ERESTARTNOHAND, which restarts only when you want it to.
     620             :  */
     621         578 : int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp,
     622             :                            fd_set __user *exp, struct timespec64 *end_time)
     623             : {
     624         578 :         fd_set_bits fds;
     625         578 :         void *bits;
     626         578 :         int ret, max_fds;
     627         578 :         size_t size, alloc_size;
     628         578 :         struct fdtable *fdt;
     629             :         /* Allocate small arguments on the stack to save memory and be faster */
     630         578 :         long stack_fds[SELECT_STACK_ALLOC/sizeof(long)];
     631             : 
     632         578 :         ret = -EINVAL;
     633         578 :         if (n < 0)
     634           0 :                 goto out_nofds;
     635             : 
     636             :         /* max_fds can increase, so grab it once to avoid race */
     637         578 :         rcu_read_lock();
     638         578 :         fdt = files_fdtable(current->files);
     639         578 :         max_fds = fdt->max_fds;
     640         578 :         rcu_read_unlock();
     641         578 :         if (n > max_fds)
     642             :                 n = max_fds;
     643             : 
     644             :         /*
     645             :          * We need 6 bitmaps (in/out/ex for both incoming and outgoing).
     646             :          * Since we use fd_set, we need to allocate memory in units of
     647             :          * long-words.
     648             :          */
     649         578 :         size = FDS_BYTES(n);
     650         578 :         bits = stack_fds;
     651         578 :         if (size > sizeof(stack_fds) / 6) {
     652             :                 /* Not enough space in on-stack array; must use kmalloc */
     653           0 :                 ret = -ENOMEM;
     654           0 :                 if (size > (SIZE_MAX / 6))
     655             :                         goto out_nofds;
     656             : 
     657           0 :                 alloc_size = 6 * size;
     658           0 :                 bits = kvmalloc(alloc_size, GFP_KERNEL);
     659           0 :                 if (!bits)
     660           0 :                         goto out_nofds;
     661             :         }
     662         578 :         fds.in      = bits;
     663         578 :         fds.out     = bits +   size;
     664         578 :         fds.ex      = bits + 2*size;
     665         578 :         fds.res_in  = bits + 3*size;
     666         578 :         fds.res_out = bits + 4*size;
     667         578 :         fds.res_ex  = bits + 5*size;
     668             : 
     669         578 :         if ((ret = get_fd_set(n, inp, fds.in)) ||
     670         578 :             (ret = get_fd_set(n, outp, fds.out)) ||
     671         578 :             (ret = get_fd_set(n, exp, fds.ex)))
     672           0 :                 goto out;
     673         578 :         zero_fd_set(n, fds.res_in);
     674         578 :         zero_fd_set(n, fds.res_out);
     675         578 :         zero_fd_set(n, fds.res_ex);
     676             : 
     677         578 :         ret = do_select(n, &fds, end_time);
     678             : 
     679         573 :         if (ret < 0)
     680           0 :                 goto out;
     681         573 :         if (!ret) {
     682           7 :                 ret = -ERESTARTNOHAND;
     683           7 :                 if (signal_pending(current))
     684           5 :                         goto out;
     685             :                 ret = 0;
     686             :         }
     687             : 
     688        1136 :         if (set_fd_set(n, inp, fds.res_in) ||
     689        1136 :             set_fd_set(n, outp, fds.res_out) ||
     690         568 :             set_fd_set(n, exp, fds.res_ex))
     691             :                 ret = -EFAULT;
     692             : 
     693         568 : out:
     694         573 :         if (bits != stack_fds)
     695           0 :                 kvfree(bits);
     696         573 : out_nofds:
     697         573 :         return ret;
     698             : }
     699             : 
     700         578 : static int kern_select(int n, fd_set __user *inp, fd_set __user *outp,
     701             :                        fd_set __user *exp, struct __kernel_old_timeval __user *tvp)
     702             : {
     703         578 :         struct timespec64 end_time, *to = NULL;
     704         578 :         struct __kernel_old_timeval tv;
     705         578 :         int ret;
     706             : 
     707         578 :         if (tvp) {
     708          10 :                 if (copy_from_user(&tv, tvp, sizeof(tv)))
     709             :                         return -EFAULT;
     710             : 
     711          10 :                 to = &end_time;
     712          10 :                 if (poll_select_set_timeout(to,
     713          10 :                                 tv.tv_sec + (tv.tv_usec / USEC_PER_SEC),
     714          10 :                                 (tv.tv_usec % USEC_PER_SEC) * NSEC_PER_USEC))
     715             :                         return -EINVAL;
     716             :         }
     717             : 
     718         578 :         ret = core_sys_select(n, inp, outp, exp, to);
     719         573 :         return poll_select_finish(&end_time, tvp, PT_TIMEVAL, ret);
     720             : }
     721             : 
     722        1151 : SYSCALL_DEFINE5(select, int, n, fd_set __user *, inp, fd_set __user *, outp,
     723             :                 fd_set __user *, exp, struct __kernel_old_timeval __user *, tvp)
     724             : {
     725         578 :         return kern_select(n, inp, outp, exp, tvp);
     726             : }
     727             : 
     728           0 : static long do_pselect(int n, fd_set __user *inp, fd_set __user *outp,
     729             :                        fd_set __user *exp, void __user *tsp,
     730             :                        const sigset_t __user *sigmask, size_t sigsetsize,
     731             :                        enum poll_time_type type)
     732             : {
     733           0 :         struct timespec64 ts, end_time, *to = NULL;
     734           0 :         int ret;
     735             : 
     736           0 :         if (tsp) {
     737           0 :                 switch (type) {
     738           0 :                 case PT_TIMESPEC:
     739           0 :                         if (get_timespec64(&ts, tsp))
     740             :                                 return -EFAULT;
     741             :                         break;
     742           0 :                 case PT_OLD_TIMESPEC:
     743           0 :                         if (get_old_timespec32(&ts, tsp))
     744             :                                 return -EFAULT;
     745             :                         break;
     746           0 :                 default:
     747           0 :                         BUG();
     748             :                 }
     749             : 
     750           0 :                 to = &end_time;
     751           0 :                 if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec))
     752             :                         return -EINVAL;
     753             :         }
     754             : 
     755           0 :         ret = set_user_sigmask(sigmask, sigsetsize);
     756           0 :         if (ret)
     757           0 :                 return ret;
     758             : 
     759           0 :         ret = core_sys_select(n, inp, outp, exp, to);
     760           0 :         return poll_select_finish(&end_time, tsp, type, ret);
     761             : }
     762             : 
     763             : /*
     764             :  * Most architectures can't handle 7-argument syscalls. So we provide a
     765             :  * 6-argument version where the sixth argument is a pointer to a structure
     766             :  * which has a pointer to the sigset_t itself followed by a size_t containing
     767             :  * the sigset size.
     768             :  */
     769             : struct sigset_argpack {
     770             :         sigset_t __user *p;
     771             :         size_t size;
     772             : };
     773             : 
     774           0 : static inline int get_sigset_argpack(struct sigset_argpack *to,
     775             :                                      struct sigset_argpack __user *from)
     776             : {
     777             :         // the path is hot enough for overhead of copy_from_user() to matter
     778           0 :         if (from) {
     779           0 :                 if (!user_read_access_begin(from, sizeof(*from)))
     780             :                         return -EFAULT;
     781           0 :                 unsafe_get_user(to->p, &from->p, Efault);
     782           0 :                 unsafe_get_user(to->size, &from->size, Efault);
     783             :                 user_read_access_end();
     784             :         }
     785             :         return 0;
     786             : Efault:
     787             :         user_access_end();
     788             :         return -EFAULT;
     789             : }
     790             : 
     791           0 : SYSCALL_DEFINE6(pselect6, int, n, fd_set __user *, inp, fd_set __user *, outp,
     792             :                 fd_set __user *, exp, struct __kernel_timespec __user *, tsp,
     793             :                 void __user *, sig)
     794             : {
     795           0 :         struct sigset_argpack x = {NULL, 0};
     796             : 
     797           0 :         if (get_sigset_argpack(&x, sig))
     798             :                 return -EFAULT;
     799             : 
     800           0 :         return do_pselect(n, inp, outp, exp, tsp, x.p, x.size, PT_TIMESPEC);
     801             : }
     802             : 
     803             : #if defined(CONFIG_COMPAT_32BIT_TIME) && !defined(CONFIG_64BIT)
     804             : 
     805             : SYSCALL_DEFINE6(pselect6_time32, int, n, fd_set __user *, inp, fd_set __user *, outp,
     806             :                 fd_set __user *, exp, struct old_timespec32 __user *, tsp,
     807             :                 void __user *, sig)
     808             : {
     809             :         struct sigset_argpack x = {NULL, 0};
     810             : 
     811             :         if (get_sigset_argpack(&x, sig))
     812             :                 return -EFAULT;
     813             : 
     814             :         return do_pselect(n, inp, outp, exp, tsp, x.p, x.size, PT_OLD_TIMESPEC);
     815             : }
     816             : 
     817             : #endif
     818             : 
     819             : #ifdef __ARCH_WANT_SYS_OLD_SELECT
     820             : struct sel_arg_struct {
     821             :         unsigned long n;
     822             :         fd_set __user *inp, *outp, *exp;
     823             :         struct __kernel_old_timeval __user *tvp;
     824             : };
     825             : 
     826             : SYSCALL_DEFINE1(old_select, struct sel_arg_struct __user *, arg)
     827             : {
     828             :         struct sel_arg_struct a;
     829             : 
     830             :         if (copy_from_user(&a, arg, sizeof(a)))
     831             :                 return -EFAULT;
     832             :         return kern_select(a.n, a.inp, a.outp, a.exp, a.tvp);
     833             : }
     834             : #endif
     835             : 
     836             : struct poll_list {
     837             :         struct poll_list *next;
     838             :         int len;
     839             :         struct pollfd entries[];
     840             : };
     841             : 
     842             : #define POLLFD_PER_PAGE  ((PAGE_SIZE-sizeof(struct poll_list)) / sizeof(struct pollfd))
     843             : 
     844             : /*
     845             :  * Fish for pollable events on the pollfd->fd file descriptor. We're only
     846             :  * interested in events matching the pollfd->events mask, and the result
     847             :  * matching that mask is both recorded in pollfd->revents and returned. The
     848             :  * pwait poll_table will be used by the fd-provided poll handler for waiting,
     849             :  * if pwait->_qproc is non-NULL.
     850             :  */
     851        1051 : static inline __poll_t do_pollfd(struct pollfd *pollfd, poll_table *pwait,
     852             :                                      bool *can_busy_poll,
     853             :                                      __poll_t busy_flag)
     854             : {
     855        1051 :         int fd = pollfd->fd;
     856        1051 :         __poll_t mask = 0, filter;
     857        1051 :         struct fd f;
     858             : 
     859        1051 :         if (fd < 0)
     860           0 :                 goto out;
     861        1051 :         mask = EPOLLNVAL;
     862        1051 :         f = fdget(fd);
     863        1051 :         if (!f.file)
     864           0 :                 goto out;
     865             : 
     866             :         /* userland u16 ->events contains POLL... bitmap */
     867        1051 :         filter = demangle_poll(pollfd->events) | EPOLLERR | EPOLLHUP;
     868        1051 :         pwait->_key = filter | busy_flag;
     869        1051 :         mask = vfs_poll(f.file, pwait);
     870        1051 :         if (mask & busy_flag)
     871           0 :                 *can_busy_poll = true;
     872        1051 :         mask &= filter;             /* Mask out unneeded events. */
     873        1506 :         fdput(f);
     874             : 
     875        1051 : out:
     876             :         /* ... and so does ->revents */
     877        1051 :         pollfd->revents = mangle_poll(mask);
     878        1051 :         return mask;
     879             : }
     880             : 
     881         572 : static int do_poll(struct poll_list *list, struct poll_wqueues *wait,
     882             :                    struct timespec64 *end_time)
     883             : {
     884         572 :         poll_table* pt = &wait->pt;
     885         572 :         ktime_t expire, *to = NULL;
     886         572 :         int timed_out = 0, count = 0;
     887         572 :         u64 slack = 0;
     888         572 :         __poll_t busy_flag = net_busy_loop_on() ? POLL_BUSY_LOOP : 0;
     889         572 :         unsigned long busy_start = 0;
     890             : 
     891             :         /* Optimise the no-wait case */
     892         572 :         if (end_time && !end_time->tv_sec && !end_time->tv_nsec) {
     893          42 :                 pt->_qproc = NULL;
     894          42 :                 timed_out = 1;
     895             :         }
     896             : 
     897         572 :         if (end_time && !timed_out)
     898         216 :                 slack = select_estimate_accuracy(end_time);
     899             : 
     900         994 :         for (;;) {
     901         994 :                 struct poll_list *walk;
     902         994 :                 bool can_busy_loop = false;
     903             : 
     904        1988 :                 for (walk = list; walk != NULL; walk = walk->next) {
     905         994 :                         struct pollfd * pfd, * pfd_end;
     906             : 
     907         994 :                         pfd = walk->entries;
     908         994 :                         pfd_end = pfd + walk->len;
     909        2045 :                         for (; pfd != pfd_end; pfd++) {
     910             :                                 /*
     911             :                                  * Fish for events. If we found one, record it
     912             :                                  * and kill poll_table->_qproc, so we don't
     913             :                                  * needlessly register any other waiters after
     914             :                                  * this. They'll get immediately deregistered
     915             :                                  * when we break out and return.
     916             :                                  */
     917        1051 :                                 if (do_pollfd(pfd, pt, &can_busy_loop,
     918             :                                               busy_flag)) {
     919         530 :                                         count++;
     920         530 :                                         pt->_qproc = NULL;
     921             :                                         /* found something, stop busy polling */
     922         530 :                                         busy_flag = 0;
     923         530 :                                         can_busy_loop = false;
     924             :                                 }
     925             :                         }
     926             :                 }
     927             :                 /*
     928             :                  * All waiters have already been registered, so don't provide
     929             :                  * a poll_table->_qproc to them on the next loop iteration.
     930             :                  */
     931         994 :                 pt->_qproc = NULL;
     932         994 :                 if (!count) {
     933         464 :                         count = wait->error;
     934         464 :                         if (signal_pending(current))
     935           0 :                                 count = -ERESTARTNOHAND;
     936             :                 }
     937         994 :                 if (count || timed_out)
     938             :                         break;
     939             : 
     940             :                 /* only if found POLL_BUSY_LOOP sockets && not out of time */
     941         425 :                 if (can_busy_loop && !need_resched()) {
     942           0 :                         if (!busy_start) {
     943           0 :                                 busy_start = busy_loop_current_time();
     944           0 :                                 continue;
     945             :                         }
     946           0 :                         if (!busy_loop_timeout(busy_start))
     947           0 :                                 continue;
     948             :                 }
     949         425 :                 busy_flag = 0;
     950             : 
     951             :                 /*
     952             :                  * If this is the first loop and we have a timeout
     953             :                  * given, then we convert to ktime_t and set the 'to'
     954             :                  * pointer to the expiry value.
     955             :                  */
     956         425 :                 if (end_time && !to) {
     957         210 :                         expire = timespec64_to_ktime(*end_time);
     958         210 :                         to = &expire;
     959             :                 }
     960             : 
     961         425 :                 if (!poll_schedule_timeout(wait, TASK_INTERRUPTIBLE, to, slack))
     962           9 :                         timed_out = 1;
     963             :         }
     964         569 :         return count;
     965             : }
     966             : 
     967             : #define N_STACK_PPS ((sizeof(stack_pps) - sizeof(struct poll_list))  / \
     968             :                         sizeof(struct pollfd))
     969             : 
     970         572 : static int do_sys_poll(struct pollfd __user *ufds, unsigned int nfds,
     971             :                 struct timespec64 *end_time)
     972             : {
     973         572 :         struct poll_wqueues table;
     974         572 :         int err = -EFAULT, fdcount, len;
     975             :         /* Allocate small arguments on the stack to save memory and be
     976             :            faster - use long to make sure the buffer is aligned properly
     977             :            on 64-bit archs to avoid unaligned access. */
     978         572 :         long stack_pps[POLL_STACK_ALLOC/sizeof(long)];
     979         572 :         struct poll_list *const head = (struct poll_list *)stack_pps;
     980         572 :         struct poll_list *walk = head;
     981         572 :         unsigned long todo = nfds;
     982             : 
     983         572 :         if (nfds > rlimit(RLIMIT_NOFILE))
     984             :                 return -EINVAL;
     985             : 
     986         572 :         len = min_t(unsigned int, nfds, N_STACK_PPS);
     987         572 :         for (;;) {
     988         572 :                 walk->next = NULL;
     989         572 :                 walk->len = len;
     990         572 :                 if (!len)
     991             :                         break;
     992             : 
     993         572 :                 if (copy_from_user(walk->entries, ufds + nfds-todo,
     994         572 :                                         sizeof(struct pollfd) * walk->len))
     995           0 :                         goto out_fds;
     996             : 
     997         572 :                 todo -= walk->len;
     998         572 :                 if (!todo)
     999             :                         break;
    1000             : 
    1001           0 :                 len = min(todo, POLLFD_PER_PAGE);
    1002           0 :                 walk = walk->next = kmalloc(struct_size(walk, entries, len),
    1003             :                                             GFP_KERNEL);
    1004           0 :                 if (!walk) {
    1005           0 :                         err = -ENOMEM;
    1006           0 :                         goto out_fds;
    1007             :                 }
    1008             :         }
    1009             : 
    1010         572 :         poll_initwait(&table);
    1011         572 :         fdcount = do_poll(head, &table, end_time);
    1012         569 :         poll_freewait(&table);
    1013             : 
    1014        1138 :         if (!user_write_access_begin(ufds, nfds * sizeof(*ufds)))
    1015           0 :                 goto out_fds;
    1016             : 
    1017        1138 :         for (walk = head; walk; walk = walk->next) {
    1018         569 :                 struct pollfd *fds = walk->entries;
    1019         569 :                 int j;
    1020             : 
    1021        1169 :                 for (j = walk->len; j; fds++, ufds++, j--)
    1022         600 :                         unsafe_put_user(fds->revents, &ufds->revents, Efault);
    1023             :         }
    1024             :         user_write_access_end();
    1025             : 
    1026             :         err = fdcount;
    1027         569 : out_fds:
    1028         569 :         walk = head->next;
    1029         569 :         while (walk) {
    1030           0 :                 struct poll_list *pos = walk;
    1031           0 :                 walk = walk->next;
    1032           0 :                 kfree(pos);
    1033             :         }
    1034             : 
    1035             :         return err;
    1036             : 
    1037           0 : Efault:
    1038           0 :         user_write_access_end();
    1039           0 :         err = -EFAULT;
    1040           0 :         goto out_fds;
    1041             : }
    1042             : 
    1043           0 : static long do_restart_poll(struct restart_block *restart_block)
    1044             : {
    1045           0 :         struct pollfd __user *ufds = restart_block->poll.ufds;
    1046           0 :         int nfds = restart_block->poll.nfds;
    1047           0 :         struct timespec64 *to = NULL, end_time;
    1048           0 :         int ret;
    1049             : 
    1050           0 :         if (restart_block->poll.has_timeout) {
    1051           0 :                 end_time.tv_sec = restart_block->poll.tv_sec;
    1052           0 :                 end_time.tv_nsec = restart_block->poll.tv_nsec;
    1053           0 :                 to = &end_time;
    1054             :         }
    1055             : 
    1056           0 :         ret = do_sys_poll(ufds, nfds, to);
    1057             : 
    1058           0 :         if (ret == -ERESTARTNOHAND) {
    1059           0 :                 restart_block->fn = do_restart_poll;
    1060           0 :                 ret = -ERESTART_RESTARTBLOCK;
    1061             :         }
    1062           0 :         return ret;
    1063             : }
    1064             : 
    1065         736 : SYSCALL_DEFINE3(poll, struct pollfd __user *, ufds, unsigned int, nfds,
    1066             :                 int, timeout_msecs)
    1067             : {
    1068         368 :         struct timespec64 end_time, *to = NULL;
    1069         368 :         int ret;
    1070             : 
    1071         368 :         if (timeout_msecs >= 0) {
    1072          55 :                 to = &end_time;
    1073          55 :                 poll_select_set_timeout(to, timeout_msecs / MSEC_PER_SEC,
    1074          55 :                         NSEC_PER_MSEC * (timeout_msecs % MSEC_PER_SEC));
    1075             :         }
    1076             : 
    1077         368 :         ret = do_sys_poll(ufds, nfds, to);
    1078             : 
    1079         365 :         if (ret == -ERESTARTNOHAND) {
    1080           0 :                 struct restart_block *restart_block;
    1081             : 
    1082           0 :                 restart_block = &current->restart_block;
    1083           0 :                 restart_block->fn = do_restart_poll;
    1084           0 :                 restart_block->poll.ufds = ufds;
    1085           0 :                 restart_block->poll.nfds = nfds;
    1086             : 
    1087           0 :                 if (timeout_msecs >= 0) {
    1088           0 :                         restart_block->poll.tv_sec = end_time.tv_sec;
    1089           0 :                         restart_block->poll.tv_nsec = end_time.tv_nsec;
    1090           0 :                         restart_block->poll.has_timeout = 1;
    1091             :                 } else
    1092           0 :                         restart_block->poll.has_timeout = 0;
    1093             : 
    1094             :                 ret = -ERESTART_RESTARTBLOCK;
    1095             :         }
    1096         365 :         return ret;
    1097             : }
    1098             : 
    1099         408 : SYSCALL_DEFINE5(ppoll, struct pollfd __user *, ufds, unsigned int, nfds,
    1100             :                 struct __kernel_timespec __user *, tsp, const sigset_t __user *, sigmask,
    1101             :                 size_t, sigsetsize)
    1102             : {
    1103         204 :         struct timespec64 ts, end_time, *to = NULL;
    1104         204 :         int ret;
    1105             : 
    1106         204 :         if (tsp) {
    1107         203 :                 if (get_timespec64(&ts, tsp))
    1108             :                         return -EFAULT;
    1109             : 
    1110         203 :                 to = &end_time;
    1111         203 :                 if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec))
    1112             :                         return -EINVAL;
    1113             :         }
    1114             : 
    1115         204 :         ret = set_user_sigmask(sigmask, sigsetsize);
    1116         204 :         if (ret)
    1117           0 :                 return ret;
    1118             : 
    1119         204 :         ret = do_sys_poll(ufds, nfds, to);
    1120         204 :         return poll_select_finish(&end_time, tsp, PT_TIMESPEC, ret);
    1121             : }
    1122             : 
    1123             : #if defined(CONFIG_COMPAT_32BIT_TIME) && !defined(CONFIG_64BIT)
    1124             : 
    1125             : SYSCALL_DEFINE5(ppoll_time32, struct pollfd __user *, ufds, unsigned int, nfds,
    1126             :                 struct old_timespec32 __user *, tsp, const sigset_t __user *, sigmask,
    1127             :                 size_t, sigsetsize)
    1128             : {
                               /*
                                * Variant of ppoll whose timeout is a struct old_timespec32; only
                                * built when CONFIG_COMPAT_32BIT_TIME is set and CONFIG_64BIT is not.
                                *
                                * Returns -EFAULT if the timeout cannot be read from user space,
                                * -EINVAL if poll_select_set_timeout() rejects it, the error from
                                * set_user_sigmask() if that fails, and otherwise whatever
                                * poll_select_finish() returns for the do_sys_poll() result.
                                */
    1129             :         struct timespec64 ts, end_time, *to = NULL;
    1130             :         int ret;
    1131             : 
                               /* A NULL tsp leaves 'to' NULL, so no timeout is handed to do_sys_poll(). */
    1132             :         if (tsp) {
                                       /* The 32-bit user layout is widened into a timespec64 here. */
    1133             :                 if (get_old_timespec32(&ts, tsp))
    1134             :                         return -EFAULT;
    1135             : 
    1136             :                 to = &end_time;
    1137             :                 if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec))
    1138             :                         return -EINVAL;
    1139             :         }
    1140             : 
    1141             :         ret = set_user_sigmask(sigmask, sigsetsize);
    1142             :         if (ret)
    1143             :                 return ret;
    1144             : 
    1145             :         ret = do_sys_poll(ufds, nfds, to);
                               /* PT_OLD_TIMESPEC tells poll_select_finish() which layout tsp points to. */
    1146             :         return poll_select_finish(&end_time, tsp, PT_OLD_TIMESPEC, ret);
    1147             : }
    1148             : #endif
    1149             : 
    1150             : #ifdef CONFIG_COMPAT
    1151             : #define __COMPAT_NFDBITS       (8 * sizeof(compat_ulong_t)) /* bits in one compat_ulong_t, taking 8 bits per byte */
    1152             : 
    1153             : /*
    1154             :  * Ooo, nasty.  We need here to frob 32-bit unsigned longs to
    1155             :  * 64-bit unsigned longs.
    1156             :  */
    1157             : static
    1158           0 : int compat_get_fd_set(unsigned long nr, compat_ulong_t __user *ufdset,
    1159             :                         unsigned long *fdset)
    1160             : {
                               /*
                                * Load an nr-bit descriptor set from user space into fdset.
                                *
                                * With a non-NULL ufdset the result of compat_get_bitmap() is
                                * returned; with a NULL ufdset the kernel copy is cleared via
                                * zero_fd_set() and 0 is returned.
                                */
    1161           0 :         if (ufdset) {
    1162           0 :                 return compat_get_bitmap(fdset, ufdset, nr);
    1163             :         } else {
    1164           0 :                 zero_fd_set(nr, fdset);
    1165           0 :                 return 0;
    1166             :         }
    1167             : }
    1168             : 
    1169             : static
    1170           0 : int compat_set_fd_set(unsigned long nr, compat_ulong_t __user *ufdset,
    1171             :                       unsigned long *fdset)
    1172             : {
                               /*
                                * Store an nr-bit descriptor set from fdset back to user space.
                                *
                                * A NULL ufdset is not an error: nothing is written and 0 is
                                * returned. Otherwise the result of compat_put_bitmap() is returned.
                                */
    1173           0 :         if (!ufdset)
    1174             :                 return 0;
    1175           0 :         return compat_put_bitmap(ufdset, fdset, nr);
    1176             : }
    1177             : 
    1178             : 
    1179             : /*
    1180             :  * This is a near copy of the native select core earlier in this file
    1181             :  * (fs/select.c) and should be compared to it from time to time
    1182             :  */
    1183             : 
    1184             : /*
    1185             :  * We can actually return ERESTARTSYS instead of EINTR, but I'd
    1186             :  * like to be certain this leads to no problems. So I return
    1187             :  * EINTR just for safety.
    1188             :  *
    1189             :  * Update: ERESTARTSYS breaks at least the xview clock binary, so
    1190             :  * I'm trying ERESTARTNOHAND which restart only when you want to.
    1191             :  */
    1192           0 : static int compat_core_sys_select(int n, compat_ulong_t __user *inp,
    1193             :         compat_ulong_t __user *outp, compat_ulong_t __user *exp,
    1194             :         struct timespec64 *end_time)
    1195             : {
                               /*
                                * Core of the compat select paths: copy the three user descriptor
                                * sets in, run do_select() on them, and copy the three result sets
                                * back out.
                                *
                                * Returns:
                                *   -EINVAL          if n is negative;
                                *   -ENOMEM          if the bitmap buffer cannot be allocated;
                                *   a non-zero value from compat_get_fd_set() if copy-in fails;
                                *   a negative value from do_select();
                                *   -ERESTARTNOHAND  if do_select() returned 0 and a signal is pending;
                                *   -EFAULT          if writing any result set back fails;
                                *   otherwise the value returned by do_select().
                                */
    1196           0 :         fd_set_bits fds;
    1197           0 :         void *bits;
    1198           0 :         int size, max_fds, ret = -EINVAL;
    1199           0 :         struct fdtable *fdt;
                               /* On-stack buffer used when all six bitmaps fit in it (see below). */
    1200           0 :         long stack_fds[SELECT_STACK_ALLOC/sizeof(long)];
    1201             : 
                               /* ret still holds -EINVAL from its initializer on this path. */
    1202           0 :         if (n < 0)
    1203           0 :                 goto out_nofds;
    1204             : 
    1205             :         /* max_fds can increase, so grab it once to avoid race */
    1206           0 :         rcu_read_lock();
    1207           0 :         fdt = files_fdtable(current->files);
    1208           0 :         max_fds = fdt->max_fds;
    1209           0 :         rcu_read_unlock();
                               /* Silently clamp n to the table size instead of rejecting it. */
    1210           0 :         if (n > max_fds)
    1211             :                 n = max_fds;
    1212             : 
    1213             :         /*
    1214             :          * We need 6 bitmaps (in/out/ex for both incoming and outgoing),
    1215             :          * since we used fdset we need to allocate memory in units of
    1216             :          * long-words.
    1217             :          */
    1218           0 :         size = FDS_BYTES(n);
    1219           0 :         bits = stack_fds;
                               /* Each bitmap may use at most one sixth of the stack buffer. */
    1220           0 :         if (size > sizeof(stack_fds) / 6) {
    1221           0 :                 bits = kmalloc_array(6, size, GFP_KERNEL);
                                       /* Error code is set before the check so the goto needs no extra store. */
    1222           0 :                 ret = -ENOMEM;
    1223           0 :                 if (!bits)
    1224           0 :                         goto out_nofds;
    1225             :         }
                               /*
                                * Carve the buffer into six consecutive regions of 'size' bytes each.
                                * 'bits' is a void *, so the offsets below are byte offsets.
                                */
    1226           0 :         fds.in      = (unsigned long *)  bits;
    1227           0 :         fds.out     = (unsigned long *) (bits +   size);
    1228           0 :         fds.ex      = (unsigned long *) (bits + 2*size);
    1229           0 :         fds.res_in  = (unsigned long *) (bits + 3*size);
    1230           0 :         fds.res_out = (unsigned long *) (bits + 4*size);
    1231           0 :         fds.res_ex  = (unsigned long *) (bits + 5*size);
    1232             : 
                               /* Stop at the first failing copy-in; ret keeps that call's error. */
    1233           0 :         if ((ret = compat_get_fd_set(n, inp, fds.in)) ||
    1234           0 :             (ret = compat_get_fd_set(n, outp, fds.out)) ||
    1235           0 :             (ret = compat_get_fd_set(n, exp, fds.ex)))
    1236           0 :                 goto out;
                               /* The buffer is not pre-zeroed, so the result sets are cleared explicitly. */
    1237           0 :         zero_fd_set(n, fds.res_in);
    1238           0 :         zero_fd_set(n, fds.res_out);
    1239           0 :         zero_fd_set(n, fds.res_ex);
    1240             : 
    1241           0 :         ret = do_select(n, &fds, end_time);
    1242             : 
    1243           0 :         if (ret < 0)
    1244           0 :                 goto out;
                               /*
                                * do_select() returned 0: report -ERESTARTNOHAND if a signal is
                                * pending, otherwise fall through with ret == 0 and still copy the
                                * (cleared) result sets back.
                                */
    1245           0 :         if (!ret) {
    1246           0 :                 ret = -ERESTARTNOHAND;
    1247           0 :                 if (signal_pending(current))
    1248           0 :                         goto out;
    1249             :                 ret = 0;
    1250             :         }
    1251             : 
                               /* Any failed copy-out replaces the result with -EFAULT. */
    1252           0 :         if (compat_set_fd_set(n, inp, fds.res_in) ||
    1253           0 :             compat_set_fd_set(n, outp, fds.res_out) ||
    1254           0 :             compat_set_fd_set(n, exp, fds.res_ex))
    1255             :                 ret = -EFAULT;
    1256           0 : out:
                               /* Only a kmalloc'ed buffer is freed; the stack buffer is left alone. */
    1257           0 :         if (bits != stack_fds)
    1258           0 :                 kfree(bits);
    1259           0 : out_nofds:
    1260           0 :         return ret;
    1261             : }
    1262             : 
    1263           0 : static int do_compat_select(int n, compat_ulong_t __user *inp,
    1264             :         compat_ulong_t __user *outp, compat_ulong_t __user *exp,
    1265             :         struct old_timeval32 __user *tvp)
    1266             : {
                               /*
                                * Shared body of the compat select and old_select syscalls: read the
                                * optional old_timeval32 timeout, then run compat_core_sys_select().
                                *
                                * Returns -EFAULT if the timeout cannot be copied from user space,
                                * -EINVAL if poll_select_set_timeout() rejects it, and otherwise the
                                * value poll_select_finish() returns for the select result.
                                */
    1267           0 :         struct timespec64 end_time, *to = NULL;
    1268           0 :         struct old_timeval32 tv;
    1269           0 :         int ret;
    1270             : 
                               /* A NULL tvp leaves 'to' NULL, so no timeout is passed down. */
    1271           0 :         if (tvp) {
    1272           0 :                 if (copy_from_user(&tv, tvp, sizeof(tv)))
    1273             :                         return -EFAULT;
    1274             : 
    1275           0 :                 to = &end_time;
                                       /*
                                        * tv_usec is split into whole seconds (added to tv_sec) and a
                                        * sub-second remainder, which is scaled to nanoseconds.
                                        */
    1276           0 :                 if (poll_select_set_timeout(to,
    1277           0 :                                 tv.tv_sec + (tv.tv_usec / USEC_PER_SEC),
    1278           0 :                                 (tv.tv_usec % USEC_PER_SEC) * NSEC_PER_USEC))
    1279             :                         return -EINVAL;
    1280             :         }
    1281             : 
    1282           0 :         ret = compat_core_sys_select(n, inp, outp, exp, to);
                               /* PT_OLD_TIMEVAL tells poll_select_finish() which layout tvp points to. */
    1283           0 :         return poll_select_finish(&end_time, tvp, PT_OLD_TIMEVAL, ret);
    1284             : }
    1285             : 
    1286           0 : COMPAT_SYSCALL_DEFINE5(select, int, n, compat_ulong_t __user *, inp,
    1287             :         compat_ulong_t __user *, outp, compat_ulong_t __user *, exp,
    1288             :         struct old_timeval32 __user *, tvp)
    1289             : {
                               /* Compat select: forwards its arguments unchanged to do_compat_select(). */
    1290           0 :         return do_compat_select(n, inp, outp, exp, tvp);
    1291             : }
    1292             : 
    1293             : struct compat_sel_arg_struct {
                               /*
                                * Argument block that the compat old_select syscall copies in from
                                * user space. The four compat_uptr_t members are turned into kernel
                                * __user pointers with compat_ptr() by that syscall.
                                */
    1294             :         compat_ulong_t n;	/* passed on as the 'n' argument of do_compat_select() */
    1295             :         compat_uptr_t inp;	/* first descriptor set */
    1296             :         compat_uptr_t outp;	/* second descriptor set */
    1297             :         compat_uptr_t exp;	/* third descriptor set */
    1298             :         compat_uptr_t tvp;	/* timeout, passed on as the old_timeval32 pointer */
    1299             : };
    1300             : 
    1301           0 : COMPAT_SYSCALL_DEFINE1(old_select, struct compat_sel_arg_struct __user *, arg)
    1302             : {
                               /*
                                * Compat old_select: all select arguments arrive packed in one
                                * user-space struct. Returns -EFAULT if that struct cannot be
                                * copied in, otherwise the result of do_compat_select().
                                */
    1303           0 :         struct compat_sel_arg_struct a;
    1304             : 
    1305           0 :         if (copy_from_user(&a, arg, sizeof(a)))
    1306             :                 return -EFAULT;
                               /* a.n is a compat_ulong_t and is converted to the callee's int n. */
    1307           0 :         return do_compat_select(a.n, compat_ptr(a.inp), compat_ptr(a.outp),
    1308           0 :                                 compat_ptr(a.exp), compat_ptr(a.tvp));
    1309             : }
    1310             : 
    1311           0 : static long do_compat_pselect(int n, compat_ulong_t __user *inp,
    1312             :         compat_ulong_t __user *outp, compat_ulong_t __user *exp,
    1313             :         void __user *tsp, compat_sigset_t __user *sigmask,
    1314             :         compat_size_t sigsetsize, enum poll_time_type type)
    1315             : {
                               /*
                                * Shared body of the compat pselect6 syscalls.
                                *
                                * tsp is an untyped user pointer; 'type' says whether it points to an
                                * old_timespec32 (PT_OLD_TIMESPEC) or a __kernel_timespec-style
                                * value read with get_timespec64() (PT_TIMESPEC).
                                *
                                * Returns -EFAULT if the timeout cannot be read, -EINVAL if
                                * poll_select_set_timeout() rejects it, the error from
                                * set_compat_user_sigmask() if that fails, and otherwise the value
                                * poll_select_finish() returns for the select result.
                                */
    1316           0 :         struct timespec64 ts, end_time, *to = NULL;
    1317           0 :         int ret;
    1318             : 
    1319           0 :         if (tsp) {
    1320           0 :                 switch (type) {
    1321           0 :                 case PT_OLD_TIMESPEC:
    1322           0 :                         if (get_old_timespec32(&ts, tsp))
    1323             :                                 return -EFAULT;
    1324             :                         break;
    1325           0 :                 case PT_TIMESPEC:
    1326           0 :                         if (get_timespec64(&ts, tsp))
    1327             :                                 return -EFAULT;
    1328             :                         break;
                                       /* Any other poll_time_type is a caller bug, not a user error. */
    1329           0 :                 default:
    1330           0 :                         BUG();
    1331             :                 }
    1332             : 
    1333           0 :                 to = &end_time;
    1334           0 :                 if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec))
    1335             :                         return -EINVAL;
    1336             :         }
    1337             : 
                               /* The signal mask is installed only after the timeout was validated. */
    1338           0 :         ret = set_compat_user_sigmask(sigmask, sigsetsize);
    1339           0 :         if (ret)
    1340           0 :                 return ret;
    1341             : 
    1342           0 :         ret = compat_core_sys_select(n, inp, outp, exp, to);
                               /* The same 'type' is forwarded so tsp is interpreted consistently. */
    1343           0 :         return poll_select_finish(&end_time, tsp, type, ret);
    1344             : }
    1345             : 
    1346             : struct compat_sigset_argpack {
                               /*
                                * Pair read from the user pointer given as the last argument of the
                                * compat pselect6 syscalls.
                                */
    1347             :         compat_uptr_t p;	/* converted with compat_ptr() and used as the sigmask pointer */
    1348             :         compat_size_t size;	/* passed on as sigsetsize */
    1349             : };
    1350           0 : static inline int get_compat_sigset_argpack(struct compat_sigset_argpack *to,
    1351             :                                             struct compat_sigset_argpack __user *from)
    1352             : {
                               /*
                                * Read a struct compat_sigset_argpack from user space into *to.
                                *
                                * A NULL 'from' is accepted: *to is left untouched and 0 is
                                * returned, so callers pre-initialize *to. Returns -EFAULT if the
                                * user range cannot be opened for reading or if either member
                                * fetch faults, 0 otherwise.
                                */
    1353           0 :         if (from) {
    1354           0 :                 if (!user_read_access_begin(from, sizeof(*from)))
    1355             :                         return -EFAULT;
                                       /* unsafe_get_user() jumps to Efault on a faulting access. */
    1356           0 :                 unsafe_get_user(to->p, &from->p, Efault);
    1357           0 :                 unsafe_get_user(to->size, &from->size, Efault);
    1358             :                 user_read_access_end();
    1359             :         }
    1360             :         return 0;
    1361             : Efault:
                               /*
                                * The window was opened with user_read_access_begin(), so it must be
                                * closed with the matching read variant on the fault path as well.
                                */
    1362             :         user_read_access_end();
    1363             :         return -EFAULT;
    1364             : }
    1365             : 
    1366           0 : COMPAT_SYSCALL_DEFINE6(pselect6_time64, int, n, compat_ulong_t __user *, inp,
    1367             :         compat_ulong_t __user *, outp, compat_ulong_t __user *, exp,
    1368             :         struct __kernel_timespec __user *, tsp, void __user *, sig)
    1369             : {
                               /*
                                * Compat pselect6 with a struct __kernel_timespec timeout. 'sig'
                                * points to a compat_sigset_argpack holding the sigmask pointer and
                                * its size. Returns -EFAULT if that pack cannot be read, otherwise
                                * the result of do_compat_pselect().
                                */
                               /* Stays {0, 0} when sig is NULL, since the reader then writes nothing. */
    1370           0 :         struct compat_sigset_argpack x = {0, 0};
    1371             : 
    1372           0 :         if (get_compat_sigset_argpack(&x, sig))
    1373             :                 return -EFAULT;
    1374             : 
    1375           0 :         return do_compat_pselect(n, inp, outp, exp, tsp, compat_ptr(x.p),
    1376             :                                  x.size, PT_TIMESPEC);
    1377             : }
    1378             : 
    1379             : #if defined(CONFIG_COMPAT_32BIT_TIME)
    1380             : 
    1381             : COMPAT_SYSCALL_DEFINE6(pselect6_time32, int, n, compat_ulong_t __user *, inp,
    1382             :         compat_ulong_t __user *, outp, compat_ulong_t __user *, exp,
    1383             :         struct old_timespec32 __user *, tsp, void __user *, sig)
    1384             : {
                               /*
                                * Compat pselect6 with a struct old_timespec32 timeout; only built
                                * when CONFIG_COMPAT_32BIT_TIME is set. 'sig' points to a
                                * compat_sigset_argpack holding the sigmask pointer and its size.
                                * Returns -EFAULT if that pack cannot be read, otherwise the result
                                * of do_compat_pselect().
                                */
                               /* Stays {0, 0} when sig is NULL, since the reader then writes nothing. */
    1385             :         struct compat_sigset_argpack x = {0, 0};
    1386             : 
    1387             :         if (get_compat_sigset_argpack(&x, sig))
    1388             :                 return -EFAULT;
    1389             : 
    1390             :         return do_compat_pselect(n, inp, outp, exp, tsp, compat_ptr(x.p),
    1391             :                                  x.size, PT_OLD_TIMESPEC);
    1392             : }
    1393             : 
    1394             : #endif
    1395             : 
    1396             : #if defined(CONFIG_COMPAT_32BIT_TIME)
    1397             : COMPAT_SYSCALL_DEFINE5(ppoll_time32, struct pollfd __user *, ufds,
    1398             :         unsigned int,  nfds, struct old_timespec32 __user *, tsp,
    1399             :         const compat_sigset_t __user *, sigmask, compat_size_t, sigsetsize)
    1400             : {
                               /*
                                * Compat ppoll with a struct old_timespec32 timeout and a
                                * compat_sigset_t mask; only built when CONFIG_COMPAT_32BIT_TIME
                                * is set.
                                *
                                * Returns -EFAULT if the timeout cannot be read from user space,
                                * -EINVAL if poll_select_set_timeout() rejects it, the error from
                                * set_compat_user_sigmask() if that fails, and otherwise whatever
                                * poll_select_finish() returns for the do_sys_poll() result.
                                */
    1401             :         struct timespec64 ts, end_time, *to = NULL;
    1402             :         int ret;
    1403             : 
                               /* A NULL tsp leaves 'to' NULL, so no timeout is handed to do_sys_poll(). */
    1404             :         if (tsp) {
    1405             :                 if (get_old_timespec32(&ts, tsp))
    1406             :                         return -EFAULT;
    1407             : 
    1408             :                 to = &end_time;
    1409             :                 if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec))
    1410             :                         return -EINVAL;
    1411             :         }
    1412             : 
    1413             :         ret = set_compat_user_sigmask(sigmask, sigsetsize);
    1414             :         if (ret)
    1415             :                 return ret;
    1416             : 
    1417             :         ret = do_sys_poll(ufds, nfds, to);
                               /* PT_OLD_TIMESPEC tells poll_select_finish() which layout tsp points to. */
    1418             :         return poll_select_finish(&end_time, tsp, PT_OLD_TIMESPEC, ret);
    1419             : }
    1420             : #endif
    1421             : 
    1422             : /* New compat syscall for 64-bit time_t */
    1423           0 : COMPAT_SYSCALL_DEFINE5(ppoll_time64, struct pollfd __user *, ufds,
    1424             :         unsigned int,  nfds, struct __kernel_timespec __user *, tsp,
    1425             :         const compat_sigset_t __user *, sigmask, compat_size_t, sigsetsize)
    1426             : {
                               /*
                                * Compat ppoll with a struct __kernel_timespec timeout and a
                                * compat_sigset_t mask.
                                *
                                * Returns -EFAULT if the timeout cannot be read from user space,
                                * -EINVAL if poll_select_set_timeout() rejects it, the error from
                                * set_compat_user_sigmask() if that fails, and otherwise whatever
                                * poll_select_finish() returns for the do_sys_poll() result.
                                */
    1427           0 :         struct timespec64 ts, end_time, *to = NULL;
    1428           0 :         int ret;
    1429             : 
                               /* A NULL tsp leaves 'to' NULL, so no timeout is handed to do_sys_poll(). */
    1430           0 :         if (tsp) {
    1431           0 :                 if (get_timespec64(&ts, tsp))
    1432             :                         return -EFAULT;
    1433             : 
    1434           0 :                 to = &end_time;
    1435           0 :                 if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec))
    1436             :                         return -EINVAL;
    1437             :         }
    1438             : 
    1439           0 :         ret = set_compat_user_sigmask(sigmask, sigsetsize);
    1440           0 :         if (ret)
    1441           0 :                 return ret;
    1442             : 
    1443           0 :         ret = do_sys_poll(ufds, nfds, to);
                               /* PT_TIMESPEC tells poll_select_finish() which layout tsp points to. */
    1444           0 :         return poll_select_finish(&end_time, tsp, PT_TIMESPEC, ret);
    1445             : }
    1446             : 
    1447             : #endif

Generated by: LCOV version 1.14