OS-5566 ppoll timeout calculation can overflow
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>
Reviewed by: Alex Wilson <alex.wilson@joyent.com>
Approved by: Robert Mustacchi <rm@joyent.com>
OS-4656 nested epoll does not mimic Linux behavior
Reviewed by: Bryan Cantrill <bryan@joyent.com>
OS-5162 poll/select yield improper EINTR when nfds and timeout are 0
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Joshua M. Clulow <jmc@joyent.com>
OS-4830 lxbrand convert select/poll to IKE
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>
*** 27,37 ****
/* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
/* All Rights Reserved */
/*
* Copyright (c) 2012 by Delphix. All rights reserved.
! * Copyright 2015, Joyent, Inc.
*/
/*
* Portions of this source code were derived from Berkeley 4.3 BSD
* under license from the Regents of the University of California.
--- 27,37 ----
/* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
/* All Rights Reserved */
/*
* Copyright (c) 2012 by Delphix. All rights reserved.
! * Copyright 2016, Joyent, Inc.
*/
/*
* Portions of this source code were derived from Berkeley 4.3 BSD
* under license from the Regents of the University of California.
*** 315,381 ****
pollrelock(state);
}
return (0);
}
! static int
! poll_common(pollfd_t *fds, nfds_t nfds, timespec_t *tsp, k_sigset_t *ksetp)
{
kthread_t *t = curthread;
- klwp_t *lwp = ttolwp(t);
- proc_t *p = ttoproc(t);
- int fdcnt = 0;
- int i;
hrtime_t deadline; /* hrtime value when we want to return */
pollfd_t *pollfdp;
- pollstate_t *ps;
pollcache_t *pcp;
int error = 0;
- nfds_t old_nfds;
int cacheindex = 0; /* which cache set is used */
/*
* Determine the precise future time of the requested timeout, if any.
*/
if (tsp == NULL) {
deadline = -1;
} else if (tsp->tv_sec == 0 && tsp->tv_nsec == 0) {
deadline = 0;
} else {
/* They must wait at least a tick. */
! deadline = ((hrtime_t)tsp->tv_sec * NANOSEC) + tsp->tv_nsec;
! deadline = MAX(deadline, nsec_per_tick);
! deadline += gethrtime();
! }
/*
! * Reset our signal mask, if requested.
*/
! if (ksetp != NULL) {
! mutex_enter(&p->p_lock);
! schedctl_finish_sigblock(t);
! lwp->lwp_sigoldmask = t->t_hold;
! t->t_hold = *ksetp;
! t->t_flag |= T_TOMASK;
! /*
! * Call cv_reltimedwait_sig() just to check for signals.
! * We will return immediately with either 0 or -1.
! */
! if (!cv_reltimedwait_sig(&t->t_delay_cv, &p->p_lock, 0,
! TR_CLOCK_TICK)) {
! mutex_exit(&p->p_lock);
! error = EINTR;
! goto pollout;
}
- mutex_exit(&p->p_lock);
}
/*
! * Check to see if this guy just wants to use poll() as a timeout.
* If yes then bypass all the other stuff and make him sleep.
*/
if (nfds == 0) {
/*
* Sleep until we have passed the requested future
* time or until interrupted by a signal.
* Do not check for signals if we do not want to wait.
*/
--- 315,420 ----
pollrelock(state);
}
return (0);
}
! int
! poll_copyin(pollstate_t *ps, pollfd_t *fds, nfds_t nfds)
{
+ pollfd_t *pollfdp;
+ nfds_t old_nfds;
+
+ /*
+ * NOTE: for performance, buffers are saved across poll() calls.
+ * The theory is that if a process polls heavily, it tends to poll
+ * on the same set of descriptors. Therefore, we only reallocate
+ * buffers when nfds changes. There is no hysteresis control,
+ * because there is no data to suggest that this is necessary;
+ * the penalty of reallocating is not *that* great in any event.
+ */
+ old_nfds = ps->ps_nfds;
+ if (nfds != old_nfds) {
+ kmem_free(ps->ps_pollfd, old_nfds * sizeof (pollfd_t));
+ pollfdp = kmem_alloc(nfds * sizeof (pollfd_t), KM_SLEEP);
+ ps->ps_pollfd = pollfdp;
+ ps->ps_nfds = nfds;
+ }
+
+ pollfdp = ps->ps_pollfd;
+ if (copyin(fds, pollfdp, nfds * sizeof (pollfd_t))) {
+ return (EFAULT);
+ }
+
+ if (fds == NULL) {
+ /*
+ * If the process has page 0 mapped, then the copyin() above
+ * will succeed even if fds is NULL. However, our cached
+ * poll lists are keyed by the address of the passed-in fds
+ * structure, and we use the value NULL to indicate an unused
+ * poll cache list entry. As such, we elect not to support
+ * NULL as a valid (user) memory address and fail the poll()
+ * call.
+ */
+ return (EFAULT);
+ }
+ return (0);
+ }
+
+ int
+ poll_common(pollstate_t *ps, pollfd_t *fds, nfds_t nfds, timespec_t *tsp,
+ int *fdcnt)
+ {
kthread_t *t = curthread;
hrtime_t deadline; /* hrtime value when we want to return */
pollfd_t *pollfdp;
pollcache_t *pcp;
int error = 0;
int cacheindex = 0; /* which cache set is used */
/*
* Determine the precise future time of the requested timeout, if any.
*/
if (tsp == NULL) {
deadline = -1;
} else if (tsp->tv_sec == 0 && tsp->tv_nsec == 0) {
deadline = 0;
+ } else if (tsp->tv_sec >= HRTIME_MAX/NANOSEC) {
+ /* Use an indefinite timeout if tv_sec would cause overflow */
+ deadline = -1;
} else {
+ /*
+ * The above check, when combined with the protections offered
+ * by itimerspecfix (ensuring that neither field is negative
+ * and that tv_nsec represents less than a whole second), will
+ * prevent overflow during the conversion from timespec_t to
+ * uhrtime_t.
+ */
+ uhrtime_t utime = tsp->tv_sec * NANOSEC;
+ utime += tsp->tv_nsec;
+
/* They must wait at least a tick. */
! utime = MAX(utime, nsec_per_tick);
/*
! * Since utime has an upper bound of HRTIME_MAX, adding the
! * gethrtime() result cannot incur an overflow as the unsigned
! * type has an adequate bound.
*/
! utime += (uhrtime_t)gethrtime();
! if (utime > HRTIME_MAX) {
! deadline = -1;
! } else {
! deadline = (hrtime_t)utime;
}
}
/*
! * Check to see if the caller just wants to use poll() as a timeout.
* If yes then bypass all the other stuff and make him sleep.
*/
if (nfds == 0) {
+ *fdcnt = 0;
/*
* Sleep until we have passed the requested future
* time or until interrupted by a signal.
* Do not check for signals if we do not want to wait.
*/
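
The hunk above is the heart of OS-5566. In the old code, deadline = ((hrtime_t)tsp->tv_sec * NANOSEC) + tsp->tv_nsec wraps to a negative value for a large enough tv_sec; the MAX() that follows then clamps the wrapped value up to nsec_per_tick, so an enormous ppoll() timeout silently became a one-tick wait. Below is a minimal userspace sketch of the fixed conversion; HRTIME_MAX, nsec_per_tick, and the unsigned intermediate are stand-ins for the kernel definitions, not the kernel code itself.

#include <stdint.h>
#include <stdio.h>
#include <time.h>

#define	NANOSEC		1000000000LL
#define	HRTIME_MAX	INT64_MAX	/* stand-in for the kernel define */

/*
 * Mirror of the fixed computation: a tv_sec large enough to overflow
 * the signed 64-bit nanosecond count now yields an indefinite wait
 * (-1) instead of wrapping negative and being clamped to one tick.
 */
static int64_t
deadline_of(const struct timespec *tsp, int64_t now, int64_t nsec_per_tick)
{
	uint64_t utime;

	if (tsp == NULL)
		return (-1);
	if (tsp->tv_sec == 0 && tsp->tv_nsec == 0)
		return (0);
	if (tsp->tv_sec >= HRTIME_MAX / NANOSEC)
		return (-1);

	/* Safe: tv_sec is bounded above and tv_nsec is under a second. */
	utime = (uint64_t)tsp->tv_sec * NANOSEC + (uint64_t)tsp->tv_nsec;
	if (utime < (uint64_t)nsec_per_tick)
		utime = (uint64_t)nsec_per_tick;
	utime += (uint64_t)now;
	return (utime > HRTIME_MAX ? -1 : (int64_t)utime);
}

int
main(void)
{
	struct timespec huge = { .tv_sec = (time_t)(INT64_MAX / 2) };

	/* Prints -1: the would-be overflow falls back to "wait forever". */
	(void) printf("%lld\n",
	    (long long)deadline_of(&huge, 0, 10000000));
	return (0);
}

Note that deadline == -1 is exactly what poll_common() treats as an infinite wait, so the saturating conversion degrades gracefully rather than changing the error behavior.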
*** 383,454 ****
mutex_enter(&t->t_delay_lock);
while ((error = cv_timedwait_sig_hrtime(&t->t_delay_cv,
&t->t_delay_lock, deadline)) > 0)
continue;
mutex_exit(&t->t_delay_lock);
! error = (error == 0) ? EINTR : 0;
}
! goto pollout;
}
! if (nfds > p->p_fno_ctl) {
! mutex_enter(&p->p_lock);
! (void) rctl_action(rctlproc_legacy[RLIMIT_NOFILE],
! p->p_rctls, p, RCA_SAFE);
! mutex_exit(&p->p_lock);
! error = EINVAL;
! goto pollout;
! }
!
! /*
! * Need to allocate memory for pollstate before anything because
! * the mutex and cv are created in this space
! */
! ps = pollstate_create();
!
! if (ps->ps_pcache == NULL)
! ps->ps_pcache = pcache_alloc();
! pcp = ps->ps_pcache;
!
! /*
! * NOTE: for performance, buffers are saved across poll() calls.
! * The theory is that if a process polls heavily, it tends to poll
! * on the same set of descriptors. Therefore, we only reallocate
! * buffers when nfds changes. There is no hysteresis control,
! * because there is no data to suggest that this is necessary;
! * the penalty of reallocating is not *that* great in any event.
! */
! old_nfds = ps->ps_nfds;
! if (nfds != old_nfds) {
!
! kmem_free(ps->ps_pollfd, old_nfds * sizeof (pollfd_t));
! pollfdp = kmem_alloc(nfds * sizeof (pollfd_t), KM_SLEEP);
! ps->ps_pollfd = pollfdp;
! ps->ps_nfds = nfds;
! }
!
pollfdp = ps->ps_pollfd;
! if (copyin(fds, pollfdp, nfds * sizeof (pollfd_t))) {
! error = EFAULT;
! goto pollout;
! }
- if (fds == NULL) {
/*
- * If the process has page 0 mapped, then the copyin() above
- * will succeed even if fds is NULL. However, our cached
- * poll lists are keyed by the address of the passed-in fds
- * structure, and we use the value NULL to indicate an unused
- * poll cache list entry. As such, we elect not to support
- * NULL as a valid (user) memory address and fail the poll()
- * call.
- */
- error = EINVAL;
- goto pollout;
- }
-
- /*
* If this thread polls for the first time, allocate ALL poll
* cache data structures and cache the poll fd list. This
* allocation is delayed till now because lwp's polling 0 fd
* (i.e. using poll as timeout()) don't need this memory.
*/
--- 422,441 ----
mutex_enter(&t->t_delay_lock);
while ((error = cv_timedwait_sig_hrtime(&t->t_delay_cv,
&t->t_delay_lock, deadline)) > 0)
continue;
mutex_exit(&t->t_delay_lock);
! return ((error == 0) ? EINTR : 0);
}
! return (0);
}
! VERIFY(ps != NULL);
pollfdp = ps->ps_pollfd;
! VERIFY(pollfdp != NULL);
/*
* If this thread polls for the first time, allocate ALL poll
* cache data structures and cache the poll fd list. This
* allocation is delayed till now because lwp's polling 0 fd
* (i.e. using poll as timeout()) don't need this memory.
*/
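
For the nfds == 0 fast path above (now returning directly instead of jumping to pollout), the visible contract from userland is the long-standing idiom of using poll(2) as a sub-second, signal-interruptible sleep. A small illustration against the standard poll(2) API, not kernel code:

#include <poll.h>
#include <stdio.h>

int
main(void)
{
	/*
	 * With nfds == 0 the kernel skips pollstate setup entirely and
	 * sleeps on t_delay_cv until the deadline or a signal; fds may
	 * be NULL here because poll_copyin() is never reached.
	 */
	int rv = poll(NULL, 0, 250);	/* ~250 ms */

	(void) printf("poll() returned %d\n", rv);	/* 0, or -1/EINTR */
	return (0);
}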
*** 458,471 ****
if (pcp->pc_bitmap == NULL) {
pcache_create(pcp, nfds);
/*
* poll and cache this poll fd list in ps_pcacheset[0].
*/
! error = pcacheset_cache_list(ps, fds, &fdcnt, cacheindex);
! if (fdcnt || error) {
mutex_exit(&ps->ps_lock);
! goto pollout;
}
} else {
pollcacheset_t *pcset = ps->ps_pcacheset;
/*
--- 445,458 ----
if (pcp->pc_bitmap == NULL) {
pcache_create(pcp, nfds);
/*
* poll and cache this poll fd list in ps_pcacheset[0].
*/
! error = pcacheset_cache_list(ps, fds, fdcnt, cacheindex);
! if (error || *fdcnt) {
mutex_exit(&ps->ps_lock);
! return (error);
}
} else {
pollcacheset_t *pcset = ps->ps_pcacheset;
/*
*** 486,500 ****
* list and previously cached one.
* If there is an error during resolve(),
* the callee will guarantee the consistency
* of cached poll list and cache content.
*/
! error = pcacheset_resolve(ps, nfds, &fdcnt,
cacheindex);
if (error) {
mutex_exit(&ps->ps_lock);
! goto pollout;
}
break;
}
/*
--- 473,487 ----
* list and previously cached one.
* If there is an error during resolve(),
* the callee will guarantee the consistency
* of cached poll list and cache content.
*/
! error = pcacheset_resolve(ps, nfds, fdcnt,
cacheindex);
if (error) {
mutex_exit(&ps->ps_lock);
! return (error);
}
break;
}
/*
*** 507,521 ****
if (pcset[cacheindex].pcs_usradr == NULL) {
/*
* found an unused entry. Use it to cache
* this poll list.
*/
! error = pcacheset_cache_list(ps, fds, &fdcnt,
cacheindex);
! if (fdcnt || error) {
mutex_exit(&ps->ps_lock);
! goto pollout;
}
break;
}
}
if (cacheindex == ps->ps_nsets) {
--- 494,508 ----
if (pcset[cacheindex].pcs_usradr == NULL) {
/*
* found an unused entry. Use it to cache
* this poll list.
*/
! error = pcacheset_cache_list(ps, fds, fdcnt,
cacheindex);
! if (error || *fdcnt) {
mutex_exit(&ps->ps_lock);
! return (error);
}
break;
}
}
if (cacheindex == ps->ps_nsets) {
*** 525,538 ****
*/
pollstats.polllistmiss.value.ui64++;
cacheindex = pcacheset_replace(ps);
ASSERT(cacheindex < ps->ps_nsets);
pcset[cacheindex].pcs_usradr = (uintptr_t)fds;
! error = pcacheset_resolve(ps, nfds, &fdcnt, cacheindex);
if (error) {
mutex_exit(&ps->ps_lock);
! goto pollout;
}
}
}
/*
--- 512,525 ----
*/
pollstats.polllistmiss.value.ui64++;
cacheindex = pcacheset_replace(ps);
ASSERT(cacheindex < ps->ps_nsets);
pcset[cacheindex].pcs_usradr = (uintptr_t)fds;
! error = pcacheset_resolve(ps, nfds, fdcnt, cacheindex);
if (error) {
mutex_exit(&ps->ps_lock);
! return (error);
}
}
}
/*
*** 546,557 ****
* the pcache before updating poll bitmap.
*/
mutex_enter(&pcp->pc_lock);
for (;;) {
pcp->pc_flag = 0;
! error = pcache_poll(pollfdp, ps, nfds, &fdcnt, cacheindex);
! if (fdcnt || error) {
mutex_exit(&pcp->pc_lock);
mutex_exit(&ps->ps_lock);
break;
}
--- 533,544 ----
* the pcache before updating poll bitmap.
*/
mutex_enter(&pcp->pc_lock);
for (;;) {
pcp->pc_flag = 0;
! error = pcache_poll(pollfdp, ps, nfds, fdcnt, cacheindex);
! if (error || *fdcnt) {
mutex_exit(&pcp->pc_lock);
mutex_exit(&ps->ps_lock);
break;
}
*** 593,609 ****
*/
mutex_enter(&ps->ps_lock);
mutex_enter(&pcp->pc_lock);
}
pollout:
/*
! * If we changed the signal mask but we received
! * no signal then restore the signal mask.
! * Otherwise psig() will deal with the signal mask.
*/
! if (ksetp != NULL) {
mutex_enter(&p->p_lock);
if (lwp->lwp_cursig == 0) {
t->t_hold = lwp->lwp_sigoldmask;
t->t_flag &= ~T_TOMASK;
}
--- 580,699 ----
*/
mutex_enter(&ps->ps_lock);
mutex_enter(&pcp->pc_lock);
}
+ return (error);
+ }
+
+ /*
+ * This is the system call trap that poll(),
+ * select() and pselect() are built upon.
+ * It is a private interface between libc and the kernel.
+ */
+ int
+ pollsys(pollfd_t *fds, nfds_t nfds, timespec_t *timeoutp, sigset_t *setp)
+ {
+ kthread_t *t = curthread;
+ klwp_t *lwp = ttolwp(t);
+ proc_t *p = ttoproc(t);
+ timespec_t ts;
+ timespec_t *tsp;
+ k_sigset_t kset;
+ pollstate_t *ps = NULL;
+ pollfd_t *pollfdp = NULL;
+ int error = 0, fdcnt = 0;
+
+ /*
+ * Copy in timeout
+ */
+ if (timeoutp == NULL) {
+ tsp = NULL;
+ } else {
+ if (get_udatamodel() == DATAMODEL_NATIVE) {
+ if (copyin(timeoutp, &ts, sizeof (ts)))
+ return (set_errno(EFAULT));
+ } else {
+ timespec32_t ts32;
+
+ if (copyin(timeoutp, &ts32, sizeof (ts32)))
+ return (set_errno(EFAULT));
+ TIMESPEC32_TO_TIMESPEC(&ts, &ts32)
+ }
+
+ if (itimerspecfix(&ts))
+ return (set_errno(EINVAL));
+ tsp = &ts;
+ }
+
+ /*
+ * Copy in and reset signal mask, if requested.
+ */
+ if (setp != NULL) {
+ sigset_t set;
+
+ if (copyin(setp, &set, sizeof (set)))
+ return (set_errno(EFAULT));
+ sigutok(&set, &kset);
+
+ mutex_enter(&p->p_lock);
+ schedctl_finish_sigblock(t);
+ lwp->lwp_sigoldmask = t->t_hold;
+ t->t_hold = kset;
+ t->t_flag |= T_TOMASK;
+ /*
+ * Call cv_reltimedwait_sig() just to check for signals.
+ * We will return immediately with either 0 or -1.
+ */
+ if (!cv_reltimedwait_sig(&t->t_delay_cv, &p->p_lock, 0,
+ TR_CLOCK_TICK)) {
+ mutex_exit(&p->p_lock);
+ error = EINTR;
+ goto pollout;
+ }
+ mutex_exit(&p->p_lock);
+ }
+
+ /*
+ * Initialize pollstate and copy in pollfd data if present.
+ * If nfds == 0, we will skip all of the copying and check steps and
+ * proceed directly into poll_common to process the supplied timeout.
+ */
+ if (nfds != 0) {
+ if (nfds > p->p_fno_ctl) {
+ mutex_enter(&p->p_lock);
+ (void) rctl_action(rctlproc_legacy[RLIMIT_NOFILE],
+ p->p_rctls, p, RCA_SAFE);
+ mutex_exit(&p->p_lock);
+ error = EINVAL;
+ goto pollout;
+ }
+
+ /*
+ * Need to allocate memory for pollstate before anything
+ * because the mutex and cv are created in this space
+ */
+ ps = pollstate_create();
+ if (ps->ps_pcache == NULL)
+ ps->ps_pcache = pcache_alloc();
+
+ if ((error = poll_copyin(ps, fds, nfds)) != 0)
+ goto pollout;
+ pollfdp = ps->ps_pollfd;
+ }
+
+ /*
+ * Perform the actual poll.
+ */
+ error = poll_common(ps, fds, nfds, tsp, &fdcnt);
+
pollout:
/*
! * If we changed the signal mask but we received no signal then restore
! * the signal mask. Otherwise psig() will deal with the signal mask.
*/
! if (setp != NULL) {
mutex_enter(&p->p_lock);
if (lwp->lwp_cursig == 0) {
t->t_hold = lwp->lwp_sigoldmask;
t->t_flag &= ~T_TOMASK;
}
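
The setp handling that moved into pollsys() above is what gives pselect() and ppoll() their atomicity: the t_hold swap and the pending-signal check (the zero-length cv_reltimedwait_sig() call) happen before the wait, closing the window where a signal could arrive after a userland check but before blocking. A sketch of the same pattern from userland via pselect(2), assuming only POSIX interfaces:

#include <signal.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/select.h>

static volatile sig_atomic_t got_usr1 = 0;

static void
on_usr1(int sig)
{
	(void) sig;
	got_usr1 = 1;
}

int
main(void)
{
	sigset_t blocked, waitmask;
	struct sigaction sa;

	sa.sa_handler = on_usr1;
	sa.sa_flags = 0;
	(void) sigemptyset(&sa.sa_mask);
	(void) sigaction(SIGUSR1, &sa, NULL);

	/* Block SIGUSR1 everywhere except inside the wait itself. */
	(void) sigemptyset(&blocked);
	(void) sigaddset(&blocked, SIGUSR1);
	(void) sigprocmask(SIG_BLOCK, &blocked, &waitmask);
	(void) sigdelset(&waitmask, SIGUSR1);

	(void) printf("kill -USR1 %ld to wake me\n", (long)getpid());
	while (!got_usr1) {
		/*
		 * pselect() installs waitmask for the duration of the
		 * wait and restores the previous mask afterwards, the
		 * same t_hold swap (and pending-signal check) that
		 * pollsys() performs when setp != NULL.
		 */
		(void) pselect(0, NULL, NULL, NULL, NULL, &waitmask);
	}
	(void) printf("got SIGUSR1\n");
	return (0);
}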
*** 610,633 ****
mutex_exit(&p->p_lock);
}
if (error)
return (set_errno(error));
-
/*
* Copy out the events and return the fdcnt to the user.
*/
! if (nfds != 0 &&
! copyout(pollfdp, fds, nfds * sizeof (pollfd_t)))
return (set_errno(EFAULT));
#ifdef DEBUG
/*
* Another sanity check:
*/
if (fdcnt) {
! int reventcnt = 0;
for (i = 0; i < nfds; i++) {
if (pollfdp[i].fd < 0) {
ASSERT(pollfdp[i].revents == 0);
continue;
--- 700,721 ----
mutex_exit(&p->p_lock);
}
if (error)
return (set_errno(error));
/*
* Copy out the events and return the fdcnt to the user.
*/
! if (nfds != 0 && copyout(pollfdp, fds, nfds * sizeof (pollfd_t)))
return (set_errno(EFAULT));
#ifdef DEBUG
/*
* Another sanity check:
*/
if (fdcnt) {
! int i, reventcnt = 0;
for (i = 0; i < nfds; i++) {
if (pollfdp[i].fd < 0) {
ASSERT(pollfdp[i].revents == 0);
continue;
*** 636,700 ****
reventcnt++;
}
}
ASSERT(fdcnt == reventcnt);
} else {
for (i = 0; i < nfds; i++) {
ASSERT(pollfdp[i].revents == 0);
}
}
#endif /* DEBUG */
return (fdcnt);
}
- /*
- * This is the system call trap that poll(),
- * select() and pselect() are built upon.
- * It is a private interface between libc and the kernel.
- */
- int
- pollsys(pollfd_t *fds, nfds_t nfds, timespec_t *timeoutp, sigset_t *setp)
- {
- timespec_t ts;
- timespec_t *tsp;
- sigset_t set;
- k_sigset_t kset;
- k_sigset_t *ksetp;
- model_t datamodel = get_udatamodel();
-
- if (timeoutp == NULL)
- tsp = NULL;
- else {
- if (datamodel == DATAMODEL_NATIVE) {
- if (copyin(timeoutp, &ts, sizeof (ts)))
- return (set_errno(EFAULT));
- } else {
- timespec32_t ts32;
-
- if (copyin(timeoutp, &ts32, sizeof (ts32)))
- return (set_errno(EFAULT));
- TIMESPEC32_TO_TIMESPEC(&ts, &ts32)
- }
-
- if (itimerspecfix(&ts))
- return (set_errno(EINVAL));
- tsp = &ts;
- }
-
- if (setp == NULL)
- ksetp = NULL;
- else {
- if (copyin(setp, &set, sizeof (set)))
- return (set_errno(EFAULT));
- sigutok(&set, &kset);
- ksetp = &kset;
- }
-
- return (poll_common(fds, nfds, tsp, ksetp));
- }
-
/*
* Clean up any state left around by poll(2). Called when a thread exits.
*/
void
pollcleanup()
--- 724,744 ----
reventcnt++;
}
}
ASSERT(fdcnt == reventcnt);
} else {
+ int i;
+
for (i = 0; i < nfds; i++) {
ASSERT(pollfdp[i].revents == 0);
}
}
#endif /* DEBUG */
return (fdcnt);
}
/*
* Clean up any state left around by poll(2). Called when a thread exits.
*/
void
pollcleanup()