Print this page
8634 epoll fails to wake on certain edge-triggered conditions
8635 epoll should not emit POLLNVAL
8636 recursive epoll should emit EPOLLRDNORM
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>
Reviewed by: Toomas Soome <tsoome@me.com>
Reviewed by: Igor Kozhukhov <igor@dilos.org>
*** 23,33 ****
* Use is subject to license terms.
*/
/*
* Copyright (c) 2012 by Delphix. All rights reserved.
! * Copyright 2016 Joyent, Inc.
*/
#include <sys/types.h>
#include <sys/devops.h>
#include <sys/conf.h>
--- 23,33 ----
* Use is subject to license terms.
*/
/*
* Copyright (c) 2012 by Delphix. All rights reserved.
! * Copyright 2017 Joyent, Inc.
*/
#include <sys/types.h>
#include <sys/devops.h>
#include <sys/conf.h>
*** 364,389 ****
}
}
if (fp != pdp->pd_fp) {
/*
! * user is polling on a cached fd which was
! * closed and then reused. Unfortunately
! * there is no good way to inform user.
! * If the file struct is also reused, we
! * may not be able to detect the fd reuse
! * at all. As long as this does not
! * cause system failure and/or memory leak,
! * we will play along. Man page states if
! * user does not clean up closed fds, polling
* results will be indeterministic.
*
! * XXX - perhaps log the detection of fd
! * reuse?
*/
pdp->pd_fp = fp;
}
/*
* XXX - pollrelock() logic needs to know which
* which pollcache lock to grab. It'd be a
* cleaner solution if we could pass pcp as
* an arguement in VOP_POLL interface instead
--- 364,404 ----
}
}
if (fp != pdp->pd_fp) {
/*
! * The user is polling on a cached fd which was
! * closed and then reused. Unfortunately there
! * is no good way to communicate this fact to
! * the consumer.
! *
! * If the file struct is also reused, we may
! * not be able to detect the fd reuse at all.
! * As long as this does not cause system
! * failure and/or memory leaks, we will play
! * along. The man page states that if the user
! * does not clean up closed fds, polling
* results will be indeterministic.
*
! * XXX: perhaps log the detection of fd reuse?
*/
pdp->pd_fp = fp;
+
+ /*
+ * When this situation has been detected, it's
+ * likely that any existing pollhead is
+ * ill-suited to perform proper wake-ups.
+ *
+ * Clean up the old entry under the expectation
+ * that a valid one will be provided as part of
+ * the later VOP_POLL.
+ */
+ if (pdp->pd_php != NULL) {
+ pollhead_delete(pdp->pd_php, pdp);
+ pdp->pd_php = NULL;
}
+ }
/*
* XXX - pollrelock() logic needs to know which
* which pollcache lock to grab. It'd be a
* cleaner solution if we could pass pcp as
* an arguement in VOP_POLL interface instead
*** 394,403 ****
--- 409,439 ----
* the tradeoff later.
*/
curthread->t_pollcache = pcp;
error = VOP_POLL(fp->f_vnode, pdp->pd_events, 0,
&revent, &php, NULL);
+
+ /*
+ * Recheck edge-triggered descriptors which lack a
+ * pollhead. While this check is performed when an fd
+ * is added to the pollcache in dpwrite(), subsequent
+ * descriptor manipulation could cause a different
+ * resource to be present now.
+ */
+ if ((pdp->pd_events & POLLET) && error == 0 &&
+ pdp->pd_php == NULL && php == NULL && revent != 0) {
+ short levent = 0;
+
+ /*
+ * The same POLLET-only VOP_POLL is used in an
+ * attempt to coax a pollhead from older
+ * driver logic.
+ */
+ error = VOP_POLL(fp->f_vnode, POLLET,
+ 0, &levent, &php, NULL);
+ }
+
curthread->t_pollcache = NULL;
releasef(fd);
if (error != 0) {
break;
}
*** 429,438 ****
--- 465,484 ----
ASSERT(epoll != NULL);
ep->data.u64 = pdp->pd_epolldata;
/*
+ * Since POLLNVAL is a legal event for
+ * VOP_POLL handlers to emit, it must
+ * be translated epoll-legal.
+ */
+ if (revent & POLLNVAL) {
+ revent &= ~POLLNVAL;
+ revent |= POLLERR;
+ }
+
+ /*
* If any of the event bits are set for
* which poll and epoll representations
* differ, swizzle in the native epoll
* values.
*/
*** 486,508 ****
fdcnt++;
break;
}
}
/*
! * If POLLET is set, clear the bit in the
! * bitmap -- which effectively latches the
! * edge on a pollwakeup() from the driver.
*/
- if (pdp->pd_events & POLLET)
- BT_CLEAR(pcp->pc_bitmap, fd);
-
- /*
- * If POLLONESHOT is set, perform the implicit
- * POLLREMOVE.
- */
- if (pdp->pd_events & POLLONESHOT) {
pdp->pd_fp = NULL;
pdp->pd_events = 0;
if (pdp->pd_php != NULL) {
pollhead_delete(pdp->pd_php,
--- 532,547 ----
fdcnt++;
break;
}
}
+ /* Handle special polling modes. */
+ if (pdp->pd_events & POLLONESHOT) {
/*
! * If POLLONESHOT is set, perform the
! * implicit POLLREMOVE.
*/
pdp->pd_fp = NULL;
pdp->pd_events = 0;
if (pdp->pd_php != NULL) {
pollhead_delete(pdp->pd_php,
*** 509,520 ****
--- 548,581 ----
pdp);
pdp->pd_php = NULL;
}
BT_CLEAR(pcp->pc_bitmap, fd);
+ } else if (pdp->pd_events & POLLET) {
+ /*
+ * Wire up the pollhead which should
+ * have been provided. Edge-triggered
+ * polling cannot function properly
+ * with drivers which do not emit one.
+ */
+ if (php != NULL &&
+ pdp->pd_php == NULL) {
+ pollhead_insert(php, pdp);
+ pdp->pd_php = php;
}
+ /*
+ * If the driver has emitted a pollhead,
+ * clear the bit in the bitmap which
+ * effectively latches the edge on a
+ * pollwakeup() from the driver.
+ */
+ if (pdp->pd_php != NULL) {
+ BT_CLEAR(pcp->pc_bitmap, fd);
+ }
+ }
+
fdcnt++;
} else if (php != NULL) {
/*
* We clear a bit or cache a poll fd if
* the driver returns a poll head ptr,
*** 915,924 ****
--- 976,1011 ----
* the tradeoff later.
*/
curthread->t_pollcache = pcp;
error = VOP_POLL(fp->f_vnode, pfdp->events, 0,
&pfdp->revents, &php, NULL);
+
+ /*
+ * Edge-triggered polling requires a pollhead in order
+ * to initiate wake-ups properly. Drivers which are
+ * savvy to POLLET presence, which should include
+ * everything in-gate, will always emit one, regardless
+ * of revent status. Older drivers which only emit a
+ * pollhead if 'revents == 0' are given a second chance
+ * here via a second VOP_POLL, with only POLLET set in
+ * the events of interest. These circumstances should
+ * induce any cacheable drivers to emit a pollhead for
+ * wake-ups.
+ *
+ * Drivers which never emit a pollhead will simply
+ * disobey the exectation of edge-triggered behavior.
+ * This includes recursive epoll which, even on Linux,
+ * yields its events in a level-triggered fashion only.
+ */
+ if ((pdp->pd_events & POLLET) && error == 0 &&
+ php == NULL) {
+ short levent = 0;
+
+ error = VOP_POLL(fp->f_vnode, POLLET, 0,
+ &levent, &php, NULL);
+ }
+
curthread->t_pollcache = NULL;
/*
* We always set the bit when this fd is cached;
* this forces the first DP_POLL to poll this fd.
* Real performance gain comes from subsequent
*** 1456,1469 ****
if (res == PSE_SUCCESS) {
nfds_t nfds = 1;
int fdcnt = 0;
pollstate_t *ps = curthread->t_pollstate;
rc = dp_pcache_poll(dpep, NULL, pcp, nfds, &fdcnt);
! if (rc == 0) {
! *reventsp = (fdcnt > 0) ? POLLIN : 0;
}
pcachelink_assoc(pcp, ps->ps_pc_stack[0]);
pollstate_exit(pcp);
} else {
switch (res) {
case PSE_FAIL_DEPTH:
--- 1543,1568 ----
if (res == PSE_SUCCESS) {
nfds_t nfds = 1;
int fdcnt = 0;
pollstate_t *ps = curthread->t_pollstate;
+ /*
+ * Recursive polling will only emit certain events. Skip a
+ * scan of the pollcache if those events are not of interest.
+ */
+ if (events & (POLLIN|POLLRDNORM)) {
rc = dp_pcache_poll(dpep, NULL, pcp, nfds, &fdcnt);
! } else {
! rc = 0;
! fdcnt = 0;
}
+
+ if (rc == 0 && fdcnt > 0) {
+ *reventsp = POLLIN|POLLRDNORM;
+ } else {
+ *reventsp = 0;
+ }
pcachelink_assoc(pcp, ps->ps_pc_stack[0]);
pollstate_exit(pcp);
} else {
switch (res) {
case PSE_FAIL_DEPTH: