Print this page
cleanup port_free_event_local() semantics
XXXXX race between closef(portfd) and timer_delete()
*** 22,32 ****
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
! #pragma ident "%Z%%M% %I% %E% SMI"
/*
* This file containts all the functions required for interactions of
* event sources with the event port file system.
*/
--- 22,34 ----
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
! /*
! * Copyright 2020 Joyent, Inc.
! */
/*
* This file containts all the functions required for interactions of
* event sources with the event port file system.
*/
*** 202,214 ****
portq->portq_flags &= ~PORTQ_POLLIN;
/*
* Need to save port_t for calling pollwakeup since port_getn()
* may end up freeing pkevp once portq_mutex is dropped.
*/
! pp = pkevp->portkev_port;
mutex_exit(&portq->portq_mutex);
pollwakeup(&pp->port_pollhd, POLLIN);
} else {
mutex_exit(&portq->portq_mutex);
}
}
--- 204,221 ----
portq->portq_flags &= ~PORTQ_POLLIN;
/*
* Need to save port_t for calling pollwakeup since port_getn()
* may end up freeing pkevp once portq_mutex is dropped.
+ *
+ * For that matter, if PORTQ_CLOSE is set, all of the port_t
+ * may be freed upon dropping portq_mutex.
*/
! pp = (portq->portq_flags & PORTQ_CLOSE) ?
! NULL : pkevp->portkev_port;
mutex_exit(&portq->portq_mutex);
+ if (pp != NULL)
pollwakeup(&pp->port_pollhd, POLLIN);
} else {
mutex_exit(&portq->portq_mutex);
}
}
*** 248,257 ****
--- 255,265 ----
port_alloc_event(int port, int flags, int source, port_kevent_t **pkevpp)
{
port_t *pp;
file_t *fp;
port_kevent_t *pkevp;
+ int err;
if ((fp = getf(port)) == NULL)
return (EBADF);
if (fp->f_vnode->v_type != VPORT) {
*** 269,283 ****
* port_max_events is controlled by the resource control
* process.port-max-events
*/
pp = VTOEP(fp->f_vnode);
mutex_enter(&pp->port_queue.portq_mutex);
! if (pp->port_curr >= pp->port_max_events) {
mutex_exit(&pp->port_queue.portq_mutex);
kmem_cache_free(port_control.pc_cache, pkevp);
releasef(port);
! return (EAGAIN);
}
pp->port_curr++;
mutex_exit(&pp->port_queue.portq_mutex);
bzero(pkevp, sizeof (port_kevent_t));
--- 277,300 ----
* port_max_events is controlled by the resource control
* process.port-max-events
*/
pp = VTOEP(fp->f_vnode);
mutex_enter(&pp->port_queue.portq_mutex);
!
! /* Two possible error conditions. */
! if (pp->port_queue.portq_flags & PORTQ_CLOSE)
! err = EBADFD; /* Mid-close. */
! else if (pp->port_curr >= pp->port_max_events)
! 		err = EAGAIN;	/* Resources unavailable. */
! else
! err = 0; /* Good to go! */
!
! if (err != 0) {
mutex_exit(&pp->port_queue.portq_mutex);
kmem_cache_free(port_control.pc_cache, pkevp);
releasef(port);
! return (err);
}
pp->port_curr++;
mutex_exit(&pp->port_queue.portq_mutex);
bzero(pkevp, sizeof (port_kevent_t));
*** 317,336 ****
int
port_alloc_event_local(port_t *pp, int source, int flags,
port_kevent_t **pkevpp)
{
port_kevent_t *pkevp;
pkevp = kmem_cache_alloc(port_control.pc_cache, KM_NOSLEEP);
if (pkevp == NULL)
return (ENOMEM);
mutex_enter(&pp->port_queue.portq_mutex);
! if (pp->port_curr >= pp->port_max_events) {
mutex_exit(&pp->port_queue.portq_mutex);
kmem_cache_free(port_control.pc_cache, pkevp);
! return (EAGAIN);
}
pp->port_curr++;
mutex_exit(&pp->port_queue.portq_mutex);
bzero(pkevp, sizeof (port_kevent_t));
--- 334,363 ----
int
port_alloc_event_local(port_t *pp, int source, int flags,
port_kevent_t **pkevpp)
{
port_kevent_t *pkevp;
+ int err;
pkevp = kmem_cache_alloc(port_control.pc_cache, KM_NOSLEEP);
if (pkevp == NULL)
return (ENOMEM);
mutex_enter(&pp->port_queue.portq_mutex);
!
! /* Two possible error conditions. */
! if (pp->port_queue.portq_flags & PORTQ_CLOSE)
! err = EBADFD; /* Mid-close. */
! else if (pp->port_curr >= pp->port_max_events)
! err = EAGAIN; /* Resources unavailable. */
! else
! err = 0; /* Good to go! */
!
! if (err != 0) {
mutex_exit(&pp->port_queue.portq_mutex);
kmem_cache_free(port_control.pc_cache, pkevp);
! return (err);
}
pp->port_curr++;
mutex_exit(&pp->port_queue.portq_mutex);
bzero(pkevp, sizeof (port_kevent_t));
*** 356,373 ****
--- 383,417 ----
{
port_kevent_t *pkevp =
kmem_cache_alloc(port_control.pc_cache, KM_SLEEP);
mutex_enter(&pp->port_queue.portq_mutex);
+ /*
+ * Mid-close check needs to happen immediately before any
+ * resource checking or cv_wait()-ing of any sort.
+ */
+ if (pp->port_queue.portq_flags & PORTQ_CLOSE) {
+ mutex_exit(&pp->port_queue.portq_mutex);
+ kmem_cache_free(port_control.pc_cache, pkevp);
+ return (EBADFD);
+ }
+
while (pp->port_curr >= pp->port_max_events) {
if (!cv_wait_sig(&pp->port_cv, &pp->port_queue.portq_mutex)) {
/* signal detected */
mutex_exit(&pp->port_queue.portq_mutex);
kmem_cache_free(port_control.pc_cache, pkevp);
return (EINTR);
}
+
+ /* Oh, and we have to re-check the close state. */
+ if (pp->port_queue.portq_flags & PORTQ_CLOSE) {
+ mutex_exit(&pp->port_queue.portq_mutex);
+ kmem_cache_free(port_control.pc_cache, pkevp);
+ return (EBADFD);
}
+ }
pp->port_curr++;
mutex_exit(&pp->port_queue.portq_mutex);
bzero(pkevp, sizeof (port_kevent_t));
mutex_init(&pkevp->portkev_lock, NULL, MUTEX_DEFAULT, NULL);
*** 440,450 ****
pp = pkevp->portkev_port;
if (pp == NULL)
return;
if (pkevp->portkev_flags & PORT_ALLOC_PRIVATE) {
! port_free_event_local(pkevp, 0);
return;
}
portq = &pp->port_queue;
mutex_enter(&portq->portq_mutex);
--- 484,494 ----
pp = pkevp->portkev_port;
if (pp == NULL)
return;
if (pkevp->portkev_flags & PORT_ALLOC_PRIVATE) {
! port_free_event_local(pkevp, B_TRUE);
return;
}
portq = &pp->port_queue;
mutex_enter(&portq->portq_mutex);
*** 474,494 ****
* of the allocated event structures (port_curr).
*/
if (pp->port_curr <= portq->portq_nent)
cv_signal(&portq->portq_closecv);
}
! mutex_exit(&portq->portq_mutex);
! port_free_event_local(pkevp, 1);
}
/*
* This event port internal function is used by port_free_event() and
* other port internal functions to return event structures back to the
* kmem_cache.
*/
void
! port_free_event_local(port_kevent_t *pkevp, int counter)
{
port_t *pp = pkevp->portkev_port;
port_queue_t *portq = &pp->port_queue;
int wakeup;
--- 518,546 ----
* of the allocated event structures (port_curr).
*/
if (pp->port_curr <= portq->portq_nent)
cv_signal(&portq->portq_closecv);
}
!
! /*
! * We're holding portq_mutex and we decremented port_curr, so don't
! * have port_free_event_local() do it for us.
! */
! port_free_event_local(pkevp, B_FALSE);
}
/*
* This event port internal function is used by port_free_event() and
* other port internal functions to return event structures back to the
* kmem_cache.
+ *
+ * If the caller already holds the portq_mutex, indicate by setting
+ * lock_and_decrement to B_FALSE. This function MUST drop portq_mutex as
+ * it can call pollwakeup() at its end.
*/
void
! port_free_event_local(port_kevent_t *pkevp, boolean_t lock_and_decrement)
{
port_t *pp = pkevp->portkev_port;
port_queue_t *portq = &pp->port_queue;
int wakeup;
*** 496,511 ****
pkevp->portkev_flags = 0;
pkevp->portkev_port = NULL;
mutex_destroy(&pkevp->portkev_lock);
kmem_cache_free(port_control.pc_cache, pkevp);
mutex_enter(&portq->portq_mutex);
- if (counter == 0) {
if (--pp->port_curr < pp->port_max_events)
cv_signal(&pp->port_cv);
}
! wakeup = (portq->portq_flags & PORTQ_POLLOUT);
portq->portq_flags &= ~PORTQ_POLLOUT;
mutex_exit(&portq->portq_mutex);
/* Submit a POLLOUT event if requested */
if (wakeup)
--- 548,577 ----
pkevp->portkev_flags = 0;
pkevp->portkev_port = NULL;
mutex_destroy(&pkevp->portkev_lock);
kmem_cache_free(port_control.pc_cache, pkevp);
+ if (lock_and_decrement) {
+ /*
+ 		/*
+ 		 * If we're entering here from outside port_free_event(),
+ * grab the mutex. We enter here if we hadn't already
+ * decremented-and-signaled port_curr.
+ */
mutex_enter(&portq->portq_mutex);
if (--pp->port_curr < pp->port_max_events)
cv_signal(&pp->port_cv);
}
! ASSERT(MUTEX_HELD(&portq->portq_mutex));
!
! /*
! * Don't send the POLLOUT if we're mid-close. We can only send it if
! * we've dropped the mutex, and if we're closing, dropping the mutex
! * could lead to another thread freeing pp->port_pollhd and the rest
! * of *pp.
! */
! wakeup = ((portq->portq_flags & (PORTQ_POLLOUT | PORTQ_CLOSE)) ==
! PORTQ_POLLOUT);
portq->portq_flags &= ~PORTQ_POLLOUT;
mutex_exit(&portq->portq_mutex);
/* Submit a POLLOUT event if requested */
if (wakeup)
*** 648,658 ****
if (pkevp->portkev_callback) {
(void) (*pkevp->portkev_callback)(pkevp->portkev_arg,
&error, pkevp->portkev_pid, PORT_CALLBACK_DISSOCIATE,
pkevp);
}
! port_free_event_local(pkevp, 0);
/* remove polldat struct */
port_pcache_remove_fd(pcp, pfd);
return (removed);
}
--- 714,724 ----
if (pkevp->portkev_callback) {
(void) (*pkevp->portkev_callback)(pkevp->portkev_arg,
&error, pkevp->portkev_pid, PORT_CALLBACK_DISSOCIATE,
pkevp);
}
! port_free_event_local(pkevp, B_TRUE);
/* remove polldat struct */
port_pcache_remove_fd(pcp, pfd);
return (removed);
}
*** 709,718 ****
--- 775,795 ----
return (EBADFD);
}
pp = VTOEP(fp->f_vnode);
mutex_enter(&pp->port_queue.portq_source_mutex);
+ if (pp->port_queue.portq_scache == NULL) {
+ /*
+ * This event-port is mid-close.
+ * By the time portq_scache is freed, PORTQ_CLOSE was set.
+ */
+ ASSERT(pp->port_queue.portq_flags & PORTQ_CLOSE);
+ mutex_exit(&pp->port_queue.portq_source_mutex);
+ releasef(port);
+ /* Equivalent of a bad file descriptor at this point. */
+ return (EBADFD);
+ }
ps = &pp->port_queue.portq_scache[PORT_SHASH(source)];
for (pse = *ps; pse != NULL; pse = pse->portsrc_next) {
if (pse->portsrc_source == source)
break;
}