Print this page
cleanup port_free_event_local() semantics
XXXXX race between closef(portfd) and timer_delete()

*** 22,32 **** /* * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ ! #pragma ident "%Z%%M% %I% %E% SMI" /* * This file containts all the functions required for interactions of * event sources with the event port file system. */ --- 22,34 ---- /* * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ ! /* ! * Copyright 2020 Joyent, Inc. ! */ /* * This file containts all the functions required for interactions of * event sources with the event port file system. */
*** 202,214 **** portq->portq_flags &= ~PORTQ_POLLIN; /* * Need to save port_t for calling pollwakeup since port_getn() * may end up freeing pkevp once portq_mutex is dropped. */ ! pp = pkevp->portkev_port; mutex_exit(&portq->portq_mutex); pollwakeup(&pp->port_pollhd, POLLIN); } else { mutex_exit(&portq->portq_mutex); } } --- 204,221 ---- portq->portq_flags &= ~PORTQ_POLLIN; /* * Need to save port_t for calling pollwakeup since port_getn() * may end up freeing pkevp once portq_mutex is dropped. + * + * For that matter, if PORTQ_CLOSE is set, all of the port_t + * may be freed upon dropping portq_mutex. */ ! pp = (portq->portq_flags & PORTQ_CLOSE) ? ! NULL : pkevp->portkev_port; mutex_exit(&portq->portq_mutex); + if (pp != NULL) pollwakeup(&pp->port_pollhd, POLLIN); } else { mutex_exit(&portq->portq_mutex); } }
*** 248,257 **** --- 255,265 ---- port_alloc_event(int port, int flags, int source, port_kevent_t **pkevpp) { port_t *pp; file_t *fp; port_kevent_t *pkevp; + int err; if ((fp = getf(port)) == NULL) return (EBADF); if (fp->f_vnode->v_type != VPORT) {
*** 269,283 **** * port_max_events is controlled by the resource control * process.port-max-events */ pp = VTOEP(fp->f_vnode); mutex_enter(&pp->port_queue.portq_mutex); ! if (pp->port_curr >= pp->port_max_events) { mutex_exit(&pp->port_queue.portq_mutex); kmem_cache_free(port_control.pc_cache, pkevp); releasef(port); ! return (EAGAIN); } pp->port_curr++; mutex_exit(&pp->port_queue.portq_mutex); bzero(pkevp, sizeof (port_kevent_t)); --- 277,300 ---- * port_max_events is controlled by the resource control * process.port-max-events */ pp = VTOEP(fp->f_vnode); mutex_enter(&pp->port_queue.portq_mutex); ! ! /* Two possible error conditions. */ ! if (pp->port_queue.portq_flags & PORTQ_CLOSE) ! err = EBADFD; /* Mid-close. */ ! else if (pp->port_curr >= pp->port_max_events) ! err = EAGAIN; /* Resources unavailable. */ ! else ! err = 0; /* Good to go! */ ! ! if (err != 0) { mutex_exit(&pp->port_queue.portq_mutex); kmem_cache_free(port_control.pc_cache, pkevp); releasef(port); ! return (err); } pp->port_curr++; mutex_exit(&pp->port_queue.portq_mutex); bzero(pkevp, sizeof (port_kevent_t));
*** 317,336 **** int port_alloc_event_local(port_t *pp, int source, int flags, port_kevent_t **pkevpp) { port_kevent_t *pkevp; pkevp = kmem_cache_alloc(port_control.pc_cache, KM_NOSLEEP); if (pkevp == NULL) return (ENOMEM); mutex_enter(&pp->port_queue.portq_mutex); ! if (pp->port_curr >= pp->port_max_events) { mutex_exit(&pp->port_queue.portq_mutex); kmem_cache_free(port_control.pc_cache, pkevp); ! return (EAGAIN); } pp->port_curr++; mutex_exit(&pp->port_queue.portq_mutex); bzero(pkevp, sizeof (port_kevent_t)); --- 334,363 ---- int port_alloc_event_local(port_t *pp, int source, int flags, port_kevent_t **pkevpp) { port_kevent_t *pkevp; + int err; pkevp = kmem_cache_alloc(port_control.pc_cache, KM_NOSLEEP); if (pkevp == NULL) return (ENOMEM); mutex_enter(&pp->port_queue.portq_mutex); ! ! /* Two possible error conditions. */ ! if (pp->port_queue.portq_flags & PORTQ_CLOSE) ! err = EBADFD; /* Mid-close. */ ! else if (pp->port_curr >= pp->port_max_events) ! err = EAGAIN; /* Resources unavailable. */ ! else ! err = 0; /* Good to go! */ ! ! if (err != 0) { mutex_exit(&pp->port_queue.portq_mutex); kmem_cache_free(port_control.pc_cache, pkevp); ! return (err); } pp->port_curr++; mutex_exit(&pp->port_queue.portq_mutex); bzero(pkevp, sizeof (port_kevent_t));
*** 356,373 **** --- 383,417 ---- { port_kevent_t *pkevp = kmem_cache_alloc(port_control.pc_cache, KM_SLEEP); mutex_enter(&pp->port_queue.portq_mutex); + /* + * Mid-close check needs to happen immediately before any + * resource checking or cv_wait()-ing of any sort. + */ + if (pp->port_queue.portq_flags & PORTQ_CLOSE) { + mutex_exit(&pp->port_queue.portq_mutex); + kmem_cache_free(port_control.pc_cache, pkevp); + return (EBADFD); + } + while (pp->port_curr >= pp->port_max_events) { if (!cv_wait_sig(&pp->port_cv, &pp->port_queue.portq_mutex)) { /* signal detected */ mutex_exit(&pp->port_queue.portq_mutex); kmem_cache_free(port_control.pc_cache, pkevp); return (EINTR); } + + /* Oh, and we have to re-check the close state. */ + if (pp->port_queue.portq_flags & PORTQ_CLOSE) { + mutex_exit(&pp->port_queue.portq_mutex); + kmem_cache_free(port_control.pc_cache, pkevp); + return (EBADFD); } + } pp->port_curr++; mutex_exit(&pp->port_queue.portq_mutex); bzero(pkevp, sizeof (port_kevent_t)); mutex_init(&pkevp->portkev_lock, NULL, MUTEX_DEFAULT, NULL);
*** 440,450 **** pp = pkevp->portkev_port; if (pp == NULL) return; if (pkevp->portkev_flags & PORT_ALLOC_PRIVATE) { ! port_free_event_local(pkevp, 0); return; } portq = &pp->port_queue; mutex_enter(&portq->portq_mutex); --- 484,494 ---- pp = pkevp->portkev_port; if (pp == NULL) return; if (pkevp->portkev_flags & PORT_ALLOC_PRIVATE) { ! port_free_event_local(pkevp, B_TRUE); return; } portq = &pp->port_queue; mutex_enter(&portq->portq_mutex);
*** 474,494 **** * of the allocated event structures (port_curr). */ if (pp->port_curr <= portq->portq_nent) cv_signal(&portq->portq_closecv); } ! mutex_exit(&portq->portq_mutex); ! port_free_event_local(pkevp, 1); } /* * This event port internal function is used by port_free_event() and * other port internal functions to return event structures back to the * kmem_cache. */ void ! port_free_event_local(port_kevent_t *pkevp, int counter) { port_t *pp = pkevp->portkev_port; port_queue_t *portq = &pp->port_queue; int wakeup; --- 518,546 ---- * of the allocated event structures (port_curr). */ if (pp->port_curr <= portq->portq_nent) cv_signal(&portq->portq_closecv); } ! ! /* ! * We're holding portq_mutex and we decremented port_curr, so don't ! * have port_free_event_local() do it for us. ! */ ! port_free_event_local(pkevp, B_FALSE); } /* * This event port internal function is used by port_free_event() and * other port internal functions to return event structures back to the * kmem_cache. + * + * If the caller already holds the portq_mutex, indicate by setting + * lock_and_decrement to B_FALSE. This function MUST drop portq_mutex as + * it can call pollwakeup() at its end. */ void ! port_free_event_local(port_kevent_t *pkevp, boolean_t lock_and_decrement) { port_t *pp = pkevp->portkev_port; port_queue_t *portq = &pp->port_queue; int wakeup;
*** 496,511 **** pkevp->portkev_flags = 0; pkevp->portkev_port = NULL; mutex_destroy(&pkevp->portkev_lock); kmem_cache_free(port_control.pc_cache, pkevp); mutex_enter(&portq->portq_mutex); - if (counter == 0) { if (--pp->port_curr < pp->port_max_events) cv_signal(&pp->port_cv); } ! wakeup = (portq->portq_flags & PORTQ_POLLOUT); portq->portq_flags &= ~PORTQ_POLLOUT; mutex_exit(&portq->portq_mutex); /* Submit a POLLOUT event if requested */ if (wakeup) --- 548,577 ---- pkevp->portkev_flags = 0; pkevp->portkev_port = NULL; mutex_destroy(&pkevp->portkev_lock); kmem_cache_free(port_control.pc_cache, pkevp); + if (lock_and_decrement) { + /* + * If we're entering here from outside port_free_event(), + * grab the mutex. We enter here if we hadn't already + * decremented-and-signaled port_curr. + */ mutex_enter(&portq->portq_mutex); if (--pp->port_curr < pp->port_max_events) cv_signal(&pp->port_cv); } ! ASSERT(MUTEX_HELD(&portq->portq_mutex)); ! ! /* ! * Don't send the POLLOUT if we're mid-close. We can only send it if ! * we've dropped the mutex, and if we're closing, dropping the mutex ! * could lead to another thread freeing pp->port_pollhd and the rest ! * of *pp. ! */ ! wakeup = ((portq->portq_flags & (PORTQ_POLLOUT | PORTQ_CLOSE)) == ! PORTQ_POLLOUT); portq->portq_flags &= ~PORTQ_POLLOUT; mutex_exit(&portq->portq_mutex); /* Submit a POLLOUT event if requested */ if (wakeup)
*** 648,658 **** if (pkevp->portkev_callback) { (void) (*pkevp->portkev_callback)(pkevp->portkev_arg, &error, pkevp->portkev_pid, PORT_CALLBACK_DISSOCIATE, pkevp); } ! port_free_event_local(pkevp, 0); /* remove polldat struct */ port_pcache_remove_fd(pcp, pfd); return (removed); } --- 714,724 ---- if (pkevp->portkev_callback) { (void) (*pkevp->portkev_callback)(pkevp->portkev_arg, &error, pkevp->portkev_pid, PORT_CALLBACK_DISSOCIATE, pkevp); } ! port_free_event_local(pkevp, B_TRUE); /* remove polldat struct */ port_pcache_remove_fd(pcp, pfd); return (removed); }
*** 709,718 **** --- 775,795 ---- return (EBADFD); } pp = VTOEP(fp->f_vnode); mutex_enter(&pp->port_queue.portq_source_mutex); + if (pp->port_queue.portq_scache == NULL) { + /* + * This event-port is mid-close. + * By the time portq_scache is freed, PORTQ_CLOSE was set. + */ + ASSERT(pp->port_queue.portq_flags & PORTQ_CLOSE); + mutex_exit(&pp->port_queue.portq_source_mutex); + releasef(port); + /* Equivalent of a bad file descriptor at this point. */ + return (EBADFD); + } ps = &pp->port_queue.portq_scache[PORT_SHASH(source)]; for (pse = *ps; pse != NULL; pse = pse->portsrc_next) { if (pse->portsrc_source == source) break; }