Print this page
cleanup port_free_event_local() semantics
XXXXX race between closef(portfd) and timer_delete()

@@ -22,11 +22,13 @@
 /*
  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
-#pragma ident   "%Z%%M% %I%     %E% SMI"
+/*
+ * Copyright 2020 Joyent, Inc.
+ */
 
 /*
  * This file containts all the functions required for interactions of
  * event sources with the event port file system.
  */

@@ -202,13 +204,18 @@
 
                 portq->portq_flags &= ~PORTQ_POLLIN;
                 /*
                  * Need to save port_t for calling pollwakeup since port_getn()
                  * may end up freeing pkevp once portq_mutex is dropped.
+                 *
+                 * For that matter, if PORTQ_CLOSE is set, all of the port_t
+                 * may be freed upon dropping portq_mutex.
                  */
-                pp = pkevp->portkev_port;
+                pp = (portq->portq_flags & PORTQ_CLOSE) ?
+                    NULL : pkevp->portkev_port;
                 mutex_exit(&portq->portq_mutex);
+                if (pp != NULL)
                 pollwakeup(&pp->port_pollhd, POLLIN);
         } else {
                 mutex_exit(&portq->portq_mutex);
         }
 }

@@ -248,10 +255,11 @@
 port_alloc_event(int port, int flags, int source, port_kevent_t **pkevpp)
 {
         port_t          *pp;
         file_t          *fp;
         port_kevent_t   *pkevp;
+        int             err;
 
         if ((fp = getf(port)) == NULL)
                 return (EBADF);
 
         if (fp->f_vnode->v_type != VPORT) {

@@ -269,15 +277,24 @@
          * port_max_events is controlled by the resource control
          * process.port-max-events
          */
         pp = VTOEP(fp->f_vnode);
         mutex_enter(&pp->port_queue.portq_mutex);
-        if (pp->port_curr >= pp->port_max_events) {
+
+        /* Two possible error conditions. */
+        if (pp->port_queue.portq_flags & PORTQ_CLOSE)
+                err = EBADFD;   /* Mid-close. */
+        else if (pp->port_curr >= pp->port_max_events)
+                err = EAGAIN;   /* Resources unavailabile. */
+        else
+                err = 0;        /* Good to go! */
+
+        if (err != 0) {
                 mutex_exit(&pp->port_queue.portq_mutex);
                 kmem_cache_free(port_control.pc_cache, pkevp);
                 releasef(port);
-                return (EAGAIN);
+                return (err);
         }
         pp->port_curr++;
         mutex_exit(&pp->port_queue.portq_mutex);
 
         bzero(pkevp, sizeof (port_kevent_t));

@@ -317,20 +334,30 @@
 int
 port_alloc_event_local(port_t *pp, int source, int flags,
     port_kevent_t **pkevpp)
 {
         port_kevent_t   *pkevp;
+        int             err;
 
         pkevp = kmem_cache_alloc(port_control.pc_cache, KM_NOSLEEP);
         if (pkevp == NULL)
                 return (ENOMEM);
 
         mutex_enter(&pp->port_queue.portq_mutex);
-        if (pp->port_curr >= pp->port_max_events) {
+
+        /* Two possible error conditions. */
+        if (pp->port_queue.portq_flags & PORTQ_CLOSE)
+                err = EBADFD;   /* Mid-close. */
+        else if (pp->port_curr >= pp->port_max_events)
+                err = EAGAIN;   /* Resources unavailable. */
+        else
+                err = 0;        /* Good to go! */
+
+        if (err != 0) {
                 mutex_exit(&pp->port_queue.portq_mutex);
                 kmem_cache_free(port_control.pc_cache, pkevp);
-                return (EAGAIN);
+                return (err);
         }
         pp->port_curr++;
         mutex_exit(&pp->port_queue.portq_mutex);
 
         bzero(pkevp, sizeof (port_kevent_t));

@@ -356,18 +383,35 @@
 {
         port_kevent_t *pkevp =
             kmem_cache_alloc(port_control.pc_cache, KM_SLEEP);
 
         mutex_enter(&pp->port_queue.portq_mutex);
+        /*
+         * Mid-close check needs to happen immediately before any
+         * resource checking or cv_wait()-ing of any sort.
+         */
+        if (pp->port_queue.portq_flags & PORTQ_CLOSE) {
+                mutex_exit(&pp->port_queue.portq_mutex);
+                kmem_cache_free(port_control.pc_cache, pkevp);
+                return (EBADFD);
+        }
+
         while (pp->port_curr >= pp->port_max_events) {
                 if (!cv_wait_sig(&pp->port_cv, &pp->port_queue.portq_mutex)) {
                         /* signal detected */
                         mutex_exit(&pp->port_queue.portq_mutex);
                         kmem_cache_free(port_control.pc_cache, pkevp);
                         return (EINTR);
                 }
+
+                /* Oh, and we have to re-check the close state. */
+                if (pp->port_queue.portq_flags & PORTQ_CLOSE) {
+                        mutex_exit(&pp->port_queue.portq_mutex);
+                        kmem_cache_free(port_control.pc_cache, pkevp);
+                        return (EBADFD);
         }
+        }
         pp->port_curr++;
         mutex_exit(&pp->port_queue.portq_mutex);
 
         bzero(pkevp, sizeof (port_kevent_t));
         mutex_init(&pkevp->portkev_lock, NULL, MUTEX_DEFAULT, NULL);

@@ -440,11 +484,11 @@
 
         pp = pkevp->portkev_port;
         if (pp == NULL)
                 return;
         if (pkevp->portkev_flags & PORT_ALLOC_PRIVATE) {
-                port_free_event_local(pkevp, 0);
+                port_free_event_local(pkevp, B_TRUE);
                 return;
         }
 
         portq = &pp->port_queue;
         mutex_enter(&portq->portq_mutex);

@@ -474,21 +518,29 @@
                  * of the allocated event structures (port_curr).
                  */
                 if (pp->port_curr <= portq->portq_nent)
                         cv_signal(&portq->portq_closecv);
         }
-        mutex_exit(&portq->portq_mutex);
-        port_free_event_local(pkevp, 1);
+
+        /*
+         * We're holding portq_mutex and we decremented port_curr, so don't
+         * have port_free_event_local() do it for us.
+         */
+        port_free_event_local(pkevp, B_FALSE);
 }
 
 /*
  * This event port internal function is used by port_free_event() and
  * other port internal functions to return event structures back to the
  * kmem_cache.
+ *
+ * If the caller already holds the portq_mutex, indicate by setting
+ * lock_and_decrement to B_FALSE.  This function MUST drop portq_mutex as
+ * it can call pollwakeup() at its end.
  */
 void
-port_free_event_local(port_kevent_t *pkevp, int counter)
+port_free_event_local(port_kevent_t *pkevp, boolean_t lock_and_decrement)
 {
         port_t *pp = pkevp->portkev_port;
         port_queue_t *portq = &pp->port_queue;
         int wakeup;
 

@@ -496,16 +548,30 @@
         pkevp->portkev_flags = 0;
         pkevp->portkev_port = NULL;
         mutex_destroy(&pkevp->portkev_lock);
         kmem_cache_free(port_control.pc_cache, pkevp);
 
+        if (lock_and_decrement) {
+                /*
+                 * If we're entering here from outside port_event_free(),
+                 * grab the mutex.  We enter here if we hadn't already
+                 * decremented-and-signaled port_curr.
+                 */
         mutex_enter(&portq->portq_mutex);
-        if (counter == 0) {
                 if (--pp->port_curr < pp->port_max_events)
                         cv_signal(&pp->port_cv);
         }
-        wakeup = (portq->portq_flags & PORTQ_POLLOUT);
+        ASSERT(MUTEX_HELD(&portq->portq_mutex));
+
+        /*
+         * Don't send the POLLOUT if we're mid-close.  We can only send it if
+         * we've dropped the mutex, and if we're closing, dropping the mutex
+         * could lead to another thread freeing pp->port_pollhd and the rest
+         * of *pp.
+         */
+        wakeup = ((portq->portq_flags & (PORTQ_POLLOUT | PORTQ_CLOSE)) ==
+            PORTQ_POLLOUT);
         portq->portq_flags &= ~PORTQ_POLLOUT;
         mutex_exit(&portq->portq_mutex);
 
         /* Submit a POLLOUT event if requested */
         if (wakeup)

@@ -648,11 +714,11 @@
         if (pkevp->portkev_callback) {
                 (void) (*pkevp->portkev_callback)(pkevp->portkev_arg,
                     &error, pkevp->portkev_pid, PORT_CALLBACK_DISSOCIATE,
                     pkevp);
         }
-        port_free_event_local(pkevp, 0);
+        port_free_event_local(pkevp, B_TRUE);
 
         /* remove polldat struct */
         port_pcache_remove_fd(pcp, pfd);
         return (removed);
 }

@@ -709,10 +775,21 @@
                 return (EBADFD);
         }
         pp = VTOEP(fp->f_vnode);
 
         mutex_enter(&pp->port_queue.portq_source_mutex);
+        if (pp->port_queue.portq_scache == NULL) {
+                /*
+                 * This event-port is mid-close.
+                 * By the time portq_scache is freed, PORTQ_CLOSE was set.
+                 */
+                ASSERT(pp->port_queue.portq_flags & PORTQ_CLOSE);
+                mutex_exit(&pp->port_queue.portq_source_mutex);
+                releasef(port);
+                /* Equivalent of a bad file descriptor at this point. */
+                return (EBADFD);
+        }
         ps = &pp->port_queue.portq_scache[PORT_SHASH(source)];
         for (pse = *ps; pse != NULL; pse = pse->portsrc_next) {
                 if (pse->portsrc_source == source)
                         break;
         }