Print this page
OS-5538 eventfd wrongly blocks writers in semaphore mode
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Bryan Cantrill <bryan@joyent.com>
Approved by: Jerry Jelinek <jerry.jelinek@joyent.com>

@@ -8,11 +8,11 @@
  * source.  A copy of the CDDL is also available via the Internet at
  * http://www.illumos.org/license/CDDL.
  */
 
 /*
- * Copyright (c) 2015 Joyent, Inc.  All rights reserved.
+ * Copyright 2016 Joyent, Inc.
  */
 
 /*
  * Support for the eventfd facility, a Linux-borne facility for user-generated
  * file descriptor-based events.

@@ -35,10 +35,11 @@
         kmutex_t efd_lock;                      /* lock protecting state */
         boolean_t efd_semaphore;                /* boolean: sema. semantics */
         kcondvar_t efd_cv;                      /* condvar */
         pollhead_t efd_pollhd;                  /* poll head */
         uint64_t efd_value;                     /* value */
+        size_t efd_bwriters;                    /* count of blocked writers */
         eventfd_state_t *efd_next;              /* next state on global list */
 };
 
 /*
  * Internal global variables.

@@ -124,14 +125,25 @@
                 state->efd_value = 0;
         }
 
         err = uiomove(&val, sizeof (val), UIO_READ, uio);
 
+        /*
+         * Wake any writers blocked on this eventfd as this read operation may
+         * have created adequate capacity for their values.
+         */
+        if (state->efd_bwriters != 0) {
+                cv_broadcast(&state->efd_cv);
+        }
         mutex_exit(&state->efd_lock);
 
+        /*
+         * It is necessary to emit POLLOUT events only when the eventfd
+         * transitions from EVENTFD_VALMAX to a lower value.  At all other
+         * times, it is already considered writable by poll.
+         */
         if (oval == EVENTFD_VALMAX) {
-                cv_broadcast(&state->efd_cv);
                 pollwakeup(&state->efd_pollhd, POLLWRNORM | POLLOUT);
         }
 
         return (err);
 }

@@ -162,25 +174,37 @@
                 if (uio->uio_fmode & (FNDELAY|FNONBLOCK)) {
                         mutex_exit(&state->efd_lock);
                         return (EAGAIN);
                 }
 
+                state->efd_bwriters++;
                 if (!cv_wait_sig_swap(&state->efd_cv, &state->efd_lock)) {
+                        state->efd_bwriters--;
                         mutex_exit(&state->efd_lock);
                         return (EINTR);
                 }
+                state->efd_bwriters--;
         }
 
         /*
          * We now know that we can add the value without overflowing.
          */
         state->efd_value = (oval = state->efd_value) + val;
 
+        /*
+         * If the value was previously "empty", notify blocked readers that
+         * data is available.
+         */
+        if (oval == 0) {
+                cv_broadcast(&state->efd_cv);
+        }
         mutex_exit(&state->efd_lock);
 
+        /*
+         * Notify pollers as well if the eventfd is now readable.
+         */
         if (oval == 0) {
-                cv_broadcast(&state->efd_cv);
                 pollwakeup(&state->efd_pollhd, POLLRDNORM | POLLIN);
         }
 
         return (0);
 }