Print this page
OS-5538 eventfd wrongly blocks writers in semaphore mode
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Bryan Cantrill <bryan@joyent.com>
Approved by: Jerry Jelinek <jerry.jelinek@joyent.com>

Split Close
Expand all
Collapse all
          --- old/usr/src/uts/common/io/eventfd.c
          +++ new/usr/src/uts/common/io/eventfd.c
↓ open down ↓ 2 lines elided ↑ open up ↑
   3    3   * Common Development and Distribution License ("CDDL"), version 1.0.
   4    4   * You may only use this file in accordance with the terms of version
   5    5   * 1.0 of the CDDL.
   6    6   *
   7    7   * A full copy of the text of the CDDL should have accompanied this
   8    8   * source.  A copy of the CDDL is also available via the Internet at
   9    9   * http://www.illumos.org/license/CDDL.
  10   10   */
  11   11  
  12   12  /*
  13      - * Copyright (c) 2015 Joyent, Inc.  All rights reserved.
       13 + * Copyright 2016 Joyent, Inc.
  14   14   */
  15   15  
  16   16  /*
  17   17   * Support for the eventfd facility, a Linux-borne facility for user-generated
  18   18   * file descriptor-based events.
  19   19   */
  20   20  
  21   21  #include <sys/ddi.h>
  22   22  #include <sys/sunddi.h>
  23   23  #include <sys/eventfd.h>
↓ open down ↓ 6 lines elided ↑ open up ↑
  30   30  
  31   31  struct eventfd_state;
  32   32  typedef struct eventfd_state eventfd_state_t;
  33   33  
  34   34  struct eventfd_state {
  35   35          kmutex_t efd_lock;                      /* lock protecting this state */
  36   36          boolean_t efd_semaphore;                /* if set, read decrements value by 1 instead of zeroing it */
  37   37          kcondvar_t efd_cv;                      /* condvar for blocked readers and writers */
  38   38          pollhead_t efd_pollhd;                  /* poll head passed to pollwakeup() */
  39   39          uint64_t efd_value;                     /* counter; writes keep it <= EVENTFD_VALMAX */
       40 +        size_t efd_bwriters;                    /* count of writers blocked on efd_cv */
  40   41          eventfd_state_t *efd_next;              /* next state on global list */
  41   42  };
  42   43  
  43   44  /*
  44   45   * Internal global variables.
  45   46   */
  46   47  static kmutex_t         eventfd_lock;           /* lock protecting state */
  47   48  static dev_info_t       *eventfd_devi;          /* device info */
  48   49  static vmem_t           *eventfd_minor;         /* minor number arena */
  49   50  static void             *eventfd_softstate;     /* softstate pointer */
↓ open down ↓ 69 lines elided ↑ open up ↑
 119  120  
 120  121          if (state->efd_semaphore) {
 121  122                  state->efd_value--;
 122  123                  val = 1;
 123  124          } else {
 124  125                  state->efd_value = 0;
 125  126          }
 126  127  
 127  128          err = uiomove(&val, sizeof (val), UIO_READ, uio);
 128  129  
      130 +        /*
      131 +         * Wake any writers blocked on this eventfd as this read operation may
      132 +         * have created adequate capacity for their values.
      133 +         */
      134 +        if (state->efd_bwriters != 0) {
      135 +                cv_broadcast(&state->efd_cv);
      136 +        }
 129  137          mutex_exit(&state->efd_lock);
 130  138  
      139 +        /*
      140 +         * It is necessary to emit POLLOUT events only when the eventfd
      141 +         * transitions from EVENTFD_VALMAX to a lower value.  At all other
      142 +         * times, it is already considered writable by poll.
      143 +         */
 131  144          if (oval == EVENTFD_VALMAX) {
 132      -                cv_broadcast(&state->efd_cv);
 133  145                  pollwakeup(&state->efd_pollhd, POLLWRNORM | POLLOUT);
 134  146          }
 135  147  
 136  148          return (err);
 137  149  }
 138  150  
 139  151  /*ARGSUSED*/
 140  152  static int
 141  153  eventfd_write(dev_t dev, struct uio *uio, cred_t *credp)
 142  154  {
↓ open down ↓ 14 lines elided ↑ open up ↑
 157  169          state = ddi_get_soft_state(eventfd_softstate, minor);
 158  170  
 159  171          mutex_enter(&state->efd_lock);
 160  172  
 161  173          while (val > EVENTFD_VALMAX - state->efd_value) {
 162  174                  if (uio->uio_fmode & (FNDELAY|FNONBLOCK)) {
 163  175                          mutex_exit(&state->efd_lock);
 164  176                          return (EAGAIN);
 165  177                  }
 166  178  
      179 +                state->efd_bwriters++;
 167  180                  if (!cv_wait_sig_swap(&state->efd_cv, &state->efd_lock)) {
      181 +                        state->efd_bwriters--;
 168  182                          mutex_exit(&state->efd_lock);
 169  183                          return (EINTR);
 170  184                  }
      185 +                state->efd_bwriters--;
 171  186          }
 172  187  
 173  188          /*
 174  189           * We now know that we can add the value without overflowing.
 175  190           */
 176  191          state->efd_value = (oval = state->efd_value) + val;
 177  192  
      193 +        /*
      194 +         * If the value was previously "empty", notify blocked readers that
      195 +         * data is available.
      196 +         */
      197 +        if (oval == 0) {
      198 +                cv_broadcast(&state->efd_cv);
      199 +        }
 178  200          mutex_exit(&state->efd_lock);
 179  201  
      202 +        /*
      203 +         * Notify pollers as well if the eventfd is now readable.
      204 +         */
 180  205          if (oval == 0) {
 181      -                cv_broadcast(&state->efd_cv);
 182  206                  pollwakeup(&state->efd_pollhd, POLLRDNORM | POLLIN);
 183  207          }
 184  208  
 185  209          return (0);
 186  210  }
 187  211  
 188  212  /*ARGSUSED*/
 189  213  static int
 190  214  eventfd_poll(dev_t dev, short events, int anyyet, short *reventsp,
 191  215      struct pollhead **phpp)
↓ open down ↓ 233 lines elided ↑ open up ↑
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX