Print this page
    
    
      
        | Split | 
	Close | 
      
      | Expand all | 
      | Collapse all | 
    
    
          --- old/usr/src/uts/common/io/eventfd.c
          +++ new/usr/src/uts/common/io/eventfd.c
   1    1  /*
   2    2   * This file and its contents are supplied under the terms of the
   3    3   * Common Development and Distribution License ("CDDL"), version 1.0.
   4    4   * You may only use this file in accordance with the terms of version
   5    5   * 1.0 of the CDDL.
   6    6   *
   7    7   * A full copy of the text of the CDDL should have accompanied this
   8    8   * source.  A copy of the CDDL is also available via the Internet at
   9    9   * http://www.illumos.org/license/CDDL.
  10   10   */
  11   11  
  12   12  /*
  13   13   * Copyright 2016 Joyent, Inc.
  14   14   */
  15   15  
  16   16  /*
  17   17   * Support for the eventfd facility, a Linux-borne facility for user-generated
  18   18   * file descriptor-based events.
  19   19   */
  20   20  
  21   21  #include <sys/ddi.h>
  22   22  #include <sys/sunddi.h>
  23   23  #include <sys/eventfd.h>
  24   24  #include <sys/conf.h>
  25   25  #include <sys/vmem.h>
  26   26  #include <sys/sysmacros.h>
  27   27  #include <sys/filio.h>
  28   28  #include <sys/stat.h>
  29   29  #include <sys/file.h>
  30   30  
  31   31  struct eventfd_state;
  32   32  typedef struct eventfd_state eventfd_state_t;
  33   33  
  34   34  struct eventfd_state {
  35   35          kmutex_t efd_lock;                      /* lock protecting state */
  36   36          boolean_t efd_semaphore;                /* boolean: sema. semantics */
  37   37          kcondvar_t efd_cv;                      /* condvar */
  38   38          pollhead_t efd_pollhd;                  /* poll head */
  39   39          uint64_t efd_value;                     /* value */
  40   40          size_t efd_bwriters;                    /* count of blocked writers */
  41   41          eventfd_state_t *efd_next;              /* next state on global list */
  42   42  };
  43   43  
  44   44  /*
  45   45   * Internal global variables.
  46   46   */
  47   47  static kmutex_t         eventfd_lock;           /* lock protecting state */
  48   48  static dev_info_t       *eventfd_devi;          /* device info */
  49   49  static vmem_t           *eventfd_minor;         /* minor number arena */
  50   50  static void             *eventfd_softstate;     /* softstate pointer */
  51   51  static eventfd_state_t  *eventfd_state;         /* global list of state */
  52   52  
  53   53  /*ARGSUSED*/
  54   54  static int
  55   55  eventfd_open(dev_t *devp, int flag, int otyp, cred_t *cred_p)
  56   56  {
  57   57          eventfd_state_t *state;
  58   58          major_t major = getemajor(*devp);
  59   59          minor_t minor = getminor(*devp);
  60   60  
  61   61          if (minor != EVENTFDMNRN_EVENTFD)
  62   62                  return (ENXIO);
  63   63  
  64   64          mutex_enter(&eventfd_lock);
  65   65  
  66   66          minor = (minor_t)(uintptr_t)vmem_alloc(eventfd_minor, 1,
  67   67              VM_BESTFIT | VM_SLEEP);
  68   68  
  69   69          if (ddi_soft_state_zalloc(eventfd_softstate, minor) != DDI_SUCCESS) {
  70   70                  vmem_free(eventfd_minor, (void *)(uintptr_t)minor, 1);
  71   71                  mutex_exit(&eventfd_lock);
  72   72                  return (NULL);
  73   73          }
  74   74  
  75   75          state = ddi_get_soft_state(eventfd_softstate, minor);
  76   76          *devp = makedevice(major, minor);
  77   77  
  78   78          state->efd_next = eventfd_state;
  79   79          eventfd_state = state;
  80   80  
  81   81          mutex_exit(&eventfd_lock);
  82   82  
  83   83          return (0);
  84   84  }
  85   85  
  86   86  /*ARGSUSED*/
  87   87  static int
  88   88  eventfd_read(dev_t dev, uio_t *uio, cred_t *cr)
  89   89  {
  90   90          eventfd_state_t *state;
  91   91          minor_t minor = getminor(dev);
  92   92          uint64_t val, oval;
  93   93          int err;
  94   94  
  95   95          if (uio->uio_resid < sizeof (val))
  96   96                  return (EINVAL);
  97   97  
  98   98          state = ddi_get_soft_state(eventfd_softstate, minor);
  99   99  
 100  100          mutex_enter(&state->efd_lock);
 101  101  
 102  102          while (state->efd_value == 0) {
 103  103                  if (uio->uio_fmode & (FNDELAY|FNONBLOCK)) {
 104  104                          mutex_exit(&state->efd_lock);
 105  105                          return (EAGAIN);
 106  106                  }
 107  107  
 108  108                  if (!cv_wait_sig_swap(&state->efd_cv, &state->efd_lock)) {
 109  109                          mutex_exit(&state->efd_lock);
 110  110                          return (EINTR);
 111  111                  }
 112  112          }
 113  113  
 114  114          /*
 115  115           * We have a non-zero value and we own the lock; our behavior now
 116  116           * depends on whether or not EFD_SEMAPHORE was set when the eventfd
 117  117           * was created.
 118  118           */
 119  119          val = oval = state->efd_value;
 120  120  
 121  121          if (state->efd_semaphore) {
 122  122                  state->efd_value--;
 123  123                  val = 1;
 124  124          } else {
 125  125                  state->efd_value = 0;
 126  126          }
 127  127  
 128  128          err = uiomove(&val, sizeof (val), UIO_READ, uio);
 129  129  
 130  130          /*
 131  131           * Wake any writers blocked on this eventfd as this read operation may
 132  132           * have created adequate capacity for their values.
 133  133           */
 134  134          if (state->efd_bwriters != 0) {
 135  135                  cv_broadcast(&state->efd_cv);
 136  136          }
 137  137          mutex_exit(&state->efd_lock);
 138  138  
 139  139          /*
 140  140           * It is necessary to emit POLLOUT events only when the eventfd
 141  141           * transitions from EVENTFD_VALMAX to a lower value.  At all other
 142  142           * times, it is already considered writable by poll.
 143  143           */
 144  144          if (oval == EVENTFD_VALMAX) {
 145  145                  pollwakeup(&state->efd_pollhd, POLLWRNORM | POLLOUT);
 146  146          }
 147  147  
 148  148          return (err);
 149  149  }
 150  150  
 151  151  /*ARGSUSED*/
 152  152  static int
 153  153  eventfd_write(dev_t dev, struct uio *uio, cred_t *credp)
 154  154  {
 155  155          eventfd_state_t *state;
 156  156          minor_t minor = getminor(dev);
 157  157          uint64_t val, oval;
 158  158          int err;
 159  159  
 160  160          if (uio->uio_resid < sizeof (val))
 161  161                  return (EINVAL);
 162  162  
 163  163          if ((err = uiomove(&val, sizeof (val), UIO_WRITE, uio)) != 0)
 164  164                  return (err);
 165  165  
 166  166          if (val > EVENTFD_VALMAX)
 167  167                  return (EINVAL);
 168  168  
 169  169          state = ddi_get_soft_state(eventfd_softstate, minor);
 170  170  
 171  171          mutex_enter(&state->efd_lock);
 172  172  
 173  173          while (val > EVENTFD_VALMAX - state->efd_value) {
 174  174                  if (uio->uio_fmode & (FNDELAY|FNONBLOCK)) {
 175  175                          mutex_exit(&state->efd_lock);
 176  176                          return (EAGAIN);
 177  177                  }
 178  178  
 179  179                  state->efd_bwriters++;
 180  180                  if (!cv_wait_sig_swap(&state->efd_cv, &state->efd_lock)) {
 181  181                          state->efd_bwriters--;
 182  182                          mutex_exit(&state->efd_lock);
 183  183                          return (EINTR);
 184  184                  }
 185  185                  state->efd_bwriters--;
 186  186          }
 187  187  
 188  188          /*
 189  189           * We now know that we can add the value without overflowing.
 190  190           */
 191  191          state->efd_value = (oval = state->efd_value) + val;
 192  192  
 193  193          /*
 194  194           * If the value was previously "empty", notify blocked readers that
 195  195           * data is available.
 196  196           */
 197  197          if (oval == 0) {
 198  198                  cv_broadcast(&state->efd_cv);
 199  199          }
 200  200          mutex_exit(&state->efd_lock);
 201  201  
 202  202          /*
 203  203           * Notify pollers as well if the eventfd is now readable.
 204  204           */
 205  205          if (oval == 0) {
 206  206                  pollwakeup(&state->efd_pollhd, POLLRDNORM | POLLIN);
 207  207          }
 208  208  
 209  209          return (0);
 210  210  }
 211  211  
 212  212  /*ARGSUSED*/
 213  213  static int
 214  214  eventfd_poll(dev_t dev, short events, int anyyet, short *reventsp,
 215  215      struct pollhead **phpp)
 216  216  {
 217  217          eventfd_state_t *state;
 218  218          minor_t minor = getminor(dev);
 219  219          short revents = 0;
 220  220  
 221  221          state = ddi_get_soft_state(eventfd_softstate, minor);
 222  222  
 223  223          mutex_enter(&state->efd_lock);
 224  224  
 225  225          if (state->efd_value > 0)
 226  226                  revents |= POLLRDNORM | POLLIN;
 227  227  
 228  228          if (state->efd_value < EVENTFD_VALMAX)
 229  229                  revents |= POLLWRNORM | POLLOUT;
 230  230  
 231  231          if (!(*reventsp = revents & events) && !anyyet)
 232  232                  *phpp = &state->efd_pollhd;
 233  233  
 234  234          mutex_exit(&state->efd_lock);
 235  235  
 236  236          return (0);
 237  237  }
 238  238  
 239  239  /*ARGSUSED*/
 240  240  static int
 241  241  eventfd_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv)
 242  242  {
 243  243          eventfd_state_t *state;
 244  244          minor_t minor = getminor(dev);
 245  245  
 246  246          state = ddi_get_soft_state(eventfd_softstate, minor);
 247  247  
 248  248          switch (cmd) {
 249  249          case EVENTFDIOC_SEMAPHORE: {
 250  250                  mutex_enter(&state->efd_lock);
 251  251                  state->efd_semaphore ^= 1;
 252  252                  mutex_exit(&state->efd_lock);
 253  253  
 254  254                  return (0);
 255  255          }
 256  256  
 257  257          default:
 258  258                  break;
 259  259          }
 260  260  
 261  261          return (ENOTTY);
 262  262  }
 263  263  
 264  264  /*ARGSUSED*/
 265  265  static int
 266  266  eventfd_close(dev_t dev, int flag, int otyp, cred_t *cred_p)
 267  267  {
 268  268          eventfd_state_t *state, **sp;
 269  269          minor_t minor = getminor(dev);
 270  270  
 271  271          state = ddi_get_soft_state(eventfd_softstate, minor);
 272  272  
 273  273          if (state->efd_pollhd.ph_list != NULL) {
 274  274                  pollwakeup(&state->efd_pollhd, POLLERR);
 275  275                  pollhead_clean(&state->efd_pollhd);
 276  276          }
 277  277  
 278  278          mutex_enter(&eventfd_lock);
 279  279  
 280  280          /*
 281  281           * Remove our state from our global list.
 282  282           */
 283  283          for (sp = &eventfd_state; *sp != state; sp = &((*sp)->efd_next))
 284  284                  VERIFY(*sp != NULL);
 285  285  
 286  286          *sp = (*sp)->efd_next;
 287  287  
 288  288          ddi_soft_state_free(eventfd_softstate, minor);
 289  289          vmem_free(eventfd_minor, (void *)(uintptr_t)minor, 1);
 290  290  
 291  291          mutex_exit(&eventfd_lock);
 292  292  
 293  293          return (0);
 294  294  }
 295  295  
 296  296  static int
 297  297  eventfd_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
 298  298  {
 299  299          switch (cmd) {
 300  300          case DDI_ATTACH:
 301  301                  break;
 302  302  
 303  303          case DDI_RESUME:
 304  304                  return (DDI_SUCCESS);
 305  305  
 306  306          default:
 307  307                  return (DDI_FAILURE);
 308  308          }
 309  309  
 310  310          mutex_enter(&eventfd_lock);
 311  311  
 312  312          if (ddi_soft_state_init(&eventfd_softstate,
 313  313              sizeof (eventfd_state_t), 0) != 0) {
 314  314                  cmn_err(CE_NOTE, "/dev/eventfd failed to create soft state");
 315  315                  mutex_exit(&eventfd_lock);
 316  316                  return (DDI_FAILURE);
 317  317          }
 318  318  
 319  319          if (ddi_create_minor_node(devi, "eventfd", S_IFCHR,
 320  320              EVENTFDMNRN_EVENTFD, DDI_PSEUDO, NULL) == DDI_FAILURE) {
 321  321                  cmn_err(CE_NOTE, "/dev/eventfd couldn't create minor node");
 322  322                  ddi_soft_state_fini(&eventfd_softstate);
 323  323                  mutex_exit(&eventfd_lock);
 324  324                  return (DDI_FAILURE);
 325  325          }
 326  326  
 327  327          ddi_report_dev(devi);
 328  328          eventfd_devi = devi;
 329  329  
 330  330          eventfd_minor = vmem_create("eventfd_minor", (void *)EVENTFDMNRN_CLONE,
 331  331              UINT32_MAX - EVENTFDMNRN_CLONE, 1, NULL, NULL, NULL, 0,
 332  332              VM_SLEEP | VMC_IDENTIFIER);
 333  333  
 334  334          mutex_exit(&eventfd_lock);
 335  335  
 336  336          return (DDI_SUCCESS);
 337  337  }
 338  338  
 339  339  /*ARGSUSED*/
 340  340  static int
 341  341  eventfd_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
 342  342  {
 343  343          switch (cmd) {
 344  344          case DDI_DETACH:
 345  345                  break;
 346  346  
 347  347          case DDI_SUSPEND:
 348  348                  return (DDI_SUCCESS);
 349  349  
 350  350          default:
 351  351                  return (DDI_FAILURE);
 352  352          }
 353  353  
 354  354          mutex_enter(&eventfd_lock);
 355  355          vmem_destroy(eventfd_minor);
 356  356  
 357  357          ddi_remove_minor_node(eventfd_devi, NULL);
 358  358          eventfd_devi = NULL;
 359  359  
 360  360          ddi_soft_state_fini(&eventfd_softstate);
 361  361          mutex_exit(&eventfd_lock);
 362  362  
 363  363          return (DDI_SUCCESS);
 364  364  }
 365  365  
 366  366  /*ARGSUSED*/
 367  367  static int
 368  368  eventfd_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
 369  369  {
 370  370          int error;
 371  371  
 372  372          switch (infocmd) {
 373  373          case DDI_INFO_DEVT2DEVINFO:
 374  374                  *result = (void *)eventfd_devi;
 375  375                  error = DDI_SUCCESS;
 376  376                  break;
 377  377          case DDI_INFO_DEVT2INSTANCE:
 378  378                  *result = (void *)0;
 379  379                  error = DDI_SUCCESS;
 380  380                  break;
 381  381          default:
 382  382                  error = DDI_FAILURE;
 383  383          }
 384  384          return (error);
 385  385  }
 386  386  
 387  387  static struct cb_ops eventfd_cb_ops = {
 388  388          eventfd_open,           /* open */
 389  389          eventfd_close,          /* close */
 390  390          nulldev,                /* strategy */
 391  391          nulldev,                /* print */
 392  392          nodev,                  /* dump */
 393  393          eventfd_read,           /* read */
 394  394          eventfd_write,          /* write */
 395  395          eventfd_ioctl,          /* ioctl */
 396  396          nodev,                  /* devmap */
 397  397          nodev,                  /* mmap */
 398  398          nodev,                  /* segmap */
 399  399          eventfd_poll,           /* poll */
 400  400          ddi_prop_op,            /* cb_prop_op */
 401  401          0,                      /* streamtab  */
 402  402          D_NEW | D_MP            /* Driver compatibility flag */
 403  403  };
 404  404  
 405  405  static struct dev_ops eventfd_ops = {
 406  406          DEVO_REV,               /* devo_rev */
 407  407          0,                      /* refcnt */
 408  408          eventfd_info,           /* get_dev_info */
 409  409          nulldev,                /* identify */
 410  410          nulldev,                /* probe */
 411  411          eventfd_attach,         /* attach */
 412  412          eventfd_detach,         /* detach */
 413  413          nodev,                  /* reset */
 414  414          &eventfd_cb_ops,        /* driver operations */
 415  415          NULL,                   /* bus operations */
 416  416          nodev,                  /* dev power */
 417  417          ddi_quiesce_not_needed, /* quiesce */
 418  418  };
 419  419  
 420  420  static struct modldrv modldrv = {
 421  421          &mod_driverops,         /* module type (this is a pseudo driver) */
 422  422          "eventfd support",      /* name of module */
 423  423          &eventfd_ops,           /* driver ops */
 424  424  };
 425  425  
 426  426  static struct modlinkage modlinkage = {
 427  427          MODREV_1,
 428  428          (void *)&modldrv,
 429  429          NULL
 430  430  };
 431  431  
 432  432  int
 433  433  _init(void)
 434  434  {
 435  435          return (mod_install(&modlinkage));
 436  436  }
 437  437  
 438  438  int
 439  439  _info(struct modinfo *modinfop)
 440  440  {
 441  441          return (mod_info(&modlinkage, modinfop));
 442  442  }
 443  443  
 444  444  int
 445  445  _fini(void)
 446  446  {
 447  447          return (mod_remove(&modlinkage));
 448  448  }
  
    | 
      ↓ open down ↓ | 
    448 lines elided | 
    
      ↑ open up ↑ | 
  
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX