Print this page
cleanup port_free_event_local() semantics
XXXXX race between closef(portfd) and timer_delete()

Split Close
Expand all
Collapse all
          --- old/usr/src/uts/common/os/port_subr.c
          +++ new/usr/src/uts/common/os/port_subr.c
↓ open down ↓ 16 lines elided ↑ open up ↑
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  
  22   22  /*
  23   23   * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  24   24   * Use is subject to license terms.
  25   25   */
  26   26  
  27      -#pragma ident   "%Z%%M% %I%     %E% SMI"
       27 +/*
       28 + * Copyright 2020 Joyent, Inc.
       29 + */
  28   30  
  29   31  /*
  30   32   * This file containts all the functions required for interactions of
  31   33   * event sources with the event port file system.
  32   34   */
  33   35  
  34   36  #include <sys/types.h>
  35   37  #include <sys/conf.h>
  36   38  #include <sys/stat.h>
  37   39  #include <sys/errno.h>
↓ open down ↓ 159 lines elided ↑ open up ↑
 197  199           * ports if being polled.
 198  200           */
 199  201          if (pkevp->portkev_source != PORT_SOURCE_FD &&
 200  202              portq->portq_flags & PORTQ_POLLIN) {
 201  203                  port_t  *pp;
 202  204  
 203  205                  portq->portq_flags &= ~PORTQ_POLLIN;
 204  206                  /*
 205  207                   * Need to save port_t for calling pollwakeup since port_getn()
 206  208                   * may end up freeing pkevp once portq_mutex is dropped.
      209 +                 *
      210 +                 * For that matter, if PORTQ_CLOSE is set, all of the port_t
      211 +                 * may be freed upon dropping portq_mutex.
 207  212                   */
 208      -                pp = pkevp->portkev_port;
      213 +                pp = (portq->portq_flags & PORTQ_CLOSE) ?
      214 +                    NULL : pkevp->portkev_port;
 209  215                  mutex_exit(&portq->portq_mutex);
 210      -                pollwakeup(&pp->port_pollhd, POLLIN);
      216 +                if (pp != NULL)
      217 +                        pollwakeup(&pp->port_pollhd, POLLIN);
 211  218          } else {
 212  219                  mutex_exit(&portq->portq_mutex);
 213  220          }
 214  221  }
 215  222  
 216  223  /*
 217  224   * The port_alloc_event() function has to be used by all event sources
 218  225   * to request an slot for event notification.
 219  226   * The slot reservation could be denied because of lack of resources.
 220  227   * For that reason the event source should allocate an event slot as early
↓ open down ↓ 22 lines elided ↑ open up ↑
 243  250   *      The port_get(n) function will deliver events of such an structure to
 244  251   *      the application but it will not free the event structure itself.
 245  252   *      The event source must free this structure using port_free_event().
 246  253   */
 247  254  int
 248  255  port_alloc_event(int port, int flags, int source, port_kevent_t **pkevpp)
 249  256  {
 250  257          port_t          *pp;
 251  258          file_t          *fp;
 252  259          port_kevent_t   *pkevp;
      260 +        int             err;
 253  261  
 254  262          if ((fp = getf(port)) == NULL)
 255  263                  return (EBADF);
 256  264  
 257  265          if (fp->f_vnode->v_type != VPORT) {
 258  266                  releasef(port);
 259  267                  return (EBADFD);
 260  268          }
 261  269  
 262  270          pkevp = kmem_cache_alloc(port_control.pc_cache, KM_NOSLEEP);
↓ open down ↓ 1 lines elided ↑ open up ↑
 264  272                  releasef(port);
 265  273                  return (ENOMEM);
 266  274          }
 267  275  
 268  276          /*
 269  277           * port_max_events is controlled by the resource control
 270  278           * process.port-max-events
 271  279           */
 272  280          pp = VTOEP(fp->f_vnode);
 273  281          mutex_enter(&pp->port_queue.portq_mutex);
 274      -        if (pp->port_curr >= pp->port_max_events) {
      282 +
      283 +        /* Two possible error conditions. */
      284 +        if (pp->port_queue.portq_flags & PORTQ_CLOSE)
      285 +                err = EBADFD;   /* Mid-close. */
      286 +        else if (pp->port_curr >= pp->port_max_events)
      287 +                err = EAGAIN;   /* Resources unavailabile. */
      288 +        else
      289 +                err = 0;        /* Good to go! */
      290 +
      291 +        if (err != 0) {
 275  292                  mutex_exit(&pp->port_queue.portq_mutex);
 276  293                  kmem_cache_free(port_control.pc_cache, pkevp);
 277  294                  releasef(port);
 278      -                return (EAGAIN);
      295 +                return (err);
 279  296          }
 280  297          pp->port_curr++;
 281  298          mutex_exit(&pp->port_queue.portq_mutex);
 282  299  
 283  300          bzero(pkevp, sizeof (port_kevent_t));
 284  301          mutex_init(&pkevp->portkev_lock, NULL, MUTEX_DEFAULT, NULL);
 285  302          pkevp->portkev_source = source;
 286  303          pkevp->portkev_flags = flags;
 287  304          pkevp->portkev_pid = curproc->p_pid;
 288  305          pkevp->portkev_port = pp;
↓ open down ↓ 23 lines elided ↑ open up ↑
 312  329   * port_alloc_event_local() is reserved for internal use only.
 313  330   * It is doing the same job as port_alloc_event() but with the event port
 314  331   * pointer as the first argument.
 315  332   * The check of the validity of the port file descriptor is skipped here.
 316  333   */
 317  334  int
 318  335  port_alloc_event_local(port_t *pp, int source, int flags,
 319  336      port_kevent_t **pkevpp)
 320  337  {
 321  338          port_kevent_t   *pkevp;
      339 +        int             err;
 322  340  
 323  341          pkevp = kmem_cache_alloc(port_control.pc_cache, KM_NOSLEEP);
 324  342          if (pkevp == NULL)
 325  343                  return (ENOMEM);
 326  344  
 327  345          mutex_enter(&pp->port_queue.portq_mutex);
 328      -        if (pp->port_curr >= pp->port_max_events) {
      346 +
      347 +        /* Two possible error conditions. */
      348 +        if (pp->port_queue.portq_flags & PORTQ_CLOSE)
      349 +                err = EBADFD;   /* Mid-close. */
      350 +        else if (pp->port_curr >= pp->port_max_events)
      351 +                err = EAGAIN;   /* Resources unavailable. */
      352 +        else
      353 +                err = 0;        /* Good to go! */
      354 +
      355 +        if (err != 0) {
 329  356                  mutex_exit(&pp->port_queue.portq_mutex);
 330  357                  kmem_cache_free(port_control.pc_cache, pkevp);
 331      -                return (EAGAIN);
      358 +                return (err);
 332  359          }
 333  360          pp->port_curr++;
 334  361          mutex_exit(&pp->port_queue.portq_mutex);
 335  362  
 336  363          bzero(pkevp, sizeof (port_kevent_t));
 337  364          mutex_init(&pkevp->portkev_lock, NULL, MUTEX_DEFAULT, NULL);
 338  365          pkevp->portkev_flags = flags;
 339  366          pkevp->portkev_port = pp;
 340  367          pkevp->portkev_source = source;
 341  368          pkevp->portkev_pid = curproc->p_pid;
↓ open down ↓ 9 lines elided ↑ open up ↑
 351  378   * reliability of event delivery for library event sources.
 352  379   */
 353  380  int
 354  381  port_alloc_event_block(port_t *pp, int source, int flags,
 355  382      port_kevent_t **pkevpp)
 356  383  {
 357  384          port_kevent_t *pkevp =
 358  385              kmem_cache_alloc(port_control.pc_cache, KM_SLEEP);
 359  386  
 360  387          mutex_enter(&pp->port_queue.portq_mutex);
      388 +        /*
      389 +         * Mid-close check needs to happen immediately before any
      390 +         * resource checking or cv_wait()-ing of any sort.
      391 +         */
      392 +        if (pp->port_queue.portq_flags & PORTQ_CLOSE) {
      393 +                mutex_exit(&pp->port_queue.portq_mutex);
      394 +                kmem_cache_free(port_control.pc_cache, pkevp);
      395 +                return (EBADFD);
      396 +        }
      397 +
 361  398          while (pp->port_curr >= pp->port_max_events) {
 362  399                  if (!cv_wait_sig(&pp->port_cv, &pp->port_queue.portq_mutex)) {
 363  400                          /* signal detected */
 364  401                          mutex_exit(&pp->port_queue.portq_mutex);
 365  402                          kmem_cache_free(port_control.pc_cache, pkevp);
 366  403                          return (EINTR);
 367  404                  }
      405 +
      406 +                /* Oh, and we have to re-check the close state. */
      407 +                if (pp->port_queue.portq_flags & PORTQ_CLOSE) {
      408 +                        mutex_exit(&pp->port_queue.portq_mutex);
      409 +                        kmem_cache_free(port_control.pc_cache, pkevp);
      410 +                        return (EBADFD);
      411 +                }
 368  412          }
 369  413          pp->port_curr++;
 370  414          mutex_exit(&pp->port_queue.portq_mutex);
 371  415  
 372  416          bzero(pkevp, sizeof (port_kevent_t));
 373  417          mutex_init(&pkevp->portkev_lock, NULL, MUTEX_DEFAULT, NULL);
 374  418          pkevp->portkev_flags = flags;
 375  419          pkevp->portkev_port = pp;
 376  420          pkevp->portkev_source = source;
 377  421          pkevp->portkev_pid = curproc->p_pid;
↓ open down ↓ 57 lines elided ↑ open up ↑
 435  479  void
 436  480  port_free_event(port_kevent_t *pkevp)
 437  481  {
 438  482          port_queue_t    *portq;
 439  483          port_t          *pp;
 440  484  
 441  485          pp = pkevp->portkev_port;
 442  486          if (pp == NULL)
 443  487                  return;
 444  488          if (pkevp->portkev_flags & PORT_ALLOC_PRIVATE) {
 445      -                port_free_event_local(pkevp, 0);
      489 +                port_free_event_local(pkevp, B_TRUE);
 446  490                  return;
 447  491          }
 448  492  
 449  493          portq = &pp->port_queue;
 450  494          mutex_enter(&portq->portq_mutex);
 451  495          port_block(portq);
 452  496          if (pkevp->portkev_flags & PORT_KEV_DONEQ) {
 453  497                  pkevp->portkev_flags |= PORT_KEV_FREE;
 454  498                  pkevp->portkev_callback = NULL;
 455  499                  port_unblock(portq);
↓ open down ↓ 13 lines elided ↑ open up ↑
 469  513                  /*
 470  514                   * Another thread is closing the event port.
 471  515                   * That thread will sleep until all allocated event
 472  516                   * structures returned to the event port framework.
 473  517                   * The portq_mutex is used to synchronize the status
 474  518                   * of the allocated event structures (port_curr).
 475  519                   */
 476  520                  if (pp->port_curr <= portq->portq_nent)
 477  521                          cv_signal(&portq->portq_closecv);
 478  522          }
 479      -        mutex_exit(&portq->portq_mutex);
 480      -        port_free_event_local(pkevp, 1);
      523 +
      524 +        /*
      525 +         * We're holding portq_mutex and we decremented port_curr, so don't
      526 +         * have port_free_event_local() do it for us.
      527 +         */
      528 +        port_free_event_local(pkevp, B_FALSE);
 481  529  }
 482  530  
 483  531  /*
 484  532   * This event port internal function is used by port_free_event() and
 485  533   * other port internal functions to return event structures back to the
 486  534   * kmem_cache.
      535 + *
      536 + * If the caller already holds the portq_mutex, indicate by setting
      537 + * lock_and_decrement to B_FALSE.  This function MUST drop portq_mutex as
      538 + * it can call pollwakeup() at its end.
 487  539   */
 488  540  void
 489      -port_free_event_local(port_kevent_t *pkevp, int counter)
      541 +port_free_event_local(port_kevent_t *pkevp, boolean_t lock_and_decrement)
 490  542  {
 491  543          port_t *pp = pkevp->portkev_port;
 492  544          port_queue_t *portq = &pp->port_queue;
 493  545          int wakeup;
 494  546  
 495  547          pkevp->portkev_callback = NULL;
 496  548          pkevp->portkev_flags = 0;
 497  549          pkevp->portkev_port = NULL;
 498  550          mutex_destroy(&pkevp->portkev_lock);
 499  551          kmem_cache_free(port_control.pc_cache, pkevp);
 500  552  
 501      -        mutex_enter(&portq->portq_mutex);
 502      -        if (counter == 0) {
      553 +        if (lock_and_decrement) {
      554 +                /*
      555 +                 * If we're entering here from outside port_event_free(),
      556 +                 * grab the mutex.  We enter here if we hadn't already
      557 +                 * decremented-and-signaled port_curr.
      558 +                 */
      559 +                mutex_enter(&portq->portq_mutex);
 503  560                  if (--pp->port_curr < pp->port_max_events)
 504  561                          cv_signal(&pp->port_cv);
 505  562          }
 506      -        wakeup = (portq->portq_flags & PORTQ_POLLOUT);
      563 +        ASSERT(MUTEX_HELD(&portq->portq_mutex));
      564 +
      565 +        /*
      566 +         * Don't send the POLLOUT if we're mid-close.  We can only send it if
      567 +         * we've dropped the mutex, and if we're closing, dropping the mutex
      568 +         * could lead to another thread freeing pp->port_pollhd and the rest
      569 +         * of *pp.
      570 +         */
      571 +        wakeup = ((portq->portq_flags & (PORTQ_POLLOUT | PORTQ_CLOSE)) ==
      572 +            PORTQ_POLLOUT);
 507  573          portq->portq_flags &= ~PORTQ_POLLOUT;
 508  574          mutex_exit(&portq->portq_mutex);
 509  575  
 510  576          /* Submit a POLLOUT event if requested */
 511  577          if (wakeup)
 512  578                  pollwakeup(&pp->port_pollhd, POLLOUT);
 513  579  }
 514  580  
 515  581  /*
 516  582   * port_init_event(port_event_t *pev, uintptr_t object, void *user,
↓ open down ↓ 126 lines elided ↑ open up ↑
 643  709                  port_remove_event_doneq(pkevp, portq);
 644  710                  removed = 1;
 645  711          }
 646  712          port_unblock(portq);
 647  713          mutex_exit(&portq->portq_mutex);
 648  714          if (pkevp->portkev_callback) {
 649  715                  (void) (*pkevp->portkev_callback)(pkevp->portkev_arg,
 650  716                      &error, pkevp->portkev_pid, PORT_CALLBACK_DISSOCIATE,
 651  717                      pkevp);
 652  718          }
 653      -        port_free_event_local(pkevp, 0);
      719 +        port_free_event_local(pkevp, B_TRUE);
 654  720  
 655  721          /* remove polldat struct */
 656  722          port_pcache_remove_fd(pcp, pfd);
 657  723          return (removed);
 658  724  }
 659  725  
 660  726  /*
 661  727   * The port_close_fd() function dissociates a file descriptor from a port
 662  728   * and removes all allocated resources.
 663  729   * close(2) detects in the uf_entry_t structure that the fd is associated
↓ open down ↓ 40 lines elided ↑ open up ↑
 704  770          if ((fp = getf(port)) == NULL)
 705  771                  return (EBADF);
 706  772  
 707  773          if (fp->f_vnode->v_type != VPORT) {
 708  774                  releasef(port);
 709  775                  return (EBADFD);
 710  776          }
 711  777          pp = VTOEP(fp->f_vnode);
 712  778  
 713  779          mutex_enter(&pp->port_queue.portq_source_mutex);
      780 +        if (pp->port_queue.portq_scache == NULL) {
      781 +                /*
      782 +                 * This event-port is mid-close.
      783 +                 * By the time portq_scache is freed, PORTQ_CLOSE was set.
      784 +                 */
      785 +                ASSERT(pp->port_queue.portq_flags & PORTQ_CLOSE);
      786 +                mutex_exit(&pp->port_queue.portq_source_mutex);
      787 +                releasef(port);
      788 +                /* Equivalent of a bad file descriptor at this point. */
      789 +                return (EBADFD);
      790 +        }
 714  791          ps = &pp->port_queue.portq_scache[PORT_SHASH(source)];
 715  792          for (pse = *ps; pse != NULL; pse = pse->portsrc_next) {
 716  793                  if (pse->portsrc_source == source)
 717  794                          break;
 718  795          }
 719  796  
 720  797          if (pse == NULL) {
 721  798                  /* Create association of the event source with the port */
 722  799                  pse = kmem_zalloc(sizeof (port_source_t), KM_NOSLEEP);
 723  800                  if (pse == NULL) {
↓ open down ↓ 78 lines elided ↑ open up ↑
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX