3354 kernel crash in rpcsec_gss after using gsscred
Reviewed by: Toomas Soome <tsoome@me.com>
Reviewed by: Carlos Neira <cneirabustos@gmail.com>
Approved by: Robert Mustacchi <rm@joyent.com>
NEX-14051 Be careful with RPC groups
Reviewed by: Rick McNeal <rick.mcneal@nexenta.com>
Reviewed by: Evan Layton <evan.layton@nexenta.com>
8085 Handle RPC groups better
Reviewed by: "Joshua M. Clulow" <josh@sysmgr.org>
Reviewed by: Paul Dagnelie <pcd@delphix.com>
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Approved by: Dan McDonald <danmcd@omniti.com>
NEX-2320 svc_queueclose() could wake up too many threads
Reviewed by: Rick Mesta <rick.mesta@nexenta.com>
Reviewed by: Gordon Ross <gordon.ross@nexenta.com>
Reviewed by: Dan Fields <dan.fields@nexenta.com>
Reviewed by: Josef Sipek <josef.sipek@nexenta.com>
Reviewed by: Tony Nguyen <tony.nguyen@nexenta.com>
re #12783 rb4338 Flow control is needed in rpcmod when the NFS server is unable to keep up with the network
re #13613 rb4516 Tunables need volatile keyword
re #12714 rb4347 Corruption of the `xprt-ready' queue in svc_xprt_qdelete()
    
      
    
    
          --- old/usr/src/uts/common/rpc/svc.c
          +++ new/usr/src/uts/common/rpc/svc.c
   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  
  
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  
  22   22  /*
       23 + * Copyright 2012 Marcel Telka <marcel@telka.sk>
  23   24   * Copyright 2015 Nexenta Systems, Inc.  All rights reserved.
       25 + * Copyright 2018 OmniOS Community Edition (OmniOSce) Association.
  24   26   */
  25   27  
  26   28  /*
  27   29   * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
  28   30   * Use is subject to license terms.
  29   31   */
  30   32  
  31   33  /*
  32   34   * Copyright 1993 OpenVision Technologies, Inc., All Rights Reserved.
  33   35   */
  34   36  
  35   37  /*      Copyright (c) 1983, 1984, 1985,  1986, 1987, 1988, 1989 AT&T    */
  36      -/*        All Rights Reserved   */
       38 +/*      All Rights Reserved     */
  37   39  
  38   40  /*
  39   41   * Portions of this source code were derived from Berkeley 4.3 BSD
  40   42   * under license from the Regents of the University of California.
  41   43   */
  42   44  
  43   45  /*
  44   46   * Server-side remote procedure call interface.
  45   47   *
  46   48   * Master transport handle (SVCMASTERXPRT).
  47   49   *   The master transport handle structure is shared among service
  48   50   *   threads processing events on the transport. Some fields in the
  49   51   *   master structure are protected by locks:
  50   52   *   - xp_req_lock protects the request queue:
  51   53   *      xp_req_head, xp_req_tail, xp_reqs, xp_size, xp_full, xp_enable
  52   54   *   - xp_thread_lock protects the thread (clone) counts
  53   55   *      xp_threads, xp_detached_threads, xp_wq
  54   56   *   Each master transport is registered to exactly one thread pool.
  55   57   *
  56   58   * Clone transport handle (SVCXPRT)
  57   59   *   The clone transport handle structure is a per-service-thread handle
  58   60   *   to the transport. The structure carries all the fields/buffers used
  59   61   *   for request processing. A service thread or, in other words, a clone
  60   62   *   structure, can be linked to an arbitrary master structure to process
  61   63   *   requests on this transport. The master handle keeps track of reference
  62   64   *   counts of threads (clones) linked to it. A service thread can switch
  63   65   *   to another transport by unlinking its clone handle from the current
  64   66   *   transport and linking to a new one. Switching is relatively inexpensive
  65   67   *   but it involves locking (master's xprt->xp_thread_lock).
  66   68   *
  67   69   * Pools.
  68   70   *   A pool represents a kernel RPC service (NFS, Lock Manager, etc.).
  69   71   *   Transports related to the service are registered to the service pool.
  70   72   *   Service threads can switch between different transports in the pool.
  71   73   *   Thus, each service has its own pool of service threads. The maximum
  72   74   *   number of threads in a pool is pool->p_maxthreads. This limit allows
  73   75   *   resource usage by the service to be restricted. Some fields are protected
  74   76   *   by locks:
  75   77   *   - p_req_lock protects several counts and flags:
  76   78   *      p_reqs, p_size, p_walkers, p_asleep, p_drowsy, p_req_cv
  77   79   *   - p_thread_lock governs other thread counts:
  78   80   *      p_threads, p_detached_threads, p_reserved_threads, p_closing
  79   81   *
  80   82   *   In addition, each pool contains a doubly-linked list of transports,
  81   83   *   an `xprt-ready' queue and a creator thread (see below). Threads in
  82   84   *   the pool share some other parameters such as stack size and
  83   85   *   polling timeout.
  84   86   *
  85   87   *   Pools are initialized through the svc_pool_create() function called from
  86   88   *   the nfssys() system call. However, thread creation must be done by
  87   89   *   the userland agent. This is done by using SVCPOOL_WAIT and
  88   90   *   SVCPOOL_RUN arguments to nfssys(), which call svc_wait() and
  89   91   *   svc_do_run(), respectively. Once the pool has been initialized,
  90   92   *   the userland process must set up a 'creator' thread. This thread
  91   93   *   should park itself in the kernel by calling svc_wait(). If
  92   94   *   svc_wait() returns successfully, it should fork off a new worker
  93   95   *   thread, which then calls svc_do_run() in order to get work. When
  94   96   *   that thread is complete, svc_do_run() will return, and the user
  95   97   *   program should call thr_exit(). (A sketch of this protocol follows
  96   98   *   this comment block.)
  96   98   *
  97   99   *   When we try to register a new pool and there is an old pool with
  98  100   *   the same id in the doubly linked pool list (this happens when we kill
  99  101   *   and restart nfsd or lockd), then we unlink the old pool from the list
 100  102   *   and mark its state as `closing'. After that, the transports can still
 101  103   *   process requests, but new transports won't be registered. When all the
 102  104   *   transports and service threads associated with the pool are gone the
 103  105   *   creator thread (see below) will clean up the pool structure and exit.
 104  106   *
 105  107   * svc_queuereq() and svc_run().
 106  108   *   The kernel RPC server is interrupt driven. The svc_queuereq() interrupt
 107  109   *   routine is called to deliver an RPC request. The service threads
 108  110   *   loop in svc_run(). The interrupt function places a request on the
 109  111   *   transport's queue and makes sure that the request is serviced.
 110  112   *   It may either wake up one of the sleeping threads, ask for a new thread
 111  113   *   to be created, or, if the previous request is just being picked up, do
 112  114   *   nothing. In the last case, the service thread that is picking up the
 113  115   *   previous request will wake up or create the next thread. After a service
 114  116   *   thread processes a request and sends a reply it returns to svc_run()
 115  117   *   and svc_run() calls svc_poll() to find new input.
 116  118   *
 117  119   * svc_poll().
 118  120   *   In order to avoid unnecessary locking, which causes performance
 119  121   *   problems, we always look for a pending request on the current transport.
 120  122   *   If there is none, we take a hint from the pool's `xprt-ready' queue.
 121  123   *   If the queue has overflowed, we switch to `drain' mode, checking
 122  124   *   each transport in the pool's transport list. Once we find a
 123  125   *   master transport handle with a pending request, we latch the request
 124  126   *   lock on this transport and return to svc_run(). If the request
 125  127   *   belongs to a transport different from the one the service thread is
 126  128   *   linked to, we need to unlink and relink.
 127  129   *
 128  130   *   A service thread goes to sleep when there are no pending
 129  131   *   requests on the transports registered with the pool.
 130  132   *   All the pool's threads sleep on the same condition variable.
 131  133   *   If a thread has been sleeping for too long a period of time
 132  134   *   (by default 5 seconds) it wakes up and exits.  Also, when a transport
 133  135   *   is closing, sleeping threads wake up to unlink from this transport.
 134  136   *
 135  137   * The `xprt-ready' queue.
 136  138   *   If a service thread finds no request on the transport it is currently
 137  139   *   linked to, it will look for another transport with a pending request. To make
 138  140   *   this search more efficient each pool has an `xprt-ready' queue.
 139  141   *   The queue is a FIFO. When the interrupt routine queues a request it also
 140  142   *   inserts a pointer to the transport into the `xprt-ready' queue. A
 141  143   *   thread looking for a transport with a pending request can pop a
 142  144   *   transport off the queue and check it for a request. The request may
 143  145   *   already be gone, since it could have been taken by a thread linked to
 144  146   *   that transport. In such a case we try the next hint. The `xprt-ready'
 145  147   *   queue has a fixed size (by default 256 nodes). If it overflows, svc_poll() has to switch to the
 146  148   *   less efficient but safe `drain' mode and walk through the pool's
 147  149   *   transport list.
 148  150   *
 149  151   *   Both the svc_poll() loop and the `xprt-ready' queue are optimized
 150  152   *   for the peak-load case, that is, for the situation when the queue is not
 151  153   *   empty, there are always a few pending requests, and a service
 152  154   *   thread which has just processed a request does not go to sleep but
 153  155   *   immediately picks up the next request.
 154  156   *
 155  157   * Thread creator.
 156  158   *   Each pool has a thread creator associated with it. The creator thread
 157  159   *   sleeps on a condition variable and waits for a signal to create a
 158  160   *   service thread. The actual thread creation is done in userland by
 159  161   *   the method described in "Pools" above.
 160  162   *
 161  163   *   Signaling threads should turn on the `creator signaled' flag, and
 162  164   *   can avoid sending signals when the flag is on. The flag is cleared
 163  165   *   when the thread is created.
 164  166   *
 165  167   *   When the pool is in the closing state (i.e., it has already been unregistered
 166  168   *   from the pool list) the last thread on the last transport in the pool
 167  169   *   should turn the p_creator_exit flag on. The creator thread will
 168  170   *   clean up the pool structure and exit.
 169  171   *
 170  172   * Thread reservation; Detaching service threads.
 171  173   *   A service thread can detach itself to block for an extended amount
 172  174   *   of time. However, to keep the service active we need to guarantee
 173  175   *   at least pool->p_redline non-detached threads that can process incoming
 174  176   *   requests. Thus, the maximum number of detached and reserved threads is
 175  177   *   p->p_maxthreads - p->p_redline. A service thread should first acquire
 176  178   *   a reservation, and if the reservation was granted it can detach itself.
 177  179   *   If a reservation was granted but the thread does not detach itself
 178  180   *   it should cancel the reservation before it returns to svc_run().
 179  181   */
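
The userland half of the creator protocol described in the "Pools" paragraph above is easy to get wrong, so here is a minimal sketch of it. It assumes the nfssys() entry point with the SVCPOOL_WAIT and SVCPOOL_RUN opcodes mentioned in that paragraph and a thr_create()-style threads API; the function names and error handling are illustrative only.

    /* Hypothetical userland creator loop (sketch, not part of this file). */
    static void *
    svcpool_worker(void *arg)
    {
            int id = *(int *)arg;

            /* Blocks in svc_do_run() until the pool is being destroyed. */
            (void) nfssys(SVCPOOL_RUN, &id);
            thr_exit(NULL);
            return (NULL);
    }

    static void
    svcpool_creator(int id)
    {
            thread_t tid;

            /* svc_wait() parks here; each return asks for one more worker. */
            while (nfssys(SVCPOOL_WAIT, &id) == 0) {
                    (void) thr_create(NULL, 0, svcpool_worker, &id,
                        THR_DETACHED, &tid);
            }
    }
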
 180  182  
 181  183  #include <sys/param.h>
 182  184  #include <sys/types.h>
 183  185  #include <rpc/types.h>
 184  186  #include <sys/socket.h>
 185  187  #include <sys/time.h>
 186  188  #include <sys/tiuser.h>
 187  189  #include <sys/t_kuser.h>
 188  190  #include <netinet/in.h>
 189  191  #include <rpc/xdr.h>
 190  192  #include <rpc/auth.h>
 191  193  #include <rpc/clnt.h>
 192  194  #include <rpc/rpc_msg.h>
 193  195  #include <rpc/svc.h>
 194  196  #include <sys/proc.h>
 195  197  #include <sys/user.h>
 196  198  #include <sys/stream.h>
 197  199  #include <sys/strsubr.h>
 198  200  #include <sys/strsun.h>
 199  201  #include <sys/tihdr.h>
 200  202  #include <sys/debug.h>
 201  203  #include <sys/cmn_err.h>
 202  204  #include <sys/file.h>
 203  205  #include <sys/systm.h>
 204  206  #include <sys/callb.h>
 205  207  #include <sys/vtrace.h>
 206  208  #include <sys/zone.h>
 207  209  #include <nfs/nfs.h>
 208  210  #include <sys/tsol/label_macro.h>
 209  211  
 210  212  /*
 211  213   * Defines for svc_poll()
  
  
 212  214   */
 213  215  #define SVC_EXPRTGONE ((SVCMASTERXPRT *)1)      /* Transport is closing */
 214  216  #define SVC_ETIMEDOUT ((SVCMASTERXPRT *)2)      /* Timeout */
 215  217  #define SVC_EINTR ((SVCMASTERXPRT *)3)          /* Interrupted by signal */
 216  218  
 217  219  /*
 218  220   * Default stack size for service threads.
 219  221   */
 220  222  #define DEFAULT_SVC_RUN_STKSIZE         (0)     /* default kernel stack */
 221  223  
 222      -int    svc_default_stksize = DEFAULT_SVC_RUN_STKSIZE;
      224 +volatile int    svc_default_stksize = DEFAULT_SVC_RUN_STKSIZE;
 223  225  
 224  226  /*
 225  227   * Default polling timeout for service threads.
 226  228   * Multiplied by hz when used.
 227  229   */
 228  230  #define DEFAULT_SVC_POLL_TIMEOUT        (5)     /* seconds */
 229  231  
 230  232  clock_t svc_default_timeout = DEFAULT_SVC_POLL_TIMEOUT;
 231  233  
 232  234  /*
 233  235   * Size of the `xprt-ready' queue.
 234  236   */
 235  237  #define DEFAULT_SVC_QSIZE               (256)   /* qnodes */
 236  238  
 237  239  size_t svc_default_qsize = DEFAULT_SVC_QSIZE;
 238  240  
 239  241  /*
 240  242   * Default limit for the number of service threads.
  
  
 241  243   */
 242  244  #define DEFAULT_SVC_MAXTHREADS          (INT16_MAX)
 243  245  
 244  246  int    svc_default_maxthreads = DEFAULT_SVC_MAXTHREADS;
 245  247  
 246  248  /*
 247  249   * Maximum number of requests from the same transport (in `drain' mode).
 248  250   */
 249  251  #define DEFAULT_SVC_MAX_SAME_XPRT       (8)
 250  252  
 251      -int    svc_default_max_same_xprt = DEFAULT_SVC_MAX_SAME_XPRT;
      253 +volatile int    svc_default_max_same_xprt = DEFAULT_SVC_MAX_SAME_XPRT;
 252  254  
 253  255  
 254  256  /*
 255  257   * Default `Redline' of non-detached threads.
 256  258   * Total number of detached and reserved threads in an RPC server
 257  259   * thread pool is limited to pool->p_maxthreads - svc_redline.
 258  260   */
 259  261  #define DEFAULT_SVC_REDLINE             (1)
 260  262  
 261  263  int    svc_default_redline = DEFAULT_SVC_REDLINE;
 262  264  
 263  265  /*
 264  266   * A node for the `xprt-ready' queue.
 265  267   * See below.
 266  268   */
 267  269  struct __svcxprt_qnode {
 268  270          __SVCXPRT_QNODE *q_next;
 269  271          SVCMASTERXPRT   *q_xprt;
 270  272  };
 271  273  
 272  274  /*
 273  275   * Global SVC variables (private).
 274  276   */
 275  277  struct svc_globals {
 276  278          SVCPOOL         *svc_pools;
 277  279          kmutex_t        svc_plock;
 278  280  };
 279  281  
 280  282  /*
 281  283   * Debug variable to check for RDMA-based
 282  284   * transport startup and cleanup. Controlled
 283  285   * through /etc/system. Off by default.
 284  286   */
 285  287  int rdma_check = 0;
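
Since the comment above says this flag is controlled through /etc/system, turning it on would look like the following line; this sketch assumes the module name is rpcmod, which is where this file's objects are usually delivered.

    set rpcmod:rdma_check = 1
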
 286  288  
 287  289  /*
 288  290   * This allows disabling flow control in svc_queuereq().
  
  
 289  291   */
 290  292  volatile int svc_flowcontrol_disable = 0;
 291  293  
 292  294  /*
 293  295   * Authentication parameters list.
 294  296   */
 295  297  static caddr_t rqcred_head;
 296  298  static kmutex_t rqcred_lock;
 297  299  
 298  300  /*
 299      - * Pointers to transport specific `rele' routines in rpcmod (set from rpcmod).
 300      - */
 301      -void    (*rpc_rele)(queue_t *, mblk_t *, bool_t) = NULL;
 302      -void    (*mir_rele)(queue_t *, mblk_t *, bool_t) = NULL;
 303      -
 304      -/* ARGSUSED */
 305      -void
 306      -rpc_rdma_rele(queue_t *q, mblk_t *mp, bool_t enable)
 307      -{
 308      -}
 309      -void    (*rdma_rele)(queue_t *, mblk_t *, bool_t) = rpc_rdma_rele;
 310      -
 311      -
 312      -/*
 313      - * This macro picks which `rele' routine to use, based on the transport type.
 314      - */
 315      -#define RELE_PROC(xprt) \
 316      -        ((xprt)->xp_type == T_RDMA ? rdma_rele : \
 317      -        (((xprt)->xp_type == T_CLTS) ? rpc_rele : mir_rele))
 318      -
 319      -/*
 320  301   * If true, then keep quiet about version mismatch.
 321  302   * This macro is for broadcast RPC only. We have no broadcast RPC in
 322  303   * kernel now but one may define a flag in the transport structure
 323  304   * and redefine this macro.
 324  305   */
 325  306  #define version_keepquiet(xprt) (FALSE)
 326  307  
 327  308  /*
 328  309   * ZSD key used to retrieve zone-specific svc globals
 329  310   */
 330  311  static zone_key_t svc_zone_key;
 331  312  
 332  313  static void svc_callout_free(SVCMASTERXPRT *);
 333  314  static void svc_xprt_qinit(SVCPOOL *, size_t);
 334  315  static void svc_xprt_qdestroy(SVCPOOL *);
 335  316  static void svc_thread_creator(SVCPOOL *);
 336  317  static void svc_creator_signal(SVCPOOL *);
 337  318  static void svc_creator_signalexit(SVCPOOL *);
 338  319  static void svc_pool_unregister(struct svc_globals *, SVCPOOL *);
 339  320  static int svc_run(SVCPOOL *);
 340  321  
 341  322  /* ARGSUSED */
 342  323  static void *
 343  324  svc_zoneinit(zoneid_t zoneid)
 344  325  {
 345  326          struct svc_globals *svc;
 346  327  
 347  328          svc = kmem_alloc(sizeof (*svc), KM_SLEEP);
 348  329          mutex_init(&svc->svc_plock, NULL, MUTEX_DEFAULT, NULL);
 349  330          svc->svc_pools = NULL;
 350  331          return (svc);
 351  332  }
 352  333  
 353  334  /* ARGSUSED */
 354  335  static void
 355  336  svc_zoneshutdown(zoneid_t zoneid, void *arg)
 356  337  {
 357  338          struct svc_globals *svc = arg;
 358  339          SVCPOOL *pool;
 359  340  
 360  341          mutex_enter(&svc->svc_plock);
 361  342          while ((pool = svc->svc_pools) != NULL) {
 362  343                  svc_pool_unregister(svc, pool);
 363  344          }
 364  345          mutex_exit(&svc->svc_plock);
 365  346  }
 366  347  
 367  348  /* ARGSUSED */
 368  349  static void
 369  350  svc_zonefini(zoneid_t zoneid, void *arg)
 370  351  {
 371  352          struct svc_globals *svc = arg;
 372  353  
 373  354          ASSERT(svc->svc_pools == NULL);
 374  355          mutex_destroy(&svc->svc_plock);
 375  356          kmem_free(svc, sizeof (*svc));
 376  357  }
 377  358  
 378  359  /*
 379  360   * Global SVC init routine.
 380  361   * Initialize global generic and transport type specific structures
 381  362   * used by the kernel RPC server side. This routine is called only
 382  363   * once when the module is being loaded.
 383  364   */
 384  365  void
 385  366  svc_init()
 386  367  {
 387  368          zone_key_create(&svc_zone_key, svc_zoneinit, svc_zoneshutdown,
 388  369              svc_zonefini);
 389  370          svc_cots_init();
 390  371          svc_clts_init();
 391  372  }
 392  373  
 393  374  /*
 394  375   * Destroy the SVCPOOL structure.
 395  376   */
 396  377  static void
 397  378  svc_pool_cleanup(SVCPOOL *pool)
 398  379  {
 399  380          ASSERT(pool->p_threads + pool->p_detached_threads == 0);
 400  381          ASSERT(pool->p_lcount == 0);
 401  382          ASSERT(pool->p_closing);
 402  383  
 403  384          /*
 404  385           * Call the user supplied shutdown function.  This is done
 405  386           * here so the user of the pool will be able to cleanup
 406  387           * service related resources.
 407  388           */
 408  389          if (pool->p_shutdown != NULL)
 409  390                  (pool->p_shutdown)();
 410  391  
 411  392          /* Destroy `xprt-ready' queue */
 412  393          svc_xprt_qdestroy(pool);
 413  394  
 414  395          /* Destroy transport list */
 415  396          rw_destroy(&pool->p_lrwlock);
 416  397  
 417  398          /* Destroy locks and condition variables */
 418  399          mutex_destroy(&pool->p_thread_lock);
 419  400          mutex_destroy(&pool->p_req_lock);
 420  401          cv_destroy(&pool->p_req_cv);
 421  402  
 422  403          /* Destroy creator's locks and condition variables */
 423  404          mutex_destroy(&pool->p_creator_lock);
 424  405          cv_destroy(&pool->p_creator_cv);
 425  406          mutex_destroy(&pool->p_user_lock);
 426  407          cv_destroy(&pool->p_user_cv);
 427  408  
 428  409          /* Free pool structure */
 429  410          kmem_free(pool, sizeof (SVCPOOL));
 430  411  }
 431  412  
 432  413  /*
 433  414   * If all the transports and service threads are already gone
 434  415   * signal the creator thread to clean up and exit.
 435  416   */
 436  417  static bool_t
 437  418  svc_pool_tryexit(SVCPOOL *pool)
 438  419  {
 439  420          ASSERT(MUTEX_HELD(&pool->p_thread_lock));
 440  421          ASSERT(pool->p_closing);
 441  422  
 442  423          if (pool->p_threads + pool->p_detached_threads == 0) {
 443  424                  rw_enter(&pool->p_lrwlock, RW_READER);
 444  425                  if (pool->p_lcount == 0) {
 445  426                          /*
 446  427                           * Release the locks before sending a signal.
 447  428                           */
 448  429                          rw_exit(&pool->p_lrwlock);
 449  430                          mutex_exit(&pool->p_thread_lock);
 450  431  
 451  432                          /*
 452  433                           * Notify the creator thread to clean up and exit
 453  434                           *
 454  435                           * NOTICE: No references to the pool beyond this point!
 455  436                           *                 The pool is being destroyed.
 456  437                           */
 457  438                          ASSERT(!MUTEX_HELD(&pool->p_thread_lock));
 458  439                          svc_creator_signalexit(pool);
 459  440  
 460  441                          return (TRUE);
 461  442                  }
 462  443                  rw_exit(&pool->p_lrwlock);
 463  444          }
 464  445  
 465  446          ASSERT(MUTEX_HELD(&pool->p_thread_lock));
 466  447          return (FALSE);
 467  448  }
 468  449  
 469  450  /*
 470  451   * Find a pool with a given id.
 471  452   */
 472  453  static SVCPOOL *
 473  454  svc_pool_find(struct svc_globals *svc, int id)
 474  455  {
 475  456          SVCPOOL *pool;
 476  457  
 477  458          ASSERT(MUTEX_HELD(&svc->svc_plock));
 478  459  
 479  460          /*
 480  461           * Search the list for a pool with a matching id
 481  462           * and return it if found.
 482  463           */
 483  464          for (pool = svc->svc_pools; pool; pool = pool->p_next)
 484  465                  if (pool->p_id == id)
 485  466                          return (pool);
 486  467  
 487  468          return (NULL);
 488  469  }
 489  470  
 490  471  /*
 491  472   * PSARC 2003/523 Contract Private Interface
 492  473   * svc_do_run
 493  474   * Changes must be reviewed by Solaris File Sharing
 494  475   * Changes must be communicated to contract-2003-523@sun.com
 495  476   */
 496  477  int
 497  478  svc_do_run(int id)
 498  479  {
 499  480          SVCPOOL *pool;
 500  481          int err = 0;
 501  482          struct svc_globals *svc;
 502  483  
 503  484          svc = zone_getspecific(svc_zone_key, curproc->p_zone);
 504  485          mutex_enter(&svc->svc_plock);
 505  486  
 506  487          pool = svc_pool_find(svc, id);
 507  488  
 508  489          mutex_exit(&svc->svc_plock);
 509  490  
 510  491          if (pool == NULL)
 511  492                  return (ENOENT);
 512  493  
 513  494          /*
 514  495           * Increment counter of pool threads now
 515  496           * that a thread has been created.
 516  497           */
 517  498          mutex_enter(&pool->p_thread_lock);
 518  499          pool->p_threads++;
 519  500          mutex_exit(&pool->p_thread_lock);
 520  501  
 521  502          /* Give work to the new thread. */
 522  503          err = svc_run(pool);
 523  504  
 524  505          return (err);
 525  506  }
 526  507  
 527  508  /*
 528  509   * Unregister a pool from the pool list.
 529  510   * Set the closing state. If all the transports and service threads
 530  511   * are already gone signal the creator thread to clean up and exit.
 531  512   */
 532  513  static void
 533  514  svc_pool_unregister(struct svc_globals *svc, SVCPOOL *pool)
 534  515  {
 535  516          SVCPOOL *next = pool->p_next;
 536  517          SVCPOOL *prev = pool->p_prev;
 537  518  
 538  519          ASSERT(MUTEX_HELD(&svc->svc_plock));
 539  520  
 540  521          /* Remove from the list */
 541  522          if (pool == svc->svc_pools)
 542  523                  svc->svc_pools = next;
 543  524          if (next)
 544  525                  next->p_prev = prev;
 545  526          if (prev)
 546  527                  prev->p_next = next;
 547  528          pool->p_next = pool->p_prev = NULL;
 548  529  
 549  530          /*
 550  531           * Offline the pool. Mark the pool as closing.
 551  532           * If there are no transports in this pool notify
 552  533           * the creator thread to clean it up and exit.
 553  534           */
 554  535          mutex_enter(&pool->p_thread_lock);
 555  536          if (pool->p_offline != NULL)
 556  537                  (pool->p_offline)();
 557  538          pool->p_closing = TRUE;
 558  539          if (svc_pool_tryexit(pool))
 559  540                  return;
 560  541          mutex_exit(&pool->p_thread_lock);
 561  542  }
 562  543  
 563  544  /*
 564  545   * Register a pool with a given id in the global doubly linked pool list.
 565  546   * - if there is a pool with the same id in the list then unregister it
 566  547   * - insert the new pool into the list.
 567  548   */
 568  549  static void
 569  550  svc_pool_register(struct svc_globals *svc, SVCPOOL *pool, int id)
 570  551  {
 571  552          SVCPOOL *old_pool;
 572  553  
 573  554          /*
 574  555           * If there is a pool with the same id then remove it from
 575  556           * the list and mark the pool as closing.
 576  557           */
 577  558          mutex_enter(&svc->svc_plock);
 578  559  
 579  560          if (old_pool = svc_pool_find(svc, id))
 580  561                  svc_pool_unregister(svc, old_pool);
 581  562  
 582  563          /* Insert into the doubly linked list */
 583  564          pool->p_id = id;
 584  565          pool->p_next = svc->svc_pools;
 585  566          pool->p_prev = NULL;
 586  567          if (svc->svc_pools)
 587  568                  svc->svc_pools->p_prev = pool;
  
  
 588  569          svc->svc_pools = pool;
 589  570  
 590  571          mutex_exit(&svc->svc_plock);
 591  572  }
 592  573  
 593  574  /*
 594  575   * Initialize a newly created pool structure
 595  576   */
 596  577  static int
 597  578  svc_pool_init(SVCPOOL *pool, uint_t maxthreads, uint_t redline,
 598      -        uint_t qsize, uint_t timeout, uint_t stksize, uint_t max_same_xprt)
      579 +    uint_t qsize, uint_t timeout, uint_t stksize, uint_t max_same_xprt)
 599  580  {
 600  581          klwp_t *lwp = ttolwp(curthread);
 601  582  
 602  583          ASSERT(pool);
 603  584  
 604  585          if (maxthreads == 0)
 605  586                  maxthreads = svc_default_maxthreads;
 606  587          if (redline == 0)
 607  588                  redline = svc_default_redline;
 608  589          if (qsize == 0)
 609  590                  qsize = svc_default_qsize;
 610  591          if (timeout == 0)
 611  592                  timeout = svc_default_timeout;
 612  593          if (stksize == 0)
 613  594                  stksize = svc_default_stksize;
 614  595          if (max_same_xprt == 0)
 615  596                  max_same_xprt = svc_default_max_same_xprt;
 616  597  
 617  598          if (maxthreads < redline)
 618  599                  return (EINVAL);
 619  600  
 620  601          /* Allocate and initialize the `xprt-ready' queue */
 621  602          svc_xprt_qinit(pool, qsize);
 622  603  
 623  604          /* Initialize doubly-linked xprt list */
 624  605          rw_init(&pool->p_lrwlock, NULL, RW_DEFAULT, NULL);
 625  606  
 626  607          /*
 627  608           * Setting lwp_childstksz on the current lwp so that
 628  609           * descendants of this lwp get the modified stacksize, if
 629  610           * it is defined. It is important that either this lwp or
 630  611           * one of its descendants do the actual servicepool thread
 631  612           * creation to maintain the stacksize inheritance.
 632  613           */
 633  614          if (lwp != NULL)
 634  615                  lwp->lwp_childstksz = stksize;
 635  616  
 636  617          /* Initialize thread limits, locks and condition variables */
 637  618          pool->p_maxthreads = maxthreads;
 638  619          pool->p_redline = redline;
 639  620          pool->p_timeout = timeout * hz;
 640  621          pool->p_stksize = stksize;
 641  622          pool->p_max_same_xprt = max_same_xprt;
 642  623          mutex_init(&pool->p_thread_lock, NULL, MUTEX_DEFAULT, NULL);
 643  624          mutex_init(&pool->p_req_lock, NULL, MUTEX_DEFAULT, NULL);
 644  625          cv_init(&pool->p_req_cv, NULL, CV_DEFAULT, NULL);
 645  626  
 646  627          /* Initialize userland creator */
 647  628          pool->p_user_exit = FALSE;
 648  629          pool->p_signal_create_thread = FALSE;
 649  630          pool->p_user_waiting = FALSE;
 650  631          mutex_init(&pool->p_user_lock, NULL, MUTEX_DEFAULT, NULL);
 651  632          cv_init(&pool->p_user_cv, NULL, CV_DEFAULT, NULL);
 652  633  
 653  634          /* Initialize the creator and start the creator thread */
 654  635          pool->p_creator_exit = FALSE;
 655  636          mutex_init(&pool->p_creator_lock, NULL, MUTEX_DEFAULT, NULL);
 656  637          cv_init(&pool->p_creator_cv, NULL, CV_DEFAULT, NULL);
 657  638  
 658  639          (void) zthread_create(NULL, pool->p_stksize, svc_thread_creator,
 659  640              pool, 0, minclsyspri);
 660  641  
 661  642          return (0);
 662  643  }
 663  644  
 664  645  /*
 665  646   * PSARC 2003/523 Contract Private Interface
 666  647   * svc_pool_create
 667  648   * Changes must be reviewed by Solaris File Sharing
 668  649   * Changes must be communicated to contract-2003-523@sun.com
 669  650   *
 670  651   * Create a kernel RPC server-side thread/transport pool.
 671  652   *
 672  653   * This is the public interface for creating a server RPC thread pool
 673  654   * for a given service provider. Transports registered with the pool's id
 674  655   * will be served by the pool's threads. This function is called from the
 675  656   * nfssys() system call.
 676  657   */
 677  658  int
 678  659  svc_pool_create(struct svcpool_args *args)
 679  660  {
 680  661          SVCPOOL *pool;
 681  662          int error;
 682  663          struct svc_globals *svc;
 683  664  
 684  665          /*
 685  666           * Caller should check credentials in a way appropriate
 686  667           * in the context of the call.
 687  668           */
 688  669  
 689  670          svc = zone_getspecific(svc_zone_key, curproc->p_zone);
 690  671          /* Allocate a new pool */
 691  672          pool = kmem_zalloc(sizeof (SVCPOOL), KM_SLEEP);
 692  673  
 693  674          /*
 694  675           * Initialize the pool structure and create a creator thread.
 695  676           */
 696  677          error = svc_pool_init(pool, args->maxthreads, args->redline,
 697  678              args->qsize, args->timeout, args->stksize, args->max_same_xprt);
 698  679  
 699  680          if (error) {
 700  681                  kmem_free(pool, sizeof (SVCPOOL));
 701  682                  return (error);
 702  683          }
 703  684  
 704  685          /* Register the pool with the global pool list */
 705  686          svc_pool_register(svc, pool, args->id);
 706  687  
 707  688          return (0);
 708  689  }
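
A sketch of the caller's side of svc_pool_create(), normally reached through nfssys(): the provider fills in struct svcpool_args, and any field left at zero falls back to the corresponding svc_default_* tunable in svc_pool_init() above. The pool id constant here is hypothetical.

    struct svcpool_args args;
    int error;

    bzero(&args, sizeof (args));
    args.id = MY_SVCPOOL_ID;        /* hypothetical service pool id */
    args.maxthreads = 0;            /* 0 => svc_default_maxthreads */
    args.redline = 0;               /* 0 => svc_default_redline */
    /* qsize, timeout, stksize and max_same_xprt default the same way */

    if ((error = svc_pool_create(&args)) != 0)
            return (error);
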
 709  690  
 710  691  int
 711  692  svc_pool_control(int id, int cmd, void *arg)
 712  693  {
 713  694          SVCPOOL *pool;
 714  695          struct svc_globals *svc;
 715  696  
 716  697          svc = zone_getspecific(svc_zone_key, curproc->p_zone);
 717  698  
 718  699          switch (cmd) {
 719  700          case SVCPSET_SHUTDOWN_PROC:
 720  701                  /*
 721  702                   * Search the list for a pool with a matching id
 722  703                   * and register the transport handle with that pool.
 723  704                   */
 724  705                  mutex_enter(&svc->svc_plock);
 725  706  
 726  707                  if ((pool = svc_pool_find(svc, id)) == NULL) {
 727  708                          mutex_exit(&svc->svc_plock);
 728  709                          return (ENOENT);
 729  710                  }
 730  711                  /*
 731  712                   * Grab the transport list lock before releasing the
 732  713                   * pool list lock
 733  714                   */
 734  715                  rw_enter(&pool->p_lrwlock, RW_WRITER);
 735  716                  mutex_exit(&svc->svc_plock);
 736  717  
 737  718                  pool->p_shutdown = *((void (*)())arg);
 738  719  
 739  720                  rw_exit(&pool->p_lrwlock);
 740  721  
 741  722                  return (0);
 742  723          case SVCPSET_UNREGISTER_PROC:
 743  724                  /*
 744  725                   * Search the list for a pool with a matching id
 745  726                   * and register the unregister callback handle with that pool.
 746  727                   */
 747  728                  mutex_enter(&svc->svc_plock);
 748  729  
 749  730                  if ((pool = svc_pool_find(svc, id)) == NULL) {
 750  731                          mutex_exit(&svc->svc_plock);
 751  732                          return (ENOENT);
 752  733                  }
 753  734                  /*
 754  735                   * Grab the transport list lock before releasing the
 755  736                   * pool list lock
 756  737                   */
 757  738                  rw_enter(&pool->p_lrwlock, RW_WRITER);
 758  739                  mutex_exit(&svc->svc_plock);
 759  740  
 760  741                  pool->p_offline = *((void (*)())arg);
 761  742  
 762  743                  rw_exit(&pool->p_lrwlock);
 763  744  
 764  745                  return (0);
 765  746          default:
 766  747                  return (EINVAL);
 767  748          }
 768  749  }
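
A sketch of how a service provider might install the two callbacks handled above; the pool id and callback bodies are hypothetical. SVCPSET_UNREGISTER_PROC stores into p_offline, which svc_pool_unregister() invokes when the pool goes offline, and SVCPSET_SHUTDOWN_PROC stores into p_shutdown, which svc_pool_cleanup() invokes before the pool is freed.

    static void
    my_pool_offline(void)
    {
            /* stop accepting new work; the pool has been unregistered */
    }

    static void
    my_pool_shutdown(void)
    {
            /* release service-wide resources; the pool is going away */
    }

    (void) svc_pool_control(MY_SVCPOOL_ID, SVCPSET_UNREGISTER_PROC,
        (void *)my_pool_offline);
    (void) svc_pool_control(MY_SVCPOOL_ID, SVCPSET_SHUTDOWN_PROC,
        (void *)my_pool_shutdown);
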
 769  750  
 770  751  /*
 771  752   * Pool's transport list manipulation routines.
 772  753   * - svc_xprt_register()
 773  754   * - svc_xprt_unregister()
 774  755   *
 775  756   * svc_xprt_register() is called from svc_tli_kcreate() to
 776  757   * insert a new master transport handle into the doubly linked
 777  758   * list of server transport handles (one list per pool).
 778  759   *
 779  760   * The list is used by svc_poll(), when it operates in `drain'
 780  761   * mode, to search for a next transport with a pending request.
 781  762   */
 782  763  
 783  764  int
 784  765  svc_xprt_register(SVCMASTERXPRT *xprt, int id)
 785  766  {
 786  767          SVCMASTERXPRT *prev, *next;
 787  768          SVCPOOL *pool;
 788  769          struct svc_globals *svc;
 789  770  
 790  771          svc = zone_getspecific(svc_zone_key, curproc->p_zone);
 791  772          /*
 792  773           * Search the list for a pool with a matching id
 793  774           * and register the transport handle with that pool.
 794  775           */
 795  776          mutex_enter(&svc->svc_plock);
 796  777  
 797  778          if ((pool = svc_pool_find(svc, id)) == NULL) {
 798  779                  mutex_exit(&svc->svc_plock);
 799  780                  return (ENOENT);
 800  781          }
 801  782  
 802  783          /* Grab the transport list lock before releasing the pool list lock */
 803  784          rw_enter(&pool->p_lrwlock, RW_WRITER);
 804  785          mutex_exit(&svc->svc_plock);
 805  786  
 806  787          /* Don't register new transports when the pool is in closing state */
 807  788          if (pool->p_closing) {
 808  789                  rw_exit(&pool->p_lrwlock);
 809  790                  return (EBUSY);
 810  791          }
 811  792  
 812  793          /*
 813  794           * Initialize xp_pool to point to the pool.
 814  795           * We don't want to go through the pool list every time.
 815  796           */
 816  797          xprt->xp_pool = pool;
 817  798  
 818  799          /*
 819  800           * Insert a transport handle into the list.
 820  801           * The list head points to the most recently inserted transport.
 821  802           */
 822  803          if (pool->p_lhead == NULL)
 823  804                  pool->p_lhead = xprt->xp_prev = xprt->xp_next = xprt;
 824  805          else {
 825  806                  next = pool->p_lhead;
 826  807                  prev = pool->p_lhead->xp_prev;
 827  808  
 828  809                  xprt->xp_next = next;
 829  810                  xprt->xp_prev = prev;
 830  811  
 831  812                  pool->p_lhead = prev->xp_next = next->xp_prev = xprt;
 832  813          }
 833  814  
 834  815          /* Increment the transports count */
 835  816          pool->p_lcount++;
 836  817  
 837  818          rw_exit(&pool->p_lrwlock);
 838  819          return (0);
 839  820  }
 840  821  
 841  822  /*
 842  823   * Called from svc_xprt_cleanup() to remove a master transport handle
 843  824   * from the pool's list of server transports (when a transport is
 844  825   * being destroyed).
 845  826   */
 846  827  void
 847  828  svc_xprt_unregister(SVCMASTERXPRT *xprt)
 848  829  {
 849  830          SVCPOOL *pool = xprt->xp_pool;
 850  831  
 851  832          /*
 852  833           * Unlink xprt from the list.
 853  834           * If the list head points to this xprt then move it
 854  835           * to the next xprt or reset to NULL if this is the last
 855  836           * xprt in the list.
 856  837           */
 857  838          rw_enter(&pool->p_lrwlock, RW_WRITER);
 858  839  
 859  840          if (xprt == xprt->xp_next)
 860  841                  pool->p_lhead = NULL;
 861  842          else {
 862  843                  SVCMASTERXPRT *next = xprt->xp_next;
 863  844                  SVCMASTERXPRT *prev = xprt->xp_prev;
 864  845  
 865  846                  next->xp_prev = prev;
 866  847                  prev->xp_next = next;
 867  848  
 868  849                  if (pool->p_lhead == xprt)
 869  850                          pool->p_lhead = next;
 870  851          }
 871  852  
 872  853          xprt->xp_next = xprt->xp_prev = NULL;
 873  854  
 874  855          /* Decrement list count */
 875  856          pool->p_lcount--;
 876  857  
 877  858          rw_exit(&pool->p_lrwlock);
 878  859  }
 879  860  
 880  861  static void
 881  862  svc_xprt_qdestroy(SVCPOOL *pool)
 882  863  {
 883  864          mutex_destroy(&pool->p_qend_lock);
 884  865          kmem_free(pool->p_qbody, pool->p_qsize * sizeof (__SVCXPRT_QNODE));
 885  866  }
 886  867  
 887  868  /*
 888  869   * Initialize an `xprt-ready' queue for a given pool.
 889  870   */
 890  871  static void
 891  872  svc_xprt_qinit(SVCPOOL *pool, size_t qsize)
 892  873  {
 893  874          int i;
 894  875  
 895  876          pool->p_qsize = qsize;
 896  877          pool->p_qbody = kmem_zalloc(pool->p_qsize * sizeof (__SVCXPRT_QNODE),
 897  878              KM_SLEEP);
 898  879  
 899  880          for (i = 0; i < pool->p_qsize - 1; i++)
 900  881                  pool->p_qbody[i].q_next = &(pool->p_qbody[i+1]);
 901  882  
 902  883          pool->p_qbody[pool->p_qsize-1].q_next = &(pool->p_qbody[0]);
 903  884          pool->p_qtop = &(pool->p_qbody[0]);
 904  885          pool->p_qend = &(pool->p_qbody[0]);
 905  886  
 906  887          mutex_init(&pool->p_qend_lock, NULL, MUTEX_DEFAULT, NULL);
 907  888  }
 908  889  
 909  890  /*
 910  891   * Called from the svc_queuereq() interrupt routine to queue
 911  892   * a hint for svc_poll() which transport has a pending request.
 912  893   * - insert a pointer to xprt into the xprt-ready queue (FIFO)
 913  894   * - if the xprt-ready queue is full turn the overflow flag on.
 914  895   *
 915  896   * NOTICE: pool->p_qtop is protected by the pool's request lock
 916  897   * and the caller (svc_queuereq()) must hold the lock.
 917  898   */
 918  899  static void
 919  900  svc_xprt_qput(SVCPOOL *pool, SVCMASTERXPRT *xprt)
 920  901  {
 921  902          ASSERT(MUTEX_HELD(&pool->p_req_lock));
 922  903  
 923  904          /* If the overflow flag is on there is nothing we can do */
 924  905          if (pool->p_qoverflow)
 925  906                  return;
 926  907  
 927  908          /* If the queue is full turn the overflow flag on and exit */
 928  909          if (pool->p_qtop->q_next == pool->p_qend) {
 929  910                  mutex_enter(&pool->p_qend_lock);
 930  911                  if (pool->p_qtop->q_next == pool->p_qend) {
 931  912                          pool->p_qoverflow = TRUE;
 932  913                          mutex_exit(&pool->p_qend_lock);
 933  914                          return;
 934  915                  }
 935  916                  mutex_exit(&pool->p_qend_lock);
 936  917          }
 937  918  
 938  919          /* Insert a hint and move pool->p_qtop */
 939  920          pool->p_qtop->q_xprt = xprt;
 940  921          pool->p_qtop = pool->p_qtop->q_next;
 941  922  }
 942  923  
 943  924  /*
 944  925   * Called from svc_poll() to get a hint which transport has a
 945  926   * pending request. Returns a pointer to a transport or NULL if the
 946  927   * `xprt-ready' queue is empty.
 947  928   *
 948  929   * Since we do not acquire the pool's request lock while checking if
 949  930   * the queue is empty, we may miss a request that is just being delivered.
 950  931   * However, this is OK, since svc_poll() will retry until the
 951  932   * count indicates that there are pending requests for this pool.
 952  933   */
 953  934  static SVCMASTERXPRT *
 954  935  svc_xprt_qget(SVCPOOL *pool)
 955  936  {
 956  937          SVCMASTERXPRT *xprt;
 957  938  
 958  939          mutex_enter(&pool->p_qend_lock);
 959  940          do {
 960  941                  /*
 961  942                   * If the queue is empty return NULL.
 962  943                   * Since we do not acquire the pool's request lock which
 963  944                   * protects pool->p_qtop, this is not an exact check. However,
 964  945                   * this is safe - if we miss a request here svc_poll()
 965  946                   * will retry again.
 966  947                   */
 967  948                  if (pool->p_qend == pool->p_qtop) {
 968  949                          mutex_exit(&pool->p_qend_lock);
 969  950                          return (NULL);
 970  951                  }
 971  952  
 972  953                  /* Get a hint and move pool->p_qend */
 973  954                  xprt = pool->p_qend->q_xprt;
 974  955                  pool->p_qend = pool->p_qend->q_next;
 975  956  
 976  957                  /* Skip hints deleted by svc_xprt_qdelete() */
 977  958          } while (xprt == NULL);
 978  959          mutex_exit(&pool->p_qend_lock);
 979  960  
 980  961          return (xprt);
 981  962  }
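
The put/get pair above leans on two invariants of the circular hint buffer; a self-contained model (plain C, independent of the kernel types) makes them explicit. Note that overflow is deliberately cheap: svc_xprt_qput() only sets p_qoverflow and drops the hint, since hints are advisory and svc_poll() falls back to `drain' mode.

    /* Toy model of the `xprt-ready' ring; one slot is kept spare. */
    typedef struct qnode {
            struct qnode    *q_next;
            void            *q_xprt;
    } qnode_t;

    /* Full one early: advancing the producer would collide with p_qend. */
    #define Q_FULL(qtop, qend)      ((qtop)->q_next == (qend))

    /* Empty when the consumer has caught up with the producer. */
    #define Q_EMPTY(qtop, qend)     ((qend) == (qtop))
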
 982  963  
 983  964  /*
 984  965   * Delete all the references to a transport handle that
 985  966   * is being destroyed from the xprt-ready queue.
 986  967   * Deleted pointers are replaced with NULLs.
 987  968   */
 988  969  static void
 989  970  svc_xprt_qdelete(SVCPOOL *pool, SVCMASTERXPRT *xprt)
 990  971  {
 991  972          __SVCXPRT_QNODE *q;
 992  973  
 993  974          mutex_enter(&pool->p_req_lock);
 994  975          for (q = pool->p_qend; q != pool->p_qtop; q = q->q_next) {
 995  976                  if (q->q_xprt == xprt)
 996  977                          q->q_xprt = NULL;
 997  978          }
 998  979          mutex_exit(&pool->p_req_lock);
 999  980  }
1000  981  
1001  982  /*
1002  983   * Destructor for a master server transport handle.
1003  984   * - if there are no more non-detached threads linked to this transport
1004  985   *   then, if requested, call xp_closeproc (we don't wait for detached
1005  986   *   threads linked to this transport to complete).
1006  987   * - if there are no more threads linked to this
1007  988   *   transport then
1008  989   *   a) remove references to this transport from the xprt-ready queue
1009  990   *   b) remove a reference to this transport from the pool's transport list
1010  991   *   c) call a transport specific `destroy' function
1011  992   *   d) cancel remaining thread reservations.
1012  993   *
1013  994   * NOTICE: Caller must hold the transport's thread lock.
1014  995   */
1015  996  static void
1016  997  svc_xprt_cleanup(SVCMASTERXPRT *xprt, bool_t detached)
1017  998  {
1018  999          ASSERT(MUTEX_HELD(&xprt->xp_thread_lock));
1019 1000          ASSERT(xprt->xp_wq == NULL);
1020 1001  
1021 1002          /*
1022 1003           * If called from the last non-detached thread
1023 1004           * it should call the closeproc on this transport.
1024 1005           */
1025 1006          if (!detached && xprt->xp_threads == 0 && xprt->xp_closeproc) {
1026 1007                  (*(xprt->xp_closeproc)) (xprt);
1027 1008          }
1028 1009  
1029 1010          if (xprt->xp_threads + xprt->xp_detached_threads > 0)
1030 1011                  mutex_exit(&xprt->xp_thread_lock);
1031 1012          else {
1032 1013                  /* Remove references to xprt from the `xprt-ready' queue */
1033 1014                  svc_xprt_qdelete(xprt->xp_pool, xprt);
1034 1015  
1035 1016                  /* Unregister xprt from the pool's transport list */
1036 1017                  svc_xprt_unregister(xprt);
1037 1018                  svc_callout_free(xprt);
1038 1019                  SVC_DESTROY(xprt);
1039 1020          }
1040 1021  }
1041 1022  
1042 1023  /*
1043 1024   * Find a dispatch routine for a given prog/vers pair.
1044 1025   * This function is called from svc_getreq() to search the callout
1045 1026   * table for an entry with a matching RPC program number `prog'
1046 1027   * and a version range that covers `vers'.
1047 1028   * - if it finds a matching entry it returns pointer to the dispatch routine
1048 1029   * - otherwise it returns NULL and, if `minp' or `maxp' are not NULL,
1049 1030   *   fills them with, respectively, lowest version and highest version
1050 1031   *   supported for the program `prog'
1051 1032   */
1052 1033  static SVC_DISPATCH *
1053 1034  svc_callout_find(SVCXPRT *xprt, rpcprog_t prog, rpcvers_t vers,
1054 1035      rpcvers_t *vers_min, rpcvers_t *vers_max)
1055 1036  {
1056 1037          SVC_CALLOUT_TABLE *sct = xprt->xp_sct;
1057 1038          int i;
1058 1039  
1059 1040          *vers_min = ~(rpcvers_t)0;
1060 1041          *vers_max = 0;
1061 1042  
1062 1043          for (i = 0; i < sct->sct_size; i++) {
1063 1044                  SVC_CALLOUT *sc = &sct->sct_sc[i];
1064 1045  
1065 1046                  if (prog == sc->sc_prog) {
1066 1047                          if (vers >= sc->sc_versmin && vers <= sc->sc_versmax)
1067 1048                                  return (sc->sc_dispatch);
1068 1049  
1069 1050                          if (*vers_max < sc->sc_versmax)
1070 1051                                  *vers_max = sc->sc_versmax;
1071 1052                          if (*vers_min > sc->sc_versmin)
1072 1053                                  *vers_min = sc->sc_versmin;
1073 1054                  }
1074 1055          }
1075 1056  
1076 1057          return (NULL);
1077 1058  }
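
For reference, a sketch of the callout table this function searches, modeled on how kernel RPC services typically declare one; the program/version constants and the dispatch routine are hypothetical.

    static void my_dispatch(struct svc_req *, SVCXPRT *);

    static SVC_CALLOUT my_sc[] = {
            { MY_PROGRAM, MY_VERSMIN, MY_VERSMAX, my_dispatch }
    };

    static SVC_CALLOUT_TABLE my_sct = {
            sizeof (my_sc) / sizeof (my_sc[0]),     /* sct_size */
            FALSE,                                  /* sct_free */
            my_sc                                   /* sct_sc */
    };
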
1078 1059  
1079 1060  /*
1080 1061   * Optionally free callout table allocated for this transport by
1081 1062   * the service provider.
1082 1063   */
1083 1064  static void
1084 1065  svc_callout_free(SVCMASTERXPRT *xprt)
1085 1066  {
1086 1067          SVC_CALLOUT_TABLE *sct = xprt->xp_sct;
1087 1068  
1088 1069          if (sct->sct_free) {
1089 1070                  kmem_free(sct->sct_sc, sct->sct_size * sizeof (SVC_CALLOUT));
1090 1071                  kmem_free(sct, sizeof (SVC_CALLOUT_TABLE));
1091 1072          }
1092 1073  }
1093 1074  
1094 1075  /*
1095 1076   * Send a reply to an RPC request
1096 1077   *
1097 1078   * PSARC 2003/523 Contract Private Interface
1098 1079   * svc_sendreply
1099 1080   * Changes must be reviewed by Solaris File Sharing
1100 1081   * Changes must be communicated to contract-2003-523@sun.com
1101 1082   */
1102 1083  bool_t
1103 1084  svc_sendreply(const SVCXPRT *clone_xprt, const xdrproc_t xdr_results,
1104 1085      const caddr_t xdr_location)
1105 1086  {
1106 1087          struct rpc_msg rply;
1107 1088  
1108 1089          rply.rm_direction = REPLY;
1109 1090          rply.rm_reply.rp_stat = MSG_ACCEPTED;
1110 1091          rply.acpted_rply.ar_verf = clone_xprt->xp_verf;
1111 1092          rply.acpted_rply.ar_stat = SUCCESS;
1112 1093          rply.acpted_rply.ar_results.where = xdr_location;
1113 1094          rply.acpted_rply.ar_results.proc = xdr_results;
1114 1095  
1115 1096          return (SVC_REPLY((SVCXPRT *)clone_xprt, &rply));
1116 1097  }
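
A sketch of how a dispatch routine might use svc_sendreply() together with the svcerr_*() replies defined below; the procedure number, result type, XDR routine, and handler are hypothetical.

    static void
    my_dispatch(struct svc_req *rqstp, SVCXPRT *xprt)
    {
            my_res_t res;

            switch (rqstp->rq_proc) {
            case MY_PROC:
                    my_handler(rqstp, &res);
                    if (!svc_sendreply(xprt, xdr_my_res, (caddr_t)&res))
                            svcerr_systemerr(xprt);
                    break;
            default:
                    svcerr_noproc(xprt);
                    break;
            }
    }
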
1117 1098  
1118 1099  /*
1119 1100   * No procedure error reply
1120 1101   *
1121 1102   * PSARC 2003/523 Contract Private Interface
1122 1103   * svcerr_noproc
1123 1104   * Changes must be reviewed by Solaris File Sharing
1124 1105   * Changes must be communicated to contract-2003-523@sun.com
1125 1106   */
1126 1107  void
1127 1108  svcerr_noproc(const SVCXPRT *clone_xprt)
1128 1109  {
1129 1110          struct rpc_msg rply;
1130 1111  
1131 1112          rply.rm_direction = REPLY;
1132 1113          rply.rm_reply.rp_stat = MSG_ACCEPTED;
1133 1114          rply.acpted_rply.ar_verf = clone_xprt->xp_verf;
1134 1115          rply.acpted_rply.ar_stat = PROC_UNAVAIL;
1135 1116          SVC_FREERES((SVCXPRT *)clone_xprt);
1136 1117          SVC_REPLY((SVCXPRT *)clone_xprt, &rply);
1137 1118  }
1138 1119  
1139 1120  /*
1140 1121   * Can't decode arguments error reply
1141 1122   *
1142 1123   * PSARC 2003/523 Contract Private Interface
1143 1124   * svcerr_decode
1144 1125   * Changes must be reviewed by Solaris File Sharing
1145 1126   * Changes must be communicated to contract-2003-523@sun.com
1146 1127   */
1147 1128  void
1148 1129  svcerr_decode(const SVCXPRT *clone_xprt)
1149 1130  {
1150 1131          struct rpc_msg rply;
1151 1132  
1152 1133          rply.rm_direction = REPLY;
1153 1134          rply.rm_reply.rp_stat = MSG_ACCEPTED;
1154 1135          rply.acpted_rply.ar_verf = clone_xprt->xp_verf;
1155 1136          rply.acpted_rply.ar_stat = GARBAGE_ARGS;
1156 1137          SVC_FREERES((SVCXPRT *)clone_xprt);
1157 1138          SVC_REPLY((SVCXPRT *)clone_xprt, &rply);
1158 1139  }
1159 1140  
1160 1141  /*
1161 1142   * Some system error
1162 1143   */
1163 1144  void
1164 1145  svcerr_systemerr(const SVCXPRT *clone_xprt)
1165 1146  {
1166 1147          struct rpc_msg rply;
1167 1148  
1168 1149          rply.rm_direction = REPLY;
1169 1150          rply.rm_reply.rp_stat = MSG_ACCEPTED;
1170 1151          rply.acpted_rply.ar_verf = clone_xprt->xp_verf;
1171 1152          rply.acpted_rply.ar_stat = SYSTEM_ERR;
1172 1153          SVC_FREERES((SVCXPRT *)clone_xprt);
1173 1154          SVC_REPLY((SVCXPRT *)clone_xprt, &rply);
1174 1155  }
1175 1156  
1176 1157  /*
1177 1158   * Authentication error reply
1178 1159   */
1179 1160  void
1180 1161  svcerr_auth(const SVCXPRT *clone_xprt, const enum auth_stat why)
1181 1162  {
1182 1163          struct rpc_msg rply;
1183 1164  
1184 1165          rply.rm_direction = REPLY;
1185 1166          rply.rm_reply.rp_stat = MSG_DENIED;
1186 1167          rply.rjcted_rply.rj_stat = AUTH_ERROR;
1187 1168          rply.rjcted_rply.rj_why = why;
1188 1169          SVC_FREERES((SVCXPRT *)clone_xprt);
1189 1170          SVC_REPLY((SVCXPRT *)clone_xprt, &rply);
1190 1171  }
1191 1172  
1192 1173  /*
1193 1174   * Authentication too weak error reply
1194 1175   */
1195 1176  void
1196 1177  svcerr_weakauth(const SVCXPRT *clone_xprt)
1197 1178  {
1198 1179          svcerr_auth((SVCXPRT *)clone_xprt, AUTH_TOOWEAK);
1199 1180  }
1200 1181  
1201 1182  /*
1202 1183   * Authentication error; bad credentials
1203 1184   */
1204 1185  void
1205 1186  svcerr_badcred(const SVCXPRT *clone_xprt)
1206 1187  {
1207 1188          struct rpc_msg rply;
1208 1189  
1209 1190          rply.rm_direction = REPLY;
1210 1191          rply.rm_reply.rp_stat = MSG_DENIED;
1211 1192          rply.rjcted_rply.rj_stat = AUTH_ERROR;
1212 1193          rply.rjcted_rply.rj_why = AUTH_BADCRED;
1213 1194          SVC_FREERES((SVCXPRT *)clone_xprt);
1214 1195          SVC_REPLY((SVCXPRT *)clone_xprt, &rply);
1215 1196  }
1216 1197  
1217 1198  /*
1218 1199   * Program unavailable error reply
1219 1200   *
1220 1201   * PSARC 2003/523 Contract Private Interface
1221 1202   * svcerr_noprog
1222 1203   * Changes must be reviewed by Solaris File Sharing
1223 1204   * Changes must be communicated to contract-2003-523@sun.com
1224 1205   */
1225 1206  void
1226 1207  svcerr_noprog(const SVCXPRT *clone_xprt)
1227 1208  {
1228 1209          struct rpc_msg rply;
1229 1210  
1230 1211          rply.rm_direction = REPLY;
1231 1212          rply.rm_reply.rp_stat = MSG_ACCEPTED;
1232 1213          rply.acpted_rply.ar_verf = clone_xprt->xp_verf;
1233 1214          rply.acpted_rply.ar_stat = PROG_UNAVAIL;
1234 1215          SVC_FREERES((SVCXPRT *)clone_xprt);
1235 1216          SVC_REPLY((SVCXPRT *)clone_xprt, &rply);
1236 1217  }
1237 1218  
1238 1219  /*
1239 1220   * Program version mismatch error reply
1240 1221   *
1241 1222   * PSARC 2003/523 Contract Private Interface
1242 1223   * svcerr_progvers
1243 1224   * Changes must be reviewed by Solaris File Sharing
1244 1225   * Changes must be communicated to contract-2003-523@sun.com
1245 1226   */
1246 1227  void
1247 1228  svcerr_progvers(const SVCXPRT *clone_xprt,
1248 1229      const rpcvers_t low_vers, const rpcvers_t high_vers)
1249 1230  {
1250 1231          struct rpc_msg rply;
1251 1232  
1252 1233          rply.rm_direction = REPLY;
1253 1234          rply.rm_reply.rp_stat = MSG_ACCEPTED;
1254 1235          rply.acpted_rply.ar_verf = clone_xprt->xp_verf;
1255 1236          rply.acpted_rply.ar_stat = PROG_MISMATCH;
1256 1237          rply.acpted_rply.ar_vers.low = low_vers;
1257 1238          rply.acpted_rply.ar_vers.high = high_vers;
1258 1239          SVC_FREERES((SVCXPRT *)clone_xprt);
1259 1240          SVC_REPLY((SVCXPRT *)clone_xprt, &rply);
1260 1241  }
1261 1242  
1262 1243  /*
1263 1244   * Get server side input from some transport.
1264 1245   *
1265 1246   * Statement of authentication parameters management:
1266 1247   * This function owns and manages all authentication parameters, specifically
1267 1248   * the "raw" parameters (msg.rm_call.cb_cred and msg.rm_call.cb_verf) and
1268 1249   * the "cooked" credentials (rqst->rq_clntcred).
1269 1250   * However, this function does not know the structure of the cooked
1270 1251   * credentials, so it makes the following assumptions:
1271 1252   *   a) the structure is contiguous (no pointers), and
1272 1253   *   b) the cred structure size does not exceed RQCRED_SIZE bytes.
1273 1254   * In all events, all three parameters are freed upon exit from this routine.
1274 1255   * The storage is trivially managed on the call stack in user land, but
1275 1256   * is malloced in kernel land.
1276 1257   *
1277 1258   * Note: the xprt's xp_svc_lock is not held while the service's dispatch
1278 1259   * routine is running.  If we decide to implement svc_unregister(), we'll
1279 1260   * need to decide whether it's okay for a thread to unregister a service
1280 1261   * while a request is being processed.  If we decide that this is a
1281 1262   * problem, we can probably use some sort of reference counting scheme to
1282 1263   * keep the callout entry from going away until the request has completed.
1283 1264   */
1284 1265  static void
1285 1266  svc_getreq(
1286 1267          SVCXPRT *clone_xprt,    /* clone transport handle */
1287 1268          mblk_t *mp)
1288 1269  {
1289 1270          struct rpc_msg msg;
1290 1271          struct svc_req r;
1291 1272          char  *cred_area;       /* too big to allocate on call stack */
1292 1273  
1293 1274          TRACE_0(TR_FAC_KRPC, TR_SVC_GETREQ_START,
1294 1275              "svc_getreq_start:");
1295 1276  
1296 1277          ASSERT(clone_xprt->xp_master != NULL);
1297 1278          ASSERT(!is_system_labeled() || msg_getcred(mp, NULL) != NULL ||
1298 1279              mp->b_datap->db_type != M_DATA);
1299 1280  
1300 1281          /*
1301 1282           * First, allocate the authentication parameters' storage
1302 1283           */
1303 1284          mutex_enter(&rqcred_lock);
1304 1285          if (rqcred_head) {
1305 1286                  cred_area = rqcred_head;
1306 1287  
1307 1288                  /* LINTED pointer alignment */
1308 1289                  rqcred_head = *(caddr_t *)rqcred_head;
1309 1290                  mutex_exit(&rqcred_lock);
1310 1291          } else {
1311 1292                  mutex_exit(&rqcred_lock);
1312 1293                  cred_area = kmem_alloc(2 * MAX_AUTH_BYTES + RQCRED_SIZE,
1313 1294                      KM_SLEEP);
1314 1295          }
1315 1296          msg.rm_call.cb_cred.oa_base = cred_area;
1316 1297          msg.rm_call.cb_verf.oa_base = &(cred_area[MAX_AUTH_BYTES]);
1317 1298          r.rq_clntcred = &(cred_area[2 * MAX_AUTH_BYTES]);
1318 1299  
1319 1300          /*
1320 1301           * The underlying transport recv routine may modify the mblk
1321 1302           * data and make it difficult to extract the label afterwards,
1322 1303           * so get the label from the raw mblk data now.
1323 1304           */
1324 1305          if (is_system_labeled()) {
1325 1306                  cred_t *cr;
1326 1307  
1327 1308                  r.rq_label = kmem_alloc(sizeof (bslabel_t), KM_SLEEP);
1328 1309                  cr = msg_getcred(mp, NULL);
1329 1310                  ASSERT(cr != NULL);
1330 1311  
1331 1312                  bcopy(label2bslabel(crgetlabel(cr)), r.rq_label,
1332 1313                      sizeof (bslabel_t));
1333 1314          } else {
1334 1315                  r.rq_label = NULL;
1335 1316          }
1336 1317  
1337 1318          /*
1338 1319           * Now receive a message from the transport.
1339 1320           */
1340 1321          if (SVC_RECV(clone_xprt, mp, &msg)) {
1341 1322                  void (*dispatchroutine) (struct svc_req *, SVCXPRT *);
1342 1323                  rpcvers_t vers_min;
1343 1324                  rpcvers_t vers_max;
1344 1325                  bool_t no_dispatch;
1345 1326                  enum auth_stat why;
1346 1327  
1347 1328                  /*
1348 1329                   * Find the registered program and call its
1349 1330                   * dispatch routine.
1350 1331                   */
1351 1332                  r.rq_xprt = clone_xprt;
1352 1333                  r.rq_prog = msg.rm_call.cb_prog;
1353 1334                  r.rq_vers = msg.rm_call.cb_vers;
1354 1335                  r.rq_proc = msg.rm_call.cb_proc;
1355 1336                  r.rq_cred = msg.rm_call.cb_cred;
1356 1337  
1357 1338                  /*
1358 1339                   * First authenticate the message.
1359 1340                   */
1360 1341                  TRACE_0(TR_FAC_KRPC, TR_SVC_GETREQ_AUTH_START,
1361 1342                      "svc_getreq_auth_start:");
1362 1343                  if ((why = sec_svc_msg(&r, &msg, &no_dispatch)) != AUTH_OK) {
1363 1344                          TRACE_1(TR_FAC_KRPC, TR_SVC_GETREQ_AUTH_END,
1364 1345                              "svc_getreq_auth_end:(%S)", "failed");
1365 1346                          svcerr_auth(clone_xprt, why);
1366 1347                          /*
1367 1348                           * Free the arguments.
1368 1349                           */
1369 1350                          (void) SVC_FREEARGS(clone_xprt, NULL, NULL);
1370 1351                  } else if (no_dispatch) {
1371 1352                          /*
1372 1353                           * XXX - when bug id 4053736 is done, remove
1373 1354                           * the SVC_FREEARGS() call.
1374 1355                           */
1375 1356                          (void) SVC_FREEARGS(clone_xprt, NULL, NULL);
1376 1357                  } else {
1377 1358                          TRACE_1(TR_FAC_KRPC, TR_SVC_GETREQ_AUTH_END,
1378 1359                              "svc_getreq_auth_end:(%S)", "good");
1379 1360  
1380 1361                          dispatchroutine = svc_callout_find(clone_xprt,
1381 1362                              r.rq_prog, r.rq_vers, &vers_min, &vers_max);
1382 1363  
1383 1364                          if (dispatchroutine) {
1384 1365                                  (*dispatchroutine) (&r, clone_xprt);
1385 1366                          } else {
1386 1367                                  /*
1387 1368                                   * If we got here, the program or version
1388 1369                                   * is not served ...
1389 1370                                   */
1390 1371                                  if (vers_max == 0 ||
1391 1372                                      version_keepquiet(clone_xprt))
1392 1373                                          svcerr_noprog(clone_xprt);
1393 1374                                  else
1394 1375                                          svcerr_progvers(clone_xprt, vers_min,
1395 1376                                              vers_max);
1396 1377  
1397 1378                                  /*
1398 1379                                   * Free the arguments. For successful calls
1399 1380                                   * this is done by the dispatch routine.
1400 1381                                   */
1401 1382                                  (void) SVC_FREEARGS(clone_xprt, NULL, NULL);
1402 1383                                  /* Fall through to ... */
1403 1384                          }
1404 1385                          /*
1405 1386                           * Call cleanup procedure for RPCSEC_GSS.
1406 1387                           * This is a hack since there is currently no
1407 1388                           * op, such as SVC_CLEANAUTH. rpc_gss_cleanup
1408 1389                   * should only be called for a non-null proc.
1409 1390                           * Null procs in RPC GSS are overloaded to
1410 1391                           * provide context setup and control. The main
1411 1392                           * purpose of rpc_gss_cleanup is to decrement the
1412 1393                           * reference count associated with the cached
1413 1394                           * GSS security context. We should never get here
1414 1395                           * for an RPCSEC_GSS null proc since *no_dispatch
1415 1396                           * would have been set to true from sec_svc_msg above.
1416 1397                           */
1417 1398                          if (r.rq_cred.oa_flavor == RPCSEC_GSS)
1418 1399                                  rpc_gss_cleanup(clone_xprt);
1419 1400                  }
1420 1401          }
1421 1402  
1422 1403          if (r.rq_label != NULL)
1423 1404                  kmem_free(r.rq_label, sizeof (bslabel_t));
1424 1405  
1425 1406          /*
1426 1407           * Free authentication parameters' storage
1427 1408           */
1428 1409          mutex_enter(&rqcred_lock);
1429 1410          /* LINTED pointer alignment */
1430 1411          *(caddr_t *)cred_area = rqcred_head;
1431 1412          rqcred_head = cred_area;
1432 1413          mutex_exit(&rqcred_lock);
1433 1414  }
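
The cred_area buffers above are recycled through an intrusive freelist:
rqcred_head points at the most recently freed buffer, and the first word of
each cached buffer stores the pointer to the next one, so no separate list
nodes are needed.  A minimal sketch of the same pop/push pattern, using
hypothetical names (buf_get(), buf_put(), buf_head, buf_lock) in place of the
rqcred-specific ones:

        /* Sketch only: hypothetical stand-ins for the rqcred freelist. */
        #define BUF_SIZE        (2 * MAX_AUTH_BYTES + RQCRED_SIZE)

        static kmutex_t buf_lock;
        static caddr_t buf_head;        /* most recently freed buffer */

        static caddr_t
        buf_get(void)
        {
                caddr_t buf;

                mutex_enter(&buf_lock);
                if (buf_head != NULL) {
                        buf = buf_head;
                        /* First word of a cached buffer is the next link. */
                        buf_head = *(caddr_t *)buf;
                        mutex_exit(&buf_lock);
                        return (buf);
                }
                mutex_exit(&buf_lock);
                return (kmem_alloc(BUF_SIZE, KM_SLEEP));
        }

        static void
        buf_put(caddr_t buf)
        {
                mutex_enter(&buf_lock);
                *(caddr_t *)buf = buf_head;     /* chain the old head */
                buf_head = buf;                 /* push this buffer */
                mutex_exit(&buf_lock);
        }
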
1434 1415  
1435 1416  /*
1436 1417   * Allocate new clone transport handle.
1437 1418   */
1438 1419  SVCXPRT *
1439 1420  svc_clone_init(void)
1440 1421  {
1441 1422          SVCXPRT *clone_xprt;
1442 1423  
1443 1424          clone_xprt = kmem_zalloc(sizeof (SVCXPRT), KM_SLEEP);
1444 1425          clone_xprt->xp_cred = crget();
1445 1426          return (clone_xprt);
1446 1427  }
1447 1428  
1448 1429  /*
1449 1430   * Free memory allocated by svc_clone_init.
1450 1431   */
1451 1432  void
1452 1433  svc_clone_free(SVCXPRT *clone_xprt)
1453 1434  {
1454 1435          /* Free credentials from crget() */
1455 1436          if (clone_xprt->xp_cred)
1456 1437                  crfree(clone_xprt->xp_cred);
1457 1438          kmem_free(clone_xprt, sizeof (SVCXPRT));
1458 1439  }
1459 1440  
1460 1441  /*
1461 1442   * Link a per-thread clone transport handle to a master
1462 1443   * - increment a thread reference count on the master
1463 1444   * - copy some of the master's fields to the clone
1464 1445   * - call a transport specific clone routine.
1465 1446   */
1466 1447  void
1467 1448  svc_clone_link(SVCMASTERXPRT *xprt, SVCXPRT *clone_xprt, SVCXPRT *clone_xprt2)
1468 1449  {
1469 1450          cred_t *cred = clone_xprt->xp_cred;
1470 1451  
1471 1452          ASSERT(cred);
1472 1453  
1473 1454          /*
1474 1455           * Bump up master's thread count.
1475 1456           * Linking a per-thread clone transport handle to a master
1476 1457           * associates a service thread with the master.
1477 1458           */
1478 1459          mutex_enter(&xprt->xp_thread_lock);
1479 1460          xprt->xp_threads++;
1480 1461          mutex_exit(&xprt->xp_thread_lock);
1481 1462  
1482 1463          /* Clear everything */
1483 1464          bzero(clone_xprt, sizeof (SVCXPRT));
1484 1465  
1485 1466          /* Set pointer to the master transport structure */
1486 1467          clone_xprt->xp_master = xprt;
1487 1468  
1488 1469          /* Structure copy of all the common fields */
1489 1470          clone_xprt->xp_xpc = xprt->xp_xpc;
1490 1471  
1491 1472          /* Restore per-thread fields (xp_cred) */
1492 1473          clone_xprt->xp_cred = cred;
1493 1474  
1494 1475          if (clone_xprt2)
1495 1476                  SVC_CLONE_XPRT(clone_xprt2, clone_xprt);
1496 1477  }
1497 1478  
1498 1479  /*
1499 1480   * Unlink a non-detached clone transport handle from a master
1500 1481   * - decrement a thread reference count on the master
1501 1482   * - if the transport is closing (xp_wq is NULL) call svc_xprt_cleanup();
1502 1483   *   if this is the last non-detached/absolute thread on this transport
1503 1484   *   then it will close/destroy the transport
1504 1485   * - call transport specific function to destroy the clone handle
1505 1486   * - clear xp_master to avoid recursion.
1506 1487   */
1507 1488  void
1508 1489  svc_clone_unlink(SVCXPRT *clone_xprt)
1509 1490  {
1510 1491          SVCMASTERXPRT *xprt = clone_xprt->xp_master;
1511 1492  
1512 1493          /* This cannot be a detached thread */
1513 1494          ASSERT(!clone_xprt->xp_detached);
1514 1495          ASSERT(xprt->xp_threads > 0);
1515 1496  
1516 1497          /* Decrement a reference count on the transport */
1517 1498          mutex_enter(&xprt->xp_thread_lock);
1518 1499          xprt->xp_threads--;
1519 1500  
1520 1501          /* svc_xprt_cleanup() unlocks xp_thread_lock or destroys xprt */
1521 1502          if (xprt->xp_wq)
1522 1503                  mutex_exit(&xprt->xp_thread_lock);
1523 1504          else
1524 1505                  svc_xprt_cleanup(xprt, FALSE);
1525 1506  
1526 1507          /* Call a transport specific clone `destroy' function */
1527 1508          SVC_CLONE_DESTROY(clone_xprt);
1528 1509  
1529 1510          /* Clear xp_master */
1530 1511          clone_xprt->xp_master = NULL;
1531 1512  }
1532 1513  
1533 1514  /*
1534 1515   * Unlink a detached clone transport handle from a master
1535 1516   * - decrement the thread count on the master
1536 1517   * - if the transport is closing (xp_wq is NULL) call svc_xprt_cleanup();
1537 1518   *   if this is the last thread on this transport then it will destroy
1538 1519   *   the transport.
1539 1520   * - call a transport specific function to destroy the clone handle
1540 1521   * - clear xp_master to avoid recursion.
1541 1522   */
1542 1523  static void
1543 1524  svc_clone_unlinkdetached(SVCXPRT *clone_xprt)
1544 1525  {
1545 1526          SVCMASTERXPRT *xprt = clone_xprt->xp_master;
1546 1527  
1547 1528          /* This must be a detached thread */
1548 1529          ASSERT(clone_xprt->xp_detached);
1549 1530          ASSERT(xprt->xp_detached_threads > 0);
1550 1531          ASSERT(xprt->xp_threads + xprt->xp_detached_threads > 0);
1551 1532  
1552 1533          /* Grab xprt->xp_thread_lock and decrement link counts */
1553 1534          mutex_enter(&xprt->xp_thread_lock);
1554 1535          xprt->xp_detached_threads--;
1555 1536  
1556 1537          /* svc_xprt_cleanup() unlocks xp_thread_lock or destroys xprt */
1557 1538          if (xprt->xp_wq)
1558 1539                  mutex_exit(&xprt->xp_thread_lock);
1559 1540          else
1560 1541                  svc_xprt_cleanup(xprt, TRUE);
1561 1542  
1562 1543          /* Call transport specific clone `destroy' function */
1563 1544          SVC_CLONE_DESTROY(clone_xprt);
1564 1545  
1565 1546          /* Clear xp_master */
1566 1547          clone_xprt->xp_master = NULL;
1567 1548  }
1568 1549  
1569 1550  /*
1570 1551   * Try to exit a non-detached service thread
1571 1552   * - check if there are enough threads left
1572 1553   * - if this thread (i.e., its clone transport handle) is linked
1573 1554   *   to a master transport then unlink it
1574 1555   * - free the clone structure
1575 1556   * - return to userland for thread exit
1576 1557   *
1577 1558   * If this is the last non-detached thread or the last thread on
1578 1559   * this transport, then the call to svc_clone_unlink() will,
1579 1560   * respectively, close or destroy the transport.
1580 1561   */
1581 1562  static void
1582 1563  svc_thread_exit(SVCPOOL *pool, SVCXPRT *clone_xprt)
1583 1564  {
1584 1565          if (clone_xprt->xp_master)
1585 1566                  svc_clone_unlink(clone_xprt);
1586 1567          svc_clone_free(clone_xprt);
1587 1568  
1588 1569          mutex_enter(&pool->p_thread_lock);
1589 1570          pool->p_threads--;
1590 1571          if (pool->p_closing && svc_pool_tryexit(pool))
1591 1572                  /* return -  thread exit will be handled at user level */
1592 1573                  return;
1593 1574          mutex_exit(&pool->p_thread_lock);
1594 1575  
1595 1576          /* return -  thread exit will be handled at user level */
1596 1577  }
1597 1578  
1598 1579  /*
1599 1580   * Exit a detached service thread that returned to svc_run
1600 1581   * - decrement the `detached thread' count for the pool
1601 1582   * - unlink the detached clone transport handle from the master
1602 1583   * - free the clone structure
1603 1584   * - return to userland for thread exit
1604 1585   *
1605 1586   * If this is the last thread on this transport then the call
1606 1587   * to svc_clone_unlinkdetached() will destroy the transport.
1607 1588   */
1608 1589  static void
1609 1590  svc_thread_exitdetached(SVCPOOL *pool, SVCXPRT *clone_xprt)
1610 1591  {
1611 1592          /* This must be a detached thread */
1612 1593          ASSERT(clone_xprt->xp_master);
1613 1594          ASSERT(clone_xprt->xp_detached);
1614 1595          ASSERT(!MUTEX_HELD(&pool->p_thread_lock));
1615 1596  
1616 1597          svc_clone_unlinkdetached(clone_xprt);
1617 1598          svc_clone_free(clone_xprt);
1618 1599  
1619 1600          mutex_enter(&pool->p_thread_lock);
1620 1601  
1621 1602          ASSERT(pool->p_reserved_threads >= 0);
1622 1603          ASSERT(pool->p_detached_threads > 0);
1623 1604  
1624 1605          pool->p_detached_threads--;
1625 1606          if (pool->p_closing && svc_pool_tryexit(pool))
1626 1607                  /* return -  thread exit will be handled at user level */
1627 1608                  return;
1628 1609          mutex_exit(&pool->p_thread_lock);
1629 1610  
1630 1611          /* return -  thread exit will be handled at user level */
1631 1612  }
1632 1613  
1633 1614  /*
1634 1615   * PSARC 2003/523 Contract Private Interface
1635 1616   * svc_wait
1636 1617   * Changes must be reviewed by Solaris File Sharing
1637 1618   * Changes must be communicated to contract-2003-523@sun.com
1638 1619   */
1639 1620  int
1640 1621  svc_wait(int id)
1641 1622  {
1642 1623          SVCPOOL *pool;
1643 1624          int     err = 0;
1644 1625          struct svc_globals *svc;
1645 1626  
1646 1627          svc = zone_getspecific(svc_zone_key, curproc->p_zone);
1647 1628          mutex_enter(&svc->svc_plock);
1648 1629          pool = svc_pool_find(svc, id);
1649 1630          mutex_exit(&svc->svc_plock);
1650 1631  
1651 1632          if (pool == NULL)
1652 1633                  return (ENOENT);
1653 1634  
1654 1635          mutex_enter(&pool->p_user_lock);
1655 1636  
1656 1637          /* Check if there's already a user thread waiting on this pool */
1657 1638          if (pool->p_user_waiting) {
1658 1639                  mutex_exit(&pool->p_user_lock);
1659 1640                  return (EBUSY);
1660 1641          }
1661 1642  
1662 1643          pool->p_user_waiting = TRUE;
1663 1644  
1664 1645          /* Go to sleep, waiting for the signaled flag. */
1665 1646          while (!pool->p_signal_create_thread && !pool->p_user_exit) {
1666 1647                  if (cv_wait_sig(&pool->p_user_cv, &pool->p_user_lock) == 0) {
1667 1648                          /* Interrupted, return to handle exit or signal */
1668 1649                          pool->p_user_waiting = FALSE;
1669 1650                          pool->p_signal_create_thread = FALSE;
1670 1651                          mutex_exit(&pool->p_user_lock);
1671 1652  
1672 1653                          /*
1673 1654                           * The thread has been interrupted, which means
1674 1655                           * the service daemon is leaving as well, so go
1675 1656                           * ahead and remove the service pool at this
1676 1657                           * time.
1677 1658                           */
1678 1659                          mutex_enter(&svc->svc_plock);
1679 1660                          svc_pool_unregister(svc, pool);
1680 1661                          mutex_exit(&svc->svc_plock);
1681 1662  
1682 1663                          return (EINTR);
1683 1664                  }
1684 1665          }
1685 1666  
1686 1667          pool->p_signal_create_thread = FALSE;
1687 1668          pool->p_user_waiting = FALSE;
1688 1669  
1689 1670          /*
1690 1671           * About to exit the service pool. Set return value
1691 1672           * to let the userland code know our intent. Signal
1692 1673           * svc_thread_creator() so that it can clean up the
1693 1674           * pool structure.
1694 1675           */
1695 1676          if (pool->p_user_exit) {
1696 1677                  err = ECANCELED;
1697 1678                  cv_signal(&pool->p_user_cv);
1698 1679          }
1699 1680  
1700 1681          mutex_exit(&pool->p_user_lock);
1701 1682  
1702 1683          /* Return to userland with error code, for possible thread creation. */
1703 1684          return (err);
1704 1685  }
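
Seen from userland, svc_wait() is a blocking "need another thread?" query:
a return of zero asks the daemon to spawn one more service thread, while
EINTR and ECANCELED indicate the pool is going away.  A hypothetical daemon
loop built on this contract (svc_wait_syscall() and start_service_thread()
are assumed wrappers here, not actual interfaces) might look like:

        #include <errno.h>

        extern int svc_wait_syscall(int);       /* assumed: enters svc_wait() */
        extern void start_service_thread(int);  /* assumed: thread runs svc_run() */

        static void
        pool_wait_loop(int pool_id)
        {
                for (;;) {
                        switch (svc_wait_syscall(pool_id)) {
                        case 0:
                                /* Creator signaled: add one service thread. */
                                start_service_thread(pool_id);
                                break;
                        case EINTR:     /* daemon interrupted, pool removed */
                        case ECANCELED: /* pool is closing */
                        default:        /* ENOENT, EBUSY: setup error */
                                return;
                        }
                }
        }
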
1705 1686  
1706 1687  /*
1707 1688   * `Service threads' creator thread.
1708 1689   * The creator thread waits for a signal to create a new thread.
1709 1690   */
1710 1691  static void
1711 1692  svc_thread_creator(SVCPOOL *pool)
1712 1693  {
1713 1694          callb_cpr_t cpr_info;   /* CPR info for the creator thread */
1714 1695  
1715 1696          CALLB_CPR_INIT(&cpr_info, &pool->p_creator_lock, callb_generic_cpr,
1716 1697              "svc_thread_creator");
1717 1698  
1718 1699          for (;;) {
1719 1700                  mutex_enter(&pool->p_creator_lock);
1720 1701  
1721 1702                  /* Check if someone set the exit flag */
1722 1703                  if (pool->p_creator_exit)
1723 1704                          break;
1724 1705  
1725 1706                  /* Clear the `signaled' flag and go to sleep */
1726 1707                  pool->p_creator_signaled = FALSE;
1727 1708  
1728 1709                  CALLB_CPR_SAFE_BEGIN(&cpr_info);
1729 1710                  cv_wait(&pool->p_creator_cv, &pool->p_creator_lock);
1730 1711                  CALLB_CPR_SAFE_END(&cpr_info, &pool->p_creator_lock);
1731 1712  
1732 1713                  /* Check if someone signaled to exit */
1733 1714                  if (pool->p_creator_exit)
1734 1715                          break;
1735 1716  
1736 1717                  mutex_exit(&pool->p_creator_lock);
1737 1718  
1738 1719                  mutex_enter(&pool->p_thread_lock);
1739 1720  
1740 1721                  /*
1741 1722                   * When the pool is in closing state and all the transports
1742 1723                   * are gone, the creator should not create any new threads.
1743 1724                   */
1744 1725                  if (pool->p_closing) {
1745 1726                          rw_enter(&pool->p_lrwlock, RW_READER);
1746 1727                          if (pool->p_lcount == 0) {
1747 1728                                  rw_exit(&pool->p_lrwlock);
1748 1729                                  mutex_exit(&pool->p_thread_lock);
1749 1730                                  continue;
1750 1731                          }
1751 1732                          rw_exit(&pool->p_lrwlock);
1752 1733                  }
1753 1734  
1754 1735                  /*
1755 1736                   * Create a new service thread now.
1756 1737                   */
1757 1738                  ASSERT(pool->p_reserved_threads >= 0);
1758 1739                  ASSERT(pool->p_detached_threads >= 0);
1759 1740  
1760 1741                  if (pool->p_threads + pool->p_detached_threads <
1761 1742                      pool->p_maxthreads) {
1762 1743                          /*
1763 1744                           * Signal the service pool wait thread
1764 1745                           * only if it hasn't already been signaled.
1765 1746                           */
1766 1747                          mutex_enter(&pool->p_user_lock);
1767 1748                          if (pool->p_signal_create_thread == FALSE) {
1768 1749                                  pool->p_signal_create_thread = TRUE;
1769 1750                                  cv_signal(&pool->p_user_cv);
1770 1751                          }
1771 1752                          mutex_exit(&pool->p_user_lock);
1772 1753  
1773 1754                  }
1774 1755  
1775 1756                  mutex_exit(&pool->p_thread_lock);
1776 1757          }
1777 1758  
1778 1759          /*
1779 1760           * Pool is closed. Cleanup and exit.
1780 1761           */
1781 1762  
1782 1763          /* Signal userland creator thread that it can stop now. */
1783 1764          mutex_enter(&pool->p_user_lock);
1784 1765          pool->p_user_exit = TRUE;
1785 1766          cv_broadcast(&pool->p_user_cv);
1786 1767          mutex_exit(&pool->p_user_lock);
1787 1768  
1788 1769          /* Wait for svc_wait() to be done with the pool */
1789 1770          mutex_enter(&pool->p_user_lock);
1790 1771          while (pool->p_user_waiting) {
1791 1772                  CALLB_CPR_SAFE_BEGIN(&cpr_info);
1792 1773                  cv_wait(&pool->p_user_cv, &pool->p_user_lock);
1793 1774                  CALLB_CPR_SAFE_END(&cpr_info, &pool->p_creator_lock);
1794 1775          }
1795 1776          mutex_exit(&pool->p_user_lock);
1796 1777  
1797 1778          CALLB_CPR_EXIT(&cpr_info);
1798 1779          svc_pool_cleanup(pool);
1799 1780          zthread_exit();
1800 1781  }
1801 1782  
1802 1783  /*
1803 1784   * If the creator thread is idle, signal it to create
1804 1785   * a new service thread.
1805 1786   */
1806 1787  static void
1807 1788  svc_creator_signal(SVCPOOL *pool)
1808 1789  {
1809 1790          mutex_enter(&pool->p_creator_lock);
1810 1791          if (pool->p_creator_signaled == FALSE) {
1811 1792                  pool->p_creator_signaled = TRUE;
1812 1793                  cv_signal(&pool->p_creator_cv);
1813 1794          }
1814 1795          mutex_exit(&pool->p_creator_lock);
1815 1796  }
1816 1797  
1817 1798  /*
1818 1799   * Notify the creator thread to clean up and exit.
1819 1800   */
1820 1801  static void
1821 1802  svc_creator_signalexit(SVCPOOL *pool)
1822 1803  {
1823 1804          mutex_enter(&pool->p_creator_lock);
1824 1805          pool->p_creator_exit = TRUE;
1825 1806          cv_signal(&pool->p_creator_cv);
1826 1807          mutex_exit(&pool->p_creator_lock);
1827 1808  }
1828 1809  
1829 1810  /*
1830 1811   * Polling part of svc_run().
1831 1812   * - search for a transport with a pending request
1832 1813   * - when one is found, latch the request lock and return to svc_run()
1833 1814   * - if there is no request, go to sleep and wait for a signal
1834 1815   * - handle two exceptions:
1835 1816   *   a) current transport is closing
1836 1817   *   b) timeout waiting for a new request
1837 1818   *   in both cases return to svc_run()
1838 1819   */
1839 1820  static SVCMASTERXPRT *
1840 1821  svc_poll(SVCPOOL *pool, SVCMASTERXPRT *xprt, SVCXPRT *clone_xprt)
1841 1822  {
1842 1823          /*
1843 1824           * Main loop iterates until
1844 1825           * a) we find a pending request,
1845 1826           * b) we detect that the current transport is closing, or
1846 1827           * c) we time out waiting for a new request.
1847 1828           */
1848 1829          for (;;) {
1849 1830                  SVCMASTERXPRT *next;
1850 1831                  clock_t timeleft;
1851 1832  
1852 1833                  /*
1853 1834                   * Step 1.
1854 1835                   * Check if there is a pending request on the current
1855 1836                   * transport handle so that we can avoid cloning.
1856 1837                   * If so then decrement the `pending-request' count for
1857 1838                   * the pool and return to svc_run().
1858 1839                   *
1859 1840                   * We need to prevent potential starvation. If a
1860 1841                   * selected transport always has pending requests
1861 1842                   * coming in, the service threads will never switch to
1862 1843                   * another transport. With a limited number of service
1863 1844                   * threads, some transports may never be serviced.
1864 1845                   * To prevent such a scenario we pick up at most
1865 1846                   * pool->p_max_same_xprt requests from the same transport
1866 1847                   * and then take a hint from the xprt-ready queue or walk
1867 1848                   * the transport list.
1868 1849                   */
1869 1850                  if (xprt && xprt->xp_req_head && (!pool->p_qoverflow ||
1870 1851                      clone_xprt->xp_same_xprt++ < pool->p_max_same_xprt)) {
1871 1852                          mutex_enter(&xprt->xp_req_lock);
1872 1853                          if (xprt->xp_req_head)
1873 1854                                  return (xprt);
1874 1855                          mutex_exit(&xprt->xp_req_lock);
1875 1856                  }
1876 1857                  clone_xprt->xp_same_xprt = 0;
1877 1858  
1878 1859                  /*
1879 1860                   * Step 2.
1880 1861                   * If there is no request on the current transport try to
1881 1862                   * find another transport with a pending request.
1882 1863                   */
1883 1864                  mutex_enter(&pool->p_req_lock);
1884 1865                  pool->p_walkers++;
1885 1866                  mutex_exit(&pool->p_req_lock);
1886 1867  
1887 1868                  /*
1888 1869                   * Make sure that transports will not be destroyed just
1889 1870                   * while we are checking them.
1890 1871                   */
1891 1872                  rw_enter(&pool->p_lrwlock, RW_READER);
1892 1873  
1893 1874                  for (;;) {
1894 1875                          SVCMASTERXPRT *hint;
1895 1876  
1896 1877                          /*
1897 1878                           * Get the next transport from the xprt-ready queue.
1898 1879                           * This is a hint. There is no guarantee that the
1899 1880                           * transport still has a pending request since it
1900 1881                           * could be picked up by another thread in step 1.
1901 1882                           *
1902 1883                           * If the transport has a pending request then keep
1903 1884                           * it locked. Decrement the pool's `pending-requests'
1904 1885                           * and `walking-threads' counts, and return
1905 1886                           * to svc_run().
1906 1887                           */
1907 1888                          hint = svc_xprt_qget(pool);
1908 1889  
1909 1890                          if (hint && hint->xp_req_head) {
1910 1891                                  mutex_enter(&hint->xp_req_lock);
1911 1892                                  if (hint->xp_req_head) {
1912 1893                                          rw_exit(&pool->p_lrwlock);
1913 1894  
1914 1895                                          mutex_enter(&pool->p_req_lock);
1915 1896                                          pool->p_walkers--;
1916 1897                                          mutex_exit(&pool->p_req_lock);
1917 1898  
1918 1899                                          return (hint);
1919 1900                                  }
1920 1901                                  mutex_exit(&hint->xp_req_lock);
1921 1902                          }
1922 1903  
1923 1904                          /*
1924 1905                           * If there was no hint in the xprt-ready queue then
1925 1906                           * - if there are fewer pending requests than
1926 1907                           *   polling threads, go to sleep
1927 1908                           * - otherwise check if there was an overflow in the
1928 1909                           *   xprt-ready queue; if so, then we need to break
1929 1910                           *   the `drain' mode
1930 1911                           */
1931 1912                          if (hint == NULL) {
1932 1913                                  if (pool->p_reqs < pool->p_walkers) {
1933 1914                                          mutex_enter(&pool->p_req_lock);
1934 1915                                          if (pool->p_reqs < pool->p_walkers)
1935 1916                                                  goto sleep;
1936 1917                                          mutex_exit(&pool->p_req_lock);
1937 1918                                  }
1938 1919                                  if (pool->p_qoverflow) {
1939 1920                                          break;
1940 1921                                  }
1941 1922                          }
1942 1923                  }
1943 1924  
1944 1925                  /*
1945 1926                   * If there was an overflow in the xprt-ready queue then we
1946 1927                   * need to switch to the `drain' mode, i.e. walk through the
1947 1928                   * pool's transport list and search for a transport with a
1948 1929                   * pending request. If we manage to drain all the pending
1949 1930                   * requests then we can clear the overflow flag. This will
1950 1931                   * switch svc_poll() back to taking hints from the xprt-ready
1951 1932                   * queue (which is generally more efficient).
1952 1933                   *
1953 1934           * If there are no registered transports, simply go to sleep.
1954 1935                   */
1955 1936                  if (xprt == NULL && pool->p_lhead == NULL) {
1956 1937                          mutex_enter(&pool->p_req_lock);
1957 1938                          goto sleep;
1958 1939                  }
1959 1940  
1960 1941                  /*
1961 1942                   * `Walk' through the pool's list of master server
1962 1943                   * transport handles. Continue to loop until there are fewer
1963 1944                   * looping threads than pending requests.
1964 1945                   */
1965 1946                  next = xprt ? xprt->xp_next : pool->p_lhead;
1966 1947  
1967 1948                  for (;;) {
1968 1949                          /*
1969 1950                           * Check if there is a request on this transport.
1970 1951                           *
1971 1952                           * Since blocking on a locked mutex is very expensive,
1972 1953                           * check for a request without a lock first. We may miss
1973 1954                           * a request that is just being delivered, but this will
1974 1955                           * cost at most one full walk through the list.
1975 1956                           */
1976 1957                          if (next->xp_req_head) {
1977 1958                                  /*
1978 1959                                   * Check again, now with a lock.
1979 1960                                   */
1980 1961                                  mutex_enter(&next->xp_req_lock);
1981 1962                                  if (next->xp_req_head) {
1982 1963                                          rw_exit(&pool->p_lrwlock);
1983 1964  
1984 1965                                          mutex_enter(&pool->p_req_lock);
1985 1966                                          pool->p_walkers--;
1986 1967                                          mutex_exit(&pool->p_req_lock);
1987 1968  
1988 1969                                          return (next);
1989 1970                                  }
1990 1971                                  mutex_exit(&next->xp_req_lock);
1991 1972                          }
1992 1973  
1993 1974                          /*
1994 1975                           * Continue to `walk' through the pool's
1995 1976                           * transport list until there are fewer requests
1996 1977                           * than walkers. Check this condition without
1997 1978                           * a lock first to avoid contention on a mutex.
1998 1979                           */
1999 1980                          if (pool->p_reqs < pool->p_walkers) {
2000 1981                                  /* Check again, now with the lock. */
2001 1982                                  mutex_enter(&pool->p_req_lock);
2002 1983                                  if (pool->p_reqs < pool->p_walkers)
2003 1984                                          break;  /* goto sleep */
2004 1985                                  mutex_exit(&pool->p_req_lock);
2005 1986                          }
2006 1987  
2007 1988                          next = next->xp_next;
2008 1989                  }
2009 1990  
2010 1991          sleep:
2011 1992                  /*
2012 1993                   * No work to do. Stop the `walk' and go to sleep.
2013 1994                   * Decrement the `walking-threads' count for the pool.
2014 1995                   */
2015 1996                  pool->p_walkers--;
2016 1997                  rw_exit(&pool->p_lrwlock);
2017 1998  
2018 1999                  /*
2019 2000                   * Count us as asleep, mark this thread as safe
2020 2001                   * for suspend and wait for a request.
2021 2002                   */
2022 2003                  pool->p_asleep++;
2023 2004                  timeleft = cv_reltimedwait_sig(&pool->p_req_cv,
2024 2005                      &pool->p_req_lock, pool->p_timeout, TR_CLOCK_TICK);
2025 2006  
2026 2007                  /*
2027 2008                   * If the drowsy flag is on, someone has signaled
2028 2009                   * a wakeup. In that case the `asleep-threads'
2029 2010                   * count has already been updated, so just clear
2030 2011                   * the flag.
2031 2012                   *
2032 2013                   * If the drowsy flag is off then we need to update
2033 2014                   * the `asleep-threads' count.
2034 2015                   */
2035 2016                  if (pool->p_drowsy) {
2036 2017                          pool->p_drowsy = FALSE;
2037 2018                          /*
2038 2019                           * If the thread is here because it timed out,
2039 2020                           * instead of returning SVC_ETIMEDOUT, it is
2040 2021                           * time to do some more work.
2041 2022                           */
2042 2023                          if (timeleft == -1)
2043 2024                                  timeleft = 1;
2044 2025                  } else {
2045 2026                          pool->p_asleep--;
2046 2027                  }
2047 2028                  mutex_exit(&pool->p_req_lock);
2048 2029  
2049 2030                  /*
2050 2031                   * If we received a signal while waiting for a
2051 2032                   * request, inform svc_run(), so that we can return
2052 2033                   * to user level and exit.
2053 2034                   */
2054 2035                  if (timeleft == 0)
2055 2036                          return (SVC_EINTR);
2056 2037  
2057 2038                  /*
2058 2039                   * If the current transport is gone then notify
2059 2040                   * svc_run() to unlink from it.
2060 2041                   */
2061 2042                  if (xprt && xprt->xp_wq == NULL)
2062 2043                          return (SVC_EXPRTGONE);
2063 2044  
2064 2045                  /*
2065 2046                   * If we have timed out waiting for a request, inform
2066 2047                   * svc_run() that we probably don't need this thread.
2067 2048                   */
2068 2049                  if (timeleft == -1)
2069 2050                          return (SVC_ETIMEDOUT);
2070 2051          }
2071 2052  }
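
The unlocked test of xp_req_head followed by a locked re-check is the
pattern svc_poll() applies in both the hint path and the list walk: the
cheap unlocked read may be stale in either direction, but a false positive
only costs a mutex round trip, and a false negative at most one extra pass
over the transport list.  Isolated into a hypothetical helper, the pattern
looks like:

        /*
         * Sketch only: svc_poll()'s check-then-lock pattern pulled out
         * into a helper.  On success the transport is returned with
         * xp_req_lock still held, exactly as svc_poll() returns it.
         */
        static SVCMASTERXPRT *
        svc_try_claim(SVCMASTERXPRT *xprt)
        {
                if (xprt->xp_req_head == NULL)  /* cheap, possibly stale */
                        return (NULL);

                mutex_enter(&xprt->xp_req_lock);
                if (xprt->xp_req_head != NULL)
                        return (xprt);          /* locked, ready to dequeue */
                mutex_exit(&xprt->xp_req_lock);

                return (NULL);          /* request was taken by another thread */
        }
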
2072 2053  
2073 2054  /*
2074 2055   * Calculate the memory space used by a message.
2075 2056   */
2076 2057  static size_t
2077 2058  svc_msgsize(mblk_t *mp)
2078 2059  {
2079 2060          size_t count = 0;
2080 2061  
2081 2062          for (; mp; mp = mp->b_cont)
2082 2063                  count += MBLKSIZE(mp);
2083 2064  
2084 2065          return (count);
2085 2066  }
2086 2067  
2087 2068  /*
2088 2069   * svc_flowcontrol() attempts to turn the flow control on or off for the
2089 2070   * transport.
2090 2071   *
2091 2072   * On input, xprt->xp_full determines whether the flow control is currently
2092 2073   * off (FALSE) or on (TRUE).  If it is off, we test whether we should
2093 2074   * turn it on, and vice versa.
2094 2075   *
2095 2076   * There are two conditions considered for the flow control.  Both conditions
2096 2077   * have a low and a high watermark.  Once the high watermark is reached in
2097 2078   * EITHER condition the flow control is turned on.  For turning the flow
2098 2079   * control off BOTH conditions must be below the low watermark.
2099 2080   *
2100 2081   * Condition #1 - Number of requests queued:
2101 2082   *
2102 2083   * The max number of threads working on the pool is roughly pool->p_maxthreads.
2103 2084   * Every thread could handle up to pool->p_max_same_xprt requests from one
2104 2085   * transport before it moves to another transport.  See svc_poll() for details.
2105 2086   * In case all threads in the pool are working on a transport they will handle
2106 2087   * no more than enough_reqs (pool->p_maxthreads * pool->p_max_same_xprt)
2107 2088   * requests in one shot from that transport.  We are turning the flow control
2108 2089   * on once the high watermark is reached for a transport so that the underlying
2109 2090   * queue knows the rate of incoming requests is higher than we are able to
2110 2091   * handle.
2111 2092   *
2112 2093   * The high watermark: 2 * enough_reqs
2113 2094   * The low watermark: enough_reqs
2114 2095   *
2115 2096   * Condition #2 - Length of the data payload for the queued messages/requests:
2116 2097   *
2117 2098   * We want to prevent a particular pool from exhausting memory, so once the
2118 2099   * total length of queued requests for the whole pool reaches the high
2119 2100   * watermark we start to turn on the flow control for significant memory
2120 2101   * consumers (individual transports).  To keep the implementation simple
2121 2102   * enough, this condition is not exact, because we count only the data part of
2122 2103   * the queued requests and we ignore the overhead.  For our purposes this
2123 2104   * should be enough.  We should also consider that up to pool->p_maxthreads
2124 2105   * threads for the pool might work on large requests (this is not counted for
2125 2106   * this condition).  We need to leave some space for rest of the system and for
2126 2107   * other big memory consumers (like ZFS).  Also, after the flow control is
2127 2108   * turned on (on cots transports) we can start to accumulate a few megabytes in
2128 2109   * queues for each transport.
2129 2110   *
2130 2111   * Usually, the big memory consumers are NFS WRITE requests, so we do not
2131 2112   * expect to see this condition met for other than NFS pools.
2132 2113   *
2133 2114   * The high watermark: 1/5 of available memory
2134 2115   * The low watermark: 1/6 of available memory
2135 2116   *
2136 2117   * Once the high watermark is reached we turn the flow control on only for
2137 2118   * transports exceeding a per-transport memory limit.  The per-transport
2138 2119   * fraction of memory is calculated as:
2139 2120   *
2140 2121   * the high watermark / number of transports
2141 2122   *
2142 2123   * For transports with less than the per-transport fraction of memory consumed,
2143 2124   * the flow control is not turned on, so they are not blocked by a few "hungry"
2144 2125   * transports.  Because of this, the total memory consumption for the
2145 2126   * particular pool might grow up to 2 * the high watermark.
2146 2127   *
2147 2128   * The individual transports are unblocked once their consumption is below:
2148 2129   *
2149 2130   * per-transport fraction of memory / 2
2150 2131   *
2151 2132   * or once the total memory consumption for the whole pool falls below the low
2152 2133   * watermark.
2153 2134   *
2154 2135   */
2155 2136  static void
2156 2137  svc_flowcontrol(SVCMASTERXPRT *xprt)
2157 2138  {
2158 2139          SVCPOOL *pool = xprt->xp_pool;
2159 2140          size_t totalmem = ptob(physmem);
2160 2141          int enough_reqs = pool->p_maxthreads * pool->p_max_same_xprt;
2161 2142  
2162 2143          ASSERT(MUTEX_HELD(&xprt->xp_req_lock));
2163 2144  
2164 2145          /* Should we turn the flow control on? */
2165 2146          if (xprt->xp_full == FALSE) {
2166 2147                  /* Is flow control disabled? */
2167 2148                  if (svc_flowcontrol_disable != 0)
2168 2149                          return;
2169 2150  
2170 2151                  /* Are there enough requests queued? */
2171 2152                  if (xprt->xp_reqs >= enough_reqs * 2) {
2172 2153                          xprt->xp_full = TRUE;
2173 2154                          return;
2174 2155                  }
2175 2156  
2176 2157                  /*
2177 2158                   * If this pool uses over 20% of memory and this transport
2178 2159                   * is a significant memory consumer, then we are full.
2179 2160                   */
2180 2161                  if (pool->p_size >= totalmem / 5 &&
2181 2162                      xprt->xp_size >= totalmem / 5 / pool->p_lcount)
2182 2163                          xprt->xp_full = TRUE;
2183 2164  
2184 2165                  return;
2185 2166          }
2186 2167  
2187 2168          /* We might want to turn the flow control off */
2188 2169  
2189 2170          /* Do we still have enough requests? */
2190 2171          if (xprt->xp_reqs > enough_reqs)
2191 2172                  return;
2192 2173  
2193 2174          /*
2194 2175           * If this pool still uses over 16% of memory and this transport
2195 2176           * is still a significant memory consumer, then we are still full.
2196 2177           */
2197 2178          if (pool->p_size >= totalmem / 6 &&
2198 2179              xprt->xp_size >= totalmem / 5 / pool->p_lcount / 2)
2199 2180                  return;
2200 2181  
2201 2182          /* Turn the flow control off and make sure rpcmod is notified */
2202 2183          xprt->xp_full = FALSE;
2203 2184          xprt->xp_enable = TRUE;
2204 2185  }
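
To make the watermarks concrete, here is a worked example with hypothetical
numbers (the real values depend on the pool tunables and installed memory):

        /*
         * Assume p_maxthreads = 256, p_max_same_xprt = 8, 64 GB of
         * physical memory, and p_lcount = 16 registered transports.
         *
         * Condition #1 (requests queued on one transport):
         *   enough_reqs = 256 * 8         = 2048
         *   high mark   = 2 * enough_reqs = 4096  -> turn flow control on
         *   low mark    = enough_reqs     = 2048  -> may turn it back off
         *
         * Condition #2 (payload bytes queued for the whole pool):
         *   pool high mark        = 64 GB / 5    = 12.8 GB
         *   pool low mark         = 64 GB / 6    = ~10.7 GB
         *   per-transport slice   = 12.8 GB / 16 = 0.8 GB
         *   per-transport unblock = 0.8 GB / 2   = 0.4 GB
         */
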
2205 2186  
2206 2187  /*
2207 2188   * Main loop of the kernel RPC server
2208 2189   * - wait for input (find a transport with a pending request).
2209 2190   * - dequeue the request
2210 2191   * - call a registered server routine to process the request
2211 2192   *
2212 2193   * There can be many threads running concurrently in this loop
2213 2194   * on the same or on different transports.
2214 2195   */
2215 2196  static int
2216 2197  svc_run(SVCPOOL *pool)
2217 2198  {
2218 2199          SVCMASTERXPRT *xprt = NULL;     /* master transport handle  */
2219 2200          SVCXPRT *clone_xprt;    /* clone for this thread    */
2220 2201          proc_t *p = ttoproc(curthread);
2221 2202  
2222 2203          /* Allocate a clone transport handle for this thread */
2223 2204          clone_xprt = svc_clone_init();
2224 2205  
2225 2206          /*
2226 2207           * The loop iterates until the thread becomes
2227 2208           * idle too long or the transport is gone.
2228 2209           */
2229 2210          for (;;) {
2230 2211                  SVCMASTERXPRT *next;
2231 2212                  mblk_t *mp;
2232 2213                  bool_t enable;
2233 2214                  size_t size;
2234 2215  
2235 2216                  TRACE_0(TR_FAC_KRPC, TR_SVC_RUN, "svc_run");
2236 2217  
2237 2218                  /*
2238 2219                   * If the process is exiting/killed, return
2239 2220                   * immediately without processing any more
2240 2221                   * requests.
2241 2222                   */
2242 2223                  if (p->p_flag & (SEXITING | SKILLED)) {
2243 2224                          svc_thread_exit(pool, clone_xprt);
2244 2225                          return (EINTR);
2245 2226                  }
2246 2227  
2247 2228                  /* Find a transport with a pending request */
2248 2229                  next = svc_poll(pool, xprt, clone_xprt);
2249 2230  
2250 2231                  /*
2251 2232                   * If svc_poll() finds a transport with a request
2252 2233                   * it latches xp_req_lock on it. Therefore we need
2253 2234                   * to dequeue the request and release the lock as
2254 2235                   * soon as possible.
2255 2236                   */
2256 2237                  ASSERT(next != NULL &&
2257 2238                      (next == SVC_EXPRTGONE ||
2258 2239                      next == SVC_ETIMEDOUT ||
2259 2240                      next == SVC_EINTR ||
2260 2241                      MUTEX_HELD(&next->xp_req_lock)));
2261 2242  
2262 2243                  /* Oops! Current transport is closing. Unlink now */
2263 2244                  if (next == SVC_EXPRTGONE) {
2264 2245                          svc_clone_unlink(clone_xprt);
2265 2246                          xprt = NULL;
2266 2247                          continue;
2267 2248                  }
2268 2249  
2269 2250                  /* Oops! Timeout while waiting for a request. Exit */
2270 2251                  if (next == SVC_ETIMEDOUT) {
2271 2252                          svc_thread_exit(pool, clone_xprt);
2272 2253                          return (0);
2273 2254                  }
2274 2255  
2275 2256                  /*
2276 2257                   * Interrupted by a signal while waiting for a
2277 2258                   * request. Return to userspace and exit.
2278 2259                   */
2279 2260                  if (next == SVC_EINTR) {
2280 2261                          svc_thread_exit(pool, clone_xprt);
2281 2262                          return (EINTR);
2282 2263                  }
2283 2264  
2284 2265                  /*
2285 2266                   * De-queue the request and release the request lock
2286 2267                   * on this transport (latched by svc_poll()).
2287 2268                   */
2288 2269                  mp = next->xp_req_head;
2289 2270                  next->xp_req_head = mp->b_next;
2290 2271                  mp->b_next = (mblk_t *)0;
2291 2272                  size = svc_msgsize(mp);
2292 2273  
2293 2274                  mutex_enter(&pool->p_req_lock);
2294 2275                  pool->p_reqs--;
2295 2276                  if (pool->p_reqs == 0)
2296 2277                          pool->p_qoverflow = FALSE;
2297 2278                  pool->p_size -= size;
2298 2279                  mutex_exit(&pool->p_req_lock);
2299 2280  
2300 2281                  next->xp_reqs--;
2301 2282                  next->xp_size -= size;
2302 2283  
2303 2284                  if (next->xp_full)
2304 2285                          svc_flowcontrol(next);
2305 2286  
2306 2287                  TRACE_2(TR_FAC_KRPC, TR_NFSFP_QUE_REQ_DEQ,
2307 2288                      "rpc_que_req_deq:pool %p mp %p", pool, mp);
2308 2289                  mutex_exit(&next->xp_req_lock);
2309 2290  
2310 2291                  /*
2311 2292                   * If this is a new request on the current transport, then
2312 2293                   * the clone structure is already properly initialized.
2313 2294                   * Otherwise, if the request is on a different transport,
2314 2295                   * unlink from the current master and link to
2315 2296                   * the one we got a request on.
2316 2297                   */
2317 2298                  if (next != xprt) {
2318 2299                          if (xprt)
2319 2300                                  svc_clone_unlink(clone_xprt);
2320 2301                          svc_clone_link(next, clone_xprt, NULL);
2321 2302                          xprt = next;
2322 2303                  }
2323 2304  
2324 2305                  /*
2325 2306                   * If there are more requests and req_cv hasn't
2326 2307                   * been signaled yet then wake up one more thread now.
2327 2308                   *
2328 2309                   * We avoid signaling req_cv until the most recently
2329 2310                   * signaled thread wakes up and gets CPU to clear
2330 2311                   * the `drowsy' flag.
2331 2312                   */
2332 2313                  if (!(pool->p_drowsy || pool->p_reqs <= pool->p_walkers ||
2333 2314                      pool->p_asleep == 0)) {
2334 2315                          mutex_enter(&pool->p_req_lock);
2335 2316  
2336 2317                          if (pool->p_drowsy || pool->p_reqs <= pool->p_walkers ||
2337 2318                              pool->p_asleep == 0)
2338 2319                                  mutex_exit(&pool->p_req_lock);
2339 2320                          else {
2340 2321                                  pool->p_asleep--;
2341 2322                                  pool->p_drowsy = TRUE;
2342 2323  
2343 2324                                  cv_signal(&pool->p_req_cv);
2344 2325                                  mutex_exit(&pool->p_req_lock);
2345 2326                          }
2346 2327                  }
2347 2328  
2348 2329                  /*
2349 2330                   * If there are no asleep/signaled threads, we are
2350 2331                   * still below pool->p_maxthreads limit, and no thread is
2351 2332                   * currently being created then signal the creator
2352 2333                   * for one more service thread.
2353 2334                   *
2354 2335                   * The asleep and drowsy checks are not protected
2355 2336                   * by a lock since locking would hurt performance
2356 2337                   * and a wrong decision is not critical.
2357 2338                   */
2358 2339                  if (pool->p_asleep == 0 && !pool->p_drowsy &&
2359 2340                      pool->p_threads + pool->p_detached_threads <
2360 2341                      pool->p_maxthreads)
2361 2342                          svc_creator_signal(pool);
2362 2343  
2363 2344                  /*
2364 2345                   * Process the request.
2365 2346                   */
2366 2347                  svc_getreq(clone_xprt, mp);
2367 2348  
2368 2349                  /* If thread had a reservation it should have been canceled */
2369 2350                  ASSERT(!clone_xprt->xp_reserved);
2370 2351  
2371 2352                  /*
2372 2353                   * If the clone is marked detached then exit.
2373 2354                   * The rpcmod slot has already been released
2374 2355                   * when we detached this thread.
2375 2356                   */
2376 2357                  if (clone_xprt->xp_detached) {
2377 2358                          svc_thread_exitdetached(pool, clone_xprt);
2378 2359                          return (0);
2379 2360                  }
2380 2361  
2381 2362                  /*
2382 2363                   * Release our reference on the rpcmod
2383 2364                   * slot attached to xp_wq->q_ptr.
2384 2365                   */
2385 2366                  mutex_enter(&xprt->xp_req_lock);
2386 2367                  enable = xprt->xp_enable;
2387 2368                  if (enable)
2388 2369                          xprt->xp_enable = FALSE;
2389 2370                  mutex_exit(&xprt->xp_req_lock);
2390      -                (*RELE_PROC(xprt)) (clone_xprt->xp_wq, NULL, enable);
     2371 +                SVC_RELE(clone_xprt, NULL, enable);
2391 2372          }
2392 2373          /* NOTREACHED */
2393 2374  }
2394 2375  
2395 2376  /*
2396 2377   * Flush any pending requests for the queue and
2397 2378   * free the associated mblks.
2398 2379   */
2399 2380  void
2400 2381  svc_queueclean(queue_t *q)
2401 2382  {
2402 2383          SVCMASTERXPRT *xprt = ((void **) q->q_ptr)[0];
2403 2384          mblk_t *mp;
2404 2385          SVCPOOL *pool;
2405 2386  
2406 2387          /*
2407 2388           * clean up the requests
2408 2389           */
2409 2390          mutex_enter(&xprt->xp_req_lock);
2410 2391          pool = xprt->xp_pool;
2411 2392          while ((mp = xprt->xp_req_head) != NULL) {
2412 2393                  /* remove the request from the list */
2413 2394                  xprt->xp_req_head = mp->b_next;
2414 2395                  mp->b_next = (mblk_t *)0;
2415      -                (*RELE_PROC(xprt)) (xprt->xp_wq, mp, FALSE);
     2396 +                SVC_RELE(xprt, mp, FALSE);
2416 2397          }
2417 2398  
2418 2399          mutex_enter(&pool->p_req_lock);
2419 2400          pool->p_reqs -= xprt->xp_reqs;
2420 2401          pool->p_size -= xprt->xp_size;
2421 2402          mutex_exit(&pool->p_req_lock);
2422 2403  
2423 2404          xprt->xp_reqs = 0;
2424 2405          xprt->xp_size = 0;
2425 2406          xprt->xp_full = FALSE;
2426 2407          xprt->xp_enable = FALSE;
2427 2408          mutex_exit(&xprt->xp_req_lock);
2428 2409  }
2429 2410  
2430 2411  /*
2431 2412   * This routine is called by rpcmod to inform kernel RPC that a
2432 2413   * queue is closing. It is called after all the requests have been
2433 2414   * picked up (that is after all the slots on the queue have
2434 2415   * been released by kernel RPC). It is also guaranteed that no more
2435 2416   * requests will be delivered on this transport.
2436 2417   *
2437 2418   * - clear xp_wq to mark the master server transport handle as closing
2438 2419   * - if there are no more threads on this transport close/destroy it
2439 2420   * - otherwise, leave the linked threads to close/destroy the transport
2440 2421   *   later.
2441 2422   */
2442 2423  void
2443 2424  svc_queueclose(queue_t *q)
2444 2425  {
2445 2426          SVCMASTERXPRT *xprt = ((void **) q->q_ptr)[0];
2446 2427  
2447 2428          if (xprt == NULL) {
2448 2429                  /*
2449 2430                   * If there is no master xprt associated with this stream,
2450 2431                   * then there is nothing to do.  This happens regularly
2451 2432                   * with connection-oriented listening streams created by
2452 2433                   * nfsd.
2453 2434                   */
2454 2435                  return;
2455 2436          }
2456 2437  
2457 2438          mutex_enter(&xprt->xp_thread_lock);
2458 2439  
2459 2440          ASSERT(xprt->xp_req_head == NULL);
2460 2441          ASSERT(xprt->xp_wq != NULL);
2461 2442  
2462 2443          xprt->xp_wq = NULL;
2463 2444  
2464 2445          if (xprt->xp_threads == 0) {
2465 2446                  SVCPOOL *pool = xprt->xp_pool;
2466 2447  
2467 2448                  /*
2468 2449                   * svc_xprt_cleanup() destroys the transport
2469 2450                   * or releases the transport thread lock
2470 2451                   */
2471 2452                  svc_xprt_cleanup(xprt, FALSE);
2472 2453  
2473 2454                  mutex_enter(&pool->p_thread_lock);
2474 2455  
2475 2456                  /*
2476 2457                   * If the pool is in closing state and this was
2477 2458                   * the last transport in the pool then signal the creator
2478 2459                   * thread to clean up and exit.
2479 2460                   */
2480 2461                  if (pool->p_closing && svc_pool_tryexit(pool)) {
2481 2462                          return;
2482 2463                  }
2483 2464                  mutex_exit(&pool->p_thread_lock);
2484 2465          } else {
2485 2466                  /*
2486 2467                   * There are still some threads linked to the transport.  They
2487 2468                   * are very likely sleeping in svc_poll().  We could wake them
2488 2469                   * up by broadcasting on the p_req_cv condition variable, but
2489 2470                   * that might give us a performance penalty if there are too
2490 2471                   * many sleeping threads.
2491 2472                   *
2492 2473                   * Instead, we do nothing here.  The linked threads will unlink
2493 2474                   * themselves and destroy the transport once they are woken up
2494 2475                   * on a timeout or by a new request.  There is no reason to
2495 2476                   * hurry the wakeup now.
2496 2477                   */
2497 2478  
2498 2479                  /*
2499 2480                   *  NOTICE: No references to the master transport structure
2500 2481                   *          beyond this point!
2501 2482                   */
2502 2483                  mutex_exit(&xprt->xp_thread_lock);
2503 2484          }
2504 2485  }
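
The ordering here is a real contract: svc_queueclose() asserts that xp_req_head is NULL, so a closing stream must be drained (with svc_queueclean() above, or by hand as rdma_stop() does below) before the close is announced. A minimal sketch of a conforming close path, assuming a hypothetical entry-point name (rpc_close_sketch is illustrative, not a real rpcmod routine):

    #include <sys/stream.h>
    #include <rpc/svc.h>

    /*
     * Hypothetical close path.  The order matters: svc_queueclose()
     * asserts that the request queue is already empty, so the queue
     * must be flushed before kernel RPC is told it is closing.
     */
    static void
    rpc_close_sketch(queue_t *q)
    {
            svc_queueclean(q);      /* free queued requests (mblks) */
            svc_queueclose(q);      /* clear xp_wq; destroy xprt if idle */
    }
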
2505 2486  
2506 2487  /*
2507 2488   * Interrupt `request delivery' routine called from rpcmod
2508 2489   * - put a request at the tail of the transport request queue
2509 2490   * - insert a hint for svc_poll() into the xprt-ready queue
2510 2491   * - increment the `pending-requests' count for the pool
2511 2492   * - handle flow control
2512 2493   * - wake up a thread sleeping in svc_poll() if necessary
2513 2494   * - if all the threads are running ask the creator for a new one.
2514 2495   */
2515 2496  bool_t
2516 2497  svc_queuereq(queue_t *q, mblk_t *mp, bool_t flowcontrol)
2517 2498  {
2518 2499          SVCMASTERXPRT *xprt = ((void **) q->q_ptr)[0];
2519 2500          SVCPOOL *pool = xprt->xp_pool;
2520 2501          size_t size;
2521 2502  
2522 2503          TRACE_0(TR_FAC_KRPC, TR_SVC_QUEUEREQ_START, "svc_queuereq_start");
2523 2504  
2524 2505          ASSERT(!is_system_labeled() || msg_getcred(mp, NULL) != NULL ||
2525 2506              mp->b_datap->db_type != M_DATA);
2526 2507  
2527 2508          /*
2528 2509           * Step 1.
2529 2510           * Grab the transport's request lock and the
2530 2511           * pool's request lock so that when we put
2531 2512           * the request at the tail of the transport's
2532 2513           * request queue, possibly put the request on
2533 2514           * the xprt ready queue and increment the
2534 2515           * pending request count, it looks atomic.
2535 2516           */
2536 2517          mutex_enter(&xprt->xp_req_lock);
2537 2518          if (flowcontrol && xprt->xp_full) {
2538 2519                  mutex_exit(&xprt->xp_req_lock);
2539 2520  
2540 2521                  return (FALSE);
2541 2522          }
2542 2523          ASSERT(xprt->xp_full == FALSE);
2543 2524          mutex_enter(&pool->p_req_lock);
2544 2525          if (xprt->xp_req_head == NULL)
2545 2526                  xprt->xp_req_head = mp;
2546 2527          else
2547 2528                  xprt->xp_req_tail->b_next = mp;
2548 2529          xprt->xp_req_tail = mp;
2549 2530  
2550 2531          /*
2551 2532           * Step 2.
2552 2533           * Insert a hint into the xprt-ready queue, increment
2553 2534           * counters, handle flow control, and wake up
2554 2535           * a thread sleeping in svc_poll() if necessary.
2555 2536           */
2556 2537  
2557 2538          /* Insert pointer to this transport into the xprt-ready queue */
2558 2539          svc_xprt_qput(pool, xprt);
2559 2540  
2560 2541          /* Increment counters */
2561 2542          pool->p_reqs++;
2562 2543          xprt->xp_reqs++;
2563 2544  
2564 2545          size = svc_msgsize(mp);
2565 2546          xprt->xp_size += size;
2566 2547          pool->p_size += size;
2567 2548  
2568 2549          /* Handle flow control */
2569 2550          if (flowcontrol)
2570 2551                  svc_flowcontrol(xprt);
2571 2552  
2572 2553          TRACE_2(TR_FAC_KRPC, TR_NFSFP_QUE_REQ_ENQ,
2573 2554              "rpc_que_req_enq:pool %p mp %p", pool, mp);
2574 2555  
2575 2556          /*
2576 2557           * If there are more requests and req_cv hasn't
2577 2558           * been signaled yet then wake up one more thread now.
2578 2559           *
2579 2560           * We avoid signaling req_cv until the most recently
2580 2561           * signaled thread wakes up and gets CPU to clear
2581 2562           * the `drowsy' flag.
2582 2563           */
2583 2564          if (pool->p_drowsy || pool->p_reqs <= pool->p_walkers ||
2584 2565              pool->p_asleep == 0) {
2585 2566                  mutex_exit(&pool->p_req_lock);
2586 2567          } else {
2587 2568                  pool->p_drowsy = TRUE;
2588 2569                  pool->p_asleep--;
2589 2570  
2590 2571                  /*
2591 2572                   * Signal wakeup and drop the request lock.
2592 2573                   */
2593 2574                  cv_signal(&pool->p_req_cv);
2594 2575                  mutex_exit(&pool->p_req_lock);
2595 2576          }
2596 2577          mutex_exit(&xprt->xp_req_lock);
2597 2578  
2598 2579          /*
2599 2580           * Step 3.
2600 2581           * If there are no asleep/signaled threads, we are
2601 2582           * still below pool->p_maxthreads limit, and no thread is
2602 2583           * currently being created then signal the creator
2603 2584           * for one more service thread.
2604 2585           *
2605 2586           * The asleep and drowsy checks are not protected
2606 2587           * by a lock since it hurts performance and a wrong
2607 2588           * decision is not essential.
2608 2589           */
2609 2590          if (pool->p_asleep == 0 && !pool->p_drowsy &&
2610 2591              pool->p_threads + pool->p_detached_threads < pool->p_maxthreads)
2611 2592                  svc_creator_signal(pool);
2612 2593  
2613 2594          TRACE_1(TR_FAC_KRPC, TR_SVC_QUEUEREQ_END,
2614 2595              "svc_queuereq_end:(%S)", "end");
2615 2596  
2616 2597          return (TRUE);
2617 2598  }
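
The FALSE return is the flow-control signal: a caller that passed flowcontrol == TRUE must keep the rejected message and deliver it again once the slot release path (SVC_RELE() with xp_enable set, above) re-enables the stream. A minimal sketch of such a caller, assuming a hypothetical function name (deliver_sketch):

    #include <sys/stream.h>
    #include <rpc/svc.h>

    /*
     * Hypothetical delivery path.  A full transport refuses the
     * message, so it is put back on the STREAMS queue and retried
     * when the stream is re-enabled, rather than being dropped.
     */
    static int
    deliver_sketch(queue_t *q, mblk_t *mp)
    {
            if (!svc_queuereq(q, mp, TRUE)) {
                    (void) putbq(q, mp);    /* requeue; retry later */
                    return (0);
            }
            return (1);
    }
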
2618 2599  
2619 2600  /*
2620 2601   * Reserve a service thread so that it can be detached later.
2621 2602   * This reservation is required to make sure that when it tries to
2622 2603   * detach itself the total number of detached threads does not exceed
2623 2604   * pool->p_maxthreads - pool->p_redline (i.e. that room is always
2624 2605   * left for at least pool->p_redline non-detached threads).
2625 2606   *
2626 2607   * If the thread does not detach itself later, it should cancel the
2627 2608   * reservation before returning to svc_run().
2628 2609   *
2629 2610   * - check if there is room for more reserved/detached threads
2630 2611   * - if so, then increment the `reserved threads' count for the pool
2631 2612   * - mark the thread as reserved (setting the flag in the clone transport
2632 2613   *   handle for this thread)
2633 2614   * - returns 1 if the reservation succeeded, 0 if it failed.
2634 2615   */
2635 2616  int
2636 2617  svc_reserve_thread(SVCXPRT *clone_xprt)
2637 2618  {
2638 2619          SVCPOOL *pool = clone_xprt->xp_master->xp_pool;
2639 2620  
2640 2621          /* Recursive reservations are not allowed */
2641 2622          ASSERT(!clone_xprt->xp_reserved);
2642 2623          ASSERT(!clone_xprt->xp_detached);
2643 2624  
2644 2625          /* Check pool counts if there is room for reservation */
2645 2626          mutex_enter(&pool->p_thread_lock);
2646 2627          if (pool->p_reserved_threads + pool->p_detached_threads >=
2647 2628              pool->p_maxthreads - pool->p_redline) {
2648 2629                  mutex_exit(&pool->p_thread_lock);
2649 2630                  return (0);
2650 2631          }
2651 2632          pool->p_reserved_threads++;
2652 2633          mutex_exit(&pool->p_thread_lock);
2653 2634  
2654 2635          /* Mark the thread (clone handle) as reserved */
2655 2636          clone_xprt->xp_reserved = TRUE;
2656 2637  
2657 2638          return (1);
2658 2639  }
2659 2640  
2660 2641  /*
2661 2642   * Cancel a reservation for a thread.
2662 2643   * - decrement the `reserved threads' count for the pool
2663 2644   * - clear the flag in the clone transport handle for this thread.
2664 2645   */
2665 2646  void
2666 2647  svc_unreserve_thread(SVCXPRT *clone_xprt)
2667 2648  {
2668 2649          SVCPOOL *pool = clone_xprt->xp_master->xp_pool;
2669 2650  
2670 2651          /* Thread must have a reservation */
2671 2652          ASSERT(clone_xprt->xp_reserved);
2672 2653          ASSERT(!clone_xprt->xp_detached);
2673 2654  
2674 2655          /* Decrement global count */
2675 2656          mutex_enter(&pool->p_thread_lock);
2676 2657          pool->p_reserved_threads--;
2677 2658          mutex_exit(&pool->p_thread_lock);
2678 2659  
2679 2660          /* Clear reservation flag */
2680 2661          clone_xprt->xp_reserved = FALSE;
2681 2662  }
2682 2663  
2683 2664  /*
2684 2665   * Detach a thread from its transport, so that it can block for an
2685 2666   * extended time.  Because the transport can be closed after the thread is
2686 2667   * detached, the thread should have already sent off a reply if it was
2687 2668   * going to send one.
2688 2669   *
2689 2670   * - decrement `non-detached threads' count and increment `detached threads'
2690 2671   *   counts for the transport
2691 2672   * - decrement the  `non-detached threads' and `reserved threads'
2692 2673   *   counts and increment the `detached threads' count for the pool
2693 2674   * - release the rpcmod slot
2694 2675   * - mark the clone (thread) as detached.
2695 2676   *
2696 2677   * No need to return a pointer to the thread's CPR information, since
2697 2678   * the thread has a userland identity.
2698 2679   *
2699 2680   * NOTICE: a thread must not detach itself without making a prior reservation
2700 2681   *         through svc_reserve_thread().
2701 2682   */
2702 2683  callb_cpr_t *
2703 2684  svc_detach_thread(SVCXPRT *clone_xprt)
2704 2685  {
2705 2686          SVCMASTERXPRT *xprt = clone_xprt->xp_master;
2706 2687          SVCPOOL *pool = xprt->xp_pool;
2707 2688          bool_t enable;
2708 2689  
2709 2690          /* Thread must have a reservation */
2710 2691          ASSERT(clone_xprt->xp_reserved);
2711 2692          ASSERT(!clone_xprt->xp_detached);
2712 2693  
2713 2694          /* Bookkeeping for this transport */
2714 2695          mutex_enter(&xprt->xp_thread_lock);
2715 2696          xprt->xp_threads--;
2716 2697          xprt->xp_detached_threads++;
2717 2698          mutex_exit(&xprt->xp_thread_lock);
2718 2699  
2719 2700          /* Bookkeeping for the pool */
2720 2701          mutex_enter(&pool->p_thread_lock);
2721 2702          pool->p_threads--;
  
2722 2703          pool->p_reserved_threads--;
2723 2704          pool->p_detached_threads++;
2724 2705          mutex_exit(&pool->p_thread_lock);
2725 2706  
2726 2707          /* Release an rpcmod slot for this request */
2727 2708          mutex_enter(&xprt->xp_req_lock);
2728 2709          enable = xprt->xp_enable;
2729 2710          if (enable)
2730 2711                  xprt->xp_enable = FALSE;
2731 2712          mutex_exit(&xprt->xp_req_lock);
2732      -        (*RELE_PROC(xprt)) (clone_xprt->xp_wq, NULL, enable);
     2713 +        SVC_RELE(clone_xprt, NULL, enable);
2733 2714  
2734 2715          /* Mark the clone (thread) as detached */
2735 2716          clone_xprt->xp_reserved = FALSE;
2736 2717          clone_xprt->xp_detached = TRUE;
2737 2718  
2738 2719          return (NULL);
2739 2720  }
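
Taken together, svc_reserve_thread(), svc_unreserve_thread() and svc_detach_thread() form a reserve-then-detach-or-cancel protocol: with, say, p_maxthreads = 16 and p_redline = 1, at most 15 threads can hold reservations or be detached at once, so room is always left for at least one attached worker. A minimal sketch of a service routine using the protocol, assuming hypothetical names (service_sketch, must_block):

    #include <rpc/svc.h>

    /*
     * Hypothetical service-thread fragment.  A reservation is
     * mandatory before detaching; a thread that does not detach
     * must cancel it before returning to svc_run().
     */
    static void
    service_sketch(SVCXPRT *clone_xprt, bool_t must_block)
    {
            if (svc_reserve_thread(clone_xprt) == 0)
                    return;         /* no room; stay attached */

            if (must_block) {
                    /* Any reply must already have been sent. */
                    (void) svc_detach_thread(clone_xprt);
                    /* ... block for an extended time ... */
            } else {
                    svc_unreserve_thread(clone_xprt);
            }
    }
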
2740 2721  
2741 2722  /*
2742 2723   * This routine extracts each RDMA plugin master XPRT, unregisters it
2743 2724   * from the SVCPOOL, and initiates plugin-specific cleanup.  It is
2744 2725   * passed a list/group of rdma transport records active in a given
2745 2726   * registered or unregistered kRPC thread pool, and it shuts down all
2746 2727   * active rdma transports in that pool.  If the thread active on the
2747 2728   * transport is the last thread for that pool, it signals the creator
2748 2729   * thread to clean up the pool and destroy the xprt in svc_queueclose().
2749 2730   */
2750 2731  void
2751 2732  rdma_stop(rdma_xprt_group_t *rdma_xprts)
2752 2733  {
2753 2734          SVCMASTERXPRT *xprt;
2754 2735          rdma_xprt_record_t *curr_rec;
2755 2736          queue_t *q;
2756 2737          mblk_t *mp;
2757 2738          int i, rtg_count;
2758 2739          SVCPOOL *pool;
2759 2740  
2760 2741          if (rdma_xprts->rtg_count == 0)
2761 2742                  return;
2762 2743  
2763 2744          rtg_count = rdma_xprts->rtg_count;
2764 2745  
2765 2746          for (i = 0; i < rtg_count; i++) {
2766 2747                  curr_rec = rdma_xprts->rtg_listhead;
2767 2748                  rdma_xprts->rtg_listhead = curr_rec->rtr_next;
2768 2749                  rdma_xprts->rtg_count--;
2769 2750                  curr_rec->rtr_next = NULL;
2770 2751                  xprt = curr_rec->rtr_xprt_ptr;
2771 2752                  q = xprt->xp_wq;
2772 2753                  svc_rdma_kstop(xprt);
2773 2754  
2774 2755                  mutex_enter(&xprt->xp_req_lock);
2775 2756                  pool = xprt->xp_pool;
2776 2757                  while ((mp = xprt->xp_req_head) != NULL) {
2777 2758                          rdma_recv_data_t *rdp = (rdma_recv_data_t *)mp->b_rptr;
2778 2759  
2779 2760                          /* remove the request from the list */
2780 2761                          xprt->xp_req_head = mp->b_next;
2781 2762                          mp->b_next = (mblk_t *)0;
2782 2763  
2783 2764                          RDMA_BUF_FREE(rdp->conn, &rdp->rpcmsg);
2784 2765                          RDMA_REL_CONN(rdp->conn);
2785 2766                          freemsg(mp);
2786 2767                  }
2787 2768                  mutex_enter(&pool->p_req_lock);
2788 2769                  pool->p_reqs -= xprt->xp_reqs;
2789 2770                  pool->p_size -= xprt->xp_size;
2790 2771                  mutex_exit(&pool->p_req_lock);
2791 2772                  xprt->xp_reqs = 0;
2792 2773                  xprt->xp_size = 0;
2793 2774                  xprt->xp_full = FALSE;
2794 2775                  xprt->xp_enable = FALSE;
2795 2776                  mutex_exit(&xprt->xp_req_lock);
2796 2777                  svc_queueclose(q);
2797 2778  #ifdef  DEBUG
2798 2779                  if (rdma_check)
2799 2780                          cmn_err(CE_NOTE, "rdma_stop: Exited svc_queueclose\n");
2800 2781  #endif
2801 2782                  /*
2802 2783                   * Free the rdma transport record for the expunged rdma
2803 2784                   * based master transport handle.
2804 2785                   */
2805 2786                  kmem_free(curr_rec, sizeof (rdma_xprt_record_t));
2806 2787                  if (!rdma_xprts->rtg_listhead)
2807 2788                          break;
2808 2789          }
2809 2790  }
2810 2791  
2811 2792  
2812 2793  /*
2813 2794   * rpc_msg_dup/rpc_msg_free
2814 2795   * Currently only used by svc_rpcsec_gss.c but put in this file as it
2815 2796   * may be useful to others in the future.
2816 2797   * But future consumers should be careful because so far
2817 2798   *   - only tested/used for call msgs (not reply)
2818 2799   *   - only tested/used with call verf oa_length==0
2819 2800   */
2820 2801  struct rpc_msg *
2821 2802  rpc_msg_dup(struct rpc_msg *src)
2822 2803  {
2823 2804          struct rpc_msg *dst;
2824 2805          struct opaque_auth oa_src, oa_dst;
2825 2806  
2826 2807          dst = kmem_alloc(sizeof (*dst), KM_SLEEP);
2827 2808  
2828 2809          dst->rm_xid = src->rm_xid;
2829 2810          dst->rm_direction = src->rm_direction;
2830 2811  
2831 2812          dst->rm_call.cb_rpcvers = src->rm_call.cb_rpcvers;
2832 2813          dst->rm_call.cb_prog = src->rm_call.cb_prog;
2833 2814          dst->rm_call.cb_vers = src->rm_call.cb_vers;
2834 2815          dst->rm_call.cb_proc = src->rm_call.cb_proc;
2835 2816  
2836 2817          /* dup opaque auth call body cred */
2837 2818          oa_src = src->rm_call.cb_cred;
2838 2819  
2839 2820          oa_dst.oa_flavor = oa_src.oa_flavor;
2840 2821          oa_dst.oa_base = kmem_alloc(oa_src.oa_length, KM_SLEEP);
2841 2822  
2842 2823          bcopy(oa_src.oa_base, oa_dst.oa_base, oa_src.oa_length);
2843 2824          oa_dst.oa_length = oa_src.oa_length;
2844 2825  
2845 2826          dst->rm_call.cb_cred = oa_dst;
2846 2827  
2847 2828          /* dup or just alloc opaque auth call body verifier */
2848 2829          if (src->rm_call.cb_verf.oa_length > 0) {
2849 2830                  oa_src = src->rm_call.cb_verf;
2850 2831  
2851 2832                  oa_dst.oa_flavor = oa_src.oa_flavor;
2852 2833                  oa_dst.oa_base = kmem_alloc(oa_src.oa_length, KM_SLEEP);
2853 2834  
2854 2835                  bcopy(oa_src.oa_base, oa_dst.oa_base, oa_src.oa_length);
2855 2836                  oa_dst.oa_length = oa_src.oa_length;
2856 2837  
2857 2838                  dst->rm_call.cb_verf = oa_dst;
2858 2839          } else {
2859 2840                  oa_dst.oa_flavor = -1;  /* will be set later */
2860 2841                  oa_dst.oa_base = kmem_alloc(MAX_AUTH_BYTES, KM_SLEEP);
2861 2842  
2862 2843                  oa_dst.oa_length = 0;   /* will be set later */
2863 2844  
2864 2845                  dst->rm_call.cb_verf = oa_dst;
2865 2846          }
2866 2847          return (dst);
2867 2848  
2868 2849  error:
2869 2850          kmem_free(dst->rm_call.cb_cred.oa_base, dst->rm_call.cb_cred.oa_length);
2870 2851          kmem_free(dst, sizeof (*dst));
2871 2852          return (NULL);
2872 2853  }
2873 2854  
2874 2855  void
2875 2856  rpc_msg_free(struct rpc_msg **msg, int cb_verf_oa_length)
2876 2857  {
2877 2858          struct rpc_msg *m = *msg;
2878 2859  
2879 2860          kmem_free(m->rm_call.cb_cred.oa_base, m->rm_call.cb_cred.oa_length);
2880 2861          m->rm_call.cb_cred.oa_base = NULL;
2881 2862          m->rm_call.cb_cred.oa_length = 0;
2882 2863  
2883 2864          kmem_free(m->rm_call.cb_verf.oa_base, cb_verf_oa_length);
2884 2865          m->rm_call.cb_verf.oa_base = NULL;
2885 2866          m->rm_call.cb_verf.oa_length = 0;
2886 2867  
2887 2868          kmem_free(m, sizeof (*m));
2888 2869          *msg = NULL;
2889 2870  }
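
Given those caveats, a minimal usage sketch, assuming a hypothetical caller name (msg_dup_sketch) and a source call message whose verifier had oa_length == 0 at dup time, so rpc_msg_dup() allocated MAX_AUTH_BYTES for it:

    #include <rpc/auth.h>
    #include <rpc/rpc_msg.h>
    #include <rpc/svc.h>

    /*
     * Hypothetical caller along the lines of svc_rpcsec_gss.c.  The
     * verifier buffer was sized MAX_AUTH_BYTES by rpc_msg_dup(), so
     * that is the length rpc_msg_free() must be told to free.
     */
    static void
    msg_dup_sketch(struct rpc_msg *call)
    {
            struct rpc_msg *copy = rpc_msg_dup(call);

            /* ... use the copy; cb_verf flavor/length set later ... */

            rpc_msg_free(&copy, MAX_AUTH_BYTES);
    }
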
  