Print this page
    
Fix NFS design problems re. multiple zone keys
Make NFS server zone-specific data all have the same lifetime
Fix rfs4_clean_state_exi
Fix exi_cache_reclaim
Fix mistakes in zone keys work
More fixes re. exi_zoneid and exi_tree
(danmcd -> Keep some ASSERT()s around for readability.)
    
      
        | Split | 
	Close | 
      
      | Expand all | 
      | Collapse all | 
    
    
          --- old/usr/src/uts/common/fs/nfs/nfs_auth.c
          +++ new/usr/src/uts/common/fs/nfs/nfs_auth.c
   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  
  22   22  /*
  23   23   * Copyright (c) 1995, 2010, Oracle and/or its affiliates. All rights reserved.
  24   24   * Copyright (c) 2015 by Delphix. All rights reserved.
  25   25   * Copyright 2018 Nexenta Systems, Inc. All rights reserved.
  26   26   */
  27   27  
  28   28  #include <sys/param.h>
  29   29  #include <sys/errno.h>
  30   30  #include <sys/vfs.h>
  31   31  #include <sys/vnode.h>
  32   32  #include <sys/cred.h>
  33   33  #include <sys/cmn_err.h>
  34   34  #include <sys/systm.h>
  35   35  #include <sys/kmem.h>
  36   36  #include <sys/pathname.h>
  37   37  #include <sys/utsname.h>
  38   38  #include <sys/debug.h>
  39   39  #include <sys/door.h>
  40   40  #include <sys/sdt.h>
  41   41  #include <sys/thread.h>
  42   42  #include <sys/avl.h>
  43   43  
  44   44  #include <rpc/types.h>
  
    | 
      ↓ open down ↓ | 
    44 lines elided | 
    
      ↑ open up ↑ | 
  
  45   45  #include <rpc/auth.h>
  46   46  #include <rpc/clnt.h>
  47   47  
  48   48  #include <nfs/nfs.h>
  49   49  #include <nfs/export.h>
  50   50  #include <nfs/nfs_clnt.h>
  51   51  #include <nfs/auth.h>
  52   52  
  53   53  static struct kmem_cache *exi_cache_handle;
  54   54  static void exi_cache_reclaim(void *);
       55 +static void exi_cache_reclaim_zone(nfs_globals_t *);
  55   56  static void exi_cache_trim(struct exportinfo *exi);
  56      -static void *nfsauth_zone_init(zoneid_t);
  57      -static void nfsauth_zone_shutdown(zoneid_t zoneid, void *data);
  58      -static void nfsauth_zone_fini(zoneid_t, void *);
  59   57  
  60   58  extern pri_t minclsyspri;
  61   59  
  62   60  /* NFS auth cache statistics */
  63   61  volatile uint_t nfsauth_cache_hit;
  64   62  volatile uint_t nfsauth_cache_miss;
  65   63  volatile uint_t nfsauth_cache_refresh;
  66   64  volatile uint_t nfsauth_cache_reclaim;
  67   65  volatile uint_t exi_cache_auth_reclaim_failed;
  68   66  volatile uint_t exi_cache_clnt_reclaim_failed;
  69   67  
  70   68  /*
  71   69   * The lifetime of an auth cache entry:
  72   70   * ------------------------------------
  73   71   *
  74   72   * An auth cache entry is created with both the auth_time
  75   73   * and auth_freshness times set to the current time.
  76   74   *
  77   75   * Upon every client access which results in a hit, the
  78   76   * auth_time will be updated.
  79   77   *
  80   78   * If a client access determines that the auth_freshness
  81   79   * indicates that the entry is STALE, then it will be
  82   80   * refreshed. Note that this will explicitly reset
  83   81   * auth_time.
  84   82   *
  85   83   * When the REFRESH successfully occurs, then the
  86   84   * auth_freshness is updated.
  87   85   *
  88   86   * There are two ways for an entry to leave the cache:
  89   87   *
  90   88   * 1) Purged by an action on the export (remove or changed)
  91   89   * 2) Memory backpressure from the kernel (check against NFSAUTH_CACHE_TRIM)
  92   90   *
  93   91   * For 2) we check the timeout value against auth_time.
  94   92   */
  95   93  
  96   94  /*
  97   95   * Number of seconds until we mark for refresh an auth cache entry.
  98   96   */
  99   97  #define NFSAUTH_CACHE_REFRESH 600
 100   98  
 101   99  /*
 102  100   * Number of idle seconds until we yield to backpressure
 103  101   * to trim a cache entry.
 104  102   */
 105  103  #define NFSAUTH_CACHE_TRIM 3600
 106  104  
 107  105  /*
  108  106   * While we could encapsulate the exi_list inside the
 109  107   * exi structure, we can't do that for the auth_list.
 110  108   * So, to keep things looking clean, we keep them both
 111  109   * in these external lists.
 112  110   */
 113  111  typedef struct refreshq_exi_node {
 114  112          struct exportinfo       *ren_exi;
 115  113          list_t                  ren_authlist;
 116  114          list_node_t             ren_node;
 117  115  } refreshq_exi_node_t;
 118  116  
 119  117  typedef struct refreshq_auth_node {
 120  118          struct auth_cache       *ran_auth;
 121  119          char                    *ran_netid;
 122  120          list_node_t             ran_node;
 123  121  } refreshq_auth_node_t;
 124  122  
 125  123  /*
 126  124   * Used to manipulate things on the refreshq_queue.  Note that the refresh
 127  125   * thread will effectively pop a node off of the queue, at which point it
 128  126   * will no longer need to hold the mutex.
 129  127   */
 130  128  static kmutex_t refreshq_lock;
 131  129  static list_t refreshq_queue;
 132  130  static kcondvar_t refreshq_cv;
 133  131  
 134  132  /*
 135  133   * If there is ever a problem with loading the module, then nfsauth_fini()
 136  134   * needs to be called to remove state.  In that event, since the refreshq
 137  135   * thread has been started, they need to work together to get rid of state.
 138  136   */
 139  137  typedef enum nfsauth_refreshq_thread_state {
 140  138          REFRESHQ_THREAD_RUNNING,
 141  139          REFRESHQ_THREAD_FINI_REQ,
 142  140          REFRESHQ_THREAD_HALTED,
 143  141          REFRESHQ_THREAD_NEED_CREATE
 144  142  } nfsauth_refreshq_thread_state_t;
 145  143  
 146  144  typedef struct nfsauth_globals {
 147  145          kmutex_t        mountd_lock;
 148  146          door_handle_t   mountd_dh;
 149  147  
 150  148          /*
 151  149           * Used to manipulate things on the refreshq_queue.  Note that the
 152  150           * refresh thread will effectively pop a node off of the queue,
 153  151           * at which point it will no longer need to hold the mutex.
 154  152           */
 155  153          kmutex_t        refreshq_lock;
 156  154          list_t          refreshq_queue;
 157  155          kcondvar_t      refreshq_cv;
 158  156  
 159  157          /*
 160  158           * A list_t would be overkill.  These are auth_cache entries which are
 161  159           * no longer linked to an exi.  It should be the case that all of their
 162  160           * states are NFS_AUTH_INVALID, i.e., the only way to be put on this
 163  161           * list is iff their state indicated that they had been placed on the
 164  162           * refreshq_queue.
 165  163           *
 166  164           * Note that while there is no link from the exi or back to the exi,
 167  165           * the exi can not go away until these entries are harvested.
 168  166           */
  
    | 
      ↓ open down ↓ | 
    100 lines elided | 
    
      ↑ open up ↑ | 
  
 169  167          struct auth_cache               *refreshq_dead_entries;
 170  168          nfsauth_refreshq_thread_state_t refreshq_thread_state;
 171  169  
 172  170  } nfsauth_globals_t;
 173  171  
 174  172  static void nfsauth_free_node(struct auth_cache *);
 175  173  static void nfsauth_refresh_thread(nfsauth_globals_t *);
 176  174  
 177  175  static int nfsauth_cache_compar(const void *, const void *);
 178  176  
 179      -static zone_key_t       nfsauth_zone_key;
      177 +static nfsauth_globals_t *
      178 +nfsauth_get_zg(void)
      179 +{
      180 +        nfs_globals_t *ng = zone_getspecific(nfssrv_zone_key, curzone);
      181 +        nfsauth_globals_t *nag = ng->nfs_auth;
      182 +        ASSERT(nag != NULL);
      183 +        return (nag);
      184 +}
 180  185  
 181  186  void
 182  187  mountd_args(uint_t did)
 183  188  {
 184  189          nfsauth_globals_t *nag;
 185  190  
 186      -        nag = zone_getspecific(nfsauth_zone_key, curzone);
      191 +        nag = nfsauth_get_zg();
 187  192          mutex_enter(&nag->mountd_lock);
 188  193          if (nag->mountd_dh != NULL)
 189  194                  door_ki_rele(nag->mountd_dh);
 190  195          nag->mountd_dh = door_ki_lookup(did);
 191  196          mutex_exit(&nag->mountd_lock);
 192  197  }
 193  198  
 194  199  void
 195  200  nfsauth_init(void)
 196  201  {
 197      -        zone_key_create(&nfsauth_zone_key, nfsauth_zone_init,
 198      -            nfsauth_zone_shutdown, nfsauth_zone_fini);
 199      -
 200  202          exi_cache_handle = kmem_cache_create("exi_cache_handle",
 201  203              sizeof (struct auth_cache), 0, NULL, NULL,
 202  204              exi_cache_reclaim, NULL, NULL, 0);
 203  205  }
 204  206  
 205  207  void
 206  208  nfsauth_fini(void)
 207  209  {
 208  210          kmem_cache_destroy(exi_cache_handle);
 209  211  }
 210  212  
 211      -/*ARGSUSED*/
 212      -static void *
 213      -nfsauth_zone_init(zoneid_t zoneid)
      213 +void
      214 +nfsauth_zone_init(nfs_globals_t *ng)
 214  215  {
 215  216          nfsauth_globals_t *nag;
 216  217  
 217  218          nag = kmem_zalloc(sizeof (*nag), KM_SLEEP);
 218  219  
 219  220          /*
 220  221           * mountd can be restarted by smf(5).  We need to make sure
 221  222           * the updated door handle will safely make it to mountd_dh.
 222  223           */
 223  224          mutex_init(&nag->mountd_lock, NULL, MUTEX_DEFAULT, NULL);
 224  225          mutex_init(&nag->refreshq_lock, NULL, MUTEX_DEFAULT, NULL);
 225  226          list_create(&nag->refreshq_queue, sizeof (refreshq_exi_node_t),
 226  227              offsetof(refreshq_exi_node_t, ren_node));
 227  228          cv_init(&nag->refreshq_cv, NULL, CV_DEFAULT, NULL);
 228  229          nag->refreshq_thread_state = REFRESHQ_THREAD_NEED_CREATE;
 229  230  
 230      -        return (nag);
      231 +        ng->nfs_auth = nag;
 231  232  }
 232  233  
 233      -/*ARGSUSED*/
 234      -static void
 235      -nfsauth_zone_shutdown(zoneid_t zoneid, void *data)
      234 +void
      235 +nfsauth_zone_shutdown(nfs_globals_t *ng)
 236  236  {
 237  237          refreshq_exi_node_t     *ren;
 238      -        nfsauth_globals_t       *nag = data;
      238 +        nfsauth_globals_t       *nag = ng->nfs_auth;
 239  239  
 240  240          /* Prevent the nfsauth_refresh_thread from getting new work */
 241  241          mutex_enter(&nag->refreshq_lock);
 242  242          if (nag->refreshq_thread_state == REFRESHQ_THREAD_RUNNING) {
 243  243                  nag->refreshq_thread_state = REFRESHQ_THREAD_FINI_REQ;
 244  244                  cv_broadcast(&nag->refreshq_cv);
 245  245  
 246  246                  /* Wait for nfsauth_refresh_thread() to exit */
 247  247                  while (nag->refreshq_thread_state != REFRESHQ_THREAD_HALTED)
 248  248                          cv_wait(&nag->refreshq_cv, &nag->refreshq_lock);
 249  249          }
 250  250          mutex_exit(&nag->refreshq_lock);
 251  251  
 252  252          /*
 253  253           * Walk the exi_list and in turn, walk the auth_lists and free all
 254  254           * lists.  In addition, free INVALID auth_cache entries.
 255  255           */
 256  256          while ((ren = list_remove_head(&nag->refreshq_queue))) {
 257  257                  refreshq_auth_node_t *ran;
 258  258  
 259  259                  while ((ran = list_remove_head(&ren->ren_authlist)) != NULL) {
 260  260                          struct auth_cache *p = ran->ran_auth;
 261  261                          if (p->auth_state == NFS_AUTH_INVALID)
 262  262                                  nfsauth_free_node(p);
  
    | 
      ↓ open down ↓ | 
    14 lines elided | 
    
      ↑ open up ↑ | 
  
 263  263                          strfree(ran->ran_netid);
 264  264                          kmem_free(ran, sizeof (*ran));
 265  265                  }
 266  266  
 267  267                  list_destroy(&ren->ren_authlist);
 268  268                  exi_rele(ren->ren_exi);
 269  269                  kmem_free(ren, sizeof (*ren));
 270  270          }
 271  271  }
 272  272  
 273      -/*ARGSUSED*/
 274      -static void
 275      -nfsauth_zone_fini(zoneid_t zoneid, void *data)
      273 +void
      274 +nfsauth_zone_fini(nfs_globals_t *ng)
 276  275  {
 277      -        nfsauth_globals_t *nag = data;
      276 +        nfsauth_globals_t *nag = ng->nfs_auth;
 278  277  
      278 +        ng->nfs_auth = NULL;
      279 +
 279  280          list_destroy(&nag->refreshq_queue);
 280  281          cv_destroy(&nag->refreshq_cv);
 281  282          mutex_destroy(&nag->refreshq_lock);
 282  283          mutex_destroy(&nag->mountd_lock);
 283  284          /* Extra cleanup. */
 284  285          if (nag->mountd_dh != NULL)
 285  286                  door_ki_rele(nag->mountd_dh);
 286  287          kmem_free(nag, sizeof (*nag));
 287  288  }
 288  289  
 289  290  /*
 290  291   * Convert the address in a netbuf to
 291  292   * a hash index for the auth_cache table.
 292  293   */
 293  294  static int
 294  295  hash(struct netbuf *a)
 295  296  {
 296  297          int i, h = 0;
 297  298  
 298  299          for (i = 0; i < a->len; i++)
 299  300                  h ^= a->buf[i];
 300  301  
 301  302          return (h & (AUTH_TABLESIZE - 1));
 302  303  }
 303  304  
 304  305  /*
 305  306   * Mask out the components of an
 306  307   * address that do not identify
 307  308   * a host. For socket addresses the
 308  309   * masking gets rid of the port number.
 309  310   */
 310  311  static void
 311  312  addrmask(struct netbuf *addr, struct netbuf *mask)
 312  313  {
 313  314          int i;
 314  315  
 315  316          for (i = 0; i < addr->len; i++)
 316  317                  addr->buf[i] &= mask->buf[i];
 317  318  }
 318  319  
 319  320  /*
 320  321   * nfsauth4_access is used for NFS V4 auth checking. Besides doing
 321  322   * the common nfsauth_access(), it will check if the client can
 322  323   * have a limited access to this vnode even if the security flavor
 323  324   * used does not meet the policy.
 324  325   */
 325  326  int
 326  327  nfsauth4_access(struct exportinfo *exi, vnode_t *vp, struct svc_req *req,
 327  328      cred_t *cr, uid_t *uid, gid_t *gid, uint_t *ngids, gid_t **gids)
 328  329  {
 329  330          int access;
 330  331  
 331  332          access = nfsauth_access(exi, req, cr, uid, gid, ngids, gids);
 332  333  
 333  334          /*
 334  335           * There are cases that the server needs to allow the client
 335  336           * to have a limited view.
 336  337           *
 337  338           * e.g.
 338  339           * /export is shared as "sec=sys,rw=dfs-test-4,sec=krb5,rw"
 339  340           * /export/home is shared as "sec=sys,rw"
 340  341           *
 341  342           * When the client mounts /export with sec=sys, the client
 342  343           * would get a limited view with RO access on /export to see
 343  344           * "home" only because the client is allowed to access
 344  345           * /export/home with auth_sys.
 345  346           */
 346  347          if (access & NFSAUTH_DENIED || access & NFSAUTH_WRONGSEC) {
 347  348                  /*
 348  349                   * Allow ro permission with LIMITED view if there is a
 349  350                   * sub-dir exported under vp.
 350  351                   */
 351  352                  if (has_visible(exi, vp))
 352  353                          return (NFSAUTH_LIMITED);
 353  354          }
 354  355  
 355  356          return (access);
 356  357  }
 357  358  
 358  359  static void
 359  360  sys_log(const char *msg)
 360  361  {
 361  362          static time_t   tstamp = 0;
 362  363          time_t          now;
 363  364  
 364  365          /*
 365  366           * msg is shown (at most) once per minute
 366  367           */
 367  368          now = gethrestime_sec();
 368  369          if ((tstamp + 60) < now) {
 369  370                  tstamp = now;
 370  371                  cmn_err(CE_WARN, msg);
 371  372          }
 372  373  }
 373  374  
 374  375  /*
 375  376   * Callup to the mountd to get access information in the kernel.
 376  377   */
 377  378  static bool_t
 378  379  nfsauth_retrieve(nfsauth_globals_t *nag, struct exportinfo *exi,
 379  380      char *req_netid, int flavor, struct netbuf *addr, int *access,
 380  381      cred_t *clnt_cred, uid_t *srv_uid, gid_t *srv_gid, uint_t *srv_gids_cnt,
 381  382      gid_t **srv_gids)
 382  383  {
 383  384          varg_t                    varg = {0};
 384  385          nfsauth_res_t             res = {0};
 385  386          XDR                       xdrs;
 386  387          size_t                    absz;
 387  388          caddr_t                   abuf;
 388  389          int                       last = 0;
 389  390          door_arg_t                da;
 390  391          door_info_t               di;
 391  392          door_handle_t             dh;
 392  393          uint_t                    ntries = 0;
 393  394  
 394  395          /*
 395  396           * No entry in the cache for this client/flavor
 396  397           * so we need to call the nfsauth service in the
 397  398           * mount daemon.
 398  399           */
 399  400  
 400  401          varg.vers = V_PROTO;
 401  402          varg.arg_u.arg.cmd = NFSAUTH_ACCESS;
 402  403          varg.arg_u.arg.areq.req_client.n_len = addr->len;
 403  404          varg.arg_u.arg.areq.req_client.n_bytes = addr->buf;
 404  405          varg.arg_u.arg.areq.req_netid = req_netid;
 405  406          varg.arg_u.arg.areq.req_path = exi->exi_export.ex_path;
 406  407          varg.arg_u.arg.areq.req_flavor = flavor;
 407  408          varg.arg_u.arg.areq.req_clnt_uid = crgetuid(clnt_cred);
 408  409          varg.arg_u.arg.areq.req_clnt_gid = crgetgid(clnt_cred);
 409  410          varg.arg_u.arg.areq.req_clnt_gids.len = crgetngroups(clnt_cred);
 410  411          varg.arg_u.arg.areq.req_clnt_gids.val = (gid_t *)crgetgroups(clnt_cred);
 411  412  
 412  413          DTRACE_PROBE1(nfsserv__func__nfsauth__varg, varg_t *, &varg);
 413  414  
 414  415          /*
 415  416           * Setup the XDR stream for encoding the arguments. Notice that
 416  417           * in addition to the args having variable fields (req_netid and
 417  418           * req_path), the argument data structure is itself versioned,
 418  419           * so we need to make sure we can size the arguments buffer
 419  420           * appropriately to encode all the args. If we can't get sizing
 420  421           * info _or_ properly encode the arguments, there's really no
  421  422           * point in continuing, so we fail the request.
 422  423           */
 423  424          if ((absz = xdr_sizeof(xdr_varg, &varg)) == 0) {
 424  425                  *access = NFSAUTH_DENIED;
 425  426                  return (FALSE);
 426  427          }
 427  428  
 428  429          abuf = (caddr_t)kmem_alloc(absz, KM_SLEEP);
 429  430          xdrmem_create(&xdrs, abuf, absz, XDR_ENCODE);
 430  431          if (!xdr_varg(&xdrs, &varg)) {
 431  432                  XDR_DESTROY(&xdrs);
 432  433                  goto fail;
 433  434          }
 434  435          XDR_DESTROY(&xdrs);
 435  436  
 436  437          /*
 437  438           * Prepare the door arguments
 438  439           *
 439  440           * We don't know the size of the message the daemon
 440  441           * will pass back to us.  By setting rbuf to NULL,
 441  442           * we force the door code to allocate a buf of the
 442  443           * appropriate size.  We must set rsize > 0, however,
 443  444           * else the door code acts as if no response was
 444  445           * expected and doesn't pass the data to us.
 445  446           */
 446  447          da.data_ptr = (char *)abuf;
 447  448          da.data_size = absz;
 448  449          da.desc_ptr = NULL;
 449  450          da.desc_num = 0;
 450  451          da.rbuf = NULL;
 451  452          da.rsize = 1;
 452  453  
 453  454  retry:
 454  455          mutex_enter(&nag->mountd_lock);
 455  456          dh = nag->mountd_dh;
 456  457          if (dh != NULL)
 457  458                  door_ki_hold(dh);
 458  459          mutex_exit(&nag->mountd_lock);
 459  460  
 460  461          if (dh == NULL) {
 461  462                  /*
 462  463                   * The rendezvous point has not been established yet!
 463  464                   * This could mean that either mountd(1m) has not yet
 464  465                   * been started or that _this_ routine nuked the door
 465  466                   * handle after receiving an EINTR for a REVOKED door.
 466  467                   *
 467  468                   * Returning NFSAUTH_DROP will cause the NFS client
 468  469                   * to retransmit the request, so let's try to be more
  469  470                   * resilient and attempt for ntries before we bail.
 470  471                   */
 471  472                  if (++ntries % NFSAUTH_DR_TRYCNT) {
 472  473                          delay(hz);
 473  474                          goto retry;
 474  475                  }
 475  476  
 476  477                  kmem_free(abuf, absz);
 477  478  
 478  479                  sys_log("nfsauth: mountd has not established door");
 479  480                  *access = NFSAUTH_DROP;
 480  481                  return (FALSE);
 481  482          }
 482  483  
 483  484          ntries = 0;
 484  485  
 485  486          /*
 486  487           * Now that we've got what we need, place the call.
 487  488           */
 488  489          switch (door_ki_upcall_limited(dh, &da, NULL, SIZE_MAX, 0)) {
 489  490          case 0:                         /* Success */
 490  491                  door_ki_rele(dh);
 491  492  
 492  493                  if (da.data_ptr == NULL && da.data_size == 0) {
 493  494                          /*
 494  495                           * The door_return that contained the data
 495  496                           * failed! We're here because of the 2nd
 496  497                           * door_return (w/o data) such that we can
 497  498                           * get control of the thread (and exit
 498  499                           * gracefully).
 499  500                           */
 500  501                          DTRACE_PROBE1(nfsserv__func__nfsauth__door__nil,
 501  502                              door_arg_t *, &da);
 502  503                          goto fail;
 503  504                  }
 504  505  
 505  506                  break;
 506  507  
 507  508          case EAGAIN:
 508  509                  /*
 509  510                   * Server out of resources; back off for a bit
 510  511                   */
 511  512                  door_ki_rele(dh);
 512  513                  delay(hz);
 513  514                  goto retry;
 514  515                  /* NOTREACHED */
 515  516  
 516  517          case EINTR:
 517  518                  if (!door_ki_info(dh, &di)) {
 518  519                          door_ki_rele(dh);
 519  520  
 520  521                          if (di.di_attributes & DOOR_REVOKED) {
 521  522                                  /*
 522  523                                   * The server barfed and revoked
 523  524                                   * the (existing) door on us; we
 524  525                                   * want to wait to give smf(5) a
 525  526                                   * chance to restart mountd(1m)
 526  527                                   * and establish a new door handle.
 527  528                                   */
 528  529                                  mutex_enter(&nag->mountd_lock);
 529  530                                  if (dh == nag->mountd_dh) {
 530  531                                          door_ki_rele(nag->mountd_dh);
 531  532                                          nag->mountd_dh = NULL;
 532  533                                  }
 533  534                                  mutex_exit(&nag->mountd_lock);
 534  535                                  delay(hz);
 535  536                                  goto retry;
 536  537                          }
 537  538                          /*
 538  539                           * If the door was _not_ revoked on us,
 539  540                           * then more than likely we took an INTR,
 540  541                           * so we need to fail the operation.
 541  542                           */
 542  543                          goto fail;
 543  544                  }
 544  545                  /*
 545  546                   * The only failure that can occur from getting
 546  547                   * the door info is EINVAL, so we let the code
 547  548                   * below handle it.
 548  549                   */
 549  550                  /* FALLTHROUGH */
 550  551  
 551  552          case EBADF:
 552  553          case EINVAL:
 553  554          default:
 554  555                  /*
 555  556                   * If we have a stale door handle, give smf a last
 556  557                   * chance to start it by sleeping for a little bit.
 557  558                   * If we're still hosed, we'll fail the call.
 558  559                   *
 559  560                   * Since we're going to reacquire the door handle
 560  561                   * upon the retry, we opt to sleep for a bit and
 561  562                   * _not_ to clear mountd_dh. If mountd restarted
 562  563                   * and was able to set mountd_dh, we should see
 563  564                   * the new instance; if not, we won't get caught
 564  565                   * up in the retry/DELAY loop.
 565  566                   */
 566  567                  door_ki_rele(dh);
 567  568                  if (!last) {
 568  569                          delay(hz);
 569  570                          last++;
 570  571                          goto retry;
 571  572                  }
 572  573                  sys_log("nfsauth: stale mountd door handle");
 573  574                  goto fail;
 574  575          }
 575  576  
 576  577          ASSERT(da.rbuf != NULL);
 577  578  
 578  579          /*
 579  580           * No door errors encountered; setup the XDR stream for decoding
 580  581           * the results. If we fail to decode the results, we've got no
 581  582           * other recourse than to fail the request.
 582  583           */
 583  584          xdrmem_create(&xdrs, da.rbuf, da.rsize, XDR_DECODE);
 584  585          if (!xdr_nfsauth_res(&xdrs, &res)) {
 585  586                  xdr_free(xdr_nfsauth_res, (char *)&res);
 586  587                  XDR_DESTROY(&xdrs);
 587  588                  kmem_free(da.rbuf, da.rsize);
 588  589                  goto fail;
 589  590          }
 590  591          XDR_DESTROY(&xdrs);
 591  592          kmem_free(da.rbuf, da.rsize);
 592  593  
 593  594          DTRACE_PROBE1(nfsserv__func__nfsauth__results, nfsauth_res_t *, &res);
 594  595          switch (res.stat) {
 595  596                  case NFSAUTH_DR_OKAY:
 596  597                          *access = res.ares.auth_perm;
 597  598                          *srv_uid = res.ares.auth_srv_uid;
 598  599                          *srv_gid = res.ares.auth_srv_gid;
 599  600                          *srv_gids_cnt = res.ares.auth_srv_gids.len;
 600  601                          *srv_gids = kmem_alloc(*srv_gids_cnt * sizeof (gid_t),
 601  602                              KM_SLEEP);
 602  603                          bcopy(res.ares.auth_srv_gids.val, *srv_gids,
 603  604                              *srv_gids_cnt * sizeof (gid_t));
 604  605                          break;
 605  606  
 606  607                  case NFSAUTH_DR_EFAIL:
 607  608                  case NFSAUTH_DR_DECERR:
 608  609                  case NFSAUTH_DR_BADCMD:
 609  610                  default:
 610  611                          xdr_free(xdr_nfsauth_res, (char *)&res);
 611  612  fail:
 612  613                          *access = NFSAUTH_DENIED;
 613  614                          kmem_free(abuf, absz);
 614  615                          return (FALSE);
 615  616                          /* NOTREACHED */
 616  617          }
 617  618  
 618  619          xdr_free(xdr_nfsauth_res, (char *)&res);
 619  620          kmem_free(abuf, absz);
 620  621  
 621  622          return (TRUE);
 622  623  }
 623  624  
 624  625  static void
 625  626  nfsauth_refresh_thread(nfsauth_globals_t *nag)
 626  627  {
 627  628          refreshq_exi_node_t     *ren;
 628  629          refreshq_auth_node_t    *ran;
 629  630  
 630  631          struct exportinfo       *exi;
 631  632  
 632  633          int                     access;
 633  634          bool_t                  retrieval;
 634  635  
 635  636          callb_cpr_t             cprinfo;
 636  637  
 637  638          CALLB_CPR_INIT(&cprinfo, &nag->refreshq_lock, callb_generic_cpr,
 638  639              "nfsauth_refresh");
 639  640  
 640  641          for (;;) {
 641  642                  mutex_enter(&nag->refreshq_lock);
 642  643                  if (nag->refreshq_thread_state != REFRESHQ_THREAD_RUNNING) {
 643  644                          /* Keep the hold on the lock! */
 644  645                          break;
 645  646                  }
 646  647  
 647  648                  ren = list_remove_head(&nag->refreshq_queue);
 648  649                  if (ren == NULL) {
 649  650                          CALLB_CPR_SAFE_BEGIN(&cprinfo);
 650  651                          cv_wait(&nag->refreshq_cv, &nag->refreshq_lock);
 651  652                          CALLB_CPR_SAFE_END(&cprinfo, &nag->refreshq_lock);
 652  653                          mutex_exit(&nag->refreshq_lock);
 653  654                          continue;
 654  655                  }
 655  656                  mutex_exit(&nag->refreshq_lock);
 656  657  
 657  658                  exi = ren->ren_exi;
 658  659                  ASSERT(exi != NULL);
 659  660  
 660  661                  /*
 661  662                   * Since the ren was removed from the refreshq_queue above,
  662  663                   * this is the only thread aware of the ren's existence, so we
 663  664                   * have the exclusive ownership of it and we do not need to
 664  665                   * protect it by any lock.
 665  666                   */
 666  667                  while ((ran = list_remove_head(&ren->ren_authlist))) {
 667  668                          uid_t uid;
 668  669                          gid_t gid;
 669  670                          uint_t ngids;
 670  671                          gid_t *gids;
 671  672                          struct auth_cache *p = ran->ran_auth;
 672  673                          char *netid = ran->ran_netid;
 673  674  
 674  675                          ASSERT(p != NULL);
 675  676                          ASSERT(netid != NULL);
 676  677  
 677  678                          kmem_free(ran, sizeof (refreshq_auth_node_t));
 678  679  
 679  680                          mutex_enter(&p->auth_lock);
 680  681  
 681  682                          /*
 682  683                           * Once the entry goes INVALID, it can not change
 683  684                           * state.
 684  685                           *
 685  686                           * There is also no need to refresh entries when we
 686  687                           * are just shutting down.
 687  688                           *
 688  689                           * In general, there is no need to hold the
 689  690                           * refreshq_lock to test the refreshq_thread_state.  We
 690  691                           * do hold it at other places because there is some
 691  692                           * related thread synchronization (or some other tasks)
 692  693                           * close to the refreshq_thread_state check.
 693  694                           *
 694  695                           * The check for the refreshq_thread_state value here
 695  696                           * is purely advisory to allow the faster
 696  697                           * nfsauth_refresh_thread() shutdown.  In a case we
 697  698                           * will miss such advisory, nothing catastrophic
 698  699                           * happens: we will just spin longer here before the
 699  700                           * shutdown.
 700  701                           */
 701  702                          if (p->auth_state == NFS_AUTH_INVALID ||
 702  703                              nag->refreshq_thread_state !=
 703  704                              REFRESHQ_THREAD_RUNNING) {
 704  705                                  mutex_exit(&p->auth_lock);
 705  706  
 706  707                                  if (p->auth_state == NFS_AUTH_INVALID)
 707  708                                          nfsauth_free_node(p);
 708  709  
 709  710                                  strfree(netid);
 710  711  
 711  712                                  continue;
 712  713                          }
 713  714  
 714  715                          /*
 715  716                           * Make sure the state is valid.  Note that once we
 716  717                           * change the state to NFS_AUTH_REFRESHING, no other
 717  718                           * thread will be able to work on this entry.
 718  719                           */
 719  720                          ASSERT(p->auth_state == NFS_AUTH_STALE);
 720  721  
 721  722                          p->auth_state = NFS_AUTH_REFRESHING;
 722  723                          mutex_exit(&p->auth_lock);
 723  724  
 724  725                          DTRACE_PROBE2(nfsauth__debug__cache__refresh,
 725  726                              struct exportinfo *, exi,
 726  727                              struct auth_cache *, p);
 727  728  
 728  729                          /*
 729  730                           * The first caching of the access rights
 730  731                           * is done with the netid pulled out of the
 731  732                           * request from the client. All subsequent
 732  733                           * users of the cache may or may not have
 733  734                           * the same netid. It doesn't matter. So
 734  735                           * when we refresh, we simply use the netid
 735  736                           * of the request which triggered the
 736  737                           * refresh attempt.
 737  738                           */
 738  739                          retrieval = nfsauth_retrieve(nag, exi, netid,
 739  740                              p->auth_flavor, &p->auth_clnt->authc_addr, &access,
 740  741                              p->auth_clnt_cred, &uid, &gid, &ngids, &gids);
 741  742  
 742  743                          /*
 743  744                           * This can only be set in one other place
 744  745                           * and the state has to be NFS_AUTH_FRESH.
 745  746                           */
 746  747                          strfree(netid);
 747  748  
 748  749                          mutex_enter(&p->auth_lock);
 749  750                          if (p->auth_state == NFS_AUTH_INVALID) {
 750  751                                  mutex_exit(&p->auth_lock);
 751  752                                  nfsauth_free_node(p);
 752  753                                  if (retrieval == TRUE)
 753  754                                          kmem_free(gids, ngids * sizeof (gid_t));
 754  755                          } else {
 755  756                                  /*
 756  757                                   * If we got an error, do not reset the
 757  758                                   * time. This will cause the next access
 758  759                                   * check for the client to reschedule this
 759  760                                   * node.
 760  761                                   */
 761  762                                  if (retrieval == TRUE) {
 762  763                                          p->auth_access = access;
 763  764  
 764  765                                          p->auth_srv_uid = uid;
 765  766                                          p->auth_srv_gid = gid;
 766  767                                          kmem_free(p->auth_srv_gids,
 767  768                                              p->auth_srv_ngids * sizeof (gid_t));
 768  769                                          p->auth_srv_ngids = ngids;
 769  770                                          p->auth_srv_gids = gids;
 770  771  
 771  772                                          p->auth_freshness = gethrestime_sec();
 772  773                                  }
 773  774                                  p->auth_state = NFS_AUTH_FRESH;
 774  775  
 775  776                                  cv_broadcast(&p->auth_cv);
 776  777                                  mutex_exit(&p->auth_lock);
 777  778                          }
 778  779                  }
 779  780  
 780  781                  list_destroy(&ren->ren_authlist);
 781  782                  exi_rele(ren->ren_exi);
 782  783                  kmem_free(ren, sizeof (refreshq_exi_node_t));
 783  784          }
 784  785  
 785  786          nag->refreshq_thread_state = REFRESHQ_THREAD_HALTED;
 786  787          cv_broadcast(&nag->refreshq_cv);
 787  788          CALLB_CPR_EXIT(&cprinfo);
 788  789          DTRACE_PROBE(nfsauth__nfsauth__refresh__thread__exit);
 789  790          zthread_exit();
 790  791  }
 791  792  
 792  793  int
 793  794  nfsauth_cache_clnt_compar(const void *v1, const void *v2)
 794  795  {
 795  796          int c;
 796  797  
 797  798          const struct auth_cache_clnt *a1 = (const struct auth_cache_clnt *)v1;
 798  799          const struct auth_cache_clnt *a2 = (const struct auth_cache_clnt *)v2;
 799  800  
 800  801          if (a1->authc_addr.len < a2->authc_addr.len)
 801  802                  return (-1);
 802  803          if (a1->authc_addr.len > a2->authc_addr.len)
 803  804                  return (1);
 804  805  
 805  806          c = memcmp(a1->authc_addr.buf, a2->authc_addr.buf, a1->authc_addr.len);
 806  807          if (c < 0)
 807  808                  return (-1);
 808  809          if (c > 0)
 809  810                  return (1);
 810  811  
 811  812          return (0);
 812  813  }
 813  814  
 814  815  static int
 815  816  nfsauth_cache_compar(const void *v1, const void *v2)
 816  817  {
 817  818          int c;
 818  819  
 819  820          const struct auth_cache *a1 = (const struct auth_cache *)v1;
 820  821          const struct auth_cache *a2 = (const struct auth_cache *)v2;
 821  822  
 822  823          if (a1->auth_flavor < a2->auth_flavor)
 823  824                  return (-1);
 824  825          if (a1->auth_flavor > a2->auth_flavor)
 825  826                  return (1);
 826  827  
 827  828          if (crgetuid(a1->auth_clnt_cred) < crgetuid(a2->auth_clnt_cred))
 828  829                  return (-1);
 829  830          if (crgetuid(a1->auth_clnt_cred) > crgetuid(a2->auth_clnt_cred))
 830  831                  return (1);
 831  832  
 832  833          if (crgetgid(a1->auth_clnt_cred) < crgetgid(a2->auth_clnt_cred))
 833  834                  return (-1);
 834  835          if (crgetgid(a1->auth_clnt_cred) > crgetgid(a2->auth_clnt_cred))
 835  836                  return (1);
 836  837  
 837  838          if (crgetngroups(a1->auth_clnt_cred) < crgetngroups(a2->auth_clnt_cred))
 838  839                  return (-1);
 839  840          if (crgetngroups(a1->auth_clnt_cred) > crgetngroups(a2->auth_clnt_cred))
 840  841                  return (1);
 841  842  
 842  843          c = memcmp(crgetgroups(a1->auth_clnt_cred),
 843  844              crgetgroups(a2->auth_clnt_cred), crgetngroups(a1->auth_clnt_cred));
 844  845          if (c < 0)
 845  846                  return (-1);
 846  847          if (c > 0)
 847  848                  return (1);
 848  849  
 849  850          return (0);
 850  851  }
 851  852  
 852  853  /*
 853  854   * Get the access information from the cache, or call up to mountd
 854  855   * to fetch the access information and cache it in the kernel.
            *
            * Returns the access value, taken either from the matching auth_cache
            * entry or directly from nfsauth_retrieve().
            *
            * uid, gid, ngids and gids are optional output arguments; none of them
            * is written when NULL, and the group list is handed back only when
            * both ngids and gids are non-NULL.  On a cache hit *gids is a newly
            * allocated copy of the cached list (presumably freed by the caller -
            * TODO confirm).
            *
            * Cache nodes are allocated with KM_NOSLEEP; when such an allocation
            * fails we jump to the retrieve label and answer the request without
            * caching anything.
 855  856   */
 856  857  static int
 857  858  nfsauth_cache_get(struct exportinfo *exi, struct svc_req *req, int flavor,
 858  859      cred_t *cr, uid_t *uid, gid_t *gid, uint_t *ngids, gid_t **gids)
 859  860  {
 860  861          nfsauth_globals_t       *nag;
 861  862          struct netbuf           *taddrmask;
 862  863          struct netbuf           addr;   /* temporary copy of client's address */
 863  864          const struct netbuf     *claddr;
 864  865          avl_tree_t              *tree;
 865  866          struct auth_cache       ac;     /* used as a template for avl_find() */
 866  867          struct auth_cache_clnt  *c;
 867  868          struct auth_cache_clnt  acc;    /* used as a template for avl_find() */
 868  869          struct auth_cache       *p = NULL;
 869  870          int                     access;
 870  871  
  
    | 
      ↓ open down ↓ | 
    582 lines elided | 
    
      ↑ open up ↑ | 
  
 871  872          uid_t                   tmpuid;
 872  873          gid_t                   tmpgid;
 873  874          uint_t                  tmpngids;
 874  875          gid_t                   *tmpgids;
 875  876  
 876  877          avl_index_t             where;  /* used for avl_find()/avl_insert() */
 877  878  
 878  879          ASSERT(cr != NULL);
 879  880  
 880  881          ASSERT3P(curzone, ==, exi->exi_zone);
 881      -        nag = zone_getspecific(nfsauth_zone_key, curzone);
      882 +        nag = nfsauth_get_zg();
 882  883  
 883  884          /*
 884  885           * Now check whether this client already
 885  886           * has an entry for this flavor in the cache
 886  887           * for this export.
 887  888           * Get the caller's address, mask off the
 888  889           * parts of the address that do not identify
 889  890           * the host (port number, etc), and then hash
 890  891           * it to find the AVL tree of cached clients.
 891  892           */
 892  893  
 893  894          claddr = svc_getrpccaller(req->rq_xprt);
 894  895          addr = *claddr;
 895  896          addr.buf = kmem_alloc(addr.maxlen, KM_SLEEP);
 896  897          bcopy(claddr->buf, addr.buf, claddr->len);
 897  898  
 898  899          SVC_GETADDRMASK(req->rq_xprt, SVC_TATTR_ADDRMASK, (void **)&taddrmask);
 899  900          ASSERT(taddrmask != NULL);
 900  901          addrmask(&addr, taddrmask);
 901  902  
 902  903          ac.auth_flavor = flavor;
 903  904          ac.auth_clnt_cred = crdup(cr);
 904  905  
 905  906          acc.authc_addr = addr;
 906  907  
 907  908          tree = exi->exi_cache[hash(&addr)];
 908  909  
 909  910          rw_enter(&exi->exi_cache_lock, RW_READER);
 910  911          c = (struct auth_cache_clnt *)avl_find(tree, &acc, NULL);
 911  912  
 912  913          if (c == NULL) {
 913  914                  struct auth_cache_clnt *nc;
 914  915  
 915  916                  rw_exit(&exi->exi_cache_lock);
 916  917  
 917  918                  nc = kmem_alloc(sizeof (*nc), KM_NOSLEEP | KM_NORMALPRI);
 918  919                  if (nc == NULL)
 919  920                          goto retrieve;
 920  921  
 921  922                  /*
 922  923                   * Initialize the new auth_cache_clnt
 923  924                   */
 924  925                  nc->authc_addr = addr;
 925  926                  nc->authc_addr.buf = kmem_alloc(addr.maxlen,
 926  927                      KM_NOSLEEP | KM_NORMALPRI);
 927  928                  if (addr.maxlen != 0 && nc->authc_addr.buf == NULL) {
 928  929                          kmem_free(nc, sizeof (*nc));
 929  930                          goto retrieve;
 930  931                  }
 931  932                  bcopy(addr.buf, nc->authc_addr.buf, addr.len);
 932  933                  rw_init(&nc->authc_lock, NULL, RW_DEFAULT, NULL);
 933  934                  avl_create(&nc->authc_tree, nfsauth_cache_compar,
 934  935                      sizeof (struct auth_cache),
 935  936                      offsetof(struct auth_cache, auth_link));
 936  937  
                           /*
                            * The lookup is repeated under the writer lock; if it
                            * now finds the client, the freshly built nc is
                            * discarded in the else branch below.
                            */
 937  938                  rw_enter(&exi->exi_cache_lock, RW_WRITER);
 938  939                  c = (struct auth_cache_clnt *)avl_find(tree, &acc, &where);
 939  940                  if (c == NULL) {
 940  941                          avl_insert(tree, nc, where);
 941  942                          rw_downgrade(&exi->exi_cache_lock);
 942  943                          c = nc;
 943  944                  } else {
 944  945                          rw_downgrade(&exi->exi_cache_lock);
 945  946  
 946  947                          avl_destroy(&nc->authc_tree);
 947  948                          rw_destroy(&nc->authc_lock);
 948  949                          kmem_free(nc->authc_addr.buf, nc->authc_addr.maxlen);
 949  950                          kmem_free(nc, sizeof (*nc));
 950  951                  }
 951  952          }
 952  953  
 953  954          ASSERT(c != NULL);
 954  955  
 955  956          rw_enter(&c->authc_lock, RW_READER);
 956  957          p = (struct auth_cache *)avl_find(&c->authc_tree, &ac, NULL);
 957  958  
 958  959          if (p == NULL) {
 959  960                  struct auth_cache *np;
 960  961  
 961  962                  rw_exit(&c->authc_lock);
 962  963  
 963  964                  np = kmem_cache_alloc(exi_cache_handle,
 964  965                      KM_NOSLEEP | KM_NORMALPRI);
 965  966                  if (np == NULL) {
 966  967                          rw_exit(&exi->exi_cache_lock);
 967  968                          goto retrieve;
 968  969                  }
 969  970  
 970  971                  /*
 971  972                   * Initialize the new auth_cache
 972  973                   */
 973  974                  np->auth_clnt = c;
 974  975                  np->auth_flavor = flavor;
 975  976                  np->auth_clnt_cred = ac.auth_clnt_cred;
 976  977                  np->auth_srv_ngids = 0;
 977  978                  np->auth_srv_gids = NULL;
 978  979                  np->auth_time = np->auth_freshness = gethrestime_sec();
 979  980                  np->auth_state = NFS_AUTH_NEW;
 980  981                  mutex_init(&np->auth_lock, NULL, MUTEX_DEFAULT, NULL);
 981  982                  cv_init(&np->auth_cv, NULL, CV_DEFAULT, NULL);
 982  983  
 983  984                  rw_enter(&c->authc_lock, RW_WRITER);
 984  985                  rw_exit(&exi->exi_cache_lock);
 985  986  
 986  987                  p = (struct auth_cache *)avl_find(&c->authc_tree, &ac, &where);
 987  988                  if (p == NULL) {
 988  989                          avl_insert(&c->authc_tree, np, where);
 989  990                          rw_downgrade(&c->authc_lock);
 990  991                          p = np;
 991  992                  } else {
 992  993                          rw_downgrade(&c->authc_lock);
 993  994  
 994  995                          cv_destroy(&np->auth_cv);
 995  996                          mutex_destroy(&np->auth_lock);
 996  997                          crfree(ac.auth_clnt_cred);
 997  998                          kmem_cache_free(exi_cache_handle, np);
 998  999                  }
 999 1000          } else {
1000 1001                  rw_exit(&exi->exi_cache_lock);
1001 1002                  crfree(ac.auth_clnt_cred);
1002 1003          }
1003 1004  
1004 1005          mutex_enter(&p->auth_lock);
1005 1006          rw_exit(&c->authc_lock);
1006 1007  
1007 1008          /*
1008 1009           * If the entry is in the WAITING state then some other thread is just
1009 1010           * retrieving the required info.  The entry was either NEW, or the list
1010 1011           * of client's supplemental groups is going to be changed (either by
1011 1012           * this thread, or by some other thread).  We need to wait until the
1012 1013           * nfsauth_retrieve() is done.
1013 1014           */
1014 1015          while (p->auth_state == NFS_AUTH_WAITING)
1015 1016                  cv_wait(&p->auth_cv, &p->auth_lock);
1016 1017  
1017 1018          /*
1018 1019           * Here the entry cannot be in WAITING or INVALID state.
1019 1020           */
1020 1021          ASSERT(p->auth_state != NFS_AUTH_WAITING);
1021 1022          ASSERT(p->auth_state != NFS_AUTH_INVALID);
1022 1023  
1023 1024          /*
1024 1025           * If the cache entry is not valid yet, we need to retrieve the
1025 1026           * info ourselves.
1026 1027           */
1027 1028          if (p->auth_state == NFS_AUTH_NEW) {
1028 1029                  bool_t res;
1029 1030                  /*
1030 1031                   * NFS_AUTH_NEW is the default output auth_state value in a
1031 1032                   * case we failed somewhere below.
1032 1033                   */
1033 1034                  auth_state_t state = NFS_AUTH_NEW;
1034 1035  
1035 1036                  p->auth_state = NFS_AUTH_WAITING;
1036 1037                  mutex_exit(&p->auth_lock);
                           /*
                            * Drop our private copy of the address; from here on
                            * addr aliases the cached client's address, so this
                            * branch does not free addr.buf again.
                            */
1037 1038                  kmem_free(addr.buf, addr.maxlen);
1038 1039                  addr = p->auth_clnt->authc_addr;
1039 1040  
1040 1041                  atomic_inc_uint(&nfsauth_cache_miss);
1041 1042  
1042 1043                  res = nfsauth_retrieve(nag, exi, svc_getnetid(req->rq_xprt),
1043 1044                      flavor, &addr, &access, cr, &tmpuid, &tmpgid, &tmpngids,
1044 1045                      &tmpgids);
1045 1046  
1046 1047                  p->auth_access = access;
1047 1048                  p->auth_time = p->auth_freshness = gethrestime_sec();
1048 1049  
1049 1050                  if (res == TRUE) {
1050 1051                          if (uid != NULL)
1051 1052                                  *uid = tmpuid;
1052 1053                          if (gid != NULL)
1053 1054                                  *gid = tmpgid;
1054 1055                          if (ngids != NULL && gids != NULL) {
1055 1056                                  *ngids = tmpngids;
1056 1057                                  *gids = tmpgids;
1057 1058  
1058 1059                                  /*
1059 1060                                   * We need a copy of gids for the
1060 1061                                   * auth_cache entry
1061 1062                                   */
1062 1063                                  tmpgids = kmem_alloc(tmpngids * sizeof (gid_t),
1063 1064                                      KM_NOSLEEP | KM_NORMALPRI);
1064 1065                                  if (tmpgids != NULL)
1065 1066                                          bcopy(*gids, tmpgids,
1066 1067                                              tmpngids * sizeof (gid_t));
1067 1068                          }
1068 1069  
1069 1070                          if (tmpgids != NULL || tmpngids == 0) {
1070 1071                                  p->auth_srv_uid = tmpuid;
1071 1072                                  p->auth_srv_gid = tmpgid;
1072 1073                                  p->auth_srv_ngids = tmpngids;
1073 1074                                  p->auth_srv_gids = tmpgids;
1074 1075  
1075 1076                                  state = NFS_AUTH_FRESH;
1076 1077                          }
1077 1078                  }
1078 1079  
1079 1080                  /*
1080 1081                   * Set the auth_state and notify waiters.
1081 1082                   */
1082 1083                  mutex_enter(&p->auth_lock);
1083 1084                  p->auth_state = state;
1084 1085                  cv_broadcast(&p->auth_cv);
1085 1086                  mutex_exit(&p->auth_lock);
1086 1087          } else {
1087 1088                  uint_t nach;
1088 1089                  time_t refresh;
1089 1090  
1090 1091                  refresh = gethrestime_sec() - p->auth_freshness;
1091 1092  
1092 1093                  p->auth_time = gethrestime_sec();
1093 1094  
1094 1095                  if (uid != NULL)
1095 1096                          *uid = p->auth_srv_uid;
1096 1097                  if (gid != NULL)
1097 1098                          *gid = p->auth_srv_gid;
1098 1099                  if (ngids != NULL && gids != NULL) {
1099 1100                          *ngids = p->auth_srv_ngids;
1100 1101                          *gids = kmem_alloc(*ngids * sizeof (gid_t), KM_SLEEP);
1101 1102                          bcopy(p->auth_srv_gids, *gids, *ngids * sizeof (gid_t));
1102 1103                  }
1103 1104  
1104 1105                  access = p->auth_access;
1105 1106  
1106 1107                  if ((refresh > NFSAUTH_CACHE_REFRESH) &&
1107 1108                      p->auth_state == NFS_AUTH_FRESH) {
1108 1109                          refreshq_auth_node_t *ran;
1109 1110                          uint_t nacr;
1110 1111  
1111 1112                          p->auth_state = NFS_AUTH_STALE;
1112 1113                          mutex_exit(&p->auth_lock);
1113 1114  
1114 1115                          nacr = atomic_inc_uint_nv(&nfsauth_cache_refresh);
1115 1116                          DTRACE_PROBE3(nfsauth__debug__cache__stale,
1116 1117                              struct exportinfo *, exi,
1117 1118                              struct auth_cache *, p,
1118 1119                              uint_t, nacr);
1119 1120  
1120 1121                          ran = kmem_alloc(sizeof (refreshq_auth_node_t),
1121 1122                              KM_SLEEP);
1122 1123                          ran->ran_auth = p;
1123 1124                          ran->ran_netid = strdup(svc_getnetid(req->rq_xprt));
1124 1125  
1125 1126                          mutex_enter(&nag->refreshq_lock);
1126 1127  
1127 1128                          if (nag->refreshq_thread_state ==
1128 1129                              REFRESHQ_THREAD_NEED_CREATE) {
1129 1130                                  /* Launch nfsauth refresh thread */
1130 1131                                  nag->refreshq_thread_state =
1131 1132                                      REFRESHQ_THREAD_RUNNING;
1132 1133                                  (void) zthread_create(NULL, 0,
1133 1134                                      nfsauth_refresh_thread, nag, 0,
1134 1135                                      minclsyspri);
1135 1136                          }
1136 1137  
1137 1138                          /*
1138 1139                           * We should not add a work queue item if the thread
1139 1140                           * is not accepting them.
1140 1141                           */
1141 1142                          if (nag->refreshq_thread_state ==
1142 1143                              REFRESHQ_THREAD_RUNNING) {
1143 1144                                  refreshq_exi_node_t *ren;
1144 1145  
1145 1146                                  /*
1146 1147                                   * Is there an existing exi_list?
1147 1148                                   */
1148 1149                                  for (ren = list_head(&nag->refreshq_queue);
1149 1150                                      ren != NULL;
1150 1151                                      ren = list_next(&nag->refreshq_queue,
1151 1152                                      ren)) {
1152 1153                                          if (ren->ren_exi == exi) {
1153 1154                                                  list_insert_tail(
1154 1155                                                      &ren->ren_authlist, ran);
1155 1156                                                  break;
1156 1157                                          }
1157 1158                                  }
1158 1159  
1159 1160                                  if (ren == NULL) {
1160 1161                                          ren = kmem_alloc(
1161 1162                                              sizeof (refreshq_exi_node_t),
1162 1163                                              KM_SLEEP);
1163 1164  
1164 1165                                          exi_hold(exi);
1165 1166                                          ren->ren_exi = exi;
1166 1167  
1167 1168                                          list_create(&ren->ren_authlist,
1168 1169                                              sizeof (refreshq_auth_node_t),
1169 1170                                              offsetof(refreshq_auth_node_t,
1170 1171                                              ran_node));
1171 1172  
1172 1173                                          list_insert_tail(&ren->ren_authlist,
1173 1174                                              ran);
1174 1175                                          list_insert_tail(&nag->refreshq_queue,
1175 1176                                              ren);
1176 1177                                  }
1177 1178  
1178 1179                                  cv_broadcast(&nag->refreshq_cv);
1179 1180                          } else {
1180 1181                                  strfree(ran->ran_netid);
1181 1182                                  kmem_free(ran, sizeof (refreshq_auth_node_t));
1182 1183                          }
1183 1184  
1184 1185                          mutex_exit(&nag->refreshq_lock);
1185 1186                  } else {
1186 1187                          mutex_exit(&p->auth_lock);
1187 1188                  }
1188 1189  
1189 1190                  nach = atomic_inc_uint_nv(&nfsauth_cache_hit);
1190 1191                  DTRACE_PROBE2(nfsauth__debug__cache__hit,
1191 1192                      uint_t, nach,
1192 1193                      time_t, refresh);
1193 1194  
1194 1195                  kmem_free(addr.buf, addr.maxlen);
1195 1196          }
1196 1197  
1197 1198          return (access);
1198 1199  
1199 1200  retrieve:
1200 1201          crfree(ac.auth_clnt_cred);
1201 1202  
1202 1203          /*
1203 1204           * Retrieve the required data without caching.
1204 1205           */
1205 1206  
1206 1207          ASSERT(p == NULL);
1207 1208  
1208 1209          atomic_inc_uint(&nfsauth_cache_miss);
1209 1210  
1210 1211          if (nfsauth_retrieve(nag, exi, svc_getnetid(req->rq_xprt), flavor,
1211 1212              &addr, &access, cr, &tmpuid, &tmpgid, &tmpngids, &tmpgids)) {
1212 1213                  if (uid != NULL)
1213 1214                          *uid = tmpuid;
1214 1215                  if (gid != NULL)
1215 1216                          *gid = tmpgid;
1216 1217                  if (ngids != NULL && gids != NULL) {
1217 1218                          *ngids = tmpngids;
1218 1219                          *gids = tmpgids;
1219 1220                  } else {
1220 1221                          kmem_free(tmpgids, tmpngids * sizeof (gid_t));
1221 1222                  }
1222 1223          }
1223 1224  
1224 1225          kmem_free(addr.buf, addr.maxlen);
1225 1226  
1226 1227          return (access);
1227 1228  }
1228 1229  
1229 1230  /*
1230 1231   * Check if the requesting client has access to the filesystem with
1231 1232   * a given nfs flavor number which is an explicitly shared flavor.
1232 1233   */
1233 1234  int
1234 1235  nfsauth4_secinfo_access(struct exportinfo *exi, struct svc_req *req,
1235 1236      int flavor, int perm, cred_t *cr)
1236 1237  {
1237 1238          int access;
1238 1239  
1239 1240          if (! (perm & M_4SEC_EXPORTED)) {
1240 1241                  return (NFSAUTH_DENIED);
1241 1242          }
1242 1243  
1243 1244          /*
1244 1245           * Optimize if there are no lists
1245 1246           */
1246 1247          if ((perm & (M_ROOT | M_NONE | M_MAP)) == 0) {
1247 1248                  perm &= ~M_4SEC_EXPORTED;
1248 1249                  if (perm == M_RO)
1249 1250                          return (NFSAUTH_RO);
1250 1251                  if (perm == M_RW)
1251 1252                          return (NFSAUTH_RW);
1252 1253          }
1253 1254  
1254 1255          access = nfsauth_cache_get(exi, req, flavor, cr, NULL, NULL, NULL,
1255 1256              NULL);
1256 1257  
1257 1258          return (access);
1258 1259  }
1259 1260  
1260 1261  int
1261 1262  nfsauth_access(struct exportinfo *exi, struct svc_req *req, cred_t *cr,
1262 1263      uid_t *uid, gid_t *gid, uint_t *ngids, gid_t **gids)
1263 1264  {
1264 1265          int access, mapaccess;
1265 1266          struct secinfo *sp;
1266 1267          int i, flavor, perm;
1267 1268          int authnone_entry = -1;
1268 1269  
1269 1270          /*
1270 1271           * By default root is mapped to anonymous user.
1271 1272           * This might get overriden later in nfsauth_cache_get().
1272 1273           */
1273 1274          if (crgetuid(cr) == 0) {
1274 1275                  if (uid != NULL)
1275 1276                          *uid = exi->exi_export.ex_anon;
1276 1277                  if (gid != NULL)
1277 1278                          *gid = exi->exi_export.ex_anon;
1278 1279          } else {
1279 1280                  if (uid != NULL)
1280 1281                          *uid = crgetuid(cr);
1281 1282                  if (gid != NULL)
1282 1283                          *gid = crgetgid(cr);
1283 1284          }
1284 1285  
1285 1286          if (ngids != NULL)
1286 1287                  *ngids = 0;
1287 1288          if (gids != NULL)
1288 1289                  *gids = NULL;
1289 1290  
1290 1291          /*
1291 1292           *  Get the nfs flavor number from xprt.
1292 1293           */
1293 1294          flavor = (int)(uintptr_t)req->rq_xprt->xp_cookie;
1294 1295  
1295 1296          /*
1296 1297           * First check the access restrictions on the filesystem.  If
1297 1298           * there are no lists associated with this flavor then there's no
1298 1299           * need to make an expensive call to the nfsauth service or to
1299 1300           * cache anything.
1300 1301           */
1301 1302  
1302 1303          sp = exi->exi_export.ex_secinfo;
1303 1304          for (i = 0; i < exi->exi_export.ex_seccnt; i++) {
1304 1305                  if (flavor != sp[i].s_secinfo.sc_nfsnum) {
1305 1306                          if (sp[i].s_secinfo.sc_nfsnum == AUTH_NONE)
1306 1307                                  authnone_entry = i;
1307 1308                          continue;
1308 1309                  }
1309 1310                  break;
1310 1311          }
1311 1312  
1312 1313          mapaccess = 0;
1313 1314  
1314 1315          if (i >= exi->exi_export.ex_seccnt) {
1315 1316                  /*
1316 1317                   * Flavor not found, but use AUTH_NONE if it exists
1317 1318                   */
1318 1319                  if (authnone_entry == -1)
1319 1320                          return (NFSAUTH_DENIED);
1320 1321                  flavor = AUTH_NONE;
1321 1322                  mapaccess = NFSAUTH_MAPNONE;
1322 1323                  i = authnone_entry;
1323 1324          }
1324 1325  
1325 1326          /*
1326 1327           * If the flavor is in the ex_secinfo list, but not an explicitly
1327 1328           * shared flavor by the user, it is a result of the nfsv4 server
1328 1329           * namespace setup. We will grant RO permission, as for
1329 1330           * a pseudo node except that this node is a shared one.
1330 1331           *
1331 1332           * e.g. flavor in (flavor) indicates that it is not explicitly
1332 1333           *      shared by the user:
1333 1334           *
1334 1335           *              /       (sys, krb5)
1335 1336           *              |
1336 1337           *              export  #share -o sec=sys (krb5)
1337 1338           *              |
1338 1339           *              secure  #share -o sec=krb5
1339 1340           *
1340 1341           *      In this case, when a krb5 request comes in to access
1341 1342           *      /export, RO permission is granted.
1342 1343           */
1343 1344          if (!(sp[i].s_flags & M_4SEC_EXPORTED))
1344 1345                  return (mapaccess | NFSAUTH_RO);
1345 1346  
1346 1347          /*
1347 1348           * Optimize if there are no lists.
1348 1349           * We cannot optimize for AUTH_SYS with NGRPS (16) supplemental groups.
1349 1350           */
1350 1351          perm = sp[i].s_flags;
1351 1352          if ((perm & (M_ROOT | M_NONE | M_MAP)) == 0 && (ngroups_max <= NGRPS ||
1352 1353              flavor != AUTH_SYS || crgetngroups(cr) < NGRPS)) {
1353 1354                  perm &= ~M_4SEC_EXPORTED;
1354 1355                  if (perm == M_RO)
1355 1356                          return (mapaccess | NFSAUTH_RO);
1356 1357                  if (perm == M_RW)
1357 1358                          return (mapaccess | NFSAUTH_RW);
1358 1359          }
1359 1360  
1360 1361          access = nfsauth_cache_get(exi, req, flavor, cr, uid, gid, ngids, gids);
1361 1362  
1362 1363          /*
1363 1364           * For both NFSAUTH_DENIED and NFSAUTH_WRONGSEC we do not care about
1364 1365           * the supplemental groups.
1365 1366           */
1366 1367          if (access & NFSAUTH_DENIED || access & NFSAUTH_WRONGSEC) {
1367 1368                  if (ngids != NULL && gids != NULL) {
1368 1369                          kmem_free(*gids, *ngids * sizeof (gid_t));
1369 1370                          *ngids = 0;
1370 1371                          *gids = NULL;
1371 1372                  }
1372 1373          }
1373 1374  
1374 1375          /*
1375 1376           * Client's security flavor doesn't match with "ro" or
1376 1377           * "rw" list. Try again using AUTH_NONE if present.
1377 1378           */
1378 1379          if ((access & NFSAUTH_WRONGSEC) && (flavor != AUTH_NONE)) {
1379 1380                  /*
1380 1381                   * Have we already encountered AUTH_NONE ?
1381 1382                   */
1382 1383                  if (authnone_entry != -1) {
1383 1384                          mapaccess = NFSAUTH_MAPNONE;
1384 1385                          access = nfsauth_cache_get(exi, req, AUTH_NONE, cr,
1385 1386                              NULL, NULL, NULL, NULL);
1386 1387                  } else {
1387 1388                          /*
1388 1389                           * Check for AUTH_NONE presence.
1389 1390                           */
1390 1391                          for (; i < exi->exi_export.ex_seccnt; i++) {
1391 1392                                  if (sp[i].s_secinfo.sc_nfsnum == AUTH_NONE) {
1392 1393                                          mapaccess = NFSAUTH_MAPNONE;
1393 1394                                          access = nfsauth_cache_get(exi, req,
1394 1395                                              AUTH_NONE, cr, NULL, NULL, NULL,
1395 1396                                              NULL);
1396 1397                                          break;
1397 1398                                  }
1398 1399                          }
1399 1400                  }
1400 1401          }
1401 1402  
1402 1403          if (access & NFSAUTH_DENIED)
1403 1404                  access = NFSAUTH_DENIED;
1404 1405  
1405 1406          return (access | mapaccess);
1406 1407  }
1407 1408  
1408 1409  static void
1409 1410  nfsauth_free_clnt_node(struct auth_cache_clnt *p)
1410 1411  {
1411 1412          void *cookie = NULL;
1412 1413          struct auth_cache *node;
1413 1414  
1414 1415          while ((node = avl_destroy_nodes(&p->authc_tree, &cookie)) != NULL)
1415 1416                  nfsauth_free_node(node);
1416 1417          avl_destroy(&p->authc_tree);
1417 1418  
1418 1419          kmem_free(p->authc_addr.buf, p->authc_addr.maxlen);
1419 1420          rw_destroy(&p->authc_lock);
1420 1421  
1421 1422          kmem_free(p, sizeof (*p));
1422 1423  }
1423 1424  
1424 1425  static void
1425 1426  nfsauth_free_node(struct auth_cache *p)
1426 1427  {
1427 1428          crfree(p->auth_clnt_cred);
1428 1429          kmem_free(p->auth_srv_gids, p->auth_srv_ngids * sizeof (gid_t));
1429 1430          mutex_destroy(&p->auth_lock);
1430 1431          cv_destroy(&p->auth_cv);
1431 1432          kmem_cache_free(exi_cache_handle, p);
1432 1433  }
1433 1434  
1434 1435  /*
1435 1436   * Free the nfsauth cache for a given export
1436 1437   */
1437 1438  void
1438 1439  nfsauth_cache_free(struct exportinfo *exi)
1439 1440  {
1440 1441          int i;
1441 1442  
1442 1443          /*
1443 1444           * The only way we got here was with an exi_rele, which means that no
1444 1445           * auth cache entry is being refreshed.
1445 1446           */
1446 1447  
1447 1448          for (i = 0; i < AUTH_TABLESIZE; i++) {
  
    | 
      ↓ open down ↓ | 
    556 lines elided | 
    
      ↑ open up ↑ | 
  
1448 1449                  avl_tree_t *tree = exi->exi_cache[i];
1449 1450                  void *cookie = NULL;
1450 1451                  struct auth_cache_clnt *node;
1451 1452  
1452 1453                  while ((node = avl_destroy_nodes(tree, &cookie)) != NULL)
1453 1454                          nfsauth_free_clnt_node(node);
1454 1455          }
1455 1456  }
1456 1457  
1457 1458  /*
1458      - * Called by the kernel memory allocator when
1459      - * memory is low. Free unused cache entries.
1460      - * If that's not enough, the VM system will
1461      - * call again for some more.
     1459 + * Called by the kernel memory allocator when memory is low.
     1460 + * Free unused cache entries. If that's not enough, the VM system
     1461 + * will call again for some more.
     1462 + *
     1463 + * This needs to operate on all zones, so we take a reader lock
     1464 + * on the list of zones and walk the list.  This is OK here
     1465 + * because exi_cache_trim doesn't block or cause new objects
     1466 + * to be allocated (basically just frees lots of stuff).
     1467 + * Use care if nfssrv_globals_rwl is taken as reader in any
     1468 + * other cases because it will block nfs_server_zone_init
     1469 + * and nfs_server_zone_fini, which enter as writer.
1462 1470   */
1463 1471  /*ARGSUSED*/
1464 1472  void
1465 1473  exi_cache_reclaim(void *cdrarg)
1466 1474  {
     1475 +        nfs_globals_t *ng;
     1476 +
     1477 +        rw_enter(&nfssrv_globals_rwl, RW_READER);
     1478 +
     1479 +        ng = list_head(&nfssrv_globals_list);
     1480 +        while (ng != NULL) {
     1481 +                exi_cache_reclaim_zone(ng);
     1482 +                ng = list_next(&nfssrv_globals_list, ng);
     1483 +        }
     1484 +
     1485 +        rw_exit(&nfssrv_globals_rwl);
     1486 +}
     1487 +
     1488 +static void
     1489 +exi_cache_reclaim_zone(nfs_globals_t *ng)
     1490 +{
1467 1491          int i;
1468 1492          struct exportinfo *exi;
1469      -        nfs_export_t *ne = nfs_get_export();
     1493 +        nfs_export_t *ne = ng->nfs_export;
1470 1494  
1471 1495          rw_enter(&ne->exported_lock, RW_READER);
1472 1496  
1473 1497          for (i = 0; i < EXPTABLESIZE; i++) {
1474 1498                  for (exi = ne->exptable[i]; exi; exi = exi->fid_hash.next)
1475 1499                          exi_cache_trim(exi);
1476 1500          }
1477 1501  
1478 1502          rw_exit(&ne->exported_lock);
1479 1503  
1480 1504          atomic_inc_uint(&nfsauth_cache_reclaim);
1481 1505  }
1482 1506  
1483      -void
     1507 +static void
1484 1508  exi_cache_trim(struct exportinfo *exi)
1485 1509  {
1486 1510          struct auth_cache_clnt *c;
1487 1511          struct auth_cache_clnt *nextc;
1488 1512          struct auth_cache *p;
1489 1513          struct auth_cache *next;
1490 1514          int i;
1491 1515          time_t stale_time;
1492 1516          avl_tree_t *tree;
1493 1517  
1494 1518          for (i = 0; i < AUTH_TABLESIZE; i++) {
1495 1519                  tree = exi->exi_cache[i];
1496 1520                  stale_time = gethrestime_sec() - NFSAUTH_CACHE_TRIM;
1497 1521                  rw_enter(&exi->exi_cache_lock, RW_READER);
1498 1522  
1499 1523                  /*
1500 1524                   * Free entries that have not been
1501 1525                   * used for NFSAUTH_CACHE_TRIM seconds.
1502 1526                   */
1503 1527                  for (c = avl_first(tree); c != NULL; c = AVL_NEXT(tree, c)) {
1504 1528                          /*
1505 1529                           * We are being called by the kmem subsystem to reclaim
1506 1530                           * memory so don't block if we can't get the lock.
1507 1531                           */
1508 1532                          if (rw_tryenter(&c->authc_lock, RW_WRITER) == 0) {
1509 1533                                  exi_cache_auth_reclaim_failed++;
1510 1534                                  rw_exit(&exi->exi_cache_lock);
1511 1535                                  return;
1512 1536                          }
1513 1537  
1514 1538                          for (p = avl_first(&c->authc_tree); p != NULL;
1515 1539                              p = next) {
1516 1540                                  next = AVL_NEXT(&c->authc_tree, p);
1517 1541  
1518 1542                                  ASSERT(p->auth_state != NFS_AUTH_INVALID);
1519 1543  
1520 1544                                  mutex_enter(&p->auth_lock);
1521 1545  
1522 1546                                  /*
1523 1547                                   * We won't trim recently used and/or WAITING
1524 1548                                   * entries.
1525 1549                                   */
1526 1550                                  if (p->auth_time > stale_time ||
1527 1551                                      p->auth_state == NFS_AUTH_WAITING) {
1528 1552                                          mutex_exit(&p->auth_lock);
1529 1553                                          continue;
1530 1554                                  }
1531 1555  
1532 1556                                  DTRACE_PROBE1(nfsauth__debug__trim__state,
1533 1557                                      auth_state_t, p->auth_state);
1534 1558  
1535 1559                                  /*
1536 1560                                   * STALE and REFRESHING entries need to be
1537 1561                                   * marked INVALID only because they are
1538 1562                                   * referenced by some other structures or
1539 1563                                   * threads.  They will be freed later.
1540 1564                                   */
1541 1565                                  if (p->auth_state == NFS_AUTH_STALE ||
1542 1566                                      p->auth_state == NFS_AUTH_REFRESHING) {
1543 1567                                          p->auth_state = NFS_AUTH_INVALID;
1544 1568                                          mutex_exit(&p->auth_lock);
1545 1569  
1546 1570                                          avl_remove(&c->authc_tree, p);
1547 1571                                  } else {
1548 1572                                          mutex_exit(&p->auth_lock);
1549 1573  
1550 1574                                          avl_remove(&c->authc_tree, p);
1551 1575                                          nfsauth_free_node(p);
1552 1576                                  }
1553 1577                          }
1554 1578                          rw_exit(&c->authc_lock);
1555 1579                  }
1556 1580  
1557 1581                  if (rw_tryupgrade(&exi->exi_cache_lock) == 0) {
1558 1582                          rw_exit(&exi->exi_cache_lock);
1559 1583                          exi_cache_clnt_reclaim_failed++;
1560 1584                          continue;
1561 1585                  }
1562 1586  
1563 1587                  for (c = avl_first(tree); c != NULL; c = nextc) {
1564 1588                          nextc = AVL_NEXT(tree, c);
1565 1589  
1566 1590                          if (avl_is_empty(&c->authc_tree) == B_FALSE)
1567 1591                                  continue;
1568 1592  
1569 1593                          avl_remove(tree, c);
1570 1594  
1571 1595                          nfsauth_free_clnt_node(c);
1572 1596                  }
1573 1597  
1574 1598                  rw_exit(&exi->exi_cache_lock);
1575 1599          }
1576 1600  }
  
    | 
      ↓ open down ↓ | 
    83 lines elided | 
    
      ↑ open up ↑ | 
  
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX