Clean up merge problems with illumos#11083 (nfs-zone)
    
      
          --- old/usr/src/uts/common/fs/nfs/nfs_auth.c
          +++ new/usr/src/uts/common/fs/nfs/nfs_auth.c
   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  
  22   22  /*
  23   23   * Copyright (c) 1995, 2010, Oracle and/or its affiliates. All rights reserved.
  24      - * Copyright (c) 2015 Joyent, Inc.  All rights reserved.
  25   24   * Copyright (c) 2015 by Delphix. All rights reserved.
  26   25   * Copyright (c) 2015 Joyent, Inc.  All rights reserved.
  27   26   * Copyright 2018 Nexenta Systems, Inc. All rights reserved.
  28   27   */
  29   28  
  30   29  #include <sys/param.h>
  31   30  #include <sys/errno.h>
  32   31  #include <sys/vfs.h>
  33   32  #include <sys/vnode.h>
  34   33  #include <sys/cred.h>
  35   34  #include <sys/cmn_err.h>
  36   35  #include <sys/systm.h>
  37   36  #include <sys/kmem.h>
  38   37  #include <sys/pathname.h>
  39   38  #include <sys/utsname.h>
  40   39  #include <sys/debug.h>
  41   40  #include <sys/door.h>
  42   41  #include <sys/sdt.h>
  43   42  #include <sys/thread.h>
  44   43  #include <sys/avl.h>
  45   44  
  46   45  #include <rpc/types.h>
  47   46  #include <rpc/auth.h>
  48   47  #include <rpc/clnt.h>
  49   48  
  50   49  #include <nfs/nfs.h>
  51   50  #include <nfs/export.h>
  52   51  #include <nfs/nfs_clnt.h>
  53   52  #include <nfs/auth.h>
  54   53  
  55   54  static struct kmem_cache *exi_cache_handle;
  56   55  static void exi_cache_reclaim(void *);
  57   56  static void exi_cache_reclaim_zone(nfs_globals_t *);
  58   57  static void exi_cache_trim(struct exportinfo *exi);
  59   58  
  60   59  extern pri_t minclsyspri;
  61   60  
  62   61  /* NFS auth cache statistics */
  63   62  volatile uint_t nfsauth_cache_hit;
  64   63  volatile uint_t nfsauth_cache_miss;
  65   64  volatile uint_t nfsauth_cache_refresh;
  66   65  volatile uint_t nfsauth_cache_reclaim;
  67   66  volatile uint_t exi_cache_auth_reclaim_failed;
  68   67  volatile uint_t exi_cache_clnt_reclaim_failed;
  69   68  
  70   69  /*
  71   70   * The lifetime of an auth cache entry:
  72   71   * ------------------------------------
  73   72   *
  74   73   * An auth cache entry is created with both the auth_time
  75   74   * and auth_freshness times set to the current time.
  76   75   *
  77   76   * Upon every client access which results in a hit, the
  78   77   * auth_time will be updated.
  79   78   *
  80   79   * If a client access determines that the auth_freshness
  81   80   * indicates that the entry is STALE, then it will be
  82   81   * refreshed. Note that this will explicitly reset
  83   82   * auth_time.
  84   83   *
  85   84   * When the REFRESH successfully occurs, then the
  86   85   * auth_freshness is updated.
  87   86   *
  88   87   * There are two ways for an entry to leave the cache:
  89   88   *
  90   89   * 1) Purged by an action on the export (remove or changed)
  91   90   * 2) Memory backpressure from the kernel (check against NFSAUTH_CACHE_TRIM)
  92   91   *
  93   92   * For 2) we check the timeout value against auth_time.
  94   93   */
  95   94  
  96   95  /*
  97   96   * Number of seconds until we mark for refresh an auth cache entry.
  98   97   */
  99   98  #define NFSAUTH_CACHE_REFRESH 600
 100   99  
 101  100  /*
 102  101   * Number of idle seconds until we yield to backpressure
 103  102   * to trim a cache entry.
 104  103   */
 105  104  #define NFSAUTH_CACHE_TRIM 3600
 106  105  
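These two constants implement the lifetime rules described in the comment
above: auth_freshness drives the refresh decision, auth_time drives the trim
decision.  A minimal sketch of the two checks, for orientation only (the
helper names are hypothetical; the real tests live in nfsauth_cache_get()
below and in the exi_cache_trim() path declared above):

    /* Hypothetical helpers illustrating the two timeouts (not in this file). */
    static boolean_t
    auth_needs_refresh(const struct auth_cache *p, time_t now)
    {
            /* Not re-validated with mountd for NFSAUTH_CACHE_REFRESH seconds. */
            return (now - p->auth_freshness > NFSAUTH_CACHE_REFRESH ?
                B_TRUE : B_FALSE);
    }

    static boolean_t
    auth_is_trimmable(const struct auth_cache *p, time_t now)
    {
            /* No client access for at least NFSAUTH_CACHE_TRIM idle seconds. */
            return (now - p->auth_time > NFSAUTH_CACHE_TRIM ? B_TRUE : B_FALSE);
    }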
 107  106  /*
  108  107   * While we could encapsulate the exi_list inside the
 109  108   * exi structure, we can't do that for the auth_list.
 110  109   * So, to keep things looking clean, we keep them both
 111  110   * in these external lists.
 112  111   */
 113  112  typedef struct refreshq_exi_node {
 114  113          struct exportinfo       *ren_exi;
 115  114          list_t                  ren_authlist;
 116  115          list_node_t             ren_node;
 117  116  } refreshq_exi_node_t;
 118  117  
 119  118  typedef struct refreshq_auth_node {
 120  119          struct auth_cache       *ran_auth;
 121  120          char                    *ran_netid;
 122  121          list_node_t             ran_node;
 123  122  } refreshq_auth_node_t;
 124  123  
 125  124  /*
 126  125   * Used to manipulate things on the refreshq_queue.  Note that the refresh
 127  126   * thread will effectively pop a node off of the queue, at which point it
 128  127   * will no longer need to hold the mutex.
 129  128   */
 130  129  static kmutex_t refreshq_lock;
 131  130  static list_t refreshq_queue;
 132  131  static kcondvar_t refreshq_cv;
 133  132  
 134  133  /*
 135  134   * If there is ever a problem with loading the module, then nfsauth_fini()
 136  135   * needs to be called to remove state.  In that event, since the refreshq
 137  136   * thread has been started, they need to work together to get rid of state.
 138  137   */
 139  138  typedef enum nfsauth_refreshq_thread_state {
 140  139          REFRESHQ_THREAD_RUNNING,
 141  140          REFRESHQ_THREAD_FINI_REQ,
 142  141          REFRESHQ_THREAD_HALTED,
 143  142          REFRESHQ_THREAD_NEED_CREATE
 144  143  } nfsauth_refreshq_thread_state_t;
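For orientation, the transitions between these states as they appear in the
code below (nfsauth_cache_get() creates the thread, nfsauth_zone_shutdown()
asks it to stop, and nfsauth_refresh_thread() acknowledges); summary only:

    /*
     * NEED_CREATE -> RUNNING   first STALE entry is queued; zthread_create()
     * RUNNING     -> FINI_REQ  nfsauth_zone_shutdown() requests shutdown
     * FINI_REQ    -> HALTED    nfsauth_refresh_thread() exits and signals
     *                          refreshq_cv so the shutdown path can proceed
     */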
 145  144  
 146  145  typedef struct nfsauth_globals {
 147  146          kmutex_t        mountd_lock;
 148  147          door_handle_t   mountd_dh;
 149  148  
 150  149          /*
 151  150           * Used to manipulate things on the refreshq_queue.  Note that the
 152  151           * refresh thread will effectively pop a node off of the queue,
 153  152           * at which point it will no longer need to hold the mutex.
 154  153           */
 155  154          kmutex_t        refreshq_lock;
 156  155          list_t          refreshq_queue;
 157  156          kcondvar_t      refreshq_cv;
 158  157  
 159  158          /*
 160  159           * A list_t would be overkill.  These are auth_cache entries which are
 161  160           * no longer linked to an exi.  It should be the case that all of their
 162  161           * states are NFS_AUTH_INVALID, i.e., the only way to be put on this
 163  162           * list is iff their state indicated that they had been placed on the
 164  163           * refreshq_queue.
 165  164           *
 166  165           * Note that while there is no link from the exi or back to the exi,
 167  166           * the exi can not go away until these entries are harvested.
 168  167           */
 169  168          struct auth_cache               *refreshq_dead_entries;
 170  169          nfsauth_refreshq_thread_state_t refreshq_thread_state;
 171  170  
 172  171  } nfsauth_globals_t;
 173  172  
 174  173  static void nfsauth_free_node(struct auth_cache *);
 175  174  static void nfsauth_refresh_thread(nfsauth_globals_t *);
 176  175  
 177  176  static int nfsauth_cache_compar(const void *, const void *);
 178  177  
 179  178  static nfsauth_globals_t *
 180  179  nfsauth_get_zg(void)
 181  180  {
 182  181          nfs_globals_t *ng = nfs_srv_getzg();
 183  182          nfsauth_globals_t *nag = ng->nfs_auth;
 184  183          ASSERT(nag != NULL);
 185  184          return (nag);
 186  185  }
 187  186  
 188  187  void
 189  188  mountd_args(uint_t did)
 190  189  {
 191  190          nfsauth_globals_t *nag;
 192  191  
 193  192          nag = nfsauth_get_zg();
 194  193          mutex_enter(&nag->mountd_lock);
 195  194          if (nag->mountd_dh != NULL)
 196  195                  door_ki_rele(nag->mountd_dh);
 197  196          nag->mountd_dh = door_ki_lookup(did);
 198  197          mutex_exit(&nag->mountd_lock);
 199  198  }
 200  199  
 201  200  void
 202  201  nfsauth_init(void)
 203  202  {
 204  203          exi_cache_handle = kmem_cache_create("exi_cache_handle",
 205  204              sizeof (struct auth_cache), 0, NULL, NULL,
 206  205              exi_cache_reclaim, NULL, NULL, 0);
 207  206  }
 208  207  
 209  208  void
 210  209  nfsauth_fini(void)
 211  210  {
 212  211          kmem_cache_destroy(exi_cache_handle);
 213  212  }
 214  213  
 215  214  void
 216  215  nfsauth_zone_init(nfs_globals_t *ng)
 217  216  {
 218  217          nfsauth_globals_t *nag;
 219  218  
 220  219          nag = kmem_zalloc(sizeof (*nag), KM_SLEEP);
 221  220  
 222  221          /*
 223  222           * mountd can be restarted by smf(5).  We need to make sure
 224  223           * the updated door handle will safely make it to mountd_dh.
 225  224           */
 226  225          mutex_init(&nag->mountd_lock, NULL, MUTEX_DEFAULT, NULL);
 227  226          mutex_init(&nag->refreshq_lock, NULL, MUTEX_DEFAULT, NULL);
 228  227          list_create(&nag->refreshq_queue, sizeof (refreshq_exi_node_t),
 229  228              offsetof(refreshq_exi_node_t, ren_node));
 230  229          cv_init(&nag->refreshq_cv, NULL, CV_DEFAULT, NULL);
 231  230          nag->refreshq_thread_state = REFRESHQ_THREAD_NEED_CREATE;
 232  231  
 233  232          ng->nfs_auth = nag;
 234  233  }
 235  234  
 236  235  void
 237  236  nfsauth_zone_shutdown(nfs_globals_t *ng)
 238  237  {
 239  238          refreshq_exi_node_t     *ren;
 240  239          nfsauth_globals_t       *nag = ng->nfs_auth;
 241  240  
 242  241          /* Prevent the nfsauth_refresh_thread from getting new work */
 243  242          mutex_enter(&nag->refreshq_lock);
 244  243          if (nag->refreshq_thread_state == REFRESHQ_THREAD_RUNNING) {
 245  244                  nag->refreshq_thread_state = REFRESHQ_THREAD_FINI_REQ;
 246  245                  cv_broadcast(&nag->refreshq_cv);
 247  246  
 248  247                  /* Wait for nfsauth_refresh_thread() to exit */
 249  248                  while (nag->refreshq_thread_state != REFRESHQ_THREAD_HALTED)
 250  249                          cv_wait(&nag->refreshq_cv, &nag->refreshq_lock);
 251  250          }
 252  251          mutex_exit(&nag->refreshq_lock);
 253  252  
 254  253          /*
 255  254           * Walk the exi_list and in turn, walk the auth_lists and free all
 256  255           * lists.  In addition, free INVALID auth_cache entries.
 257  256           */
 258  257          while ((ren = list_remove_head(&nag->refreshq_queue))) {
 259  258                  refreshq_auth_node_t *ran;
 260  259  
 261  260                  while ((ran = list_remove_head(&ren->ren_authlist)) != NULL) {
 262  261                          struct auth_cache *p = ran->ran_auth;
 263  262                          if (p->auth_state == NFS_AUTH_INVALID)
 264  263                                  nfsauth_free_node(p);
 265  264                          strfree(ran->ran_netid);
 266  265                          kmem_free(ran, sizeof (*ran));
 267  266                  }
 268  267  
 269  268                  list_destroy(&ren->ren_authlist);
 270  269                  exi_rele(ren->ren_exi);
 271  270                  kmem_free(ren, sizeof (*ren));
 272  271          }
 273  272  }
 274  273  
 275  274  void
 276  275  nfsauth_zone_fini(nfs_globals_t *ng)
 277  276  {
 278  277          nfsauth_globals_t *nag = ng->nfs_auth;
 279  278  
 280  279          ng->nfs_auth = NULL;
 281  280  
 282  281          list_destroy(&nag->refreshq_queue);
 283  282          cv_destroy(&nag->refreshq_cv);
 284  283          mutex_destroy(&nag->refreshq_lock);
 285  284          mutex_destroy(&nag->mountd_lock);
 286  285          /* Extra cleanup. */
 287  286          if (nag->mountd_dh != NULL)
 288  287                  door_ki_rele(nag->mountd_dh);
 289  288          kmem_free(nag, sizeof (*nag));
 290  289  }
 291  290  
 292  291  /*
 293  292   * Convert the address in a netbuf to
 294  293   * a hash index for the auth_cache table.
 295  294   */
 296  295  static int
 297  296  hash(struct netbuf *a)
 298  297  {
 299  298          int i, h = 0;
 300  299  
 301  300          for (i = 0; i < a->len; i++)
 302  301                  h ^= a->buf[i];
 303  302  
 304  303          return (h & (AUTH_TABLESIZE - 1));
 305  304  }
 306  305  
 307  306  /*
 308  307   * Mask out the components of an
 309  308   * address that do not identify
 310  309   * a host. For socket addresses the
 311  310   * masking gets rid of the port number.
 312  311   */
 313  312  static void
 314  313  addrmask(struct netbuf *addr, struct netbuf *mask)
 315  314  {
 316  315          int i;
 317  316  
 318  317          for (i = 0; i < addr->len; i++)
 319  318                  addr->buf[i] &= mask->buf[i];
 320  319  }
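Together, the two helpers above normalize the caller's address and select the
per-export cache bucket.  nfsauth_cache_get() below uses them exactly this
way (sketch reusing that function's variable names):

    /* Mask off the port so every request from a host maps to one client. */
    addrmask(&addr, taddrmask);

    /* Pick one of the AUTH_TABLESIZE AVL trees hanging off the export. */
    tree = exi->exi_cache[hash(&addr)];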
 321  320  
 322  321  /*
 323  322   * nfsauth4_access is used for NFS V4 auth checking. Besides doing
 324  323   * the common nfsauth_access(), it will check if the client can
 325  324   * have a limited access to this vnode even if the security flavor
 326  325   * used does not meet the policy.
 327  326   */
 328  327  int
 329  328  nfsauth4_access(struct exportinfo *exi, vnode_t *vp, struct svc_req *req,
 330  329      cred_t *cr, uid_t *uid, gid_t *gid, uint_t *ngids, gid_t **gids)
 331  330  {
 332  331          int access;
 333  332  
 334  333          access = nfsauth_access(exi, req, cr, uid, gid, ngids, gids);
 335  334  
 336  335          /*
 337  336           * There are cases that the server needs to allow the client
 338  337           * to have a limited view.
 339  338           *
 340  339           * e.g.
 341  340           * /export is shared as "sec=sys,rw=dfs-test-4,sec=krb5,rw"
 342  341           * /export/home is shared as "sec=sys,rw"
 343  342           *
 344  343           * When the client mounts /export with sec=sys, the client
 345  344           * would get a limited view with RO access on /export to see
 346  345           * "home" only because the client is allowed to access
 347  346           * /export/home with auth_sys.
 348  347           */
 349  348          if (access & NFSAUTH_DENIED || access & NFSAUTH_WRONGSEC) {
 350  349                  /*
 351  350                   * Allow ro permission with LIMITED view if there is a
 352  351                   * sub-dir exported under vp.
 353  352                   */
 354  353                  if (has_visible(exi, vp))
 355  354                          return (NFSAUTH_LIMITED);
 356  355          }
 357  356  
 358  357          return (access);
 359  358  }
 360  359  
 361  360  static void
 362  361  sys_log(const char *msg)
 363  362  {
 364  363          static time_t   tstamp = 0;
 365  364          time_t          now;
 366  365  
 367  366          /*
 368  367           * msg is shown (at most) once per minute
 369  368           */
 370  369          now = gethrestime_sec();
 371  370          if ((tstamp + 60) < now) {
 372  371                  tstamp = now;
 373  372                  cmn_err(CE_WARN, msg);
 374  373          }
 375  374  }
 376  375  
 377  376  /*
 378  377   * Callup to the mountd to get access information in the kernel.
 379  378   */
 380  379  static bool_t
 381  380  nfsauth_retrieve(nfsauth_globals_t *nag, struct exportinfo *exi,
 382  381      char *req_netid, int flavor, struct netbuf *addr, int *access,
 383  382      cred_t *clnt_cred, uid_t *srv_uid, gid_t *srv_gid, uint_t *srv_gids_cnt,
 384  383      gid_t **srv_gids)
 385  384  {
 386  385          varg_t                    varg = {0};
 387  386          nfsauth_res_t             res = {0};
 388  387          XDR                       xdrs;
 389  388          size_t                    absz;
 390  389          caddr_t                   abuf;
 391  390          int                       last = 0;
 392  391          door_arg_t                da;
 393  392          door_info_t               di;
 394  393          door_handle_t             dh;
 395  394          uint_t                    ntries = 0;
 396  395  
 397  396          /*
 398  397           * No entry in the cache for this client/flavor
 399  398           * so we need to call the nfsauth service in the
 400  399           * mount daemon.
 401  400           */
 402  401  
 403  402          varg.vers = V_PROTO;
 404  403          varg.arg_u.arg.cmd = NFSAUTH_ACCESS;
 405  404          varg.arg_u.arg.areq.req_client.n_len = addr->len;
 406  405          varg.arg_u.arg.areq.req_client.n_bytes = addr->buf;
 407  406          varg.arg_u.arg.areq.req_netid = req_netid;
 408  407          varg.arg_u.arg.areq.req_path = exi->exi_export.ex_path;
 409  408          varg.arg_u.arg.areq.req_flavor = flavor;
 410  409          varg.arg_u.arg.areq.req_clnt_uid = crgetuid(clnt_cred);
 411  410          varg.arg_u.arg.areq.req_clnt_gid = crgetgid(clnt_cred);
 412  411          varg.arg_u.arg.areq.req_clnt_gids.len = crgetngroups(clnt_cred);
 413  412          varg.arg_u.arg.areq.req_clnt_gids.val = (gid_t *)crgetgroups(clnt_cred);
 414  413  
 415  414          DTRACE_PROBE1(nfsserv__func__nfsauth__varg, varg_t *, &varg);
 416  415  
 417  416          /*
 418  417           * Setup the XDR stream for encoding the arguments. Notice that
 419  418           * in addition to the args having variable fields (req_netid and
 420  419           * req_path), the argument data structure is itself versioned,
 421  420           * so we need to make sure we can size the arguments buffer
 422  421           * appropriately to encode all the args. If we can't get sizing
 423  422           * info _or_ properly encode the arguments, there's really no
  424  423           * point in continuing, so we fail the request.
 425  424           */
 426  425          if ((absz = xdr_sizeof(xdr_varg, &varg)) == 0) {
 427  426                  *access = NFSAUTH_DENIED;
 428  427                  return (FALSE);
 429  428          }
 430  429  
 431  430          abuf = (caddr_t)kmem_alloc(absz, KM_SLEEP);
 432  431          xdrmem_create(&xdrs, abuf, absz, XDR_ENCODE);
 433  432          if (!xdr_varg(&xdrs, &varg)) {
 434  433                  XDR_DESTROY(&xdrs);
 435  434                  goto fail;
 436  435          }
 437  436          XDR_DESTROY(&xdrs);
 438  437  
 439  438          /*
 440  439           * Prepare the door arguments
 441  440           *
 442  441           * We don't know the size of the message the daemon
 443  442           * will pass back to us.  By setting rbuf to NULL,
 444  443           * we force the door code to allocate a buf of the
 445  444           * appropriate size.  We must set rsize > 0, however,
 446  445           * else the door code acts as if no response was
 447  446           * expected and doesn't pass the data to us.
 448  447           */
 449  448          da.data_ptr = (char *)abuf;
 450  449          da.data_size = absz;
 451  450          da.desc_ptr = NULL;
 452  451          da.desc_num = 0;
 453  452          da.rbuf = NULL;
 454  453          da.rsize = 1;
 455  454  
 456  455  retry:
 457  456          mutex_enter(&nag->mountd_lock);
 458  457          dh = nag->mountd_dh;
 459  458          if (dh != NULL)
 460  459                  door_ki_hold(dh);
 461  460          mutex_exit(&nag->mountd_lock);
 462  461  
 463  462          if (dh == NULL) {
 464  463                  /*
 465  464                   * The rendezvous point has not been established yet!
 466  465                   * This could mean that either mountd(1m) has not yet
 467  466                   * been started or that _this_ routine nuked the door
 468  467                   * handle after receiving an EINTR for a REVOKED door.
 469  468                   *
 470  469                   * Returning NFSAUTH_DROP will cause the NFS client
 471  470                   * to retransmit the request, so let's try to be more
  472  471                   * resilient and attempt for ntries before we bail.
 473  472                   */
 474  473                  if (++ntries % NFSAUTH_DR_TRYCNT) {
 475  474                          delay(hz);
 476  475                          goto retry;
 477  476                  }
 478  477  
 479  478                  kmem_free(abuf, absz);
 480  479  
 481  480                  sys_log("nfsauth: mountd has not established door");
 482  481                  *access = NFSAUTH_DROP;
 483  482                  return (FALSE);
 484  483          }
 485  484  
 486  485          ntries = 0;
 487  486  
 488  487          /*
 489  488           * Now that we've got what we need, place the call.
 490  489           */
 491  490          switch (door_ki_upcall_limited(dh, &da, NULL, SIZE_MAX, 0)) {
 492  491          case 0:                         /* Success */
 493  492                  door_ki_rele(dh);
 494  493  
 495  494                  if (da.data_ptr == NULL && da.data_size == 0) {
 496  495                          /*
 497  496                           * The door_return that contained the data
 498  497                           * failed! We're here because of the 2nd
 499  498                           * door_return (w/o data) such that we can
 500  499                           * get control of the thread (and exit
 501  500                           * gracefully).
 502  501                           */
 503  502                          DTRACE_PROBE1(nfsserv__func__nfsauth__door__nil,
 504  503                              door_arg_t *, &da);
 505  504                          goto fail;
 506  505                  }
 507  506  
 508  507                  break;
 509  508  
 510  509          case EAGAIN:
 511  510                  /*
 512  511                   * Server out of resources; back off for a bit
 513  512                   */
 514  513                  door_ki_rele(dh);
 515  514                  delay(hz);
 516  515                  goto retry;
 517  516                  /* NOTREACHED */
 518  517  
 519  518          case EINTR:
 520  519                  if (!door_ki_info(dh, &di)) {
 521  520                          door_ki_rele(dh);
 522  521  
 523  522                          if (di.di_attributes & DOOR_REVOKED) {
 524  523                                  /*
 525  524                                   * The server barfed and revoked
 526  525                                   * the (existing) door on us; we
 527  526                                   * want to wait to give smf(5) a
 528  527                                   * chance to restart mountd(1m)
 529  528                                   * and establish a new door handle.
 530  529                                   */
 531  530                                  mutex_enter(&nag->mountd_lock);
 532  531                                  if (dh == nag->mountd_dh) {
 533  532                                          door_ki_rele(nag->mountd_dh);
 534  533                                          nag->mountd_dh = NULL;
 535  534                                  }
 536  535                                  mutex_exit(&nag->mountd_lock);
 537  536                                  delay(hz);
 538  537                                  goto retry;
 539  538                          }
 540  539                          /*
 541  540                           * If the door was _not_ revoked on us,
 542  541                           * then more than likely we took an INTR,
 543  542                           * so we need to fail the operation.
 544  543                           */
 545  544                          goto fail;
 546  545                  }
 547  546                  /*
 548  547                   * The only failure that can occur from getting
 549  548                   * the door info is EINVAL, so we let the code
 550  549                   * below handle it.
 551  550                   */
 552  551                  /* FALLTHROUGH */
 553  552  
 554  553          case EBADF:
 555  554          case EINVAL:
 556  555          default:
 557  556                  /*
 558  557                   * If we have a stale door handle, give smf a last
 559  558                   * chance to start it by sleeping for a little bit.
 560  559                   * If we're still hosed, we'll fail the call.
 561  560                   *
 562  561                   * Since we're going to reacquire the door handle
 563  562                   * upon the retry, we opt to sleep for a bit and
 564  563                   * _not_ to clear mountd_dh. If mountd restarted
 565  564                   * and was able to set mountd_dh, we should see
 566  565                   * the new instance; if not, we won't get caught
 567  566                   * up in the retry/DELAY loop.
 568  567                   */
 569  568                  door_ki_rele(dh);
 570  569                  if (!last) {
 571  570                          delay(hz);
 572  571                          last++;
 573  572                          goto retry;
 574  573                  }
 575  574                  sys_log("nfsauth: stale mountd door handle");
 576  575                  goto fail;
 577  576          }
 578  577  
 579  578          ASSERT(da.rbuf != NULL);
 580  579  
 581  580          /*
 582  581           * No door errors encountered; setup the XDR stream for decoding
 583  582           * the results. If we fail to decode the results, we've got no
 584  583           * other recourse than to fail the request.
 585  584           */
 586  585          xdrmem_create(&xdrs, da.rbuf, da.rsize, XDR_DECODE);
 587  586          if (!xdr_nfsauth_res(&xdrs, &res)) {
 588  587                  xdr_free(xdr_nfsauth_res, (char *)&res);
 589  588                  XDR_DESTROY(&xdrs);
 590  589                  kmem_free(da.rbuf, da.rsize);
 591  590                  goto fail;
 592  591          }
 593  592          XDR_DESTROY(&xdrs);
 594  593          kmem_free(da.rbuf, da.rsize);
 595  594  
 596  595          DTRACE_PROBE1(nfsserv__func__nfsauth__results, nfsauth_res_t *, &res);
 597  596          switch (res.stat) {
 598  597                  case NFSAUTH_DR_OKAY:
 599  598                          *access = res.ares.auth_perm;
 600  599                          *srv_uid = res.ares.auth_srv_uid;
 601  600                          *srv_gid = res.ares.auth_srv_gid;
 602  601  
 603  602                          if ((*srv_gids_cnt = res.ares.auth_srv_gids.len) != 0) {
 604  603                                  *srv_gids = kmem_alloc(*srv_gids_cnt *
 605  604                                      sizeof (gid_t), KM_SLEEP);
 606  605                                  bcopy(res.ares.auth_srv_gids.val, *srv_gids,
 607  606                                      *srv_gids_cnt * sizeof (gid_t));
 608  607                          } else {
 609  608                                  *srv_gids = NULL;
 610  609                          }
 611  610  
 612  611                          break;
 613  612  
 614  613                  case NFSAUTH_DR_EFAIL:
 615  614                  case NFSAUTH_DR_DECERR:
 616  615                  case NFSAUTH_DR_BADCMD:
 617  616                  default:
 618  617                          xdr_free(xdr_nfsauth_res, (char *)&res);
 619  618  fail:
 620  619                          *access = NFSAUTH_DENIED;
 621  620                          kmem_free(abuf, absz);
 622  621                          return (FALSE);
 623  622                          /* NOTREACHED */
 624  623          }
 625  624  
 626  625          xdr_free(xdr_nfsauth_res, (char *)&res);
 627  626          kmem_free(abuf, absz);
 628  627  
 629  628          return (TRUE);
 630  629  }
 631  630  
 632  631  static void
 633  632  nfsauth_refresh_thread(nfsauth_globals_t *nag)
 634  633  {
 635  634          refreshq_exi_node_t     *ren;
 636  635          refreshq_auth_node_t    *ran;
 637  636  
 638  637          struct exportinfo       *exi;
 639  638  
 640  639          int                     access;
 641  640          bool_t                  retrieval;
 642  641  
 643  642          callb_cpr_t             cprinfo;
 644  643  
 645  644          CALLB_CPR_INIT(&cprinfo, &nag->refreshq_lock, callb_generic_cpr,
 646  645              "nfsauth_refresh");
 647  646  
 648  647          for (;;) {
 649  648                  mutex_enter(&nag->refreshq_lock);
 650  649                  if (nag->refreshq_thread_state != REFRESHQ_THREAD_RUNNING) {
 651  650                          /* Keep the hold on the lock! */
 652  651                          break;
 653  652                  }
 654  653  
 655  654                  ren = list_remove_head(&nag->refreshq_queue);
 656  655                  if (ren == NULL) {
 657  656                          CALLB_CPR_SAFE_BEGIN(&cprinfo);
 658  657                          cv_wait(&nag->refreshq_cv, &nag->refreshq_lock);
 659  658                          CALLB_CPR_SAFE_END(&cprinfo, &nag->refreshq_lock);
 660  659                          mutex_exit(&nag->refreshq_lock);
 661  660                          continue;
 662  661                  }
 663  662                  mutex_exit(&nag->refreshq_lock);
 664  663  
 665  664                  exi = ren->ren_exi;
 666  665                  ASSERT(exi != NULL);
 667  666  
 668  667                  /*
 669  668                   * Since the ren was removed from the refreshq_queue above,
 670  669                   * this is the only thread aware about the ren existence, so we
 671  670                   * have the exclusive ownership of it and we do not need to
 672  671                   * protect it by any lock.
 673  672                   */
 674  673                  while ((ran = list_remove_head(&ren->ren_authlist))) {
 675  674                          uid_t uid;
 676  675                          gid_t gid;
 677  676                          uint_t ngids;
 678  677                          gid_t *gids;
 679  678                          struct auth_cache *p = ran->ran_auth;
 680  679                          char *netid = ran->ran_netid;
 681  680  
 682  681                          ASSERT(p != NULL);
 683  682                          ASSERT(netid != NULL);
 684  683  
 685  684                          kmem_free(ran, sizeof (refreshq_auth_node_t));
 686  685  
 687  686                          mutex_enter(&p->auth_lock);
 688  687  
 689  688                          /*
 690  689                           * Once the entry goes INVALID, it can not change
 691  690                           * state.
 692  691                           *
 693  692                           * No need to refresh entries also in a case we are
 694  693                           * just shutting down.
 695  694                           *
 696  695                           * In general, there is no need to hold the
 697  696                           * refreshq_lock to test the refreshq_thread_state.  We
 698  697                           * do hold it at other places because there is some
 699  698                           * related thread synchronization (or some other tasks)
 700  699                           * close to the refreshq_thread_state check.
 701  700                           *
 702  701                           * The check for the refreshq_thread_state value here
 703  702                           * is purely advisory to allow the faster
 704  703                           * nfsauth_refresh_thread() shutdown.  In a case we
 705  704                           * will miss such advisory, nothing catastrophic
 706  705                           * happens: we will just spin longer here before the
 707  706                           * shutdown.
 708  707                           */
 709  708                          if (p->auth_state == NFS_AUTH_INVALID ||
 710  709                              nag->refreshq_thread_state !=
 711  710                              REFRESHQ_THREAD_RUNNING) {
 712  711                                  mutex_exit(&p->auth_lock);
 713  712  
 714  713                                  if (p->auth_state == NFS_AUTH_INVALID)
 715  714                                          nfsauth_free_node(p);
 716  715  
 717  716                                  strfree(netid);
 718  717  
 719  718                                  continue;
 720  719                          }
 721  720  
 722  721                          /*
 723  722                           * Make sure the state is valid.  Note that once we
 724  723                           * change the state to NFS_AUTH_REFRESHING, no other
 725  724                           * thread will be able to work on this entry.
 726  725                           */
 727  726                          ASSERT(p->auth_state == NFS_AUTH_STALE);
 728  727  
 729  728                          p->auth_state = NFS_AUTH_REFRESHING;
 730  729                          mutex_exit(&p->auth_lock);
 731  730  
 732  731                          DTRACE_PROBE2(nfsauth__debug__cache__refresh,
 733  732                              struct exportinfo *, exi,
 734  733                              struct auth_cache *, p);
 735  734  
 736  735                          /*
 737  736                           * The first caching of the access rights
 738  737                           * is done with the netid pulled out of the
 739  738                           * request from the client. All subsequent
 740  739                           * users of the cache may or may not have
 741  740                           * the same netid. It doesn't matter. So
 742  741                           * when we refresh, we simply use the netid
 743  742                           * of the request which triggered the
 744  743                           * refresh attempt.
 745  744                           */
 746  745                          retrieval = nfsauth_retrieve(nag, exi, netid,
 747  746                              p->auth_flavor, &p->auth_clnt->authc_addr, &access,
 748  747                              p->auth_clnt_cred, &uid, &gid, &ngids, &gids);
 749  748  
 750  749                          /*
 751  750                           * This can only be set in one other place
 752  751                           * and the state has to be NFS_AUTH_FRESH.
 753  752                           */
 754  753                          strfree(netid);
 755  754  
 756  755                          mutex_enter(&p->auth_lock);
 757  756                          if (p->auth_state == NFS_AUTH_INVALID) {
 758  757                                  mutex_exit(&p->auth_lock);
 759  758                                  nfsauth_free_node(p);
 760  759                                  if (retrieval == TRUE)
 761  760                                          kmem_free(gids, ngids * sizeof (gid_t));
 762  761                          } else {
 763  762                                  /*
 764  763                                   * If we got an error, do not reset the
 765  764                                   * time. This will cause the next access
 766  765                                   * check for the client to reschedule this
 767  766                                   * node.
 768  767                                   */
 769  768                                  if (retrieval == TRUE) {
 770  769                                          p->auth_access = access;
 771  770  
 772  771                                          p->auth_srv_uid = uid;
 773  772                                          p->auth_srv_gid = gid;
 774  773                                          kmem_free(p->auth_srv_gids,
 775  774                                              p->auth_srv_ngids * sizeof (gid_t));
 776  775                                          p->auth_srv_ngids = ngids;
 777  776                                          p->auth_srv_gids = gids;
 778  777  
 779  778                                          p->auth_freshness = gethrestime_sec();
 780  779                                  }
 781  780                                  p->auth_state = NFS_AUTH_FRESH;
 782  781  
 783  782                                  cv_broadcast(&p->auth_cv);
 784  783                                  mutex_exit(&p->auth_lock);
 785  784                          }
 786  785                  }
 787  786  
 788  787                  list_destroy(&ren->ren_authlist);
 789  788                  exi_rele(ren->ren_exi);
 790  789                  kmem_free(ren, sizeof (refreshq_exi_node_t));
 791  790          }
 792  791  
 793  792          nag->refreshq_thread_state = REFRESHQ_THREAD_HALTED;
 794  793          cv_broadcast(&nag->refreshq_cv);
 795  794          CALLB_CPR_EXIT(&cprinfo);
 796  795          DTRACE_PROBE(nfsauth__nfsauth__refresh__thread__exit);
 797  796          zthread_exit();
 798  797  }
 799  798  
 800  799  int
 801  800  nfsauth_cache_clnt_compar(const void *v1, const void *v2)
 802  801  {
 803  802          int c;
 804  803  
 805  804          const struct auth_cache_clnt *a1 = (const struct auth_cache_clnt *)v1;
 806  805          const struct auth_cache_clnt *a2 = (const struct auth_cache_clnt *)v2;
 807  806  
 808  807          if (a1->authc_addr.len < a2->authc_addr.len)
 809  808                  return (-1);
 810  809          if (a1->authc_addr.len > a2->authc_addr.len)
 811  810                  return (1);
 812  811  
 813  812          c = memcmp(a1->authc_addr.buf, a2->authc_addr.buf, a1->authc_addr.len);
 814  813          if (c < 0)
 815  814                  return (-1);
 816  815          if (c > 0)
 817  816                  return (1);
 818  817  
 819  818          return (0);
 820  819  }
 821  820  
 822  821  static int
 823  822  nfsauth_cache_compar(const void *v1, const void *v2)
 824  823  {
 825  824          int c;
 826  825  
 827  826          const struct auth_cache *a1 = (const struct auth_cache *)v1;
 828  827          const struct auth_cache *a2 = (const struct auth_cache *)v2;
 829  828  
 830  829          if (a1->auth_flavor < a2->auth_flavor)
 831  830                  return (-1);
 832  831          if (a1->auth_flavor > a2->auth_flavor)
 833  832                  return (1);
 834  833  
 835  834          if (crgetuid(a1->auth_clnt_cred) < crgetuid(a2->auth_clnt_cred))
 836  835                  return (-1);
 837  836          if (crgetuid(a1->auth_clnt_cred) > crgetuid(a2->auth_clnt_cred))
 838  837                  return (1);
 839  838  
 840  839          if (crgetgid(a1->auth_clnt_cred) < crgetgid(a2->auth_clnt_cred))
 841  840                  return (-1);
 842  841          if (crgetgid(a1->auth_clnt_cred) > crgetgid(a2->auth_clnt_cred))
 843  842                  return (1);
 844  843  
 845  844          if (crgetngroups(a1->auth_clnt_cred) < crgetngroups(a2->auth_clnt_cred))
 846  845                  return (-1);
 847  846          if (crgetngroups(a1->auth_clnt_cred) > crgetngroups(a2->auth_clnt_cred))
 848  847                  return (1);
 849  848  
 850  849          c = memcmp(crgetgroups(a1->auth_clnt_cred),
 851  850              crgetgroups(a2->auth_clnt_cred), crgetngroups(a1->auth_clnt_cred));
 852  851          if (c < 0)
 853  852                  return (-1);
 854  853          if (c > 0)
 855  854                  return (1);
 856  855  
 857  856          return (0);
 858  857  }
 859  858  
 860  859  /*
 861  860   * Get the access information from the cache or callup to the mountd
 862  861   * to get and cache the access information in the kernel.
 863  862   */
 864  863  static int
 865  864  nfsauth_cache_get(struct exportinfo *exi, struct svc_req *req, int flavor,
 866  865      cred_t *cr, uid_t *uid, gid_t *gid, uint_t *ngids, gid_t **gids)
 867  866  {
 868  867          nfsauth_globals_t       *nag;
 869  868          struct netbuf           *taddrmask;
 870  869          struct netbuf           addr;   /* temporary copy of client's address */
 871  870          const struct netbuf     *claddr;
 872  871          avl_tree_t              *tree;
 873  872          struct auth_cache       ac;     /* used as a template for avl_find() */
 874  873          struct auth_cache_clnt  *c;
 875  874          struct auth_cache_clnt  acc;    /* used as a template for avl_find() */
 876  875          struct auth_cache       *p = NULL;
 877  876          int                     access;
 878  877  
 879  878          uid_t                   tmpuid;
 880  879          gid_t                   tmpgid;
 881  880          uint_t                  tmpngids;
 882  881          gid_t                   *tmpgids;
 883  882  
 884  883          avl_index_t             where;  /* used for avl_find()/avl_insert() */
 885  884  
 886  885          ASSERT(cr != NULL);
 887  886  
 888  887          ASSERT3P(curzone->zone_id, ==, exi->exi_zoneid);
 889  888          nag = nfsauth_get_zg();
 890  889  
 891  890          /*
 892  891           * Now check whether this client already
 893  892           * has an entry for this flavor in the cache
 894  893           * for this export.
 895  894           * Get the caller's address, mask off the
 896  895           * parts of the address that do not identify
 897  896           * the host (port number, etc), and then hash
 898  897           * it to find the chain of cache entries.
 899  898           */
 900  899  
 901  900          claddr = svc_getrpccaller(req->rq_xprt);
 902  901          addr = *claddr;
 903  902          if (claddr->len != 0) {
 904  903                  addr.buf = kmem_alloc(addr.maxlen, KM_SLEEP);
 905  904                  bcopy(claddr->buf, addr.buf, claddr->len);
 906  905          } else {
 907  906                  addr.buf = NULL;
 908  907          }
 909  908  
 910  909          SVC_GETADDRMASK(req->rq_xprt, SVC_TATTR_ADDRMASK, (void **)&taddrmask);
 911  910          ASSERT(taddrmask != NULL);
 912  911          addrmask(&addr, taddrmask);
 913  912  
 914  913          ac.auth_flavor = flavor;
 915  914          ac.auth_clnt_cred = crdup(cr);
 916  915  
 917  916          acc.authc_addr = addr;
 918  917  
 919  918          tree = exi->exi_cache[hash(&addr)];
 920  919  
 921  920          rw_enter(&exi->exi_cache_lock, RW_READER);
 922  921          c = (struct auth_cache_clnt *)avl_find(tree, &acc, NULL);
 923  922  
 924  923          if (c == NULL) {
 925  924                  struct auth_cache_clnt *nc;
 926  925  
 927  926                  rw_exit(&exi->exi_cache_lock);
 928  927  
 929  928                  nc = kmem_alloc(sizeof (*nc), KM_NOSLEEP | KM_NORMALPRI);
 930  929                  if (nc == NULL)
 931  930                          goto retrieve;
 932  931  
 933  932                  /*
 934  933                   * Initialize the new auth_cache_clnt
 935  934                   */
 936  935                  nc->authc_addr = addr;
 937  936                  nc->authc_addr.buf = kmem_alloc(addr.maxlen,
 938  937                      KM_NOSLEEP | KM_NORMALPRI);
 939  938                  if (addr.maxlen != 0 && nc->authc_addr.buf == NULL) {
 940  939                          kmem_free(nc, sizeof (*nc));
 941  940                          goto retrieve;
 942  941                  }
 943  942                  bcopy(addr.buf, nc->authc_addr.buf, addr.len);
 944  943                  rw_init(&nc->authc_lock, NULL, RW_DEFAULT, NULL);
 945  944                  avl_create(&nc->authc_tree, nfsauth_cache_compar,
 946  945                      sizeof (struct auth_cache),
 947  946                      offsetof(struct auth_cache, auth_link));
 948  947  
 949  948                  rw_enter(&exi->exi_cache_lock, RW_WRITER);
 950  949                  c = (struct auth_cache_clnt *)avl_find(tree, &acc, &where);
 951  950                  if (c == NULL) {
 952  951                          avl_insert(tree, nc, where);
 953  952                          rw_downgrade(&exi->exi_cache_lock);
 954  953                          c = nc;
 955  954                  } else {
 956  955                          rw_downgrade(&exi->exi_cache_lock);
 957  956  
 958  957                          avl_destroy(&nc->authc_tree);
 959  958                          rw_destroy(&nc->authc_lock);
 960  959                          kmem_free(nc->authc_addr.buf, nc->authc_addr.maxlen);
 961  960                          kmem_free(nc, sizeof (*nc));
 962  961                  }
 963  962          }
 964  963  
 965  964          ASSERT(c != NULL);
 966  965  
 967  966          rw_enter(&c->authc_lock, RW_READER);
 968  967          p = (struct auth_cache *)avl_find(&c->authc_tree, &ac, NULL);
 969  968  
 970  969          if (p == NULL) {
 971  970                  struct auth_cache *np;
 972  971  
 973  972                  rw_exit(&c->authc_lock);
 974  973  
 975  974                  np = kmem_cache_alloc(exi_cache_handle,
 976  975                      KM_NOSLEEP | KM_NORMALPRI);
 977  976                  if (np == NULL) {
 978  977                          rw_exit(&exi->exi_cache_lock);
 979  978                          goto retrieve;
 980  979                  }
 981  980  
 982  981                  /*
 983  982                   * Initialize the new auth_cache
 984  983                   */
 985  984                  np->auth_clnt = c;
 986  985                  np->auth_flavor = flavor;
 987  986                  np->auth_clnt_cred = ac.auth_clnt_cred;
 988  987                  np->auth_srv_ngids = 0;
 989  988                  np->auth_srv_gids = NULL;
 990  989                  np->auth_time = np->auth_freshness = gethrestime_sec();
 991  990                  np->auth_state = NFS_AUTH_NEW;
 992  991                  mutex_init(&np->auth_lock, NULL, MUTEX_DEFAULT, NULL);
 993  992                  cv_init(&np->auth_cv, NULL, CV_DEFAULT, NULL);
 994  993  
 995  994                  rw_enter(&c->authc_lock, RW_WRITER);
 996  995                  rw_exit(&exi->exi_cache_lock);
 997  996  
 998  997                  p = (struct auth_cache *)avl_find(&c->authc_tree, &ac, &where);
 999  998                  if (p == NULL) {
1000  999                          avl_insert(&c->authc_tree, np, where);
1001 1000                          rw_downgrade(&c->authc_lock);
1002 1001                          p = np;
1003 1002                  } else {
1004 1003                          rw_downgrade(&c->authc_lock);
1005 1004  
1006 1005                          cv_destroy(&np->auth_cv);
1007 1006                          mutex_destroy(&np->auth_lock);
1008 1007                          crfree(ac.auth_clnt_cred);
1009 1008                          kmem_cache_free(exi_cache_handle, np);
1010 1009                  }
1011 1010          } else {
1012 1011                  rw_exit(&exi->exi_cache_lock);
1013 1012                  crfree(ac.auth_clnt_cred);
1014 1013          }
1015 1014  
1016 1015          mutex_enter(&p->auth_lock);
1017 1016          rw_exit(&c->authc_lock);
1018 1017  
1019 1018          /*
1020 1019           * If the entry is in the WAITING state then some other thread is just
1021 1020           * retrieving the required info.  The entry was either NEW, or the list
1022 1021           * of client's supplemental groups is going to be changed (either by
1023 1022           * this thread, or by some other thread).  We need to wait until the
1024 1023           * nfsauth_retrieve() is done.
1025 1024           */
1026 1025          while (p->auth_state == NFS_AUTH_WAITING)
1027 1026                  cv_wait(&p->auth_cv, &p->auth_lock);
1028 1027  
1029 1028          /*
1030 1029           * Here the entry cannot be in WAITING or INVALID state.
1031 1030           */
1032 1031          ASSERT(p->auth_state != NFS_AUTH_WAITING);
1033 1032          ASSERT(p->auth_state != NFS_AUTH_INVALID);
1034 1033  
1035 1034          /*
1036 1035           * If the cache entry is not valid yet, we need to retrieve the
1037 1036           * info ourselves.
1038 1037           */
1039 1038          if (p->auth_state == NFS_AUTH_NEW) {
1040 1039                  bool_t res;
1041 1040                  /*
1042 1041                   * NFS_AUTH_NEW is the default output auth_state value in a
1043 1042                   * case we failed somewhere below.
1044 1043                   */
1045 1044                  auth_state_t state = NFS_AUTH_NEW;
1046 1045  
1047 1046                  p->auth_state = NFS_AUTH_WAITING;
1048 1047                  mutex_exit(&p->auth_lock);
1049 1048                  kmem_free(addr.buf, addr.maxlen);
1050 1049                  addr = p->auth_clnt->authc_addr;
1051 1050  
1052 1051                  atomic_inc_uint(&nfsauth_cache_miss);
1053 1052  
1054 1053                  res = nfsauth_retrieve(nag, exi, svc_getnetid(req->rq_xprt),
1055 1054                      flavor, &addr, &access, cr, &tmpuid, &tmpgid, &tmpngids,
1056 1055                      &tmpgids);
1057 1056  
1058 1057                  p->auth_access = access;
1059 1058                  p->auth_time = p->auth_freshness = gethrestime_sec();
1060 1059  
1061 1060                  if (res == TRUE) {
1062 1061                          if (uid != NULL)
1063 1062                                  *uid = tmpuid;
1064 1063                          if (gid != NULL)
1065 1064                                  *gid = tmpgid;
1066 1065                          if (ngids != NULL && gids != NULL) {
1067 1066                                  *ngids = tmpngids;
1068 1067                                  *gids = tmpgids;
1069 1068  
1070 1069                                  /*
1071 1070                                   * We need a copy of gids for the
1072 1071                                   * auth_cache entry
1073 1072                                   */
1074 1073                                  tmpgids = kmem_alloc(tmpngids * sizeof (gid_t),
1075 1074                                      KM_NOSLEEP | KM_NORMALPRI);
1076 1075                                  if (tmpgids != NULL)
1077 1076                                          bcopy(*gids, tmpgids,
1078 1077                                              tmpngids * sizeof (gid_t));
1079 1078                          }
1080 1079  
1081 1080                          if (tmpgids != NULL || tmpngids == 0) {
1082 1081                                  p->auth_srv_uid = tmpuid;
1083 1082                                  p->auth_srv_gid = tmpgid;
1084 1083                                  p->auth_srv_ngids = tmpngids;
1085 1084                                  p->auth_srv_gids = tmpgids;
1086 1085  
1087 1086                                  state = NFS_AUTH_FRESH;
1088 1087                          }
1089 1088                  }
1090 1089  
1091 1090                  /*
1092 1091                   * Set the auth_state and notify waiters.
1093 1092                   */
1094 1093                  mutex_enter(&p->auth_lock);
1095 1094                  p->auth_state = state;
1096 1095                  cv_broadcast(&p->auth_cv);
1097 1096                  mutex_exit(&p->auth_lock);
1098 1097          } else {
1099 1098                  uint_t nach;
1100 1099                  time_t refresh;
1101 1100  
1102 1101                  refresh = gethrestime_sec() - p->auth_freshness;
1103 1102  
1104 1103                  p->auth_time = gethrestime_sec();
1105 1104  
1106 1105                  if (uid != NULL)
1107 1106                          *uid = p->auth_srv_uid;
1108 1107                  if (gid != NULL)
1109 1108                          *gid = p->auth_srv_gid;
1110 1109                  if (ngids != NULL && gids != NULL) {
1111 1110                          if ((*ngids = p->auth_srv_ngids) != 0) {
1112 1111                                  size_t sz = *ngids * sizeof (gid_t);
1113 1112                                  *gids = kmem_alloc(sz, KM_SLEEP);
1114 1113                                  bcopy(p->auth_srv_gids, *gids, sz);
1115 1114                          } else {
1116 1115                                  *gids = NULL;
1117 1116                          }
1118 1117                  }
1119 1118  
1120 1119                  access = p->auth_access;
1121 1120  
1122 1121                  if ((refresh > NFSAUTH_CACHE_REFRESH) &&
1123 1122                      p->auth_state == NFS_AUTH_FRESH) {
1124 1123                          refreshq_auth_node_t *ran;
1125 1124                          uint_t nacr;
1126 1125  
1127 1126                          p->auth_state = NFS_AUTH_STALE;
1128 1127                          mutex_exit(&p->auth_lock);
1129 1128  
1130 1129                          nacr = atomic_inc_uint_nv(&nfsauth_cache_refresh);
1131 1130                          DTRACE_PROBE3(nfsauth__debug__cache__stale,
1132 1131                              struct exportinfo *, exi,
1133 1132                              struct auth_cache *, p,
1134 1133                              uint_t, nacr);
1135 1134  
1136 1135                          ran = kmem_alloc(sizeof (refreshq_auth_node_t),
1137 1136                              KM_SLEEP);
1138 1137                          ran->ran_auth = p;
1139 1138                          ran->ran_netid = strdup(svc_getnetid(req->rq_xprt));
1140 1139  
1141 1140                          mutex_enter(&nag->refreshq_lock);
1142 1141  
1143 1142                          if (nag->refreshq_thread_state ==
1144 1143                              REFRESHQ_THREAD_NEED_CREATE) {
1145 1144                                  /* Launch nfsauth refresh thread */
1146 1145                                  nag->refreshq_thread_state =
1147 1146                                      REFRESHQ_THREAD_RUNNING;
1148 1147                                  (void) zthread_create(NULL, 0,
1149 1148                                      nfsauth_refresh_thread, nag, 0,
1150 1149                                      minclsyspri);
1151 1150                          }
1152 1151  
1153 1152                          /*
1154 1153                           * We should not add a work queue item if the thread
1155 1154                           * is not accepting them.
1156 1155                           */
1157 1156                          if (nag->refreshq_thread_state ==
1158 1157                              REFRESHQ_THREAD_RUNNING) {
1159 1158                                  refreshq_exi_node_t *ren;
1160 1159  
1161 1160                                  /*
1162 1161                                   * Is there an existing exi_list?
1163 1162                                   */
1164 1163                                  for (ren = list_head(&nag->refreshq_queue);
1165 1164                                      ren != NULL;
1166 1165                                      ren = list_next(&nag->refreshq_queue,
1167 1166                                      ren)) {
1168 1167                                          if (ren->ren_exi == exi) {
1169 1168                                                  list_insert_tail(
1170 1169                                                      &ren->ren_authlist, ran);
1171 1170                                                  break;
1172 1171                                          }
1173 1172                                  }
1174 1173  
1175 1174                                  if (ren == NULL) {
1176 1175                                          ren = kmem_alloc(
1177 1176                                              sizeof (refreshq_exi_node_t),
1178 1177                                              KM_SLEEP);
1179 1178  
1180 1179                                          exi_hold(exi);
1181 1180                                          ren->ren_exi = exi;
1182 1181  
1183 1182                                          list_create(&ren->ren_authlist,
1184 1183                                              sizeof (refreshq_auth_node_t),
1185 1184                                              offsetof(refreshq_auth_node_t,
1186 1185                                              ran_node));
1187 1186  
1188 1187                                          list_insert_tail(&ren->ren_authlist,
1189 1188                                              ran);
1190 1189                                          list_insert_tail(&nag->refreshq_queue,
1191 1190                                              ren);
1192 1191                                  }
1193 1192  
1194 1193                                  cv_broadcast(&nag->refreshq_cv);
1195 1194                          } else {
1196 1195                                  strfree(ran->ran_netid);
1197 1196                                  kmem_free(ran, sizeof (refreshq_auth_node_t));
1198 1197                          }
1199 1198  
1200 1199                          mutex_exit(&nag->refreshq_lock);
1201 1200                  } else {
1202 1201                          mutex_exit(&p->auth_lock);
1203 1202                  }
1204 1203  
1205 1204                  nach = atomic_inc_uint_nv(&nfsauth_cache_hit);
1206 1205                  DTRACE_PROBE2(nfsauth__debug__cache__hit,
1207 1206                      uint_t, nach,
1208 1207                      time_t, refresh);
1209 1208  
1210 1209                  kmem_free(addr.buf, addr.maxlen);
1211 1210          }
1212 1211  
1213 1212          return (access);
1214 1213  
1215 1214  retrieve:
1216 1215          crfree(ac.auth_clnt_cred);
1217 1216  
1218 1217          /*
1219 1218           * Retrieve the required data without caching.
1220 1219           */
1221 1220  
1222 1221          ASSERT(p == NULL);
1223 1222  
1224 1223          atomic_inc_uint(&nfsauth_cache_miss);
1225 1224  
1226 1225          if (nfsauth_retrieve(nag, exi, svc_getnetid(req->rq_xprt), flavor,
1227 1226              &addr, &access, cr, &tmpuid, &tmpgid, &tmpngids, &tmpgids)) {
1228 1227                  if (uid != NULL)
1229 1228                          *uid = tmpuid;
1230 1229                  if (gid != NULL)
1231 1230                          *gid = tmpgid;
1232 1231                  if (ngids != NULL && gids != NULL) {
1233 1232                          *ngids = tmpngids;
1234 1233                          *gids = tmpgids;
1235 1234                  } else {
1236 1235                          kmem_free(tmpgids, tmpngids * sizeof (gid_t));
1237 1236                  }
1238 1237          }
1239 1238  
1240 1239          kmem_free(addr.buf, addr.maxlen);
1241 1240  
1242 1241          return (access);
1243 1242  }
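
The stale-entry path above batches refresh work per export: each refreshq_exi_node_t on refreshq_queue owns a ren_authlist of refreshq_auth_node_t entries for one exportinfo, and the single refresh thread is woken with cv_broadcast() once the item is queued. A minimal userland sketch of that enqueue-and-group pattern follows; the names (work_item, export_batch, enqueue_refresh) are hypothetical and pthreads stands in for the kernel mutex/condvar API.

    /*
     * Simplified userland sketch of the enqueue-and-group pattern used
     * above.  Hypothetical names; not the kernel structures.
     */
    #include <pthread.h>
    #include <stdlib.h>

    struct work_item {                      /* cf. refreshq_auth_node_t */
            struct work_item *next;
            void *payload;
    };

    struct export_batch {                   /* cf. refreshq_exi_node_t */
            struct export_batch *next;
            const void *export_key;         /* cf. ren_exi */
            struct work_item *items;        /* cf. ren_authlist */
    };

    static struct export_batch *queue;      /* cf. refreshq_queue */
    static pthread_mutex_t queue_lock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t queue_cv = PTHREAD_COND_INITIALIZER;

    static void
    enqueue_refresh(const void *export_key, void *payload)
    {
            /* KM_SLEEP in the kernel cannot fail; checks omitted here. */
            struct work_item *wi = malloc(sizeof (*wi));
            struct export_batch *eb;

            wi->payload = payload;

            pthread_mutex_lock(&queue_lock);

            /* Is there an existing batch for this export? */
            for (eb = queue; eb != NULL; eb = eb->next)
                    if (eb->export_key == export_key)
                            break;

            if (eb == NULL) {
                    eb = malloc(sizeof (*eb));
                    eb->export_key = export_key;
                    eb->items = NULL;
                    eb->next = queue;
                    queue = eb;
            }

            /* The kernel appends; prepending is enough for the sketch. */
            wi->next = eb->items;
            eb->items = wi;

            /* Wake the single refresh worker. */
            pthread_cond_broadcast(&queue_cv);
            pthread_mutex_unlock(&queue_lock);
    }
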
1244 1243  
1245 1244  /*
1246 1245   * Check if the requesting client has access to the filesystem with
1247 1246   * a given nfs flavor number which is an explicitly shared flavor.
1248 1247   */
1249 1248  int
1250 1249  nfsauth4_secinfo_access(struct exportinfo *exi, struct svc_req *req,
1251 1250      int flavor, int perm, cred_t *cr)
1252 1251  {
1253 1252          int access;
1254 1253  
1255 1254          if (! (perm & M_4SEC_EXPORTED)) {
1256 1255                  return (NFSAUTH_DENIED);
1257 1256          }
1258 1257  
1259 1258          /*
1260 1259           * Optimize if there are no lists
1261 1260           */
1262 1261          if ((perm & (M_ROOT | M_NONE | M_MAP)) == 0) {
1263 1262                  perm &= ~M_4SEC_EXPORTED;
1264 1263                  if (perm == M_RO)
1265 1264                          return (NFSAUTH_RO);
1266 1265                  if (perm == M_RW)
1267 1266                          return (NFSAUTH_RW);
1268 1267          }
1269 1268  
1270 1269          access = nfsauth_cache_get(exi, req, flavor, cr, NULL, NULL, NULL,
1271 1270              NULL);
1272 1271  
1273 1272          return (access);
1274 1273  }
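
The fast path here (and the similar one in nfsauth_access() below) rests on a single observation: if the share carries no root=, none=, or map= lists for this flavor, the per-share M_RO/M_RW bits already answer the question and no upcall or cache lookup is needed. A condensed restatement under a hypothetical share_fast_access() wrapper, assuming the M_* and NFSAUTH_* definitions from <nfs/export.h> and <nfs/auth.h>:

    /* Hypothetical condensation of the "no lists" fast path. */
    static int
    share_fast_access(int perm)
    {
            /* Any of these flags means per-client lists exist: slow path. */
            if (perm & (M_ROOT | M_NONE | M_MAP))
                    return (-1);            /* caller must consult nfsauth */

            perm &= ~M_4SEC_EXPORTED;
            if (perm == M_RO)
                    return (NFSAUTH_RO);
            if (perm == M_RW)
                    return (NFSAUTH_RW);

            return (-1);                    /* fall back to nfsauth */
    }
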
1275 1274  
1276 1275  int
1277 1276  nfsauth_access(struct exportinfo *exi, struct svc_req *req, cred_t *cr,
1278 1277      uid_t *uid, gid_t *gid, uint_t *ngids, gid_t **gids)
1279 1278  {
1280 1279          int access, mapaccess;
1281 1280          struct secinfo *sp;
1282 1281          int i, flavor, perm;
1283 1282          int authnone_entry = -1;
1284 1283  
1285 1284          /*
1286 1285           * By default, root is mapped to the anonymous user.
1287 1286           * This might get overridden later in nfsauth_cache_get().
1288 1287           */
1289 1288          if (crgetuid(cr) == 0) {
1290 1289                  if (uid != NULL)
1291 1290                          *uid = exi->exi_export.ex_anon;
1292 1291                  if (gid != NULL)
1293 1292                          *gid = exi->exi_export.ex_anon;
1294 1293          } else {
1295 1294                  if (uid != NULL)
1296 1295                          *uid = crgetuid(cr);
1297 1296                  if (gid != NULL)
1298 1297                          *gid = crgetgid(cr);
1299 1298          }
1300 1299  
1301 1300          if (ngids != NULL)
1302 1301                  *ngids = 0;
1303 1302          if (gids != NULL)
1304 1303                  *gids = NULL;
1305 1304  
1306 1305          /*
1307 1306           *  Get the nfs flavor number from xprt.
1308 1307           */
1309 1308          flavor = (int)(uintptr_t)req->rq_xprt->xp_cookie;
1310 1309  
1311 1310          /*
1312 1311           * First check the access restrictions on the filesystem.  If
1313 1312           * there are no lists associated with this flavor then there's no
1314 1313           * need to make an expensive call to the nfsauth service or to
1315 1314           * cache anything.
1316 1315           */
1317 1316  
1318 1317          sp = exi->exi_export.ex_secinfo;
1319 1318          for (i = 0; i < exi->exi_export.ex_seccnt; i++) {
1320 1319                  if (flavor != sp[i].s_secinfo.sc_nfsnum) {
1321 1320                          if (sp[i].s_secinfo.sc_nfsnum == AUTH_NONE)
1322 1321                                  authnone_entry = i;
1323 1322                          continue;
1324 1323                  }
1325 1324                  break;
1326 1325          }
1327 1326  
1328 1327          mapaccess = 0;
1329 1328  
1330 1329          if (i >= exi->exi_export.ex_seccnt) {
1331 1330                  /*
1332 1331                   * Flavor not found, but use AUTH_NONE if it exists
1333 1332                   */
1334 1333                  if (authnone_entry == -1)
1335 1334                          return (NFSAUTH_DENIED);
1336 1335                  flavor = AUTH_NONE;
1337 1336                  mapaccess = NFSAUTH_MAPNONE;
1338 1337                  i = authnone_entry;
1339 1338          }
1340 1339  
1341 1340          /*
1342 1341           * If the flavor is in the ex_secinfo list, but not an explicitly
1343 1342           * shared flavor by the user, it is a result of the nfsv4 server
1344 1343           * namespace setup. We will grant RO permission similar to that
1345 1344           * for a pseudo node, except that this node is a shared one.
1346 1345           *
1347 1346           * e.g. flavor in (flavor) indicates that it is not explicitly
1348 1347           *      shared by the user:
1349 1348           *
1350 1349           *              /       (sys, krb5)
1351 1350           *              |
1352 1351           *              export  #share -o sec=sys (krb5)
1353 1352           *              |
1354 1353           *              secure  #share -o sec=krb5
1355 1354           *
1356 1355           *      In this case, when a krb5 request comes in to access
1357 1356           *      /export, RO permission is granted.
1358 1357           */
1359 1358          if (!(sp[i].s_flags & M_4SEC_EXPORTED))
1360 1359                  return (mapaccess | NFSAUTH_RO);
1361 1360  
1362 1361          /*
1363 1362           * Optimize if there are no lists.
1364 1363           * We cannot optimize for AUTH_SYS with NGRPS (16) supplemental groups.
1365 1364           */
1366 1365          perm = sp[i].s_flags;
1367 1366          if ((perm & (M_ROOT | M_NONE | M_MAP)) == 0 && (ngroups_max <= NGRPS ||
1368 1367              flavor != AUTH_SYS || crgetngroups(cr) < NGRPS)) {
1369 1368                  perm &= ~M_4SEC_EXPORTED;
1370 1369                  if (perm == M_RO)
1371 1370                          return (mapaccess | NFSAUTH_RO);
1372 1371                  if (perm == M_RW)
1373 1372                          return (mapaccess | NFSAUTH_RW);
1374 1373          }
1375 1374  
1376 1375          access = nfsauth_cache_get(exi, req, flavor, cr, uid, gid, ngids, gids);
1377 1376  
1378 1377          /*
1379 1378           * For both NFSAUTH_DENIED and NFSAUTH_WRONGSEC we do not care about
1380 1379           * the supplemental groups.
1381 1380           */
1382 1381          if (access & NFSAUTH_DENIED || access & NFSAUTH_WRONGSEC) {
1383 1382                  if (ngids != NULL && gids != NULL) {
1384 1383                          kmem_free(*gids, *ngids * sizeof (gid_t));
1385 1384                          *ngids = 0;
1386 1385                          *gids = NULL;
1387 1386                  }
1388 1387          }
1389 1388  
1390 1389          /*
1391 1390           * Client's security flavor doesn't match with "ro" or
1392 1391           * "rw" list. Try again using AUTH_NONE if present.
1393 1392           */
1394 1393          if ((access & NFSAUTH_WRONGSEC) && (flavor != AUTH_NONE)) {
1395 1394                  /*
1396 1395                   * Have we already encountered AUTH_NONE?
1397 1396                   */
1398 1397                  if (authnone_entry != -1) {
1399 1398                          mapaccess = NFSAUTH_MAPNONE;
1400 1399                          access = nfsauth_cache_get(exi, req, AUTH_NONE, cr,
1401 1400                              NULL, NULL, NULL, NULL);
1402 1401                  } else {
1403 1402                          /*
1404 1403                           * Check for AUTH_NONE presence.
1405 1404                           */
1406 1405                          for (; i < exi->exi_export.ex_seccnt; i++) {
1407 1406                                  if (sp[i].s_secinfo.sc_nfsnum == AUTH_NONE) {
1408 1407                                          mapaccess = NFSAUTH_MAPNONE;
1409 1408                                          access = nfsauth_cache_get(exi, req,
1410 1409                                              AUTH_NONE, cr, NULL, NULL, NULL,
1411 1410                                              NULL);
1412 1411                                          break;
1413 1412                                  }
1414 1413                          }
1415 1414                  }
1416 1415          }
1417 1416  
1418 1417          if (access & NFSAUTH_DENIED)
1419 1418                  access = NFSAUTH_DENIED;
1420 1419  
1421 1420          return (access | mapaccess);
1422 1421  }
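
nfsauth_access() scans ex_secinfo once, remembering where AUTH_NONE sits so a request whose flavor is not shared can fall back to it (and pick up NFSAUTH_MAPNONE). A condensed sketch of just that selection step, under hypothetical names (pick_flavor, flavor_entry, EX_AUTH_NONE stand in for the real secinfo types):

    #define EX_AUTH_NONE    0               /* stand-in for AUTH_NONE */

    struct flavor_entry {                   /* stand-in for struct secinfo */
            int fe_flavor;                  /* cf. s_secinfo.sc_nfsnum */
    };

    /*
     * Return the index of the requested flavor, or of AUTH_NONE when the
     * requested flavor is not shared; -1 means the request would be denied.
     * *mapped is set when the caller should OR in NFSAUTH_MAPNONE.
     */
    static int
    pick_flavor(const struct flavor_entry *fe, int cnt, int *flavor, int *mapped)
    {
            int i, authnone_entry = -1;

            *mapped = 0;

            for (i = 0; i < cnt; i++) {
                    if (fe[i].fe_flavor == *flavor)
                            return (i);
                    if (fe[i].fe_flavor == EX_AUTH_NONE)
                            authnone_entry = i;
            }

            if (authnone_entry == -1)
                    return (-1);

            *flavor = EX_AUTH_NONE;
            *mapped = 1;
            return (authnone_entry);
    }
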
1423 1422  
1424 1423  static void
1425 1424  nfsauth_free_clnt_node(struct auth_cache_clnt *p)
1426 1425  {
1427 1426          void *cookie = NULL;
1428 1427          struct auth_cache *node;
1429 1428  
1430 1429          while ((node = avl_destroy_nodes(&p->authc_tree, &cookie)) != NULL)
1431 1430                  nfsauth_free_node(node);
1432 1431          avl_destroy(&p->authc_tree);
1433 1432  
1434 1433          kmem_free(p->authc_addr.buf, p->authc_addr.maxlen);
1435 1434          rw_destroy(&p->authc_lock);
1436 1435  
1437 1436          kmem_free(p, sizeof (*p));
1438 1437  }
1439 1438  
1440 1439  static void
1441 1440  nfsauth_free_node(struct auth_cache *p)
1442 1441  {
1443 1442          crfree(p->auth_clnt_cred);
1444 1443          kmem_free(p->auth_srv_gids, p->auth_srv_ngids * sizeof (gid_t));
1445 1444          mutex_destroy(&p->auth_lock);
1446 1445          cv_destroy(&p->auth_cv);
1447 1446          kmem_cache_free(exi_cache_handle, p);
1448 1447  }
1449 1448  
1450 1449  /*
1451 1450   * Free the nfsauth cache for a given export
1452 1451   */
1453 1452  void
1454 1453  nfsauth_cache_free(struct exportinfo *exi)
1455 1454  {
1456 1455          int i;
1457 1456  
1458 1457          /*
1459 1458           * The only way we got here was with an exi_rele, which means that no
1460 1459           * auth cache entry is being refreshed.
1461 1460           */
1462 1461  
1463 1462          for (i = 0; i < AUTH_TABLESIZE; i++) {
1464 1463                  avl_tree_t *tree = exi->exi_cache[i];
1465 1464                  void *cookie = NULL;
1466 1465                  struct auth_cache_clnt *node;
1467 1466  
1468 1467                  while ((node = avl_destroy_nodes(tree, &cookie)) != NULL)
1469 1468                          nfsauth_free_clnt_node(node);
1470 1469          }
1471 1470  }
1472 1471  
1473 1472  /*
1474 1473   * Called by the kernel memory allocator when memory is low.
1475 1474   * Free unused cache entries. If that's not enough, the VM system
1476 1475   * will call again for some more.
1477 1476   *
1478 1477   * This needs to operate on all zones, so we take a reader lock
1479 1478   * on the list of zones and walk the list.  This is OK here
1480 1479   * because exi_cache_trim doesn't block or cause new objects
1481 1480   * to be allocated (basically just frees lots of stuff).
1482 1481   * Use care if nfssrv_globals_rwl is taken as reader in any
1483 1482   * other cases because it will block nfs_server_zone_init
1484 1483   * and nfs_server_zone_fini, which enter as writer.
1485 1484   */
1486 1485  /*ARGSUSED*/
1487 1486  void
1488 1487  exi_cache_reclaim(void *cdrarg)
1489 1488  {
1490 1489          nfs_globals_t *ng;
1491 1490  
1492 1491          rw_enter(&nfssrv_globals_rwl, RW_READER);
1493 1492  
1494 1493          ng = list_head(&nfssrv_globals_list);
1495 1494          while (ng != NULL) {
1496 1495                  exi_cache_reclaim_zone(ng);
1497 1496                  ng = list_next(&nfssrv_globals_list, ng);
1498 1497          }
1499 1498  
1500 1499          rw_exit(&nfssrv_globals_rwl);
1501 1500  }
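
For context, exi_cache_reclaim() only runs because it was registered as the kmem reclaim callback when the auth-cache kmem cache was created (in nfsauth_init(), outside this hunk). The registration presumably looks roughly like the following; treat it as a sketch of the shape rather than a quote of the source:

    /* Assumed shape of the registration; not shown in this hunk. */
    exi_cache_handle = kmem_cache_create("exi_cache_handle",
        sizeof (struct auth_cache), 0, NULL, NULL,
        exi_cache_reclaim, NULL, NULL, 0);
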
1502 1501  
1503 1502  static void
1504 1503  exi_cache_reclaim_zone(nfs_globals_t *ng)
1505 1504  {
1506 1505          int i;
1507 1506          struct exportinfo *exi;
1508 1507          nfs_export_t *ne = ng->nfs_export;
1509 1508  
1510 1509          rw_enter(&ne->exported_lock, RW_READER);
1511 1510  
1512 1511          for (i = 0; i < EXPTABLESIZE; i++) {
1513 1512                  for (exi = ne->exptable[i]; exi; exi = exi->fid_hash.next)
1514 1513                          exi_cache_trim(exi);
1515 1514          }
1516 1515  
1517 1516          rw_exit(&ne->exported_lock);
1518 1517  
1519 1518          atomic_inc_uint(&nfsauth_cache_reclaim);
1520 1519  }
1521 1520  
1522 1521  static void
1523 1522  exi_cache_trim(struct exportinfo *exi)
1524 1523  {
1525 1524          struct auth_cache_clnt *c;
1526 1525          struct auth_cache_clnt *nextc;
1527 1526          struct auth_cache *p;
1528 1527          struct auth_cache *next;
1529 1528          int i;
1530 1529          time_t stale_time;
1531 1530          avl_tree_t *tree;
1532 1531  
1533 1532          for (i = 0; i < AUTH_TABLESIZE; i++) {
1534 1533                  tree = exi->exi_cache[i];
1535 1534                  stale_time = gethrestime_sec() - NFSAUTH_CACHE_TRIM;
1536 1535                  rw_enter(&exi->exi_cache_lock, RW_READER);
1537 1536  
1538 1537                  /*
1539 1538                   * Free entries that have not been
1540 1539                   * used for NFSAUTH_CACHE_TRIM seconds.
1541 1540                   */
1542 1541                  for (c = avl_first(tree); c != NULL; c = AVL_NEXT(tree, c)) {
1543 1542                          /*
1544 1543                           * We are being called by the kmem subsystem to reclaim
1545 1544                           * memory so don't block if we can't get the lock.
1546 1545                           */
1547 1546                          if (rw_tryenter(&c->authc_lock, RW_WRITER) == 0) {
1548 1547                                  exi_cache_auth_reclaim_failed++;
1549 1548                                  rw_exit(&exi->exi_cache_lock);
1550 1549                                  return;
1551 1550                          }
1552 1551  
1553 1552                          for (p = avl_first(&c->authc_tree); p != NULL;
1554 1553                              p = next) {
1555 1554                                  next = AVL_NEXT(&c->authc_tree, p);
1556 1555  
1557 1556                                  ASSERT(p->auth_state != NFS_AUTH_INVALID);
1558 1557  
1559 1558                                  mutex_enter(&p->auth_lock);
1560 1559  
1561 1560                                  /*
1562 1561                                   * We won't trim recently used and/or WAITING
1563 1562                                   * entries.
1564 1563                                   */
1565 1564                                  if (p->auth_time > stale_time ||
1566 1565                                      p->auth_state == NFS_AUTH_WAITING) {
1567 1566                                          mutex_exit(&p->auth_lock);
1568 1567                                          continue;
1569 1568                                  }
1570 1569  
1571 1570                                  DTRACE_PROBE1(nfsauth__debug__trim__state,
1572 1571                                      auth_state_t, p->auth_state);
1573 1572  
1574 1573                                  /*
1575 1574                                   * STALE and REFRESHING entries need to be
1576 1575                                   * marked INVALID only because they are
1577 1576                                   * referenced by some other structures or
1578 1577                                   * threads.  They will be freed later.
1579 1578                                   */
1580 1579                                  if (p->auth_state == NFS_AUTH_STALE ||
1581 1580                                      p->auth_state == NFS_AUTH_REFRESHING) {
1582 1581                                          p->auth_state = NFS_AUTH_INVALID;
1583 1582                                          mutex_exit(&p->auth_lock);
1584 1583  
1585 1584                                          avl_remove(&c->authc_tree, p);
1586 1585                                  } else {
1587 1586                                          mutex_exit(&p->auth_lock);
1588 1587  
1589 1588                                          avl_remove(&c->authc_tree, p);
1590 1589                                          nfsauth_free_node(p);
1591 1590                                  }
1592 1591                          }
1593 1592                          rw_exit(&c->authc_lock);
1594 1593                  }
1595 1594  
1596 1595                  if (rw_tryupgrade(&exi->exi_cache_lock) == 0) {
1597 1596                          rw_exit(&exi->exi_cache_lock);
1598 1597                          exi_cache_clnt_reclaim_failed++;
1599 1598                          continue;
1600 1599                  }
1601 1600  
1602 1601                  for (c = avl_first(tree); c != NULL; c = nextc) {
1603 1602                          nextc = AVL_NEXT(tree, c);
1604 1603  
1605 1604                          if (avl_is_empty(&c->authc_tree) == B_FALSE)
1606 1605                                  continue;
1607 1606  
1608 1607                          avl_remove(tree, c);
1609 1608  
1610 1609                          nfsauth_free_clnt_node(c);
1611 1610                  }
1612 1611  
1613 1612                  rw_exit(&exi->exi_cache_lock);
1614 1613          }
1615 1614  }
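
exi_cache_trim() follows two reclaim-path rules: never block while the allocator is reclaiming memory (hence rw_tryenter()/rw_tryupgrade() plus the *_reclaim_failed counters instead of blocking lock acquisitions), and never free an entry another thread may still reference (STALE/REFRESHING entries are only marked INVALID). A minimal userland sketch of the first rule, with hypothetical names and pthreads in place of the kernel rwlock API:

    #include <pthread.h>

    static pthread_rwlock_t cache_lock = PTHREAD_RWLOCK_INITIALIZER;
    static volatile unsigned int trim_failed;  /* cf. exi_cache_*_reclaim_failed */

    /*
     * Best-effort trim: if the lock is busy, bump a statistic and bail out
     * instead of blocking -- the allocator will simply ask again later.
     */
    static void
    cache_trim(void (*free_stale_entries)(void))
    {
            if (pthread_rwlock_trywrlock(&cache_lock) != 0) {
                    trim_failed++;          /* statistic only, as in the kernel */
                    return;
            }

            free_stale_entries();           /* trim while holding the write lock */

            pthread_rwlock_unlock(&cache_lock);
    }
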
  