1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 1995, 2010, Oracle and/or its affiliates. All rights reserved.
  24  * Copyright (c) 2015 Joyent, Inc.  All rights reserved.
  25  * Copyright (c) 2015 by Delphix. All rights reserved.
  26  * Copyright (c) 2015 Joyent, Inc.  All rights reserved.
  27  * Copyright 2018 Nexenta Systems, Inc. All rights reserved.
  28  */
  29 
  30 #include <sys/param.h>
  31 #include <sys/errno.h>
  32 #include <sys/vfs.h>
  33 #include <sys/vnode.h>
  34 #include <sys/cred.h>
  35 #include <sys/cmn_err.h>
  36 #include <sys/systm.h>
  37 #include <sys/kmem.h>
  38 #include <sys/pathname.h>
  39 #include <sys/utsname.h>
  40 #include <sys/debug.h>
  41 #include <sys/door.h>
  42 #include <sys/sdt.h>
  43 #include <sys/thread.h>
  44 #include <sys/avl.h>
  45 
  46 #include <rpc/types.h>
  47 #include <rpc/auth.h>
  48 #include <rpc/clnt.h>
  49 
  50 #include <nfs/nfs.h>
  51 #include <nfs/export.h>
  52 #include <nfs/nfs_clnt.h>
  53 #include <nfs/auth.h>
  54 
  55 static struct kmem_cache *exi_cache_handle;
  56 static void exi_cache_reclaim(void *);
  57 static void exi_cache_reclaim_zone(nfs_globals_t *);
  58 static void exi_cache_trim(struct exportinfo *exi);
  59 
  60 extern pri_t minclsyspri;
  61 
  62 /* NFS auth cache statistics */
  63 volatile uint_t nfsauth_cache_hit;
  64 volatile uint_t nfsauth_cache_miss;
  65 volatile uint_t nfsauth_cache_refresh;
  66 volatile uint_t nfsauth_cache_reclaim;
  67 volatile uint_t exi_cache_auth_reclaim_failed;
  68 volatile uint_t exi_cache_clnt_reclaim_failed;
  69 
  70 /*
  71  * The lifetime of an auth cache entry:
  72  * ------------------------------------
  73  *
  74  * An auth cache entry is created with both the auth_time
  75  * and auth_freshness times set to the current time.
  76  *
  77  * Upon every client access which results in a hit, the
  78  * auth_time will be updated.
  79  *
  80  * If a client access determines that the auth_freshness
  81  * indicates that the entry is STALE, then it will be
  82  * refreshed. Note that this will explicitly reset
  83  * auth_time.
  84  *
  85  * When the REFRESH successfully occurs, then the
  86  * auth_freshness is updated.
  87  *
  88  * There are two ways for an entry to leave the cache:
  89  *
  90  * 1) Purged by an action on the export (remove or changed)
  91  * 2) Memory backpressure from the kernel (check against NFSAUTH_CACHE_TRIM)
  92  *
  93  * For 2) we check the timeout value against auth_time.
  94  */
  95 
  96 /*
  97  * Number of seconds until we mark for refresh an auth cache entry.
  98  */
  99 #define NFSAUTH_CACHE_REFRESH 600
 100 
 101 /*
 102  * Number of idle seconds until we yield to backpressure
 103  * to trim a cache entry.
 104  */
 105 #define NFSAUTH_CACHE_TRIM 3600
 106 
/*
 * While we could encapsulate the exi_list inside the
 * exi structure, we can't do that for the auth_list.
 * So, to keep things looking clean, we keep them both
 * in these external lists.
 */
 113 typedef struct refreshq_exi_node {
 114         struct exportinfo       *ren_exi;
 115         list_t                  ren_authlist;
 116         list_node_t             ren_node;
 117 } refreshq_exi_node_t;
 118 
 119 typedef struct refreshq_auth_node {
 120         struct auth_cache       *ran_auth;
 121         char                    *ran_netid;
 122         list_node_t             ran_node;
 123 } refreshq_auth_node_t;
 124 
 125 /*
 126  * Used to manipulate things on the refreshq_queue.  Note that the refresh
 127  * thread will effectively pop a node off of the queue, at which point it
 128  * will no longer need to hold the mutex.
 129  */
 /*
  * NOTE(review): nfsauth_zone_init(), nfsauth_zone_shutdown(),
  * nfsauth_zone_fini() and nfsauth_refresh_thread() all operate on the
  * identically named members of nfsauth_globals_t, not on these
  * file-scope variables.  These may be leftovers; check the rest of the
  * file for remaining users before removing them.
  */
 130 static kmutex_t refreshq_lock;
 131 static list_t refreshq_queue;
 132 static kcondvar_t refreshq_cv;
 133 
 134 /*
 135  * If there is ever a problem with loading the module, then nfsauth_fini()
 136  * needs to be called to remove state.  In that event, since the refreshq
 137  * thread has been started, they need to work together to get rid of state.
 138  */
 139 typedef enum nfsauth_refreshq_thread_state {
 140         REFRESHQ_THREAD_RUNNING,
 141         REFRESHQ_THREAD_FINI_REQ,
 142         REFRESHQ_THREAD_HALTED,
 143         REFRESHQ_THREAD_NEED_CREATE
 144 } nfsauth_refreshq_thread_state_t;
 145 
 146 typedef struct nfsauth_globals {
 147         kmutex_t        mountd_lock;
 148         door_handle_t   mountd_dh;
 149 
 150         /*
 151          * Used to manipulate things on the refreshq_queue.  Note that the
 152          * refresh thread will effectively pop a node off of the queue,
 153          * at which point it will no longer need to hold the mutex.
 154          */
 155         kmutex_t        refreshq_lock;
 156         list_t          refreshq_queue;
 157         kcondvar_t      refreshq_cv;
 158 
 159         /*
 160          * A list_t would be overkill.  These are auth_cache entries which are
 161          * no longer linked to an exi.  It should be the case that all of their
 162          * states are NFS_AUTH_INVALID, i.e., the only way to be put on this
 163          * list is iff their state indicated that they had been placed on the
 164          * refreshq_queue.
 165          *
 166          * Note that while there is no link from the exi or back to the exi,
 167          * the exi can not go away until these entries are harvested.
 168          */
 169         struct auth_cache               *refreshq_dead_entries;
 170         nfsauth_refreshq_thread_state_t refreshq_thread_state;
 171 
 172 } nfsauth_globals_t;
 173 
 174 static void nfsauth_free_node(struct auth_cache *);
 175 static void nfsauth_refresh_thread(nfsauth_globals_t *);
 176 
 177 static int nfsauth_cache_compar(const void *, const void *);
 178 
 179 static nfsauth_globals_t *
 180 nfsauth_get_zg(void)
 181 {
 182         nfs_globals_t *ng = nfs_srv_getzg();
 183         nfsauth_globals_t *nag = ng->nfs_auth;
 184         ASSERT(nag != NULL);
 185         return (nag);
 186 }
 187 
 188 void
 189 mountd_args(uint_t did)
 190 {
 191         nfsauth_globals_t *nag;
 192 
 193         nag = nfsauth_get_zg();
 194         mutex_enter(&nag->mountd_lock);
 195         if (nag->mountd_dh != NULL)
 196                 door_ki_rele(nag->mountd_dh);
 197         nag->mountd_dh = door_ki_lookup(did);
 198         mutex_exit(&nag->mountd_lock);
 199 }
 200 
 201 void
 202 nfsauth_init(void)
 203 {
 204         exi_cache_handle = kmem_cache_create("exi_cache_handle",
 205             sizeof (struct auth_cache), 0, NULL, NULL,
 206             exi_cache_reclaim, NULL, NULL, 0);
 207 }
 208 
 209 void
 210 nfsauth_fini(void)
 211 {
 212         kmem_cache_destroy(exi_cache_handle);
 213 }
 214 
 215 void
 216 nfsauth_zone_init(nfs_globals_t *ng)
 217 {
 218         nfsauth_globals_t *nag;
 219 
 220         nag = kmem_zalloc(sizeof (*nag), KM_SLEEP);
 221 
 222         /*
 223          * mountd can be restarted by smf(5).  We need to make sure
 224          * the updated door handle will safely make it to mountd_dh.
 225          */
 226         mutex_init(&nag->mountd_lock, NULL, MUTEX_DEFAULT, NULL);
 227         mutex_init(&nag->refreshq_lock, NULL, MUTEX_DEFAULT, NULL);
 228         list_create(&nag->refreshq_queue, sizeof (refreshq_exi_node_t),
 229             offsetof(refreshq_exi_node_t, ren_node));
 230         cv_init(&nag->refreshq_cv, NULL, CV_DEFAULT, NULL);
 231         nag->refreshq_thread_state = REFRESHQ_THREAD_NEED_CREATE;
 232 
 233         ng->nfs_auth = nag;
 234 }
 235 
 236 void
 237 nfsauth_zone_shutdown(nfs_globals_t *ng)
 238 {
 239         refreshq_exi_node_t     *ren;
 240         nfsauth_globals_t       *nag = ng->nfs_auth;
 241 
 242         /* Prevent the nfsauth_refresh_thread from getting new work */
 243         mutex_enter(&nag->refreshq_lock);
 244         if (nag->refreshq_thread_state == REFRESHQ_THREAD_RUNNING) {
 245                 nag->refreshq_thread_state = REFRESHQ_THREAD_FINI_REQ;
 246                 cv_broadcast(&nag->refreshq_cv);
 247 
 248                 /* Wait for nfsauth_refresh_thread() to exit */
 249                 while (nag->refreshq_thread_state != REFRESHQ_THREAD_HALTED)
 250                         cv_wait(&nag->refreshq_cv, &nag->refreshq_lock);
 251         }
 252         mutex_exit(&nag->refreshq_lock);
 253 
 254         /*
 255          * Walk the exi_list and in turn, walk the auth_lists and free all
 256          * lists.  In addition, free INVALID auth_cache entries.
 257          */
 258         while ((ren = list_remove_head(&nag->refreshq_queue))) {
 259                 refreshq_auth_node_t *ran;
 260 
 261                 while ((ran = list_remove_head(&ren->ren_authlist)) != NULL) {
 262                         struct auth_cache *p = ran->ran_auth;
 263                         if (p->auth_state == NFS_AUTH_INVALID)
 264                                 nfsauth_free_node(p);
 265                         strfree(ran->ran_netid);
 266                         kmem_free(ran, sizeof (*ran));
 267                 }
 268 
 269                 list_destroy(&ren->ren_authlist);
 270                 exi_rele(ren->ren_exi);
 271                 kmem_free(ren, sizeof (*ren));
 272         }
 273 }
 274 
 275 void
 276 nfsauth_zone_fini(nfs_globals_t *ng)
 277 {
 278         nfsauth_globals_t *nag = ng->nfs_auth;
 279 
 280         ng->nfs_auth = NULL;
 281 
 282         list_destroy(&nag->refreshq_queue);
 283         cv_destroy(&nag->refreshq_cv);
 284         mutex_destroy(&nag->refreshq_lock);
 285         mutex_destroy(&nag->mountd_lock);
 286         /* Extra cleanup. */
 287         if (nag->mountd_dh != NULL)
 288                 door_ki_rele(nag->mountd_dh);
 289         kmem_free(nag, sizeof (*nag));
 290 }
 291 
 292 /*
 293  * Convert the address in a netbuf to
 294  * a hash index for the auth_cache table.
 295  */
 296 static int
 297 hash(struct netbuf *a)
 298 {
 299         int i, h = 0;
 300 
 301         for (i = 0; i < a->len; i++)
 302                 h ^= a->buf[i];
 303 
 304         return (h & (AUTH_TABLESIZE - 1));
 305 }
 306 
 307 /*
 308  * Mask out the components of an
 309  * address that do not identify
 310  * a host. For socket addresses the
 311  * masking gets rid of the port number.
 312  */
 313 static void
 314 addrmask(struct netbuf *addr, struct netbuf *mask)
 315 {
 316         int i;
 317 
 318         for (i = 0; i < addr->len; i++)
 319                 addr->buf[i] &= mask->buf[i];
 320 }
 321 
 322 /*
 323  * nfsauth4_access is used for NFS V4 auth checking. Besides doing
 324  * the common nfsauth_access(), it will check if the client can
 325  * have a limited access to this vnode even if the security flavor
 326  * used does not meet the policy.
 327  */
 328 int
 329 nfsauth4_access(struct exportinfo *exi, vnode_t *vp, struct svc_req *req,
 330     cred_t *cr, uid_t *uid, gid_t *gid, uint_t *ngids, gid_t **gids)
 331 {
 332         int access;
 333 
 334         access = nfsauth_access(exi, req, cr, uid, gid, ngids, gids);
 335 
 336         /*
 337          * There are cases that the server needs to allow the client
 338          * to have a limited view.
 339          *
 340          * e.g.
 341          * /export is shared as "sec=sys,rw=dfs-test-4,sec=krb5,rw"
 342          * /export/home is shared as "sec=sys,rw"
 343          *
 344          * When the client mounts /export with sec=sys, the client
 345          * would get a limited view with RO access on /export to see
 346          * "home" only because the client is allowed to access
 347          * /export/home with auth_sys.
 348          */
 349         if (access & NFSAUTH_DENIED || access & NFSAUTH_WRONGSEC) {
 350                 /*
 351                  * Allow ro permission with LIMITED view if there is a
 352                  * sub-dir exported under vp.
 353                  */
 354                 if (has_visible(exi, vp))
 355                         return (NFSAUTH_LIMITED);
 356         }
 357 
 358         return (access);
 359 }
 360 
 361 static void
 362 sys_log(const char *msg)
 363 {
 364         static time_t   tstamp = 0;
 365         time_t          now;
 366 
 367         /*
 368          * msg is shown (at most) once per minute
 369          */
 370         now = gethrestime_sec();
 371         if ((tstamp + 60) < now) {
 372                 tstamp = now;
 373                 cmn_err(CE_WARN, msg);
 374         }
 375 }
 376 
 377 /*
 378  * Callup to the mountd to get access information in the kernel.
 379  */
 380 static bool_t
 381 nfsauth_retrieve(nfsauth_globals_t *nag, struct exportinfo *exi,
 382     char *req_netid, int flavor, struct netbuf *addr, int *access,
 383     cred_t *clnt_cred, uid_t *srv_uid, gid_t *srv_gid, uint_t *srv_gids_cnt,
 384     gid_t **srv_gids)
 385 {
 386         varg_t                    varg = {0};
 387         nfsauth_res_t             res = {0};
 388         XDR                       xdrs;
 389         size_t                    absz;
 390         caddr_t                   abuf;
 391         int                       last = 0;
 392         door_arg_t                da;
 393         door_info_t               di;
 394         door_handle_t             dh;
 395         uint_t                    ntries = 0;
 396 
 397         /*
 398          * No entry in the cache for this client/flavor
 399          * so we need to call the nfsauth service in the
 400          * mount daemon.
 401          */
 402 
 403         varg.vers = V_PROTO;
 404         varg.arg_u.arg.cmd = NFSAUTH_ACCESS;
 405         varg.arg_u.arg.areq.req_client.n_len = addr->len;
 406         varg.arg_u.arg.areq.req_client.n_bytes = addr->buf;
 407         varg.arg_u.arg.areq.req_netid = req_netid;
 408         varg.arg_u.arg.areq.req_path = exi->exi_export.ex_path;
 409         varg.arg_u.arg.areq.req_flavor = flavor;
 410         varg.arg_u.arg.areq.req_clnt_uid = crgetuid(clnt_cred);
 411         varg.arg_u.arg.areq.req_clnt_gid = crgetgid(clnt_cred);
 412         varg.arg_u.arg.areq.req_clnt_gids.len = crgetngroups(clnt_cred);
 413         varg.arg_u.arg.areq.req_clnt_gids.val = (gid_t *)crgetgroups(clnt_cred);
 414 
 415         DTRACE_PROBE1(nfsserv__func__nfsauth__varg, varg_t *, &varg);
 416 
 417         /*
 418          * Setup the XDR stream for encoding the arguments. Notice that
 419          * in addition to the args having variable fields (req_netid and
 420          * req_path), the argument data structure is itself versioned,
 421          * so we need to make sure we can size the arguments buffer
 422          * appropriately to encode all the args. If we can't get sizing
 423          * info _or_ properly encode the arguments, there's really no
 424          * point in continuting, so we fail the request.
 425          */
 426         if ((absz = xdr_sizeof(xdr_varg, &varg)) == 0) {
 427                 *access = NFSAUTH_DENIED;
 428                 return (FALSE);
 429         }
 430 
 431         abuf = (caddr_t)kmem_alloc(absz, KM_SLEEP);
 432         xdrmem_create(&xdrs, abuf, absz, XDR_ENCODE);
 433         if (!xdr_varg(&xdrs, &varg)) {
 434                 XDR_DESTROY(&xdrs);
 435                 goto fail;
 436         }
 437         XDR_DESTROY(&xdrs);
 438 
 439         /*
 440          * Prepare the door arguments
 441          *
 442          * We don't know the size of the message the daemon
 443          * will pass back to us.  By setting rbuf to NULL,
 444          * we force the door code to allocate a buf of the
 445          * appropriate size.  We must set rsize > 0, however,
 446          * else the door code acts as if no response was
 447          * expected and doesn't pass the data to us.
 448          */
 449         da.data_ptr = (char *)abuf;
 450         da.data_size = absz;
 451         da.desc_ptr = NULL;
 452         da.desc_num = 0;
 453         da.rbuf = NULL;
 454         da.rsize = 1;
 455 
 456 retry:
 457         mutex_enter(&nag->mountd_lock);
 458         dh = nag->mountd_dh;
 459         if (dh != NULL)
 460                 door_ki_hold(dh);
 461         mutex_exit(&nag->mountd_lock);
 462 
 463         if (dh == NULL) {
 464                 /*
 465                  * The rendezvous point has not been established yet!
 466                  * This could mean that either mountd(1m) has not yet
 467                  * been started or that _this_ routine nuked the door
 468                  * handle after receiving an EINTR for a REVOKED door.
 469                  *
 470                  * Returning NFSAUTH_DROP will cause the NFS client
 471                  * to retransmit the request, so let's try to be more
 472                  * rescillient and attempt for ntries before we bail.
 473                  */
 474                 if (++ntries % NFSAUTH_DR_TRYCNT) {
 475                         delay(hz);
 476                         goto retry;
 477                 }
 478 
 479                 kmem_free(abuf, absz);
 480 
 481                 sys_log("nfsauth: mountd has not established door");
 482                 *access = NFSAUTH_DROP;
 483                 return (FALSE);
 484         }
 485 
 486         ntries = 0;
 487 
 488         /*
 489          * Now that we've got what we need, place the call.
 490          */
 491         switch (door_ki_upcall_limited(dh, &da, NULL, SIZE_MAX, 0)) {
 492         case 0:                         /* Success */
 493                 door_ki_rele(dh);
 494 
 495                 if (da.data_ptr == NULL && da.data_size == 0) {
 496                         /*
 497                          * The door_return that contained the data
 498                          * failed! We're here because of the 2nd
 499                          * door_return (w/o data) such that we can
 500                          * get control of the thread (and exit
 501                          * gracefully).
 502                          */
 503                         DTRACE_PROBE1(nfsserv__func__nfsauth__door__nil,
 504                             door_arg_t *, &da);
 505                         goto fail;
 506                 }
 507 
 508                 break;
 509 
 510         case EAGAIN:
 511                 /*
 512                  * Server out of resources; back off for a bit
 513                  */
 514                 door_ki_rele(dh);
 515                 delay(hz);
 516                 goto retry;
 517                 /* NOTREACHED */
 518 
 519         case EINTR:
 520                 if (!door_ki_info(dh, &di)) {
 521                         door_ki_rele(dh);
 522 
 523                         if (di.di_attributes & DOOR_REVOKED) {
 524                                 /*
 525                                  * The server barfed and revoked
 526                                  * the (existing) door on us; we
 527                                  * want to wait to give smf(5) a
 528                                  * chance to restart mountd(1m)
 529                                  * and establish a new door handle.
 530                                  */
 531                                 mutex_enter(&nag->mountd_lock);
 532                                 if (dh == nag->mountd_dh) {
 533                                         door_ki_rele(nag->mountd_dh);
 534                                         nag->mountd_dh = NULL;
 535                                 }
 536                                 mutex_exit(&nag->mountd_lock);
 537                                 delay(hz);
 538                                 goto retry;
 539                         }
 540                         /*
 541                          * If the door was _not_ revoked on us,
 542                          * then more than likely we took an INTR,
 543                          * so we need to fail the operation.
 544                          */
 545                         goto fail;
 546                 }
 547                 /*
 548                  * The only failure that can occur from getting
 549                  * the door info is EINVAL, so we let the code
 550                  * below handle it.
 551                  */
 552                 /* FALLTHROUGH */
 553 
 554         case EBADF:
 555         case EINVAL:
 556         default:
 557                 /*
 558                  * If we have a stale door handle, give smf a last
 559                  * chance to start it by sleeping for a little bit.
 560                  * If we're still hosed, we'll fail the call.
 561                  *
 562                  * Since we're going to reacquire the door handle
 563                  * upon the retry, we opt to sleep for a bit and
 564                  * _not_ to clear mountd_dh. If mountd restarted
 565                  * and was able to set mountd_dh, we should see
 566                  * the new instance; if not, we won't get caught
 567                  * up in the retry/DELAY loop.
 568                  */
 569                 door_ki_rele(dh);
 570                 if (!last) {
 571                         delay(hz);
 572                         last++;
 573                         goto retry;
 574                 }
 575                 sys_log("nfsauth: stale mountd door handle");
 576                 goto fail;
 577         }
 578 
 579         ASSERT(da.rbuf != NULL);
 580 
 581         /*
 582          * No door errors encountered; setup the XDR stream for decoding
 583          * the results. If we fail to decode the results, we've got no
 584          * other recourse than to fail the request.
 585          */
 586         xdrmem_create(&xdrs, da.rbuf, da.rsize, XDR_DECODE);
 587         if (!xdr_nfsauth_res(&xdrs, &res)) {
 588                 xdr_free(xdr_nfsauth_res, (char *)&res);
 589                 XDR_DESTROY(&xdrs);
 590                 kmem_free(da.rbuf, da.rsize);
 591                 goto fail;
 592         }
 593         XDR_DESTROY(&xdrs);
 594         kmem_free(da.rbuf, da.rsize);
 595 
 596         DTRACE_PROBE1(nfsserv__func__nfsauth__results, nfsauth_res_t *, &res);
 597         switch (res.stat) {
 598                 case NFSAUTH_DR_OKAY:
 599                         *access = res.ares.auth_perm;
 600                         *srv_uid = res.ares.auth_srv_uid;
 601                         *srv_gid = res.ares.auth_srv_gid;
 602 
 603                         if ((*srv_gids_cnt = res.ares.auth_srv_gids.len) != 0) {
 604                                 *srv_gids = kmem_alloc(*srv_gids_cnt *
 605                                     sizeof (gid_t), KM_SLEEP);
 606                                 bcopy(res.ares.auth_srv_gids.val, *srv_gids,
 607                                     *srv_gids_cnt * sizeof (gid_t));
 608                         } else {
 609                                 *srv_gids = NULL;
 610                         }
 611 
 612                         break;
 613 
 614                 case NFSAUTH_DR_EFAIL:
 615                 case NFSAUTH_DR_DECERR:
 616                 case NFSAUTH_DR_BADCMD:
 617                 default:
 618                         xdr_free(xdr_nfsauth_res, (char *)&res);
 619 fail:
 620                         *access = NFSAUTH_DENIED;
 621                         kmem_free(abuf, absz);
 622                         return (FALSE);
 623                         /* NOTREACHED */
 624         }
 625 
 626         xdr_free(xdr_nfsauth_res, (char *)&res);
 627         kmem_free(abuf, absz);
 628 
 629         return (TRUE);
 630 }
 631 
 632 static void
 633 nfsauth_refresh_thread(nfsauth_globals_t *nag)
 634 {
 635         refreshq_exi_node_t     *ren;
 636         refreshq_auth_node_t    *ran;
 637 
 638         struct exportinfo       *exi;
 639 
 640         int                     access;
 641         bool_t                  retrieval;
 642 
 643         callb_cpr_t             cprinfo;
 644 
 645         CALLB_CPR_INIT(&cprinfo, &nag->refreshq_lock, callb_generic_cpr,
 646             "nfsauth_refresh");
 647 
 648         for (;;) {
 649                 mutex_enter(&nag->refreshq_lock);
 650                 if (nag->refreshq_thread_state != REFRESHQ_THREAD_RUNNING) {
 651                         /* Keep the hold on the lock! */
 652                         break;
 653                 }
 654 
 655                 ren = list_remove_head(&nag->refreshq_queue);
 656                 if (ren == NULL) {
 657                         CALLB_CPR_SAFE_BEGIN(&cprinfo);
 658                         cv_wait(&nag->refreshq_cv, &nag->refreshq_lock);
 659                         CALLB_CPR_SAFE_END(&cprinfo, &nag->refreshq_lock);
 660                         mutex_exit(&nag->refreshq_lock);
 661                         continue;
 662                 }
 663                 mutex_exit(&nag->refreshq_lock);
 664 
 665                 exi = ren->ren_exi;
 666                 ASSERT(exi != NULL);
 667 
 668                 /*
 669                  * Since the ren was removed from the refreshq_queue above,
 670                  * this is the only thread aware about the ren existence, so we
 671                  * have the exclusive ownership of it and we do not need to
 672                  * protect it by any lock.
 673                  */
 674                 while ((ran = list_remove_head(&ren->ren_authlist))) {
 675                         uid_t uid;
 676                         gid_t gid;
 677                         uint_t ngids;
 678                         gid_t *gids;
 679                         struct auth_cache *p = ran->ran_auth;
 680                         char *netid = ran->ran_netid;
 681 
 682                         ASSERT(p != NULL);
 683                         ASSERT(netid != NULL);
 684 
 685                         kmem_free(ran, sizeof (refreshq_auth_node_t));
 686 
 687                         mutex_enter(&p->auth_lock);
 688 
 689                         /*
 690                          * Once the entry goes INVALID, it can not change
 691                          * state.
 692                          *
 693                          * No need to refresh entries also in a case we are
 694                          * just shutting down.
 695                          *
 696                          * In general, there is no need to hold the
 697                          * refreshq_lock to test the refreshq_thread_state.  We
 698                          * do hold it at other places because there is some
 699                          * related thread synchronization (or some other tasks)
 700                          * close to the refreshq_thread_state check.
 701                          *
 702                          * The check for the refreshq_thread_state value here
 703                          * is purely advisory to allow the faster
 704                          * nfsauth_refresh_thread() shutdown.  In a case we
 705                          * will miss such advisory, nothing catastrophic
 706                          * happens: we will just spin longer here before the
 707                          * shutdown.
 708                          */
 709                         if (p->auth_state == NFS_AUTH_INVALID ||
 710                             nag->refreshq_thread_state !=
 711                             REFRESHQ_THREAD_RUNNING) {
 712                                 mutex_exit(&p->auth_lock);
 713 
 714                                 if (p->auth_state == NFS_AUTH_INVALID)
 715                                         nfsauth_free_node(p);
 716 
 717                                 strfree(netid);
 718 
 719                                 continue;
 720                         }
 721 
 722                         /*
 723                          * Make sure the state is valid.  Note that once we
 724                          * change the state to NFS_AUTH_REFRESHING, no other
 725                          * thread will be able to work on this entry.
 726                          */
 727                         ASSERT(p->auth_state == NFS_AUTH_STALE);
 728 
 729                         p->auth_state = NFS_AUTH_REFRESHING;
 730                         mutex_exit(&p->auth_lock);
 731 
 732                         DTRACE_PROBE2(nfsauth__debug__cache__refresh,
 733                             struct exportinfo *, exi,
 734                             struct auth_cache *, p);
 735 
 736                         /*
 737                          * The first caching of the access rights
 738                          * is done with the netid pulled out of the
 739                          * request from the client. All subsequent
 740                          * users of the cache may or may not have
 741                          * the same netid. It doesn't matter. So
 742                          * when we refresh, we simply use the netid
 743                          * of the request which triggered the
 744                          * refresh attempt.
 745                          */
 746                         retrieval = nfsauth_retrieve(nag, exi, netid,
 747                             p->auth_flavor, &p->auth_clnt->authc_addr, &access,
 748                             p->auth_clnt_cred, &uid, &gid, &ngids, &gids);
 749 
 750                         /*
 751                          * This can only be set in one other place
 752                          * and the state has to be NFS_AUTH_FRESH.
 753                          */
 754                         strfree(netid);
 755 
 756                         mutex_enter(&p->auth_lock);
 757                         if (p->auth_state == NFS_AUTH_INVALID) {
 758                                 mutex_exit(&p->auth_lock);
 759                                 nfsauth_free_node(p);
 760                                 if (retrieval == TRUE)
 761                                         kmem_free(gids, ngids * sizeof (gid_t));
 762                         } else {
 763                                 /*
 764                                  * If we got an error, do not reset the
 765                                  * time. This will cause the next access
 766                                  * check for the client to reschedule this
 767                                  * node.
 768                                  */
 769                                 if (retrieval == TRUE) {
 770                                         p->auth_access = access;
 771 
 772                                         p->auth_srv_uid = uid;
 773                                         p->auth_srv_gid = gid;
 774                                         kmem_free(p->auth_srv_gids,
 775                                             p->auth_srv_ngids * sizeof (gid_t));
 776                                         p->auth_srv_ngids = ngids;
 777                                         p->auth_srv_gids = gids;
 778 
 779                                         p->auth_freshness = gethrestime_sec();
 780                                 }
 781                                 p->auth_state = NFS_AUTH_FRESH;
 782 
 783                                 cv_broadcast(&p->auth_cv);
 784                                 mutex_exit(&p->auth_lock);
 785                         }
 786                 }
 787 
 788                 list_destroy(&ren->ren_authlist);
 789                 exi_rele(ren->ren_exi);
 790                 kmem_free(ren, sizeof (refreshq_exi_node_t));
 791         }
 792 
 793         nag->refreshq_thread_state = REFRESHQ_THREAD_HALTED;
 794         cv_broadcast(&nag->refreshq_cv);
 795         CALLB_CPR_EXIT(&cprinfo);
 796         DTRACE_PROBE(nfsauth__nfsauth__refresh__thread__exit);
 797         zthread_exit();
 798 }
 799 
 800 int
 801 nfsauth_cache_clnt_compar(const void *v1, const void *v2)
 802 {
 803         int c;
 804 
 805         const struct auth_cache_clnt *a1 = (const struct auth_cache_clnt *)v1;
 806         const struct auth_cache_clnt *a2 = (const struct auth_cache_clnt *)v2;
 807 
 808         if (a1->authc_addr.len < a2->authc_addr.len)
 809                 return (-1);
 810         if (a1->authc_addr.len > a2->authc_addr.len)
 811                 return (1);
 812 
 813         c = memcmp(a1->authc_addr.buf, a2->authc_addr.buf, a1->authc_addr.len);
 814         if (c < 0)
 815                 return (-1);
 816         if (c > 0)
 817                 return (1);
 818 
 819         return (0);
 820 }
 821 
 822 static int
 823 nfsauth_cache_compar(const void *v1, const void *v2)
 824 {
 825         int c;
 826 
 827         const struct auth_cache *a1 = (const struct auth_cache *)v1;
 828         const struct auth_cache *a2 = (const struct auth_cache *)v2;
 829 
 830         if (a1->auth_flavor < a2->auth_flavor)
 831                 return (-1);
 832         if (a1->auth_flavor > a2->auth_flavor)
 833                 return (1);
 834 
 835         if (crgetuid(a1->auth_clnt_cred) < crgetuid(a2->auth_clnt_cred))
 836                 return (-1);
 837         if (crgetuid(a1->auth_clnt_cred) > crgetuid(a2->auth_clnt_cred))
 838                 return (1);
 839 
 840         if (crgetgid(a1->auth_clnt_cred) < crgetgid(a2->auth_clnt_cred))
 841                 return (-1);
 842         if (crgetgid(a1->auth_clnt_cred) > crgetgid(a2->auth_clnt_cred))
 843                 return (1);
 844 
 845         if (crgetngroups(a1->auth_clnt_cred) < crgetngroups(a2->auth_clnt_cred))
 846                 return (-1);
 847         if (crgetngroups(a1->auth_clnt_cred) > crgetngroups(a2->auth_clnt_cred))
 848                 return (1);
 849 
 850         c = memcmp(crgetgroups(a1->auth_clnt_cred),
 851             crgetgroups(a2->auth_clnt_cred), crgetngroups(a1->auth_clnt_cred));
 852         if (c < 0)
 853                 return (-1);
 854         if (c > 0)
 855                 return (1);
 856 
 857         return (0);
 858 }
 859 
/*
 * Get the access information from the cache or callup to the mountd
 * to get and cache the access information in the kernel.
 *
 * Arguments:
 *   exi    - the export being accessed; its exi_cache[] table and
 *            exi_cache_lock are used here.
 *   req    - the RPC request; supplies the caller's address, the
 *            address mask and the netid.
 *   flavor - security flavor number to look up.
 *   cr     - client credential, must not be NULL.  A crdup() copy is
 *            used as part of the lookup key.
 *   uid, gid, ngids, gids - optional outputs; each may be NULL.  The
 *            group outputs are only written when both ngids and gids
 *            are non-NULL.  A non-empty *gids array handed back here
 *            is a separate kmem allocation of *ngids * sizeof (gid_t)
 *            bytes that the caller is expected to kmem_free() (see
 *            nfsauth_access()).
 *
 * Returns the access value, either as produced by nfsauth_retrieve()
 * or as stored in the cache entry (auth_access).
 *
 * If memory for a new cache node cannot be obtained without sleeping,
 * the function falls back to the "retrieve" path at the bottom, which
 * asks nfsauth_retrieve() directly and caches nothing.
 */
static int
nfsauth_cache_get(struct exportinfo *exi, struct svc_req *req, int flavor,
    cred_t *cr, uid_t *uid, gid_t *gid, uint_t *ngids, gid_t **gids)
{
        nfsauth_globals_t       *nag;
        struct netbuf           *taddrmask;
        struct netbuf           addr;   /* temporary copy of client's address */
        const struct netbuf     *claddr;
        avl_tree_t              *tree;
        struct auth_cache       ac;     /* used as a template for avl_find() */
        struct auth_cache_clnt  *c;
        struct auth_cache_clnt  acc;    /* used as a template for avl_find() */
        struct auth_cache       *p = NULL;
        int                     access;

        /* Results of nfsauth_retrieve(), copied out only on success. */
        uid_t                   tmpuid;
        gid_t                   tmpgid;
        uint_t                  tmpngids;
        gid_t                   *tmpgids;

        avl_index_t             where;  /* used for avl_find()/avl_insert() */

        ASSERT(cr != NULL);

        ASSERT3P(curzone->zone_id, ==, exi->exi_zoneid);
        nag = nfsauth_get_zg();

        /*
         * Now check whether this client already
         * has an entry for this flavor in the cache
         * for this export.
         * Get the caller's address, mask off the
         * parts of the address that do not identify
         * the host (port number, etc), and then hash
         * it to find the chain of cache entries.
         */

        /*
         * Work on a private copy of the address buffer, since addrmask()
         * below is applied to addr and the caller's netbuf is const.
         */
        claddr = svc_getrpccaller(req->rq_xprt);
        addr = *claddr;
        if (claddr->len != 0) {
                addr.buf = kmem_alloc(addr.maxlen, KM_SLEEP);
                bcopy(claddr->buf, addr.buf, claddr->len);
        } else {
                addr.buf = NULL;
        }

        SVC_GETADDRMASK(req->rq_xprt, SVC_TATTR_ADDRMASK, (void **)&taddrmask);
        ASSERT(taddrmask != NULL);
        addrmask(&addr, taddrmask);

        /*
         * Build the two lookup templates: ac for the per-client tree
         * (flavor + credential) and acc for the per-export tree (address).
         * The crdup()ed credential must be released with crfree() on
         * every path that does not hand it over to a new cache entry.
         */
        ac.auth_flavor = flavor;
        ac.auth_clnt_cred = crdup(cr);

        acc.authc_addr = addr;

        tree = exi->exi_cache[hash(&addr)];

        /* Step 1: find (or create) the auth_cache_clnt node for this address. */
        rw_enter(&exi->exi_cache_lock, RW_READER);
        c = (struct auth_cache_clnt *)avl_find(tree, &acc, NULL);

        if (c == NULL) {
                struct auth_cache_clnt *nc;

                /* Drop the lock while the candidate node is built. */
                rw_exit(&exi->exi_cache_lock);

                nc = kmem_alloc(sizeof (*nc), KM_NOSLEEP | KM_NORMALPRI);
                if (nc == NULL)
                        goto retrieve;

                /*
                 * Initialize the new auth_cache_clnt
                 */
                nc->authc_addr = addr;
                nc->authc_addr.buf = kmem_alloc(addr.maxlen,
                    KM_NOSLEEP | KM_NORMALPRI);
                if (addr.maxlen != 0 && nc->authc_addr.buf == NULL) {
                        kmem_free(nc, sizeof (*nc));
                        goto retrieve;
                }
                bcopy(addr.buf, nc->authc_addr.buf, addr.len);
                rw_init(&nc->authc_lock, NULL, RW_DEFAULT, NULL);
                avl_create(&nc->authc_tree, nfsauth_cache_compar,
                    sizeof (struct auth_cache),
                    offsetof(struct auth_cache, auth_link));

                /*
                 * The lock was dropped above, so look the address up
                 * again as writer: another thread may have inserted a
                 * node for the same client in the meantime.
                 */
                rw_enter(&exi->exi_cache_lock, RW_WRITER);
                c = (struct auth_cache_clnt *)avl_find(tree, &acc, &where);
                if (c == NULL) {
                        avl_insert(tree, nc, where);
                        rw_downgrade(&exi->exi_cache_lock);
                        c = nc;
                } else {
                        rw_downgrade(&exi->exi_cache_lock);

                        /* Lost the race; discard our unused candidate. */
                        avl_destroy(&nc->authc_tree);
                        rw_destroy(&nc->authc_lock);
                        kmem_free(nc->authc_addr.buf, nc->authc_addr.maxlen);
                        kmem_free(nc, sizeof (*nc));
                }
        }

        /* exi_cache_lock is held as reader on every path reaching here. */
        ASSERT(c != NULL);

        /* Step 2: find (or create) the auth_cache entry for flavor + cred. */
        rw_enter(&c->authc_lock, RW_READER);
        p = (struct auth_cache *)avl_find(&c->authc_tree, &ac, NULL);

        if (p == NULL) {
                struct auth_cache *np;

                rw_exit(&c->authc_lock);

                np = kmem_cache_alloc(exi_cache_handle,
                    KM_NOSLEEP | KM_NORMALPRI);
                if (np == NULL) {
                        /* The retrieve path expects no locks to be held. */
                        rw_exit(&exi->exi_cache_lock);
                        goto retrieve;
                }

                /*
                 * Initialize the new auth_cache
                 */
                np->auth_clnt = c;
                np->auth_flavor = flavor;
                /* The duplicated credential is handed over to the entry. */
                np->auth_clnt_cred = ac.auth_clnt_cred;
                np->auth_srv_ngids = 0;
                np->auth_srv_gids = NULL;
                np->auth_time = np->auth_freshness = gethrestime_sec();
                np->auth_state = NFS_AUTH_NEW;
                mutex_init(&np->auth_lock, NULL, MUTEX_DEFAULT, NULL);
                cv_init(&np->auth_cv, NULL, CV_DEFAULT, NULL);

                /*
                 * Take the client node's lock as writer before letting
                 * go of the export-wide lock.
                 */
                rw_enter(&c->authc_lock, RW_WRITER);
                rw_exit(&exi->exi_cache_lock);

                /* Re-check under the writer lock, as in step 1. */
                p = (struct auth_cache *)avl_find(&c->authc_tree, &ac, &where);
                if (p == NULL) {
                        avl_insert(&c->authc_tree, np, where);
                        rw_downgrade(&c->authc_lock);
                        p = np;
                } else {
                        rw_downgrade(&c->authc_lock);

                        /*
                         * Lost the race; undo the initialization above,
                         * including the credential np would have owned.
                         */
                        cv_destroy(&np->auth_cv);
                        mutex_destroy(&np->auth_lock);
                        crfree(ac.auth_clnt_cred);
                        kmem_cache_free(exi_cache_handle, np);
                }
        } else {
                /* Existing entry: the template credential is not needed. */
                rw_exit(&exi->exi_cache_lock);
                crfree(ac.auth_clnt_cred);
        }

        /*
         * Take the entry's own lock before dropping the client node's
         * lock, so there is no window in which neither is held.
         */
        mutex_enter(&p->auth_lock);
        rw_exit(&c->authc_lock);

        /*
         * If the entry is in the WAITING state then some other thread is just
         * retrieving the required info.  The entry was either NEW, or the list
         * of client's supplemental groups is going to be changed (either by
         * this thread, or by some other thread).  We need to wait until the
         * nfsauth_retrieve() is done.
         */
        while (p->auth_state == NFS_AUTH_WAITING)
                cv_wait(&p->auth_cv, &p->auth_lock);

        /*
         * Here the entry cannot be in WAITING or INVALID state.
         */
        ASSERT(p->auth_state != NFS_AUTH_WAITING);
        ASSERT(p->auth_state != NFS_AUTH_INVALID);

        /*
         * If the cache entry is not valid yet, we need to retrieve the
         * info ourselves.
         */
        if (p->auth_state == NFS_AUTH_NEW) {
                bool_t res;
                /*
                 * NFS_AUTH_NEW is the default output auth_state value in a
                 * case we failed somewhere below.
                 */
                auth_state_t state = NFS_AUTH_NEW;

                /*
                 * Mark the entry WAITING so that other threads block in
                 * the cv_wait() loop above while the lock is dropped for
                 * the retrieval.
                 */
                p->auth_state = NFS_AUTH_WAITING;
                mutex_exit(&p->auth_lock);
                /*
                 * From here on addr refers to the address stored in the
                 * client node rather than to our private copy, so it is
                 * not freed again in this branch.
                 */
                kmem_free(addr.buf, addr.maxlen);
                addr = p->auth_clnt->authc_addr;

                atomic_inc_uint(&nfsauth_cache_miss);

                res = nfsauth_retrieve(nag, exi, svc_getnetid(req->rq_xprt),
                    flavor, &addr, &access, cr, &tmpuid, &tmpgid, &tmpngids,
                    &tmpgids);

                /* Stored regardless of whether the retrieval succeeded. */
                p->auth_access = access;
                p->auth_time = p->auth_freshness = gethrestime_sec();

                if (res == TRUE) {
                        if (uid != NULL)
                                *uid = tmpuid;
                        if (gid != NULL)
                                *gid = tmpgid;
                        if (ngids != NULL && gids != NULL) {
                                /* The retrieved array goes to the caller. */
                                *ngids = tmpngids;
                                *gids = tmpgids;

                                /*
                                 * We need a copy of gids for the
                                 * auth_cache entry
                                 */
                                tmpgids = kmem_alloc(tmpngids * sizeof (gid_t),
                                    KM_NOSLEEP | KM_NORMALPRI);
                                if (tmpgids != NULL)
                                        bcopy(*gids, tmpgids,
                                            tmpngids * sizeof (gid_t));
                        }

                        /*
                         * Only populate the entry when we actually have a
                         * group array for it (or there are no groups).  If
                         * the copy above could not be allocated, the entry
                         * stays NEW and a later call retrieves again.
                         */
                        if (tmpgids != NULL || tmpngids == 0) {
                                p->auth_srv_uid = tmpuid;
                                p->auth_srv_gid = tmpgid;
                                p->auth_srv_ngids = tmpngids;
                                p->auth_srv_gids = tmpgids;

                                state = NFS_AUTH_FRESH;
                        }
                }

                /*
                 * Set the auth_state and notify waiters.
                 */
                mutex_enter(&p->auth_lock);
                p->auth_state = state;
                cv_broadcast(&p->auth_cv);
                mutex_exit(&p->auth_lock);
        } else {
                /* Cache hit: answer from the entry; auth_lock is held. */
                uint_t nach;
                time_t refresh;

                /* Seconds since the entry's data was last refreshed. */
                refresh = gethrestime_sec() - p->auth_freshness;

                /* Record the time of this use of the entry. */
                p->auth_time = gethrestime_sec();

                if (uid != NULL)
                        *uid = p->auth_srv_uid;
                if (gid != NULL)
                        *gid = p->auth_srv_gid;
                if (ngids != NULL && gids != NULL) {
                        /* Hand the caller its own copy of the group list. */
                        if ((*ngids = p->auth_srv_ngids) != 0) {
                                size_t sz = *ngids * sizeof (gid_t);
                                *gids = kmem_alloc(sz, KM_SLEEP);
                                bcopy(p->auth_srv_gids, *gids, sz);
                        } else {
                                *gids = NULL;
                        }
                }

                access = p->auth_access;

                /*
                 * The cached answer is still returned to this caller, but
                 * a FRESH entry older than NFSAUTH_CACHE_REFRESH is marked
                 * STALE and queued for the refresh thread.
                 */
                if ((refresh > NFSAUTH_CACHE_REFRESH) &&
                    p->auth_state == NFS_AUTH_FRESH) {
                        refreshq_auth_node_t *ran;
                        uint_t nacr;

                        p->auth_state = NFS_AUTH_STALE;
                        mutex_exit(&p->auth_lock);

                        nacr = atomic_inc_uint_nv(&nfsauth_cache_refresh);
                        DTRACE_PROBE3(nfsauth__debug__cache__stale,
                            struct exportinfo *, exi,
                            struct auth_cache *, p,
                            uint_t, nacr);

                        /* Work item: the entry plus a copy of the netid. */
                        ran = kmem_alloc(sizeof (refreshq_auth_node_t),
                            KM_SLEEP);
                        ran->ran_auth = p;
                        ran->ran_netid = strdup(svc_getnetid(req->rq_xprt));

                        mutex_enter(&nag->refreshq_lock);

                        if (nag->refreshq_thread_state ==
                            REFRESHQ_THREAD_NEED_CREATE) {
                                /* Launch nfsauth refresh thread */
                                nag->refreshq_thread_state =
                                    REFRESHQ_THREAD_RUNNING;
                                (void) zthread_create(NULL, 0,
                                    nfsauth_refresh_thread, nag, 0,
                                    minclsyspri);
                        }

                        /*
                         * We should not add a work queue item if the thread
                         * is not accepting them.
                         */
                        if (nag->refreshq_thread_state ==
                            REFRESHQ_THREAD_RUNNING) {
                                refreshq_exi_node_t *ren;

                                /*
                                 * Is there an existing exi_list?
                                 */
                                for (ren = list_head(&nag->refreshq_queue);
                                    ren != NULL;
                                    ren = list_next(&nag->refreshq_queue,
                                    ren)) {
                                        if (ren->ren_exi == exi) {
                                                list_insert_tail(
                                                    &ren->ren_authlist, ran);
                                                break;
                                        }
                                }

                                /*
                                 * No queue node for this export yet; create
                                 * one.  It takes a hold on the export.
                                 */
                                if (ren == NULL) {
                                        ren = kmem_alloc(
                                            sizeof (refreshq_exi_node_t),
                                            KM_SLEEP);

                                        exi_hold(exi);
                                        ren->ren_exi = exi;

                                        list_create(&ren->ren_authlist,
                                            sizeof (refreshq_auth_node_t),
                                            offsetof(refreshq_auth_node_t,
                                            ran_node));

                                        list_insert_tail(&ren->ren_authlist,
                                            ran);
                                        list_insert_tail(&nag->refreshq_queue,
                                            ren);
                                }

                                cv_broadcast(&nag->refreshq_cv);
                        } else {
                                /* Not queued; release the unused work item. */
                                strfree(ran->ran_netid);
                                kmem_free(ran, sizeof (refreshq_auth_node_t));
                        }

                        mutex_exit(&nag->refreshq_lock);
                } else {
                        mutex_exit(&p->auth_lock);
                }

                nach = atomic_inc_uint_nv(&nfsauth_cache_hit);
                DTRACE_PROBE2(nfsauth__debug__cache__hit,
                    uint_t, nach,
                    time_t, refresh);

                /* In this branch addr is still our private copy. */
                kmem_free(addr.buf, addr.maxlen);
        }

        return (access);

retrieve:
        /*
         * Reached only when a cache node could not be allocated.  No
         * locks are held here and the duplicated credential was never
         * handed over to an entry.
         */
        crfree(ac.auth_clnt_cred);

        /*
         * Retrieve the required data without caching.
         */

        ASSERT(p == NULL);

        atomic_inc_uint(&nfsauth_cache_miss);

        if (nfsauth_retrieve(nag, exi, svc_getnetid(req->rq_xprt), flavor,
            &addr, &access, cr, &tmpuid, &tmpgid, &tmpngids, &tmpgids)) {
                if (uid != NULL)
                        *uid = tmpuid;
                if (gid != NULL)
                        *gid = tmpgid;
                if (ngids != NULL && gids != NULL) {
                        *ngids = tmpngids;
                        *gids = tmpgids;
                } else {
                        /* Nobody wants the group list; release it. */
                        kmem_free(tmpgids, tmpngids * sizeof (gid_t));
                }
        }

        kmem_free(addr.buf, addr.maxlen);

        return (access);
}
1244 
1245 /*
1246  * Check if the requesting client has access to the filesystem with
1247  * a given nfs flavor number which is an explicitly shared flavor.
1248  */
1249 int
1250 nfsauth4_secinfo_access(struct exportinfo *exi, struct svc_req *req,
1251     int flavor, int perm, cred_t *cr)
1252 {
1253         int access;
1254 
1255         if (! (perm & M_4SEC_EXPORTED)) {
1256                 return (NFSAUTH_DENIED);
1257         }
1258 
1259         /*
1260          * Optimize if there are no lists
1261          */
1262         if ((perm & (M_ROOT | M_NONE | M_MAP)) == 0) {
1263                 perm &= ~M_4SEC_EXPORTED;
1264                 if (perm == M_RO)
1265                         return (NFSAUTH_RO);
1266                 if (perm == M_RW)
1267                         return (NFSAUTH_RW);
1268         }
1269 
1270         access = nfsauth_cache_get(exi, req, flavor, cr, NULL, NULL, NULL,
1271             NULL);
1272 
1273         return (access);
1274 }
1275 
1276 int
1277 nfsauth_access(struct exportinfo *exi, struct svc_req *req, cred_t *cr,
1278     uid_t *uid, gid_t *gid, uint_t *ngids, gid_t **gids)
1279 {
1280         int access, mapaccess;
1281         struct secinfo *sp;
1282         int i, flavor, perm;
1283         int authnone_entry = -1;
1284 
1285         /*
1286          * By default root is mapped to anonymous user.
1287          * This might get overriden later in nfsauth_cache_get().
1288          */
1289         if (crgetuid(cr) == 0) {
1290                 if (uid != NULL)
1291                         *uid = exi->exi_export.ex_anon;
1292                 if (gid != NULL)
1293                         *gid = exi->exi_export.ex_anon;
1294         } else {
1295                 if (uid != NULL)
1296                         *uid = crgetuid(cr);
1297                 if (gid != NULL)
1298                         *gid = crgetgid(cr);
1299         }
1300 
1301         if (ngids != NULL)
1302                 *ngids = 0;
1303         if (gids != NULL)
1304                 *gids = NULL;
1305 
1306         /*
1307          *  Get the nfs flavor number from xprt.
1308          */
1309         flavor = (int)(uintptr_t)req->rq_xprt->xp_cookie;
1310 
1311         /*
1312          * First check the access restrictions on the filesystem.  If
1313          * there are no lists associated with this flavor then there's no
1314          * need to make an expensive call to the nfsauth service or to
1315          * cache anything.
1316          */
1317 
1318         sp = exi->exi_export.ex_secinfo;
1319         for (i = 0; i < exi->exi_export.ex_seccnt; i++) {
1320                 if (flavor != sp[i].s_secinfo.sc_nfsnum) {
1321                         if (sp[i].s_secinfo.sc_nfsnum == AUTH_NONE)
1322                                 authnone_entry = i;
1323                         continue;
1324                 }
1325                 break;
1326         }
1327 
1328         mapaccess = 0;
1329 
1330         if (i >= exi->exi_export.ex_seccnt) {
1331                 /*
1332                  * Flavor not found, but use AUTH_NONE if it exists
1333                  */
1334                 if (authnone_entry == -1)
1335                         return (NFSAUTH_DENIED);
1336                 flavor = AUTH_NONE;
1337                 mapaccess = NFSAUTH_MAPNONE;
1338                 i = authnone_entry;
1339         }
1340 
1341         /*
1342          * If the flavor is in the ex_secinfo list, but not an explicitly
1343          * shared flavor by the user, it is a result of the nfsv4 server
1344          * namespace setup. We will grant an RO permission similar for
1345          * a pseudo node except that this node is a shared one.
1346          *
1347          * e.g. flavor in (flavor) indicates that it is not explictly
1348          *      shared by the user:
1349          *
1350          *              /       (sys, krb5)
1351          *              |
1352          *              export  #share -o sec=sys (krb5)
1353          *              |
1354          *              secure  #share -o sec=krb5
1355          *
1356          *      In this case, when a krb5 request coming in to access
1357          *      /export, RO permission is granted.
1358          */
1359         if (!(sp[i].s_flags & M_4SEC_EXPORTED))
1360                 return (mapaccess | NFSAUTH_RO);
1361 
1362         /*
1363          * Optimize if there are no lists.
1364          * We cannot optimize for AUTH_SYS with NGRPS (16) supplemental groups.
1365          */
1366         perm = sp[i].s_flags;
1367         if ((perm & (M_ROOT | M_NONE | M_MAP)) == 0 && (ngroups_max <= NGRPS ||
1368             flavor != AUTH_SYS || crgetngroups(cr) < NGRPS)) {
1369                 perm &= ~M_4SEC_EXPORTED;
1370                 if (perm == M_RO)
1371                         return (mapaccess | NFSAUTH_RO);
1372                 if (perm == M_RW)
1373                         return (mapaccess | NFSAUTH_RW);
1374         }
1375 
1376         access = nfsauth_cache_get(exi, req, flavor, cr, uid, gid, ngids, gids);
1377 
1378         /*
1379          * For both NFSAUTH_DENIED and NFSAUTH_WRONGSEC we do not care about
1380          * the supplemental groups.
1381          */
1382         if (access & NFSAUTH_DENIED || access & NFSAUTH_WRONGSEC) {
1383                 if (ngids != NULL && gids != NULL) {
1384                         kmem_free(*gids, *ngids * sizeof (gid_t));
1385                         *ngids = 0;
1386                         *gids = NULL;
1387                 }
1388         }
1389 
1390         /*
1391          * Client's security flavor doesn't match with "ro" or
1392          * "rw" list. Try again using AUTH_NONE if present.
1393          */
1394         if ((access & NFSAUTH_WRONGSEC) && (flavor != AUTH_NONE)) {
1395                 /*
1396                  * Have we already encountered AUTH_NONE ?
1397                  */
1398                 if (authnone_entry != -1) {
1399                         mapaccess = NFSAUTH_MAPNONE;
1400                         access = nfsauth_cache_get(exi, req, AUTH_NONE, cr,
1401                             NULL, NULL, NULL, NULL);
1402                 } else {
1403                         /*
1404                          * Check for AUTH_NONE presence.
1405                          */
1406                         for (; i < exi->exi_export.ex_seccnt; i++) {
1407                                 if (sp[i].s_secinfo.sc_nfsnum == AUTH_NONE) {
1408                                         mapaccess = NFSAUTH_MAPNONE;
1409                                         access = nfsauth_cache_get(exi, req,
1410                                             AUTH_NONE, cr, NULL, NULL, NULL,
1411                                             NULL);
1412                                         break;
1413                                 }
1414                         }
1415                 }
1416         }
1417 
1418         if (access & NFSAUTH_DENIED)
1419                 access = NFSAUTH_DENIED;
1420 
1421         return (access | mapaccess);
1422 }
1423 
1424 static void
1425 nfsauth_free_clnt_node(struct auth_cache_clnt *p)
1426 {
1427         void *cookie = NULL;
1428         struct auth_cache *node;
1429 
1430         while ((node = avl_destroy_nodes(&p->authc_tree, &cookie)) != NULL)
1431                 nfsauth_free_node(node);
1432         avl_destroy(&p->authc_tree);
1433 
1434         kmem_free(p->authc_addr.buf, p->authc_addr.maxlen);
1435         rw_destroy(&p->authc_lock);
1436 
1437         kmem_free(p, sizeof (*p));
1438 }
1439 
1440 static void
1441 nfsauth_free_node(struct auth_cache *p)
1442 {
1443         crfree(p->auth_clnt_cred);
1444         kmem_free(p->auth_srv_gids, p->auth_srv_ngids * sizeof (gid_t));
1445         mutex_destroy(&p->auth_lock);
1446         cv_destroy(&p->auth_cv);
1447         kmem_cache_free(exi_cache_handle, p);
1448 }
1449 
1450 /*
1451  * Free the nfsauth cache for a given export
1452  */
1453 void
1454 nfsauth_cache_free(struct exportinfo *exi)
1455 {
1456         int i;
1457 
1458         /*
1459          * The only way we got here was with an exi_rele, which means that no
1460          * auth cache entry is being refreshed.
1461          */
1462 
1463         for (i = 0; i < AUTH_TABLESIZE; i++) {
1464                 avl_tree_t *tree = exi->exi_cache[i];
1465                 void *cookie = NULL;
1466                 struct auth_cache_clnt *node;
1467 
1468                 while ((node = avl_destroy_nodes(tree, &cookie)) != NULL)
1469                         nfsauth_free_clnt_node(node);
1470         }
1471 }
1472 
1473 /*
1474  * Called by the kernel memory allocator when memory is low.
1475  * Free unused cache entries. If that's not enough, the VM system
1476  * will call again for some more.
1477  *
1478  * This needs to operate on all zones, so we take a reader lock
1479  * on the list of zones and walk the list.  This is OK here
1480  * becuase exi_cache_trim doesn't block or cause new objects
1481  * to be allocated (basically just frees lots of stuff).
1482  * Use care if nfssrv_globals_rwl is taken as reader in any
1483  * other cases because it will block nfs_server_zone_init
1484  * and nfs_server_zone_fini, which enter as writer.
1485  */
1486 /*ARGSUSED*/
1487 void
1488 exi_cache_reclaim(void *cdrarg)
1489 {
1490         nfs_globals_t *ng;
1491 
1492         rw_enter(&nfssrv_globals_rwl, RW_READER);
1493 
1494         ng = list_head(&nfssrv_globals_list);
1495         while (ng != NULL) {
1496                 exi_cache_reclaim_zone(ng);
1497                 ng = list_next(&nfssrv_globals_list, ng);
1498         }
1499 
1500         rw_exit(&nfssrv_globals_rwl);
1501 }
1502 
1503 static void
1504 exi_cache_reclaim_zone(nfs_globals_t *ng)
1505 {
1506         int i;
1507         struct exportinfo *exi;
1508         nfs_export_t *ne = ng->nfs_export;
1509 
1510         rw_enter(&ne->exported_lock, RW_READER);
1511 
1512         for (i = 0; i < EXPTABLESIZE; i++) {
1513                 for (exi = ne->exptable[i]; exi; exi = exi->fid_hash.next)
1514                         exi_cache_trim(exi);
1515         }
1516 
1517         rw_exit(&ne->exported_lock);
1518 
1519         atomic_inc_uint(&nfsauth_cache_reclaim);
1520 }
1521 
1522 static void
1523 exi_cache_trim(struct exportinfo *exi)
1524 {
1525         struct auth_cache_clnt *c;
1526         struct auth_cache_clnt *nextc;
1527         struct auth_cache *p;
1528         struct auth_cache *next;
1529         int i;
1530         time_t stale_time;
1531         avl_tree_t *tree;
1532 
1533         for (i = 0; i < AUTH_TABLESIZE; i++) {
1534                 tree = exi->exi_cache[i];
1535                 stale_time = gethrestime_sec() - NFSAUTH_CACHE_TRIM;
1536                 rw_enter(&exi->exi_cache_lock, RW_READER);
1537 
1538                 /*
1539                  * Free entries that have not been
1540                  * used for NFSAUTH_CACHE_TRIM seconds.
1541                  */
1542                 for (c = avl_first(tree); c != NULL; c = AVL_NEXT(tree, c)) {
1543                         /*
1544                          * We are being called by the kmem subsystem to reclaim
1545                          * memory so don't block if we can't get the lock.
1546                          */
1547                         if (rw_tryenter(&c->authc_lock, RW_WRITER) == 0) {
1548                                 exi_cache_auth_reclaim_failed++;
1549                                 rw_exit(&exi->exi_cache_lock);
1550                                 return;
1551                         }
1552 
1553                         for (p = avl_first(&c->authc_tree); p != NULL;
1554                             p = next) {
1555                                 next = AVL_NEXT(&c->authc_tree, p);
1556 
1557                                 ASSERT(p->auth_state != NFS_AUTH_INVALID);
1558 
1559                                 mutex_enter(&p->auth_lock);
1560 
1561                                 /*
1562                                  * We won't trim recently used and/or WAITING
1563                                  * entries.
1564                                  */
1565                                 if (p->auth_time > stale_time ||
1566                                     p->auth_state == NFS_AUTH_WAITING) {
1567                                         mutex_exit(&p->auth_lock);
1568                                         continue;
1569                                 }
1570 
1571                                 DTRACE_PROBE1(nfsauth__debug__trim__state,
1572                                     auth_state_t, p->auth_state);
1573 
1574                                 /*
1575                                  * STALE and REFRESHING entries are only
1576                                  * marked INVALID here, not freed, because
1577                                  * they are referenced by some other structures
1578                                  * or threads.  They will be freed later.
1579                                  */
1580                                 if (p->auth_state == NFS_AUTH_STALE ||
1581                                     p->auth_state == NFS_AUTH_REFRESHING) {
1582                                         p->auth_state = NFS_AUTH_INVALID;
1583                                         mutex_exit(&p->auth_lock);
1584 
1585                                         avl_remove(&c->authc_tree, p);
1586                                 } else {
1587                                         mutex_exit(&p->auth_lock);
1588 
1589                                         avl_remove(&c->authc_tree, p);
1590                                         nfsauth_free_node(p);
1591                                 }
1592                         }
1593                         rw_exit(&c->authc_lock);
1594                 }
1595 
1596                 if (rw_tryupgrade(&exi->exi_cache_lock) == 0) {
1597                         rw_exit(&exi->exi_cache_lock);
1598                         exi_cache_clnt_reclaim_failed++;
1599                         continue;
1600                 }
1601 
1602                 for (c = avl_first(tree); c != NULL; c = nextc) {
1603                         nextc = AVL_NEXT(tree, c);
1604 
1605                         if (avl_is_empty(&c->authc_tree) == B_FALSE)
1606                                 continue;
1607 
1608                         avl_remove(tree, c);
1609 
1610                         nfsauth_free_clnt_node(c);
1611                 }
1612 
1613                 rw_exit(&exi->exi_cache_lock);
1614         }
1615 }