1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 1995, 2010, Oracle and/or its affiliates. All rights reserved.
  24  * Copyright (c) 2015 by Delphix. All rights reserved.
  25  * Copyright 2018 Nexenta Systems, Inc. All rights reserved.
  26  */
  27 
  28 #include <sys/param.h>
  29 #include <sys/errno.h>
  30 #include <sys/vfs.h>
  31 #include <sys/vnode.h>
  32 #include <sys/cred.h>
  33 #include <sys/cmn_err.h>
  34 #include <sys/systm.h>
  35 #include <sys/kmem.h>
  36 #include <sys/pathname.h>
  37 #include <sys/utsname.h>
  38 #include <sys/debug.h>
  39 #include <sys/door.h>
  40 #include <sys/sdt.h>
  41 #include <sys/thread.h>
  42 #include <sys/avl.h>
  43 
  44 #include <rpc/types.h>
  45 #include <rpc/auth.h>
  46 #include <rpc/clnt.h>
  47 
  48 #include <nfs/nfs.h>
  49 #include <nfs/export.h>
  50 #include <nfs/nfs_clnt.h>
  51 #include <nfs/auth.h>
  52 
  53 static struct kmem_cache *exi_cache_handle;
  54 static void exi_cache_reclaim(void *);
  55 static void exi_cache_reclaim_zone(nfs_globals_t *);
  56 static void exi_cache_trim(struct exportinfo *exi);
  57 
  58 extern pri_t minclsyspri;
  59 
  60 /* NFS auth cache statistics */
  61 volatile uint_t nfsauth_cache_hit;
  62 volatile uint_t nfsauth_cache_miss;
  63 volatile uint_t nfsauth_cache_refresh;
  64 volatile uint_t nfsauth_cache_reclaim;
  65 volatile uint_t exi_cache_auth_reclaim_failed;
  66 volatile uint_t exi_cache_clnt_reclaim_failed;
  67 
  68 /*
  69  * The lifetime of an auth cache entry:
  70  * ------------------------------------
  71  *
  72  * An auth cache entry is created with both the auth_time
  73  * and auth_freshness times set to the current time.
  74  *
  75  * Upon every client access which results in a hit, the
  76  * auth_time will be updated.
  77  *
  78  * If a client access determines that the auth_freshness
  79  * indicates that the entry is STALE, then it will be
  80  * refreshed. Note that this will explicitly reset
  81  * auth_time.
  82  *
  83  * When the REFRESH successfully occurs, then the
  84  * auth_freshness is updated.
  85  *
  86  * There are two ways for an entry to leave the cache:
  87  *
  88  * 1) Purged by an action on the export (removed or changed)
  89  * 2) Memory backpressure from the kernel (check against NFSAUTH_CACHE_TRIM)
  90  *
  91  * For 2) we check the timeout value against auth_time.
  92  */
  93 
  94 /*
  95  * Number of seconds until we mark an auth cache entry for refresh.
  96  */
  97 #define NFSAUTH_CACHE_REFRESH 600
  98 
  99 /*
 100  * Number of idle seconds until we yield to backpressure
 101  * to trim a cache entry.
 102  */
 103 #define NFSAUTH_CACHE_TRIM 3600
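
     /*
      * Taken together: a cache hit schedules an asynchronous refresh once
      * the entry's auth_freshness is more than NFSAUTH_CACHE_REFRESH
      * seconds old, while memory backpressure frees (via exi_cache_trim())
      * entries whose auth_time shows them idle for more than
      * NFSAUTH_CACHE_TRIM seconds.
      */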
 104 
 105 /*
 106  * While we could encapsulate the exi_list inside the
 107  * exi structure, we can't do that for the auth_list.
 108  * So, to keep things looking clean, we keep them both
 109  * in these external lists.
 110  */
 111 typedef struct refreshq_exi_node {
 112         struct exportinfo       *ren_exi;
 113         list_t                  ren_authlist;
 114         list_node_t             ren_node;
 115 } refreshq_exi_node_t;
 116 
 117 typedef struct refreshq_auth_node {
 118         struct auth_cache       *ran_auth;
 119         char                    *ran_netid;
 120         list_node_t             ran_node;
 121 } refreshq_auth_node_t;
 122 
 123 /*
 124  * Used to manipulate things on the refreshq_queue.  Note that the refresh
 125  * thread will effectively pop a node off of the queue, at which point it
 126  * will no longer need to hold the mutex.
 127  */
 128 static kmutex_t refreshq_lock;
 129 static list_t refreshq_queue;
 130 static kcondvar_t refreshq_cv;
 131 
 132 /*
 133  * If there is ever a problem with loading the module, then nfsauth_fini()
 134  * needs to be called to remove state.  In that event, since the refreshq
 135  * thread has been started, the two must cooperate to get rid of that state.
 136  */
 137 typedef enum nfsauth_refreshq_thread_state {
 138         REFRESHQ_THREAD_RUNNING,
 139         REFRESHQ_THREAD_FINI_REQ,
 140         REFRESHQ_THREAD_HALTED,
 141         REFRESHQ_THREAD_NEED_CREATE
 142 } nfsauth_refreshq_thread_state_t;
 143 
 144 typedef struct nfsauth_globals {
 145         kmutex_t        mountd_lock;
 146         door_handle_t   mountd_dh;
 147 
 148         /*
 149          * Used to manipulate things on the refreshq_queue.  Note that the
 150          * refresh thread will effectively pop a node off of the queue,
 151          * at which point it will no longer need to hold the mutex.
 152          */
 153         kmutex_t        refreshq_lock;
 154         list_t          refreshq_queue;
 155         kcondvar_t      refreshq_cv;
 156 
 157         /*
 158          * A list_t would be overkill.  These are auth_cache entries which are
 159          * no longer linked to an exi.  It should be the case that all of their
 160          * states are NFS_AUTH_INVALID, i.e., an entry is put on this list
 161          * only if its state indicates that it had already been placed on
 162          * the refreshq_queue.
 163          *
 164          * Note that while there is no link from the exi or back to the exi,
 165          * the exi cannot go away until these entries are harvested.
 166          */
 167         struct auth_cache               *refreshq_dead_entries;
 168         nfsauth_refreshq_thread_state_t refreshq_thread_state;
 169 
 170 } nfsauth_globals_t;
 171 
 172 static void nfsauth_free_node(struct auth_cache *);
 173 static void nfsauth_refresh_thread(nfsauth_globals_t *);
 174 
 175 static int nfsauth_cache_compar(const void *, const void *);
 176 
 177 static nfsauth_globals_t *
 178 nfsauth_get_zg(void)
 179 {
 180         nfs_globals_t *ng = zone_getspecific(nfssrv_zone_key, curzone);
 181         nfsauth_globals_t *nag = ng->nfs_auth;
 182         ASSERT(nag != NULL);
 183         return (nag);
 184 }
 185 
 186 void
 187 mountd_args(uint_t did)
 188 {
 189         nfsauth_globals_t *nag;
 190 
 191         nag = nfsauth_get_zg();
 192         mutex_enter(&nag->mountd_lock);
 193         if (nag->mountd_dh != NULL)
 194                 door_ki_rele(nag->mountd_dh);
 195         nag->mountd_dh = door_ki_lookup(did);
 196         mutex_exit(&nag->mountd_lock);
 197 }
 198 
 199 void
 200 nfsauth_init(void)
 201 {
 202         exi_cache_handle = kmem_cache_create("exi_cache_handle",
 203             sizeof (struct auth_cache), 0, NULL, NULL,
 204             exi_cache_reclaim, NULL, NULL, 0);
 205 }
 206 
 207 void
 208 nfsauth_fini(void)
 209 {
 210         kmem_cache_destroy(exi_cache_handle);
 211 }
 212 
 213 void
 214 nfsauth_zone_init(nfs_globals_t *ng)
 215 {
 216         nfsauth_globals_t *nag;
 217 
 218         nag = kmem_zalloc(sizeof (*nag), KM_SLEEP);
 219 
 220         /*
 221          * mountd can be restarted by smf(5).  We need to make sure
 222          * the updated door handle will safely make it to mountd_dh.
 223          */
 224         mutex_init(&nag->mountd_lock, NULL, MUTEX_DEFAULT, NULL);
 225         mutex_init(&nag->refreshq_lock, NULL, MUTEX_DEFAULT, NULL);
 226         list_create(&nag->refreshq_queue, sizeof (refreshq_exi_node_t),
 227             offsetof(refreshq_exi_node_t, ren_node));
 228         cv_init(&nag->refreshq_cv, NULL, CV_DEFAULT, NULL);
 229         nag->refreshq_thread_state = REFRESHQ_THREAD_NEED_CREATE;
 230 
 231         ng->nfs_auth = nag;
 232 }
 233 
 234 void
 235 nfsauth_zone_shutdown(nfs_globals_t *ng)
 236 {
 237         refreshq_exi_node_t     *ren;
 238         nfsauth_globals_t       *nag = ng->nfs_auth;
 239 
 240         /* Prevent the nfsauth_refresh_thread from getting new work */
 241         mutex_enter(&nag->refreshq_lock);
 242         if (nag->refreshq_thread_state == REFRESHQ_THREAD_RUNNING) {
 243                 nag->refreshq_thread_state = REFRESHQ_THREAD_FINI_REQ;
 244                 cv_broadcast(&nag->refreshq_cv);
 245 
 246                 /* Wait for nfsauth_refresh_thread() to exit */
 247                 while (nag->refreshq_thread_state != REFRESHQ_THREAD_HALTED)
 248                         cv_wait(&nag->refreshq_cv, &nag->refreshq_lock);
 249         }
 250         mutex_exit(&nag->refreshq_lock);
 251 
 252         /*
 253          * Walk the exi_list and in turn, walk the auth_lists and free all
 254          * lists.  In addition, free INVALID auth_cache entries.
 255          */
 256         while ((ren = list_remove_head(&nag->refreshq_queue))) {
 257                 refreshq_auth_node_t *ran;
 258 
 259                 while ((ran = list_remove_head(&ren->ren_authlist)) != NULL) {
 260                         struct auth_cache *p = ran->ran_auth;
 261                         if (p->auth_state == NFS_AUTH_INVALID)
 262                                 nfsauth_free_node(p);
 263                         strfree(ran->ran_netid);
 264                         kmem_free(ran, sizeof (*ran));
 265                 }
 266 
 267                 list_destroy(&ren->ren_authlist);
 268                 exi_rele(ren->ren_exi);
 269                 kmem_free(ren, sizeof (*ren));
 270         }
 271 }
 272 
 273 void
 274 nfsauth_zone_fini(nfs_globals_t *ng)
 275 {
 276         nfsauth_globals_t *nag = ng->nfs_auth;
 277 
 278         ng->nfs_auth = NULL;
 279 
 280         list_destroy(&nag->refreshq_queue);
 281         cv_destroy(&nag->refreshq_cv);
 282         mutex_destroy(&nag->refreshq_lock);
 283         mutex_destroy(&nag->mountd_lock);
 284         /* Extra cleanup. */
 285         if (nag->mountd_dh != NULL)
 286                 door_ki_rele(nag->mountd_dh);
 287         kmem_free(nag, sizeof (*nag));
 288 }
 289 
 290 /*
 291  * Convert the address in a netbuf to
 292  * a hash index for the auth_cache table.
 293  */
 294 static int
 295 hash(struct netbuf *a)
 296 {
 297         int i, h = 0;
 298 
 299         for (i = 0; i < a->len; i++)
 300                 h ^= a->buf[i];
 301 
 302         return (h & (AUTH_TABLESIZE - 1));
 303 }
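
     /*
      * The index computation relies on AUTH_TABLESIZE being a power of
      * two: the XOR of the (already masked) address bytes is simply
      * truncated to the table size.  Since addrmask() below strips the
      * port number first, all requests from a given host land in the
      * same bucket.
      */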
 304 
 305 /*
 306  * Mask out the components of an
 307  * address that do not identify
 308  * a host. For socket addresses the
 309  * masking gets rid of the port number.
 310  */
 311 static void
 312 addrmask(struct netbuf *addr, struct netbuf *mask)
 313 {
 314         int i;
 315 
 316         for (i = 0; i < addr->len; i++)
 317                 addr->buf[i] &= mask->buf[i];
 318 }
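
     /*
      * The mask itself is supplied by the transport (see SVC_GETADDRMASK
      * in nfsauth_cache_get()), so for AF_INET, for example, the sin_port
      * bytes are cleared and requests from the same host on different
      * source ports share a single auth_cache_clnt entry.
      */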
 319 
 320 /*
 321  * nfsauth4_access is used for NFS V4 auth checking. Besides doing
 322  * the common nfsauth_access(), it will check if the client can
 323  * have limited access to this vnode even if the security flavor
 324  * used does not meet the policy.
 325  */
 326 int
 327 nfsauth4_access(struct exportinfo *exi, vnode_t *vp, struct svc_req *req,
 328     cred_t *cr, uid_t *uid, gid_t *gid, uint_t *ngids, gid_t **gids)
 329 {
 330         int access;
 331 
 332         access = nfsauth_access(exi, req, cr, uid, gid, ngids, gids);
 333 
 334         /*
 335          * There are cases where the server needs to allow the client
 336          * to have a limited view.
 337          *
 338          * e.g.
 339          * /export is shared as "sec=sys,rw=dfs-test-4,sec=krb5,rw"
 340          * /export/home is shared as "sec=sys,rw"
 341          *
 342          * When the client mounts /export with sec=sys, the client
 343          * would get a limited view with RO access on /export to see
 344          * "home" only because the client is allowed to access
 345          * /export/home with auth_sys.
 346          */
 347         if (access & NFSAUTH_DENIED || access & NFSAUTH_WRONGSEC) {
 348                 /*
 349                  * Allow ro permission with LIMITED view if there is a
 350                  * sub-dir exported under vp.
 351                  */
 352                 if (has_visible(exi, vp))
 353                         return (NFSAUTH_LIMITED);
 354         }
 355 
 356         return (access);
 357 }
 358 
 359 static void
 360 sys_log(const char *msg)
 361 {
 362         static time_t   tstamp = 0;
 363         time_t          now;
 364 
 365         /*
 366          * msg is shown (at most) once per minute
 367          */
 368         now = gethrestime_sec();
 369         if ((tstamp + 60) < now) {
 370                 tstamp = now;
 371                 cmn_err(CE_WARN, msg);
 372         }
 373 }
 374 
 375 /*
 376  * Call up to the mountd to get access information in the kernel.
 377  */
 378 static bool_t
 379 nfsauth_retrieve(nfsauth_globals_t *nag, struct exportinfo *exi,
 380     char *req_netid, int flavor, struct netbuf *addr, int *access,
 381     cred_t *clnt_cred, uid_t *srv_uid, gid_t *srv_gid, uint_t *srv_gids_cnt,
 382     gid_t **srv_gids)
 383 {
 384         varg_t                    varg = {0};
 385         nfsauth_res_t             res = {0};
 386         XDR                       xdrs;
 387         size_t                    absz;
 388         caddr_t                   abuf;
 389         int                       last = 0;
 390         door_arg_t                da;
 391         door_info_t               di;
 392         door_handle_t             dh;
 393         uint_t                    ntries = 0;
 394 
 395         /*
 396          * No entry in the cache for this client/flavor
 397          * so we need to call the nfsauth service in the
 398          * mount daemon.
 399          */
 400 
 401         varg.vers = V_PROTO;
 402         varg.arg_u.arg.cmd = NFSAUTH_ACCESS;
 403         varg.arg_u.arg.areq.req_client.n_len = addr->len;
 404         varg.arg_u.arg.areq.req_client.n_bytes = addr->buf;
 405         varg.arg_u.arg.areq.req_netid = req_netid;
 406         varg.arg_u.arg.areq.req_path = exi->exi_export.ex_path;
 407         varg.arg_u.arg.areq.req_flavor = flavor;
 408         varg.arg_u.arg.areq.req_clnt_uid = crgetuid(clnt_cred);
 409         varg.arg_u.arg.areq.req_clnt_gid = crgetgid(clnt_cred);
 410         varg.arg_u.arg.areq.req_clnt_gids.len = crgetngroups(clnt_cred);
 411         varg.arg_u.arg.areq.req_clnt_gids.val = (gid_t *)crgetgroups(clnt_cred);
 412 
 413         DTRACE_PROBE1(nfsserv__func__nfsauth__varg, varg_t *, &varg);
 414 
 415         /*
 416  * Set up the XDR stream for encoding the arguments. Notice that
 417          * in addition to the args having variable fields (req_netid and
 418          * req_path), the argument data structure is itself versioned,
 419          * so we need to make sure we can size the arguments buffer
 420          * appropriately to encode all the args. If we can't get sizing
 421          * info _or_ properly encode the arguments, there's really no
 422  * point in continuing, so we fail the request.
 423          */
 424         if ((absz = xdr_sizeof(xdr_varg, &varg)) == 0) {
 425                 *access = NFSAUTH_DENIED;
 426                 return (FALSE);
 427         }
 428 
 429         abuf = (caddr_t)kmem_alloc(absz, KM_SLEEP);
 430         xdrmem_create(&xdrs, abuf, absz, XDR_ENCODE);
 431         if (!xdr_varg(&xdrs, &varg)) {
 432                 XDR_DESTROY(&xdrs);
 433                 goto fail;
 434         }
 435         XDR_DESTROY(&xdrs);
 436 
 437         /*
 438          * Prepare the door arguments
 439          *
 440          * We don't know the size of the message the daemon
 441          * will pass back to us.  By setting rbuf to NULL,
 442          * we force the door code to allocate a buf of the
 443          * appropriate size.  We must set rsize > 0, however,
 444          * else the door code acts as if no response was
 445          * expected and doesn't pass the data to us.
 446          */
 447         da.data_ptr = (char *)abuf;
 448         da.data_size = absz;
 449         da.desc_ptr = NULL;
 450         da.desc_num = 0;
 451         da.rbuf = NULL;
 452         da.rsize = 1;
 453 
 454 retry:
 455         mutex_enter(&nag->mountd_lock);
 456         dh = nag->mountd_dh;
 457         if (dh != NULL)
 458                 door_ki_hold(dh);
 459         mutex_exit(&nag->mountd_lock);
 460 
 461         if (dh == NULL) {
 462                 /*
 463                  * The rendezvous point has not been established yet!
 464                  * This could mean that either mountd(1m) has not yet
 465                  * been started or that _this_ routine nuked the door
 466                  * handle after receiving an EINTR for a REVOKED door.
 467                  *
 468                  * Returning NFSAUTH_DROP will cause the NFS client
 469                  * to retransmit the request, so let's try to be more
 470                  * resilient and retry a number of times before we bail.
 471                  */
 472                 if (++ntries % NFSAUTH_DR_TRYCNT) {
 473                         delay(hz);
 474                         goto retry;
 475                 }
 476 
 477                 kmem_free(abuf, absz);
 478 
 479                 sys_log("nfsauth: mountd has not established door");
 480                 *access = NFSAUTH_DROP;
 481                 return (FALSE);
 482         }
 483 
 484         ntries = 0;
 485 
 486         /*
 487          * Now that we've got what we need, place the call.
 488          */
 489         switch (door_ki_upcall_limited(dh, &da, NULL, SIZE_MAX, 0)) {
 490         case 0:                         /* Success */
 491                 door_ki_rele(dh);
 492 
 493                 if (da.data_ptr == NULL && da.data_size == 0) {
 494                         /*
 495                          * The door_return that contained the data
 496                          * failed! We're here because of the 2nd
 497                          * door_return (w/o data) such that we can
 498                          * get control of the thread (and exit
 499                          * gracefully).
 500                          */
 501                         DTRACE_PROBE1(nfsserv__func__nfsauth__door__nil,
 502                             door_arg_t *, &da);
 503                         goto fail;
 504                 }
 505 
 506                 break;
 507 
 508         case EAGAIN:
 509                 /*
 510                  * Server out of resources; back off for a bit
 511                  */
 512                 door_ki_rele(dh);
 513                 delay(hz);
 514                 goto retry;
 515                 /* NOTREACHED */
 516 
 517         case EINTR:
 518                 if (!door_ki_info(dh, &di)) {
 519                         door_ki_rele(dh);
 520 
 521                         if (di.di_attributes & DOOR_REVOKED) {
 522                                 /*
 523                                  * The server barfed and revoked
 524                                  * the (existing) door on us; we
 525                                  * want to wait to give smf(5) a
 526                                  * chance to restart mountd(1m)
 527                                  * and establish a new door handle.
 528                                  */
 529                                 mutex_enter(&nag->mountd_lock);
 530                                 if (dh == nag->mountd_dh) {
 531                                         door_ki_rele(nag->mountd_dh);
 532                                         nag->mountd_dh = NULL;
 533                                 }
 534                                 mutex_exit(&nag->mountd_lock);
 535                                 delay(hz);
 536                                 goto retry;
 537                         }
 538                         /*
 539                          * If the door was _not_ revoked on us,
 540                          * then more than likely we took an INTR,
 541                          * so we need to fail the operation.
 542                          */
 543                         goto fail;
 544                 }
 545                 /*
 546                  * The only failure that can occur from getting
 547                  * the door info is EINVAL, so we let the code
 548                  * below handle it.
 549                  */
 550                 /* FALLTHROUGH */
 551 
 552         case EBADF:
 553         case EINVAL:
 554         default:
 555                 /*
 556                  * If we have a stale door handle, give smf a last
 557                  * chance to start it by sleeping for a little bit.
 558                  * If we're still hosed, we'll fail the call.
 559                  *
 560                  * Since we're going to reacquire the door handle
 561                  * upon the retry, we opt to sleep for a bit and
 562                  * _not_ to clear mountd_dh. If mountd restarted
 563                  * and was able to set mountd_dh, we should see
 564                  * the new instance; if not, we won't get caught
 565                  * up in the retry/DELAY loop.
 566                  */
 567                 door_ki_rele(dh);
 568                 if (!last) {
 569                         delay(hz);
 570                         last++;
 571                         goto retry;
 572                 }
 573                 sys_log("nfsauth: stale mountd door handle");
 574                 goto fail;
 575         }
 576 
 577         ASSERT(da.rbuf != NULL);
 578 
 579         /*
 580          * No door errors encountered; setup the XDR stream for decoding
 581          * the results. If we fail to decode the results, we've got no
 582          * other recourse than to fail the request.
 583          */
 584         xdrmem_create(&xdrs, da.rbuf, da.rsize, XDR_DECODE);
 585         if (!xdr_nfsauth_res(&xdrs, &res)) {
 586                 xdr_free(xdr_nfsauth_res, (char *)&res);
 587                 XDR_DESTROY(&xdrs);
 588                 kmem_free(da.rbuf, da.rsize);
 589                 goto fail;
 590         }
 591         XDR_DESTROY(&xdrs);
 592         kmem_free(da.rbuf, da.rsize);
 593 
 594         DTRACE_PROBE1(nfsserv__func__nfsauth__results, nfsauth_res_t *, &res);
 595         switch (res.stat) {
 596                 case NFSAUTH_DR_OKAY:
 597                         *access = res.ares.auth_perm;
 598                         *srv_uid = res.ares.auth_srv_uid;
 599                         *srv_gid = res.ares.auth_srv_gid;
 600                         *srv_gids_cnt = res.ares.auth_srv_gids.len;
 601                         *srv_gids = kmem_alloc(*srv_gids_cnt * sizeof (gid_t),
 602                             KM_SLEEP);
 603                         bcopy(res.ares.auth_srv_gids.val, *srv_gids,
 604                             *srv_gids_cnt * sizeof (gid_t));
 605                         break;
 606 
 607                 case NFSAUTH_DR_EFAIL:
 608                 case NFSAUTH_DR_DECERR:
 609                 case NFSAUTH_DR_BADCMD:
 610                 default:
 611                         xdr_free(xdr_nfsauth_res, (char *)&res);
 612 fail:
 613                         *access = NFSAUTH_DENIED;
 614                         kmem_free(abuf, absz);
 615                         return (FALSE);
 616                         /* NOTREACHED */
 617         }
 618 
 619         xdr_free(xdr_nfsauth_res, (char *)&res);
 620         kmem_free(abuf, absz);
 621 
 622         return (TRUE);
 623 }
 624 
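     /*
      * Per-zone worker thread, created on demand from nfsauth_cache_get().
      * It drains refreshq_queue: for every refreshq_exi_node_t popped it
      * walks the attached auth list and, for each entry still in the
      * NFS_AUTH_STALE state, re-runs nfsauth_retrieve() and installs the
      * fresh access data; entries already marked NFS_AUTH_INVALID are
      * simply freed.
      */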
 625 static void
 626 nfsauth_refresh_thread(nfsauth_globals_t *nag)
 627 {
 628         refreshq_exi_node_t     *ren;
 629         refreshq_auth_node_t    *ran;
 630 
 631         struct exportinfo       *exi;
 632 
 633         int                     access;
 634         bool_t                  retrieval;
 635 
 636         callb_cpr_t             cprinfo;
 637 
 638         CALLB_CPR_INIT(&cprinfo, &nag->refreshq_lock, callb_generic_cpr,
 639             "nfsauth_refresh");
 640 
 641         for (;;) {
 642                 mutex_enter(&nag->refreshq_lock);
 643                 if (nag->refreshq_thread_state != REFRESHQ_THREAD_RUNNING) {
 644                         /* Keep the hold on the lock! */
 645                         break;
 646                 }
 647 
 648                 ren = list_remove_head(&nag->refreshq_queue);
 649                 if (ren == NULL) {
 650                         CALLB_CPR_SAFE_BEGIN(&cprinfo);
 651                         cv_wait(&nag->refreshq_cv, &nag->refreshq_lock);
 652                         CALLB_CPR_SAFE_END(&cprinfo, &nag->refreshq_lock);
 653                         mutex_exit(&nag->refreshq_lock);
 654                         continue;
 655                 }
 656                 mutex_exit(&nag->refreshq_lock);
 657 
 658                 exi = ren->ren_exi;
 659                 ASSERT(exi != NULL);
 660 
 661                 /*
 662                  * Since the ren was removed from the refreshq_queue above,
 663                  * this is the only thread aware of the ren's existence, so we
 664                  * have exclusive ownership of it and do not need to protect it
 665                  * with any lock.
 666                  */
 667                 while ((ran = list_remove_head(&ren->ren_authlist))) {
 668                         uid_t uid;
 669                         gid_t gid;
 670                         uint_t ngids;
 671                         gid_t *gids;
 672                         struct auth_cache *p = ran->ran_auth;
 673                         char *netid = ran->ran_netid;
 674 
 675                         ASSERT(p != NULL);
 676                         ASSERT(netid != NULL);
 677 
 678                         kmem_free(ran, sizeof (refreshq_auth_node_t));
 679 
 680                         mutex_enter(&p->auth_lock);
 681 
 682                         /*
 683                          * Once the entry goes INVALID, it cannot change
 684                          * state.
 685                          *
 686                          * There is also no need to refresh entries when we
 687                          * are just shutting down.
 688                          *
 689                          * In general, there is no need to hold the
 690                          * refreshq_lock to test the refreshq_thread_state.  We
 691                          * do hold it at other places because there is some
 692                          * related thread synchronization (or some other tasks)
 693                          * close to the refreshq_thread_state check.
 694                          *
 695                          * The check of the refreshq_thread_state value here
 696                          * is purely advisory, allowing a faster
 697                          * nfsauth_refresh_thread() shutdown.  If we miss
 698                          * this advisory check, nothing catastrophic
 699                          * happens: we will just spin here a bit longer
 700                          * before the shutdown.
 701                          */
 702                         if (p->auth_state == NFS_AUTH_INVALID ||
 703                             nag->refreshq_thread_state !=
 704                             REFRESHQ_THREAD_RUNNING) {
 705                                 mutex_exit(&p->auth_lock);
 706 
 707                                 if (p->auth_state == NFS_AUTH_INVALID)
 708                                         nfsauth_free_node(p);
 709 
 710                                 strfree(netid);
 711 
 712                                 continue;
 713                         }
 714 
 715                         /*
 716                          * Make sure the state is valid.  Note that once we
 717                          * change the state to NFS_AUTH_REFRESHING, no other
 718                          * thread will be able to work on this entry.
 719                          */
 720                         ASSERT(p->auth_state == NFS_AUTH_STALE);
 721 
 722                         p->auth_state = NFS_AUTH_REFRESHING;
 723                         mutex_exit(&p->auth_lock);
 724 
 725                         DTRACE_PROBE2(nfsauth__debug__cache__refresh,
 726                             struct exportinfo *, exi,
 727                             struct auth_cache *, p);
 728 
 729                         /*
 730                          * The first caching of the access rights
 731                          * is done with the netid pulled out of the
 732                          * request from the client. All subsequent
 733                          * users of the cache may or may not have
 734                          * the same netid. It doesn't matter. So
 735                          * when we refresh, we simply use the netid
 736                          * of the request which triggered the
 737                          * refresh attempt.
 738                          */
 739                         retrieval = nfsauth_retrieve(nag, exi, netid,
 740                             p->auth_flavor, &p->auth_clnt->authc_addr, &access,
 741                             p->auth_clnt_cred, &uid, &gid, &ngids, &gids);
 742 
 743                         /*
 744                          * This can only be set in one other place
 745                          * and the state has to be NFS_AUTH_FRESH.
 746                          */
 747                         strfree(netid);
 748 
 749                         mutex_enter(&p->auth_lock);
 750                         if (p->auth_state == NFS_AUTH_INVALID) {
 751                                 mutex_exit(&p->auth_lock);
 752                                 nfsauth_free_node(p);
 753                                 if (retrieval == TRUE)
 754                                         kmem_free(gids, ngids * sizeof (gid_t));
 755                         } else {
 756                                 /*
 757                                  * If we got an error, do not reset the
 758                                  * time. This will cause the next access
 759                                  * check for the client to reschedule this
 760                                  * node.
 761                                  */
 762                                 if (retrieval == TRUE) {
 763                                         p->auth_access = access;
 764 
 765                                         p->auth_srv_uid = uid;
 766                                         p->auth_srv_gid = gid;
 767                                         kmem_free(p->auth_srv_gids,
 768                                             p->auth_srv_ngids * sizeof (gid_t));
 769                                         p->auth_srv_ngids = ngids;
 770                                         p->auth_srv_gids = gids;
 771 
 772                                         p->auth_freshness = gethrestime_sec();
 773                                 }
 774                                 p->auth_state = NFS_AUTH_FRESH;
 775 
 776                                 cv_broadcast(&p->auth_cv);
 777                                 mutex_exit(&p->auth_lock);
 778                         }
 779                 }
 780 
 781                 list_destroy(&ren->ren_authlist);
 782                 exi_rele(ren->ren_exi);
 783                 kmem_free(ren, sizeof (refreshq_exi_node_t));
 784         }
 785 
 786         nag->refreshq_thread_state = REFRESHQ_THREAD_HALTED;
 787         cv_broadcast(&nag->refreshq_cv);
 788         CALLB_CPR_EXIT(&cprinfo);
 789         DTRACE_PROBE(nfsauth__nfsauth__refresh__thread__exit);
 790         zthread_exit();
 791 }
 792 
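     /*
      * AVL comparator for the per-export client trees (exi_cache[]):
      * auth_cache_clnt entries are ordered first by the length of their
      * (masked) address and then bytewise by the address contents.
      */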
 793 int
 794 nfsauth_cache_clnt_compar(const void *v1, const void *v2)
 795 {
 796         int c;
 797 
 798         const struct auth_cache_clnt *a1 = (const struct auth_cache_clnt *)v1;
 799         const struct auth_cache_clnt *a2 = (const struct auth_cache_clnt *)v2;
 800 
 801         if (a1->authc_addr.len < a2->authc_addr.len)
 802                 return (-1);
 803         if (a1->authc_addr.len > a2->authc_addr.len)
 804                 return (1);
 805 
 806         c = memcmp(a1->authc_addr.buf, a2->authc_addr.buf, a1->authc_addr.len);
 807         if (c < 0)
 808                 return (-1);
 809         if (c > 0)
 810                 return (1);
 811 
 812         return (0);
 813 }
 814 
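     /*
      * AVL comparator for the per-client authc_tree: auth_cache entries
      * are ordered by security flavor, then by the client credential's
      * uid, gid, and number of supplemental groups, and finally by the
      * contents of the group list itself.
      */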
 815 static int
 816 nfsauth_cache_compar(const void *v1, const void *v2)
 817 {
 818         int c;
 819 
 820         const struct auth_cache *a1 = (const struct auth_cache *)v1;
 821         const struct auth_cache *a2 = (const struct auth_cache *)v2;
 822 
 823         if (a1->auth_flavor < a2->auth_flavor)
 824                 return (-1);
 825         if (a1->auth_flavor > a2->auth_flavor)
 826                 return (1);
 827 
 828         if (crgetuid(a1->auth_clnt_cred) < crgetuid(a2->auth_clnt_cred))
 829                 return (-1);
 830         if (crgetuid(a1->auth_clnt_cred) > crgetuid(a2->auth_clnt_cred))
 831                 return (1);
 832 
 833         if (crgetgid(a1->auth_clnt_cred) < crgetgid(a2->auth_clnt_cred))
 834                 return (-1);
 835         if (crgetgid(a1->auth_clnt_cred) > crgetgid(a2->auth_clnt_cred))
 836                 return (1);
 837 
 838         if (crgetngroups(a1->auth_clnt_cred) < crgetngroups(a2->auth_clnt_cred))
 839                 return (-1);
 840         if (crgetngroups(a1->auth_clnt_cred) > crgetngroups(a2->auth_clnt_cred))
 841                 return (1);
 842 
 843         c = memcmp(crgetgroups(a1->auth_clnt_cred),
 844             crgetgroups(a2->auth_clnt_cred),
                 crgetngroups(a1->auth_clnt_cred) * sizeof (gid_t));
 845         if (c < 0)
 846                 return (-1);
 847         if (c > 0)
 848                 return (1);
 849 
 850         return (0);
 851 }
 852 
 853 /*
 854  * Get the access information from the cache or call up to the mountd
 855  * to get and cache the access information in the kernel.
 856  */
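     /*
      * The cache is a two-level structure: exi->exi_cache[] is an array of
      * AVL trees indexed by hash() of the client's masked address, with one
      * auth_cache_clnt per client; each auth_cache_clnt in turn holds an
      * AVL tree of auth_cache entries keyed by flavor and client
      * credentials (see the comparators above).
      */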
 857 static int
 858 nfsauth_cache_get(struct exportinfo *exi, struct svc_req *req, int flavor,
 859     cred_t *cr, uid_t *uid, gid_t *gid, uint_t *ngids, gid_t **gids)
 860 {
 861         nfsauth_globals_t       *nag;
 862         struct netbuf           *taddrmask;
 863         struct netbuf           addr;   /* temporary copy of client's address */
 864         const struct netbuf     *claddr;
 865         avl_tree_t              *tree;
 866         struct auth_cache       ac;     /* used as a template for avl_find() */
 867         struct auth_cache_clnt  *c;
 868         struct auth_cache_clnt  acc;    /* used as a template for avl_find() */
 869         struct auth_cache       *p = NULL;
 870         int                     access;
 871 
 872         uid_t                   tmpuid;
 873         gid_t                   tmpgid;
 874         uint_t                  tmpngids;
 875         gid_t                   *tmpgids;
 876 
 877         avl_index_t             where;  /* used for avl_find()/avl_insert() */
 878 
 879         ASSERT(cr != NULL);
 880 
 881         ASSERT3U(curzone->zone_id, ==, exi->exi_zoneid);
 882         nag = nfsauth_get_zg();
 883 
 884         /*
 885          * Now check whether this client already
 886          * has an entry for this flavor in the cache
 887          * for this export.
 888          * Get the caller's address, mask off the
 889          * parts of the address that do not identify
 890          * the host (port number, etc), and then hash
 891          * it to find the chain of cache entries.
 892          */
 893 
 894         claddr = svc_getrpccaller(req->rq_xprt);
 895         addr = *claddr;
 896         addr.buf = kmem_alloc(addr.maxlen, KM_SLEEP);
 897         bcopy(claddr->buf, addr.buf, claddr->len);
 898 
 899         SVC_GETADDRMASK(req->rq_xprt, SVC_TATTR_ADDRMASK, (void **)&taddrmask);
 900         ASSERT(taddrmask != NULL);
 901         addrmask(&addr, taddrmask);
 902 
 903         ac.auth_flavor = flavor;
 904         ac.auth_clnt_cred = crdup(cr);
 905 
 906         acc.authc_addr = addr;
 907 
 908         tree = exi->exi_cache[hash(&addr)];
 909 
 910         rw_enter(&exi->exi_cache_lock, RW_READER);
 911         c = (struct auth_cache_clnt *)avl_find(tree, &acc, NULL);
 912 
 913         if (c == NULL) {
 914                 struct auth_cache_clnt *nc;
 915 
 916                 rw_exit(&exi->exi_cache_lock);
 917 
 918                 nc = kmem_alloc(sizeof (*nc), KM_NOSLEEP | KM_NORMALPRI);
 919                 if (nc == NULL)
 920                         goto retrieve;
 921 
 922                 /*
 923                  * Initialize the new auth_cache_clnt
 924                  */
 925                 nc->authc_addr = addr;
 926                 nc->authc_addr.buf = kmem_alloc(addr.maxlen,
 927                     KM_NOSLEEP | KM_NORMALPRI);
 928                 if (addr.maxlen != 0 && nc->authc_addr.buf == NULL) {
 929                         kmem_free(nc, sizeof (*nc));
 930                         goto retrieve;
 931                 }
 932                 bcopy(addr.buf, nc->authc_addr.buf, addr.len);
 933                 rw_init(&nc->authc_lock, NULL, RW_DEFAULT, NULL);
 934                 avl_create(&nc->authc_tree, nfsauth_cache_compar,
 935                     sizeof (struct auth_cache),
 936                     offsetof(struct auth_cache, auth_link));
 937 
 938                 rw_enter(&exi->exi_cache_lock, RW_WRITER);
 939                 c = (struct auth_cache_clnt *)avl_find(tree, &acc, &where);
 940                 if (c == NULL) {
 941                         avl_insert(tree, nc, where);
 942                         rw_downgrade(&exi->exi_cache_lock);
 943                         c = nc;
 944                 } else {
 945                         rw_downgrade(&exi->exi_cache_lock);
 946 
 947                         avl_destroy(&nc->authc_tree);
 948                         rw_destroy(&nc->authc_lock);
 949                         kmem_free(nc->authc_addr.buf, nc->authc_addr.maxlen);
 950                         kmem_free(nc, sizeof (*nc));
 951                 }
 952         }
 953 
 954         ASSERT(c != NULL);
 955 
 956         rw_enter(&c->authc_lock, RW_READER);
 957         p = (struct auth_cache *)avl_find(&c->authc_tree, &ac, NULL);
 958 
 959         if (p == NULL) {
 960                 struct auth_cache *np;
 961 
 962                 rw_exit(&c->authc_lock);
 963 
 964                 np = kmem_cache_alloc(exi_cache_handle,
 965                     KM_NOSLEEP | KM_NORMALPRI);
 966                 if (np == NULL) {
 967                         rw_exit(&exi->exi_cache_lock);
 968                         goto retrieve;
 969                 }
 970 
 971                 /*
 972                  * Initialize the new auth_cache
 973                  */
 974                 np->auth_clnt = c;
 975                 np->auth_flavor = flavor;
 976                 np->auth_clnt_cred = ac.auth_clnt_cred;
 977                 np->auth_srv_ngids = 0;
 978                 np->auth_srv_gids = NULL;
 979                 np->auth_time = np->auth_freshness = gethrestime_sec();
 980                 np->auth_state = NFS_AUTH_NEW;
 981                 mutex_init(&np->auth_lock, NULL, MUTEX_DEFAULT, NULL);
 982                 cv_init(&np->auth_cv, NULL, CV_DEFAULT, NULL);
 983 
 984                 rw_enter(&c->authc_lock, RW_WRITER);
 985                 rw_exit(&exi->exi_cache_lock);
 986 
 987                 p = (struct auth_cache *)avl_find(&c->authc_tree, &ac, &where);
 988                 if (p == NULL) {
 989                         avl_insert(&c->authc_tree, np, where);
 990                         rw_downgrade(&c->authc_lock);
 991                         p = np;
 992                 } else {
 993                         rw_downgrade(&c->authc_lock);
 994 
 995                         cv_destroy(&np->auth_cv);
 996                         mutex_destroy(&np->auth_lock);
 997                         crfree(ac.auth_clnt_cred);
 998                         kmem_cache_free(exi_cache_handle, np);
 999                 }
1000         } else {
1001                 rw_exit(&exi->exi_cache_lock);
1002                 crfree(ac.auth_clnt_cred);
1003         }
1004 
1005         mutex_enter(&p->auth_lock);
1006         rw_exit(&c->authc_lock);
1007 
1008         /*
1009          * If the entry is in the WAITING state then some other thread is just
1010          * retrieving the required info.  The entry was either NEW, or the list
1011          * of client's supplemental groups is going to be changed (either by
1012          * this thread, or by some other thread).  We need to wait until the
1013          * nfsauth_retrieve() is done.
1014          */
1015         while (p->auth_state == NFS_AUTH_WAITING)
1016                 cv_wait(&p->auth_cv, &p->auth_lock);
1017 
1018         /*
1019          * Here the entry cannot be in WAITING or INVALID state.
1020          */
1021         ASSERT(p->auth_state != NFS_AUTH_WAITING);
1022         ASSERT(p->auth_state != NFS_AUTH_INVALID);
1023 
1024         /*
1025          * If the cache entry is not valid yet, we need to retrieve the
1026          * info ourselves.
1027          */
1028         if (p->auth_state == NFS_AUTH_NEW) {
1029                 bool_t res;
1030                 /*
1031                  * NFS_AUTH_NEW is the default output auth_state value in
1032                  * case we fail somewhere below.
1033                  */
1034                 auth_state_t state = NFS_AUTH_NEW;
1035 
1036                 p->auth_state = NFS_AUTH_WAITING;
1037                 mutex_exit(&p->auth_lock);
1038                 kmem_free(addr.buf, addr.maxlen);
1039                 addr = p->auth_clnt->authc_addr;
1040 
1041                 atomic_inc_uint(&nfsauth_cache_miss);
1042 
1043                 res = nfsauth_retrieve(nag, exi, svc_getnetid(req->rq_xprt),
1044                     flavor, &addr, &access, cr, &tmpuid, &tmpgid, &tmpngids,
1045                     &tmpgids);
1046 
1047                 p->auth_access = access;
1048                 p->auth_time = p->auth_freshness = gethrestime_sec();
1049 
1050                 if (res == TRUE) {
1051                         if (uid != NULL)
1052                                 *uid = tmpuid;
1053                         if (gid != NULL)
1054                                 *gid = tmpgid;
1055                         if (ngids != NULL && gids != NULL) {
1056                                 *ngids = tmpngids;
1057                                 *gids = tmpgids;
1058 
1059                                 /*
1060                                  * We need a copy of gids for the
1061                                  * auth_cache entry
1062                                  */
1063                                 tmpgids = kmem_alloc(tmpngids * sizeof (gid_t),
1064                                     KM_NOSLEEP | KM_NORMALPRI);
1065                                 if (tmpgids != NULL)
1066                                         bcopy(*gids, tmpgids,
1067                                             tmpngids * sizeof (gid_t));
1068                         }
1069 
1070                         if (tmpgids != NULL || tmpngids == 0) {
1071                                 p->auth_srv_uid = tmpuid;
1072                                 p->auth_srv_gid = tmpgid;
1073                                 p->auth_srv_ngids = tmpngids;
1074                                 p->auth_srv_gids = tmpgids;
1075 
1076                                 state = NFS_AUTH_FRESH;
1077                         }
1078                 }
1079 
1080                 /*
1081                  * Set the auth_state and notify waiters.
1082                  */
1083                 mutex_enter(&p->auth_lock);
1084                 p->auth_state = state;
1085                 cv_broadcast(&p->auth_cv);
1086                 mutex_exit(&p->auth_lock);
1087         } else {
1088                 uint_t nach;
1089                 time_t refresh;
1090 
1091                 refresh = gethrestime_sec() - p->auth_freshness;
1092 
1093                 p->auth_time = gethrestime_sec();
1094 
1095                 if (uid != NULL)
1096                         *uid = p->auth_srv_uid;
1097                 if (gid != NULL)
1098                         *gid = p->auth_srv_gid;
1099                 if (ngids != NULL && gids != NULL) {
1100                         *ngids = p->auth_srv_ngids;
1101                         *gids = kmem_alloc(*ngids * sizeof (gid_t), KM_SLEEP);
1102                         bcopy(p->auth_srv_gids, *gids, *ngids * sizeof (gid_t));
1103                 }
1104 
1105                 access = p->auth_access;
1106 
1107                 if ((refresh > NFSAUTH_CACHE_REFRESH) &&
1108                     p->auth_state == NFS_AUTH_FRESH) {
1109                         refreshq_auth_node_t *ran;
1110                         uint_t nacr;
1111 
1112                         p->auth_state = NFS_AUTH_STALE;
1113                         mutex_exit(&p->auth_lock);
1114 
1115                         nacr = atomic_inc_uint_nv(&nfsauth_cache_refresh);
1116                         DTRACE_PROBE3(nfsauth__debug__cache__stale,
1117                             struct exportinfo *, exi,
1118                             struct auth_cache *, p,
1119                             uint_t, nacr);
1120 
1121                         ran = kmem_alloc(sizeof (refreshq_auth_node_t),
1122                             KM_SLEEP);
1123                         ran->ran_auth = p;
1124                         ran->ran_netid = strdup(svc_getnetid(req->rq_xprt));
1125 
1126                         mutex_enter(&nag->refreshq_lock);
1127 
1128                         if (nag->refreshq_thread_state ==
1129                             REFRESHQ_THREAD_NEED_CREATE) {
1130                                 /* Launch nfsauth refresh thread */
1131                                 nag->refreshq_thread_state =
1132                                     REFRESHQ_THREAD_RUNNING;
1133                                 (void) zthread_create(NULL, 0,
1134                                     nfsauth_refresh_thread, nag, 0,
1135                                     minclsyspri);
1136                         }
1137 
1138                         /*
1139                          * We should not add a work queue item if the thread
1140                          * is not accepting them.
1141                          */
1142                         if (nag->refreshq_thread_state ==
1143                             REFRESHQ_THREAD_RUNNING) {
1144                                 refreshq_exi_node_t *ren;
1145 
1146                                 /*
1147                                  * Is there an existing exi_list?
1148                                  */
1149                                 for (ren = list_head(&nag->refreshq_queue);
1150                                     ren != NULL;
1151                                     ren = list_next(&nag->refreshq_queue,
1152                                     ren)) {
1153                                         if (ren->ren_exi == exi) {
1154                                                 list_insert_tail(
1155                                                     &ren->ren_authlist, ran);
1156                                                 break;
1157                                         }
1158                                 }
1159 
1160                                 if (ren == NULL) {
1161                                         ren = kmem_alloc(
1162                                             sizeof (refreshq_exi_node_t),
1163                                             KM_SLEEP);
1164 
1165                                         exi_hold(exi);
1166                                         ren->ren_exi = exi;
1167 
1168                                         list_create(&ren->ren_authlist,
1169                                             sizeof (refreshq_auth_node_t),
1170                                             offsetof(refreshq_auth_node_t,
1171                                             ran_node));
1172 
1173                                         list_insert_tail(&ren->ren_authlist,
1174                                             ran);
1175                                         list_insert_tail(&nag->refreshq_queue,
1176                                             ren);
1177                                 }
1178 
1179                                 cv_broadcast(&nag->refreshq_cv);
1180                         } else {
1181                                 strfree(ran->ran_netid);
1182                                 kmem_free(ran, sizeof (refreshq_auth_node_t));
1183                         }
1184 
1185                         mutex_exit(&nag->refreshq_lock);
1186                 } else {
1187                         mutex_exit(&p->auth_lock);
1188                 }
1189 
1190                 nach = atomic_inc_uint_nv(&nfsauth_cache_hit);
1191                 DTRACE_PROBE2(nfsauth__debug__cache__hit,
1192                     uint_t, nach,
1193                     time_t, refresh);
1194 
1195                 kmem_free(addr.buf, addr.maxlen);
1196         }
1197 
1198         return (access);
1199 
1200 retrieve:
1201         crfree(ac.auth_clnt_cred);
1202 
1203         /*
1204          * Retrieve the required data without caching.
1205          */
1206 
1207         ASSERT(p == NULL);
1208 
1209         atomic_inc_uint(&nfsauth_cache_miss);
1210 
1211         if (nfsauth_retrieve(nag, exi, svc_getnetid(req->rq_xprt), flavor,
1212             &addr, &access, cr, &tmpuid, &tmpgid, &tmpngids, &tmpgids)) {
1213                 if (uid != NULL)
1214                         *uid = tmpuid;
1215                 if (gid != NULL)
1216                         *gid = tmpgid;
1217                 if (ngids != NULL && gids != NULL) {
1218                         *ngids = tmpngids;
1219                         *gids = tmpgids;
1220                 } else {
1221                         kmem_free(tmpgids, tmpngids * sizeof (gid_t));
1222                 }
1223         }
1224 
1225         kmem_free(addr.buf, addr.maxlen);
1226 
1227         return (access);
1228 }
1229 
1230 /*
1231  * Check if the requesting client has access to the filesystem with
1232  * a given nfs flavor number which is an explicitly shared flavor.
1233  */
1234 int
1235 nfsauth4_secinfo_access(struct exportinfo *exi, struct svc_req *req,
1236     int flavor, int perm, cred_t *cr)
1237 {
1238         int access;
1239 
1240         if (! (perm & M_4SEC_EXPORTED)) {
1241                 return (NFSAUTH_DENIED);
1242         }
1243 
1244         /*
1245          * Optimize if there are no lists
1246          */
1247         if ((perm & (M_ROOT | M_NONE | M_MAP)) == 0) {
1248                 perm &= ~M_4SEC_EXPORTED;
1249                 if (perm == M_RO)
1250                         return (NFSAUTH_RO);
1251                 if (perm == M_RW)
1252                         return (NFSAUTH_RW);
1253         }
1254 
1255         access = nfsauth_cache_get(exi, req, flavor, cr, NULL, NULL, NULL,
1256             NULL);
1257 
1258         return (access);
1259 }
1260 
1261 int
1262 nfsauth_access(struct exportinfo *exi, struct svc_req *req, cred_t *cr,
1263     uid_t *uid, gid_t *gid, uint_t *ngids, gid_t **gids)
1264 {
1265         int access, mapaccess;
1266         struct secinfo *sp;
1267         int i, flavor, perm;
1268         int authnone_entry = -1;
1269 
1270         /*
1271          * By default root is mapped to the anonymous user.
1272          * This might get overridden later in nfsauth_cache_get().
1273          */
1274         if (crgetuid(cr) == 0) {
1275                 if (uid != NULL)
1276                         *uid = exi->exi_export.ex_anon;
1277                 if (gid != NULL)
1278                         *gid = exi->exi_export.ex_anon;
1279         } else {
1280                 if (uid != NULL)
1281                         *uid = crgetuid(cr);
1282                 if (gid != NULL)
1283                         *gid = crgetgid(cr);
1284         }
1285 
1286         if (ngids != NULL)
1287                 *ngids = 0;
1288         if (gids != NULL)
1289                 *gids = NULL;
1290 
1291         /*
1292          *  Get the nfs flavor number from xprt.
1293          */
1294         flavor = (int)(uintptr_t)req->rq_xprt->xp_cookie;
1295 
1296         /*
1297          * First check the access restrictions on the filesystem.  If
1298          * there are no lists associated with this flavor then there's no
1299          * need to make an expensive call to the nfsauth service or to
1300          * cache anything.
1301          */
1302 
1303         sp = exi->exi_export.ex_secinfo;
1304         for (i = 0; i < exi->exi_export.ex_seccnt; i++) {
1305                 if (flavor != sp[i].s_secinfo.sc_nfsnum) {
1306                         if (sp[i].s_secinfo.sc_nfsnum == AUTH_NONE)
1307                                 authnone_entry = i;
1308                         continue;
1309                 }
1310                 break;
1311         }
1312 
1313         mapaccess = 0;
1314 
1315         if (i >= exi->exi_export.ex_seccnt) {
1316                 /*
1317                  * Flavor not found, but use AUTH_NONE if it exists
1318                  */
1319                 if (authnone_entry == -1)
1320                         return (NFSAUTH_DENIED);
1321                 flavor = AUTH_NONE;
1322                 mapaccess = NFSAUTH_MAPNONE;
1323                 i = authnone_entry;
1324         }
1325 
1326         /*
1327          * If the flavor is in the ex_secinfo list, but not an explicitly
1328          * shared flavor by the user, it is a result of the nfsv4 server
1329          * namespace setup.  We will grant RO permission similar to that
1330          * for a pseudo node, except that this node is a shared one.
1331          *
1332          * e.g. a flavor in (parentheses) indicates that it is not explicitly
1333          *      shared by the user:
1334          *
1335          *              /       (sys, krb5)
1336          *              |
1337          *              export  #share -o sec=sys (krb5)
1338          *              |
1339          *              secure  #share -o sec=krb5
1340          *
1341          *      In this case, when a krb5 request coming in to access
1342          *      /export, RO permission is granted.
1343          */
1344         if (!(sp[i].s_flags & M_4SEC_EXPORTED))
1345                 return (mapaccess | NFSAUTH_RO);
1346 
1347         /*
1348          * Optimize if there are no lists.
1349          * We cannot optimize for AUTH_SYS with NGRPS (16) supplemental groups.
1350          */
1351         perm = sp[i].s_flags;
1352         if ((perm & (M_ROOT | M_NONE | M_MAP)) == 0 && (ngroups_max <= NGRPS ||
1353             flavor != AUTH_SYS || crgetngroups(cr) < NGRPS)) {
1354                 perm &= ~M_4SEC_EXPORTED;
1355                 if (perm == M_RO)
1356                         return (mapaccess | NFSAUTH_RO);
1357                 if (perm == M_RW)
1358                         return (mapaccess | NFSAUTH_RW);
1359         }
1360 
1361         access = nfsauth_cache_get(exi, req, flavor, cr, uid, gid, ngids, gids);
1362 
1363         /*
1364          * For both NFSAUTH_DENIED and NFSAUTH_WRONGSEC we do not care about
1365          * the supplemental groups.
1366          */
1367         if (access & NFSAUTH_DENIED || access & NFSAUTH_WRONGSEC) {
1368                 if (ngids != NULL && gids != NULL) {
1369                         kmem_free(*gids, *ngids * sizeof (gid_t));
1370                         *ngids = 0;
1371                         *gids = NULL;
1372                 }
1373         }
1374 
1375         /*
1376          * The client's security flavor doesn't match the "ro" or
1377          * "rw" lists.  Try again using AUTH_NONE if present.
1378          */
1379         if ((access & NFSAUTH_WRONGSEC) && (flavor != AUTH_NONE)) {
1380                 /*
1381                  * Have we already encountered AUTH_NONE ?
1382                  */
1383                 if (authnone_entry != -1) {
1384                         mapaccess = NFSAUTH_MAPNONE;
1385                         access = nfsauth_cache_get(exi, req, AUTH_NONE, cr,
1386                             NULL, NULL, NULL, NULL);
1387                 } else {
1388                         /*
1389                          * Check for AUTH_NONE presence.
1390                          */
1391                         for (; i < exi->exi_export.ex_seccnt; i++) {
1392                                 if (sp[i].s_secinfo.sc_nfsnum == AUTH_NONE) {
1393                                         mapaccess = NFSAUTH_MAPNONE;
1394                                         access = nfsauth_cache_get(exi, req,
1395                                             AUTH_NONE, cr, NULL, NULL, NULL,
1396                                             NULL);
1397                                         break;
1398                                 }
1399                         }
1400                 }
1401         }
1402 
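             /*
              * If the request was denied, drop any other access bits so
              * that only NFSAUTH_DENIED (plus the mapping flag, if any)
              * is returned.
              */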
1403         if (access & NFSAUTH_DENIED)
1404                 access = NFSAUTH_DENIED;
1405 
1406         return (access | mapaccess);
1407 }
1408 
1409 static void
1410 nfsauth_free_clnt_node(struct auth_cache_clnt *p)
1411 {
1412         void *cookie = NULL;
1413         struct auth_cache *node;
1414 
1415         while ((node = avl_destroy_nodes(&p->authc_tree, &cookie)) != NULL)
1416                 nfsauth_free_node(node);
1417         avl_destroy(&p->authc_tree);
1418 
1419         kmem_free(p->authc_addr.buf, p->authc_addr.maxlen);
1420         rw_destroy(&p->authc_lock);
1421 
1422         kmem_free(p, sizeof (*p));
1423 }
1424 
1425 static void
1426 nfsauth_free_node(struct auth_cache *p)
1427 {
1428         crfree(p->auth_clnt_cred);
1429         kmem_free(p->auth_srv_gids, p->auth_srv_ngids * sizeof (gid_t));
1430         mutex_destroy(&p->auth_lock);
1431         cv_destroy(&p->auth_cv);
1432         kmem_cache_free(exi_cache_handle, p);
1433 }
1434 
1435 /*
1436  * Free the nfsauth cache for a given export
1437  */
1438 void
1439 nfsauth_cache_free(struct exportinfo *exi)
1440 {
1441         int i;
1442 
1443         /*
1444          * The only way we got here was with an exi_rele, which means that no
1445          * auth cache entry is being refreshed.
1446          */
1447 
1448         for (i = 0; i < AUTH_TABLESIZE; i++) {
1449                 avl_tree_t *tree = exi->exi_cache[i];
1450                 void *cookie = NULL;
1451                 struct auth_cache_clnt *node;
1452 
1453                 while ((node = avl_destroy_nodes(tree, &cookie)) != NULL)
1454                         nfsauth_free_clnt_node(node);
1455         }
1456 }
1457 
1458 /*
1459  * Called by the kernel memory allocator when memory is low.
1460  * Free unused cache entries. If that's not enough, the VM system
1461  * will call again for some more.
1462  *
1463  * This needs to operate on all zones, so we take a reader lock
1464  * on the list of zones and walk the list.  This is OK here
1465  * because exi_cache_trim doesn't block or cause new objects
1466  * to be allocated (basically just frees lots of stuff).
1467  * Use care if nfssrv_globals_rwl is taken as reader in any
1468  * other cases because it will block nfs_server_zone_init
1469  * and nfs_server_zone_fini, which enter as writer.
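      *
      * Note: this routine is presumably installed as the reclaim callback
      * when the exi_cache_handle kmem cache is created during server
      * initialization, e.g. with something like:
      *
      *     exi_cache_handle = kmem_cache_create("exi_cache_handle",
      *         sizeof (struct auth_cache), 0, NULL, NULL,
      *         exi_cache_reclaim, NULL, NULL, 0);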
1470  */
1471 /*ARGSUSED*/
1472 void
1473 exi_cache_reclaim(void *cdrarg)
1474 {
1475         nfs_globals_t *ng;
1476 
1477         rw_enter(&nfssrv_globals_rwl, RW_READER);
1478 
1479         ng = list_head(&nfssrv_globals_list);
1480         while (ng != NULL) {
1481                 exi_cache_reclaim_zone(ng);
1482                 ng = list_next(&nfssrv_globals_list, ng);
1483         }
1484 
1485         rw_exit(&nfssrv_globals_rwl);
1486 }
1487 
1488 static void
1489 exi_cache_reclaim_zone(nfs_globals_t *ng)
1490 {
1491         int i;
1492         struct exportinfo *exi;
1493         nfs_export_t *ne = ng->nfs_export;
1494 
1495         rw_enter(&ne->exported_lock, RW_READER);
1496 
1497         for (i = 0; i < EXPTABLESIZE; i++) {
1498                 for (exi = ne->exptable[i]; exi; exi = exi->fid_hash.next)
1499                         exi_cache_trim(exi);
1500         }
1501 
1502         rw_exit(&ne->exported_lock);
1503 
1504         atomic_inc_uint(&nfsauth_cache_reclaim);
1505 }
1506 
1507 static void
1508 exi_cache_trim(struct exportinfo *exi)
1509 {
1510         struct auth_cache_clnt *c;
1511         struct auth_cache_clnt *nextc;
1512         struct auth_cache *p;
1513         struct auth_cache *next;
1514         int i;
1515         time_t stale_time;
1516         avl_tree_t *tree;
1517 
1518         for (i = 0; i < AUTH_TABLESIZE; i++) {
1519                 tree = exi->exi_cache[i];
1520                 stale_time = gethrestime_sec() - NFSAUTH_CACHE_TRIM;
1521                 rw_enter(&exi->exi_cache_lock, RW_READER);
1522 
1523                 /*
1524                  * Free entries that have not been
1525                  * used for NFSAUTH_CACHE_TRIM seconds.
1526                  */
1527                 for (c = avl_first(tree); c != NULL; c = AVL_NEXT(tree, c)) {
1528                         /*
1529                          * We are being called by the kmem subsystem to reclaim
1530                          * memory, so don't block if we can't get the lock.
1531                          */
1532                         if (rw_tryenter(&c->authc_lock, RW_WRITER) == 0) {
1533                                 exi_cache_auth_reclaim_failed++;
1534                                 rw_exit(&exi->exi_cache_lock);
1535                                 return;
1536                         }
1537 
1538                         for (p = avl_first(&c->authc_tree); p != NULL;
1539                             p = next) {
1540                                 next = AVL_NEXT(&c->authc_tree, p);
1541 
1542                                 ASSERT(p->auth_state != NFS_AUTH_INVALID);
1543 
1544                                 mutex_enter(&p->auth_lock);
1545 
1546                                 /*
1547                                  * We won't trim recently used and/or WAITING
1548                                  * entries.
1549                                  */
1550                                 if (p->auth_time > stale_time ||
1551                                     p->auth_state == NFS_AUTH_WAITING) {
1552                                         mutex_exit(&p->auth_lock);
1553                                         continue;
1554                                 }
1555 
1556                                 DTRACE_PROBE1(nfsauth__debug__trim__state,
1557                                     auth_state_t, p->auth_state);
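                                     /*
                                      * Illustrative only: this probe could
                                      * be watched from userland with, e.g.,
                                      * dtrace -n 'sdt:::nfsauth-debug-trim-state
                                      *     { trace(arg0); }'
                                      */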
1558 
1559                                 /*
1560                                  * STALE and REFRESHING entries must be marked
1561                                  * INVALID rather than freed here because they
1562                                  * are still referenced by other structures or
1563                                  * threads.  They will be freed later.
1564                                  */
1565                                 if (p->auth_state == NFS_AUTH_STALE ||
1566                                     p->auth_state == NFS_AUTH_REFRESHING) {
1567                                         p->auth_state = NFS_AUTH_INVALID;
1568                                         mutex_exit(&p->auth_lock);
1569 
1570                                         avl_remove(&c->authc_tree, p);
1571                                 } else {
1572                                         mutex_exit(&p->auth_lock);
1573 
1574                                         avl_remove(&c->authc_tree, p);
1575                                         nfsauth_free_node(p);
1576                                 }
1577                         }
1578                         rw_exit(&c->authc_lock);
1579                 }
1580 
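                     /*
                      * Try to upgrade to a writer lock so that client nodes
                      * whose auth trees are now empty can be pruned.  If the
                      * upgrade fails, skip the pruning for this bucket rather
                      * than block while reclaiming memory.
                      */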
1581                 if (rw_tryupgrade(&exi->exi_cache_lock) == 0) {
1582                         rw_exit(&exi->exi_cache_lock);
1583                         exi_cache_clnt_reclaim_failed++;
1584                         continue;
1585                 }
1586 
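                     /*
                      * Free client nodes whose per-client auth trees are now
                      * empty.
                      */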
1587                 for (c = avl_first(tree); c != NULL; c = nextc) {
1588                         nextc = AVL_NEXT(tree, c);
1589 
1590                         if (avl_is_empty(&c->authc_tree) == B_FALSE)
1591                                 continue;
1592 
1593                         avl_remove(tree, c);
1594 
1595                         nfsauth_free_clnt_node(c);
1596                 }
1597 
1598                 rw_exit(&exi->exi_cache_lock);
1599         }
1600 }