1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 1990, 2010, Oracle and/or its affiliates. All rights reserved.
  24  */
  25 
  26 /*
  27  *      Copyright (c) 1983,1984,1985,1986,1987,1988,1989  AT&T.
  28  *      All rights reserved.
  29  *      Use is subject to license terms.
  30  */
  31 
  32 /*
  33  * Copyright (c) 2011 Bayard G. Bell. All rights reserved.
  34  * Copyright (c) 2013 by Delphix. All rights reserved.
  35  * Copyright 2018 Nexenta Systems, Inc.
  36  * Copyright (c) 2017 Joyent Inc
  37  */
  38 
  39 #include <sys/param.h>
  40 #include <sys/types.h>
  41 #include <sys/systm.h>
  42 #include <sys/cred.h>
  43 #include <sys/proc.h>
  44 #include <sys/user.h>
  45 #include <sys/buf.h>
  46 #include <sys/vfs.h>
  47 #include <sys/vnode.h>
  48 #include <sys/pathname.h>
  49 #include <sys/uio.h>
  50 #include <sys/file.h>
  51 #include <sys/stat.h>
  52 #include <sys/errno.h>
  53 #include <sys/socket.h>
  54 #include <sys/sysmacros.h>
  55 #include <sys/siginfo.h>
  56 #include <sys/tiuser.h>
  57 #include <sys/statvfs.h>
  58 #include <sys/stream.h>
  59 #include <sys/strsun.h>
  60 #include <sys/strsubr.h>
  61 #include <sys/stropts.h>
  62 #include <sys/timod.h>
  63 #include <sys/t_kuser.h>
  64 #include <sys/kmem.h>
  65 #include <sys/kstat.h>
  66 #include <sys/dirent.h>
  67 #include <sys/cmn_err.h>
  68 #include <sys/debug.h>
  69 #include <sys/unistd.h>
  70 #include <sys/vtrace.h>
  71 #include <sys/mode.h>
  72 #include <sys/acl.h>
  73 #include <sys/sdt.h>
  74 #include <sys/debug.h>
  75 
  76 #include <rpc/types.h>
  77 #include <rpc/auth.h>
  78 #include <rpc/auth_unix.h>
  79 #include <rpc/auth_des.h>
  80 #include <rpc/svc.h>
  81 #include <rpc/xdr.h>
  82 #include <rpc/rpc_rdma.h>
  83 
  84 #include <nfs/nfs.h>
  85 #include <nfs/export.h>
  86 #include <nfs/nfssys.h>
  87 #include <nfs/nfs_clnt.h>
  88 #include <nfs/nfs_acl.h>
  89 #include <nfs/nfs_log.h>
  90 #include <nfs/lm.h>
  91 #include <nfs/nfs_dispatch.h>
  92 #include <nfs/nfs4_drc.h>
  93 
  94 #include <sys/modctl.h>
  95 #include <sys/cladm.h>
  96 #include <sys/clconf.h>
  97 
  98 #include <sys/tsol/label.h>
  99 
/*
 * NOTE(review): MAXHOST is not referenced in this part of the file;
 * presumably it bounds a client host name buffer -- confirm at its users.
 */
#define MAXHOST 32
/*
 * Declared here, defined elsewhere.  Presumably formats an IPv6 address
 * into the supplied buffer of the given size -- confirm at the definition.
 */
const char *kinet_ntop6(uchar_t *, char *, size_t);
 102 
/*
 * Module linkage information.
 *
 * modlmisc describes this module using the mod_miscops operations and the
 * name string "NFS server module".  modlinkage is the MODREV_1 linkage
 * structure handed to mod_install() and mod_info(); its list of linkage
 * elements holds just modlmisc and is terminated by NULL.
 */

static struct modlmisc modlmisc = {
        &mod_miscops, "NFS server module"
};

static struct modlinkage modlinkage = {
        MODREV_1, (void *)&modlmisc, NULL
};
 114 
/*
 * Zone-specific-data key: zone_getspecific() with this key returns the
 * calling zone's nfs_globals_t.
 */
zone_key_t nfssrv_zone_key;
/* kmem cache of nfs_xuio_t objects; created in _init(). */
kmem_cache_t *nfs_xuio_cache;
/*
 * NOTE(review): starts out as 0; not referenced in this part of the file.
 * Presumably a tunable enabling loaned buffers -- confirm at its users.
 */
int nfs_loaned_buffers = 0;
 118 
 119 int
 120 _init(void)
 121 {
 122         int status;
 123 
 124         nfs_srvinit();
 125 
 126         status = mod_install((struct modlinkage *)&modlinkage);
 127         if (status != 0) {
 128                 /*
 129                  * Could not load module, cleanup previous
 130                  * initialization work.
 131                  */
 132                 nfs_srvfini();
 133 
 134                 return (status);
 135         }
 136 
 137         /*
 138          * Initialise some placeholders for nfssys() calls. These have
 139          * to be declared by the nfs module, since that handles nfssys()
 140          * calls - also used by NFS clients - but are provided by this
 141          * nfssrv module. These also then serve as confirmation to the
 142          * relevant code in nfs that nfssrv has been loaded, as they're
 143          * initially NULL.
 144          */
 145         nfs_srv_quiesce_func = nfs_srv_quiesce_all;
 146         nfs_srv_dss_func = rfs4_dss_setpaths;
 147 
 148         /* setup DSS paths here; must be done before initial server startup */
 149         rfs4_dss_paths = rfs4_dss_oldpaths = NULL;
 150 
 151         /* initialize the copy reduction caches */
 152 
 153         nfs_xuio_cache = kmem_cache_create("nfs_xuio_cache",
 154             sizeof (nfs_xuio_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
 155 
 156         return (status);
 157 }
 158 
 159 int
 160 _fini()
 161 {
 162         return (EBUSY);
 163 }
 164 
 165 int
 166 _info(struct modinfo *modinfop)
 167 {
 168         return (mod_info(&modlinkage, modinfop));
 169 }
 170 
 171 /*
 172  * PUBLICFH_CHECK() checks if the dispatch routine supports
 173  * RPC_PUBLICFH_OK, if the filesystem is exported public, and if the
 174  * incoming request is using the public filehandle. The check duplicates
 175  * the exportmatch() call done in checkexport(), and we should consider
 176  * modifying those routines to avoid the duplication. For now, we optimize
 177  * by calling exportmatch() only after checking that the dispatch routine
 178  * supports RPC_PUBLICFH_OK, and if the filesystem is explicitly exported
 179  * public (i.e., not the placeholder).
 180  */
 181 #define PUBLICFH_CHECK(ne, disp, exi, fsid, xfid) \
 182                 ((disp->dis_flags & RPC_PUBLICFH_OK) && \
 183                 ((exi->exi_export.ex_flags & EX_PUBLIC) || \
 184                 (exi == ne->exi_public && exportmatch(ne->exi_root, \
 185                 fsid, xfid))))
 186 
/*
 * Forward declarations.  The static routines are private to this file;
 * sec_svc_getcred() and sec_svc_inrootlist() are declared extern and are
 * provided from outside this file.
 */
static void     nfs_srv_shutdown_all(int);
static void     rfs4_server_start(nfs_globals_t *, int);
static void     nullfree(void);
static void     rfs_dispatch(struct svc_req *, SVCXPRT *);
static void     acl_dispatch(struct svc_req *, SVCXPRT *);
static void     common_dispatch(struct svc_req *, SVCXPRT *,
                rpcvers_t, rpcvers_t, char *,
                struct rpc_disptable *);
static  int     checkauth(struct exportinfo *, struct svc_req *, cred_t *, int,
                bool_t, bool_t *);
static char     *client_name(struct svc_req *req);
static char     *client_addr(struct svc_req *req, char *buf);
extern  int     sec_svc_getcred(struct svc_req *, cred_t *cr, char **, int *);
extern  bool_t  sec_svc_inrootlist(int, caddr_t, int, caddr_t *);
static void     *nfs_srv_zone_init(zoneid_t);
static void     nfs_srv_zone_fini(zoneid_t, void *);
 203 
/*
 * Copy the remote address of transport xprt (xp_rtaddr) into the netbuf
 * that nb points to.  A fresh buffer of xp_rtaddr.len bytes is allocated
 * with KM_SLEEP and filled from xp_rtaddr.buf.
 *
 * Notes for users of this macro:
 *  - the exi argument is not used by the expansion;
 *  - nb->maxlen is copied from the transport, but only nb->len bytes are
 *    allocated, so the buffer must be freed with nb->len as its size;
 *  - xprt and nb are evaluated several times;
 *  - the expansion is a bare { } block rather than do { } while (0), so
 *    take care when using it as the body of an if that has an else.
 */
#define NFSLOG_COPY_NETBUF(exi, xprt, nb)       {               \
        (nb)->maxlen = (xprt)->xp_rtaddr.maxlen;          \
        (nb)->len = (xprt)->xp_rtaddr.len;                        \
        (nb)->buf = kmem_alloc((nb)->len, KM_SLEEP);              \
        bcopy((xprt)->xp_rtaddr.buf, (nb)->buf, (nb)->len);    \
        }
 210 
/*
 * Public Filehandle common nfs routines
 *
 * Both are static, so their definitions live in this file (outside the
 * part containing these declarations).
 */
static int      MCLpath(char **);
static void     URLparse(char *);
 216 
 217 /*
 218  * NFS callout table.
 219  * This table is used by svc_getreq() to dispatch a request with
 220  * a given prog/vers pair to an appropriate service provider
 221  * dispatch routine.
 222  *
 223  * NOTE: ordering is relied upon below when resetting the version min/max
 224  * for NFS_PROGRAM.  Careful, if this is ever changed.
 225  */
/*
 * Callout entries for connectionless transports: nfs_srv_set_sc_versions()
 * selects nfs_sct_clts when the transport reports T_CLTS, after rewriting
 * the version bounds of entries [0] (NFS_PROGRAM) and [1] (NFS_ACL_PROGRAM).
 */
static SVC_CALLOUT __nfs_sc_clts[] = {
        { NFS_PROGRAM,     NFS_VERSMIN,     NFS_VERSMAX,        rfs_dispatch },
        { NFS_ACL_PROGRAM, NFS_ACL_VERSMIN, NFS_ACL_VERSMAX,    acl_dispatch }
};

/*
 * Table header: entry count, a flag initialised to FALSE, and the entry
 * array.  NOTE(review): the meaning of the FALSE field is not visible
 * here -- confirm against the SVC_CALLOUT_TABLE declaration.
 */
static SVC_CALLOUT_TABLE nfs_sct_clts = {
        sizeof (__nfs_sc_clts) / sizeof (__nfs_sc_clts[0]), FALSE,
        __nfs_sc_clts
};
 235 
/*
 * Callout entries for connection-oriented transports:
 * nfs_srv_set_sc_versions() selects nfs_sct_cots when the transport
 * reports T_COTS or T_COTS_ORD, after rewriting the version bounds of
 * entries [0] (NFS_PROGRAM) and [1] (NFS_ACL_PROGRAM).
 */
static SVC_CALLOUT __nfs_sc_cots[] = {
        { NFS_PROGRAM,     NFS_VERSMIN,     NFS_VERSMAX,        rfs_dispatch },
        { NFS_ACL_PROGRAM, NFS_ACL_VERSMIN, NFS_ACL_VERSMAX,    acl_dispatch }
};

/* Table header: entry count, a flag initialised to FALSE, entry array. */
static SVC_CALLOUT_TABLE nfs_sct_cots = {
        sizeof (__nfs_sc_cots) / sizeof (__nfs_sc_cots[0]), FALSE, __nfs_sc_cots
};
 244 
/*
 * Callout entries for RDMA transports: rdma_start() rewrites the version
 * bounds of entries [0] (NFS_PROGRAM) and [1] (NFS_ACL_PROGRAM) and then
 * passes nfs_sct_rdma to svc_rdma_kcreate().
 */
static SVC_CALLOUT __nfs_sc_rdma[] = {
        { NFS_PROGRAM,     NFS_VERSMIN,     NFS_VERSMAX,        rfs_dispatch },
        { NFS_ACL_PROGRAM, NFS_ACL_VERSMIN, NFS_ACL_VERSMAX,    acl_dispatch }
};

/* Table header: entry count, a flag initialised to FALSE, entry array. */
static SVC_CALLOUT_TABLE nfs_sct_rdma = {
        sizeof (__nfs_sc_rdma) / sizeof (__nfs_sc_rdma[0]), FALSE, __nfs_sc_rdma
};
 253 
/*
 * DSS: distributed stable storage
 * lists of all DSS paths: current, and before last warmstart
 *
 * Both pointers are set to NULL by _init().
 */
nvlist_t *rfs4_dss_paths, *rfs4_dss_oldpaths;
 259 
/*
 * NFSv4 routines declared here for use in this file; their definitions
 * are not in this part of the file.
 * NOTE(review): judging by the names, rfs4_dispatch() handles NFSv4
 * requests and rfs4_minorvers_mismatch() detects an unsupported minor
 * version -- confirm at the definitions.
 */
int rfs4_dispatch(struct rpcdisp *, struct svc_req *, SVCXPRT *, char *,
    size_t *);
bool_t rfs4_minorvers_mismatch(struct svc_req *, SVCXPRT *, void *);
 263 
 264 /*
 265  * Will be called at the point the server pool is being unregistered
 266  * from the pool list. From that point onwards, the pool is waiting
 267  * to be drained and as such the server state is stale and pertains
 268  * to the old instantiation of the NFS server pool.
 269  */
 270 void
 271 nfs_srv_offline(void)
 272 {
 273         nfs_globals_t *ng;
 274 
 275         ng = zone_getspecific(nfssrv_zone_key, curzone);
 276 
 277         mutex_enter(&ng->nfs_server_upordown_lock);
 278         if (ng->nfs_server_upordown == NFS_SERVER_RUNNING) {
 279                 ng->nfs_server_upordown = NFS_SERVER_OFFLINE;
 280         }
 281         mutex_exit(&ng->nfs_server_upordown_lock);
 282 }
 283 
 284 /*
 285  * Will be called at the point the server pool is being destroyed so
 286  * all transports have been closed and no service threads are in
 287  * existence.
 288  *
 289  * If we quiesce the server, we're shutting it down without destroying the
 290  * server state. This allows it to warm start subsequently.
 291  */
 292 void
 293 nfs_srv_stop_all(void)
 294 {
 295         int quiesce = 0;
 296         nfs_srv_shutdown_all(quiesce);
 297 }
 298 
 299 /*
 300  * This alternative shutdown routine can be requested via nfssys()
 301  */
 302 void
 303 nfs_srv_quiesce_all(void)
 304 {
 305         int quiesce = 1;
 306         nfs_srv_shutdown_all(quiesce);
 307 }
 308 
 309 static void
 310 nfs_srv_shutdown_all(int quiesce)
 311 {
 312         nfs_globals_t *ng = zone_getspecific(nfssrv_zone_key, curzone);
 313 
 314         mutex_enter(&ng->nfs_server_upordown_lock);
 315         if (quiesce) {
 316                 if (ng->nfs_server_upordown == NFS_SERVER_RUNNING ||
 317                     ng->nfs_server_upordown == NFS_SERVER_OFFLINE) {
 318                         ng->nfs_server_upordown = NFS_SERVER_QUIESCED;
 319                         cv_signal(&ng->nfs_server_upordown_cv);
 320 
 321                         /* reset DSS state, for subsequent warm restart */
 322                         rfs4_dss_numnewpaths = 0;
 323                         rfs4_dss_newpaths = NULL;
 324 
 325                         cmn_err(CE_NOTE, "nfs_server: server is now quiesced; "
 326                             "NFSv4 state has been preserved");
 327                 }
 328         } else {
 329                 if (ng->nfs_server_upordown == NFS_SERVER_OFFLINE) {
 330                         ng->nfs_server_upordown = NFS_SERVER_STOPPING;
 331                         mutex_exit(&ng->nfs_server_upordown_lock);
 332                         rfs4_state_zone_fini();
 333                         rfs4_fini_drc();
 334                         mutex_enter(&ng->nfs_server_upordown_lock);
 335                         ng->nfs_server_upordown = NFS_SERVER_STOPPED;
 336                         cv_signal(&ng->nfs_server_upordown_cv);
 337                 }
 338         }
 339         mutex_exit(&ng->nfs_server_upordown_lock);
 340 }
 341 
 342 static int
 343 nfs_srv_set_sc_versions(struct file *fp, SVC_CALLOUT_TABLE **sctpp,
 344     rpcvers_t versmin, rpcvers_t versmax)
 345 {
 346         struct strioctl strioc;
 347         struct T_info_ack tinfo;
 348         int             error, retval;
 349 
 350         /*
 351          * Find out what type of transport this is.
 352          */
 353         strioc.ic_cmd = TI_GETINFO;
 354         strioc.ic_timout = -1;
 355         strioc.ic_len = sizeof (tinfo);
 356         strioc.ic_dp = (char *)&tinfo;
 357         tinfo.PRIM_type = T_INFO_REQ;
 358 
 359         error = strioctl(fp->f_vnode, I_STR, (intptr_t)&strioc, 0, K_TO_K,
 360             CRED(), &retval);
 361         if (error || retval)
 362                 return (error);
 363 
 364         /*
 365          * Based on our query of the transport type...
 366          *
 367          * Reset the min/max versions based on the caller's request
 368          * NOTE: This assumes that NFS_PROGRAM is first in the array!!
 369          * And the second entry is the NFS_ACL_PROGRAM.
 370          */
 371         switch (tinfo.SERV_type) {
 372         case T_CLTS:
 373                 if (versmax == NFS_V4)
 374                         return (EINVAL);
 375                 __nfs_sc_clts[0].sc_versmin = versmin;
 376                 __nfs_sc_clts[0].sc_versmax = versmax;
 377                 __nfs_sc_clts[1].sc_versmin = versmin;
 378                 __nfs_sc_clts[1].sc_versmax = versmax;
 379                 *sctpp = &nfs_sct_clts;
 380                 break;
 381         case T_COTS:
 382         case T_COTS_ORD:
 383                 __nfs_sc_cots[0].sc_versmin = versmin;
 384                 __nfs_sc_cots[0].sc_versmax = versmax;
 385                 /* For the NFS_ACL program, check the max version */
 386                 if (versmax > NFS_ACL_VERSMAX)
 387                         versmax = NFS_ACL_VERSMAX;
 388                 __nfs_sc_cots[1].sc_versmin = versmin;
 389                 __nfs_sc_cots[1].sc_versmax = versmax;
 390                 *sctpp = &nfs_sct_cots;
 391                 break;
 392         default:
 393                 error = EINVAL;
 394         }
 395 
 396         return (error);
 397 }
 398 
 399 /*
 400  * NFS Server system call.
 401  * Does all of the work of running a NFS server.
 402  * uap->fd is the fd of an open transport provider
 403  */
 404 int
 405 nfs_svc(struct nfs_svc_args *arg, model_t model)
 406 {
 407         nfs_globals_t *ng;
 408         file_t *fp;
 409         SVCMASTERXPRT *xprt;
 410         int error;
 411         int readsize;
 412         char buf[KNC_STRSIZE];
 413         size_t len;
 414         STRUCT_HANDLE(nfs_svc_args, uap);
 415         struct netbuf addrmask;
 416         SVC_CALLOUT_TABLE *sctp = NULL;
 417 
 418 #ifdef lint
 419         model = model;          /* STRUCT macros don't always refer to it */
 420 #endif
 421 
 422         ng = zone_getspecific(nfssrv_zone_key, curzone);
 423         STRUCT_SET_HANDLE(uap, model, arg);
 424 
 425         /* Check privileges in nfssys() */
 426 
 427         if ((fp = getf(STRUCT_FGET(uap, fd))) == NULL)
 428                 return (EBADF);
 429 
 430         /*
 431          * Set read buffer size to rsize
 432          * and add room for RPC headers.
 433          */
 434         readsize = nfs3tsize() + (RPC_MAXDATASIZE - NFS_MAXDATA);
 435         if (readsize < RPC_MAXDATASIZE)
 436                 readsize = RPC_MAXDATASIZE;
 437 
 438         error = copyinstr((const char *)STRUCT_FGETP(uap, netid), buf,
 439             KNC_STRSIZE, &len);
 440         if (error) {
 441                 releasef(STRUCT_FGET(uap, fd));
 442                 return (error);
 443         }
 444 
 445         addrmask.len = STRUCT_FGET(uap, addrmask.len);
 446         addrmask.maxlen = STRUCT_FGET(uap, addrmask.maxlen);
 447         addrmask.buf = kmem_alloc(addrmask.maxlen, KM_SLEEP);
 448         error = copyin(STRUCT_FGETP(uap, addrmask.buf), addrmask.buf,
 449             addrmask.len);
 450         if (error) {
 451                 releasef(STRUCT_FGET(uap, fd));
 452                 kmem_free(addrmask.buf, addrmask.maxlen);
 453                 return (error);
 454         }
 455 
 456         ng->nfs_versmin = STRUCT_FGET(uap, versmin);
 457         ng->nfs_versmax = STRUCT_FGET(uap, versmax);
 458 
 459         /* Double check the vers min/max ranges */
 460         if ((ng->nfs_versmin > ng->nfs_versmax) ||
 461             (ng->nfs_versmin < NFS_VERSMIN) ||
 462             (ng->nfs_versmax > NFS_VERSMAX)) {
 463                 ng->nfs_versmin = NFS_VERSMIN_DEFAULT;
 464                 ng->nfs_versmax = NFS_VERSMAX_DEFAULT;
 465         }
 466 
 467         if (error = nfs_srv_set_sc_versions(fp, &sctp, ng->nfs_versmin,
 468             ng->nfs_versmax)) {
 469                 releasef(STRUCT_FGET(uap, fd));
 470                 kmem_free(addrmask.buf, addrmask.maxlen);
 471                 return (error);
 472         }
 473 
 474         /* Initialize nfsv4 server */
 475         if (ng->nfs_versmax == (rpcvers_t)NFS_V4)
 476                 rfs4_server_start(ng, STRUCT_FGET(uap, delegation));
 477 
 478         /* Create a transport handle. */
 479         error = svc_tli_kcreate(fp, readsize, buf, &addrmask, &xprt,
 480             sctp, NULL, NFS_SVCPOOL_ID, TRUE);
 481 
 482         if (error)
 483                 kmem_free(addrmask.buf, addrmask.maxlen);
 484 
 485         releasef(STRUCT_FGET(uap, fd));
 486 
 487         /* HA-NFSv4: save the cluster nodeid */
 488         if (cluster_bootflags & CLUSTER_BOOTED)
 489                 lm_global_nlmid = clconf_get_nodeid();
 490 
 491         return (error);
 492 }
 493 
 494 static void
 495 rfs4_server_start(nfs_globals_t *ng, int nfs4_srv_delegation)
 496 {
 497         /*
 498          * Determine if the server has previously been "started" and
 499          * if not, do the per instance initialization
 500          */
 501         mutex_enter(&ng->nfs_server_upordown_lock);
 502 
 503         if (ng->nfs_server_upordown != NFS_SERVER_RUNNING) {
 504                 /* Do we need to stop and wait on the previous server? */
 505                 while (ng->nfs_server_upordown == NFS_SERVER_STOPPING ||
 506                     ng->nfs_server_upordown == NFS_SERVER_OFFLINE)
 507                         cv_wait(&ng->nfs_server_upordown_cv,
 508                             &ng->nfs_server_upordown_lock);
 509 
 510                 if (ng->nfs_server_upordown != NFS_SERVER_RUNNING) {
 511                         (void) svc_pool_control(NFS_SVCPOOL_ID,
 512                             SVCPSET_UNREGISTER_PROC, (void *)&nfs_srv_offline);
 513                         (void) svc_pool_control(NFS_SVCPOOL_ID,
 514                             SVCPSET_SHUTDOWN_PROC, (void *)&nfs_srv_stop_all);
 515 
 516                         rfs4_do_server_start(ng->nfs_server_upordown,
 517                             nfs4_srv_delegation,
 518                             cluster_bootflags & CLUSTER_BOOTED);
 519 
 520                         ng->nfs_server_upordown = NFS_SERVER_RUNNING;
 521                 }
 522                 cv_signal(&ng->nfs_server_upordown_cv);
 523         }
 524         mutex_exit(&ng->nfs_server_upordown_lock);
 525 }
 526 
 527 /*
 528  * If RDMA device available,
 529  * start RDMA listener.
 530  */
 531 int
 532 rdma_start(struct rdma_svc_args *rsa)
 533 {
 534         nfs_globals_t *ng;
 535         int error;
 536         rdma_xprt_group_t started_rdma_xprts;
 537         rdma_stat stat;
 538         int svc_state = 0;
 539 
 540         /* Double check the vers min/max ranges */
 541         if ((rsa->nfs_versmin > rsa->nfs_versmax) ||
 542             (rsa->nfs_versmin < NFS_VERSMIN) ||
 543             (rsa->nfs_versmax > NFS_VERSMAX)) {
 544                 rsa->nfs_versmin = NFS_VERSMIN_DEFAULT;
 545                 rsa->nfs_versmax = NFS_VERSMAX_DEFAULT;
 546         }
 547 
 548         ng = zone_getspecific(nfssrv_zone_key, curzone);
 549         ng->nfs_versmin = rsa->nfs_versmin;
 550         ng->nfs_versmax = rsa->nfs_versmax;
 551 
 552         /* Set the versions in the callout table */
 553         __nfs_sc_rdma[0].sc_versmin = rsa->nfs_versmin;
 554         __nfs_sc_rdma[0].sc_versmax = rsa->nfs_versmax;
 555         /* For the NFS_ACL program, check the max version */
 556         __nfs_sc_rdma[1].sc_versmin = rsa->nfs_versmin;
 557         if (rsa->nfs_versmax > NFS_ACL_VERSMAX)
 558                 __nfs_sc_rdma[1].sc_versmax = NFS_ACL_VERSMAX;
 559         else
 560                 __nfs_sc_rdma[1].sc_versmax = rsa->nfs_versmax;
 561 
 562         /* Initialize nfsv4 server */
 563         if (rsa->nfs_versmax == (rpcvers_t)NFS_V4)
 564                 rfs4_server_start(ng, rsa->delegation);
 565 
 566         started_rdma_xprts.rtg_count = 0;
 567         started_rdma_xprts.rtg_listhead = NULL;
 568         started_rdma_xprts.rtg_poolid = rsa->poolid;
 569 
 570 restart:
 571         error = svc_rdma_kcreate(rsa->netid, &nfs_sct_rdma, rsa->poolid,
 572             &started_rdma_xprts);
 573 
 574         svc_state = !error;
 575 
 576         while (!error) {
 577 
 578                 /*
 579                  * wait till either interrupted by a signal on
 580                  * nfs service stop/restart or signalled by a
 581                  * rdma attach/detatch.
 582                  */
 583 
 584                 stat = rdma_kwait();
 585 
 586                 /*
 587                  * stop services if running -- either on a HCA detach event
 588                  * or if the nfs service is stopped/restarted.
 589                  */
 590 
 591                 if ((stat == RDMA_HCA_DETACH || stat == RDMA_INTR) &&
 592                     svc_state) {
 593                         rdma_stop(&started_rdma_xprts);
 594                         svc_state = 0;
 595                 }
 596 
 597                 /*
 598                  * nfs service stop/restart, break out of the
 599                  * wait loop and return;
 600                  */
 601                 if (stat == RDMA_INTR)
 602                         return (0);
 603 
 604                 /*
 605                  * restart stopped services on a HCA attach event
 606                  * (if not already running)
 607                  */
 608 
 609                 if ((stat == RDMA_HCA_ATTACH) && (svc_state == 0))
 610                         goto restart;
 611 
 612                 /*
 613                  * loop until a nfs service stop/restart
 614                  */
 615         }
 616 
 617         return (error);
 618 }
 619 
/*
 * NULL procedure handler, used for procedure 0 in the NFS version 2
 * dispatch table.  Does nothing; all arguments are ignored.
 */
/* ARGSUSED */
void
rpc_null(caddr_t *argp, caddr_t *resp, struct exportinfo *exi,
    struct svc_req *req, cred_t *cr, bool_t ro)
{
}
 626 
/*
 * NULL procedure handler for NFS version 3.  Performs no work of its own;
 * it only fires the op-null-start and op-null-done DTrace probes, back to
 * back, each with the request, the credential, a NULL vnode and the
 * export info.
 */
/* ARGSUSED */
void
rpc_null_v3(caddr_t *argp, caddr_t *resp, struct exportinfo *exi,
    struct svc_req *req, cred_t *cr, bool_t ro)
{
        DTRACE_NFSV3_4(op__null__start, struct svc_req *, req,
            cred_t *, cr, vnode_t *, NULL, struct exportinfo *, exi);
        DTRACE_NFSV3_4(op__null__done, struct svc_req *, req,
            cred_t *, cr, vnode_t *, NULL, struct exportinfo *, exi);
}
 637 
/*
 * Placeholder handler for the NFS version 2 procedures that are no longer
 * supported (RFS_ROOT and RFS_WRITECACHE in rfsdisptab_v2).  It does
 * nothing and, being void, reports no error to its caller.
 */
/* ARGSUSED */
static void
rfs_error(caddr_t *argp, caddr_t *resp, struct exportinfo *exi,
    struct svc_req *req, cred_t *cr, bool_t ro)
{
        /* return (EOPNOTSUPP); */
}
 645 
/*
 * Do-nothing routine placed in the dispatch table entries below.
 * NOTE(review): presumably it fills the "free the result" slot for
 * procedures whose results need no cleanup -- confirm against the
 * struct rpcdisp declaration.
 */
static void
nullfree(void)
{
}
 650 
/*
 * Printable names of the NFS version 2 procedures.  The order follows the
 * procedure numbers (0 through 17) noted on the entries of rfsdisptab_v2,
 * so the two tables must be kept in step.
 */
static char *rfscallnames_v2[] = {
        "RFS2_NULL",
        "RFS2_GETATTR",
        "RFS2_SETATTR",
        "RFS2_ROOT",
        "RFS2_LOOKUP",
        "RFS2_READLINK",
        "RFS2_READ",
        "RFS2_WRITECACHE",
        "RFS2_WRITE",
        "RFS2_CREATE",
        "RFS2_REMOVE",
        "RFS2_RENAME",
        "RFS2_LINK",
        "RFS2_SYMLINK",
        "RFS2_MKDIR",
        "RFS2_RMDIR",
        "RFS2_READDIR",
        "RFS2_STATFS"
};
 671 
/*
 * NFS version 2 dispatch table.  The "RFS_xxx = n" comment on each entry
 * gives the procedure number it serves; entries must stay in that order.
 *
 * The initializers are positional.  Reading the entries, the slots appear
 * to be: the handler routine; the argument XDR routine, an optional
 * "fast" argument XDR routine and the argument size; the result XDR
 * routine, an optional "fast" result XDR routine and the result size; a
 * routine that releases the result; RPC_* flag bits; and a routine that
 * obtains the file handle from the arguments.
 * NOTE(review): the slot meanings are inferred from the initializer
 * values -- confirm against the struct rpcdisp declaration.
 *
 * The two unsupported procedures (RFS_ROOT and RFS_WRITECACHE) keep their
 * slots and are routed to rfs_error.  For the procedures whose result is
 * a bare enum nfsstat, the fast result routine xdr_fastenum is only used
 * when _LITTLE_ENDIAN is defined.
 */
static struct rpcdisp rfsdisptab_v2[] = {
        /*
         * NFS VERSION 2
         */

        /* RFS_NULL = 0 */
        {rpc_null,
            xdr_void, NULL_xdrproc_t, 0,
            xdr_void, NULL_xdrproc_t, 0,
            nullfree, RPC_IDEMPOTENT,
            0},

        /* RFS_GETATTR = 1 */
        {rfs_getattr,
            xdr_fhandle, xdr_fastfhandle, sizeof (fhandle_t),
            xdr_attrstat, xdr_fastattrstat, sizeof (struct nfsattrstat),
            nullfree, RPC_IDEMPOTENT|RPC_ALLOWANON|RPC_MAPRESP,
            rfs_getattr_getfh},

        /* RFS_SETATTR = 2 */
        {rfs_setattr,
            xdr_saargs, NULL_xdrproc_t, sizeof (struct nfssaargs),
            xdr_attrstat, xdr_fastattrstat, sizeof (struct nfsattrstat),
            nullfree, RPC_MAPRESP,
            rfs_setattr_getfh},

        /* RFS_ROOT = 3 *** NO LONGER SUPPORTED *** */
        {rfs_error,
            xdr_void, NULL_xdrproc_t, 0,
            xdr_void, NULL_xdrproc_t, 0,
            nullfree, RPC_IDEMPOTENT,
            0},

        /* RFS_LOOKUP = 4 */
        {rfs_lookup,
            xdr_diropargs, NULL_xdrproc_t, sizeof (struct nfsdiropargs),
            xdr_diropres, xdr_fastdiropres, sizeof (struct nfsdiropres),
            nullfree, RPC_IDEMPOTENT|RPC_MAPRESP|RPC_PUBLICFH_OK,
            rfs_lookup_getfh},

        /* RFS_READLINK = 5 */
        {rfs_readlink,
            xdr_fhandle, xdr_fastfhandle, sizeof (fhandle_t),
            xdr_rdlnres, NULL_xdrproc_t, sizeof (struct nfsrdlnres),
            rfs_rlfree, RPC_IDEMPOTENT,
            rfs_readlink_getfh},

        /* RFS_READ = 6 */
        {rfs_read,
            xdr_readargs, NULL_xdrproc_t, sizeof (struct nfsreadargs),
            xdr_rdresult, NULL_xdrproc_t, sizeof (struct nfsrdresult),
            rfs_rdfree, RPC_IDEMPOTENT,
            rfs_read_getfh},

        /* RFS_WRITECACHE = 7 *** NO LONGER SUPPORTED *** */
        {rfs_error,
            xdr_void, NULL_xdrproc_t, 0,
            xdr_void, NULL_xdrproc_t, 0,
            nullfree, RPC_IDEMPOTENT,
            0},

        /* RFS_WRITE = 8 */
        {rfs_write,
            xdr_writeargs, NULL_xdrproc_t, sizeof (struct nfswriteargs),
            xdr_attrstat, xdr_fastattrstat, sizeof (struct nfsattrstat),
            nullfree, RPC_MAPRESP,
            rfs_write_getfh},

        /* RFS_CREATE = 9 */
        {rfs_create,
            xdr_creatargs, NULL_xdrproc_t, sizeof (struct nfscreatargs),
            xdr_diropres, xdr_fastdiropres, sizeof (struct nfsdiropres),
            nullfree, RPC_MAPRESP,
            rfs_create_getfh},

        /* RFS_REMOVE = 10 */
        {rfs_remove,
            xdr_diropargs, NULL_xdrproc_t, sizeof (struct nfsdiropargs),
#ifdef _LITTLE_ENDIAN
            xdr_enum, xdr_fastenum, sizeof (enum nfsstat),
#else
            xdr_enum, NULL_xdrproc_t, sizeof (enum nfsstat),
#endif
            nullfree, RPC_MAPRESP,
            rfs_remove_getfh},

        /* RFS_RENAME = 11 */
        {rfs_rename,
            xdr_rnmargs, NULL_xdrproc_t, sizeof (struct nfsrnmargs),
#ifdef _LITTLE_ENDIAN
            xdr_enum, xdr_fastenum, sizeof (enum nfsstat),
#else
            xdr_enum, NULL_xdrproc_t, sizeof (enum nfsstat),
#endif
            nullfree, RPC_MAPRESP,
            rfs_rename_getfh},

        /* RFS_LINK = 12 */
        {rfs_link,
            xdr_linkargs, NULL_xdrproc_t, sizeof (struct nfslinkargs),
#ifdef _LITTLE_ENDIAN
            xdr_enum, xdr_fastenum, sizeof (enum nfsstat),
#else
            xdr_enum, NULL_xdrproc_t, sizeof (enum nfsstat),
#endif
            nullfree, RPC_MAPRESP,
            rfs_link_getfh},

        /* RFS_SYMLINK = 13 */
        {rfs_symlink,
            xdr_slargs, NULL_xdrproc_t, sizeof (struct nfsslargs),
#ifdef _LITTLE_ENDIAN
            xdr_enum, xdr_fastenum, sizeof (enum nfsstat),
#else
            xdr_enum, NULL_xdrproc_t, sizeof (enum nfsstat),
#endif
            nullfree, RPC_MAPRESP,
            rfs_symlink_getfh},

        /* RFS_MKDIR = 14 */
        {rfs_mkdir,
            xdr_creatargs, NULL_xdrproc_t, sizeof (struct nfscreatargs),
            xdr_diropres, xdr_fastdiropres, sizeof (struct nfsdiropres),
            nullfree, RPC_MAPRESP,
            rfs_mkdir_getfh},

        /* RFS_RMDIR = 15 */
        {rfs_rmdir,
            xdr_diropargs, NULL_xdrproc_t, sizeof (struct nfsdiropargs),
#ifdef _LITTLE_ENDIAN
            xdr_enum, xdr_fastenum, sizeof (enum nfsstat),
#else
            xdr_enum, NULL_xdrproc_t, sizeof (enum nfsstat),
#endif
            nullfree, RPC_MAPRESP,
            rfs_rmdir_getfh},

        /* RFS_READDIR = 16 */
        {rfs_readdir,
            xdr_rddirargs, NULL_xdrproc_t, sizeof (struct nfsrddirargs),
            xdr_putrddirres, NULL_xdrproc_t, sizeof (struct nfsrddirres),
            rfs_rddirfree, RPC_IDEMPOTENT,
            rfs_readdir_getfh},

        /* RFS_STATFS = 17 */
        {rfs_statfs,
            xdr_fhandle, xdr_fastfhandle, sizeof (fhandle_t),
            xdr_statfs, xdr_faststatfs, sizeof (struct nfsstatfs),
            nullfree, RPC_IDEMPOTENT|RPC_ALLOWANON|RPC_MAPRESP,
            rfs_statfs_getfh},
};
 823 
/*
 * Printable names of the NFS version 3 procedures, NULL through COMMIT
 * (22 entries).  The order follows the procedure numbers noted on the
 * entries of rfsdisptab_v3, so the two tables must be kept in step.
 */
static char *rfscallnames_v3[] = {
        "RFS3_NULL",
        "RFS3_GETATTR",
        "RFS3_SETATTR",
        "RFS3_LOOKUP",
        "RFS3_ACCESS",
        "RFS3_READLINK",
        "RFS3_READ",
        "RFS3_WRITE",
        "RFS3_CREATE",
        "RFS3_MKDIR",
        "RFS3_SYMLINK",
        "RFS3_MKNOD",
        "RFS3_REMOVE",
        "RFS3_RMDIR",
        "RFS3_RENAME",
        "RFS3_LINK",
        "RFS3_READDIR",
        "RFS3_READDIRPLUS",
        "RFS3_FSSTAT",
        "RFS3_FSINFO",
        "RFS3_PATHCONF",
        "RFS3_COMMIT"
};
 848 
 849 static struct rpcdisp rfsdisptab_v3[] = {
 850         /*
 851          * NFS VERSION 3
 852          */
 853 
 854         /* RFS_NULL = 0 */
 855         {rpc_null_v3,
 856             xdr_void, NULL_xdrproc_t, 0,
 857             xdr_void, NULL_xdrproc_t, 0,
 858             nullfree, RPC_IDEMPOTENT,
 859             0},
 860 
 861         /* RFS3_GETATTR = 1 */
 862         {rfs3_getattr,
 863             xdr_nfs_fh3_server, NULL_xdrproc_t, sizeof (GETATTR3args),
 864             xdr_GETATTR3res, NULL_xdrproc_t, sizeof (GETATTR3res),
 865             nullfree, (RPC_IDEMPOTENT | RPC_ALLOWANON),
 866             rfs3_getattr_getfh},
 867 
 868         /* RFS3_SETATTR = 2 */
 869         {rfs3_setattr,
 870             xdr_SETATTR3args, NULL_xdrproc_t, sizeof (SETATTR3args),
 871             xdr_SETATTR3res, NULL_xdrproc_t, sizeof (SETATTR3res),
 872             nullfree, 0,
 873             rfs3_setattr_getfh},
 874 
 875         /* RFS3_LOOKUP = 3 */
 876         {rfs3_lookup,
 877             xdr_diropargs3, NULL_xdrproc_t, sizeof (LOOKUP3args),
 878             xdr_LOOKUP3res, NULL_xdrproc_t, sizeof (LOOKUP3res),
 879             nullfree, (RPC_IDEMPOTENT | RPC_PUBLICFH_OK),
 880             rfs3_lookup_getfh},
 881 
 882         /* RFS3_ACCESS = 4 */
 883         {rfs3_access,
 884             xdr_ACCESS3args, NULL_xdrproc_t, sizeof (ACCESS3args),
 885             xdr_ACCESS3res, NULL_xdrproc_t, sizeof (ACCESS3res),
 886             nullfree, RPC_IDEMPOTENT,
 887             rfs3_access_getfh},
 888 
 889         /* RFS3_READLINK = 5 */
 890         {rfs3_readlink,
 891             xdr_nfs_fh3_server, NULL_xdrproc_t, sizeof (READLINK3args),
 892             xdr_READLINK3res, NULL_xdrproc_t, sizeof (READLINK3res),
 893             rfs3_readlink_free, RPC_IDEMPOTENT,
 894             rfs3_readlink_getfh},
 895 
 896         /* RFS3_READ = 6 */
 897         {rfs3_read,
 898             xdr_READ3args, NULL_xdrproc_t, sizeof (READ3args),
 899             xdr_READ3res, NULL_xdrproc_t, sizeof (READ3res),
 900             rfs3_read_free, RPC_IDEMPOTENT,
 901             rfs3_read_getfh},
 902 
 903         /* RFS3_WRITE = 7 */
 904         {rfs3_write,
 905             xdr_WRITE3args, NULL_xdrproc_t, sizeof (WRITE3args),
 906             xdr_WRITE3res, NULL_xdrproc_t, sizeof (WRITE3res),
 907             nullfree, 0,
 908             rfs3_write_getfh},
 909 
 910         /* RFS3_CREATE = 8 */
 911         {rfs3_create,
 912             xdr_CREATE3args, NULL_xdrproc_t, sizeof (CREATE3args),
 913             xdr_CREATE3res, NULL_xdrproc_t, sizeof (CREATE3res),
 914             nullfree, 0,
 915             rfs3_create_getfh},
 916 
 917         /* RFS3_MKDIR = 9 */
 918         {rfs3_mkdir,
 919             xdr_MKDIR3args, NULL_xdrproc_t, sizeof (MKDIR3args),
 920             xdr_MKDIR3res, NULL_xdrproc_t, sizeof (MKDIR3res),
 921             nullfree, 0,
 922             rfs3_mkdir_getfh},
 923 
 924         /* RFS3_SYMLINK = 10 */
 925         {rfs3_symlink,
 926             xdr_SYMLINK3args, NULL_xdrproc_t, sizeof (SYMLINK3args),
 927             xdr_SYMLINK3res, NULL_xdrproc_t, sizeof (SYMLINK3res),
 928             nullfree, 0,
 929             rfs3_symlink_getfh},
 930 
 931         /* RFS3_MKNOD = 11 */
 932         {rfs3_mknod,
 933             xdr_MKNOD3args, NULL_xdrproc_t, sizeof (MKNOD3args),
 934             xdr_MKNOD3res, NULL_xdrproc_t, sizeof (MKNOD3res),
 935             nullfree, 0,
 936             rfs3_mknod_getfh},
 937 
 938         /* RFS3_REMOVE = 12 */
 939         {rfs3_remove,
 940             xdr_diropargs3, NULL_xdrproc_t, sizeof (REMOVE3args),
 941             xdr_REMOVE3res, NULL_xdrproc_t, sizeof (REMOVE3res),
 942             nullfree, 0,
 943             rfs3_remove_getfh},
 944 
 945         /* RFS3_RMDIR = 13 */
 946         {rfs3_rmdir,
 947             xdr_diropargs3, NULL_xdrproc_t, sizeof (RMDIR3args),
 948             xdr_RMDIR3res, NULL_xdrproc_t, sizeof (RMDIR3res),
 949             nullfree, 0,
 950             rfs3_rmdir_getfh},
 951 
 952         /* RFS3_RENAME = 14 */
 953         {rfs3_rename,
 954             xdr_RENAME3args, NULL_xdrproc_t, sizeof (RENAME3args),
 955             xdr_RENAME3res, NULL_xdrproc_t, sizeof (RENAME3res),
 956             nullfree, 0,
 957             rfs3_rename_getfh},
 958 
 959         /* RFS3_LINK = 15 */
 960         {rfs3_link,
 961             xdr_LINK3args, NULL_xdrproc_t, sizeof (LINK3args),
 962             xdr_LINK3res, NULL_xdrproc_t, sizeof (LINK3res),
 963             nullfree, 0,
 964             rfs3_link_getfh},
 965 
 966         /* RFS3_READDIR = 16 */
 967         {rfs3_readdir,
 968             xdr_READDIR3args, NULL_xdrproc_t, sizeof (READDIR3args),
 969             xdr_READDIR3res, NULL_xdrproc_t, sizeof (READDIR3res),
 970             rfs3_readdir_free, RPC_IDEMPOTENT,
 971             rfs3_readdir_getfh},
 972 
 973         /* RFS3_READDIRPLUS = 17 */
 974         {rfs3_readdirplus,
 975             xdr_READDIRPLUS3args, NULL_xdrproc_t, sizeof (READDIRPLUS3args),
 976             xdr_READDIRPLUS3res, NULL_xdrproc_t, sizeof (READDIRPLUS3res),
 977             rfs3_readdirplus_free, RPC_AVOIDWORK,
 978             rfs3_readdirplus_getfh},
 979 
 980         /* RFS3_FSSTAT = 18 */
 981         {rfs3_fsstat,
 982             xdr_nfs_fh3_server, NULL_xdrproc_t, sizeof (FSSTAT3args),
 983             xdr_FSSTAT3res, NULL_xdrproc_t, sizeof (FSSTAT3res),
 984             nullfree, RPC_IDEMPOTENT,
 985             rfs3_fsstat_getfh},
 986 
 987         /* RFS3_FSINFO = 19 */
 988         {rfs3_fsinfo,
 989             xdr_nfs_fh3_server, NULL_xdrproc_t, sizeof (FSINFO3args),
 990             xdr_FSINFO3res, NULL_xdrproc_t, sizeof (FSINFO3res),
 991             nullfree, RPC_IDEMPOTENT|RPC_ALLOWANON,
 992             rfs3_fsinfo_getfh},
 993 
 994         /* RFS3_PATHCONF = 20 */
 995         {rfs3_pathconf,
 996             xdr_nfs_fh3_server, NULL_xdrproc_t, sizeof (PATHCONF3args),
 997             xdr_PATHCONF3res, NULL_xdrproc_t, sizeof (PATHCONF3res),
 998             nullfree, RPC_IDEMPOTENT,
 999             rfs3_pathconf_getfh},
1000 
1001         /* RFS3_COMMIT = 21 */
1002         {rfs3_commit,
1003             xdr_COMMIT3args, NULL_xdrproc_t, sizeof (COMMIT3args),
1004             xdr_COMMIT3res, NULL_xdrproc_t, sizeof (COMMIT3res),
1005             nullfree, RPC_IDEMPOTENT,
1006             rfs3_commit_getfh},
1007 };
1008 
/*
 * Printable names of the NFS version 4 procedures, indexed by RPC
 * procedure number.  rfsdisptab_v4[] only has entries for procedures 0
 * and 1, so common_dispatch() never looks past index 1.
 * NOTE(review): the purpose of slots 2 through 8 is not visible from
 * here; they are kept exactly as they were.
 */
static char *rfscallnames_v4[] = {
        [0] = "RFS4_NULL",
        [1] = "RFS4_COMPOUND",
        [2] = "RFS4_NULL",
        [3] = "RFS4_NULL",
        [4] = "RFS4_NULL",
        [5] = "RFS4_NULL",
        [6] = "RFS4_NULL",
        [7] = "RFS4_NULL",
        [8] = "RFS4_CREATE",
};
1020 
1021 static struct rpcdisp rfsdisptab_v4[] = {
1022         /*
1023          * NFS VERSION 4
1024          */
1025 
1026         /* RFS_NULL = 0 */
1027         {rpc_null,
1028             xdr_void, NULL_xdrproc_t, 0,
1029             xdr_void, NULL_xdrproc_t, 0,
1030             nullfree, RPC_IDEMPOTENT, 0},
1031 
1032         /* RFS4_compound = 1 */
1033         {rfs4_compound,
1034             xdr_COMPOUND4args_srv, NULL_xdrproc_t, sizeof (COMPOUND4args),
1035             xdr_COMPOUND4res_srv, NULL_xdrproc_t, sizeof (COMPOUND4res),
1036             rfs4_compound_free, 0, 0},
1037 };
1038 
1039 union rfs_args {
1040         /*
1041          * NFS VERSION 2
1042          */
1043 
1044         /* RFS_NULL = 0 */
1045 
1046         /* RFS_GETATTR = 1 */
1047         fhandle_t nfs2_getattr_args;
1048 
1049         /* RFS_SETATTR = 2 */
1050         struct nfssaargs nfs2_setattr_args;
1051 
1052         /* RFS_ROOT = 3 *** NO LONGER SUPPORTED *** */
1053 
1054         /* RFS_LOOKUP = 4 */
1055         struct nfsdiropargs nfs2_lookup_args;
1056 
1057         /* RFS_READLINK = 5 */
1058         fhandle_t nfs2_readlink_args;
1059 
1060         /* RFS_READ = 6 */
1061         struct nfsreadargs nfs2_read_args;
1062 
1063         /* RFS_WRITECACHE = 7 *** NO LONGER SUPPORTED *** */
1064 
1065         /* RFS_WRITE = 8 */
1066         struct nfswriteargs nfs2_write_args;
1067 
1068         /* RFS_CREATE = 9 */
1069         struct nfscreatargs nfs2_create_args;
1070 
1071         /* RFS_REMOVE = 10 */
1072         struct nfsdiropargs nfs2_remove_args;
1073 
1074         /* RFS_RENAME = 11 */
1075         struct nfsrnmargs nfs2_rename_args;
1076 
1077         /* RFS_LINK = 12 */
1078         struct nfslinkargs nfs2_link_args;
1079 
1080         /* RFS_SYMLINK = 13 */
1081         struct nfsslargs nfs2_symlink_args;
1082 
1083         /* RFS_MKDIR = 14 */
1084         struct nfscreatargs nfs2_mkdir_args;
1085 
1086         /* RFS_RMDIR = 15 */
1087         struct nfsdiropargs nfs2_rmdir_args;
1088 
1089         /* RFS_READDIR = 16 */
1090         struct nfsrddirargs nfs2_readdir_args;
1091 
1092         /* RFS_STATFS = 17 */
1093         fhandle_t nfs2_statfs_args;
1094 
1095         /*
1096          * NFS VERSION 3
1097          */
1098 
1099         /* RFS_NULL = 0 */
1100 
1101         /* RFS3_GETATTR = 1 */
1102         GETATTR3args nfs3_getattr_args;
1103 
1104         /* RFS3_SETATTR = 2 */
1105         SETATTR3args nfs3_setattr_args;
1106 
1107         /* RFS3_LOOKUP = 3 */
1108         LOOKUP3args nfs3_lookup_args;
1109 
1110         /* RFS3_ACCESS = 4 */
1111         ACCESS3args nfs3_access_args;
1112 
1113         /* RFS3_READLINK = 5 */
1114         READLINK3args nfs3_readlink_args;
1115 
1116         /* RFS3_READ = 6 */
1117         READ3args nfs3_read_args;
1118 
1119         /* RFS3_WRITE = 7 */
1120         WRITE3args nfs3_write_args;
1121 
1122         /* RFS3_CREATE = 8 */
1123         CREATE3args nfs3_create_args;
1124 
1125         /* RFS3_MKDIR = 9 */
1126         MKDIR3args nfs3_mkdir_args;
1127 
1128         /* RFS3_SYMLINK = 10 */
1129         SYMLINK3args nfs3_symlink_args;
1130 
1131         /* RFS3_MKNOD = 11 */
1132         MKNOD3args nfs3_mknod_args;
1133 
1134         /* RFS3_REMOVE = 12 */
1135         REMOVE3args nfs3_remove_args;
1136 
1137         /* RFS3_RMDIR = 13 */
1138         RMDIR3args nfs3_rmdir_args;
1139 
1140         /* RFS3_RENAME = 14 */
1141         RENAME3args nfs3_rename_args;
1142 
1143         /* RFS3_LINK = 15 */
1144         LINK3args nfs3_link_args;
1145 
1146         /* RFS3_READDIR = 16 */
1147         READDIR3args nfs3_readdir_args;
1148 
1149         /* RFS3_READDIRPLUS = 17 */
1150         READDIRPLUS3args nfs3_readdirplus_args;
1151 
1152         /* RFS3_FSSTAT = 18 */
1153         FSSTAT3args nfs3_fsstat_args;
1154 
1155         /* RFS3_FSINFO = 19 */
1156         FSINFO3args nfs3_fsinfo_args;
1157 
1158         /* RFS3_PATHCONF = 20 */
1159         PATHCONF3args nfs3_pathconf_args;
1160 
1161         /* RFS3_COMMIT = 21 */
1162         COMMIT3args nfs3_commit_args;
1163 
1164         /*
1165          * NFS VERSION 4
1166          */
1167 
1168         /* RFS_NULL = 0 */
1169 
1170         /* COMPUND = 1 */
1171         COMPOUND4args nfs4_compound_args;
1172 };
1173 
1174 union rfs_res {
1175         /*
1176          * NFS VERSION 2
1177          */
1178 
1179         /* RFS_NULL = 0 */
1180 
1181         /* RFS_GETATTR = 1 */
1182         struct nfsattrstat nfs2_getattr_res;
1183 
1184         /* RFS_SETATTR = 2 */
1185         struct nfsattrstat nfs2_setattr_res;
1186 
1187         /* RFS_ROOT = 3 *** NO LONGER SUPPORTED *** */
1188 
1189         /* RFS_LOOKUP = 4 */
1190         struct nfsdiropres nfs2_lookup_res;
1191 
1192         /* RFS_READLINK = 5 */
1193         struct nfsrdlnres nfs2_readlink_res;
1194 
1195         /* RFS_READ = 6 */
1196         struct nfsrdresult nfs2_read_res;
1197 
1198         /* RFS_WRITECACHE = 7 *** NO LONGER SUPPORTED *** */
1199 
1200         /* RFS_WRITE = 8 */
1201         struct nfsattrstat nfs2_write_res;
1202 
1203         /* RFS_CREATE = 9 */
1204         struct nfsdiropres nfs2_create_res;
1205 
1206         /* RFS_REMOVE = 10 */
1207         enum nfsstat nfs2_remove_res;
1208 
1209         /* RFS_RENAME = 11 */
1210         enum nfsstat nfs2_rename_res;
1211 
1212         /* RFS_LINK = 12 */
1213         enum nfsstat nfs2_link_res;
1214 
1215         /* RFS_SYMLINK = 13 */
1216         enum nfsstat nfs2_symlink_res;
1217 
1218         /* RFS_MKDIR = 14 */
1219         struct nfsdiropres nfs2_mkdir_res;
1220 
1221         /* RFS_RMDIR = 15 */
1222         enum nfsstat nfs2_rmdir_res;
1223 
1224         /* RFS_READDIR = 16 */
1225         struct nfsrddirres nfs2_readdir_res;
1226 
1227         /* RFS_STATFS = 17 */
1228         struct nfsstatfs nfs2_statfs_res;
1229 
1230         /*
1231          * NFS VERSION 3
1232          */
1233 
1234         /* RFS_NULL = 0 */
1235 
1236         /* RFS3_GETATTR = 1 */
1237         GETATTR3res nfs3_getattr_res;
1238 
1239         /* RFS3_SETATTR = 2 */
1240         SETATTR3res nfs3_setattr_res;
1241 
1242         /* RFS3_LOOKUP = 3 */
1243         LOOKUP3res nfs3_lookup_res;
1244 
1245         /* RFS3_ACCESS = 4 */
1246         ACCESS3res nfs3_access_res;
1247 
1248         /* RFS3_READLINK = 5 */
1249         READLINK3res nfs3_readlink_res;
1250 
1251         /* RFS3_READ = 6 */
1252         READ3res nfs3_read_res;
1253 
1254         /* RFS3_WRITE = 7 */
1255         WRITE3res nfs3_write_res;
1256 
1257         /* RFS3_CREATE = 8 */
1258         CREATE3res nfs3_create_res;
1259 
1260         /* RFS3_MKDIR = 9 */
1261         MKDIR3res nfs3_mkdir_res;
1262 
1263         /* RFS3_SYMLINK = 10 */
1264         SYMLINK3res nfs3_symlink_res;
1265 
1266         /* RFS3_MKNOD = 11 */
1267         MKNOD3res nfs3_mknod_res;
1268 
1269         /* RFS3_REMOVE = 12 */
1270         REMOVE3res nfs3_remove_res;
1271 
1272         /* RFS3_RMDIR = 13 */
1273         RMDIR3res nfs3_rmdir_res;
1274 
1275         /* RFS3_RENAME = 14 */
1276         RENAME3res nfs3_rename_res;
1277 
1278         /* RFS3_LINK = 15 */
1279         LINK3res nfs3_link_res;
1280 
1281         /* RFS3_READDIR = 16 */
1282         READDIR3res nfs3_readdir_res;
1283 
1284         /* RFS3_READDIRPLUS = 17 */
1285         READDIRPLUS3res nfs3_readdirplus_res;
1286 
1287         /* RFS3_FSSTAT = 18 */
1288         FSSTAT3res nfs3_fsstat_res;
1289 
1290         /* RFS3_FSINFO = 19 */
1291         FSINFO3res nfs3_fsinfo_res;
1292 
1293         /* RFS3_PATHCONF = 20 */
1294         PATHCONF3res nfs3_pathconf_res;
1295 
1296         /* RFS3_COMMIT = 21 */
1297         COMMIT3res nfs3_commit_res;
1298 
1299         /*
1300          * NFS VERSION 4
1301          */
1302 
1303         /* RFS_NULL = 0 */
1304 
1305         /* RFS4_COMPOUND = 1 */
1306         COMPOUND4res nfs4_compound_res;
1307 
1308 };
1309 
1310 static struct rpc_disptable rfs_disptable[] = {
1311         {sizeof (rfsdisptab_v2) / sizeof (rfsdisptab_v2[0]),
1312             rfscallnames_v2,
1313             &rfsproccnt_v2_ptr, &rfsprocio_v2_ptr, rfsdisptab_v2},
1314         {sizeof (rfsdisptab_v3) / sizeof (rfsdisptab_v3[0]),
1315             rfscallnames_v3,
1316             &rfsproccnt_v3_ptr, &rfsprocio_v3_ptr, rfsdisptab_v3},
1317         {sizeof (rfsdisptab_v4) / sizeof (rfsdisptab_v4[0]),
1318             rfscallnames_v4,
1319             &rfsproccnt_v4_ptr, &rfsprocio_v4_ptr, rfsdisptab_v4},
1320 };
1321 
/*
 * nfs_portmon: when set, clients must use a privileged port (a port
 * below IPPORT_RESERVED) in order to get NFS services.  It is clear by
 * default.
 *
 * N.B.: carrying forward the already ill-conceived notion of privileged
 * ports for TCP/UDP is really quite ineffectual.  Not only is the check
 * transport-dependent, it is laughably easy to spoof.  Anyone really
 * interested in security must start with secure RPC instead.
 */
volatile int nfs_portmon = 0;

#ifdef DEBUG
/*
 * Credential statistics kept by common_dispatch(): a hit is a request
 * whose transport credential had a reference count of one and was kept,
 * a miss is a request whose transport credential was freed and replaced
 * by a newly allocated one.
 */
static int cred_hits = 0;
static int cred_misses = 0;

/*
 * Testing switches.  A non-zero value makes common_dispatch() bypass the
 * fast XDR routine for decoding arguments (rfs_no_fast_xdrargs) or for
 * encoding results (rfs_no_fast_xdrres).
 */
static int rfs_no_fast_xdrargs = 0;
static int rfs_no_fast_xdrres = 0;
#endif  /* DEBUG */
1346 
1347 union acl_args {
1348         /*
1349          * ACL VERSION 2
1350          */
1351 
1352         /* ACL2_NULL = 0 */
1353 
1354         /* ACL2_GETACL = 1 */
1355         GETACL2args acl2_getacl_args;
1356 
1357         /* ACL2_SETACL = 2 */
1358         SETACL2args acl2_setacl_args;
1359 
1360         /* ACL2_GETATTR = 3 */
1361         GETATTR2args acl2_getattr_args;
1362 
1363         /* ACL2_ACCESS = 4 */
1364         ACCESS2args acl2_access_args;
1365 
1366         /* ACL2_GETXATTRDIR = 5 */
1367         GETXATTRDIR2args acl2_getxattrdir_args;
1368 
1369         /*
1370          * ACL VERSION 3
1371          */
1372 
1373         /* ACL3_NULL = 0 */
1374 
1375         /* ACL3_GETACL = 1 */
1376         GETACL3args acl3_getacl_args;
1377 
1378         /* ACL3_SETACL = 2 */
1379         SETACL3args acl3_setacl;
1380 
1381         /* ACL3_GETXATTRDIR = 3 */
1382         GETXATTRDIR3args acl3_getxattrdir_args;
1383 
1384 };
1385 
1386 union acl_res {
1387         /*
1388          * ACL VERSION 2
1389          */
1390 
1391         /* ACL2_NULL = 0 */
1392 
1393         /* ACL2_GETACL = 1 */
1394         GETACL2res acl2_getacl_res;
1395 
1396         /* ACL2_SETACL = 2 */
1397         SETACL2res acl2_setacl_res;
1398 
1399         /* ACL2_GETATTR = 3 */
1400         GETATTR2res acl2_getattr_res;
1401 
1402         /* ACL2_ACCESS = 4 */
1403         ACCESS2res acl2_access_res;
1404 
1405         /* ACL2_GETXATTRDIR = 5 */
1406         GETXATTRDIR2args acl2_getxattrdir_res;
1407 
1408         /*
1409          * ACL VERSION 3
1410          */
1411 
1412         /* ACL3_NULL = 0 */
1413 
1414         /* ACL3_GETACL = 1 */
1415         GETACL3res acl3_getacl_res;
1416 
1417         /* ACL3_SETACL = 2 */
1418         SETACL3res acl3_setacl_res;
1419 
1420         /* ACL3_GETXATTRDIR = 3 */
1421         GETXATTRDIR3res acl3_getxattrdir_res;
1422 
1423 };
1424 
1425 static bool_t
1426 auth_tooweak(struct svc_req *req, char *res)
1427 {
1428 
1429         if (req->rq_vers == NFS_VERSION && req->rq_proc == RFS_LOOKUP) {
1430                 struct nfsdiropres *dr = (struct nfsdiropres *)res;
1431                 if ((enum wnfsstat)dr->dr_status == WNFSERR_CLNT_FLAVOR)
1432                         return (TRUE);
1433         } else if (req->rq_vers == NFS_V3 && req->rq_proc == NFSPROC3_LOOKUP) {
1434                 LOOKUP3res *resp = (LOOKUP3res *)res;
1435                 if ((enum wnfsstat)resp->status == WNFSERR_CLNT_FLAVOR)
1436                         return (TRUE);
1437         }
1438         return (FALSE);
1439 }
1440 
1441 
1442 static void
1443 common_dispatch(struct svc_req *req, SVCXPRT *xprt, rpcvers_t min_vers,
1444     rpcvers_t max_vers, char *pgmname, struct rpc_disptable *disptable)
1445 {
1446         int which;
1447         rpcvers_t vers;
1448         char *args;
1449         union {
1450                         union rfs_args ra;
1451                         union acl_args aa;
1452                 } args_buf;
1453         char *res;
1454         union {
1455                         union rfs_res rr;
1456                         union acl_res ar;
1457                 } res_buf;
1458         struct rpcdisp *disp = NULL;
1459         int dis_flags = 0;
1460         cred_t *cr;
1461         int error = 0;
1462         int anon_ok;
1463         struct exportinfo *exi = NULL;
1464         unsigned int nfslog_rec_id;
1465         int dupstat;
1466         struct dupreq *dr;
1467         int authres;
1468         bool_t publicfh_ok = FALSE;
1469         enum_t auth_flavor;
1470         bool_t dupcached = FALSE;
1471         struct netbuf   nb;
1472         bool_t logging_enabled = FALSE;
1473         struct exportinfo *nfslog_exi = NULL;
1474         char **procnames;
1475         char cbuf[INET6_ADDRSTRLEN];    /* to hold both IPv4 and IPv6 addr */
1476         bool_t ro = FALSE;
1477         kstat_t *ksp = NULL;
1478         kstat_t *exi_ksp = NULL;
1479         size_t pos;                     /* request size */
1480         size_t rlen;                    /* reply size */
1481         bool_t rsent = FALSE;           /* reply was sent successfully */
1482         nfs_export_t *ne = nfs_get_export();
1483 
1484         vers = req->rq_vers;
1485 
1486         if (vers < min_vers || vers > max_vers) {
1487                 svcerr_progvers(req->rq_xprt, min_vers, max_vers);
1488                 error++;
1489                 cmn_err(CE_NOTE, "%s: bad version number %u", pgmname, vers);
1490                 goto done;
1491         }
1492         vers -= min_vers;
1493 
1494         which = req->rq_proc;
1495         if (which < 0 || which >= disptable[(int)vers].dis_nprocs) {
1496                 svcerr_noproc(req->rq_xprt);
1497                 error++;
1498                 goto done;
1499         }
1500 
1501         (*(disptable[(int)vers].dis_proccntp))[which].value.ui64++;
1502 
1503         ksp = (*(disptable[(int)vers].dis_prociop))[which];
1504         if (ksp != NULL) {
1505                 mutex_enter(ksp->ks_lock);
1506                 kstat_runq_enter(KSTAT_IO_PTR(ksp));
1507                 mutex_exit(ksp->ks_lock);
1508         }
1509         pos = XDR_GETPOS(&xprt->xp_xdrin);
1510 
1511         disp = &disptable[(int)vers].dis_table[which];
1512         procnames = disptable[(int)vers].dis_procnames;
1513 
1514         auth_flavor = req->rq_cred.oa_flavor;
1515 
1516         /*
1517          * Deserialize into the args struct.
1518          */
1519         args = (char *)&args_buf;
1520 
1521 #ifdef DEBUG
1522         if (rfs_no_fast_xdrargs || (auth_flavor == RPCSEC_GSS) ||
1523             disp->dis_fastxdrargs == NULL_xdrproc_t ||
1524             !SVC_GETARGS(xprt, disp->dis_fastxdrargs, (char *)&args))
1525 #else
1526         if ((auth_flavor == RPCSEC_GSS) ||
1527             disp->dis_fastxdrargs == NULL_xdrproc_t ||
1528             !SVC_GETARGS(xprt, disp->dis_fastxdrargs, (char *)&args))
1529 #endif
1530         {
1531                 bzero(args, disp->dis_argsz);
1532                 if (!SVC_GETARGS(xprt, disp->dis_xdrargs, args)) {
1533                         error++;
1534                         /*
1535                          * Check if we are outside our capabilities.
1536                          */
1537                         if (rfs4_minorvers_mismatch(req, xprt, (void *)args))
1538                                 goto done;
1539 
1540                         svcerr_decode(xprt);
1541                         cmn_err(CE_NOTE,
1542                             "Failed to decode arguments for %s version %u "
1543                             "procedure %s client %s%s",
1544                             pgmname, vers + min_vers, procnames[which],
1545                             client_name(req), client_addr(req, cbuf));
1546                         goto done;
1547                 }
1548         }
1549 
1550         /*
1551          * If Version 4 use that specific dispatch function.
1552          */
1553         if (req->rq_vers == 4) {
1554                 error += rfs4_dispatch(disp, req, xprt, args, &rlen);
1555                 if (error == 0)
1556                         rsent = TRUE;
1557                 goto done;
1558         }
1559 
1560         dis_flags = disp->dis_flags;
1561 
1562         /*
1563          * Find export information and check authentication,
1564          * setting the credential if everything is ok.
1565          */
1566         if (disp->dis_getfh != NULL) {
1567                 void *fh;
1568                 fsid_t *fsid;
1569                 fid_t *fid, *xfid;
1570                 fhandle_t *fh2;
1571                 nfs_fh3 *fh3;
1572 
1573                 fh = (*disp->dis_getfh)(args);
1574                 switch (req->rq_vers) {
1575                 case NFS_VERSION:
1576                         fh2 = (fhandle_t *)fh;
1577                         fsid = &fh2->fh_fsid;
1578                         fid = (fid_t *)&fh2->fh_len;
1579                         xfid = (fid_t *)&fh2->fh_xlen;
1580                         break;
1581                 case NFS_V3:
1582                         fh3 = (nfs_fh3 *)fh;
1583                         fsid = &fh3->fh3_fsid;
1584                         fid = FH3TOFIDP(fh3);
1585                         xfid = FH3TOXFIDP(fh3);
1586                         break;
1587                 }
1588 
1589                 /*
1590                  * Fix for bug 1038302 - corbin
1591                  * There is a problem here if anonymous access is
1592                  * disallowed.  If the current request is part of the
1593                  * client's mount process for the requested filesystem,
1594                  * then it will carry root (uid 0) credentials on it, and
1595                  * will be denied by checkauth if that client does not
1596                  * have explicit root=0 permission.  This will cause the
1597                  * client's mount operation to fail.  As a work-around,
1598                  * we check here to see if the request is a getattr or
1599                  * statfs operation on the exported vnode itself, and
1600                  * pass a flag to checkauth with the result of this test.
1601                  *
1602                  * The filehandle refers to the mountpoint itself if
1603                  * the fh_data and fh_xdata portions of the filehandle
1604                  * are equal.
1605                  *
1606                  * Added anon_ok argument to checkauth().
1607                  */
1608 
1609                 if ((dis_flags & RPC_ALLOWANON) && EQFID(fid, xfid))
1610                         anon_ok = 1;
1611                 else
1612                         anon_ok = 0;
1613 
1614                 cr = xprt->xp_cred;
1615                 ASSERT(cr != NULL);
1616 #ifdef DEBUG
1617                 {
1618                         if (crgetref(cr) != 1) {
1619                                 crfree(cr);
1620                                 cr = crget();
1621                                 xprt->xp_cred = cr;
1622                                 cred_misses++;
1623                         } else
1624                                 cred_hits++;
1625                 }
1626 #else
1627                 if (crgetref(cr) != 1) {
1628                         crfree(cr);
1629                         cr = crget();
1630                         xprt->xp_cred = cr;
1631                 }
1632 #endif
1633 
1634                 exi = checkexport(fsid, xfid);
1635 
1636                 if (exi != NULL) {
1637                         rw_enter(&ne->exported_lock, RW_READER);
1638                         exi_ksp = NULL;
1639 
1640                         if (exi->exi_kstats != NULL) {
1641                                 switch (req->rq_vers) {
1642                                 case NFS_VERSION:
1643                                         exi_ksp = exp_kstats_v2(exi->exi_kstats,
1644                                             which);
1645                                         break;
1646                                 case NFS_V3:
1647                                         exi_ksp = exp_kstats_v3(exi->exi_kstats,
1648                                             which);
1649                                         break;
1650                                 default:
1651                                         ASSERT(0);
1652                                         break;
1653                                 }
1654                         }
1655 
1656                         if (exi_ksp != NULL) {
1657                                 mutex_enter(exi_ksp->ks_lock);
1658                                 kstat_runq_enter(KSTAT_IO_PTR(exi_ksp));
1659                                 mutex_exit(exi_ksp->ks_lock);
1660                         } else {
1661                                 rw_exit(&ne->exported_lock);
1662                         }
1663 
1664                         publicfh_ok = PUBLICFH_CHECK(ne, disp, exi, fsid, xfid);
1665                         /*
1666                          * Don't allow non-V4 clients access
1667                          * to pseudo exports
1668                          */
1669                         if (PSEUDO(exi)) {
1670                                 svcerr_weakauth(xprt);
1671                                 error++;
1672                                 goto done;
1673                         }
1674 
1675                         authres = checkauth(exi, req, cr, anon_ok, publicfh_ok,
1676                             &ro);
1677                         /*
1678                          * authres >  0: authentication OK - proceed
1679                          * authres == 0: authentication weak - return error
1680                          * authres <  0: authentication timeout - drop
1681                          */
1682                         if (authres <= 0) {
1683                                 if (authres == 0) {
1684                                         svcerr_weakauth(xprt);
1685                                         error++;
1686                                 }
1687                                 goto done;
1688                         }
1689                 }
1690         } else
1691                 cr = NULL;
1692 
1693         if ((dis_flags & RPC_MAPRESP) && (auth_flavor != RPCSEC_GSS)) {
1694                 res = (char *)SVC_GETRES(xprt, disp->dis_ressz);
1695                 if (res == NULL)
1696                         res = (char *)&res_buf;
1697         } else
1698                 res = (char *)&res_buf;
1699 
1700         if (!(dis_flags & RPC_IDEMPOTENT)) {
1701                 dupstat = SVC_DUP_EXT(xprt, req, res, disp->dis_ressz, &dr,
1702                     &dupcached);
1703 
1704                 switch (dupstat) {
1705                 case DUP_ERROR:
1706                         svcerr_systemerr(xprt);
1707                         error++;
1708                         goto done;
1709                         /* NOTREACHED */
1710                 case DUP_INPROGRESS:
1711                         if (res != (char *)&res_buf)
1712                                 SVC_FREERES(xprt);
1713                         error++;
1714                         goto done;
1715                         /* NOTREACHED */
1716                 case DUP_NEW:
1717                 case DUP_DROP:
1718                         curthread->t_flag |= T_DONTPEND;
1719 
1720                         (*disp->dis_proc)(args, res, exi, req, cr, ro);
1721 
1722                         curthread->t_flag &= ~T_DONTPEND;
1723                         if (curthread->t_flag & T_WOULDBLOCK) {
1724                                 curthread->t_flag &= ~T_WOULDBLOCK;
1725                                 SVC_DUPDONE_EXT(xprt, dr, res, NULL,
1726                                     disp->dis_ressz, DUP_DROP);
1727                                 if (res != (char *)&res_buf)
1728                                         SVC_FREERES(xprt);
1729                                 error++;
1730                                 goto done;
1731                         }
1732                         if (dis_flags & RPC_AVOIDWORK) {
1733                                 SVC_DUPDONE_EXT(xprt, dr, res, NULL,
1734                                     disp->dis_ressz, DUP_DROP);
1735                         } else {
1736                                 SVC_DUPDONE_EXT(xprt, dr, res,
1737                                     disp->dis_resfree == nullfree ? NULL :
1738                                     disp->dis_resfree,
1739                                     disp->dis_ressz, DUP_DONE);
1740                                 dupcached = TRUE;
1741                         }
1742                         break;
1743                 case DUP_DONE:
1744                         break;
1745                 }
1746 
1747         } else {
1748                 curthread->t_flag |= T_DONTPEND;
1749 
1750                 (*disp->dis_proc)(args, res, exi, req, cr, ro);
1751 
1752                 curthread->t_flag &= ~T_DONTPEND;
1753                 if (curthread->t_flag & T_WOULDBLOCK) {
1754                         curthread->t_flag &= ~T_WOULDBLOCK;
1755                         if (res != (char *)&res_buf)
1756                                 SVC_FREERES(xprt);
1757                         error++;
1758                         goto done;
1759                 }
1760         }
1761 
1762         if (auth_tooweak(req, res)) {
1763                 svcerr_weakauth(xprt);
1764                 error++;
1765                 goto done;
1766         }
1767 
1768         /*
1769          * Check to see if logging has been enabled on the server.
1770          * If so, then obtain the export info struct to be used for
1771          * the later writing of the log record.  This is done for
1772          * the case that a lookup is done across a non-logged public
1773          * file system.
1774          */
1775         if (nfslog_buffer_list != NULL) {
1776                 nfslog_exi = nfslog_get_exi(ne, exi, req, res, &nfslog_rec_id);
1777                 /*
1778                  * Is logging enabled?
1779                  */
1780                 logging_enabled = (nfslog_exi != NULL);
1781 
1782                 /*
1783                  * Copy the netbuf for logging purposes, before it is
1784                  * freed by svc_sendreply().
1785                  */
1786                 if (logging_enabled) {
1787                         NFSLOG_COPY_NETBUF(nfslog_exi, xprt, &nb);
1788                         /*
1789                          * If RPC_MAPRESP flag set (i.e. in V2 ops) the
1790                          * res gets copied directly into the mbuf and
1791                          * may be freed soon after the sendreply. So we
1792                          * must copy it here to a safe place...
1793                          */
1794                         if (res != (char *)&res_buf) {
1795                                 bcopy(res, (char *)&res_buf, disp->dis_ressz);
1796                         }
1797                 }
1798         }
1799 
1800         /*
1801          * Serialize and send results struct
1802          */
1803 #ifdef DEBUG
1804         if (rfs_no_fast_xdrres == 0 && res != (char *)&res_buf)
1805 #else
1806         if (res != (char *)&res_buf)
1807 #endif
1808         {
1809                 if (!svc_sendreply(xprt, disp->dis_fastxdrres, res)) {
1810                         cmn_err(CE_NOTE, "%s: bad sendreply", pgmname);
1811                         svcerr_systemerr(xprt);
1812                         error++;
1813                 } else {
1814                         rlen = xdr_sizeof(disp->dis_fastxdrres, res);
1815                         rsent = TRUE;
1816                 }
1817         } else {
1818                 if (!svc_sendreply(xprt, disp->dis_xdrres, res)) {
1819                         cmn_err(CE_NOTE, "%s: bad sendreply", pgmname);
1820                         svcerr_systemerr(xprt);
1821                         error++;
1822                 } else {
1823                         rlen = xdr_sizeof(disp->dis_xdrres, res);
1824                         rsent = TRUE;
1825                 }
1826         }
1827 
1828         /*
1829          * Log if needed
1830          */
1831         if (logging_enabled) {
1832                 nfslog_write_record(nfslog_exi, req, args, (char *)&res_buf,
1833                     cr, &nb, nfslog_rec_id, NFSLOG_ONE_BUFFER);
1834                 exi_rele(&nfslog_exi);
1835                 kmem_free((&nb)->buf, (&nb)->len);
1836         }
1837 
1838         /*
1839          * Free results struct. With the addition of NFS V4 we can
1840          * have non-idempotent procedures with functions.
1841          */
1842         if (disp->dis_resfree != nullfree && dupcached == FALSE) {
1843                 (*disp->dis_resfree)(res);
1844         }
1845 
1846 done:
1847         if (ksp != NULL || exi_ksp != NULL) {
1848                 pos = XDR_GETPOS(&xprt->xp_xdrin) - pos;
1849         }
1850 
1851         /*
1852          * Free arguments struct
1853          */
1854         if (disp) {
1855                 if (!SVC_FREEARGS(xprt, disp->dis_xdrargs, args)) {
1856                         cmn_err(CE_NOTE, "%s: bad freeargs", pgmname);
1857                         error++;
1858                 }
1859         } else {
1860                 if (!SVC_FREEARGS(xprt, (xdrproc_t)0, (caddr_t)0)) {
1861                         cmn_err(CE_NOTE, "%s: bad freeargs", pgmname);
1862                         error++;
1863                 }
1864         }
1865 
1866         if (exi_ksp != NULL) {
1867                 mutex_enter(exi_ksp->ks_lock);
1868                 KSTAT_IO_PTR(exi_ksp)->nwritten += pos;
1869                 KSTAT_IO_PTR(exi_ksp)->writes++;
1870                 if (rsent) {
1871                         KSTAT_IO_PTR(exi_ksp)->nread += rlen;
1872                         KSTAT_IO_PTR(exi_ksp)->reads++;
1873                 }
1874                 kstat_runq_exit(KSTAT_IO_PTR(exi_ksp));
1875                 mutex_exit(exi_ksp->ks_lock);
1876 
1877                 rw_exit(&ne->exported_lock);
1878         }
1879 
1880         if (exi != NULL)
1881                 exi_rele(&exi);
1882 
1883         if (ksp != NULL) {
1884                 mutex_enter(ksp->ks_lock);
1885                 KSTAT_IO_PTR(ksp)->nwritten += pos;
1886                 KSTAT_IO_PTR(ksp)->writes++;
1887                 if (rsent) {
1888                         KSTAT_IO_PTR(ksp)->nread += rlen;
1889                         KSTAT_IO_PTR(ksp)->reads++;
1890                 }
1891                 kstat_runq_exit(KSTAT_IO_PTR(ksp));
1892                 mutex_exit(ksp->ks_lock);
1893         }
1894 
1895         global_svstat_ptr[req->rq_vers][NFS_BADCALLS].value.ui64 += error;
1896 
1897         global_svstat_ptr[req->rq_vers][NFS_CALLS].value.ui64++;
1898 }
1899 
1900 static void
1901 rfs_dispatch(struct svc_req *req, SVCXPRT *xprt)
1902 {
1903         common_dispatch(req, xprt, NFS_VERSMIN, NFS_VERSMAX,
1904             "NFS", rfs_disptable);
1905 }
1906 
/*
 * Procedure names for NFS_ACL version 2.  The position of each name is
 * its procedure number, matching the entries of acldisptab_v2[].
 */
static char *aclcallnames_v2[] = {
        "ACL2_NULL",                    /* 0 */
        "ACL2_GETACL",                  /* 1 */
        "ACL2_SETACL",                  /* 2 */
        "ACL2_GETATTR",                 /* 3 */
        "ACL2_ACCESS",                  /* 4 */
        "ACL2_GETXATTRDIR"              /* 5 */
};
1915 
1916 static struct rpcdisp acldisptab_v2[] = {
1917         /*
1918          * ACL VERSION 2
1919          */
1920 
1921         /* ACL2_NULL = 0 */
1922         {rpc_null,
1923             xdr_void, NULL_xdrproc_t, 0,
1924             xdr_void, NULL_xdrproc_t, 0,
1925             nullfree, RPC_IDEMPOTENT,
1926             0},
1927 
1928         /* ACL2_GETACL = 1 */
1929         {acl2_getacl,
1930             xdr_GETACL2args, xdr_fastGETACL2args, sizeof (GETACL2args),
1931             xdr_GETACL2res, NULL_xdrproc_t, sizeof (GETACL2res),
1932             acl2_getacl_free, RPC_IDEMPOTENT,
1933             acl2_getacl_getfh},
1934 
1935         /* ACL2_SETACL = 2 */
1936         {acl2_setacl,
1937             xdr_SETACL2args, NULL_xdrproc_t, sizeof (SETACL2args),
1938 #ifdef _LITTLE_ENDIAN
1939             xdr_SETACL2res, xdr_fastSETACL2res, sizeof (SETACL2res),
1940 #else
1941             xdr_SETACL2res, NULL_xdrproc_t, sizeof (SETACL2res),
1942 #endif
1943             nullfree, RPC_MAPRESP,
1944             acl2_setacl_getfh},
1945 
1946         /* ACL2_GETATTR = 3 */
1947         {acl2_getattr,
1948             xdr_GETATTR2args, xdr_fastGETATTR2args, sizeof (GETATTR2args),
1949 #ifdef _LITTLE_ENDIAN
1950             xdr_GETATTR2res, xdr_fastGETATTR2res, sizeof (GETATTR2res),
1951 #else
1952             xdr_GETATTR2res, NULL_xdrproc_t, sizeof (GETATTR2res),
1953 #endif
1954             nullfree, RPC_IDEMPOTENT|RPC_ALLOWANON|RPC_MAPRESP,
1955             acl2_getattr_getfh},
1956 
1957         /* ACL2_ACCESS = 4 */
1958         {acl2_access,
1959             xdr_ACCESS2args, xdr_fastACCESS2args, sizeof (ACCESS2args),
1960 #ifdef _LITTLE_ENDIAN
1961             xdr_ACCESS2res, xdr_fastACCESS2res, sizeof (ACCESS2res),
1962 #else
1963             xdr_ACCESS2res, NULL_xdrproc_t, sizeof (ACCESS2res),
1964 #endif
1965             nullfree, RPC_IDEMPOTENT|RPC_MAPRESP,
1966             acl2_access_getfh},
1967 
1968         /* ACL2_GETXATTRDIR = 5 */
1969         {acl2_getxattrdir,
1970             xdr_GETXATTRDIR2args, NULL_xdrproc_t, sizeof (GETXATTRDIR2args),
1971             xdr_GETXATTRDIR2res, NULL_xdrproc_t, sizeof (GETXATTRDIR2res),
1972             nullfree, RPC_IDEMPOTENT,
1973             acl2_getxattrdir_getfh},
1974 };
1975 
/*
 * Procedure names for NFS_ACL version 3.  The position of each name is
 * its procedure number, matching the entries of acldisptab_v3[].
 */
static char *aclcallnames_v3[] = {
        "ACL3_NULL",                    /* 0 */
        "ACL3_GETACL",                  /* 1 */
        "ACL3_SETACL",                  /* 2 */
        "ACL3_GETXATTRDIR"              /* 3 */
};
1982 
1983 static struct rpcdisp acldisptab_v3[] = {
1984         /*
1985          * ACL VERSION 3
1986          */
1987 
1988         /* ACL3_NULL = 0 */
1989         {rpc_null,
1990             xdr_void, NULL_xdrproc_t, 0,
1991             xdr_void, NULL_xdrproc_t, 0,
1992             nullfree, RPC_IDEMPOTENT,
1993             0},
1994 
1995         /* ACL3_GETACL = 1 */
1996         {acl3_getacl,
1997             xdr_GETACL3args, NULL_xdrproc_t, sizeof (GETACL3args),
1998             xdr_GETACL3res, NULL_xdrproc_t, sizeof (GETACL3res),
1999             acl3_getacl_free, RPC_IDEMPOTENT,
2000             acl3_getacl_getfh},
2001 
2002         /* ACL3_SETACL = 2 */
2003         {acl3_setacl,
2004             xdr_SETACL3args, NULL_xdrproc_t, sizeof (SETACL3args),
2005             xdr_SETACL3res, NULL_xdrproc_t, sizeof (SETACL3res),
2006             nullfree, 0,
2007             acl3_setacl_getfh},
2008 
2009         /* ACL3_GETXATTRDIR = 3 */
2010         {acl3_getxattrdir,
2011             xdr_GETXATTRDIR3args, NULL_xdrproc_t, sizeof (GETXATTRDIR3args),
2012             xdr_GETXATTRDIR3res, NULL_xdrproc_t, sizeof (GETXATTRDIR3res),
2013             nullfree, RPC_IDEMPOTENT,
2014             acl3_getxattrdir_getfh},
2015 };
2016 
2017 static struct rpc_disptable acl_disptable[] = {
2018         {sizeof (acldisptab_v2) / sizeof (acldisptab_v2[0]),
2019                 aclcallnames_v2,
2020                 &aclproccnt_v2_ptr, &aclprocio_v2_ptr, acldisptab_v2},
2021         {sizeof (acldisptab_v3) / sizeof (acldisptab_v3[0]),
2022                 aclcallnames_v3,
2023                 &aclproccnt_v3_ptr, &aclprocio_v3_ptr, acldisptab_v3},
2024 };
2025 
2026 static void
2027 acl_dispatch(struct svc_req *req, SVCXPRT *xprt)
2028 {
2029         common_dispatch(req, xprt, NFS_ACL_VERSMIN, NFS_ACL_VERSMAX,
2030             "ACL", acl_disptable);
2031 }
2032 
2033 int
2034 checkwin(int flavor, int window, struct svc_req *req)
2035 {
2036         struct authdes_cred *adc;
2037 
2038         switch (flavor) {
2039         case AUTH_DES:
2040                 adc = (struct authdes_cred *)req->rq_clntcred;
2041                 CTASSERT(sizeof (struct authdes_cred) <= RQCRED_SIZE);
2042                 if (adc->adc_fullname.window > window)
2043                         return (0);
2044                 break;
2045 
2046         default:
2047                 break;
2048         }
2049         return (1);
2050 }
2051 
2052 
2053 /*
2054  * checkauth() will check the access permission against the export
2055  * information.  Then map root uid/gid to appropriate uid/gid.
2056  *
2057  * This routine is used by NFS V3 and V2 code.
2058  */
2059 static int
2060 checkauth(struct exportinfo *exi, struct svc_req *req, cred_t *cr, int anon_ok,
2061     bool_t publicfh_ok, bool_t *ro)
2062 {
2063         int i, nfsflavor, rpcflavor, stat, access;
2064         struct secinfo *secp;
2065         caddr_t principal;
2066         char buf[INET6_ADDRSTRLEN]; /* to hold both IPv4 and IPv6 addr */
2067         int anon_res = 0;
2068 
2069         uid_t uid;
2070         gid_t gid;
2071         uint_t ngids;
2072         gid_t *gids;
2073 
2074         /*
2075          * Check for privileged port number
2076          * N.B.:  this assumes that we know the format of a netbuf.
2077          */
2078         if (nfs_portmon) {
2079                 struct sockaddr *ca;
2080                 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2081 
2082                 if (ca == NULL)
2083                         return (0);
2084 
2085                 if ((ca->sa_family == AF_INET &&
2086                     ntohs(((struct sockaddr_in *)ca)->sin_port) >=
2087                     IPPORT_RESERVED) ||
2088                     (ca->sa_family == AF_INET6 &&
2089                     ntohs(((struct sockaddr_in6 *)ca)->sin6_port) >=
2090                     IPPORT_RESERVED)) {
2091                         cmn_err(CE_NOTE,
2092                             "nfs_server: client %s%ssent NFS request from "
2093                             "unprivileged port",
2094                             client_name(req), client_addr(req, buf));
2095                         return (0);
2096                 }
2097         }
2098 
2099         /*
2100          *  return 1 on success or 0 on failure
2101          */
2102         stat = sec_svc_getcred(req, cr, &principal, &nfsflavor);
2103 
2104         /*
2105          * A failed AUTH_UNIX sec_svc_getcred() implies we couldn't set
2106          * the credentials; below we map that to anonymous.
2107          */
2108         if (!stat && nfsflavor != AUTH_UNIX) {
2109                 cmn_err(CE_NOTE,
2110                     "nfs_server: couldn't get unix cred for %s",
2111                     client_name(req));
2112                 return (0);
2113         }
2114 
2115         /*
2116          * Short circuit checkauth() on operations that support the
2117          * public filehandle, and if the request for that operation
2118          * is using the public filehandle. Note that we must call
2119          * sec_svc_getcred() first so that xp_cookie is set to the
2120          * right value. Normally xp_cookie is just the RPC flavor
2121          * of the the request, but in the case of RPCSEC_GSS it
2122          * could be a pseudo flavor.
2123          */
2124         if (publicfh_ok)
2125                 return (1);
2126 
2127         rpcflavor = req->rq_cred.oa_flavor;
2128         /*
2129          * Check if the auth flavor is valid for this export
2130          */
2131         access = nfsauth_access(exi, req, cr, &uid, &gid, &ngids, &gids);
2132         if (access & NFSAUTH_DROP)
2133                 return (-1);    /* drop the request */
2134 
2135         if (access & NFSAUTH_RO)
2136                 *ro = TRUE;
2137 
2138         if (access & NFSAUTH_DENIED) {
2139                 /*
2140                  * If anon_ok == 1 and we got NFSAUTH_DENIED, it was
2141                  * probably due to the flavor not matching during
2142                  * the mount attempt. So map the flavor to AUTH_NONE
2143                  * so that the credentials get mapped to the anonymous
2144                  * user.
2145                  */
2146                 if (anon_ok == 1)
2147                         rpcflavor = AUTH_NONE;
2148                 else
2149                         return (0);     /* deny access */
2150 
2151         } else if (access & NFSAUTH_MAPNONE) {
2152                 /*
2153                  * Access was granted even though the flavor mismatched
2154                  * because AUTH_NONE was one of the exported flavors.
2155                  */
2156                 rpcflavor = AUTH_NONE;
2157 
2158         } else if (access & NFSAUTH_WRONGSEC) {
2159                 /*
2160                  * NFSAUTH_WRONGSEC is used for NFSv4. If we get here,
2161                  * it means a client ignored the list of allowed flavors
2162                  * returned via the MOUNT protocol. So we just disallow it!
2163                  */
2164                 return (0);
2165         }
2166 
2167         if (rpcflavor != AUTH_SYS)
2168                 kmem_free(gids, ngids * sizeof (gid_t));
2169 
2170         switch (rpcflavor) {
2171         case AUTH_NONE:
2172                 anon_res = crsetugid(cr, exi->exi_export.ex_anon,
2173                     exi->exi_export.ex_anon);
2174                 (void) crsetgroups(cr, 0, NULL);
2175                 break;
2176 
2177         case AUTH_UNIX:
2178                 if (!stat || crgetuid(cr) == 0 && !(access & NFSAUTH_UIDMAP)) {
2179                         anon_res = crsetugid(cr, exi->exi_export.ex_anon,
2180                             exi->exi_export.ex_anon);
2181                         (void) crsetgroups(cr, 0, NULL);
2182                 } else if (crgetuid(cr) == 0 && access & NFSAUTH_ROOT) {
2183                         /*
2184                          * It is root, so apply rootid to get real UID
2185                          * Find the secinfo structure.  We should be able
2186                          * to find it by the time we reach here.
2187                          * nfsauth_access() has done the checking.
2188                          */
2189                         secp = NULL;
2190                         for (i = 0; i < exi->exi_export.ex_seccnt; i++) {
2191                                 struct secinfo *sptr;
2192                                 sptr = &exi->exi_export.ex_secinfo[i];
2193                                 if (sptr->s_secinfo.sc_nfsnum == nfsflavor) {
2194                                         secp = sptr;
2195                                         break;
2196                                 }
2197                         }
2198                         if (secp != NULL) {
2199                                 (void) crsetugid(cr, secp->s_rootid,
2200                                     secp->s_rootid);
2201                                 (void) crsetgroups(cr, 0, NULL);
2202                         }
2203                 } else if (crgetuid(cr) != uid || crgetgid(cr) != gid) {
2204                         if (crsetugid(cr, uid, gid) != 0)
2205                                 anon_res = crsetugid(cr,
2206                                     exi->exi_export.ex_anon,
2207                                     exi->exi_export.ex_anon);
2208                         (void) crsetgroups(cr, 0, NULL);
2209                 } else if (access & NFSAUTH_GROUPS) {
2210                         (void) crsetgroups(cr, ngids, gids);
2211                 }
2212 
2213                 kmem_free(gids, ngids * sizeof (gid_t));
2214 
2215                 break;
2216 
2217         case AUTH_DES:
2218         case RPCSEC_GSS:
2219                 /*
2220                  *  Find the secinfo structure.  We should be able
2221                  *  to find it by the time we reach here.
2222                  *  nfsauth_access() has done the checking.
2223                  */
2224                 secp = NULL;
2225                 for (i = 0; i < exi->exi_export.ex_seccnt; i++) {
2226                         if (exi->exi_export.ex_secinfo[i].s_secinfo.sc_nfsnum ==
2227                             nfsflavor) {
2228                                 secp = &exi->exi_export.ex_secinfo[i];
2229                                 break;
2230                         }
2231                 }
2232 
2233                 if (!secp) {
2234                         cmn_err(CE_NOTE, "nfs_server: client %s%shad "
2235                             "no secinfo data for flavor %d",
2236                             client_name(req), client_addr(req, buf),
2237                             nfsflavor);
2238                         return (0);
2239                 }
2240 
2241                 if (!checkwin(rpcflavor, secp->s_window, req)) {
2242                         cmn_err(CE_NOTE,
2243                             "nfs_server: client %s%sused invalid "
2244                             "auth window value",
2245                             client_name(req), client_addr(req, buf));
2246                         return (0);
2247                 }
2248 
2249                 /*
2250                  * Map root principals listed in the share's root= list to root,
2251                  * and map any others principals that were mapped to root by RPC
2252                  * to anon.
2253                  */
2254                 if (principal && sec_svc_inrootlist(rpcflavor, principal,
2255                     secp->s_rootcnt, secp->s_rootnames)) {
2256                         if (crgetuid(cr) == 0 && secp->s_rootid == 0)
2257                                 return (1);
2258 
2259 
2260                         (void) crsetugid(cr, secp->s_rootid, secp->s_rootid);
2261 
2262                         /*
2263                          * NOTE: If and when kernel-land privilege tracing is
2264                          * added this may have to be replaced with code that
2265                          * retrieves root's supplementary groups (e.g., using
2266                          * kgss_get_group_info().  In the meantime principals
2267                          * mapped to uid 0 get all privileges, so setting cr's
2268                          * supplementary groups for them does nothing.
2269                          */
2270                         (void) crsetgroups(cr, 0, NULL);
2271 
2272                         return (1);
2273                 }
2274 
2275                 /*
2276                  * Not a root princ, or not in root list, map UID 0/nobody to
2277                  * the anon ID for the share.  (RPC sets cr's UIDs and GIDs to
2278                  * UID_NOBODY and GID_NOBODY, respectively.)
2279                  */
2280                 if (crgetuid(cr) != 0 &&
2281                     (crgetuid(cr) != UID_NOBODY || crgetgid(cr) != GID_NOBODY))
2282                         return (1);
2283 
2284                 anon_res = crsetugid(cr, exi->exi_export.ex_anon,
2285                     exi->exi_export.ex_anon);
2286                 (void) crsetgroups(cr, 0, NULL);
2287                 break;
2288         default:
2289                 return (0);
2290         } /* switch on rpcflavor */
2291 
2292         /*
2293          * Even if anon access is disallowed via ex_anon == -1, we allow
2294          * this access if anon_ok is set.  So set creds to the default
2295          * "nobody" id.
2296          */
2297         if (anon_res != 0) {
2298                 if (anon_ok == 0) {
2299                         cmn_err(CE_NOTE,
2300                             "nfs_server: client %s%ssent wrong "
2301                             "authentication for %s",
2302                             client_name(req), client_addr(req, buf),
2303                             exi->exi_export.ex_path ?
2304                             exi->exi_export.ex_path : "?");
2305                         return (0);
2306                 }
2307 
2308                 if (crsetugid(cr, UID_NOBODY, GID_NOBODY) != 0)
2309                         return (0);
2310         }
2311 
2312         return (1);
2313 }
2314 
2315 /*
2316  * returns 0 on failure, -1 on a drop, -2 on wrong security flavor,
2317  * and 1 on success
2318  */
2319 int
2320 checkauth4(struct compound_state *cs, struct svc_req *req)
2321 {
2322         int i, rpcflavor, access;
2323         struct secinfo *secp;
2324         char buf[MAXHOST + 1];
2325         int anon_res = 0, nfsflavor;
2326         struct exportinfo *exi;
2327         cred_t  *cr;
2328         caddr_t principal;
2329 
2330         uid_t uid;
2331         gid_t gid;
2332         uint_t ngids;
2333         gid_t *gids;
2334 
2335         exi = cs->exi;
2336         cr = cs->cr;
2337         principal = cs->principal;
2338         nfsflavor = cs->nfsflavor;
2339 
2340         ASSERT(cr != NULL);
2341 
2342         rpcflavor = req->rq_cred.oa_flavor;
2343         cs->access &= ~CS_ACCESS_LIMITED;
2344 
2345         /*
2346          * Check for privileged port number
2347          * N.B.:  this assumes that we know the format of a netbuf.
2348          */
2349         if (nfs_portmon) {
2350                 struct sockaddr *ca;
2351                 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2352 
2353                 if (ca == NULL)
2354                         return (0);
2355 
2356                 if ((ca->sa_family == AF_INET &&
2357                     ntohs(((struct sockaddr_in *)ca)->sin_port) >=
2358                     IPPORT_RESERVED) ||
2359                     (ca->sa_family == AF_INET6 &&
2360                     ntohs(((struct sockaddr_in6 *)ca)->sin6_port) >=
2361                     IPPORT_RESERVED)) {
2362                         cmn_err(CE_NOTE,
2363                             "nfs_server: client %s%ssent NFSv4 request from "
2364                             "unprivileged port",
2365                             client_name(req), client_addr(req, buf));
2366                         return (0);
2367                 }
2368         }
2369 
2370         /*
2371          * Check the access right per auth flavor on the vnode of
2372          * this export for the given request.
2373          */
2374         access = nfsauth4_access(cs->exi, cs->vp, req, cr, &uid, &gid, &ngids,
2375             &gids);
2376 
2377         if (access & NFSAUTH_WRONGSEC)
2378                 return (-2);    /* no access for this security flavor */
2379 
2380         if (access & NFSAUTH_DROP)
2381                 return (-1);    /* drop the request */
2382 
2383         if (access & NFSAUTH_DENIED) {
2384 
2385                 if (exi->exi_export.ex_seccnt > 0)
2386                         return (0);     /* deny access */
2387 
2388         } else if (access & NFSAUTH_LIMITED) {
2389 
2390                 cs->access |= CS_ACCESS_LIMITED;
2391 
2392         } else if (access & NFSAUTH_MAPNONE) {
2393                 /*
2394                  * Access was granted even though the flavor mismatched
2395                  * because AUTH_NONE was one of the exported flavors.
2396                  */
2397                 rpcflavor = AUTH_NONE;
2398         }
2399 
2400         /*
2401          * XXX probably need to redo some of it for nfsv4?
2402          * return 1 on success or 0 on failure
2403          */
2404 
2405         if (rpcflavor != AUTH_SYS)
2406                 kmem_free(gids, ngids * sizeof (gid_t));
2407 
2408         switch (rpcflavor) {
2409         case AUTH_NONE:
2410                 anon_res = crsetugid(cr, exi->exi_export.ex_anon,
2411                     exi->exi_export.ex_anon);
2412                 (void) crsetgroups(cr, 0, NULL);
2413                 break;
2414 
2415         case AUTH_UNIX:
2416                 if (crgetuid(cr) == 0 && !(access & NFSAUTH_UIDMAP)) {
2417                         anon_res = crsetugid(cr, exi->exi_export.ex_anon,
2418                             exi->exi_export.ex_anon);
2419                         (void) crsetgroups(cr, 0, NULL);
2420                 } else if (crgetuid(cr) == 0 && access & NFSAUTH_ROOT) {
2421                         /*
2422                          * It is root, so apply rootid to get real UID
2423                          * Find the secinfo structure.  We should be able
2424                          * to find it by the time we reach here.
2425                          * nfsauth_access() has done the checking.
2426                          */
2427                         secp = NULL;
2428                         for (i = 0; i < exi->exi_export.ex_seccnt; i++) {
2429                                 struct secinfo *sptr;
2430                                 sptr = &exi->exi_export.ex_secinfo[i];
2431                                 if (sptr->s_secinfo.sc_nfsnum == nfsflavor) {
2432                                         secp = &exi->exi_export.ex_secinfo[i];
2433                                         break;
2434                                 }
2435                         }
2436                         if (secp != NULL) {
2437                                 (void) crsetugid(cr, secp->s_rootid,
2438                                     secp->s_rootid);
2439                                 (void) crsetgroups(cr, 0, NULL);
2440                         }
2441                 } else if (crgetuid(cr) != uid || crgetgid(cr) != gid) {
2442                         if (crsetugid(cr, uid, gid) != 0)
2443                                 anon_res = crsetugid(cr,
2444                                     exi->exi_export.ex_anon,
2445                                     exi->exi_export.ex_anon);
2446                         (void) crsetgroups(cr, 0, NULL);
2447                 } if (access & NFSAUTH_GROUPS) {
2448                         (void) crsetgroups(cr, ngids, gids);
2449                 }
2450 
2451                 kmem_free(gids, ngids * sizeof (gid_t));
2452 
2453                 break;
2454 
2455         default:
2456                 /*
2457                  *  Find the secinfo structure.  We should be able
2458                  *  to find it by the time we reach here.
2459                  *  nfsauth_access() has done the checking.
2460                  */
2461                 secp = NULL;
2462                 for (i = 0; i < exi->exi_export.ex_seccnt; i++) {
2463                         if (exi->exi_export.ex_secinfo[i].s_secinfo.sc_nfsnum ==
2464                             nfsflavor) {
2465                                 secp = &exi->exi_export.ex_secinfo[i];
2466                                 break;
2467                         }
2468                 }
2469 
2470                 if (!secp) {
2471                         cmn_err(CE_NOTE, "nfs_server: client %s%shad "
2472                             "no secinfo data for flavor %d",
2473                             client_name(req), client_addr(req, buf),
2474                             nfsflavor);
2475                         return (0);
2476                 }
2477 
2478                 if (!checkwin(rpcflavor, secp->s_window, req)) {
2479                         cmn_err(CE_NOTE,
2480                             "nfs_server: client %s%sused invalid "
2481                             "auth window value",
2482                             client_name(req), client_addr(req, buf));
2483                         return (0);
2484                 }
2485 
2486                 /*
2487                  * Map root principals listed in the share's root= list to root,
2488                  * and map any others principals that were mapped to root by RPC
2489                  * to anon. If not going to anon, set to rootid (root_mapping).
2490                  */
2491                 if (principal && sec_svc_inrootlist(rpcflavor, principal,
2492                     secp->s_rootcnt, secp->s_rootnames)) {
2493                         if (crgetuid(cr) == 0 && secp->s_rootid == 0)
2494                                 return (1);
2495 
2496                         (void) crsetugid(cr, secp->s_rootid, secp->s_rootid);
2497 
2498                         /*
2499                          * NOTE: If and when kernel-land privilege tracing is
2500                          * added this may have to be replaced with code that
2501                          * retrieves root's supplementary groups (e.g., using
2502                          * kgss_get_group_info().  In the meantime principals
2503                          * mapped to uid 0 get all privileges, so setting cr's
2504                          * supplementary groups for them does nothing.
2505                          */
2506                         (void) crsetgroups(cr, 0, NULL);
2507 
2508                         return (1);
2509                 }
2510 
2511                 /*
2512                  * Not a root princ, or not in root list, map UID 0/nobody to
2513                  * the anon ID for the share.  (RPC sets cr's UIDs and GIDs to
2514                  * UID_NOBODY and GID_NOBODY, respectively.)
2515                  */
2516                 if (crgetuid(cr) != 0 &&
2517                     (crgetuid(cr) != UID_NOBODY || crgetgid(cr) != GID_NOBODY))
2518                         return (1);
2519 
2520                 anon_res = crsetugid(cr, exi->exi_export.ex_anon,
2521                     exi->exi_export.ex_anon);
2522                 (void) crsetgroups(cr, 0, NULL);
2523                 break;
2524         } /* switch on rpcflavor */
2525 
2526         /*
2527          * Even if anon access is disallowed via ex_anon == -1, we allow
2528          * this access if anon_ok is set.  So set creds to the default
2529          * "nobody" id.
2530          */
2531 
2532         if (anon_res != 0) {
2533                 cmn_err(CE_NOTE,
2534                     "nfs_server: client %s%ssent wrong "
2535                     "authentication for %s",
2536                     client_name(req), client_addr(req, buf),
2537                     exi->exi_export.ex_path ?
2538                     exi->exi_export.ex_path : "?");
2539                 return (0);
2540         }
2541 
2542         return (1);
2543 }
2544 
2545 
2546 static char *
2547 client_name(struct svc_req *req)
2548 {
2549         char *hostname = NULL;
2550 
2551         /*
2552          * If it's a Unix cred then use the
2553          * hostname from the credential.
2554          */
2555         if (req->rq_cred.oa_flavor == AUTH_UNIX) {
2556                 hostname = ((struct authunix_parms *)
2557                     req->rq_clntcred)->aup_machname;
2558         }
2559         if (hostname == NULL)
2560                 hostname = "";
2561 
2562         return (hostname);
2563 }
2564 
2565 static char *
2566 client_addr(struct svc_req *req, char *buf)
2567 {
2568         struct sockaddr *ca;
2569         uchar_t *b;
2570         char *frontspace = "";
2571 
2572         /*
2573          * We assume we are called in tandem with client_name and the
2574          * format string looks like "...client %s%sblah blah..."
2575          *
2576          * If it's a Unix cred then client_name returned
2577          * a host name, so we need insert a space between host name
2578          * and IP address.
2579          */
2580         if (req->rq_cred.oa_flavor == AUTH_UNIX)
2581                 frontspace = " ";
2582 
2583         /*
2584          * Convert the caller's IP address to a dotted string
2585          */
2586         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2587 
2588         if (ca->sa_family == AF_INET) {
2589                 b = (uchar_t *)&((struct sockaddr_in *)ca)->sin_addr;
2590                 (void) sprintf(buf, "%s(%d.%d.%d.%d) ", frontspace,
2591                     b[0] & 0xFF, b[1] & 0xFF, b[2] & 0xFF, b[3] & 0xFF);
2592         } else if (ca->sa_family == AF_INET6) {
2593                 struct sockaddr_in6 *sin6;
2594                 sin6 = (struct sockaddr_in6 *)ca;
2595                 (void) kinet_ntop6((uchar_t *)&sin6->sin6_addr,
2596                     buf, INET6_ADDRSTRLEN);
2597 
2598         } else {
2599 
2600                 /*
2601                  * No IP address to print. If there was a host name
2602                  * printed, then we print a space.
2603                  */
2604                 (void) sprintf(buf, frontspace);
2605         }
2606 
2607         return (buf);
2608 }
2609 
/*
 * NFS Server initialization routine.  This routine should only be called
 * once.  It performs the following tasks:
 *      - Register the zone key whose create/destroy callbacks
 *        (nfs_srv_zone_init() and nfs_srv_zone_fini()) manage the
 *        zone-specific NFS server globals
 *      - Call sub-initialization routines (localize access to variables)
 *      - Initialize all locks
 *      - initialize the version 3 write verifier
 *
 * nfs_srvfini() undoes these steps in the reverse order.
 */
void
nfs_srvinit(void)
{
        /* NFS server zone-specific global variables */
        zone_key_create(&nfssrv_zone_key, nfs_srv_zone_init,
            NULL, nfs_srv_zone_fini);

        /* Export table, the per-version servers, then the auth cache. */
        nfs_exportinit();
        rfs_srvrinit();
        rfs3_srvrinit();
        rfs4_srvrinit();
        nfsauth_init();
}
2630 
/*
 * NFS Server finalization routine. This routine is called to cleanup the
 * initialization work previously performed if the NFS server module could
 * not be loaded correctly.
 *
 * The sub-finalization routines are called in the reverse of the order in
 * which nfs_srvinit() called their initialization counterparts, and the
 * zone key created first by nfs_srvinit() is deleted last.
 */
void
nfs_srvfini(void)
{
        nfsauth_fini();
        rfs4_srvrfini();
        rfs3_srvrfini();
        rfs_srvrfini();
        nfs_exportfini();

        /* The return value of zone_key_delete() is deliberately ignored. */
        (void) zone_key_delete(nfssrv_zone_key);
}
2647 
2648 /* ARGSUSED */
2649 static void *
2650 nfs_srv_zone_init(zoneid_t zoneid)
2651 {
2652         nfs_globals_t *ng;
2653 
2654         ng = kmem_zalloc(sizeof (*ng), KM_SLEEP);
2655 
2656         ng->nfs_versmin = NFS_VERSMIN_DEFAULT;
2657         ng->nfs_versmax = NFS_VERSMAX_DEFAULT;
2658 
2659         /* Init the stuff to control start/stop */
2660         ng->nfs_server_upordown = NFS_SERVER_STOPPED;
2661         mutex_init(&ng->nfs_server_upordown_lock, NULL, MUTEX_DEFAULT, NULL);
2662         cv_init(&ng->nfs_server_upordown_cv, NULL, CV_DEFAULT, NULL);
2663         mutex_init(&ng->rdma_wait_mutex, NULL, MUTEX_DEFAULT, NULL);
2664         cv_init(&ng->rdma_wait_cv, NULL, CV_DEFAULT, NULL);
2665 
2666         return (ng);
2667 }
2668 
2669 /* ARGSUSED */
2670 static void
2671 nfs_srv_zone_fini(zoneid_t zoneid, void *data)
2672 {
2673         nfs_globals_t *ng;
2674 
2675         ng = (nfs_globals_t *)data;
2676         mutex_destroy(&ng->nfs_server_upordown_lock);
2677         cv_destroy(&ng->nfs_server_upordown_cv);
2678         mutex_destroy(&ng->rdma_wait_mutex);
2679         cv_destroy(&ng->rdma_wait_cv);
2680 
2681         kmem_free(ng, sizeof (*ng));
2682 }
2683 
2684 /*
2685  * Set up an iovec array of up to cnt pointers.
2686  */
2687 void
2688 mblk_to_iov(mblk_t *m, int cnt, struct iovec *iovp)
2689 {
2690         while (m != NULL && cnt-- > 0) {
2691                 iovp->iov_base = (caddr_t)m->b_rptr;
2692                 iovp->iov_len = (m->b_wptr - m->b_rptr);
2693                 iovp++;
2694                 m = m->b_cont;
2695         }
2696 }
2697 
2698 /*
2699  * Common code between NFS Version 2 and NFS Version 3 for the public
2700  * filehandle multicomponent lookups.
2701  */
2702 
2703 /*
2704  * Public filehandle evaluation of a multi-component lookup, following
2705  * symbolic links, if necessary. This may result in a vnode in another
2706  * filesystem, which is OK as long as the other filesystem is exported.
2707  *
2708  * Note that the exi will be set either to NULL or a new reference to the
2709  * exportinfo struct that corresponds to the vnode of the multi-component path.
2710  * It is the callers responsibility to release this reference.
2711  */
2712 int
2713 rfs_publicfh_mclookup(char *p, vnode_t *dvp, cred_t *cr, vnode_t **vpp,
2714     struct exportinfo **exi, struct sec_ol *sec)
2715 {
2716         int pathflag;
2717         vnode_t *mc_dvp = NULL;
2718         vnode_t *realvp;
2719         int error;
2720 
2721         *exi = NULL;
2722 
2723         /*
2724          * check if the given path is a url or native path. Since p is
2725          * modified by MCLpath(), it may be empty after returning from
2726          * there, and should be checked.
2727          */
2728         if ((pathflag = MCLpath(&p)) == -1)
2729                 return (EIO);
2730 
2731         /*
2732          * If pathflag is SECURITY_QUERY, turn the SEC_QUERY bit
2733          * on in sec->sec_flags. This bit will later serve as an
2734          * indication in makefh_ol() or makefh3_ol() to overload the
2735          * filehandle to contain the sec modes used by the server for
2736          * the path.
2737          */
2738         if (pathflag == SECURITY_QUERY) {
2739                 if ((sec->sec_index = (uint_t)(*p)) > 0) {
2740                         sec->sec_flags |= SEC_QUERY;
2741                         p++;
2742                         if ((pathflag = MCLpath(&p)) == -1)
2743                                 return (EIO);
2744                 } else {
2745                         cmn_err(CE_NOTE,
2746                             "nfs_server: invalid security index %d, "
2747                             "violating WebNFS SNEGO protocol.", sec->sec_index);
2748                         return (EIO);
2749                 }
2750         }
2751 
2752         if (p[0] == '\0') {
2753                 error = ENOENT;
2754                 goto publicfh_done;
2755         }
2756 
2757         error = rfs_pathname(p, &mc_dvp, vpp, dvp, cr, pathflag);
2758 
2759         /*
2760          * If name resolves to "/" we get EINVAL since we asked for
2761          * the vnode of the directory that the file is in. Try again
2762          * with NULL directory vnode.
2763          */
2764         if (error == EINVAL) {
2765                 error = rfs_pathname(p, NULL, vpp, dvp, cr, pathflag);
2766                 if (!error) {
2767                         ASSERT(*vpp != NULL);
2768                         if ((*vpp)->v_type == VDIR) {
2769                                 VN_HOLD(*vpp);
2770                                 mc_dvp = *vpp;
2771                         } else {
2772                                 /*
2773                                  * This should not happen, the filesystem is
2774                                  * in an inconsistent state. Fail the lookup
2775                                  * at this point.
2776                                  */
2777                                 VN_RELE(*vpp);
2778                                 error = EINVAL;
2779                         }
2780                 }
2781         }
2782 
2783         if (error)
2784                 goto publicfh_done;
2785 
2786         if (*vpp == NULL) {
2787                 error = ENOENT;
2788                 goto publicfh_done;
2789         }
2790 
2791         ASSERT(mc_dvp != NULL);
2792         ASSERT(*vpp != NULL);
2793 
2794         if ((*vpp)->v_type == VDIR) {
2795                 do {
2796                         /*
2797                          * *vpp may be an AutoFS node, so we perform
2798                          * a VOP_ACCESS() to trigger the mount of the intended
2799                          * filesystem, so we can perform the lookup in the
2800                          * intended filesystem.
2801                          */
2802                         (void) VOP_ACCESS(*vpp, 0, 0, cr, NULL);
2803 
2804                         /*
2805                          * If vnode is covered, get the
2806                          * the topmost vnode.
2807                          */
2808                         if (vn_mountedvfs(*vpp) != NULL) {
2809                                 error = traverse(vpp);
2810                                 if (error) {
2811                                         VN_RELE(*vpp);
2812                                         goto publicfh_done;
2813                                 }
2814                         }
2815 
2816                         if (VOP_REALVP(*vpp, &realvp, NULL) == 0 &&
2817                             realvp != *vpp) {
2818                                 /*
2819                                  * If realvp is different from *vpp
2820                                  * then release our reference on *vpp, so that
2821                                  * the export access check be performed on the
2822                                  * real filesystem instead.
2823                                  */
2824                                 VN_HOLD(realvp);
2825                                 VN_RELE(*vpp);
2826                                 *vpp = realvp;
2827                         } else {
2828                                 break;
2829                         }
2830                 /* LINTED */
2831                 } while (TRUE);
2832 
2833                 /*
2834                  * Let nfs_vptexi() figure what the real parent is.
2835                  */
2836                 VN_RELE(mc_dvp);
2837                 mc_dvp = NULL;
2838 
2839         } else {
2840                 /*
2841                  * If vnode is covered, get the
2842                  * the topmost vnode.
2843                  */
2844                 if (vn_mountedvfs(mc_dvp) != NULL) {
2845                         error = traverse(&mc_dvp);
2846                         if (error) {
2847                                 VN_RELE(*vpp);
2848                                 goto publicfh_done;
2849                         }
2850                 }
2851 
2852                 if (VOP_REALVP(mc_dvp, &realvp, NULL) == 0 &&
2853                     realvp != mc_dvp) {
2854                         /*
2855                          * *vpp is a file, obtain realvp of the parent
2856                          * directory vnode.
2857                          */
2858                         VN_HOLD(realvp);
2859                         VN_RELE(mc_dvp);
2860                         mc_dvp = realvp;
2861                 }
2862         }
2863 
2864         /*
2865          * The pathname may take us from the public filesystem to another.
2866          * If that's the case then just set the exportinfo to the new export
2867          * and build filehandle for it. Thanks to per-access checking there's
2868          * no security issues with doing this. If the client is not allowed
2869          * access to this new export then it will get an access error when it
2870          * tries to use the filehandle
2871          */
2872         if (error = nfs_check_vpexi(mc_dvp, *vpp, kcred, exi)) {
2873                 VN_RELE(*vpp);
2874                 goto publicfh_done;
2875         }
2876 
2877         /*
2878          * Not allowed access to pseudo exports.
2879          */
2880         if (PSEUDO(*exi)) {
2881                 error = ENOENT;
2882                 VN_RELE(*vpp);
2883                 goto publicfh_done;
2884         }
2885 
2886         /*
2887          * Do a lookup for the index file. We know the index option doesn't
2888          * allow paths through handling in the share command, so mc_dvp will
2889          * be the parent for the index file vnode, if its present. Use
2890          * temporary pointers to preserve and reuse the vnode pointers of the
2891          * original directory in case there's no index file. Note that the
2892          * index file is a native path, and should not be interpreted by
2893          * the URL parser in rfs_pathname()
2894          */
2895         if (((*exi)->exi_export.ex_flags & EX_INDEX) &&
2896             ((*vpp)->v_type == VDIR) && (pathflag == URLPATH)) {
2897                 vnode_t *tvp, *tmc_dvp; /* temporary vnode pointers */
2898 
2899                 tmc_dvp = mc_dvp;
2900                 mc_dvp = tvp = *vpp;
2901 
2902                 error = rfs_pathname((*exi)->exi_export.ex_index, NULL, vpp,
2903                     mc_dvp, cr, NATIVEPATH);
2904 
2905                 if (error == ENOENT) {
2906                         *vpp = tvp;
2907                         mc_dvp = tmc_dvp;
2908                         error = 0;
2909                 } else {        /* ok or error other than ENOENT */
2910                         if (tmc_dvp)
2911                                 VN_RELE(tmc_dvp);
2912                         if (error)
2913                                 goto publicfh_done;
2914 
2915                         /*
2916                          * Found a valid vp for index "filename". Sanity check
2917                          * for odd case where a directory is provided as index
2918                          * option argument and leads us to another filesystem
2919                          */
2920 
2921                         /* Release the reference on the old exi value */
2922                         ASSERT(*exi != NULL);
2923                         exi_rele(exi);
2924 
2925                         if (error = nfs_check_vpexi(mc_dvp, *vpp, kcred, exi)) {
2926                                 VN_RELE(*vpp);
2927                                 goto publicfh_done;
2928                         }
2929                 }
2930         }
2931 
2932 publicfh_done:
2933         if (mc_dvp)
2934                 VN_RELE(mc_dvp);
2935 
2936         return (error);
2937 }
2938 
2939 /*
2940  * Evaluate a multi-component path
2941  */
2942 int
2943 rfs_pathname(
2944         char *path,                     /* pathname to evaluate */
2945         vnode_t **dirvpp,               /* ret for ptr to parent dir vnode */
2946         vnode_t **compvpp,              /* ret for ptr to component vnode */
2947         vnode_t *startdvp,              /* starting vnode */
2948         cred_t *cr,                     /* user's credential */
2949         int pathflag)                   /* flag to identify path, e.g. URL */
2950 {
2951         char namebuf[TYPICALMAXPATHLEN];
2952         struct pathname pn;
2953         int error;
2954 
2955         /*
2956          * If pathname starts with '/', then set startdvp to root.
2957          */
2958         if (*path == '/') {
2959                 while (*path == '/')
2960                         path++;
2961 
2962                 startdvp = ZONE_ROOTVP();
2963         }
2964 
2965         error = pn_get_buf(path, UIO_SYSSPACE, &pn, namebuf, sizeof (namebuf));
2966         if (error == 0) {
2967                 /*
2968                  * Call the URL parser for URL paths to modify the original
2969                  * string to handle any '%' encoded characters that exist.
2970                  * Done here to avoid an extra bcopy in the lookup.
2971                  * We need to be careful about pathlen's. We know that
2972                  * rfs_pathname() is called with a non-empty path. However,
2973                  * it could be emptied due to the path simply being all /'s,
2974                  * which is valid to proceed with the lookup, or due to the
2975                  * URL parser finding an encoded null character at the
2976                  * beginning of path which should not proceed with the lookup.
2977                  */
2978                 if (pn.pn_pathlen != 0 && pathflag == URLPATH) {
2979                         URLparse(pn.pn_path);
2980                         if ((pn.pn_pathlen = strlen(pn.pn_path)) == 0)
2981                                 return (ENOENT);
2982                 }
2983                 VN_HOLD(startdvp);
2984                 error = lookuppnvp(&pn, NULL, NO_FOLLOW, dirvpp, compvpp,
2985                     ZONE_ROOTVP(), startdvp, cr);
2986         }
2987         if (error == ENAMETOOLONG) {
2988                 /*
2989                  * This thread used a pathname > TYPICALMAXPATHLEN bytes long.
2990                  */
2991                 if (error = pn_get(path, UIO_SYSSPACE, &pn))
2992                         return (error);
2993                 if (pn.pn_pathlen != 0 && pathflag == URLPATH) {
2994                         URLparse(pn.pn_path);
2995                         if ((pn.pn_pathlen = strlen(pn.pn_path)) == 0) {
2996                                 pn_free(&pn);
2997                                 return (ENOENT);
2998                         }
2999                 }
3000                 VN_HOLD(startdvp);
3001                 error = lookuppnvp(&pn, NULL, NO_FOLLOW, dirvpp, compvpp,
3002                     ZONE_ROOTVP(), startdvp, cr);
3003                 pn_free(&pn);
3004         }
3005 
3006         return (error);
3007 }
3008 
3009 /*
3010  * Adapt the multicomponent lookup path depending on the pathtype
3011  */
3012 static int
3013 MCLpath(char **path)
3014 {
3015         unsigned char c = (unsigned char)**path;
3016 
3017         /*
3018          * If the MCL path is between 0x20 and 0x7E (graphic printable
3019          * character of the US-ASCII coded character set), its a URL path,
3020          * per RFC 1738.
3021          */
3022         if (c >= 0x20 && c <= 0x7E)
3023                 return (URLPATH);
3024 
3025         /*
3026          * If the first octet of the MCL path is not an ASCII character
3027          * then it must be interpreted as a tag value that describes the
3028          * format of the remaining octets of the MCL path.
3029          *
3030          * If the first octet of the MCL path is 0x81 it is a query
3031          * for the security info.
3032          */
3033         switch (c) {
3034         case 0x80:      /* native path, i.e. MCL via mount protocol */
3035                 (*path)++;
3036                 return (NATIVEPATH);
3037         case 0x81:      /* security query */
3038                 (*path)++;
3039                 return (SECURITY_QUERY);
3040         default:
3041                 return (-1);
3042         }
3043 }
3044 
/*
 * Numeric value (0-15) of the ASCII hexadecimal digit c; any other
 * character evaluates to 0.  The argument is fully parenthesized so that an
 * expression may be passed, but it is evaluated more than once and so must
 * not have side effects.
 */
#define fromhex(c)  (((c) >= '0' && (c) <= '9') ? ((c) - '0') : \
                        (((c) >= 'A' && (c) <= 'F') ? ((c) - 'A' + 10) : \
                        (((c) >= 'a' && (c) <= 'f') ? ((c) - 'a' + 10) : 0)))

/*
 * Decode the '%' escapes of a URL path in place.  Each '%' followed by two
 * characters is replaced by the single character those two hexadecimal
 * digits encode, so the result always fits in the original string.
 *
 * A '%' followed by only one character before the end of the string is
 * decoded from that one digit (as the high nibble); a trailing '%' is kept
 * as is.  Non-hexadecimal digits count as 0.  An escape that decodes to a
 * null character is stored like any other, which shortens the string as
 * seen by strlen().
 */
static void
URLparse(char *str)
{
        char *p, *q;

        /* p reads the encoded text, q writes the decoded text behind it. */
        p = q = str;
        while (*p) {
                *q = *p;
                if (*p++ == '%') {
                        if (*p) {
                                *q = fromhex(*p) * 16;
                                p++;
                                if (*p) {
                                        *q += fromhex(*p);
                                        p++;
                                }
                        }
                }
                q++;
        }
        *q = '\0';
}
3076 
3077 
3078 /*
3079  * Get the export information for the lookup vnode, and verify its
3080  * useable.
3081  */
3082 int
3083 nfs_check_vpexi(vnode_t *mc_dvp, vnode_t *vp, cred_t *cr,
3084     struct exportinfo **exi)
3085 {
3086         int walk;
3087         int error = 0;
3088 
3089         *exi = nfs_vptoexi(mc_dvp, vp, cr, &walk, NULL, FALSE);
3090         if (*exi == NULL)
3091                 error = EACCES;
3092         else {
3093                 /*
3094                  * If nosub is set for this export then
3095                  * a lookup relative to the public fh
3096                  * must not terminate below the
3097                  * exported directory.
3098                  */
3099                 if ((*exi)->exi_export.ex_flags & EX_NOSUB && walk > 0)
3100                         error = EACCES;
3101         }
3102 
3103         return (error);
3104 }
3105 
3106 /*
3107  * Used by NFSv3 and NFSv4 server to query label of
3108  * a pathname component during lookup/access ops.
3109  */
3110 ts_label_t *
3111 nfs_getflabel(vnode_t *vp, struct exportinfo *exi)
3112 {
3113         zone_t *zone;
3114         ts_label_t *zone_label;
3115         char *path;
3116 
3117         mutex_enter(&vp->v_lock);
3118         if (vp->v_path != vn_vpath_empty) {
3119                 zone = zone_find_by_any_path(vp->v_path, B_FALSE);
3120                 mutex_exit(&vp->v_lock);
3121         } else {
3122                 /*
3123                  * v_path not cached. Fall back on pathname of exported
3124                  * file system as we rely on pathname from which we can
3125                  * derive a label. The exported file system portion of
3126                  * path is sufficient to obtain a label.
3127                  */
3128                 path = exi->exi_export.ex_path;
3129                 if (path == NULL) {
3130                         mutex_exit(&vp->v_lock);
3131                         return (NULL);
3132                 }
3133                 zone = zone_find_by_any_path(path, B_FALSE);
3134                 mutex_exit(&vp->v_lock);
3135         }
3136         /*
3137          * Caller has verified that the file is either
3138          * exported or visible. So if the path falls in
3139          * global zone, admin_low is returned; otherwise
3140          * the zone's label is returned.
3141          */
3142         zone_label = zone->zone_slabel;
3143         label_hold(zone_label);
3144         zone_rele(zone);
3145         return (zone_label);
3146 }
3147 
3148 /*
3149  * TX NFS routine used by NFSv3 and NFSv4 to do label check
3150  * on client label and server's file object lable.
3151  */
3152 boolean_t
3153 do_rfs_label_check(bslabel_t *clabel, vnode_t *vp, int flag,
3154     struct exportinfo *exi)
3155 {
3156         bslabel_t *slabel;
3157         ts_label_t *tslabel;
3158         boolean_t result;
3159 
3160         if ((tslabel = nfs_getflabel(vp, exi)) == NULL) {
3161                 return (B_FALSE);
3162         }
3163         slabel = label2bslabel(tslabel);
3164         DTRACE_PROBE4(tx__rfs__log__info__labelcheck, char *,
3165             "comparing server's file label(1) with client label(2) (vp(3))",
3166             bslabel_t *, slabel, bslabel_t *, clabel, vnode_t *, vp);
3167 
3168         if (flag == EQUALITY_CHECK)
3169                 result = blequal(clabel, slabel);
3170         else
3171                 result = bldominates(clabel, slabel);
3172         label_rele(tslabel);
3173         return (result);
3174 }
3175 
3176 /*
3177  * Callback function to return the loaned buffers.
3178  * Calls VOP_RETZCBUF() only after all uio_iov[]
3179  * buffers are returned. nu_ref maintains the count.
3180  */
3181 void
3182 rfs_free_xuio(void *free_arg)
3183 {
3184         uint_t ref;
3185         nfs_xuio_t *nfsuiop = (nfs_xuio_t *)free_arg;
3186 
3187         ref = atomic_dec_uint_nv(&nfsuiop->nu_ref);
3188 
3189         /*
3190          * Call VOP_RETZCBUF() only when all the iov buffers
3191          * are sent OTW.
3192          */
3193         if (ref != 0)
3194                 return;
3195 
3196         if (((uio_t *)nfsuiop)->uio_extflg & UIO_XUIO) {
3197                 (void) VOP_RETZCBUF(nfsuiop->nu_vp, (xuio_t *)free_arg, NULL,
3198                     NULL);
3199                 VN_RELE(nfsuiop->nu_vp);
3200         }
3201 
3202         kmem_cache_free(nfs_xuio_cache, free_arg);
3203 }
3204 
3205 xuio_t *
3206 rfs_setup_xuio(vnode_t *vp)
3207 {
3208         nfs_xuio_t *nfsuiop;
3209 
3210         nfsuiop = kmem_cache_alloc(nfs_xuio_cache, KM_SLEEP);
3211 
3212         bzero(nfsuiop, sizeof (nfs_xuio_t));
3213         nfsuiop->nu_vp = vp;
3214 
3215         /*
3216          * ref count set to 1. more may be added
3217          * if multiple mblks refer to multiple iov's.
3218          * This is done in uio_to_mblk().
3219          */
3220 
3221         nfsuiop->nu_ref = 1;
3222 
3223         nfsuiop->nu_frtn.free_func = rfs_free_xuio;
3224         nfsuiop->nu_frtn.free_arg = (char *)nfsuiop;
3225 
3226         nfsuiop->nu_uio.xu_type = UIOTYPE_ZEROCOPY;
3227 
3228         return (&nfsuiop->nu_uio);
3229 }
3230 
3231 mblk_t *
3232 uio_to_mblk(uio_t *uiop)
3233 {
3234         struct iovec *iovp;
3235         int i;
3236         mblk_t *mp, *mp1;
3237         nfs_xuio_t *nfsuiop = (nfs_xuio_t *)uiop;
3238 
3239         if (uiop->uio_iovcnt == 0)
3240                 return (NULL);
3241 
3242         iovp = uiop->uio_iov;
3243         mp = mp1 = esballoca((uchar_t *)iovp->iov_base, iovp->iov_len,
3244             BPRI_MED, &nfsuiop->nu_frtn);
3245         ASSERT(mp != NULL);
3246 
3247         mp->b_wptr += iovp->iov_len;
3248         mp->b_datap->db_type = M_DATA;
3249 
3250         for (i = 1; i < uiop->uio_iovcnt; i++) {
3251                 iovp = (uiop->uio_iov + i);
3252 
3253                 mp1->b_cont = esballoca(
3254                     (uchar_t *)iovp->iov_base, iovp->iov_len, BPRI_MED,
3255                     &nfsuiop->nu_frtn);
3256 
3257                 mp1 = mp1->b_cont;
3258                 ASSERT(mp1 != NULL);
3259                 mp1->b_wptr += iovp->iov_len;
3260                 mp1->b_datap->db_type = M_DATA;
3261         }
3262 
3263         nfsuiop->nu_ref = uiop->uio_iovcnt;
3264 
3265         return (mp);
3266 }
3267 
3268 /*
3269  * Allocate memory to hold data for a read request of len bytes.
3270  *
3271  * We don't allocate buffers greater than kmem_max_cached in size to avoid
3272  * allocating memory from the kmem_oversized arena.  If we allocate oversized
3273  * buffers, we incur heavy cross-call activity when freeing these large buffers
3274  * in the TCP receive path. Note that we can't set b_wptr here since the
3275  * length of the data returned may differ from the length requested when
3276  * reading the end of a file; we set b_wptr in rfs_rndup_mblks() once the
3277  * length of the read is known.
3278  */
3279 mblk_t *
3280 rfs_read_alloc(uint_t len, struct iovec **iov, int *iovcnt)
3281 {
3282         struct iovec *iovarr;
3283         mblk_t *mp, **mpp = &mp;
3284         size_t mpsize;
3285         uint_t remain = len;
3286         int i, err = 0;
3287 
3288         *iovcnt = howmany(len, kmem_max_cached);
3289 
3290         iovarr = kmem_alloc(*iovcnt * sizeof (struct iovec), KM_SLEEP);
3291         *iov = iovarr;
3292 
3293         for (i = 0; i < *iovcnt; remain -= mpsize, i++) {
3294                 ASSERT(remain <= len);
3295                 /*
3296                  * We roundup the size we allocate to a multiple of
3297                  * BYTES_PER_XDR_UNIT (4 bytes) so that the call to
3298                  * xdrmblk_putmblk() never fails.
3299                  */
3300                 ASSERT(kmem_max_cached % BYTES_PER_XDR_UNIT == 0);
3301                 mpsize = MIN(kmem_max_cached, remain);
3302                 *mpp = allocb_wait(RNDUP(mpsize), BPRI_MED, STR_NOSIG, &err);
3303                 ASSERT(*mpp != NULL);
3304                 ASSERT(err == 0);
3305 
3306                 iovarr[i].iov_base = (caddr_t)(*mpp)->b_rptr;
3307                 iovarr[i].iov_len = mpsize;
3308                 mpp = &(*mpp)->b_cont;
3309         }
3310         return (mp);
3311 }
3312 
3313 void
3314 rfs_rndup_mblks(mblk_t *mp, uint_t len, int buf_loaned)
3315 {
3316         int i;
3317         int alloc_err = 0;
3318         mblk_t *rmp;
3319         uint_t mpsize, remainder;
3320 
3321         remainder = P2NPHASE(len, BYTES_PER_XDR_UNIT);
3322 
3323         /*
3324          * Non copy-reduction case.  This function assumes that blocks were
3325          * allocated in multiples of BYTES_PER_XDR_UNIT bytes, which makes this
3326          * padding safe without bounds checking.
3327          */
3328         if (!buf_loaned) {
3329                 /*
3330                  * Set the size of each mblk in the chain until we've consumed
3331                  * the specified length for all but the last one.
3332                  */
3333                 while ((mpsize = MBLKSIZE(mp)) < len) {
3334                         ASSERT(mpsize % BYTES_PER_XDR_UNIT == 0);
3335                         mp->b_wptr += mpsize;
3336                         len -= mpsize;
3337                         mp = mp->b_cont;
3338                         ASSERT(mp != NULL);
3339                 }
3340 
3341                 ASSERT(len + remainder <= mpsize);
3342                 mp->b_wptr += len;
3343                 for (i = 0; i < remainder; i++)
3344                         *mp->b_wptr++ = '\0';
3345                 return;
3346         }
3347 
3348         /*
3349          * No remainder mblk required.
3350          */
3351         if (remainder == 0)
3352                 return;
3353 
3354         /*
3355          * Get to the last mblk in the chain.
3356          */
3357         while (mp->b_cont != NULL)
3358                 mp = mp->b_cont;
3359 
3360         /*
3361          * In case of copy-reduction mblks, the size of the mblks are fixed
3362          * and are of the size of the loaned buffers.  Allocate a remainder
3363          * mblk and chain it to the data buffers. This is sub-optimal, but not
3364          * expected to happen commonly.
3365          */
3366         rmp = allocb_wait(remainder, BPRI_MED, STR_NOSIG, &alloc_err);
3367         ASSERT(rmp != NULL);
3368         ASSERT(alloc_err == 0);
3369 
3370         for (i = 0; i < remainder; i++)
3371                 *rmp->b_wptr++ = '\0';
3372 
3373         rmp->b_datap->db_type = M_DATA;
3374         mp->b_cont = rmp;
3375 }