5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 1990, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright (c) 2011 Bayard G. Bell. All rights reserved.
  24  * Copyright (c) 2013 by Delphix. All rights reserved.
  25  * Copyright 2014 Nexenta Systems, Inc.  All rights reserved.
  26  * Copyright (c) 2017 Joyent Inc
  27  */
  28 
  29 /*
  30  *      Copyright (c) 1983,1984,1985,1986,1987,1988,1989  AT&T.
  31  *      All rights reserved.
  32  *      Use is subject to license terms.
  33  */
  34 
  35 #include <sys/param.h>
  36 #include <sys/types.h>
  37 #include <sys/systm.h>
  38 #include <sys/cred.h>
  39 #include <sys/proc.h>
  40 #include <sys/user.h>
  41 #include <sys/buf.h>
  42 #include <sys/vfs.h>
  43 #include <sys/vnode.h>
  44 #include <sys/pathname.h>
  45 #include <sys/uio.h>
  46 #include <sys/file.h>
 
 
  66 #include <sys/vtrace.h>
  67 #include <sys/mode.h>
  68 #include <sys/acl.h>
  69 #include <sys/sdt.h>
  70 #include <sys/debug.h>
  71 
  72 #include <rpc/types.h>
  73 #include <rpc/auth.h>
  74 #include <rpc/auth_unix.h>
  75 #include <rpc/auth_des.h>
  76 #include <rpc/svc.h>
  77 #include <rpc/xdr.h>
  78 #include <rpc/rpc_rdma.h>
  79 
  80 #include <nfs/nfs.h>
  81 #include <nfs/export.h>
  82 #include <nfs/nfssys.h>
  83 #include <nfs/nfs_clnt.h>
  84 #include <nfs/nfs_acl.h>
  85 #include <nfs/nfs_log.h>
  86 #include <nfs/nfs_cmd.h>
  87 #include <nfs/lm.h>
  88 #include <nfs/nfs_dispatch.h>
  89 #include <nfs/nfs4_drc.h>
  90 
  91 #include <sys/modctl.h>
  92 #include <sys/cladm.h>
  93 #include <sys/clconf.h>
  94 
  95 #include <sys/tsol/label.h>
  96 
  97 #define MAXHOST 32
  98 const char *kinet_ntop6(uchar_t *, char *, size_t);
  99 
 100 /*
 101  * Module linkage information.
 102  */
 103 
 104 static struct modlmisc modlmisc = {
 105         &mod_miscops, "NFS server module"
 106 };
 107 
 108 static struct modlinkage modlinkage = {
 109         MODREV_1, (void *)&modlmisc, NULL
 110 };
 111 
 112 kmem_cache_t *nfs_xuio_cache;
 113 int nfs_loaned_buffers = 0;
 114 
 115 int
 116 _init(void)
 117 {
 118         int status;
 119 
 120         if ((status = nfs_srvinit()) != 0) {
 121                 cmn_err(CE_WARN, "_init: nfs_srvinit failed");
 122                 return (status);
 123         }
 124 
 125         status = mod_install((struct modlinkage *)&modlinkage);
 126         if (status != 0) {
 127                 /*
 128                  * Could not load module, cleanup previous
 129                  * initialization work.
 130                  */
 131                 nfs_srvfini();
 132 
 133                 return (status);
 134         }
 135 
 136         /*
 137          * Initialise some placeholders for nfssys() calls. These have
 138          * to be declared by the nfs module, since that handles nfssys()
 139          * calls - also used by NFS clients - but are provided by this
 140          * nfssrv module. These also then serve as confirmation to the
 141          * relevant code in nfs that nfssrv has been loaded, as they're
 142          * initially NULL.
 143          */
 
 160 {
 161         return (EBUSY);
 162 }
 163 
 164 int
 165 _info(struct modinfo *modinfop)
 166 {
 167         return (mod_info(&modlinkage, modinfop));
 168 }
 169 
 170 /*
 171  * PUBLICFH_CHECK() checks if the dispatch routine supports
 172  * RPC_PUBLICFH_OK, if the filesystem is exported public, and if the
 173  * incoming request is using the public filehandle. The check duplicates
 174  * the exportmatch() call done in checkexport(), and we should consider
 175  * modifying those routines to avoid the duplication. For now, we optimize
 176  * by calling exportmatch() only after checking that the dispatch routine
 177  * supports RPC_PUBLICFH_OK, and if the filesystem is explicitly exported
 178  * public (i.e., not the placeholder).
 179  */
 180 #define PUBLICFH_CHECK(disp, exi, fsid, xfid) \
 181                 ((disp->dis_flags & RPC_PUBLICFH_OK) && \
 182                 ((exi->exi_export.ex_flags & EX_PUBLIC) || \
 183                 (exi == exi_public && exportmatch(exi_root, \
 184                 fsid, xfid))))
 185 
 186 static void     nfs_srv_shutdown_all(int);
 187 static void     rfs4_server_start(int);
 188 static void     nullfree(void);
 189 static void     rfs_dispatch(struct svc_req *, SVCXPRT *);
 190 static void     acl_dispatch(struct svc_req *, SVCXPRT *);
 191 static void     common_dispatch(struct svc_req *, SVCXPRT *,
 192                 rpcvers_t, rpcvers_t, char *,
 193                 struct rpc_disptable *);
 194 static void     hanfsv4_failover(void);
 195 static  int     checkauth(struct exportinfo *, struct svc_req *, cred_t *, int,
 196                 bool_t, bool_t *);
 197 static char     *client_name(struct svc_req *req);
 198 static char     *client_addr(struct svc_req *req, char *buf);
 199 extern  int     sec_svc_getcred(struct svc_req *, cred_t *cr, char **, int *);
 200 extern  bool_t  sec_svc_inrootlist(int, caddr_t, int, caddr_t *);
 201 
 202 #define NFSLOG_COPY_NETBUF(exi, xprt, nb)       {               \
 203         (nb)->maxlen = (xprt)->xp_rtaddr.maxlen;          \
 204         (nb)->len = (xprt)->xp_rtaddr.len;                        \
 205         (nb)->buf = kmem_alloc((nb)->len, KM_SLEEP);              \
 206         bcopy((xprt)->xp_rtaddr.buf, (nb)->buf, (nb)->len);    \
 207         }
 208 
 209 /*
 210  * Public Filehandle common nfs routines
 211  */
 212 static int      MCLpath(char **);
 213 static void     URLparse(char *);
 214 
 215 /*
 216  * NFS callout table.
 217  * This table is used by svc_getreq() to dispatch a request with
 218  * a given prog/vers pair to an appropriate service provider
 219  * dispatch routine.
 220  *
 
 231         __nfs_sc_clts
 232 };
 233 
 234 static SVC_CALLOUT __nfs_sc_cots[] = {
 235         { NFS_PROGRAM,     NFS_VERSMIN,     NFS_VERSMAX,        rfs_dispatch },
 236         { NFS_ACL_PROGRAM, NFS_ACL_VERSMIN, NFS_ACL_VERSMAX,    acl_dispatch }
 237 };
 238 
 239 static SVC_CALLOUT_TABLE nfs_sct_cots = {
 240         sizeof (__nfs_sc_cots) / sizeof (__nfs_sc_cots[0]), FALSE, __nfs_sc_cots
 241 };
 242 
 243 static SVC_CALLOUT __nfs_sc_rdma[] = {
 244         { NFS_PROGRAM,     NFS_VERSMIN,     NFS_VERSMAX,        rfs_dispatch },
 245         { NFS_ACL_PROGRAM, NFS_ACL_VERSMIN, NFS_ACL_VERSMAX,    acl_dispatch }
 246 };
 247 
 248 static SVC_CALLOUT_TABLE nfs_sct_rdma = {
 249         sizeof (__nfs_sc_rdma) / sizeof (__nfs_sc_rdma[0]), FALSE, __nfs_sc_rdma
 250 };
 251 rpcvers_t nfs_versmin = NFS_VERSMIN_DEFAULT;
 252 rpcvers_t nfs_versmax = NFS_VERSMAX_DEFAULT;
 253 
 254 /*
 255  * Used to track the state of the server so that initialization
 256  * can be done properly.
 257  */
 258 typedef enum {
 259         NFS_SERVER_STOPPED,     /* server state destroyed */
 260         NFS_SERVER_STOPPING,    /* server state being destroyed */
 261         NFS_SERVER_RUNNING,
 262         NFS_SERVER_QUIESCED,    /* server state preserved */
 263         NFS_SERVER_OFFLINE      /* server pool offline */
 264 } nfs_server_running_t;
 265 
 266 static nfs_server_running_t nfs_server_upordown;
 267 static kmutex_t nfs_server_upordown_lock;
 268 static  kcondvar_t nfs_server_upordown_cv;
 269 
 270 /*
 271  * DSS: distributed stable storage
 272  * lists of all DSS paths: current, and before last warmstart
 273  */
 274 nvlist_t *rfs4_dss_paths, *rfs4_dss_oldpaths;
 275 
 276 int rfs4_dispatch(struct rpcdisp *, struct svc_req *, SVCXPRT *, char *);
 277 bool_t rfs4_minorvers_mismatch(struct svc_req *, SVCXPRT *, void *);
 278 
 279 /*
 280  * RDMA wait variables.
 281  */
 282 static kcondvar_t rdma_wait_cv;
 283 static kmutex_t rdma_wait_mutex;
 284 
 285 /*
 286  * Will be called at the point the server pool is being unregistered
 287  * from the pool list. From that point onwards, the pool is waiting
 288  * to be drained and as such the server state is stale and pertains
 289  * to the old instantiation of the NFS server pool.
 290  */
 291 void
 292 nfs_srv_offline(void)
 293 {
 294         mutex_enter(&nfs_server_upordown_lock);
 295         if (nfs_server_upordown == NFS_SERVER_RUNNING) {
 296                 nfs_server_upordown = NFS_SERVER_OFFLINE;
 297         }
 298         mutex_exit(&nfs_server_upordown_lock);
 299 }
 300 
 301 /*
 302  * Will be called at the point the server pool is being destroyed so
 303  * all transports have been closed and no service threads are in
 304  * existence.
 305  *
 306  * If we quiesce the server, we're shutting it down without destroying the
 307  * server state. This allows it to warm start subsequently.
 308  */
 309 void
 310 nfs_srv_stop_all(void)
 311 {
 312         int quiesce = 0;
 313         nfs_srv_shutdown_all(quiesce);
 314 }
 315 
 316 /*
 317  * This alternative shutdown routine can be requested via nfssys()
 318  */
 319 void
 320 nfs_srv_quiesce_all(void)
 321 {
 322         int quiesce = 1;
 323         nfs_srv_shutdown_all(quiesce);
 324 }
 325 
 326 static void
 327 nfs_srv_shutdown_all(int quiesce) {
 328         mutex_enter(&nfs_server_upordown_lock);
 329         if (quiesce) {
 330                 if (nfs_server_upordown == NFS_SERVER_RUNNING ||
 331                         nfs_server_upordown == NFS_SERVER_OFFLINE) {
 332                         nfs_server_upordown = NFS_SERVER_QUIESCED;
 333                         cv_signal(&nfs_server_upordown_cv);
 334 
 335                         /* reset DSS state, for subsequent warm restart */
 336                         rfs4_dss_numnewpaths = 0;
 337                         rfs4_dss_newpaths = NULL;
 338 
 339                         cmn_err(CE_NOTE, "nfs_server: server is now quiesced; "
 340                             "NFSv4 state has been preserved");
 341                 }
 342         } else {
 343                 if (nfs_server_upordown == NFS_SERVER_OFFLINE) {
 344                         nfs_server_upordown = NFS_SERVER_STOPPING;
 345                         mutex_exit(&nfs_server_upordown_lock);
 346                         rfs4_state_fini();
 347                         rfs4_fini_drc(nfs4_drc);
 348                         mutex_enter(&nfs_server_upordown_lock);
 349                         nfs_server_upordown = NFS_SERVER_STOPPED;
 350                         cv_signal(&nfs_server_upordown_cv);
 351                 }
 352         }
 353         mutex_exit(&nfs_server_upordown_lock);
 354 }
 355 
 356 static int
 357 nfs_srv_set_sc_versions(struct file *fp, SVC_CALLOUT_TABLE **sctpp,
 358                         rpcvers_t versmin, rpcvers_t versmax)
 359 {
 360         struct strioctl strioc;
 361         struct T_info_ack tinfo;
 362         int             error, retval;
 363 
 364         /*
 365          * Find out what type of transport this is.
 366          */
 367         strioc.ic_cmd = TI_GETINFO;
 368         strioc.ic_timout = -1;
 369         strioc.ic_len = sizeof (tinfo);
 370         strioc.ic_dp = (char *)&tinfo;
 371         tinfo.PRIM_type = T_INFO_REQ;
 372 
 373         error = strioctl(fp->f_vnode, I_STR, (intptr_t)&strioc, 0, K_TO_K,
 
 401                         versmax = NFS_ACL_VERSMAX;
 402                 __nfs_sc_cots[1].sc_versmin = versmin;
 403                 __nfs_sc_cots[1].sc_versmax = versmax;
 404                 *sctpp = &nfs_sct_cots;
 405                 break;
 406         default:
 407                 error = EINVAL;
 408         }
 409 
 410         return (error);
 411 }
 412 
 413 /*
 414  * NFS Server system call.
 415  * Does all of the work of running a NFS server.
 416  * uap->fd is the fd of an open transport provider
 417  */
 418 int
 419 nfs_svc(struct nfs_svc_args *arg, model_t model)
 420 {
 421         file_t *fp;
 422         SVCMASTERXPRT *xprt;
 423         int error;
 424         int readsize;
 425         char buf[KNC_STRSIZE];
 426         size_t len;
 427         STRUCT_HANDLE(nfs_svc_args, uap);
 428         struct netbuf addrmask;
 429         SVC_CALLOUT_TABLE *sctp = NULL;
 430 
 431 #ifdef lint
 432         model = model;          /* STRUCT macros don't always refer to it */
 433 #endif
 434 
 435         STRUCT_SET_HANDLE(uap, model, arg);
 436 
 437         /* Check privileges in nfssys() */
 438 
 439         if ((fp = getf(STRUCT_FGET(uap, fd))) == NULL)
 440                 return (EBADF);
 441 
 442         /*
 443          * Set read buffer size to rsize
 444          * and add room for RPC headers.
 445          */
 446         readsize = nfs3tsize() + (RPC_MAXDATASIZE - NFS_MAXDATA);
 447         if (readsize < RPC_MAXDATASIZE)
 448                 readsize = RPC_MAXDATASIZE;
 449 
 450         error = copyinstr((const char *)STRUCT_FGETP(uap, netid), buf,
 451             KNC_STRSIZE, &len);
 452         if (error) {
 453                 releasef(STRUCT_FGET(uap, fd));
 454                 return (error);
 455         }
 456 
 457         addrmask.len = STRUCT_FGET(uap, addrmask.len);
 458         addrmask.maxlen = STRUCT_FGET(uap, addrmask.maxlen);
 459         addrmask.buf = kmem_alloc(addrmask.maxlen, KM_SLEEP);
 460         error = copyin(STRUCT_FGETP(uap, addrmask.buf), addrmask.buf,
 461             addrmask.len);
 462         if (error) {
 463                 releasef(STRUCT_FGET(uap, fd));
 464                 kmem_free(addrmask.buf, addrmask.maxlen);
 465                 return (error);
 466         }
 467 
 468         nfs_versmin = STRUCT_FGET(uap, versmin);
 469         nfs_versmax = STRUCT_FGET(uap, versmax);
 470 
 471         /* Double check the vers min/max ranges */
 472         if ((nfs_versmin > nfs_versmax) ||
 473             (nfs_versmin < NFS_VERSMIN) ||
 474             (nfs_versmax > NFS_VERSMAX)) {
 475                 nfs_versmin = NFS_VERSMIN_DEFAULT;
 476                 nfs_versmax = NFS_VERSMAX_DEFAULT;
 477         }
 478 
 479         if (error =
 480             nfs_srv_set_sc_versions(fp, &sctp, nfs_versmin, nfs_versmax)) {
 481                 releasef(STRUCT_FGET(uap, fd));
 482                 kmem_free(addrmask.buf, addrmask.maxlen);
 483                 return (error);
 484         }
 485 
 486         /* Initialize nfsv4 server */
 487         if (nfs_versmax == (rpcvers_t)NFS_V4)
 488                 rfs4_server_start(STRUCT_FGET(uap, delegation));
 489 
 490         /* Create a transport handle. */
 491         error = svc_tli_kcreate(fp, readsize, buf, &addrmask, &xprt,
 492             sctp, NULL, NFS_SVCPOOL_ID, TRUE);
 493 
 494         if (error)
 495                 kmem_free(addrmask.buf, addrmask.maxlen);
 496 
 497         releasef(STRUCT_FGET(uap, fd));
 498 
 499         /* HA-NFSv4: save the cluster nodeid */
 500         if (cluster_bootflags & CLUSTER_BOOTED)
 501                 lm_global_nlmid = clconf_get_nodeid();
 502 
 503         return (error);
 504 }
 505 
 506 static void
 507 rfs4_server_start(int nfs4_srv_delegation)
 508 {
 509         /*
 510          * Determine if the server has previously been "started" and
 511          * if not, do the per instance initialization
 512          */
 513         mutex_enter(&nfs_server_upordown_lock);
 514 
 515         if (nfs_server_upordown != NFS_SERVER_RUNNING) {
 516                 /* Do we need to stop and wait on the previous server? */
 517                 while (nfs_server_upordown == NFS_SERVER_STOPPING ||
 518                     nfs_server_upordown == NFS_SERVER_OFFLINE)
 519                         cv_wait(&nfs_server_upordown_cv,
 520                             &nfs_server_upordown_lock);
 521 
 522                 if (nfs_server_upordown != NFS_SERVER_RUNNING) {
 523                         (void) svc_pool_control(NFS_SVCPOOL_ID,
 524                             SVCPSET_UNREGISTER_PROC, (void *)&nfs_srv_offline);
 525                         (void) svc_pool_control(NFS_SVCPOOL_ID,
 526                             SVCPSET_SHUTDOWN_PROC, (void *)&nfs_srv_stop_all);
 527 
 528                         /* is this an nfsd warm start? */
 529                         if (nfs_server_upordown == NFS_SERVER_QUIESCED) {
 530                                 cmn_err(CE_NOTE, "nfs_server: "
 531                                     "server was previously quiesced; "
 532                                     "existing NFSv4 state will be re-used");
 533 
 534                                 /*
 535                                  * HA-NFSv4: this is also the signal
 536                                  * that a Resource Group failover has
 537                                  * occurred.
 538                                  */
 539                                 if (cluster_bootflags & CLUSTER_BOOTED)
 540                                         hanfsv4_failover();
 541                         } else {
 542                                 /* cold start */
 543                                 rfs4_state_init();
 544                                 nfs4_drc = rfs4_init_drc(nfs4_drc_max,
 545                                     nfs4_drc_hash);
 546                         }
 547 
 548                         /*
 549                          * Check to see if delegation is to be
 550                          * enabled at the server
 551                          */
 552                         if (nfs4_srv_delegation != FALSE)
 553                                 rfs4_set_deleg_policy(SRV_NORMAL_DELEGATE);
 554 
 555                         nfs_server_upordown = NFS_SERVER_RUNNING;
 556                 }
 557                 cv_signal(&nfs_server_upordown_cv);
 558         }
 559         mutex_exit(&nfs_server_upordown_lock);
 560 }
 561 
 562 /*
 563  * If RDMA device available,
 564  * start RDMA listener.
 565  */
 566 int
 567 rdma_start(struct rdma_svc_args *rsa)
 568 {
 569         int error;
 570         rdma_xprt_group_t started_rdma_xprts;
 571         rdma_stat stat;
 572         int svc_state = 0;
 573 
 574         /* Double check the vers min/max ranges */
 575         if ((rsa->nfs_versmin > rsa->nfs_versmax) ||
 576             (rsa->nfs_versmin < NFS_VERSMIN) ||
 577             (rsa->nfs_versmax > NFS_VERSMAX)) {
 578                 rsa->nfs_versmin = NFS_VERSMIN_DEFAULT;
 579                 rsa->nfs_versmax = NFS_VERSMAX_DEFAULT;
 580         }
 581         nfs_versmin = rsa->nfs_versmin;
 582         nfs_versmax = rsa->nfs_versmax;
 583 
 584         /* Set the versions in the callout table */
 585         __nfs_sc_rdma[0].sc_versmin = rsa->nfs_versmin;
 586         __nfs_sc_rdma[0].sc_versmax = rsa->nfs_versmax;
 587         /* For the NFS_ACL program, check the max version */
 588         __nfs_sc_rdma[1].sc_versmin = rsa->nfs_versmin;
 589         if (rsa->nfs_versmax > NFS_ACL_VERSMAX)
 590                 __nfs_sc_rdma[1].sc_versmax = NFS_ACL_VERSMAX;
 591         else
 592                 __nfs_sc_rdma[1].sc_versmax = rsa->nfs_versmax;
 593 
 594         /* Initialize nfsv4 server */
 595         if (rsa->nfs_versmax == (rpcvers_t)NFS_V4)
 596                 rfs4_server_start(rsa->delegation);
 597 
 598         started_rdma_xprts.rtg_count = 0;
 599         started_rdma_xprts.rtg_listhead = NULL;
 600         started_rdma_xprts.rtg_poolid = rsa->poolid;
 601 
 602 restart:
 603         error = svc_rdma_kcreate(rsa->netid, &nfs_sct_rdma, rsa->poolid,
 604             &started_rdma_xprts);
 605 
 606         svc_state = !error;
 607 
 608         while (!error) {
 609 
 610                 /*
 611                  * wait till either interrupted by a signal on
 612                  * nfs service stop/restart or signalled by a
 613                  * rdma plugin attach/detatch.
 614                  */
 615 
 616                 stat = rdma_kwait();
 617 
 618                 /*
 619                  * stop services if running -- either on a HCA detach event
 620                  * or if the nfs service is stopped/restarted.
 621                  */
 622 
 623                 if ((stat == RDMA_HCA_DETACH || stat == RDMA_INTR) &&
 624                     svc_state) {
 625                         rdma_stop(&started_rdma_xprts);
 626                         svc_state = 0;
 627                 }
 628 
 629                 /*
 630                  * nfs service stop/restart, break out of the
 631                  * wait loop and return;
 632                  */
 633                 if (stat == RDMA_INTR)
 
 644                 /*
 645                  * loop until a nfs service stop/restart
 646                  */
 647         }
 648 
 649         return (error);
 650 }
 651 
 652 /* ARGSUSED */
 653 void
 654 rpc_null(caddr_t *argp, caddr_t *resp, struct exportinfo *exi,
 655     struct svc_req *req, cred_t *cr, bool_t ro)
 656 {
 657 }
 658 
 659 /* ARGSUSED */
 660 void
 661 rpc_null_v3(caddr_t *argp, caddr_t *resp, struct exportinfo *exi,
 662     struct svc_req *req, cred_t *cr, bool_t ro)
 663 {
 664         DTRACE_NFSV3_3(op__null__start, struct svc_req *, req,
 665             cred_t *, cr, vnode_t *, NULL);
 666         DTRACE_NFSV3_3(op__null__done, struct svc_req *, req,
 667             cred_t *, cr, vnode_t *, NULL);
 668 }
 669 
 670 /* ARGSUSED */
 671 static void
 672 rfs_error(caddr_t *argp, caddr_t *resp, struct exportinfo *exi,
 673     struct svc_req *req, cred_t *cr, bool_t ro)
 674 {
 675         /* return (EOPNOTSUPP); */
 676 }
 677 
 678 static void
 679 nullfree(void)
 680 {
 681 }
 682 
 683 static char *rfscallnames_v2[] = {
 684         "RFS2_NULL",
 685         "RFS2_GETATTR",
 686         "RFS2_SETATTR",
 687         "RFS2_ROOT",
 
1325         /* RFS3_PATHCONF = 20 */
1326         PATHCONF3res nfs3_pathconf_res;
1327 
1328         /* RFS3_COMMIT = 21 */
1329         COMMIT3res nfs3_commit_res;
1330 
1331         /*
1332          * NFS VERSION 4
1333          */
1334 
1335         /* RFS_NULL = 0 */
1336 
1337         /* RFS4_COMPOUND = 1 */
1338         COMPOUND4res nfs4_compound_res;
1339 
1340 };
1341 
1342 static struct rpc_disptable rfs_disptable[] = {
1343         {sizeof (rfsdisptab_v2) / sizeof (rfsdisptab_v2[0]),
1344             rfscallnames_v2,
1345             &rfsproccnt_v2_ptr, rfsdisptab_v2},
1346         {sizeof (rfsdisptab_v3) / sizeof (rfsdisptab_v3[0]),
1347             rfscallnames_v3,
1348             &rfsproccnt_v3_ptr, rfsdisptab_v3},
1349         {sizeof (rfsdisptab_v4) / sizeof (rfsdisptab_v4[0]),
1350             rfscallnames_v4,
1351             &rfsproccnt_v4_ptr, rfsdisptab_v4},
1352 };
1353 
1354 /*
1355  * If nfs_portmon is set, then clients are required to use privileged
1356  * ports (ports < IPPORT_RESERVED) in order to get NFS services.
1357  *
1358  * N.B.: this attempt to carry forward the already ill-conceived notion
1359  * of privileged ports for TCP/UDP is really quite ineffectual.  Not only
1360  * is it transport-dependent, it's laughably easy to spoof.  If you're
1361  * really interested in security, you must start with secure RPC instead.
1362  */
1363 static int nfs_portmon = 0;
1364 
#ifdef DEBUG
/*
 * Statistics on the per-transport credential in common_dispatch():
 * cred_hits counts requests where the cached cred had a single
 * reference, cred_misses counts those where a fresh one had to be
 * obtained.
 */
static int cred_hits = 0;
static int cred_misses = 0;

/*
 * Debug code to allow disabling of rfs_dispatch() use of
 * fastxdrargs() and fastxdrres() calls for testing purposes.
 */
static int rfs_no_fast_xdrargs = 0;
static int rfs_no_fast_xdrres = 0;
#endif	/* DEBUG */
1379 
1380 union acl_args {
1381         /*
1382          * ACL VERSION 2
1383          */
1384 
1385         /* ACL2_NULL = 0 */
1386 
1387         /* ACL2_GETACL = 1 */
1388         GETACL2args acl2_getacl_args;
1389 
1390         /* ACL2_SETACL = 2 */
 
1454         GETXATTRDIR3res acl3_getxattrdir_res;
1455 
1456 };
1457 
1458 static bool_t
1459 auth_tooweak(struct svc_req *req, char *res)
1460 {
1461 
1462         if (req->rq_vers == NFS_VERSION && req->rq_proc == RFS_LOOKUP) {
1463                 struct nfsdiropres *dr = (struct nfsdiropres *)res;
1464                 if ((enum wnfsstat)dr->dr_status == WNFSERR_CLNT_FLAVOR)
1465                         return (TRUE);
1466         } else if (req->rq_vers == NFS_V3 && req->rq_proc == NFSPROC3_LOOKUP) {
1467                 LOOKUP3res *resp = (LOOKUP3res *)res;
1468                 if ((enum wnfsstat)resp->status == WNFSERR_CLNT_FLAVOR)
1469                         return (TRUE);
1470         }
1471         return (FALSE);
1472 }
1473 
1474 
1475 static void
1476 common_dispatch(struct svc_req *req, SVCXPRT *xprt, rpcvers_t min_vers,
1477                 rpcvers_t max_vers, char *pgmname,
1478                 struct rpc_disptable *disptable)
1479 {
1480         int which;
1481         rpcvers_t vers;
1482         char *args;
1483         union {
1484                         union rfs_args ra;
1485                         union acl_args aa;
1486                 } args_buf;
1487         char *res;
1488         union {
1489                         union rfs_res rr;
1490                         union acl_res ar;
1491                 } res_buf;
1492         struct rpcdisp *disp = NULL;
1493         int dis_flags = 0;
1494         cred_t *cr;
1495         int error = 0;
1496         int anon_ok;
1497         struct exportinfo *exi = NULL;
1498         unsigned int nfslog_rec_id;
1499         int dupstat;
1500         struct dupreq *dr;
1501         int authres;
1502         bool_t publicfh_ok = FALSE;
1503         enum_t auth_flavor;
1504         bool_t dupcached = FALSE;
1505         struct netbuf   nb;
1506         bool_t logging_enabled = FALSE;
1507         struct exportinfo *nfslog_exi = NULL;
1508         char **procnames;
1509         char cbuf[INET6_ADDRSTRLEN];    /* to hold both IPv4 and IPv6 addr */
1510         bool_t ro = FALSE;
1511 
1512         vers = req->rq_vers;
1513 
1514         if (vers < min_vers || vers > max_vers) {
1515                 svcerr_progvers(req->rq_xprt, min_vers, max_vers);
1516                 error++;
1517                 cmn_err(CE_NOTE, "%s: bad version number %u", pgmname, vers);
1518                 goto done;
1519         }
1520         vers -= min_vers;
1521 
1522         which = req->rq_proc;
1523         if (which < 0 || which >= disptable[(int)vers].dis_nprocs) {
1524                 svcerr_noproc(req->rq_xprt);
1525                 error++;
1526                 goto done;
1527         }
1528 
1529         (*(disptable[(int)vers].dis_proccntp))[which].value.ui64++;
1530 
1531         disp = &disptable[(int)vers].dis_table[which];
1532         procnames = disptable[(int)vers].dis_procnames;
1533 
1534         auth_flavor = req->rq_cred.oa_flavor;
1535 
1536         /*
1537          * Deserialize into the args struct.
1538          */
1539         args = (char *)&args_buf;
1540 
1541 #ifdef DEBUG
1542         if (rfs_no_fast_xdrargs || (auth_flavor == RPCSEC_GSS) ||
1543             disp->dis_fastxdrargs == NULL_xdrproc_t ||
1544             !SVC_GETARGS(xprt, disp->dis_fastxdrargs, (char *)&args))
1545 #else
1546         if ((auth_flavor == RPCSEC_GSS) ||
1547             disp->dis_fastxdrargs == NULL_xdrproc_t ||
1548             !SVC_GETARGS(xprt, disp->dis_fastxdrargs, (char *)&args))
1549 #endif
 
1615                  * client's mount operation to fail.  As a work-around,
1616                  * we check here to see if the request is a getattr or
1617                  * statfs operation on the exported vnode itself, and
1618                  * pass a flag to checkauth with the result of this test.
1619                  *
1620                  * The filehandle refers to the mountpoint itself if
1621                  * the fh_data and fh_xdata portions of the filehandle
1622                  * are equal.
1623                  *
1624                  * Added anon_ok argument to checkauth().
1625                  */
1626 
1627                 if ((dis_flags & RPC_ALLOWANON) && EQFID(fid, xfid))
1628                         anon_ok = 1;
1629                 else
1630                         anon_ok = 0;
1631 
1632                 cr = xprt->xp_cred;
1633                 ASSERT(cr != NULL);
1634 #ifdef DEBUG
1635                 if (crgetref(cr) != 1) {
1636                         crfree(cr);
1637                         cr = crget();
1638                         xprt->xp_cred = cr;
1639                         cred_misses++;
1640                 } else
1641                         cred_hits++;
1642 #else
1643                 if (crgetref(cr) != 1) {
1644                         crfree(cr);
1645                         cr = crget();
1646                         xprt->xp_cred = cr;
1647                 }
1648 #endif
1649 
1650                 exi = checkexport(fsid, xfid);
1651 
1652                 if (exi != NULL) {
1653                         publicfh_ok = PUBLICFH_CHECK(disp, exi, fsid, xfid);
1654 
1655                         /*
1656                          * Don't allow non-V4 clients access
1657                          * to pseudo exports
1658                          */
1659                         if (PSEUDO(exi)) {
1660                                 svcerr_weakauth(xprt);
1661                                 error++;
1662                                 goto done;
1663                         }
1664 
1665                         authres = checkauth(exi, req, cr, anon_ok, publicfh_ok,
1666                             &ro);
1667                         /*
1668                          * authres >  0: authentication OK - proceed
1669                          * authres == 0: authentication weak - return error
1670                          * authres <  0: authentication timeout - drop
1671                          */
1672                         if (authres <= 0) {
1673                                 if (authres == 0) {
 
1746                                 SVC_FREERES(xprt);
1747                         error++;
1748                         goto done;
1749                 }
1750         }
1751 
1752         if (auth_tooweak(req, res)) {
1753                 svcerr_weakauth(xprt);
1754                 error++;
1755                 goto done;
1756         }
1757 
1758         /*
1759          * Check to see if logging has been enabled on the server.
1760          * If so, then obtain the export info struct to be used for
1761          * the later writing of the log record.  This is done for
1762          * the case that a lookup is done across a non-logged public
1763          * file system.
1764          */
1765         if (nfslog_buffer_list != NULL) {
1766                 nfslog_exi = nfslog_get_exi(exi, req, res, &nfslog_rec_id);
1767                 /*
1768                  * Is logging enabled?
1769                  */
1770                 logging_enabled = (nfslog_exi != NULL);
1771 
1772                 /*
1773                  * Copy the netbuf for logging purposes, before it is
1774                  * freed by svc_sendreply().
1775                  */
1776                 if (logging_enabled) {
1777                         NFSLOG_COPY_NETBUF(nfslog_exi, xprt, &nb);
1778                         /*
1779                          * If RPC_MAPRESP flag set (i.e. in V2 ops) the
1780                          * res gets copied directly into the mbuf and
1781                          * may be freed soon after the sendreply. So we
1782                          * must copy it here to a safe place...
1783                          */
1784                         if (res != (char *)&res_buf) {
1785                                 bcopy(res, (char *)&res_buf, disp->dis_ressz);
1786                         }
 
1829 
1830 done:
1831         /*
1832          * Free arguments struct
1833          */
1834         if (disp) {
1835                 if (!SVC_FREEARGS(xprt, disp->dis_xdrargs, args)) {
1836                         cmn_err(CE_NOTE, "%s: bad freeargs", pgmname);
1837                         error++;
1838                 }
1839         } else {
1840                 if (!SVC_FREEARGS(xprt, (xdrproc_t)0, (caddr_t)0)) {
1841                         cmn_err(CE_NOTE, "%s: bad freeargs", pgmname);
1842                         error++;
1843                 }
1844         }
1845 
1846         if (exi != NULL)
1847                 exi_rele(exi);
1848 
1849         global_svstat_ptr[req->rq_vers][NFS_BADCALLS].value.ui64 += error;
1850 
1851         global_svstat_ptr[req->rq_vers][NFS_CALLS].value.ui64++;
1852 }
1853 
/*
 * Dispatch routine for the NFS program.  Forwards the request and its
 * transport handle to common_dispatch(), supplying the NFS version range
 * (NFS_VERSMIN..NFS_VERSMAX), the program name "NFS" and the NFS dispatch
 * tables (rfs_disptable).
 */
static void
rfs_dispatch(struct svc_req *req, SVCXPRT *xprt)
{
        common_dispatch(req, xprt, NFS_VERSMIN, NFS_VERSMAX,
            "NFS", rfs_disptable);
}
1860 
/*
 * Names of the NFS_ACL version 2 procedures.  This array is referenced by
 * the version 2 entry of acl_disptable.
 * NOTE(review): the order presumably matches the procedure numbers used to
 * index acldisptab_v2 - confirm before reordering or inserting entries.
 */
static char *aclcallnames_v2[] = {
        "ACL2_NULL",
        "ACL2_GETACL",
        "ACL2_SETACL",
        "ACL2_GETATTR",
        "ACL2_ACCESS",
        "ACL2_GETXATTRDIR"
};
1869 
1870 static struct rpcdisp acldisptab_v2[] = {
1871         /*
 
1954             acl3_getacl_getfh},
1955 
1956         /* ACL3_SETACL = 2 */
1957         {acl3_setacl,
1958             xdr_SETACL3args, NULL_xdrproc_t, sizeof (SETACL3args),
1959             xdr_SETACL3res, NULL_xdrproc_t, sizeof (SETACL3res),
1960             nullfree, 0,
1961             acl3_setacl_getfh},
1962 
1963         /* ACL3_GETXATTRDIR = 3 */
1964         {acl3_getxattrdir,
1965             xdr_GETXATTRDIR3args, NULL_xdrproc_t, sizeof (GETXATTRDIR3args),
1966             xdr_GETXATTRDIR3res, NULL_xdrproc_t, sizeof (GETXATTRDIR3res),
1967             nullfree, RPC_IDEMPOTENT,
1968             acl3_getxattrdir_getfh},
1969 };
1970 
/*
 * Per-version dispatch information for the NFS_ACL program, handed to
 * common_dispatch() by acl_dispatch().  Each entry carries, in order:
 * the number of procedures (derived from the size of the dispatch array),
 * the procedure name array, the address of the per-procedure counter
 * pointer, and the dispatch array itself.  The first entry describes
 * version 2, the second version 3.
 */
static struct rpc_disptable acl_disptable[] = {
        {sizeof (acldisptab_v2) / sizeof (acldisptab_v2[0]),
                aclcallnames_v2,
                &aclproccnt_v2_ptr, acldisptab_v2},
        {sizeof (acldisptab_v3) / sizeof (acldisptab_v3[0]),
                aclcallnames_v3,
                &aclproccnt_v3_ptr, acldisptab_v3},
};
1979 
/*
 * Dispatch routine for the NFS_ACL program.  Forwards the request and its
 * transport handle to common_dispatch(), supplying the NFS_ACL version
 * range (NFS_ACL_VERSMIN..NFS_ACL_VERSMAX), the program name "ACL" and the
 * ACL dispatch tables (acl_disptable).
 */
static void
acl_dispatch(struct svc_req *req, SVCXPRT *xprt)
{
        common_dispatch(req, xprt, NFS_ACL_VERSMIN, NFS_ACL_VERSMAX,
            "ACL", acl_disptable);
}
1986 
1987 int
1988 checkwin(int flavor, int window, struct svc_req *req)
1989 {
1990         struct authdes_cred *adc;
1991 
1992         switch (flavor) {
1993         case AUTH_DES:
1994                 adc = (struct authdes_cred *)req->rq_clntcred;
1995                 CTASSERT(sizeof (struct authdes_cred) <= RQCRED_SIZE);
1996                 if (adc->adc_fullname.window > window)
1997                         return (0);
 
2551 
2552         } else {
2553 
2554                 /*
2555                  * No IP address to print. If there was a host name
2556                  * printed, then we print a space.
2557                  */
2558                 (void) sprintf(buf, frontspace);
2559         }
2560 
2561         return (buf);
2562 }
2563 
2564 /*
2565  * NFS Server initialization routine.  This routine should only be called
2566  * once.  It performs the following tasks:
2567  *      - Call sub-initialization routines (localize access to variables)
2568  *      - Initialize all locks
2569  *      - initialize the version 3 write verifier
2570  */
2571 int
2572 nfs_srvinit(void)
2573 {
2574         int error;
2575 
2576         error = nfs_exportinit();
2577         if (error != 0)
2578                 return (error);
2579         error = rfs4_srvrinit();
2580         if (error != 0) {
2581                 nfs_exportfini();
2582                 return (error);
2583         }
2584         rfs_srvrinit();
2585         rfs3_srvrinit();
2586         nfsauth_init();
2587 
2588         /* Init the stuff to control start/stop */
2589         nfs_server_upordown = NFS_SERVER_STOPPED;
2590         mutex_init(&nfs_server_upordown_lock, NULL, MUTEX_DEFAULT, NULL);
2591         cv_init(&nfs_server_upordown_cv, NULL, CV_DEFAULT, NULL);
2592         mutex_init(&rdma_wait_mutex, NULL, MUTEX_DEFAULT, NULL);
2593         cv_init(&rdma_wait_cv, NULL, CV_DEFAULT, NULL);
2594 
2595         return (0);
2596 }
2597 
2598 /*
2599  * NFS Server finalization routine. This routine is called to cleanup the
2600  * initialization work previously performed if the NFS server module could
2601  * not be loaded correctly.
2602  */
2603 void
2604 nfs_srvfini(void)
2605 {
2606         nfsauth_fini();
2607         rfs3_srvrfini();
2608         rfs_srvrfini();
2609         nfs_exportfini();
2610 
2611         mutex_destroy(&nfs_server_upordown_lock);
2612         cv_destroy(&nfs_server_upordown_cv);
2613         mutex_destroy(&rdma_wait_mutex);
2614         cv_destroy(&rdma_wait_cv);
2615 }
2616 
2617 /*
2618  * Set up an iovec array of up to cnt pointers.
2619  */
2620 
2621 void
2622 mblk_to_iov(mblk_t *m, int cnt, struct iovec *iovp)
2623 {
2624         while (m != NULL && cnt-- > 0) {
2625                 iovp->iov_base = (caddr_t)m->b_rptr;
2626                 iovp->iov_len = (m->b_wptr - m->b_rptr);
2627                 iovp++;
2628                 m = m->b_cont;
2629         }
2630 }
2631 
2632 /*
2633  * Common code between NFS Version 2 and NFS Version 3 for the public
2634  * filehandle multicomponent lookups.
2635  */
2636 
2637 /*
2638  * Public filehandle evaluation of a multi-component lookup, following
2639  * symbolic links, if necessary. This may result in a vnode in another
2640  * filesystem, which is OK as long as the other filesystem is exported.
 
2838 
2839                 if (error == ENOENT) {
2840                         *vpp = tvp;
2841                         mc_dvp = tmc_dvp;
2842                         error = 0;
2843                 } else {        /* ok or error other than ENOENT */
2844                         if (tmc_dvp)
2845                                 VN_RELE(tmc_dvp);
2846                         if (error)
2847                                 goto publicfh_done;
2848 
2849                         /*
2850                          * Found a valid vp for index "filename". Sanity check
2851                          * for odd case where a directory is provided as index
2852                          * option argument and leads us to another filesystem
2853                          */
2854 
2855                         /* Release the reference on the old exi value */
2856                         ASSERT(*exi != NULL);
2857                         exi_rele(*exi);
2858 
2859                         if (error = nfs_check_vpexi(mc_dvp, *vpp, kcred, exi)) {
2860                                 VN_RELE(*vpp);
2861                                 goto publicfh_done;
2862                         }
2863                 }
2864         }
2865 
2866 publicfh_done:
2867         if (mc_dvp)
2868                 VN_RELE(mc_dvp);
2869 
2870         return (error);
2871 }
2872 
2873 /*
2874  * Evaluate a multi-component path
2875  */
2876 int
2877 rfs_pathname(
2878         char *path,                     /* pathname to evaluate */
2879         vnode_t **dirvpp,               /* ret for ptr to parent dir vnode */
2880         vnode_t **compvpp,              /* ret for ptr to component vnode */
2881         vnode_t *startdvp,              /* starting vnode */
2882         cred_t *cr,                     /* user's credential */
2883         int pathflag)                   /* flag to identify path, e.g. URL */
2884 {
2885         char namebuf[TYPICALMAXPATHLEN];
2886         struct pathname pn;
2887         int error;
2888 
2889         /*
2890          * If pathname starts with '/', then set startdvp to root.
2891          */
2892         if (*path == '/') {
2893                 while (*path == '/')
2894                         path++;
2895 
2896                 startdvp = rootdir;
2897         }
2898 
2899         error = pn_get_buf(path, UIO_SYSSPACE, &pn, namebuf, sizeof (namebuf));
2900         if (error == 0) {
2901                 /*
2902                  * Call the URL parser for URL paths to modify the original
2903                  * string to handle any '%' encoded characters that exist.
2904                  * Done here to avoid an extra bcopy in the lookup.
2905                  * We need to be careful about pathlen's. We know that
2906                  * rfs_pathname() is called with a non-empty path. However,
2907                  * it could be emptied due to the path simply being all /'s,
2908                  * which is valid to proceed with the lookup, or due to the
2909                  * URL parser finding an encoded null character at the
2910                  * beginning of path which should not proceed with the lookup.
2911                  */
2912                 if (pn.pn_pathlen != 0 && pathflag == URLPATH) {
2913                         URLparse(pn.pn_path);
2914                         if ((pn.pn_pathlen = strlen(pn.pn_path)) == 0)
2915                                 return (ENOENT);
2916                 }
2917                 VN_HOLD(startdvp);
2918                 error = lookuppnvp(&pn, NULL, NO_FOLLOW, dirvpp, compvpp,
2919                     rootdir, startdvp, cr);
2920         }
2921         if (error == ENAMETOOLONG) {
2922                 /*
2923                  * This thread used a pathname > TYPICALMAXPATHLEN bytes long.
2924                  */
2925                 if (error = pn_get(path, UIO_SYSSPACE, &pn))
2926                         return (error);
2927                 if (pn.pn_pathlen != 0 && pathflag == URLPATH) {
2928                         URLparse(pn.pn_path);
2929                         if ((pn.pn_pathlen = strlen(pn.pn_path)) == 0) {
2930                                 pn_free(&pn);
2931                                 return (ENOENT);
2932                         }
2933                 }
2934                 VN_HOLD(startdvp);
2935                 error = lookuppnvp(&pn, NULL, NO_FOLLOW, dirvpp, compvpp,
2936                     rootdir, startdvp, cr);
2937                 pn_free(&pn);
2938         }
2939 
2940         return (error);
2941 }
2942 
2943 /*
2944  * Adapt the multicomponent lookup path depending on the pathtype
2945  */
2946 static int
2947 MCLpath(char **path)
2948 {
2949         unsigned char c = (unsigned char)**path;
2950 
2951         /*
2952          * If the MCL path is between 0x20 and 0x7E (graphic printable
2953          * character of the US-ASCII coded character set), its a URL path,
2954          * per RFC 1738.
2955          */
2956         if (c >= 0x20 && c <= 0x7E)
 
3020         int walk;
3021         int error = 0;
3022 
3023         *exi = nfs_vptoexi(mc_dvp, vp, cr, &walk, NULL, FALSE);
3024         if (*exi == NULL)
3025                 error = EACCES;
3026         else {
3027                 /*
3028                  * If nosub is set for this export then
3029                  * a lookup relative to the public fh
3030                  * must not terminate below the
3031                  * exported directory.
3032                  */
3033                 if ((*exi)->exi_export.ex_flags & EX_NOSUB && walk > 0)
3034                         error = EACCES;
3035         }
3036 
3037         return (error);
3038 }
3039 
3040 /*
3041  * Do the main work of handling HA-NFSv4 Resource Group failover on
3042  * Sun Cluster.
3043  * We need to detect whether any RG admin paths have been added or removed,
3044  * and adjust resources accordingly.
3045  * Currently we're using a very inefficient algorithm, ~ 2 * O(n**2). In
3046  * order to scale, the list and array of paths need to be held in more
3047  * suitable data structures.
3048  */
3049 static void
3050 hanfsv4_failover(void)
3051 {
3052         int i, start_grace, numadded_paths = 0;
3053         char **added_paths = NULL;
3054         rfs4_dss_path_t *dss_path;
3055 
3056         /*
3057          * Note: currently, rfs4_dss_pathlist cannot be NULL, since
3058          * it will always include an entry for NFS4_DSS_VAR_DIR. If we
3059          * make the latter dynamically specified too, the following will
3060          * need to be adjusted.
3061          */
3062 
3063         /*
3064          * First, look for removed paths: RGs that have been failed-over
3065          * away from this node.
3066          * Walk the "currently-serving" rfs4_dss_pathlist and, for each
3067          * path, check if it is on the "passed-in" rfs4_dss_newpaths array
3068          * from nfsd. If not, that RG path has been removed.
3069          *
3070          * Note that nfsd has sorted rfs4_dss_newpaths for us, and removed
3071          * any duplicates.
3072          */
3073         dss_path = rfs4_dss_pathlist;
3074         do {
3075                 int found = 0;
3076                 char *path = dss_path->path;
3077 
3078                 /* used only for non-HA so may not be removed */
3079                 if (strcmp(path, NFS4_DSS_VAR_DIR) == 0) {
3080                         dss_path = dss_path->next;
3081                         continue;
3082                 }
3083 
3084                 for (i = 0; i < rfs4_dss_numnewpaths; i++) {
3085                         int cmpret;
3086                         char *newpath = rfs4_dss_newpaths[i];
3087 
3088                         /*
3089                          * Since nfsd has sorted rfs4_dss_newpaths for us,
3090                          * once the return from strcmp is negative we know
3091                          * we've passed the point where "path" should be,
3092                          * and can stop searching: "path" has been removed.
3093                          */
3094                         cmpret = strcmp(path, newpath);
3095                         if (cmpret < 0)
3096                                 break;
3097                         if (cmpret == 0) {
3098                                 found = 1;
3099                                 break;
3100                         }
3101                 }
3102 
3103                 if (found == 0) {
3104                         unsigned index = dss_path->index;
3105                         rfs4_servinst_t *sip = dss_path->sip;
3106                         rfs4_dss_path_t *path_next = dss_path->next;
3107 
3108                         /*
3109                          * This path has been removed.
3110                          * We must clear out the servinst reference to
3111                          * it, since it's now owned by another
3112                          * node: we should not attempt to touch it.
3113                          */
3114                         ASSERT(dss_path == sip->dss_paths[index]);
3115                         sip->dss_paths[index] = NULL;
3116 
3117                         /* remove from "currently-serving" list, and destroy */
3118                         remque(dss_path);
3119                         /* allow for NUL */
3120                         kmem_free(dss_path->path, strlen(dss_path->path) + 1);
3121                         kmem_free(dss_path, sizeof (rfs4_dss_path_t));
3122 
3123                         dss_path = path_next;
3124                 } else {
3125                         /* path was found; not removed */
3126                         dss_path = dss_path->next;
3127                 }
3128         } while (dss_path != rfs4_dss_pathlist);
3129 
3130         /*
3131          * Now, look for added paths: RGs that have been failed-over
3132          * to this node.
3133          * Walk the "passed-in" rfs4_dss_newpaths array from nfsd and,
3134          * for each path, check if it is on the "currently-serving"
3135          * rfs4_dss_pathlist. If not, that RG path has been added.
3136          *
3137          * Note: we don't do duplicate detection here; nfsd does that for us.
3138          *
3139          * Note: numadded_paths <= rfs4_dss_numnewpaths, which gives us
3140          * an upper bound for the size needed for added_paths[numadded_paths].
3141          */
3142 
3143         /* probably more space than we need, but guaranteed to be enough */
3144         if (rfs4_dss_numnewpaths > 0) {
3145                 size_t sz = rfs4_dss_numnewpaths * sizeof (char *);
3146                 added_paths = kmem_zalloc(sz, KM_SLEEP);
3147         }
3148 
3149         /* walk the "passed-in" rfs4_dss_newpaths array from nfsd */
3150         for (i = 0; i < rfs4_dss_numnewpaths; i++) {
3151                 int found = 0;
3152                 char *newpath = rfs4_dss_newpaths[i];
3153 
3154                 dss_path = rfs4_dss_pathlist;
3155                 do {
3156                         char *path = dss_path->path;
3157 
3158                         /* used only for non-HA */
3159                         if (strcmp(path, NFS4_DSS_VAR_DIR) == 0) {
3160                                 dss_path = dss_path->next;
3161                                 continue;
3162                         }
3163 
3164                         if (strncmp(path, newpath, strlen(path)) == 0) {
3165                                 found = 1;
3166                                 break;
3167                         }
3168 
3169                         dss_path = dss_path->next;
3170                 } while (dss_path != rfs4_dss_pathlist);
3171 
3172                 if (found == 0) {
3173                         added_paths[numadded_paths] = newpath;
3174                         numadded_paths++;
3175                 }
3176         }
3177 
3178         /* did we find any added paths? */
3179         if (numadded_paths > 0) {
3180                 /* create a new server instance, and start its grace period */
3181                 start_grace = 1;
3182                 rfs4_servinst_create(start_grace, numadded_paths, added_paths);
3183 
3184                 /* read in the stable storage state from these paths */
3185                 rfs4_dss_readstate(numadded_paths, added_paths);
3186 
3187                 /*
3188                  * Multiple failovers during a grace period will cause
3189                  * clients of the same resource group to be partitioned
3190                  * into different server instances, with different
3191                  * grace periods.  Since clients of the same resource
3192                  * group must be subject to the same grace period,
3193                  * we need to reset all currently active grace periods.
3194                  */
3195                 rfs4_grace_reset_all();
3196         }
3197 
3198         if (rfs4_dss_numnewpaths > 0)
3199                 kmem_free(added_paths, rfs4_dss_numnewpaths * sizeof (char *));
3200 }
3201 
3202 /*
3203  * Used by NFSv3 and NFSv4 server to query label of
3204  * a pathname component during lookup/access ops.
3205  */
3206 ts_label_t *
3207 nfs_getflabel(vnode_t *vp, struct exportinfo *exi)
3208 {
3209         zone_t *zone;
3210         ts_label_t *zone_label;
3211         char *path;
3212 
3213         mutex_enter(&vp->v_lock);
3214         if (vp->v_path != vn_vpath_empty) {
3215                 zone = zone_find_by_any_path(vp->v_path, B_FALSE);
3216                 mutex_exit(&vp->v_lock);
3217         } else {
3218                 /*
3219                  * v_path not cached. Fall back on pathname of exported
3220                  * file system as we rely on pathname from which we can
3221                  * derive a label. The exported file system portion of
 
 | 
 
 
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 1990, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright (c) 2011 Bayard G. Bell. All rights reserved.
  24  * Copyright (c) 2013 by Delphix. All rights reserved.
  25  * Copyright (c) 2017 Joyent Inc
  26  * Copyright 2019 Nexenta by DDN, Inc.
  27  */
  28 
  29 /*
  30  *      Copyright (c) 1983,1984,1985,1986,1987,1988,1989  AT&T.
  31  *      All rights reserved.
  32  *      Use is subject to license terms.
  33  */
  34 
  35 #include <sys/param.h>
  36 #include <sys/types.h>
  37 #include <sys/systm.h>
  38 #include <sys/cred.h>
  39 #include <sys/proc.h>
  40 #include <sys/user.h>
  41 #include <sys/buf.h>
  42 #include <sys/vfs.h>
  43 #include <sys/vnode.h>
  44 #include <sys/pathname.h>
  45 #include <sys/uio.h>
  46 #include <sys/file.h>
 
 
  66 #include <sys/vtrace.h>
  67 #include <sys/mode.h>
  68 #include <sys/acl.h>
  69 #include <sys/sdt.h>
  70 #include <sys/debug.h>
  71 
  72 #include <rpc/types.h>
  73 #include <rpc/auth.h>
  74 #include <rpc/auth_unix.h>
  75 #include <rpc/auth_des.h>
  76 #include <rpc/svc.h>
  77 #include <rpc/xdr.h>
  78 #include <rpc/rpc_rdma.h>
  79 
  80 #include <nfs/nfs.h>
  81 #include <nfs/export.h>
  82 #include <nfs/nfssys.h>
  83 #include <nfs/nfs_clnt.h>
  84 #include <nfs/nfs_acl.h>
  85 #include <nfs/nfs_log.h>
  86 #include <nfs/lm.h>
  87 #include <nfs/nfs_dispatch.h>
  88 #include <nfs/nfs4_drc.h>
  89 
  90 #include <sys/modctl.h>
  91 #include <sys/cladm.h>
  92 #include <sys/clconf.h>
  93 
  94 #include <sys/tsol/label.h>
  95 
  96 #define MAXHOST 32
  97 const char *kinet_ntop6(uchar_t *, char *, size_t);
  98 
  99 /*
 100  * Module linkage information.
 101  */
 102 
/* Linkage record for this module: mod_miscops, named "NFS server module". */
static struct modlmisc modlmisc = {
        &mod_miscops, "NFS server module"
};

/*
 * Top-level linkage structure wrapping modlmisc; this is what _init()
 * passes to mod_install() and _info() passes to mod_info().
 */
static struct modlinkage modlinkage = {
        MODREV_1, (void *)&modlmisc, NULL
};
 110 
 111 zone_key_t      nfssrv_zone_key;
 112 list_t          nfssrv_globals_list;
 113 krwlock_t       nfssrv_globals_rwl;
 114 
 115 kmem_cache_t *nfs_xuio_cache;
 116 int nfs_loaned_buffers = 0;
 117 
 118 int
 119 _init(void)
 120 {
 121         int status;
 122 
 123         nfs_srvinit();
 124 
 125         status = mod_install((struct modlinkage *)&modlinkage);
 126         if (status != 0) {
 127                 /*
 128                  * Could not load module, cleanup previous
 129                  * initialization work.
 130                  */
 131                 nfs_srvfini();
 132 
 133                 return (status);
 134         }
 135 
 136         /*
 137          * Initialise some placeholders for nfssys() calls. These have
 138          * to be declared by the nfs module, since that handles nfssys()
 139          * calls - also used by NFS clients - but are provided by this
 140          * nfssrv module. These also then serve as confirmation to the
 141          * relevant code in nfs that nfssrv has been loaded, as they're
 142          * initially NULL.
 143          */
 
 160 {
 161         return (EBUSY);
 162 }
 163 
/*
 * Module information entry point: returns whatever mod_info() reports for
 * this module's linkage structure, filling in modinfop.
 */
int
_info(struct modinfo *modinfop)
{
        return (mod_info(&modlinkage, modinfop));
}
 169 
 170 /*
 171  * PUBLICFH_CHECK() checks if the dispatch routine supports
 172  * RPC_PUBLICFH_OK, if the filesystem is exported public, and if the
 173  * incoming request is using the public filehandle. The check duplicates
 174  * the exportmatch() call done in checkexport(), and we should consider
 175  * modifying those routines to avoid the duplication. For now, we optimize
 176  * by calling exportmatch() only after checking that the dispatch routine
 177  * supports RPC_PUBLICFH_OK, and if the filesystem is explicitly exported
 178  * public (i.e., not the placeholder).
 179  */
 180 #define PUBLICFH_CHECK(ne, disp, exi, fsid, xfid) \
 181                 ((disp->dis_flags & RPC_PUBLICFH_OK) && \
 182                 ((exi->exi_export.ex_flags & EX_PUBLIC) || \
 183                 (exi == ne->exi_public && exportmatch(ne->exi_root, \
 184                 fsid, xfid))))
 185 
 186 static void     nfs_srv_shutdown_all(int);
 187 static void     rfs4_server_start(nfs_globals_t *, int);
 188 static void     nullfree(void);
 189 static void     rfs_dispatch(struct svc_req *, SVCXPRT *);
 190 static void     acl_dispatch(struct svc_req *, SVCXPRT *);
 191 static  int     checkauth(struct exportinfo *, struct svc_req *, cred_t *, int,
 192                 bool_t, bool_t *);
 193 static char     *client_name(struct svc_req *req);
 194 static char     *client_addr(struct svc_req *req, char *buf);
 195 extern  int     sec_svc_getcred(struct svc_req *, cred_t *cr, char **, int *);
 196 extern  bool_t  sec_svc_inrootlist(int, caddr_t, int, caddr_t *);
 197 static void     *nfs_server_zone_init(zoneid_t);
 198 static void     nfs_server_zone_fini(zoneid_t, void *);
 199 static void     nfs_server_zone_shutdown(zoneid_t, void *);
 200 
/*
 * Copy the remote address of transport handle xprt (xp_rtaddr) into the
 * netbuf pointed to by nb: maxlen and len are copied, and a fresh buffer
 * of len bytes is allocated with kmem_alloc(KM_SLEEP) and filled from the
 * transport's address buffer.
 *
 * The exi argument is not referenced by the expansion.  The macro expands
 * to a bare brace-enclosed block and evaluates nb and xprt several times,
 * so pass arguments without side effects.
 * NOTE(review): nothing here frees nb->buf; presumably the caller releases
 * it with kmem_free() once logging is done - confirm.
 */
#define NFSLOG_COPY_NETBUF(exi, xprt, nb)       {               \
        (nb)->maxlen = (xprt)->xp_rtaddr.maxlen;          \
        (nb)->len = (xprt)->xp_rtaddr.len;                        \
        (nb)->buf = kmem_alloc((nb)->len, KM_SLEEP);              \
        bcopy((xprt)->xp_rtaddr.buf, (nb)->buf, (nb)->len);    \
        }
 207 
 208 /*
 209  * Public Filehandle common nfs routines
 210  */
 211 static int      MCLpath(char **);
 212 static void     URLparse(char *);
 213 
 214 /*
 215  * NFS callout table.
 216  * This table is used by svc_getreq() to dispatch a request with
 217  * a given prog/vers pair to an appropriate service provider
 218  * dispatch routine.
 219  *
 
 230         __nfs_sc_clts
 231 };
 232 
/*
 * Callout entries, one per program, giving the program number, the
 * minimum and maximum supported versions and the dispatch routine.
 * Entry 0 is the NFS program, entry 1 the NFS_ACL program.  The version
 * range of entry 1 is overwritten at run time by
 * nfs_srv_set_sc_versions().
 * NOTE(review): "cots" presumably stands for connection-oriented
 * transports - confirm.
 */
static SVC_CALLOUT __nfs_sc_cots[] = {
        { NFS_PROGRAM,     NFS_VERSMIN,     NFS_VERSMAX,        rfs_dispatch },
        { NFS_ACL_PROGRAM, NFS_ACL_VERSMIN, NFS_ACL_VERSMAX,    acl_dispatch }
};

/*
 * Callout table wrapping __nfs_sc_cots: the number of entries, a flag
 * (FALSE) and the entry array.
 */
static SVC_CALLOUT_TABLE nfs_sct_cots = {
        sizeof (__nfs_sc_cots) / sizeof (__nfs_sc_cots[0]), FALSE, __nfs_sc_cots
};
 241 
/*
 * Callout entries with the same layout and initial contents as
 * __nfs_sc_cots: entry 0 is the NFS program, entry 1 the NFS_ACL program,
 * each with its version range and dispatch routine.
 * NOTE(review): presumably used for RDMA transports, going by the name -
 * confirm.
 */
static SVC_CALLOUT __nfs_sc_rdma[] = {
        { NFS_PROGRAM,     NFS_VERSMIN,     NFS_VERSMAX,        rfs_dispatch },
        { NFS_ACL_PROGRAM, NFS_ACL_VERSMIN, NFS_ACL_VERSMAX,    acl_dispatch }
};

/*
 * Callout table wrapping __nfs_sc_rdma: the number of entries, a flag
 * (FALSE) and the entry array.
 */
static SVC_CALLOUT_TABLE nfs_sct_rdma = {
        sizeof (__nfs_sc_rdma) / sizeof (__nfs_sc_rdma[0]), FALSE, __nfs_sc_rdma
};
 250 
 251 /*
 252  * DSS: distributed stable storage
 253  * lists of all DSS paths: current, and before last warmstart
 254  */
 255 nvlist_t *rfs4_dss_paths, *rfs4_dss_oldpaths;
 256 
 257 int rfs4_dispatch(struct rpcdisp *, struct svc_req *, SVCXPRT *, char *);
 258 bool_t rfs4_minorvers_mismatch(struct svc_req *, SVCXPRT *, void *);
 259 
 260 /*
 261  * Stash NFS zone globals in TSD to avoid some lock contention
 262  * from frequent zone_getspecific calls.
 263  */
 264 static uint_t nfs_server_tsd_key;
 265 
 266 nfs_globals_t *
 267 nfs_srv_getzg(void)
 268 {
 269         nfs_globals_t *ng;
 270 
 271         ng = tsd_get(nfs_server_tsd_key);
 272         if (ng == NULL) {
 273                 ng = zone_getspecific(nfssrv_zone_key, curzone);
 274                 (void) tsd_set(nfs_server_tsd_key, ng);
 275         }
 276 
 277         return (ng);
 278 }
 279 
 280 /*
 281  * Will be called at the point the server pool is being unregistered
 282  * from the pool list. From that point onwards, the pool is waiting
 283  * to be drained and as such the server state is stale and pertains
 284  * to the old instantiation of the NFS server pool.
 285  */
 286 void
 287 nfs_srv_offline(void)
 288 {
 289         nfs_globals_t *ng;
 290 
 291         ng = nfs_srv_getzg();
 292 
 293         mutex_enter(&ng->nfs_server_upordown_lock);
 294         if (ng->nfs_server_upordown == NFS_SERVER_RUNNING) {
 295                 ng->nfs_server_upordown = NFS_SERVER_OFFLINE;
 296         }
 297         mutex_exit(&ng->nfs_server_upordown_lock);
 298 }
 299 
 300 /*
 301  * Will be called at the point the server pool is being destroyed so
 302  * all transports have been closed and no service threads are in
 303  * existence.
 304  *
 305  * If we quiesce the server, we're shutting it down without destroying the
 306  * server state. This allows it to warm start subsequently.
 307  */
 308 void
 309 nfs_srv_stop_all(void)
 310 {
 311         int quiesce = 0;
 312         nfs_srv_shutdown_all(quiesce);
 313 }
 314 
 315 /*
 316  * This alternative shutdown routine can be requested via nfssys()
 317  */
 318 void
 319 nfs_srv_quiesce_all(void)
 320 {
 321         int quiesce = 1;
 322         nfs_srv_shutdown_all(quiesce);
 323 }
 324 
 325 static void
 326 nfs_srv_shutdown_all(int quiesce)
 327 {
 328         nfs_globals_t *ng = nfs_srv_getzg();
 329 
 330         mutex_enter(&ng->nfs_server_upordown_lock);
 331         if (quiesce) {
 332                 if (ng->nfs_server_upordown == NFS_SERVER_RUNNING ||
 333                     ng->nfs_server_upordown == NFS_SERVER_OFFLINE) {
 334                         ng->nfs_server_upordown = NFS_SERVER_QUIESCED;
 335                         cv_signal(&ng->nfs_server_upordown_cv);
 336 
 337                         /* reset DSS state */
 338                         rfs4_dss_numnewpaths = 0;
 339                         rfs4_dss_newpaths = NULL;
 340 
 341                         cmn_err(CE_NOTE, "nfs_server: server is now quiesced; "
 342                             "NFSv4 state has been preserved");
 343                 }
 344         } else {
 345                 if (ng->nfs_server_upordown == NFS_SERVER_OFFLINE) {
 346                         ng->nfs_server_upordown = NFS_SERVER_STOPPING;
 347                         mutex_exit(&ng->nfs_server_upordown_lock);
 348                         rfs4_state_zone_fini();
 349                         rfs4_fini_drc();
 350                         mutex_enter(&ng->nfs_server_upordown_lock);
 351                         ng->nfs_server_upordown = NFS_SERVER_STOPPED;
 352 
 353                         /* reset DSS state */
 354                         rfs4_dss_numnewpaths = 0;
 355                         rfs4_dss_newpaths = NULL;
 356 
 357                         cv_signal(&ng->nfs_server_upordown_cv);
 358                 }
 359         }
 360         mutex_exit(&ng->nfs_server_upordown_lock);
 361 }
 362 
 363 static int
 364 nfs_srv_set_sc_versions(struct file *fp, SVC_CALLOUT_TABLE **sctpp,
 365     rpcvers_t versmin, rpcvers_t versmax)
 366 {
 367         struct strioctl strioc;
 368         struct T_info_ack tinfo;
 369         int             error, retval;
 370 
 371         /*
 372          * Find out what type of transport this is.
 373          */
 374         strioc.ic_cmd = TI_GETINFO;
 375         strioc.ic_timout = -1;
 376         strioc.ic_len = sizeof (tinfo);
 377         strioc.ic_dp = (char *)&tinfo;
 378         tinfo.PRIM_type = T_INFO_REQ;
 379 
 380         error = strioctl(fp->f_vnode, I_STR, (intptr_t)&strioc, 0, K_TO_K,
 
 408                         versmax = NFS_ACL_VERSMAX;
 409                 __nfs_sc_cots[1].sc_versmin = versmin;
 410                 __nfs_sc_cots[1].sc_versmax = versmax;
 411                 *sctpp = &nfs_sct_cots;
 412                 break;
 413         default:
 414                 error = EINVAL;
 415         }
 416 
 417         return (error);
 418 }
 419 
 420 /*
 421  * NFS Server system call.
 422  * Does all of the work of running a NFS server.
 423  * uap->fd is the fd of an open transport provider.
 424  * Returns 0 or an error; the getf() hold on uap->fd is always released.
 425  */
 426 int
 427 nfs_svc(struct nfs_svc_args *arg, model_t model)
 428 {
 429         nfs_globals_t *ng;
 430         file_t *fp;
 431         SVCMASTERXPRT *xprt;
 432         int error;
 433         int readsize;
 434         char buf[KNC_STRSIZE];
 435         size_t len;
 436         STRUCT_HANDLE(nfs_svc_args, uap);
 437         struct netbuf addrmask;
 438         SVC_CALLOUT_TABLE *sctp = NULL;
 439 
 440 #ifdef lint
 441         model = model;          /* STRUCT macros don't always refer to it */
 442 #endif
 443 
 444         ng = nfs_srv_getzg();
 445         STRUCT_SET_HANDLE(uap, model, arg);
 446 
 447         /* Check privileges in nfssys() */
 448 
 449         if ((fp = getf(STRUCT_FGET(uap, fd))) == NULL)
 450                 return (EBADF);
 451 
 452         /* Setup global file handle in nfs_export */
 453         if ((error = nfs_export_get_rootfh(ng)) != 0)
 454                 goto out_release;
 455 
 456         /*
 457          * Set read buffer size to rsize
 458          * and add room for RPC headers.
 459          */
 460         readsize = nfs3tsize() + (RPC_MAXDATASIZE - NFS_MAXDATA);
 461         if (readsize < RPC_MAXDATASIZE)
 462                 readsize = RPC_MAXDATASIZE;
 463 
 464         error = copyinstr((const char *)STRUCT_FGETP(uap, netid), buf,
 465             KNC_STRSIZE, &len);
 466         if (error != 0)
 467                 goto out_release;
 468 
 469         addrmask.len = STRUCT_FGET(uap, addrmask.len);
 470         addrmask.maxlen = STRUCT_FGET(uap, addrmask.maxlen);
 471         /* The copyin below must fit in the maxlen bytes allocated for it */
 472         if (addrmask.len > addrmask.maxlen) {
 473                 error = EINVAL;
 474                 goto out_release;
 475         }
 476         addrmask.buf = kmem_alloc(addrmask.maxlen, KM_SLEEP);
 477         error = copyin(STRUCT_FGETP(uap, addrmask.buf), addrmask.buf,
 478             addrmask.len);
 479         if (error != 0)
 480                 goto out_free;
 481 
 482         ng->nfs_versmin = STRUCT_FGET(uap, versmin);
 483         ng->nfs_versmax = STRUCT_FGET(uap, versmax);
 484 
 485         /* Double check the vers min/max ranges */
 486         if ((ng->nfs_versmin > ng->nfs_versmax) ||
 487             (ng->nfs_versmin < NFS_VERSMIN) ||
 488             (ng->nfs_versmax > NFS_VERSMAX)) {
 489                 ng->nfs_versmin = NFS_VERSMIN_DEFAULT;
 490                 ng->nfs_versmax = NFS_VERSMAX_DEFAULT;
 491         }
 492 
 493         error = nfs_srv_set_sc_versions(fp, &sctp, ng->nfs_versmin,
 494             ng->nfs_versmax);
 495         if (error != 0)
 496                 goto out_free;
 497 
 498         /* Initialize nfsv4 server */
 499         if (ng->nfs_versmax == (rpcvers_t)NFS_V4)
 500                 rfs4_server_start(ng, STRUCT_FGET(uap, delegation));
 501 
 502         /* Create a transport handle. */
 503         error = svc_tli_kcreate(fp, readsize, buf, &addrmask, &xprt,
 504             sctp, NULL, NFS_SVCPOOL_ID, TRUE);
 505 
 506         /* HA-NFSv4: save the cluster nodeid */
 507         if (cluster_bootflags & CLUSTER_BOOTED)
 508                 lm_global_nlmid = clconf_get_nodeid();
 509 
 510         if (error == 0)
 511                 goto out_release;  /* success: addrmask.buf is not freed */
 512 out_free:
 513         kmem_free(addrmask.buf, addrmask.maxlen);
 514 out_release:
 515         releasef(STRUCT_FGET(uap, fd));
 516         return (error);
 517 }
 518 
 519 static void
 520 rfs4_server_start(nfs_globals_t *ng, int nfs4_srv_delegation)
 521 {
 522         /*
 523          * Determine if the server has previously been "started" and
 524          * if not, do the per instance initialization
              *
              * The state in ng->nfs_server_upordown is examined and changed
              * only while ng->nfs_server_upordown_lock is held.  If the state
              * is already NFS_SERVER_RUNNING this function does nothing.
 525          */
 526         mutex_enter(&ng->nfs_server_upordown_lock);
 527 
 528         if (ng->nfs_server_upordown != NFS_SERVER_RUNNING) {
 529                 /* Do we need to stop and wait on the previous server? */
 530                 while (ng->nfs_server_upordown == NFS_SERVER_STOPPING ||
 531                     ng->nfs_server_upordown == NFS_SERVER_OFFLINE)
 532                         cv_wait(&ng->nfs_server_upordown_cv,
 533                             &ng->nfs_server_upordown_lock);
 534 
                     /* Look at the state again now that the wait is over */
 535                 if (ng->nfs_server_upordown != NFS_SERVER_RUNNING) {
                             /*
                              * Give the NFS service pool nfs_srv_offline and
                              * nfs_srv_stop_all through its UNREGISTER_PROC
                              * and SHUTDOWN_PROC controls; failures of
                              * svc_pool_control() are deliberately ignored.
                              */
 536                         (void) svc_pool_control(NFS_SVCPOOL_ID,
 537                             SVCPSET_UNREGISTER_PROC, (void *)&nfs_srv_offline);
 538                         (void) svc_pool_control(NFS_SVCPOOL_ID,
 539                             SVCPSET_SHUTDOWN_PROC, (void *)&nfs_srv_stop_all);
 540 
                             /*
                              * The state passed here is the one we are
                              * starting from; it is switched to
                              * NFS_SERVER_RUNNING only after this returns.
                              */
 541                         rfs4_do_server_start(ng->nfs_server_upordown,
 542                             nfs4_srv_delegation,
 543                             cluster_bootflags & CLUSTER_BOOTED);
 544 
 545                         ng->nfs_server_upordown = NFS_SERVER_RUNNING;
 546                 }
                     /* Let a thread waiting on the state cv take a look */
 547                 cv_signal(&ng->nfs_server_upordown_cv);
 548         }
 549         mutex_exit(&ng->nfs_server_upordown_lock);
 550 }
 551 
 552 /*
 553  * If RDMA device available,
 554  * start RDMA listener.
 555  */
 556 int
 557 rdma_start(struct rdma_svc_args *rsa)
 558 {
 559         nfs_globals_t *ng;
 560         int error;
 561         rdma_xprt_group_t started_rdma_xprts;
 562         rdma_stat stat;
 563         int svc_state = 0;
 564 
 565         /* Double check the vers min/max ranges */
 566         if ((rsa->nfs_versmin > rsa->nfs_versmax) ||
 567             (rsa->nfs_versmin < NFS_VERSMIN) ||
 568             (rsa->nfs_versmax > NFS_VERSMAX)) {
 569                 rsa->nfs_versmin = NFS_VERSMIN_DEFAULT;
 570                 rsa->nfs_versmax = NFS_VERSMAX_DEFAULT;
 571         }
 572 
 573         ng = nfs_srv_getzg();
 574         ng->nfs_versmin = rsa->nfs_versmin;
 575         ng->nfs_versmax = rsa->nfs_versmax;
 576 
 577         /* Set the versions in the callout table */
 578         __nfs_sc_rdma[0].sc_versmin = rsa->nfs_versmin;
 579         __nfs_sc_rdma[0].sc_versmax = rsa->nfs_versmax;
 580         /* For the NFS_ACL program, check the max version */
 581         __nfs_sc_rdma[1].sc_versmin = rsa->nfs_versmin;
 582         if (rsa->nfs_versmax > NFS_ACL_VERSMAX)
 583                 __nfs_sc_rdma[1].sc_versmax = NFS_ACL_VERSMAX;
 584         else
 585                 __nfs_sc_rdma[1].sc_versmax = rsa->nfs_versmax;
 586 
 587         /* Initialize nfsv4 server */
 588         if (rsa->nfs_versmax == (rpcvers_t)NFS_V4)
 589                 rfs4_server_start(ng, rsa->delegation);
 590 
 591         started_rdma_xprts.rtg_count = 0;
 592         started_rdma_xprts.rtg_listhead = NULL;
 593         started_rdma_xprts.rtg_poolid = rsa->poolid;
 594 
 595 restart:
 596         error = svc_rdma_kcreate(rsa->netid, &nfs_sct_rdma, rsa->poolid,
 597             &started_rdma_xprts);
 598 
 599         svc_state = !error;
 600 
 601         while (!error) {
 602 
 603                 /*
 604                  * wait till either interrupted by a signal on
 605                  * nfs service stop/restart or signalled by a
 606                  * rdma attach/detach.
 607                  */
 608 
 609                 stat = rdma_kwait();
 610 
 611                 /*
 612                  * stop services if running -- either on a HCA detach event
 613                  * or if the nfs service is stopped/restarted.
 614                  */
 615 
 616                 if ((stat == RDMA_HCA_DETACH || stat == RDMA_INTR) &&
 617                     svc_state) {
 618                         rdma_stop(&started_rdma_xprts);
 619                         svc_state = 0;
 620                 }
 621 
 622                 /*
 623                  * nfs service stop/restart, break out of the
 624                  * wait loop and return;
 625                  */
 626                 if (stat == RDMA_INTR)
 
 637                 /*
 638                  * loop until a nfs service stop/restart
 639                  */
 640         }
 641 
 642         return (error);
 643 }
 644 
 645 /* ARGSUSED */
 646 void
 647 rpc_null(caddr_t *argp, caddr_t *resp, struct exportinfo *exi,
 648     struct svc_req *req, cred_t *cr, bool_t ro)
 649 {
             /* Null procedure: nothing to do, so the body is empty. */
 650 }
 651 
 652 /* ARGSUSED */
 653 void
 654 rpc_null_v3(caddr_t *argp, caddr_t *resp, struct exportinfo *exi,
 655     struct svc_req *req, cred_t *cr, bool_t ro)
 656 {
             /*
              * No work is done here: the op__null__start and op__null__done
              * probes simply fire back to back, each with the request, the
              * cred, a NULL vnode and the export info.
              */
 657         DTRACE_NFSV3_4(op__null__start, struct svc_req *, req,
 658             cred_t *, cr, vnode_t *, NULL, struct exportinfo *, exi);
 659         DTRACE_NFSV3_4(op__null__done, struct svc_req *, req,
 660             cred_t *, cr, vnode_t *, NULL, struct exportinfo *, exi);
 661 }
 662 
 663 /* ARGSUSED */
 664 static void
 665 rfs_error(caddr_t *argp, caddr_t *resp, struct exportinfo *exi,
 666     struct svc_req *req, cred_t *cr, bool_t ro)
 667 {
             /*
              * Does nothing.  The line below is commented-out code; it could
              * not be enabled as written because this function returns void.
              */
 668         /* return (EOPNOTSUPP); */
 669 }
 670 
 671 static void
 672 nullfree(void)
 673 {
             /*
              * Intentionally empty.  Dispatch table entries in this file (for
              * example ACL3_SETACL) name this function -- presumably as their
              * free routine when there is nothing to release.
              */
 674 }
 675 
 676 static char *rfscallnames_v2[] = {
 677         "RFS2_NULL",
 678         "RFS2_GETATTR",
 679         "RFS2_SETATTR",
 680         "RFS2_ROOT",
 
1318         /* RFS3_PATHCONF = 20 */
1319         PATHCONF3res nfs3_pathconf_res;
1320 
1321         /* RFS3_COMMIT = 21 */
1322         COMMIT3res nfs3_commit_res;
1323 
1324         /*
1325          * NFS VERSION 4
1326          */
1327 
1328         /* RFS_NULL = 0 */
1329 
1330         /* RFS4_COMPOUND = 1 */
1331         COMPOUND4res nfs4_compound_res;
1332 
1333 };
1334 
1335 static struct rpc_disptable rfs_disptable[] = {
             /*
              * One entry per NFS version, in version order: common_dispatch()
              * indexes this array with (rq_vers - min_vers), and
              * rfs_dispatch() passes NFS_VERSMIN as min_vers.  Each entry
              * gives the number of procedures, their names, and the
              * per-procedure dispatch entries.
              */
1336         {sizeof (rfsdisptab_v2) / sizeof (rfsdisptab_v2[0]),
1337             rfscallnames_v2,
1338             rfsdisptab_v2},
1339         {sizeof (rfsdisptab_v3) / sizeof (rfsdisptab_v3[0]),
1340             rfscallnames_v3,
1341             rfsdisptab_v3},
1342         {sizeof (rfsdisptab_v4) / sizeof (rfsdisptab_v4[0]),
1343             rfscallnames_v4,
1344             rfsdisptab_v4},
1345 };
1346 
1347 /*
1348  * If nfs_portmon is set, then clients are required to use privileged
1349  * ports (ports < IPPORT_RESERVED) in order to get NFS services.
1350  *
1351  * N.B.: this attempt to carry forward the already ill-conceived notion
1352  * of privileged ports for TCP/UDP is really quite ineffectual.  Not only
1353  * is it transport-dependent, it's laughably easy to spoof.  If you're
1354  * really interested in security, you must start with secure RPC instead.
1355  */
1356 static int nfs_portmon = 0;
1357 
1358 #ifdef DEBUG
     /*
      * Counters maintained by common_dispatch(): cred_hits counts requests
      * that found the transport's cached cred with a reference count of 1,
      * cred_misses counts those that had to replace it with a new cred.
      */
1359 static int cred_hits = 0;
1360 static int cred_misses = 0;
1361 #endif
1362 
1363 #ifdef DEBUG
1364 /*
1365  * Debug code to allow disabling of rfs_dispatch() use of
1366  * fastxdrargs() and fastxdrres() calls for testing purposes.
1367  */
1368 static int rfs_no_fast_xdrargs = 0;
1369 static int rfs_no_fast_xdrres = 0;
1370 #endif
1371 
1372 union acl_args {
1373         /*
1374          * ACL VERSION 2
1375          */
1376 
1377         /* ACL2_NULL = 0 */
1378 
1379         /* ACL2_GETACL = 1 */
1380         GETACL2args acl2_getacl_args;
1381 
1382         /* ACL2_SETACL = 2 */
 
1446         GETXATTRDIR3res acl3_getxattrdir_res;
1447 
1448 };
1449 
1450 /* Does this LOOKUP result (NFS_VERSION or NFS_V3) say WNFSERR_CLNT_FLAVOR? */
1451 static bool_t
1452 auth_tooweak(struct svc_req *req, char *res)
1453 {
1454         enum wnfsstat st;
1455 
1456         if (req->rq_vers == NFS_VERSION && req->rq_proc == RFS_LOOKUP)
1457                 st = (enum wnfsstat)((struct nfsdiropres *)res)->dr_status;
1458         else if (req->rq_vers == NFS_V3 && req->rq_proc == NFSPROC3_LOOKUP)
1459                 st = (enum wnfsstat)((LOOKUP3res *)res)->status;
1460         else
1461                 return (FALSE);
1462 
1463         return (st == WNFSERR_CLNT_FLAVOR ? TRUE : FALSE);
1464 }
1465 
1466 static void
1467 common_dispatch(struct svc_req *req, SVCXPRT *xprt, rpcvers_t min_vers,
1468     rpcvers_t max_vers, char *pgmname, struct rpc_disptable *disptable)
1469 {
1470         int which;
1471         rpcvers_t vers;
1472         char *args;
1473         union {
1474                         union rfs_args ra;
1475                         union acl_args aa;
1476                 } args_buf;
1477         char *res;
1478         union {
1479                         union rfs_res rr;
1480                         union acl_res ar;
1481                 } res_buf;
1482         struct rpcdisp *disp = NULL;
1483         int dis_flags = 0;
1484         cred_t *cr;
1485         int error = 0;
1486         int anon_ok;
1487         struct exportinfo *exi = NULL;
1488         unsigned int nfslog_rec_id;
1489         int dupstat;
1490         struct dupreq *dr;
1491         int authres;
1492         bool_t publicfh_ok = FALSE;
1493         enum_t auth_flavor;
1494         bool_t dupcached = FALSE;
1495         struct netbuf   nb;
1496         bool_t logging_enabled = FALSE;
1497         struct exportinfo *nfslog_exi = NULL;
1498         char **procnames;
1499         char cbuf[INET6_ADDRSTRLEN];    /* to hold both IPv4 and IPv6 addr */
1500         bool_t ro = FALSE;
1501         nfs_globals_t *ng = nfs_srv_getzg();
1502         nfs_export_t *ne = ng->nfs_export;
1503         kstat_named_t *svstat, *procstat;
1504 
1505         ASSERT(req->rq_prog == NFS_PROGRAM || req->rq_prog == NFS_ACL_PROGRAM);
1506 
1507         vers = req->rq_vers;
1508 
1509         svstat = ng->svstat[req->rq_vers];
1510         procstat = (req->rq_prog == NFS_PROGRAM) ?
1511             ng->rfsproccnt[vers] : ng->aclproccnt[vers];
1512 
1513         if (vers < min_vers || vers > max_vers) {
1514                 svcerr_progvers(req->rq_xprt, min_vers, max_vers);
1515                 error++;
1516                 cmn_err(CE_NOTE, "%s: bad version number %u", pgmname, vers);
1517                 goto done;
1518         }
1519         vers -= min_vers;
1520 
1521         which = req->rq_proc;
1522         if (which < 0 || which >= disptable[(int)vers].dis_nprocs) {
1523                 svcerr_noproc(req->rq_xprt);
1524                 error++;
1525                 goto done;
1526         }
1527 
1528         procstat[which].value.ui64++;
1529 
1530         disp = &disptable[(int)vers].dis_table[which];
1531         procnames = disptable[(int)vers].dis_procnames;
1532 
1533         auth_flavor = req->rq_cred.oa_flavor;
1534 
1535         /*
1536          * Deserialize into the args struct.
1537          */
1538         args = (char *)&args_buf;
1539 
1540 #ifdef DEBUG
1541         if (rfs_no_fast_xdrargs || (auth_flavor == RPCSEC_GSS) ||
1542             disp->dis_fastxdrargs == NULL_xdrproc_t ||
1543             !SVC_GETARGS(xprt, disp->dis_fastxdrargs, (char *)&args))
1544 #else
1545         if ((auth_flavor == RPCSEC_GSS) ||
1546             disp->dis_fastxdrargs == NULL_xdrproc_t ||
1547             !SVC_GETARGS(xprt, disp->dis_fastxdrargs, (char *)&args))
1548 #endif
 
1614                  * client's mount operation to fail.  As a work-around,
1615                  * we check here to see if the request is a getattr or
1616                  * statfs operation on the exported vnode itself, and
1617                  * pass a flag to checkauth with the result of this test.
1618                  *
1619                  * The filehandle refers to the mountpoint itself if
1620                  * the fh_data and fh_xdata portions of the filehandle
1621                  * are equal.
1622                  *
1623                  * Added anon_ok argument to checkauth().
1624                  */
1625 
1626                 if ((dis_flags & RPC_ALLOWANON) && EQFID(fid, xfid))
1627                         anon_ok = 1;
1628                 else
1629                         anon_ok = 0;
1630 
1631                 cr = xprt->xp_cred;
1632                 ASSERT(cr != NULL);
1633 #ifdef DEBUG
1634                 {
1635                         if (crgetref(cr) != 1) {
1636                                 crfree(cr);
1637                                 cr = crget();
1638                                 xprt->xp_cred = cr;
1639                                 cred_misses++;
1640                         } else
1641                                 cred_hits++;
1642                 }
1643 #else
1644                 if (crgetref(cr) != 1) {
1645                         crfree(cr);
1646                         cr = crget();
1647                         xprt->xp_cred = cr;
1648                 }
1649 #endif
1650 
1651                 exi = checkexport(fsid, xfid);
1652 
1653                 if (exi != NULL) {
1654                         publicfh_ok = PUBLICFH_CHECK(ne, disp, exi, fsid, xfid);
1655 
1656                         /*
1657                          * Don't allow non-V4 clients access
1658                          * to pseudo exports
1659                          */
1660                         if (PSEUDO(exi)) {
1661                                 svcerr_weakauth(xprt);
1662                                 error++;
1663                                 goto done;
1664                         }
1665 
1666                         authres = checkauth(exi, req, cr, anon_ok, publicfh_ok,
1667                             &ro);
1668                         /*
1669                          * authres >  0: authentication OK - proceed
1670                          * authres == 0: authentication weak - return error
1671                          * authres <  0: authentication timeout - drop
1672                          */
1673                         if (authres <= 0) {
1674                                 if (authres == 0) {
 
1747                                 SVC_FREERES(xprt);
1748                         error++;
1749                         goto done;
1750                 }
1751         }
1752 
1753         if (auth_tooweak(req, res)) {
1754                 svcerr_weakauth(xprt);
1755                 error++;
1756                 goto done;
1757         }
1758 
1759         /*
1760          * Check to see if logging has been enabled on the server.
1761          * If so, then obtain the export info struct to be used for
1762          * the later writing of the log record.  This is done for
1763          * the case that a lookup is done across a non-logged public
1764          * file system.
1765          */
1766         if (nfslog_buffer_list != NULL) {
1767                 nfslog_exi = nfslog_get_exi(ne, exi, req, res, &nfslog_rec_id);
1768                 /*
1769                  * Is logging enabled?
1770                  */
1771                 logging_enabled = (nfslog_exi != NULL);
1772 
1773                 /*
1774                  * Copy the netbuf for logging purposes, before it is
1775                  * freed by svc_sendreply().
1776                  */
1777                 if (logging_enabled) {
1778                         NFSLOG_COPY_NETBUF(nfslog_exi, xprt, &nb);
1779                         /*
1780                          * If RPC_MAPRESP flag set (i.e. in V2 ops) the
1781                          * res gets copied directly into the mbuf and
1782                          * may be freed soon after the sendreply. So we
1783                          * must copy it here to a safe place...
1784                          */
1785                         if (res != (char *)&res_buf) {
1786                                 bcopy(res, (char *)&res_buf, disp->dis_ressz);
1787                         }
 
1830 
1831 done:
1832         /*
1833          * Free arguments struct
1834          */
1835         if (disp) {
1836                 if (!SVC_FREEARGS(xprt, disp->dis_xdrargs, args)) {
1837                         cmn_err(CE_NOTE, "%s: bad freeargs", pgmname);
1838                         error++;
1839                 }
1840         } else {
1841                 if (!SVC_FREEARGS(xprt, (xdrproc_t)0, (caddr_t)0)) {
1842                         cmn_err(CE_NOTE, "%s: bad freeargs", pgmname);
1843                         error++;
1844                 }
1845         }
1846 
1847         if (exi != NULL)
1848                 exi_rele(exi);
1849 
1850         svstat[NFS_BADCALLS].value.ui64 += error;
1851         svstat[NFS_CALLS].value.ui64++;
1852 }
1853 
1854 static void
1855 rfs_dispatch(struct svc_req *req, SVCXPRT *xprt)
1856 {
             /*
              * All of the work is done by common_dispatch(), which accepts
              * versions NFS_VERSMIN through NFS_VERSMAX, looks procedures up
              * in rfs_disptable, and uses "NFS" as the program name in its
              * messages.
              */
1857         common_dispatch(req, xprt, NFS_VERSMIN, NFS_VERSMAX,
1858             "NFS", rfs_disptable);
1859 }
1860 
1861 static char *aclcallnames_v2[] = {
             /* In procedure-number order: ACL2_NULL = 0, ACL2_GETACL = 1, ... */
1862         "ACL2_NULL",
1863         "ACL2_GETACL",
1864         "ACL2_SETACL",
1865         "ACL2_GETATTR",
1866         "ACL2_ACCESS",
1867         "ACL2_GETXATTRDIR"
1868 };
1869 
1870 static struct rpcdisp acldisptab_v2[] = {
1871         /*
 
1954             acl3_getacl_getfh},
1955 
1956         /* ACL3_SETACL = 2 */
1957         {acl3_setacl,
1958             xdr_SETACL3args, NULL_xdrproc_t, sizeof (SETACL3args),
1959             xdr_SETACL3res, NULL_xdrproc_t, sizeof (SETACL3res),
1960             nullfree, 0,
1961             acl3_setacl_getfh},
1962 
1963         /* ACL3_GETXATTRDIR = 3 */
1964         {acl3_getxattrdir,
1965             xdr_GETXATTRDIR3args, NULL_xdrproc_t, sizeof (GETXATTRDIR3args),
1966             xdr_GETXATTRDIR3res, NULL_xdrproc_t, sizeof (GETXATTRDIR3res),
1967             nullfree, RPC_IDEMPOTENT,
1968             acl3_getxattrdir_getfh},
1969 };
1970 
1971 static struct rpc_disptable acl_disptable[] = {
             /*
              * One entry per NFS_ACL version, in version order:
              * common_dispatch() indexes this array with (rq_vers - min_vers),
              * and acl_dispatch() passes NFS_ACL_VERSMIN as min_vers.
              */
1972         {sizeof (acldisptab_v2) / sizeof (acldisptab_v2[0]),
1973                 aclcallnames_v2,
1974                 acldisptab_v2},
1975         {sizeof (acldisptab_v3) / sizeof (acldisptab_v3[0]),
1976                 aclcallnames_v3,
1977                 acldisptab_v3},
1978 };
1979 
1980 static void
1981 acl_dispatch(struct svc_req *req, SVCXPRT *xprt)
1982 {
             /*
              * Same path as rfs_dispatch(), but with the NFS_ACL version
              * range, acl_disptable, and "ACL" as the program name used in
              * common_dispatch()'s messages.
              */
1983         common_dispatch(req, xprt, NFS_ACL_VERSMIN, NFS_ACL_VERSMAX,
1984             "ACL", acl_disptable);
1985 }
1986 
1987 int
1988 checkwin(int flavor, int window, struct svc_req *req)
1989 {
1990         struct authdes_cred *adc;
1991 
1992         switch (flavor) {
1993         case AUTH_DES:
1994                 adc = (struct authdes_cred *)req->rq_clntcred;
1995                 CTASSERT(sizeof (struct authdes_cred) <= RQCRED_SIZE);
1996                 if (adc->adc_fullname.window > window)
1997                         return (0);
 
2551 
2552         } else {
2553 
2554                 /*
2555                  * No IP address to print. If there was a host name
2556                  * printed, then we print a space.
2557                  */
2558                 (void) sprintf(buf, frontspace);
2559         }
2560 
2561         return (buf);
2562 }
2563 
2564 /*
2565  * NFS Server initialization routine.  This routine should only be called
2566  * once.  It performs the following tasks:
2567  *      - Call sub-initialization routines (localize access to variables)
2568  *      - Initialize all locks
2569  *      - initialize the version 3 write verifier
      *
      * nfs_srvfini() undoes each step performed here, in the opposite order.
2570  */
2571 void
2572 nfs_srvinit(void)
2573 {
2574 
2575         /* Truly global stuff in this module (not per zone) */
2576         rw_init(&nfssrv_globals_rwl, NULL, RW_DEFAULT, NULL);
2577         list_create(&nfssrv_globals_list, sizeof (nfs_globals_t),
2578             offsetof(nfs_globals_t, nfs_g_link));
2579         tsd_create(&nfs_server_tsd_key, NULL);
2580 
2581         /* The order here is important */
2582         nfs_exportinit();
2583         rfs_srvrinit();
2584         rfs3_srvrinit();
2585         rfs4_srvrinit();
2586         nfsauth_init();
2587 
2588         /*
2589          * NFS server zone-specific global variables
2590          * Note the zone_init is called for the GZ here.
              * The key is registered last, with nfs_server_zone_init,
              * nfs_server_zone_shutdown and nfs_server_zone_fini as its
              * callbacks.
2591          */
2592         zone_key_create(&nfssrv_zone_key, nfs_server_zone_init,
2593             nfs_server_zone_shutdown, nfs_server_zone_fini);
2594 }
2595 
2596 /*
2597  * NFS Server finalization routine. This routine is called to cleanup the
2598  * initialization work previously performed if the NFS server module could
2599  * not be loaded correctly.
      *
      * The steps mirror nfs_srvinit(), last-initialized first: the zone key,
      * then the sub-module fini routines, then the module-wide objects.
2600  */
2601 void
2602 nfs_srvfini(void)
2603 {
2604 
2605         /*
2606          * NFS server zone-specific global variables
2607          * Note the zone_fini is called for the GZ here.
              * The return value of zone_key_delete() is deliberately ignored.
2608          */
2609         (void) zone_key_delete(nfssrv_zone_key);
2610 
2611         /* The order here is important (reverse of init) */
2612         nfsauth_fini();
2613         rfs4_srvrfini();
2614         rfs3_srvrfini();
2615         rfs_srvrfini();
2616         nfs_exportfini();
2617 
2618         /* Truly global stuff in this module (not per zone) */
2619         tsd_destroy(&nfs_server_tsd_key);
2620         list_destroy(&nfssrv_globals_list);
2621         rw_destroy(&nfssrv_globals_rwl);
2622 }
2623 
2624 /*
2625  * Zone init, shutdown, fini functions for the NFS server
2626  *
2627  * This design is careful to create the entire hierarchy of
2628  * NFS server "globals" (including those created by various
2629  * per-module *_zone_init functions, etc.) so that all these
2630  * objects have exactly the same lifetime.
2631  *
2632  * These objects are also kept on a list for two reasons:
2633  * 1: It makes finding these in mdb _much_ easier.
2634  * 2: It allows operating across all zone globals for
2635  *    functions like nfs_auth.c:exi_cache_reclaim
2636  */
2637 static void *
2638 nfs_server_zone_init(zoneid_t zoneid)
2639 {
2640         nfs_globals_t *ng;
2641 
             /* Zero-filled, so any field not set below starts out as zero */
2642         ng = kmem_zalloc(sizeof (*ng), KM_SLEEP);
2643 
2644         ng->nfs_versmin = NFS_VERSMIN_DEFAULT;
2645         ng->nfs_versmax = NFS_VERSMAX_DEFAULT;
2646 
2647         /* Init the stuff to control start/stop */
2648         ng->nfs_server_upordown = NFS_SERVER_STOPPED;
2649         mutex_init(&ng->nfs_server_upordown_lock, NULL, MUTEX_DEFAULT, NULL);
2650         cv_init(&ng->nfs_server_upordown_cv, NULL, CV_DEFAULT, NULL);
2651         mutex_init(&ng->rdma_wait_mutex, NULL, MUTEX_DEFAULT, NULL);
2652         cv_init(&ng->rdma_wait_cv, NULL, CV_DEFAULT, NULL);
2653 
2654         ng->nfs_zoneid = zoneid;
2655 
2656         /*
2657          * Order here is important.
2658          * export init must precede srv init calls.
2659          */
2660         nfs_export_zone_init(ng);
2661         rfs_stat_zone_init(ng);
2662         rfs_srv_zone_init(ng);
2663         rfs3_srv_zone_init(ng);
2664         rfs4_srv_zone_init(ng);
2665         nfsauth_zone_init(ng);
2666 
             /* Everything is set up; only now put it on the global list */
2667         rw_enter(&nfssrv_globals_rwl, RW_WRITER);
2668         list_insert_tail(&nfssrv_globals_list, ng);
2669         rw_exit(&nfssrv_globals_rwl);
2670 
             /*
              * NOTE(review): presumably this pointer is what the shutdown and
              * fini callbacks later receive as "data" -- confirm against the
              * zone key interface.
              */
2671         return (ng);
2672 }
2673 
2674 /* ARGSUSED */
2675 static void
2676 nfs_server_zone_shutdown(zoneid_t zoneid, void *data)
2677 {
2678         nfs_globals_t *ng;
2679 
             /* data is treated as this zone's nfs_globals_t; zoneid is unused */
2680         ng = (nfs_globals_t *)data;
2681 
2682         /*
2683          * Order is like _fini, but only
2684          * some modules need this hook.
2685          */
2686         nfsauth_zone_shutdown(ng);
2687         nfs_export_zone_shutdown(ng);
2688 }
2689 
2690 /* ARGSUSED */
2691 static void
2692 nfs_server_zone_fini(zoneid_t zoneid, void *data)
2693 {
2694         nfs_globals_t *ng;
2695 
             /* data is treated as this zone's nfs_globals_t; zoneid is unused */
2696         ng = (nfs_globals_t *)data;
2697 
             /* Take it off the global list before tearing anything down */
2698         rw_enter(&nfssrv_globals_rwl, RW_WRITER);
2699         list_remove(&nfssrv_globals_list, ng);
2700         rw_exit(&nfssrv_globals_rwl);
2701 
2702         /*
2703          * Order here is important.
2704          * reverse order from init
2705          */
2706         nfsauth_zone_fini(ng);
2707         rfs4_srv_zone_fini(ng);
2708         rfs3_srv_zone_fini(ng);
2709         rfs_srv_zone_fini(ng);
2710         rfs_stat_zone_fini(ng);
2711         nfs_export_zone_fini(ng);
2712 
             /* The locks and cvs that nfs_server_zone_init() initialized */
2713         mutex_destroy(&ng->nfs_server_upordown_lock);
2714         cv_destroy(&ng->nfs_server_upordown_cv);
2715         mutex_destroy(&ng->rdma_wait_mutex);
2716         cv_destroy(&ng->rdma_wait_cv);
2717 
             /* Matches the kmem_zalloc() in nfs_server_zone_init() */
2718         kmem_free(ng, sizeof (*ng));
2719 }
2720 
2721 /*
2722  * Fill iovp[] from the mblk chain m: one entry per mblk, at most cnt.
2723  */
2724 void
2725 mblk_to_iov(mblk_t *m, int cnt, struct iovec *iovp)
2726 {
2727         int i;
2728 
2729         for (i = 0; m != NULL && i < cnt; i++, m = m->b_cont) {
2730                 iovp[i].iov_base = (caddr_t)m->b_rptr;
2731                 iovp[i].iov_len = (m->b_wptr - m->b_rptr);
2732         }
2733 }
2734 
2735 /*
2736  * Common code between NFS Version 2 and NFS Version 3 for the public
2737  * filehandle multicomponent lookups.
2738  */
2739 
2740 /*
2741  * Public filehandle evaluation of a multi-component lookup, following
2742  * symbolic links, if necessary. This may result in a vnode in another
2743  * filesystem, which is OK as long as the other filesystem is exported.
 
2941 
2942                 if (error == ENOENT) {
2943                         *vpp = tvp;
2944                         mc_dvp = tmc_dvp;
2945                         error = 0;
2946                 } else {        /* ok or error other than ENOENT */
2947                         if (tmc_dvp)
2948                                 VN_RELE(tmc_dvp);
2949                         if (error)
2950                                 goto publicfh_done;
2951 
2952                         /*
2953                          * Found a valid vp for index "filename". Sanity check
2954                          * for odd case where a directory is provided as index
2955                          * option argument and leads us to another filesystem
2956                          */
2957 
2958                         /* Release the reference on the old exi value */
2959                         ASSERT(*exi != NULL);
2960                         exi_rele(*exi);
2961                         *exi = NULL;
2962 
2963                         if (error = nfs_check_vpexi(mc_dvp, *vpp, kcred, exi)) {
2964                                 VN_RELE(*vpp);
2965                                 goto publicfh_done;
2966                         }
2967                         /* Have a new *exi */
2968                 }
2969         }
2970 
2971 publicfh_done:
2972         if (mc_dvp)
2973                 VN_RELE(mc_dvp);
2974 
2975         return (error);
2976 }
2977 
2978 /*
2979  * Evaluate a multi-component path, starting at startdvp or, for a path
2980  * that begins with '/', at the zone root.  Returns 0 or an error value.
2981  */
2982 int
2983 rfs_pathname(
2984         char *path,                     /* pathname to evaluate */
2985         vnode_t **dirvpp,               /* ret for ptr to parent dir vnode */
2986         vnode_t **compvpp,              /* ret for ptr to component vnode */
2987         vnode_t *startdvp,              /* starting vnode */
2988         cred_t *cr,                     /* user's credential */
2989         int pathflag)                   /* flag to identify path, e.g. URL */
2990 {
2991         struct pathname lpn;
2992         char nbuf[TYPICALMAXPATHLEN];
2993         int err;
2994 
2995         ASSERT3U(crgetzoneid(cr), ==, curzone->zone_id);
2996 
2997         /* Absolute path: skip every leading '/' and start at the root. */
2998         if (*path == '/') {
2999                 do {
3000                         path++;
3001                 } while (*path == '/');
3002                 startdvp = ZONE_ROOTVP();
3003         }
3004 
3005         err = pn_get_buf(path, UIO_SYSSPACE, &lpn, nbuf, sizeof (nbuf));
3006         if (err == 0) {
3007                 /*
3008                  * For URL paths, let the URL parser rewrite the string in
3009                  * place so that any '%' encoded characters are handled;
3010                  * doing it here avoids an extra bcopy in the lookup.  Mind
3011                  * the path length: a path that was nothing but /'s is empty
3012                  * by now yet still valid to look up, whereas a path that the
3013                  * URL parser empties (an encoded null character at its very
3014                  * beginning) must not proceed with the lookup.
3015                  */
3016                 if (pathflag == URLPATH && lpn.pn_pathlen != 0) {
3017                         URLparse(lpn.pn_path);
3018                         lpn.pn_pathlen = strlen(lpn.pn_path);
3019                         if (lpn.pn_pathlen == 0)
3020                                 return (ENOENT);
3021                 }
3022                 VN_HOLD(startdvp);
3023                 err = lookuppnvp(&lpn, NULL, NO_FOLLOW, dirvpp, compvpp,
3024                     ZONE_ROOTVP(), startdvp, cr);
3025         }
3026         if (err != ENAMETOOLONG)
3027                 return (err);
3028 
3029         /*
3030          * This thread used a pathname > TYPICALMAXPATHLEN bytes long.
3031          */
3032         if ((err = pn_get(path, UIO_SYSSPACE, &lpn)) != 0)
3033                 return (err);
3034         if (pathflag == URLPATH && lpn.pn_pathlen != 0) {
3035                 URLparse(lpn.pn_path);
3036                 lpn.pn_pathlen = strlen(lpn.pn_path);
3037                 if (lpn.pn_pathlen == 0) {
3038                         pn_free(&lpn);
3039                         return (ENOENT);
3040                 }
3041         }
3042         VN_HOLD(startdvp);
3043         err = lookuppnvp(&lpn, NULL, NO_FOLLOW, dirvpp, compvpp,
3044             ZONE_ROOTVP(), startdvp, cr);
3045         pn_free(&lpn);
3046 
3047         return (err);
3048 }
3049 
3050 /*
3051  * Adapt the multicomponent lookup path depending on the pathtype
3052  */
3053 static int
3054 MCLpath(char **path)
3055 {
3056         unsigned char c = (unsigned char)**path;
3057 
3058         /*
3059          * If the first character of the MCL path is between 0x20 and
3060          * 0x7E (a graphic printable character of the US-ASCII coded
3061          * character set), it's a URL path, per RFC 1738.
3062          */
3063         if (c >= 0x20 && c <= 0x7E)
 
3127         int walk;
3128         int error = 0;
3129 
3130         *exi = nfs_vptoexi(mc_dvp, vp, cr, &walk, NULL, FALSE);
3131         if (*exi == NULL)
3132                 error = EACCES;
3133         else {
3134                 /*
3135                  * If nosub is set for this export then
3136                  * a lookup relative to the public fh
3137                  * must not terminate below the
3138                  * exported directory.
3139                  */
3140                 if ((*exi)->exi_export.ex_flags & EX_NOSUB && walk > 0)
3141                         error = EACCES;
3142         }
3143 
3144         return (error);
3145 }
3146 
3147 /*
3148  * Used by NFSv3 and NFSv4 server to query label of
3149  * a pathname component during lookup/access ops.
3150  */
3151 ts_label_t *
3152 nfs_getflabel(vnode_t *vp, struct exportinfo *exi)
3153 {
3154         zone_t *zone;
3155         ts_label_t *zone_label;
3156         char *path;
3157 
3158         mutex_enter(&vp->v_lock);
3159         if (vp->v_path != vn_vpath_empty) {
3160                 zone = zone_find_by_any_path(vp->v_path, B_FALSE);
3161                 mutex_exit(&vp->v_lock);
3162         } else {
3163                 /*
3164                  * v_path not cached. Fall back on pathname of exported
3165                  * file system as we rely on pathname from which we can
3166                  * derive a label. The exported file system portion of
 
 |