1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 1990, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright (c) 2011 Bayard G. Bell. All rights reserved.
  24  * Copyright (c) 2013 by Delphix. All rights reserved.
  25  * Copyright 2014 Nexenta Systems, Inc.  All rights reserved.
  26  * Copyright (c) 2017 Joyent Inc
  27  */
  28 
  29 /*
  30  *      Copyright (c) 1983,1984,1985,1986,1987,1988,1989  AT&T.
  31  *      All rights reserved.
  32  *      Use is subject to license terms.
  33  */
  34 
  35 #include <sys/param.h>
  36 #include <sys/types.h>
  37 #include <sys/systm.h>
  38 #include <sys/cred.h>
  39 #include <sys/proc.h>
  40 #include <sys/user.h>
  41 #include <sys/buf.h>
  42 #include <sys/vfs.h>
  43 #include <sys/vnode.h>
  44 #include <sys/pathname.h>
  45 #include <sys/uio.h>
  46 #include <sys/file.h>
  47 #include <sys/stat.h>
  48 #include <sys/errno.h>
  49 #include <sys/socket.h>
  50 #include <sys/sysmacros.h>
  51 #include <sys/siginfo.h>
  52 #include <sys/tiuser.h>
  53 #include <sys/statvfs.h>
  54 #include <sys/stream.h>
 
 
  66 #include <sys/vtrace.h>
  67 #include <sys/mode.h>
  68 #include <sys/acl.h>
  69 #include <sys/sdt.h>
  70 #include <sys/debug.h>
  71 
  72 #include <rpc/types.h>
  73 #include <rpc/auth.h>
  74 #include <rpc/auth_unix.h>
  75 #include <rpc/auth_des.h>
  76 #include <rpc/svc.h>
  77 #include <rpc/xdr.h>
  78 #include <rpc/rpc_rdma.h>
  79 
  80 #include <nfs/nfs.h>
  81 #include <nfs/export.h>
  82 #include <nfs/nfssys.h>
  83 #include <nfs/nfs_clnt.h>
  84 #include <nfs/nfs_acl.h>
  85 #include <nfs/nfs_log.h>
  86 #include <nfs/nfs_cmd.h>
  87 #include <nfs/lm.h>
  88 #include <nfs/nfs_dispatch.h>
  89 #include <nfs/nfs4_drc.h>
  90 
  91 #include <sys/modctl.h>
  92 #include <sys/cladm.h>
  93 #include <sys/clconf.h>
  94 
  95 #include <sys/tsol/label.h>
  96 
  97 #define MAXHOST 32
  98 const char *kinet_ntop6(uchar_t *, char *, size_t);
  99 
/*
 * Module linkage information.
 *
 * modlmisc describes this module to the module framework (mod_miscops
 * with a human-readable name).  modlinkage wraps it in a
 * NULL-terminated list; it is the object handed to mod_install() in
 * _init() and to mod_info() in _info().
 */

static struct modlmisc modlmisc = {
        &mod_miscops, "NFS server module"
};

static struct modlinkage modlinkage = {
        MODREV_1, (void *)&modlmisc, NULL
};
 111 
 112 kmem_cache_t *nfs_xuio_cache;
 113 int nfs_loaned_buffers = 0;
 114 
 115 int
 116 _init(void)
 117 {
 118         int status;
 119 
 120         if ((status = nfs_srvinit()) != 0) {
 121                 cmn_err(CE_WARN, "_init: nfs_srvinit failed");
 122                 return (status);
 123         }
 124 
 125         status = mod_install((struct modlinkage *)&modlinkage);
 126         if (status != 0) {
 127                 /*
 128                  * Could not load module, cleanup previous
 129                  * initialization work.
 130                  */
 131                 nfs_srvfini();
 132 
 133                 return (status);
 134         }
 135 
 136         /*
 137          * Initialise some placeholders for nfssys() calls. These have
 138          * to be declared by the nfs module, since that handles nfssys()
 139          * calls - also used by NFS clients - but are provided by this
 140          * nfssrv module. These also then serve as confirmation to the
 141          * relevant code in nfs that nfssrv has been loaded, as they're
 142          * initially NULL.
 143          */
 
 160 {
 161         return (EBUSY);
 162 }
 163 
 164 int
 165 _info(struct modinfo *modinfop)
 166 {
 167         return (mod_info(&modlinkage, modinfop));
 168 }
 169 
 170 /*
 171  * PUBLICFH_CHECK() checks if the dispatch routine supports
 172  * RPC_PUBLICFH_OK, if the filesystem is exported public, and if the
 173  * incoming request is using the public filehandle. The check duplicates
 174  * the exportmatch() call done in checkexport(), and we should consider
 175  * modifying those routines to avoid the duplication. For now, we optimize
 176  * by calling exportmatch() only after checking that the dispatch routine
 177  * supports RPC_PUBLICFH_OK, and if the filesystem is explicitly exported
 178  * public (i.e., not the placeholder).
 179  */
 180 #define PUBLICFH_CHECK(disp, exi, fsid, xfid) \
 181                 ((disp->dis_flags & RPC_PUBLICFH_OK) && \
 182                 ((exi->exi_export.ex_flags & EX_PUBLIC) || \
 183                 (exi == exi_public && exportmatch(exi_root, \
 184                 fsid, xfid))))
 185 
 186 static void     nfs_srv_shutdown_all(int);
 187 static void     rfs4_server_start(int);
 188 static void     nullfree(void);
 189 static void     rfs_dispatch(struct svc_req *, SVCXPRT *);
 190 static void     acl_dispatch(struct svc_req *, SVCXPRT *);
 191 static void     common_dispatch(struct svc_req *, SVCXPRT *,
 192                 rpcvers_t, rpcvers_t, char *,
 193                 struct rpc_disptable *);
 194 static void     hanfsv4_failover(void);
 195 static  int     checkauth(struct exportinfo *, struct svc_req *, cred_t *, int,
 196                 bool_t, bool_t *);
 197 static char     *client_name(struct svc_req *req);
 198 static char     *client_addr(struct svc_req *req, char *buf);
 199 extern  int     sec_svc_getcred(struct svc_req *, cred_t *cr, char **, int *);
 200 extern  bool_t  sec_svc_inrootlist(int, caddr_t, int, caddr_t *);
 201 
 202 #define NFSLOG_COPY_NETBUF(exi, xprt, nb)       {               \
 203         (nb)->maxlen = (xprt)->xp_rtaddr.maxlen;          \
 204         (nb)->len = (xprt)->xp_rtaddr.len;                        \
 205         (nb)->buf = kmem_alloc((nb)->len, KM_SLEEP);              \
 206         bcopy((xprt)->xp_rtaddr.buf, (nb)->buf, (nb)->len);    \
 207         }
 208 
 209 /*
 210  * Public Filehandle common nfs routines
 211  */
 212 static int      MCLpath(char **);
 213 static void     URLparse(char *);
 214 
 215 /*
 216  * NFS callout table.
 217  * This table is used by svc_getreq() to dispatch a request with
 218  * a given prog/vers pair to an appropriate service provider
 219  * dispatch routine.
 220  *
 
 231         __nfs_sc_clts
 232 };
 233 
/*
 * Callout entries behind nfs_sct_cots (presumably the
 * connection-oriented transports -- confirm against the transport
 * type switch in nfs_srv_set_sc_versions()).  Entry 0 is the NFS
 * program, entry 1 the NFS_ACL program.  The version ranges given here
 * are only initial values: nfs_srv_set_sc_versions() rewrites at least
 * the NFS_ACL entry before handing out &nfs_sct_cots.
 */
static SVC_CALLOUT __nfs_sc_cots[] = {
        { NFS_PROGRAM,     NFS_VERSMIN,     NFS_VERSMAX,        rfs_dispatch },
        { NFS_ACL_PROGRAM, NFS_ACL_VERSMIN, NFS_ACL_VERSMAX,    acl_dispatch }
};

/* Table header: entry count computed from the array, then the array. */
static SVC_CALLOUT_TABLE nfs_sct_cots = {
        sizeof (__nfs_sc_cots) / sizeof (__nfs_sc_cots[0]), FALSE, __nfs_sc_cots
};

/*
 * Callout entries for the RDMA transport.  rdma_start() overwrites the
 * version range of both entries from its arguments before passing
 * &nfs_sct_rdma to svc_rdma_kcreate().
 */
static SVC_CALLOUT __nfs_sc_rdma[] = {
        { NFS_PROGRAM,     NFS_VERSMIN,     NFS_VERSMAX,        rfs_dispatch },
        { NFS_ACL_PROGRAM, NFS_ACL_VERSMIN, NFS_ACL_VERSMAX,    acl_dispatch }
};

/* Table header: entry count computed from the array, then the array. */
static SVC_CALLOUT_TABLE nfs_sct_rdma = {
        sizeof (__nfs_sc_rdma) / sizeof (__nfs_sc_rdma[0]), FALSE, __nfs_sc_rdma
};
/*
 * Currently configured NFS version range.  Both nfs_svc() and
 * rdma_start() store the validated range here, falling back to the
 * defaults when the requested range is out of bounds.
 */
rpcvers_t nfs_versmin = NFS_VERSMIN_DEFAULT;
rpcvers_t nfs_versmax = NFS_VERSMAX_DEFAULT;
 253 
/*
 * Used to track the state of the server so that initialization
 * can be done properly.
 *
 * Transitions visible in this file:
 *   RUNNING  -> OFFLINE             nfs_srv_offline()
 *   OFFLINE  -> STOPPING -> STOPPED nfs_srv_shutdown_all(0)
 *   RUNNING or OFFLINE -> QUIESCED  nfs_srv_shutdown_all(1)
 *   STOPPED or QUIESCED -> RUNNING  rfs4_server_start()
 *
 * NFS_SERVER_STOPPED is the first enumerator, so it is the value the
 * zero-initialized static nfs_server_upordown starts with.
 */
typedef enum {
        NFS_SERVER_STOPPED,     /* server state destroyed */
        NFS_SERVER_STOPPING,    /* server state being destroyed */
        NFS_SERVER_RUNNING,     /* server state initialized and in use */
        NFS_SERVER_QUIESCED,    /* server state preserved */
        NFS_SERVER_OFFLINE      /* server pool offline */
} nfs_server_running_t;

/*
 * Current state.  Reads and writes are done with
 * nfs_server_upordown_lock held; nfs_server_upordown_cv is signalled
 * when a state change may unblock a waiter in rfs4_server_start().
 */
static nfs_server_running_t nfs_server_upordown;
static kmutex_t nfs_server_upordown_lock;
static  kcondvar_t nfs_server_upordown_cv;
 269 
 270 /*
 271  * DSS: distributed stable storage
 272  * lists of all DSS paths: current, and before last warmstart
 273  */
 274 nvlist_t *rfs4_dss_paths, *rfs4_dss_oldpaths;
 275 
 276 int rfs4_dispatch(struct rpcdisp *, struct svc_req *, SVCXPRT *, char *);
 277 bool_t rfs4_minorvers_mismatch(struct svc_req *, SVCXPRT *, void *);
 278 
 279 /*
 280  * RDMA wait variables.
 281  */
 282 static kcondvar_t rdma_wait_cv;
 283 static kmutex_t rdma_wait_mutex;
 284 
 285 /*
 286  * Will be called at the point the server pool is being unregistered
 287  * from the pool list. From that point onwards, the pool is waiting
 288  * to be drained and as such the server state is stale and pertains
 289  * to the old instantiation of the NFS server pool.
 290  */
 291 void
 292 nfs_srv_offline(void)
 293 {
 294         mutex_enter(&nfs_server_upordown_lock);
 295         if (nfs_server_upordown == NFS_SERVER_RUNNING) {
 296                 nfs_server_upordown = NFS_SERVER_OFFLINE;
 297         }
 298         mutex_exit(&nfs_server_upordown_lock);
 299 }
 300 
 301 /*
 302  * Will be called at the point the server pool is being destroyed so
 303  * all transports have been closed and no service threads are in
 304  * existence.
 305  *
 306  * If we quiesce the server, we're shutting it down without destroying the
 307  * server state. This allows it to warm start subsequently.
 308  */
 309 void
 310 nfs_srv_stop_all(void)
 311 {
 312         int quiesce = 0;
 313         nfs_srv_shutdown_all(quiesce);
 314 }
 315 
 316 /*
 317  * This alternative shutdown routine can be requested via nfssys()
 318  */
 319 void
 320 nfs_srv_quiesce_all(void)
 321 {
 322         int quiesce = 1;
 323         nfs_srv_shutdown_all(quiesce);
 324 }
 325 
 326 static void
 327 nfs_srv_shutdown_all(int quiesce) {
 328         mutex_enter(&nfs_server_upordown_lock);
 329         if (quiesce) {
 330                 if (nfs_server_upordown == NFS_SERVER_RUNNING ||
 331                         nfs_server_upordown == NFS_SERVER_OFFLINE) {
 332                         nfs_server_upordown = NFS_SERVER_QUIESCED;
 333                         cv_signal(&nfs_server_upordown_cv);
 334 
 335                         /* reset DSS state, for subsequent warm restart */
 336                         rfs4_dss_numnewpaths = 0;
 337                         rfs4_dss_newpaths = NULL;
 338 
 339                         cmn_err(CE_NOTE, "nfs_server: server is now quiesced; "
 340                             "NFSv4 state has been preserved");
 341                 }
 342         } else {
 343                 if (nfs_server_upordown == NFS_SERVER_OFFLINE) {
 344                         nfs_server_upordown = NFS_SERVER_STOPPING;
 345                         mutex_exit(&nfs_server_upordown_lock);
 346                         rfs4_state_fini();
 347                         rfs4_fini_drc(nfs4_drc);
 348                         mutex_enter(&nfs_server_upordown_lock);
 349                         nfs_server_upordown = NFS_SERVER_STOPPED;
 350                         cv_signal(&nfs_server_upordown_cv);
 351                 }
 352         }
 353         mutex_exit(&nfs_server_upordown_lock);
 354 }
 355 
 356 static int
 357 nfs_srv_set_sc_versions(struct file *fp, SVC_CALLOUT_TABLE **sctpp,
 358                         rpcvers_t versmin, rpcvers_t versmax)
 359 {
 360         struct strioctl strioc;
 361         struct T_info_ack tinfo;
 362         int             error, retval;
 363 
 364         /*
 365          * Find out what type of transport this is.
 366          */
 367         strioc.ic_cmd = TI_GETINFO;
 368         strioc.ic_timout = -1;
 369         strioc.ic_len = sizeof (tinfo);
 370         strioc.ic_dp = (char *)&tinfo;
 371         tinfo.PRIM_type = T_INFO_REQ;
 372 
 373         error = strioctl(fp->f_vnode, I_STR, (intptr_t)&strioc, 0, K_TO_K,
 
 401                         versmax = NFS_ACL_VERSMAX;
 402                 __nfs_sc_cots[1].sc_versmin = versmin;
 403                 __nfs_sc_cots[1].sc_versmax = versmax;
 404                 *sctpp = &nfs_sct_cots;
 405                 break;
 406         default:
 407                 error = EINVAL;
 408         }
 409 
 410         return (error);
 411 }
 412 
 413 /*
 414  * NFS Server system call.
 415  * Does all of the work of running a NFS server.
 416  * uap->fd is the fd of an open transport provider
 417  */
 418 int
 419 nfs_svc(struct nfs_svc_args *arg, model_t model)
 420 {
 421         file_t *fp;
 422         SVCMASTERXPRT *xprt;
 423         int error;
 424         int readsize;
 425         char buf[KNC_STRSIZE];
 426         size_t len;
 427         STRUCT_HANDLE(nfs_svc_args, uap);
 428         struct netbuf addrmask;
 429         SVC_CALLOUT_TABLE *sctp = NULL;
 430 
 431 #ifdef lint
 432         model = model;          /* STRUCT macros don't always refer to it */
 433 #endif
 434 
 435         STRUCT_SET_HANDLE(uap, model, arg);
 436 
 437         /* Check privileges in nfssys() */
 438 
 439         if ((fp = getf(STRUCT_FGET(uap, fd))) == NULL)
 440                 return (EBADF);
 441 
 442         /*
 443          * Set read buffer size to rsize
 444          * and add room for RPC headers.
 445          */
 446         readsize = nfs3tsize() + (RPC_MAXDATASIZE - NFS_MAXDATA);
 447         if (readsize < RPC_MAXDATASIZE)
 448                 readsize = RPC_MAXDATASIZE;
 449 
 450         error = copyinstr((const char *)STRUCT_FGETP(uap, netid), buf,
 451             KNC_STRSIZE, &len);
 452         if (error) {
 453                 releasef(STRUCT_FGET(uap, fd));
 454                 return (error);
 455         }
 456 
 457         addrmask.len = STRUCT_FGET(uap, addrmask.len);
 458         addrmask.maxlen = STRUCT_FGET(uap, addrmask.maxlen);
 459         addrmask.buf = kmem_alloc(addrmask.maxlen, KM_SLEEP);
 460         error = copyin(STRUCT_FGETP(uap, addrmask.buf), addrmask.buf,
 461             addrmask.len);
 462         if (error) {
 463                 releasef(STRUCT_FGET(uap, fd));
 464                 kmem_free(addrmask.buf, addrmask.maxlen);
 465                 return (error);
 466         }
 467 
 468         nfs_versmin = STRUCT_FGET(uap, versmin);
 469         nfs_versmax = STRUCT_FGET(uap, versmax);
 470 
 471         /* Double check the vers min/max ranges */
 472         if ((nfs_versmin > nfs_versmax) ||
 473             (nfs_versmin < NFS_VERSMIN) ||
 474             (nfs_versmax > NFS_VERSMAX)) {
 475                 nfs_versmin = NFS_VERSMIN_DEFAULT;
 476                 nfs_versmax = NFS_VERSMAX_DEFAULT;
 477         }
 478 
 479         if (error =
 480             nfs_srv_set_sc_versions(fp, &sctp, nfs_versmin, nfs_versmax)) {
 481                 releasef(STRUCT_FGET(uap, fd));
 482                 kmem_free(addrmask.buf, addrmask.maxlen);
 483                 return (error);
 484         }
 485 
 486         /* Initialize nfsv4 server */
 487         if (nfs_versmax == (rpcvers_t)NFS_V4)
 488                 rfs4_server_start(STRUCT_FGET(uap, delegation));
 489 
 490         /* Create a transport handle. */
 491         error = svc_tli_kcreate(fp, readsize, buf, &addrmask, &xprt,
 492             sctp, NULL, NFS_SVCPOOL_ID, TRUE);
 493 
 494         if (error)
 495                 kmem_free(addrmask.buf, addrmask.maxlen);
 496 
 497         releasef(STRUCT_FGET(uap, fd));
 498 
 499         /* HA-NFSv4: save the cluster nodeid */
 500         if (cluster_bootflags & CLUSTER_BOOTED)
 501                 lm_global_nlmid = clconf_get_nodeid();
 502 
 503         return (error);
 504 }
 505 
 506 static void
 507 rfs4_server_start(int nfs4_srv_delegation)
 508 {
 509         /*
 510          * Determine if the server has previously been "started" and
 511          * if not, do the per instance initialization
 512          */
 513         mutex_enter(&nfs_server_upordown_lock);
 514 
 515         if (nfs_server_upordown != NFS_SERVER_RUNNING) {
 516                 /* Do we need to stop and wait on the previous server? */
 517                 while (nfs_server_upordown == NFS_SERVER_STOPPING ||
 518                     nfs_server_upordown == NFS_SERVER_OFFLINE)
 519                         cv_wait(&nfs_server_upordown_cv,
 520                             &nfs_server_upordown_lock);
 521 
 522                 if (nfs_server_upordown != NFS_SERVER_RUNNING) {
 523                         (void) svc_pool_control(NFS_SVCPOOL_ID,
 524                             SVCPSET_UNREGISTER_PROC, (void *)&nfs_srv_offline);
 525                         (void) svc_pool_control(NFS_SVCPOOL_ID,
 526                             SVCPSET_SHUTDOWN_PROC, (void *)&nfs_srv_stop_all);
 527 
 528                         /* is this an nfsd warm start? */
 529                         if (nfs_server_upordown == NFS_SERVER_QUIESCED) {
 530                                 cmn_err(CE_NOTE, "nfs_server: "
 531                                     "server was previously quiesced; "
 532                                     "existing NFSv4 state will be re-used");
 533 
 534                                 /*
 535                                  * HA-NFSv4: this is also the signal
 536                                  * that a Resource Group failover has
 537                                  * occurred.
 538                                  */
 539                                 if (cluster_bootflags & CLUSTER_BOOTED)
 540                                         hanfsv4_failover();
 541                         } else {
 542                                 /* cold start */
 543                                 rfs4_state_init();
 544                                 nfs4_drc = rfs4_init_drc(nfs4_drc_max,
 545                                     nfs4_drc_hash);
 546                         }
 547 
 548                         /*
 549                          * Check to see if delegation is to be
 550                          * enabled at the server
 551                          */
 552                         if (nfs4_srv_delegation != FALSE)
 553                                 rfs4_set_deleg_policy(SRV_NORMAL_DELEGATE);
 554 
 555                         nfs_server_upordown = NFS_SERVER_RUNNING;
 556                 }
 557                 cv_signal(&nfs_server_upordown_cv);
 558         }
 559         mutex_exit(&nfs_server_upordown_lock);
 560 }
 561 
 562 /*
 563  * If RDMA device available,
 564  * start RDMA listener.
 565  */
 566 int
 567 rdma_start(struct rdma_svc_args *rsa)
 568 {
 569         int error;
 570         rdma_xprt_group_t started_rdma_xprts;
 571         rdma_stat stat;
 572         int svc_state = 0;
 573 
 574         /* Double check the vers min/max ranges */
 575         if ((rsa->nfs_versmin > rsa->nfs_versmax) ||
 576             (rsa->nfs_versmin < NFS_VERSMIN) ||
 577             (rsa->nfs_versmax > NFS_VERSMAX)) {
 578                 rsa->nfs_versmin = NFS_VERSMIN_DEFAULT;
 579                 rsa->nfs_versmax = NFS_VERSMAX_DEFAULT;
 580         }
 581         nfs_versmin = rsa->nfs_versmin;
 582         nfs_versmax = rsa->nfs_versmax;
 583 
 584         /* Set the versions in the callout table */
 585         __nfs_sc_rdma[0].sc_versmin = rsa->nfs_versmin;
 586         __nfs_sc_rdma[0].sc_versmax = rsa->nfs_versmax;
 587         /* For the NFS_ACL program, check the max version */
 588         __nfs_sc_rdma[1].sc_versmin = rsa->nfs_versmin;
 589         if (rsa->nfs_versmax > NFS_ACL_VERSMAX)
 590                 __nfs_sc_rdma[1].sc_versmax = NFS_ACL_VERSMAX;
 591         else
 592                 __nfs_sc_rdma[1].sc_versmax = rsa->nfs_versmax;
 593 
 594         /* Initialize nfsv4 server */
 595         if (rsa->nfs_versmax == (rpcvers_t)NFS_V4)
 596                 rfs4_server_start(rsa->delegation);
 597 
 598         started_rdma_xprts.rtg_count = 0;
 599         started_rdma_xprts.rtg_listhead = NULL;
 600         started_rdma_xprts.rtg_poolid = rsa->poolid;
 601 
 602 restart:
 603         error = svc_rdma_kcreate(rsa->netid, &nfs_sct_rdma, rsa->poolid,
 604             &started_rdma_xprts);
 605 
 606         svc_state = !error;
 607 
 608         while (!error) {
 609 
 610                 /*
 611                  * wait till either interrupted by a signal on
 612                  * nfs service stop/restart or signalled by a
                 * rdma plugin attach/detach.
 614                  */
 615 
 616                 stat = rdma_kwait();
 617 
 618                 /*
 619                  * stop services if running -- either on a HCA detach event
 620                  * or if the nfs service is stopped/restarted.
 621                  */
 622 
 623                 if ((stat == RDMA_HCA_DETACH || stat == RDMA_INTR) &&
 624                     svc_state) {
 625                         rdma_stop(&started_rdma_xprts);
 626                         svc_state = 0;
 627                 }
 628 
 629                 /*
 630                  * nfs service stop/restart, break out of the
 631                  * wait loop and return;
 632                  */
 633                 if (stat == RDMA_INTR)
 
 644                 /*
 645                  * loop until a nfs service stop/restart
 646                  */
 647         }
 648 
 649         return (error);
 650 }
 651 
 652 /* ARGSUSED */
 653 void
 654 rpc_null(caddr_t *argp, caddr_t *resp, struct exportinfo *exi,
 655     struct svc_req *req, cred_t *cr, bool_t ro)
 656 {
 657 }
 658 
 659 /* ARGSUSED */
 660 void
 661 rpc_null_v3(caddr_t *argp, caddr_t *resp, struct exportinfo *exi,
 662     struct svc_req *req, cred_t *cr, bool_t ro)
 663 {
 664         DTRACE_NFSV3_3(op__null__start, struct svc_req *, req,
 665             cred_t *, cr, vnode_t *, NULL);
 666         DTRACE_NFSV3_3(op__null__done, struct svc_req *, req,
 667             cred_t *, cr, vnode_t *, NULL);
 668 }
 669 
 670 /* ARGSUSED */
 671 static void
 672 rfs_error(caddr_t *argp, caddr_t *resp, struct exportinfo *exi,
 673     struct svc_req *req, cred_t *cr, bool_t ro)
 674 {
 675         /* return (EOPNOTSUPP); */
 676 }
 677 
 678 static void
 679 nullfree(void)
 680 {
 681 }
 682 
 683 static char *rfscallnames_v2[] = {
 684         "RFS2_NULL",
 685         "RFS2_GETATTR",
 686         "RFS2_SETATTR",
 687         "RFS2_ROOT",
 
1325         /* RFS3_PATHCONF = 20 */
1326         PATHCONF3res nfs3_pathconf_res;
1327 
1328         /* RFS3_COMMIT = 21 */
1329         COMMIT3res nfs3_commit_res;
1330 
1331         /*
1332          * NFS VERSION 4
1333          */
1334 
1335         /* RFS_NULL = 0 */
1336 
1337         /* RFS4_COMPOUND = 1 */
1338         COMPOUND4res nfs4_compound_res;
1339 
1340 };
1341 
/*
 * Per-version dispatch descriptors for the NFS program, one entry each
 * for v2, v3 and v4 in that order.  Each entry gives the number of
 * procedures, the procedure name strings, a pointer to the
 * per-procedure counters and the dispatch table itself.
 *
 * common_dispatch() indexes the table it is handed by
 * (request version - min_vers); presumably this is the table passed in
 * for NFS requests -- confirm in rfs_dispatch().
 */
static struct rpc_disptable rfs_disptable[] = {
        /* NFS version 2 */
        {sizeof (rfsdisptab_v2) / sizeof (rfsdisptab_v2[0]),
            rfscallnames_v2,
            &rfsproccnt_v2_ptr, rfsdisptab_v2},
        /* NFS version 3 */
        {sizeof (rfsdisptab_v3) / sizeof (rfsdisptab_v3[0]),
            rfscallnames_v3,
            &rfsproccnt_v3_ptr, rfsdisptab_v3},
        /* NFS version 4 */
        {sizeof (rfsdisptab_v4) / sizeof (rfsdisptab_v4[0]),
            rfscallnames_v4,
            &rfsproccnt_v4_ptr, rfsdisptab_v4},
};
1353 
1354 /*
1355  * If nfs_portmon is set, then clients are required to use privileged
1356  * ports (ports < IPPORT_RESERVED) in order to get NFS services.
1357  *
1358  * N.B.: this attempt to carry forward the already ill-conceived notion
1359  * of privileged ports for TCP/UDP is really quite ineffectual.  Not only
1360  * is it transport-dependent, it's laughably easy to spoof.  If you're
1361  * really interested in security, you must start with secure RPC instead.
1362  */
1363 static int nfs_portmon = 0;
1364 
1365 #ifdef DEBUG
1366 static int cred_hits = 0;
1367 static int cred_misses = 0;
1368 #endif
1369 
1370 
1371 #ifdef DEBUG
1372 /*
1373  * Debug code to allow disabling of rfs_dispatch() use of
1374  * fastxdrargs() and fastxdrres() calls for testing purposes.
1375  */
1376 static int rfs_no_fast_xdrargs = 0;
1377 static int rfs_no_fast_xdrres = 0;
1378 #endif
1379 
1380 union acl_args {
1381         /*
1382          * ACL VERSION 2
1383          */
1384 
1385         /* ACL2_NULL = 0 */
1386 
1387         /* ACL2_GETACL = 1 */
1388         GETACL2args acl2_getacl_args;
1389 
1390         /* ACL2_SETACL = 2 */
 
1457 
1458 static bool_t
1459 auth_tooweak(struct svc_req *req, char *res)
1460 {
1461 
1462         if (req->rq_vers == NFS_VERSION && req->rq_proc == RFS_LOOKUP) {
1463                 struct nfsdiropres *dr = (struct nfsdiropres *)res;
1464                 if ((enum wnfsstat)dr->dr_status == WNFSERR_CLNT_FLAVOR)
1465                         return (TRUE);
1466         } else if (req->rq_vers == NFS_V3 && req->rq_proc == NFSPROC3_LOOKUP) {
1467                 LOOKUP3res *resp = (LOOKUP3res *)res;
1468                 if ((enum wnfsstat)resp->status == WNFSERR_CLNT_FLAVOR)
1469                         return (TRUE);
1470         }
1471         return (FALSE);
1472 }
1473 
1474 
1475 static void
1476 common_dispatch(struct svc_req *req, SVCXPRT *xprt, rpcvers_t min_vers,
1477                 rpcvers_t max_vers, char *pgmname,
1478                 struct rpc_disptable *disptable)
1479 {
1480         int which;
1481         rpcvers_t vers;
1482         char *args;
1483         union {
1484                         union rfs_args ra;
1485                         union acl_args aa;
1486                 } args_buf;
1487         char *res;
1488         union {
1489                         union rfs_res rr;
1490                         union acl_res ar;
1491                 } res_buf;
1492         struct rpcdisp *disp = NULL;
1493         int dis_flags = 0;
1494         cred_t *cr;
1495         int error = 0;
1496         int anon_ok;
1497         struct exportinfo *exi = NULL;
1498         unsigned int nfslog_rec_id;
1499         int dupstat;
1500         struct dupreq *dr;
1501         int authres;
1502         bool_t publicfh_ok = FALSE;
1503         enum_t auth_flavor;
1504         bool_t dupcached = FALSE;
1505         struct netbuf   nb;
1506         bool_t logging_enabled = FALSE;
1507         struct exportinfo *nfslog_exi = NULL;
1508         char **procnames;
1509         char cbuf[INET6_ADDRSTRLEN];    /* to hold both IPv4 and IPv6 addr */
1510         bool_t ro = FALSE;
1511 
1512         vers = req->rq_vers;
1513 
1514         if (vers < min_vers || vers > max_vers) {
1515                 svcerr_progvers(req->rq_xprt, min_vers, max_vers);
1516                 error++;
1517                 cmn_err(CE_NOTE, "%s: bad version number %u", pgmname, vers);
1518                 goto done;
1519         }
1520         vers -= min_vers;
1521 
1522         which = req->rq_proc;
1523         if (which < 0 || which >= disptable[(int)vers].dis_nprocs) {
1524                 svcerr_noproc(req->rq_xprt);
1525                 error++;
1526                 goto done;
1527         }
1528 
1529         (*(disptable[(int)vers].dis_proccntp))[which].value.ui64++;
1530 
1531         disp = &disptable[(int)vers].dis_table[which];
1532         procnames = disptable[(int)vers].dis_procnames;
1533 
1534         auth_flavor = req->rq_cred.oa_flavor;
1535 
1536         /*
1537          * Deserialize into the args struct.
1538          */
1539         args = (char *)&args_buf;
1540 
1541 #ifdef DEBUG
1542         if (rfs_no_fast_xdrargs || (auth_flavor == RPCSEC_GSS) ||
1543             disp->dis_fastxdrargs == NULL_xdrproc_t ||
1544             !SVC_GETARGS(xprt, disp->dis_fastxdrargs, (char *)&args))
1545 #else
1546         if ((auth_flavor == RPCSEC_GSS) ||
1547             disp->dis_fastxdrargs == NULL_xdrproc_t ||
1548             !SVC_GETARGS(xprt, disp->dis_fastxdrargs, (char *)&args))
1549 #endif
1550         {
 
1554                         /*
1555                          * Check if we are outside our capabilities.
1556                          */
1557                         if (rfs4_minorvers_mismatch(req, xprt, (void *)args))
1558                                 goto done;
1559 
1560                         svcerr_decode(xprt);
1561                         cmn_err(CE_NOTE,
1562                             "Failed to decode arguments for %s version %u "
1563                             "procedure %s client %s%s",
1564                             pgmname, vers + min_vers, procnames[which],
1565                             client_name(req), client_addr(req, cbuf));
1566                         goto done;
1567                 }
1568         }
1569 
1570         /*
1571          * If Version 4 use that specific dispatch function.
1572          */
1573         if (req->rq_vers == 4) {
1574                 error += rfs4_dispatch(disp, req, xprt, args);
1575                 goto done;
1576         }
1577 
1578         dis_flags = disp->dis_flags;
1579 
1580         /*
1581          * Find export information and check authentication,
1582          * setting the credential if everything is ok.
1583          */
1584         if (disp->dis_getfh != NULL) {
1585                 void *fh;
1586                 fsid_t *fsid;
1587                 fid_t *fid, *xfid;
1588                 fhandle_t *fh2;
1589                 nfs_fh3 *fh3;
1590 
1591                 fh = (*disp->dis_getfh)(args);
1592                 switch (req->rq_vers) {
1593                 case NFS_VERSION:
1594                         fh2 = (fhandle_t *)fh;
 
1615                  * client's mount operation to fail.  As a work-around,
1616                  * we check here to see if the request is a getattr or
1617                  * statfs operation on the exported vnode itself, and
1618                  * pass a flag to checkauth with the result of this test.
1619                  *
1620                  * The filehandle refers to the mountpoint itself if
1621                  * the fh_data and fh_xdata portions of the filehandle
1622                  * are equal.
1623                  *
1624                  * Added anon_ok argument to checkauth().
1625                  */
1626 
1627                 if ((dis_flags & RPC_ALLOWANON) && EQFID(fid, xfid))
1628                         anon_ok = 1;
1629                 else
1630                         anon_ok = 0;
1631 
1632                 cr = xprt->xp_cred;
1633                 ASSERT(cr != NULL);
1634 #ifdef DEBUG
1635                 if (crgetref(cr) != 1) {
1636                         crfree(cr);
1637                         cr = crget();
1638                         xprt->xp_cred = cr;
1639                         cred_misses++;
1640                 } else
1641                         cred_hits++;
1642 #else
1643                 if (crgetref(cr) != 1) {
1644                         crfree(cr);
1645                         cr = crget();
1646                         xprt->xp_cred = cr;
1647                 }
1648 #endif
1649 
1650                 exi = checkexport(fsid, xfid);
1651 
1652                 if (exi != NULL) {
1653                         publicfh_ok = PUBLICFH_CHECK(disp, exi, fsid, xfid);
1654 
1655                         /*
1656                          * Don't allow non-V4 clients access
1657                          * to pseudo exports
1658                          */
1659                         if (PSEUDO(exi)) {
1660                                 svcerr_weakauth(xprt);
1661                                 error++;
1662                                 goto done;
1663                         }
1664 
1665                         authres = checkauth(exi, req, cr, anon_ok, publicfh_ok,
1666                             &ro);
1667                         /*
1668                          * authres >  0: authentication OK - proceed
1669                          * authres == 0: authentication weak - return error
1670                          * authres <  0: authentication timeout - drop
1671                          */
1672                         if (authres <= 0) {
1673                                 if (authres == 0) {
1674                                         svcerr_weakauth(xprt);
 
1746                                 SVC_FREERES(xprt);
1747                         error++;
1748                         goto done;
1749                 }
1750         }
1751 
1752         if (auth_tooweak(req, res)) {
1753                 svcerr_weakauth(xprt);
1754                 error++;
1755                 goto done;
1756         }
1757 
1758         /*
1759          * Check to see if logging has been enabled on the server.
1760          * If so, then obtain the export info struct to be used for
1761          * the later writing of the log record.  This is done for
1762          * the case that a lookup is done across a non-logged public
1763          * file system.
1764          */
1765         if (nfslog_buffer_list != NULL) {
1766                 nfslog_exi = nfslog_get_exi(exi, req, res, &nfslog_rec_id);
1767                 /*
1768                  * Is logging enabled?
1769                  */
1770                 logging_enabled = (nfslog_exi != NULL);
1771 
1772                 /*
1773                  * Copy the netbuf for logging purposes, before it is
1774                  * freed by svc_sendreply().
1775                  */
1776                 if (logging_enabled) {
1777                         NFSLOG_COPY_NETBUF(nfslog_exi, xprt, &nb);
1778                         /*
1779                          * If RPC_MAPRESP flag set (i.e. in V2 ops) the
1780                          * res gets copied directly into the mbuf and
1781                          * may be freed soon after the sendreply. So we
1782                          * must copy it here to a safe place...
1783                          */
1784                         if (res != (char *)&res_buf) {
1785                                 bcopy(res, (char *)&res_buf, disp->dis_ressz);
1786                         }
1787                 }
1788         }
1789 
1790         /*
1791          * Serialize and send results struct
1792          */
1793 #ifdef DEBUG
1794         if (rfs_no_fast_xdrres == 0 && res != (char *)&res_buf)
1795 #else
1796         if (res != (char *)&res_buf)
1797 #endif
1798         {
1799                 if (!svc_sendreply(xprt, disp->dis_fastxdrres, res)) {
1800                         cmn_err(CE_NOTE, "%s: bad sendreply", pgmname);
1801                         svcerr_systemerr(xprt);
1802                         error++;
1803                 }
1804         } else {
1805                 if (!svc_sendreply(xprt, disp->dis_xdrres, res)) {
1806                         cmn_err(CE_NOTE, "%s: bad sendreply", pgmname);
1807                         svcerr_systemerr(xprt);
1808                         error++;
1809                 }
1810         }
1811 
1812         /*
1813          * Log if needed
1814          */
1815         if (logging_enabled) {
1816                 nfslog_write_record(nfslog_exi, req, args, (char *)&res_buf,
1817                     cr, &nb, nfslog_rec_id, NFSLOG_ONE_BUFFER);
1818                 exi_rele(nfslog_exi);
1819                 kmem_free((&nb)->buf, (&nb)->len);
1820         }
1821 
1822         /*
1823          * Free results struct. With the addition of NFS V4 we can
1824          * have non-idempotent procedures with functions.
1825          */
1826         if (disp->dis_resfree != nullfree && dupcached == FALSE) {
1827                 (*disp->dis_resfree)(res);
1828         }
1829 
1830 done:
1831         /*
1832          * Free arguments struct
1833          */
1834         if (disp) {
1835                 if (!SVC_FREEARGS(xprt, disp->dis_xdrargs, args)) {
1836                         cmn_err(CE_NOTE, "%s: bad freeargs", pgmname);
1837                         error++;
1838                 }
1839         } else {
1840                 if (!SVC_FREEARGS(xprt, (xdrproc_t)0, (caddr_t)0)) {
1841                         cmn_err(CE_NOTE, "%s: bad freeargs", pgmname);
1842                         error++;
1843                 }
1844         }
1845 
1846         if (exi != NULL)
1847                 exi_rele(exi);
1848 
1849         global_svstat_ptr[req->rq_vers][NFS_BADCALLS].value.ui64 += error;
1850 
1851         global_svstat_ptr[req->rq_vers][NFS_CALLS].value.ui64++;
1852 }
1853 
1854 static void
1855 rfs_dispatch(struct svc_req *req, SVCXPRT *xprt)
1856 {
1857         common_dispatch(req, xprt, NFS_VERSMIN, NFS_VERSMAX,
1858             "NFS", rfs_disptable);
1859 }
1860 
/*
 * Call-name strings for the version 2 ACL procedures; the position of
 * a name in the array is the index it is looked up by.
 */
static char *aclcallnames_v2[] = {
	"ACL2_NULL",		/* 0 */
	"ACL2_GETACL",		/* 1 */
	"ACL2_SETACL",		/* 2 */
	"ACL2_GETATTR",		/* 3 */
	"ACL2_ACCESS",		/* 4 */
	"ACL2_GETXATTRDIR"	/* 5 */
};
 
1954             acl3_getacl_getfh},
1955 
1956         /* ACL3_SETACL = 2 */
1957         {acl3_setacl,
1958             xdr_SETACL3args, NULL_xdrproc_t, sizeof (SETACL3args),
1959             xdr_SETACL3res, NULL_xdrproc_t, sizeof (SETACL3res),
1960             nullfree, 0,
1961             acl3_setacl_getfh},
1962 
1963         /* ACL3_GETXATTRDIR = 3 */
1964         {acl3_getxattrdir,
1965             xdr_GETXATTRDIR3args, NULL_xdrproc_t, sizeof (GETXATTRDIR3args),
1966             xdr_GETXATTRDIR3res, NULL_xdrproc_t, sizeof (GETXATTRDIR3res),
1967             nullfree, RPC_IDEMPOTENT,
1968             acl3_getxattrdir_getfh},
1969 };
1970 
1971 static struct rpc_disptable acl_disptable[] = {
1972         {sizeof (acldisptab_v2) / sizeof (acldisptab_v2[0]),
1973                 aclcallnames_v2,
1974                 &aclproccnt_v2_ptr, acldisptab_v2},
1975         {sizeof (acldisptab_v3) / sizeof (acldisptab_v3[0]),
1976                 aclcallnames_v3,
1977                 &aclproccnt_v3_ptr, acldisptab_v3},
1978 };
1979 
1980 static void
1981 acl_dispatch(struct svc_req *req, SVCXPRT *xprt)
1982 {
1983         common_dispatch(req, xprt, NFS_ACL_VERSMIN, NFS_ACL_VERSMAX,
1984             "ACL", acl_disptable);
1985 }
1986 
1987 int
1988 checkwin(int flavor, int window, struct svc_req *req)
1989 {
1990         struct authdes_cred *adc;
1991 
1992         switch (flavor) {
1993         case AUTH_DES:
1994                 adc = (struct authdes_cred *)req->rq_clntcred;
1995                 CTASSERT(sizeof (struct authdes_cred) <= RQCRED_SIZE);
1996                 if (adc->adc_fullname.window > window)
1997                         return (0);
 
2551 
2552         } else {
2553 
2554                 /*
2555                  * No IP address to print. If there was a host name
2556                  * printed, then we print a space.
2557                  */
2558                 (void) sprintf(buf, frontspace);
2559         }
2560 
2561         return (buf);
2562 }
2563 
2564 /*
2565  * NFS Server initialization routine.  This routine should only be called
2566  * once.  It performs the following tasks:
2567  *      - Call sub-initialization routines (localize access to variables)
2568  *      - Initialize all locks
2569  *      - initialize the version 3 write verifier
2570  */
2571 int
2572 nfs_srvinit(void)
2573 {
2574         int error;
2575 
2576         error = nfs_exportinit();
2577         if (error != 0)
2578                 return (error);
2579         error = rfs4_srvrinit();
2580         if (error != 0) {
2581                 nfs_exportfini();
2582                 return (error);
2583         }
2584         rfs_srvrinit();
2585         rfs3_srvrinit();
2586         nfsauth_init();
2587 
2588         /* Init the stuff to control start/stop */
2589         nfs_server_upordown = NFS_SERVER_STOPPED;
2590         mutex_init(&nfs_server_upordown_lock, NULL, MUTEX_DEFAULT, NULL);
2591         cv_init(&nfs_server_upordown_cv, NULL, CV_DEFAULT, NULL);
2592         mutex_init(&rdma_wait_mutex, NULL, MUTEX_DEFAULT, NULL);
2593         cv_init(&rdma_wait_cv, NULL, CV_DEFAULT, NULL);
2594 
2595         return (0);
2596 }
2597 
2598 /*
2599  * NFS Server finalization routine. This routine is called to cleanup the
2600  * initialization work previously performed if the NFS server module could
2601  * not be loaded correctly.
2602  */
2603 void
2604 nfs_srvfini(void)
2605 {
2606         nfsauth_fini();
2607         rfs3_srvrfini();
2608         rfs_srvrfini();
2609         nfs_exportfini();
2610 
2611         mutex_destroy(&nfs_server_upordown_lock);
2612         cv_destroy(&nfs_server_upordown_cv);
2613         mutex_destroy(&rdma_wait_mutex);
2614         cv_destroy(&rdma_wait_cv);
2615 }
2616 
2617 /*
2618  * Set up an iovec array of up to cnt pointers.
2619  */
2620 
2621 void
2622 mblk_to_iov(mblk_t *m, int cnt, struct iovec *iovp)
2623 {
2624         while (m != NULL && cnt-- > 0) {
2625                 iovp->iov_base = (caddr_t)m->b_rptr;
2626                 iovp->iov_len = (m->b_wptr - m->b_rptr);
2627                 iovp++;
2628                 m = m->b_cont;
2629         }
2630 }
2631 
2632 /*
2633  * Common code between NFS Version 2 and NFS Version 3 for the public
2634  * filehandle multicomponent lookups.
2635  */
2636 
2637 /*
2638  * Public filehandle evaluation of a multi-component lookup, following
2639  * symbolic links, if necessary. This may result in a vnode in another
2640  * filesystem, which is OK as long as the other filesystem is exported.
 
2837                     mc_dvp, cr, NATIVEPATH);
2838 
2839                 if (error == ENOENT) {
2840                         *vpp = tvp;
2841                         mc_dvp = tmc_dvp;
2842                         error = 0;
2843                 } else {        /* ok or error other than ENOENT */
2844                         if (tmc_dvp)
2845                                 VN_RELE(tmc_dvp);
2846                         if (error)
2847                                 goto publicfh_done;
2848 
2849                         /*
2850                          * Found a valid vp for index "filename". Sanity check
2851                          * for odd case where a directory is provided as index
2852                          * option argument and leads us to another filesystem
2853                          */
2854 
2855                         /* Release the reference on the old exi value */
2856                         ASSERT(*exi != NULL);
2857                         exi_rele(*exi);
2858 
2859                         if (error = nfs_check_vpexi(mc_dvp, *vpp, kcred, exi)) {
2860                                 VN_RELE(*vpp);
2861                                 goto publicfh_done;
2862                         }
2863                 }
2864         }
2865 
2866 publicfh_done:
2867         if (mc_dvp)
2868                 VN_RELE(mc_dvp);
2869 
2870         return (error);
2871 }
2872 
2873 /*
2874  * Evaluate a multi-component path
2875  */
2876 int
2877 rfs_pathname(
2878         char *path,                     /* pathname to evaluate */
2879         vnode_t **dirvpp,               /* ret for ptr to parent dir vnode */
2880         vnode_t **compvpp,              /* ret for ptr to component vnode */
2881         vnode_t *startdvp,              /* starting vnode */
2882         cred_t *cr,                     /* user's credential */
2883         int pathflag)                   /* flag to identify path, e.g. URL */
2884 {
2885         char namebuf[TYPICALMAXPATHLEN];
2886         struct pathname pn;
2887         int error;
2888 
2889         /*
2890          * If pathname starts with '/', then set startdvp to root.
2891          */
2892         if (*path == '/') {
2893                 while (*path == '/')
2894                         path++;
2895 
2896                 startdvp = rootdir;
2897         }
2898 
2899         error = pn_get_buf(path, UIO_SYSSPACE, &pn, namebuf, sizeof (namebuf));
2900         if (error == 0) {
2901                 /*
2902                  * Call the URL parser for URL paths to modify the original
2903                  * string to handle any '%' encoded characters that exist.
2904                  * Done here to avoid an extra bcopy in the lookup.
2905                  * We need to be careful about pathlen's. We know that
2906                  * rfs_pathname() is called with a non-empty path. However,
2907                  * it could be emptied due to the path simply being all /'s,
2908                  * which is valid to proceed with the lookup, or due to the
2909                  * URL parser finding an encoded null character at the
2910                  * beginning of path which should not proceed with the lookup.
2911                  */
2912                 if (pn.pn_pathlen != 0 && pathflag == URLPATH) {
2913                         URLparse(pn.pn_path);
2914                         if ((pn.pn_pathlen = strlen(pn.pn_path)) == 0)
2915                                 return (ENOENT);
2916                 }
2917                 VN_HOLD(startdvp);
2918                 error = lookuppnvp(&pn, NULL, NO_FOLLOW, dirvpp, compvpp,
2919                     rootdir, startdvp, cr);
2920         }
2921         if (error == ENAMETOOLONG) {
2922                 /*
2923                  * This thread used a pathname > TYPICALMAXPATHLEN bytes long.
2924                  */
2925                 if (error = pn_get(path, UIO_SYSSPACE, &pn))
2926                         return (error);
2927                 if (pn.pn_pathlen != 0 && pathflag == URLPATH) {
2928                         URLparse(pn.pn_path);
2929                         if ((pn.pn_pathlen = strlen(pn.pn_path)) == 0) {
2930                                 pn_free(&pn);
2931                                 return (ENOENT);
2932                         }
2933                 }
2934                 VN_HOLD(startdvp);
2935                 error = lookuppnvp(&pn, NULL, NO_FOLLOW, dirvpp, compvpp,
2936                     rootdir, startdvp, cr);
2937                 pn_free(&pn);
2938         }
2939 
2940         return (error);
2941 }
2942 
2943 /*
2944  * Adapt the multicomponent lookup path depending on the pathtype
2945  */
2946 static int
2947 MCLpath(char **path)
2948 {
2949         unsigned char c = (unsigned char)**path;
2950 
2951         /*
2952          * If the MCL path is between 0x20 and 0x7E (graphic printable
2953          * character of the US-ASCII coded character set), its a URL path,
2954          * per RFC 1738.
2955          */
2956         if (c >= 0x20 && c <= 0x7E)
 
3020         int walk;
3021         int error = 0;
3022 
3023         *exi = nfs_vptoexi(mc_dvp, vp, cr, &walk, NULL, FALSE);
3024         if (*exi == NULL)
3025                 error = EACCES;
3026         else {
3027                 /*
3028                  * If nosub is set for this export then
3029                  * a lookup relative to the public fh
3030                  * must not terminate below the
3031                  * exported directory.
3032                  */
3033                 if ((*exi)->exi_export.ex_flags & EX_NOSUB && walk > 0)
3034                         error = EACCES;
3035         }
3036 
3037         return (error);
3038 }
3039 
3040 /*
3041  * Do the main work of handling HA-NFSv4 Resource Group failover on
3042  * Sun Cluster.
3043  * We need to detect whether any RG admin paths have been added or removed,
3044  * and adjust resources accordingly.
3045  * Currently we're using a very inefficient algorithm, ~ 2 * O(n**2). In
3046  * order to scale, the list and array of paths need to be held in more
3047  * suitable data structures.
3048  */
3049 static void
3050 hanfsv4_failover(void)
3051 {
3052         int i, start_grace, numadded_paths = 0;
3053         char **added_paths = NULL;
3054         rfs4_dss_path_t *dss_path;
3055 
3056         /*
3057          * Note: currently, rfs4_dss_pathlist cannot be NULL, since
3058          * it will always include an entry for NFS4_DSS_VAR_DIR. If we
3059          * make the latter dynamically specified too, the following will
3060          * need to be adjusted.
3061          */
3062 
3063         /*
3064          * First, look for removed paths: RGs that have been failed-over
3065          * away from this node.
3066          * Walk the "currently-serving" rfs4_dss_pathlist and, for each
3067          * path, check if it is on the "passed-in" rfs4_dss_newpaths array
3068          * from nfsd. If not, that RG path has been removed.
3069          *
3070          * Note that nfsd has sorted rfs4_dss_newpaths for us, and removed
3071          * any duplicates.
3072          */
3073         dss_path = rfs4_dss_pathlist;
3074         do {
3075                 int found = 0;
3076                 char *path = dss_path->path;
3077 
3078                 /* used only for non-HA so may not be removed */
3079                 if (strcmp(path, NFS4_DSS_VAR_DIR) == 0) {
3080                         dss_path = dss_path->next;
3081                         continue;
3082                 }
3083 
3084                 for (i = 0; i < rfs4_dss_numnewpaths; i++) {
3085                         int cmpret;
3086                         char *newpath = rfs4_dss_newpaths[i];
3087 
3088                         /*
3089                          * Since nfsd has sorted rfs4_dss_newpaths for us,
3090                          * once the return from strcmp is negative we know
3091                          * we've passed the point where "path" should be,
3092                          * and can stop searching: "path" has been removed.
3093                          */
3094                         cmpret = strcmp(path, newpath);
3095                         if (cmpret < 0)
3096                                 break;
3097                         if (cmpret == 0) {
3098                                 found = 1;
3099                                 break;
3100                         }
3101                 }
3102 
3103                 if (found == 0) {
3104                         unsigned index = dss_path->index;
3105                         rfs4_servinst_t *sip = dss_path->sip;
3106                         rfs4_dss_path_t *path_next = dss_path->next;
3107 
3108                         /*
3109                          * This path has been removed.
3110                          * We must clear out the servinst reference to
3111                          * it, since it's now owned by another
3112                          * node: we should not attempt to touch it.
3113                          */
3114                         ASSERT(dss_path == sip->dss_paths[index]);
3115                         sip->dss_paths[index] = NULL;
3116 
3117                         /* remove from "currently-serving" list, and destroy */
3118                         remque(dss_path);
3119                         /* allow for NUL */
3120                         kmem_free(dss_path->path, strlen(dss_path->path) + 1);
3121                         kmem_free(dss_path, sizeof (rfs4_dss_path_t));
3122 
3123                         dss_path = path_next;
3124                 } else {
3125                         /* path was found; not removed */
3126                         dss_path = dss_path->next;
3127                 }
3128         } while (dss_path != rfs4_dss_pathlist);
3129 
3130         /*
3131          * Now, look for added paths: RGs that have been failed-over
3132          * to this node.
3133          * Walk the "passed-in" rfs4_dss_newpaths array from nfsd and,
3134          * for each path, check if it is on the "currently-serving"
3135          * rfs4_dss_pathlist. If not, that RG path has been added.
3136          *
3137          * Note: we don't do duplicate detection here; nfsd does that for us.
3138          *
3139          * Note: numadded_paths <= rfs4_dss_numnewpaths, which gives us
3140          * an upper bound for the size needed for added_paths[numadded_paths].
3141          */
3142 
3143         /* probably more space than we need, but guaranteed to be enough */
3144         if (rfs4_dss_numnewpaths > 0) {
3145                 size_t sz = rfs4_dss_numnewpaths * sizeof (char *);
3146                 added_paths = kmem_zalloc(sz, KM_SLEEP);
3147         }
3148 
3149         /* walk the "passed-in" rfs4_dss_newpaths array from nfsd */
3150         for (i = 0; i < rfs4_dss_numnewpaths; i++) {
3151                 int found = 0;
3152                 char *newpath = rfs4_dss_newpaths[i];
3153 
3154                 dss_path = rfs4_dss_pathlist;
3155                 do {
3156                         char *path = dss_path->path;
3157 
3158                         /* used only for non-HA */
3159                         if (strcmp(path, NFS4_DSS_VAR_DIR) == 0) {
3160                                 dss_path = dss_path->next;
3161                                 continue;
3162                         }
3163 
3164                         if (strncmp(path, newpath, strlen(path)) == 0) {
3165                                 found = 1;
3166                                 break;
3167                         }
3168 
3169                         dss_path = dss_path->next;
3170                 } while (dss_path != rfs4_dss_pathlist);
3171 
3172                 if (found == 0) {
3173                         added_paths[numadded_paths] = newpath;
3174                         numadded_paths++;
3175                 }
3176         }
3177 
3178         /* did we find any added paths? */
3179         if (numadded_paths > 0) {
3180                 /* create a new server instance, and start its grace period */
3181                 start_grace = 1;
3182                 rfs4_servinst_create(start_grace, numadded_paths, added_paths);
3183 
3184                 /* read in the stable storage state from these paths */
3185                 rfs4_dss_readstate(numadded_paths, added_paths);
3186 
3187                 /*
3188                  * Multiple failovers during a grace period will cause
3189                  * clients of the same resource group to be partitioned
3190                  * into different server instances, with different
3191                  * grace periods.  Since clients of the same resource
3192                  * group must be subject to the same grace period,
3193                  * we need to reset all currently active grace periods.
3194                  */
3195                 rfs4_grace_reset_all();
3196         }
3197 
3198         if (rfs4_dss_numnewpaths > 0)
3199                 kmem_free(added_paths, rfs4_dss_numnewpaths * sizeof (char *));
3200 }
3201 
3202 /*
3203  * Used by NFSv3 and NFSv4 server to query label of
3204  * a pathname component during lookup/access ops.
3205  */
3206 ts_label_t *
3207 nfs_getflabel(vnode_t *vp, struct exportinfo *exi)
3208 {
3209         zone_t *zone;
3210         ts_label_t *zone_label;
3211         char *path;
3212 
3213         mutex_enter(&vp->v_lock);
3214         if (vp->v_path != vn_vpath_empty) {
3215                 zone = zone_find_by_any_path(vp->v_path, B_FALSE);
3216                 mutex_exit(&vp->v_lock);
3217         } else {
3218                 /*
3219                  * v_path not cached. Fall back on pathname of exported
3220                  * file system as we rely on pathname from which we can
3221                  * derive a label. The exported file system portion of
 
 | 
   1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 1990, 2010, Oracle and/or its affiliates. All rights reserved.
  24  */
  25 
  26 /*
  27  *      Copyright (c) 1983,1984,1985,1986,1987,1988,1989  AT&T.
  28  *      All rights reserved.
  29  *      Use is subject to license terms.
  30  */
  31 
  32 /*
  33  * Copyright (c) 2011 Bayard G. Bell. All rights reserved.
  34  * Copyright (c) 2013 by Delphix. All rights reserved.
  35  * Copyright 2018 Nexenta Systems, Inc.
  36  * Copyright (c) 2017 Joyent Inc
  37  */
  38 
  39 #include <sys/param.h>
  40 #include <sys/types.h>
  41 #include <sys/systm.h>
  42 #include <sys/cred.h>
  43 #include <sys/proc.h>
  44 #include <sys/user.h>
  45 #include <sys/buf.h>
  46 #include <sys/vfs.h>
  47 #include <sys/vnode.h>
  48 #include <sys/pathname.h>
  49 #include <sys/uio.h>
  50 #include <sys/file.h>
  51 #include <sys/stat.h>
  52 #include <sys/errno.h>
  53 #include <sys/socket.h>
  54 #include <sys/sysmacros.h>
  55 #include <sys/siginfo.h>
  56 #include <sys/tiuser.h>
  57 #include <sys/statvfs.h>
  58 #include <sys/stream.h>
 
 
  70 #include <sys/vtrace.h>
  71 #include <sys/mode.h>
  72 #include <sys/acl.h>
  73 #include <sys/sdt.h>
  74 #include <sys/debug.h>
  75 
  76 #include <rpc/types.h>
  77 #include <rpc/auth.h>
  78 #include <rpc/auth_unix.h>
  79 #include <rpc/auth_des.h>
  80 #include <rpc/svc.h>
  81 #include <rpc/xdr.h>
  82 #include <rpc/rpc_rdma.h>
  83 
  84 #include <nfs/nfs.h>
  85 #include <nfs/export.h>
  86 #include <nfs/nfssys.h>
  87 #include <nfs/nfs_clnt.h>
  88 #include <nfs/nfs_acl.h>
  89 #include <nfs/nfs_log.h>
  90 #include <nfs/lm.h>
  91 #include <nfs/nfs_dispatch.h>
  92 #include <nfs/nfs4_drc.h>
  93 
  94 #include <sys/modctl.h>
  95 #include <sys/cladm.h>
  96 #include <sys/clconf.h>
  97 
  98 #include <sys/tsol/label.h>
  99 
 100 #define MAXHOST 32
 101 const char *kinet_ntop6(uchar_t *, char *, size_t);
 102 
 103 /*
 104  * Module linkage information.
 105  */
 106 
 107 static struct modlmisc modlmisc = {
 108         &mod_miscops, "NFS server module"
 109 };
 110 
 111 static struct modlinkage modlinkage = {
 112         MODREV_1, (void *)&modlmisc, NULL
 113 };
 114 
 115 zone_key_t nfssrv_zone_key;
 116 kmem_cache_t *nfs_xuio_cache;
 117 int nfs_loaned_buffers = 0;
 118 
 119 int
 120 _init(void)
 121 {
 122         int status;
 123 
 124         nfs_srvinit();
 125 
 126         status = mod_install((struct modlinkage *)&modlinkage);
 127         if (status != 0) {
 128                 /*
 129                  * Could not load module, cleanup previous
 130                  * initialization work.
 131                  */
 132                 nfs_srvfini();
 133 
 134                 return (status);
 135         }
 136 
 137         /*
 138          * Initialise some placeholders for nfssys() calls. These have
 139          * to be declared by the nfs module, since that handles nfssys()
 140          * calls - also used by NFS clients - but are provided by this
 141          * nfssrv module. These also then serve as confirmation to the
 142          * relevant code in nfs that nfssrv has been loaded, as they're
 143          * initially NULL.
 144          */
 
 161 {
 162         return (EBUSY);
 163 }
 164 
 165 int
 166 _info(struct modinfo *modinfop)
 167 {
 168         return (mod_info(&modlinkage, modinfop));
 169 }
 170 
 171 /*
 172  * PUBLICFH_CHECK() checks if the dispatch routine supports
 173  * RPC_PUBLICFH_OK, if the filesystem is exported public, and if the
 174  * incoming request is using the public filehandle. The check duplicates
 175  * the exportmatch() call done in checkexport(), and we should consider
 176  * modifying those routines to avoid the duplication. For now, we optimize
 177  * by calling exportmatch() only after checking that the dispatch routine
 178  * supports RPC_PUBLICFH_OK, and if the filesystem is explicitly exported
 179  * public (i.e., not the placeholder).
 180  */
 181 #define PUBLICFH_CHECK(ne, disp, exi, fsid, xfid) \
 182                 ((disp->dis_flags & RPC_PUBLICFH_OK) && \
 183                 ((exi->exi_export.ex_flags & EX_PUBLIC) || \
 184                 (exi == ne->exi_public && exportmatch(ne->exi_root, \
 185                 fsid, xfid))))
 186 
 187 static void     nfs_srv_shutdown_all(int);
 188 static void     rfs4_server_start(nfs_globals_t *, int);
 189 static void     nullfree(void);
 190 static void     rfs_dispatch(struct svc_req *, SVCXPRT *);
 191 static void     acl_dispatch(struct svc_req *, SVCXPRT *);
 192 static void     common_dispatch(struct svc_req *, SVCXPRT *,
 193                 rpcvers_t, rpcvers_t, char *,
 194                 struct rpc_disptable *);
 195 static  int     checkauth(struct exportinfo *, struct svc_req *, cred_t *, int,
 196                 bool_t, bool_t *);
 197 static char     *client_name(struct svc_req *req);
 198 static char     *client_addr(struct svc_req *req, char *buf);
 199 extern  int     sec_svc_getcred(struct svc_req *, cred_t *cr, char **, int *);
 200 extern  bool_t  sec_svc_inrootlist(int, caddr_t, int, caddr_t *);
 201 static void     *nfs_srv_zone_init(zoneid_t);
 202 static void     nfs_srv_zone_fini(zoneid_t, void *);
 203 
 204 #define NFSLOG_COPY_NETBUF(exi, xprt, nb)       {               \
 205         (nb)->maxlen = (xprt)->xp_rtaddr.maxlen;          \
 206         (nb)->len = (xprt)->xp_rtaddr.len;                        \
 207         (nb)->buf = kmem_alloc((nb)->len, KM_SLEEP);              \
 208         bcopy((xprt)->xp_rtaddr.buf, (nb)->buf, (nb)->len);    \
 209         }
 210 
 211 /*
 212  * Public Filehandle common nfs routines
 213  */
 214 static int      MCLpath(char **);
 215 static void     URLparse(char *);
 216 
 217 /*
 218  * NFS callout table.
 219  * This table is used by svc_getreq() to dispatch a request with
 220  * a given prog/vers pair to an appropriate service provider
 221  * dispatch routine.
 222  *
 
 233         __nfs_sc_clts
 234 };
 235 
 236 static SVC_CALLOUT __nfs_sc_cots[] = {
 237         { NFS_PROGRAM,     NFS_VERSMIN,     NFS_VERSMAX,        rfs_dispatch },
 238         { NFS_ACL_PROGRAM, NFS_ACL_VERSMIN, NFS_ACL_VERSMAX,    acl_dispatch }
 239 };
 240 
 241 static SVC_CALLOUT_TABLE nfs_sct_cots = {
 242         sizeof (__nfs_sc_cots) / sizeof (__nfs_sc_cots[0]), FALSE, __nfs_sc_cots
 243 };
 244 
 245 static SVC_CALLOUT __nfs_sc_rdma[] = {
 246         { NFS_PROGRAM,     NFS_VERSMIN,     NFS_VERSMAX,        rfs_dispatch },
 247         { NFS_ACL_PROGRAM, NFS_ACL_VERSMIN, NFS_ACL_VERSMAX,    acl_dispatch }
 248 };
 249 
 250 static SVC_CALLOUT_TABLE nfs_sct_rdma = {
 251         sizeof (__nfs_sc_rdma) / sizeof (__nfs_sc_rdma[0]), FALSE, __nfs_sc_rdma
 252 };
 253 
 254 /*
 255  * DSS: distributed stable storage
 256  * lists of all DSS paths: current, and before last warmstart
 257  */
 258 nvlist_t *rfs4_dss_paths, *rfs4_dss_oldpaths;
 259 
 260 int rfs4_dispatch(struct rpcdisp *, struct svc_req *, SVCXPRT *, char *,
 261     size_t *);
 262 bool_t rfs4_minorvers_mismatch(struct svc_req *, SVCXPRT *, void *);
 263 
 264 /*
 265  * Will be called at the point the server pool is being unregistered
 266  * from the pool list. From that point onwards, the pool is waiting
 267  * to be drained and as such the server state is stale and pertains
 268  * to the old instantiation of the NFS server pool.
      *
      * rfs4_server_start() registers this routine with the service pool as
      * its SVCPSET_UNREGISTER_PROC callback.  It moves the server state from
      * NFS_SERVER_RUNNING to NFS_SERVER_OFFLINE; any other state is left
      * untouched.
 269  */
 270 void
 271 nfs_srv_offline(void)
 272 {
 273         nfs_globals_t *ng;
 274 
             /* NFS server globals looked up for curzone */
 275         ng = zone_getspecific(nfssrv_zone_key, curzone);
 276 
             /* the state word is only read and written under this lock */
 277         mutex_enter(&ng->nfs_server_upordown_lock);
 278         if (ng->nfs_server_upordown == NFS_SERVER_RUNNING) {
 279                 ng->nfs_server_upordown = NFS_SERVER_OFFLINE;
 280         }
 281         mutex_exit(&ng->nfs_server_upordown_lock);
 282 }
 283 
 284 /*
 285  * Will be called at the point the server pool is being destroyed so
 286  * all transports have been closed and no service threads are in
 287  * existence.
 288  *
 289  * If we quiesce the server, we're shutting it down without destroying the
 290  * server state. This allows it to warm start subsequently.
      *
      * rfs4_server_start() registers this routine with the service pool as
      * its SVCPSET_SHUTDOWN_PROC callback.  It takes the non-quiesce path of
      * nfs_srv_shutdown_all(), which tears the NFSv4 state down.
 291  */
 292 void
 293 nfs_srv_stop_all(void)
 294 {
 295         int quiesce = 0;
 296         nfs_srv_shutdown_all(quiesce);
 297 }
 298 
 299 /*
 300  * This alternative shutdown routine can be requested via nfssys()
      *
      * It takes the quiesce path of nfs_srv_shutdown_all(), which marks the
      * server NFS_SERVER_QUIESCED and leaves the NFSv4 state in place.
 301  */
 302 void
 303 nfs_srv_quiesce_all(void)
 304 {
 305         int quiesce = 1;
 306         nfs_srv_shutdown_all(quiesce);
 307 }
 308 
     /*
      * Common worker for nfs_srv_stop_all() and nfs_srv_quiesce_all().
      *
      * quiesce != 0: a RUNNING or OFFLINE server becomes QUIESCED and the
      *               list of new DSS paths is reset.
      * quiesce == 0: an OFFLINE server is taken through STOPPING to STOPPED,
      *               with rfs4_state_zone_fini() and rfs4_fini_drc() called
      *               in between.
      *
      * From any other starting state the call changes nothing.  The
      * QUIESCED and STOPPED transitions are followed by cv_signal() on
      * nfs_server_upordown_cv, the condition rfs4_server_start() waits on.
      */
 309 static void
 310 nfs_srv_shutdown_all(int quiesce)
 311 {
 312         nfs_globals_t *ng = zone_getspecific(nfssrv_zone_key, curzone);
 313 
 314         mutex_enter(&ng->nfs_server_upordown_lock);
 315         if (quiesce) {
 316                 if (ng->nfs_server_upordown == NFS_SERVER_RUNNING ||
 317                     ng->nfs_server_upordown == NFS_SERVER_OFFLINE) {
 318                         ng->nfs_server_upordown = NFS_SERVER_QUIESCED;
 319                         cv_signal(&ng->nfs_server_upordown_cv);
 320 
 321                         /* reset DSS state, for subsequent warm restart */
 322                         rfs4_dss_numnewpaths = 0;
 323                         rfs4_dss_newpaths = NULL;
 324 
 325                         cmn_err(CE_NOTE, "nfs_server: server is now quiesced; "
 326                             "NFSv4 state has been preserved");
 327                 }
 328         } else {
 329                 if (ng->nfs_server_upordown == NFS_SERVER_OFFLINE) {
 330                         ng->nfs_server_upordown = NFS_SERVER_STOPPING;
                             /*
                              * The lock is dropped around the teardown calls.
                              * The STOPPING state set above is what
                              * rfs4_server_start() tests while it waits.
                              */
 331                         mutex_exit(&ng->nfs_server_upordown_lock);
 332                         rfs4_state_zone_fini();
 333                         rfs4_fini_drc();
 334                         mutex_enter(&ng->nfs_server_upordown_lock);
 335                         ng->nfs_server_upordown = NFS_SERVER_STOPPED;
 336                         cv_signal(&ng->nfs_server_upordown_cv);
 337                 }
 338         }
 339         mutex_exit(&ng->nfs_server_upordown_lock);
 340 }
 341 
 342 static int
 343 nfs_srv_set_sc_versions(struct file *fp, SVC_CALLOUT_TABLE **sctpp,
 344     rpcvers_t versmin, rpcvers_t versmax)
 345 {
 346         struct strioctl strioc;
 347         struct T_info_ack tinfo;
 348         int             error, retval;
 349 
 350         /*
 351          * Find out what type of transport this is.
 352          */
 353         strioc.ic_cmd = TI_GETINFO;
 354         strioc.ic_timout = -1;
 355         strioc.ic_len = sizeof (tinfo);
 356         strioc.ic_dp = (char *)&tinfo;
 357         tinfo.PRIM_type = T_INFO_REQ;
 358 
 359         error = strioctl(fp->f_vnode, I_STR, (intptr_t)&strioc, 0, K_TO_K,
 
 387                         versmax = NFS_ACL_VERSMAX;
 388                 __nfs_sc_cots[1].sc_versmin = versmin;
 389                 __nfs_sc_cots[1].sc_versmax = versmax;
 390                 *sctpp = &nfs_sct_cots;
 391                 break;
 392         default:
 393                 error = EINVAL;
 394         }
 395 
 396         return (error);
 397 }
 398 
 399 /*
 400  * NFS Server system call.
 401  * Does all of the work of running a NFS server.
 402  * uap->fd is the fd of an open transport provider
      *
      * arg:   user-space struct nfs_svc_args, read through the STRUCT_*
      *        accessors according to the caller data model
      * model: data model of the calling process
      *
      * Returns 0 on success or an errno value: EBADF if the descriptor is
      * not open, EINVAL if addrmask.len exceeds addrmask.maxlen, EFAULT if
      * the address mask cannot be copied in, or whatever copyinstr(),
      * nfs_srv_set_sc_versions() or svc_tli_kcreate() report.
      *
      * Privilege checking is left to nfssys().
 403  */
 404 int
 405 nfs_svc(struct nfs_svc_args *arg, model_t model)
 406 {
 407         nfs_globals_t *ng;
 408         file_t *fp;
 409         SVCMASTERXPRT *xprt;
 410         int error;
 411         int readsize;
 412         char buf[KNC_STRSIZE];
 413         size_t len;
 414         STRUCT_HANDLE(nfs_svc_args, uap);
 415         struct netbuf addrmask;
 416         SVC_CALLOUT_TABLE *sctp = NULL;
 417 
 418 #ifdef lint
 419         model = model;          /* STRUCT macros don't always refer to it */
 420 #endif
 421 
 422         ng = zone_getspecific(nfssrv_zone_key, curzone);
 423         STRUCT_SET_HANDLE(uap, model, arg);
 424 
 425         /* Check privileges in nfssys() */
 426 
 427         if ((fp = getf(STRUCT_FGET(uap, fd))) == NULL)
 428                 return (EBADF);
 429 
 430         /*
 431          * Set read buffer size to rsize
 432          * and add room for RPC headers.
 433          */
 434         readsize = nfs3tsize() + (RPC_MAXDATASIZE - NFS_MAXDATA);
 435         if (readsize < RPC_MAXDATASIZE)
 436                 readsize = RPC_MAXDATASIZE;
 437 
             /* netid string, bounded by the size of buf */
 438         error = copyinstr((const char *)STRUCT_FGETP(uap, netid), buf,
 439             KNC_STRSIZE, &len);
 440         if (error) {
 441                 releasef(STRUCT_FGET(uap, fd));
 442                 return (error);
 443         }
 444 
 445         addrmask.len = STRUCT_FGET(uap, addrmask.len);
 446         addrmask.maxlen = STRUCT_FGET(uap, addrmask.maxlen);
             /*
              * Both lengths are supplied by the caller.  The buffer below is
              * sized by maxlen but filled with len bytes, so a len larger
              * than maxlen would write past the allocation.
              */
             if (addrmask.len > addrmask.maxlen) {
                     releasef(STRUCT_FGET(uap, fd));
                     return (EINVAL);
             }
 447         addrmask.buf = kmem_alloc(addrmask.maxlen, KM_SLEEP);
 448         error = copyin(STRUCT_FGETP(uap, addrmask.buf), addrmask.buf,
 449             addrmask.len);
 450         if (error) {
 451                 releasef(STRUCT_FGET(uap, fd));
 452                 kmem_free(addrmask.buf, addrmask.maxlen);
                     /* copyin() signals failure, it does not return an errno */
 453                 return (EFAULT);
 454         }
 455 
 456         ng->nfs_versmin = STRUCT_FGET(uap, versmin);
 457         ng->nfs_versmax = STRUCT_FGET(uap, versmax);
 458 
 459         /* Double check the vers min/max ranges */
 460         if ((ng->nfs_versmin > ng->nfs_versmax) ||
 461             (ng->nfs_versmin < NFS_VERSMIN) ||
 462             (ng->nfs_versmax > NFS_VERSMAX)) {
 463                 ng->nfs_versmin = NFS_VERSMIN_DEFAULT;
 464                 ng->nfs_versmax = NFS_VERSMAX_DEFAULT;
 465         }
 466 
             /* pick the callout table that matches the transport type */
 467         if ((error = nfs_srv_set_sc_versions(fp, &sctp, ng->nfs_versmin,
 468             ng->nfs_versmax)) != 0) {
 469                 releasef(STRUCT_FGET(uap, fd));
 470                 kmem_free(addrmask.buf, addrmask.maxlen);
 471                 return (error);
 472         }
 473 
 474         /* Initialize nfsv4 server */
 475         if (ng->nfs_versmax == (rpcvers_t)NFS_V4)
 476                 rfs4_server_start(ng, STRUCT_FGET(uap, delegation));
 477 
 478         /* Create a transport handle. */
 479         error = svc_tli_kcreate(fp, readsize, buf, &addrmask, &xprt,
 480             sctp, NULL, NFS_SVCPOOL_ID, TRUE);
 481 
             /*
              * The mask buffer is freed here only on failure.
              * NOTE(review): on success it is presumably retained by the
              * transport; confirm in svc_tli_kcreate().
              */
 482         if (error)
 483                 kmem_free(addrmask.buf, addrmask.maxlen);
 484 
 485         releasef(STRUCT_FGET(uap, fd));
 486 
 487         /* HA-NFSv4: save the cluster nodeid */
 488         if (cluster_bootflags & CLUSTER_BOOTED)
 489                 lm_global_nlmid = clconf_get_nodeid();
 490 
 491         return (error);
 492 }
 493 
     /*
      * Bring the NFSv4 server to NFS_SERVER_RUNNING.
      *
      * ng:                  NFS server globals holding the state word, its
      *                      lock and its condition variable
      * nfs4_srv_delegation: passed through to rfs4_do_server_start()
      *
      * Nothing is done if the server is already running.  Otherwise the
      * caller sleeps on nfs_server_upordown_cv while a previous instance is
      * STOPPING or OFFLINE, then registers nfs_srv_offline() and
      * nfs_srv_stop_all() with the service pool, calls
      * rfs4_do_server_start() with the state found at that point and marks
      * the server RUNNING.
      */
 494 static void
 495 rfs4_server_start(nfs_globals_t *ng, int nfs4_srv_delegation)
 496 {
 497         /*
 498          * Determine if the server has previously been "started" and
 499          * if not, do the per instance initialization
 500          */
 501         mutex_enter(&ng->nfs_server_upordown_lock);
 502 
 503         if (ng->nfs_server_upordown != NFS_SERVER_RUNNING) {
 504                 /* Do we need to stop and wait on the previous server? */
 505                 while (ng->nfs_server_upordown == NFS_SERVER_STOPPING ||
 506                     ng->nfs_server_upordown == NFS_SERVER_OFFLINE)
 507                         cv_wait(&ng->nfs_server_upordown_cv,
 508                             &ng->nfs_server_upordown_lock);
 509 
                     /* re-test: the state may have changed while waiting */
 510                 if (ng->nfs_server_upordown != NFS_SERVER_RUNNING) {
 511                         (void) svc_pool_control(NFS_SVCPOOL_ID,
 512                             SVCPSET_UNREGISTER_PROC, (void *)&nfs_srv_offline);
 513                         (void) svc_pool_control(NFS_SVCPOOL_ID,
 514                             SVCPSET_SHUTDOWN_PROC, (void *)&nfs_srv_stop_all);
 515 
 516                         rfs4_do_server_start(ng->nfs_server_upordown,
 517                             nfs4_srv_delegation,
 518                             cluster_bootflags & CLUSTER_BOOTED);
 519 
 520                         ng->nfs_server_upordown = NFS_SERVER_RUNNING;
 521                 }
                     /* signal the cv again for any other waiter */
 522                 cv_signal(&ng->nfs_server_upordown_cv);
 523         }
 524         mutex_exit(&ng->nfs_server_upordown_lock);
 525 }
 526 
 527 /*
 528  * If RDMA device available,
 529  * start RDMA listener.
 530  */
 531 int
 532 rdma_start(struct rdma_svc_args *rsa)
 533 {
 534         nfs_globals_t *ng;
 535         int error;
 536         rdma_xprt_group_t started_rdma_xprts;
 537         rdma_stat stat;
 538         int svc_state = 0;
 539 
 540         /* Double check the vers min/max ranges */
 541         if ((rsa->nfs_versmin > rsa->nfs_versmax) ||
 542             (rsa->nfs_versmin < NFS_VERSMIN) ||
 543             (rsa->nfs_versmax > NFS_VERSMAX)) {
 544                 rsa->nfs_versmin = NFS_VERSMIN_DEFAULT;
 545                 rsa->nfs_versmax = NFS_VERSMAX_DEFAULT;
 546         }
 547 
 548         ng = zone_getspecific(nfssrv_zone_key, curzone);
 549         ng->nfs_versmin = rsa->nfs_versmin;
 550         ng->nfs_versmax = rsa->nfs_versmax;
 551 
 552         /* Set the versions in the callout table */
 553         __nfs_sc_rdma[0].sc_versmin = rsa->nfs_versmin;
 554         __nfs_sc_rdma[0].sc_versmax = rsa->nfs_versmax;
 555         /* For the NFS_ACL program, check the max version */
 556         __nfs_sc_rdma[1].sc_versmin = rsa->nfs_versmin;
 557         if (rsa->nfs_versmax > NFS_ACL_VERSMAX)
 558                 __nfs_sc_rdma[1].sc_versmax = NFS_ACL_VERSMAX;
 559         else
 560                 __nfs_sc_rdma[1].sc_versmax = rsa->nfs_versmax;
 561 
 562         /* Initialize nfsv4 server */
 563         if (rsa->nfs_versmax == (rpcvers_t)NFS_V4)
 564                 rfs4_server_start(ng, rsa->delegation);
 565 
 566         started_rdma_xprts.rtg_count = 0;
 567         started_rdma_xprts.rtg_listhead = NULL;
 568         started_rdma_xprts.rtg_poolid = rsa->poolid;
 569 
 570 restart:
 571         error = svc_rdma_kcreate(rsa->netid, &nfs_sct_rdma, rsa->poolid,
 572             &started_rdma_xprts);
 573 
 574         svc_state = !error;
 575 
 576         while (!error) {
 577 
 578                 /*
 579                  * wait till either interrupted by a signal on
 580                  * nfs service stop/restart or signalled by a
 581                  * rdma attach/detatch.
 582                  */
 583 
 584                 stat = rdma_kwait();
 585 
 586                 /*
 587                  * stop services if running -- either on a HCA detach event
 588                  * or if the nfs service is stopped/restarted.
 589                  */
 590 
 591                 if ((stat == RDMA_HCA_DETACH || stat == RDMA_INTR) &&
 592                     svc_state) {
 593                         rdma_stop(&started_rdma_xprts);
 594                         svc_state = 0;
 595                 }
 596 
 597                 /*
 598                  * nfs service stop/restart, break out of the
 599                  * wait loop and return;
 600                  */
 601                 if (stat == RDMA_INTR)
 
 612                 /*
 613                  * loop until a nfs service stop/restart
 614                  */
 615         }
 616 
 617         return (error);
 618 }
 619 
 620 /* ARGSUSED */
     /*
      * Service routine with an empty body: it takes the common service
      * routine argument list and does nothing with it.
      * NOTE(review): presumably the NULL procedure handler in the dispatch
      * tables, which are not visible here.
      */
 621 void
 622 rpc_null(caddr_t *argp, caddr_t *resp, struct exportinfo *exi,
 623     struct svc_req *req, cred_t *cr, bool_t ro)
 624 {
 625 }
 626 
 627 /* ARGSUSED */
     /*
      * Same argument list as rpc_null().  The only work done is firing the
      * op__null__start and op__null__done DTrace probes back to back, with
      * the request, the credential, a NULL vnode and the export as probe
      * arguments.
      */
 628 void
 629 rpc_null_v3(caddr_t *argp, caddr_t *resp, struct exportinfo *exi,
 630     struct svc_req *req, cred_t *cr, bool_t ro)
 631 {
 632         DTRACE_NFSV3_4(op__null__start, struct svc_req *, req,
 633             cred_t *, cr, vnode_t *, NULL, struct exportinfo *, exi);
 634         DTRACE_NFSV3_4(op__null__done, struct svc_req *, req,
 635             cred_t *, cr, vnode_t *, NULL, struct exportinfo *, exi);
 636 }
 637 
 638 /* ARGSUSED */
     /*
      * Service routine with an empty body.
      * NOTE(review): the commented-out return suggests it stands in for
      * unsupported procedures; confirm against the dispatch tables.
      */
 639 static void
 640 rfs_error(caddr_t *argp, caddr_t *resp, struct exportinfo *exi,
 641     struct svc_req *req, cred_t *cr, bool_t ro)
 642 {
 643         /* return (EOPNOTSUPP); */
 644 }
 645 
     /*
      * Empty result free routine.  Dispatch table entries with nothing to
      * free point at it, and common_dispatch() compares dis_resfree against
      * this address so that it can skip the call.
      */
 646 static void
 647 nullfree(void)
 648 {
 649 }
 650 
 651 static char *rfscallnames_v2[] = {
 652         "RFS2_NULL",
 653         "RFS2_GETATTR",
 654         "RFS2_SETATTR",
 655         "RFS2_ROOT",
 
1293         /* RFS3_PATHCONF = 20 */
1294         PATHCONF3res nfs3_pathconf_res;
1295 
1296         /* RFS3_COMMIT = 21 */
1297         COMMIT3res nfs3_commit_res;
1298 
1299         /*
1300          * NFS VERSION 4
1301          */
1302 
1303         /* RFS_NULL = 0 */
1304 
1305         /* RFS4_COMPOUND = 1 */
1306         COMPOUND4res nfs4_compound_res;
1307 
1308 };
1309 
     /*
      * Per-version dispatch descriptors for the NFS program, in the order
      * v2, v3, v4.  common_dispatch() indexes this array with the request
      * version minus min_vers.  Each entry holds the number of procedures,
      * the procedure name table, pointers to the per-procedure call counters
      * and I/O kstats, and the procedure dispatch table.
      */
1310 static struct rpc_disptable rfs_disptable[] = {
1311         {sizeof (rfsdisptab_v2) / sizeof (rfsdisptab_v2[0]),
1312             rfscallnames_v2,
1313             &rfsproccnt_v2_ptr, &rfsprocio_v2_ptr, rfsdisptab_v2},
1314         {sizeof (rfsdisptab_v3) / sizeof (rfsdisptab_v3[0]),
1315             rfscallnames_v3,
1316             &rfsproccnt_v3_ptr, &rfsprocio_v3_ptr, rfsdisptab_v3},
1317         {sizeof (rfsdisptab_v4) / sizeof (rfsdisptab_v4[0]),
1318             rfscallnames_v4,
1319             &rfsproccnt_v4_ptr, &rfsprocio_v4_ptr, rfsdisptab_v4},
1320 };
1321 
1322 /*
1323  * If nfs_portmon is set, then clients are required to use privileged
1324  * ports (ports < IPPORT_RESERVED) in order to get NFS services.
1325  *
1326  * N.B.: this attempt to carry forward the already ill-conceived notion
1327  * of privileged ports for TCP/UDP is really quite ineffectual.  Not only
1328  * is it transport-dependent, it's laughably easy to spoof.  If you're
1329  * really interested in security, you must start with secure RPC instead.
1330  */
1331 volatile int nfs_portmon = 0;
1332 
1333 #ifdef DEBUG
     /*
      * Counters bumped by common_dispatch(): a hit when the transport cred
      * has a reference count of one and is reused as is, a miss when it is
      * released and replaced with a fresh crget().
      */
1334 static int cred_hits = 0;
1335 static int cred_misses = 0;
1336 #endif
1337 
1338 #ifdef DEBUG
1339 /*
1340  * Debug code to allow disabling of rfs_dispatch() use of
1341  * fastxdrargs() and fastxdrres() calls for testing purposes.
      * A non-zero value disables the corresponding fast path.
1342  */
1343 static int rfs_no_fast_xdrargs = 0;
1344 static int rfs_no_fast_xdrres = 0;
1345 #endif
1346 
1347 union acl_args {
1348         /*
1349          * ACL VERSION 2
1350          */
1351 
1352         /* ACL2_NULL = 0 */
1353 
1354         /* ACL2_GETACL = 1 */
1355         GETACL2args acl2_getacl_args;
1356 
1357         /* ACL2_SETACL = 2 */
 
1424 
     /*
      * Report whether a LOOKUP reply carries WNFSERR_CLNT_FLAVOR.
      *
      * req: the request; only rq_vers and rq_proc are examined
      * res: result buffer of the procedure, read as struct nfsdiropres for
      *      an NFS_VERSION RFS_LOOKUP and as LOOKUP3res for an NFS_V3
      *      NFSPROC3_LOOKUP
      *
      * Returns TRUE only for those two procedures when the status field
      * equals WNFSERR_CLNT_FLAVOR, FALSE in every other case.
      * common_dispatch() answers with svcerr_weakauth() on TRUE.
      */
1425 static bool_t
1426 auth_tooweak(struct svc_req *req, char *res)
1427 {
1428 
1429         if (req->rq_vers == NFS_VERSION && req->rq_proc == RFS_LOOKUP) {
1430                 struct nfsdiropres *dr = (struct nfsdiropres *)res;
1431                 if ((enum wnfsstat)dr->dr_status == WNFSERR_CLNT_FLAVOR)
1432                         return (TRUE);
1433         } else if (req->rq_vers == NFS_V3 && req->rq_proc == NFSPROC3_LOOKUP) {
1434                 LOOKUP3res *resp = (LOOKUP3res *)res;
1435                 if ((enum wnfsstat)resp->status == WNFSERR_CLNT_FLAVOR)
1436                         return (TRUE);
1437         }
1438         return (FALSE);
1439 }
1440 
1441 
1442 static void
1443 common_dispatch(struct svc_req *req, SVCXPRT *xprt, rpcvers_t min_vers,
1444     rpcvers_t max_vers, char *pgmname, struct rpc_disptable *disptable)
1445 {
1446         int which;
1447         rpcvers_t vers;
1448         char *args;
1449         union {
1450                         union rfs_args ra;
1451                         union acl_args aa;
1452                 } args_buf;
1453         char *res;
1454         union {
1455                         union rfs_res rr;
1456                         union acl_res ar;
1457                 } res_buf;
1458         struct rpcdisp *disp = NULL;
1459         int dis_flags = 0;
1460         cred_t *cr;
1461         int error = 0;
1462         int anon_ok;
1463         struct exportinfo *exi = NULL;
1464         unsigned int nfslog_rec_id;
1465         int dupstat;
1466         struct dupreq *dr;
1467         int authres;
1468         bool_t publicfh_ok = FALSE;
1469         enum_t auth_flavor;
1470         bool_t dupcached = FALSE;
1471         struct netbuf   nb;
1472         bool_t logging_enabled = FALSE;
1473         struct exportinfo *nfslog_exi = NULL;
1474         char **procnames;
1475         char cbuf[INET6_ADDRSTRLEN];    /* to hold both IPv4 and IPv6 addr */
1476         bool_t ro = FALSE;
1477         kstat_t *ksp = NULL;
1478         kstat_t *exi_ksp = NULL;
1479         size_t pos;                     /* request size */
1480         size_t rlen;                    /* reply size */
1481         bool_t rsent = FALSE;           /* reply was sent successfully */
1482         nfs_export_t *ne = nfs_get_export();
1483 
1484         vers = req->rq_vers;
1485 
1486         if (vers < min_vers || vers > max_vers) {
1487                 svcerr_progvers(req->rq_xprt, min_vers, max_vers);
1488                 error++;
1489                 cmn_err(CE_NOTE, "%s: bad version number %u", pgmname, vers);
1490                 goto done;
1491         }
1492         vers -= min_vers;
1493 
1494         which = req->rq_proc;
1495         if (which < 0 || which >= disptable[(int)vers].dis_nprocs) {
1496                 svcerr_noproc(req->rq_xprt);
1497                 error++;
1498                 goto done;
1499         }
1500 
1501         (*(disptable[(int)vers].dis_proccntp))[which].value.ui64++;
1502 
1503         ksp = (*(disptable[(int)vers].dis_prociop))[which];
1504         if (ksp != NULL) {
1505                 mutex_enter(ksp->ks_lock);
1506                 kstat_runq_enter(KSTAT_IO_PTR(ksp));
1507                 mutex_exit(ksp->ks_lock);
1508         }
1509         pos = XDR_GETPOS(&xprt->xp_xdrin);
1510 
1511         disp = &disptable[(int)vers].dis_table[which];
1512         procnames = disptable[(int)vers].dis_procnames;
1513 
1514         auth_flavor = req->rq_cred.oa_flavor;
1515 
1516         /*
1517          * Deserialize into the args struct.
1518          */
1519         args = (char *)&args_buf;
1520 
1521 #ifdef DEBUG
1522         if (rfs_no_fast_xdrargs || (auth_flavor == RPCSEC_GSS) ||
1523             disp->dis_fastxdrargs == NULL_xdrproc_t ||
1524             !SVC_GETARGS(xprt, disp->dis_fastxdrargs, (char *)&args))
1525 #else
1526         if ((auth_flavor == RPCSEC_GSS) ||
1527             disp->dis_fastxdrargs == NULL_xdrproc_t ||
1528             !SVC_GETARGS(xprt, disp->dis_fastxdrargs, (char *)&args))
1529 #endif
1530         {
 
1534                         /*
1535                          * Check if we are outside our capabilities.
1536                          */
1537                         if (rfs4_minorvers_mismatch(req, xprt, (void *)args))
1538                                 goto done;
1539 
1540                         svcerr_decode(xprt);
1541                         cmn_err(CE_NOTE,
1542                             "Failed to decode arguments for %s version %u "
1543                             "procedure %s client %s%s",
1544                             pgmname, vers + min_vers, procnames[which],
1545                             client_name(req), client_addr(req, cbuf));
1546                         goto done;
1547                 }
1548         }
1549 
1550         /*
1551          * If Version 4 use that specific dispatch function.
1552          */
1553         if (req->rq_vers == 4) {
1554                 error += rfs4_dispatch(disp, req, xprt, args, &rlen);
1555                 if (error == 0)
1556                         rsent = TRUE;
1557                 goto done;
1558         }
1559 
1560         dis_flags = disp->dis_flags;
1561 
1562         /*
1563          * Find export information and check authentication,
1564          * setting the credential if everything is ok.
1565          */
1566         if (disp->dis_getfh != NULL) {
1567                 void *fh;
1568                 fsid_t *fsid;
1569                 fid_t *fid, *xfid;
1570                 fhandle_t *fh2;
1571                 nfs_fh3 *fh3;
1572 
1573                 fh = (*disp->dis_getfh)(args);
1574                 switch (req->rq_vers) {
1575                 case NFS_VERSION:
1576                         fh2 = (fhandle_t *)fh;
 
1597                  * client's mount operation to fail.  As a work-around,
1598                  * we check here to see if the request is a getattr or
1599                  * statfs operation on the exported vnode itself, and
1600                  * pass a flag to checkauth with the result of this test.
1601                  *
1602                  * The filehandle refers to the mountpoint itself if
1603                  * the fh_data and fh_xdata portions of the filehandle
1604                  * are equal.
1605                  *
1606                  * Added anon_ok argument to checkauth().
1607                  */
1608 
1609                 if ((dis_flags & RPC_ALLOWANON) && EQFID(fid, xfid))
1610                         anon_ok = 1;
1611                 else
1612                         anon_ok = 0;
1613 
1614                 cr = xprt->xp_cred;
1615                 ASSERT(cr != NULL);
1616 #ifdef DEBUG
1617                 {
1618                         if (crgetref(cr) != 1) {
1619                                 crfree(cr);
1620                                 cr = crget();
1621                                 xprt->xp_cred = cr;
1622                                 cred_misses++;
1623                         } else
1624                                 cred_hits++;
1625                 }
1626 #else
1627                 if (crgetref(cr) != 1) {
1628                         crfree(cr);
1629                         cr = crget();
1630                         xprt->xp_cred = cr;
1631                 }
1632 #endif
1633 
1634                 exi = checkexport(fsid, xfid);
1635 
1636                 if (exi != NULL) {
1637                         rw_enter(&ne->exported_lock, RW_READER);
1638                         exi_ksp = NULL;
1639 
1640                         if (exi->exi_kstats != NULL) {
1641                                 switch (req->rq_vers) {
1642                                 case NFS_VERSION:
1643                                         exi_ksp = exp_kstats_v2(exi->exi_kstats,
1644                                             which);
1645                                         break;
1646                                 case NFS_V3:
1647                                         exi_ksp = exp_kstats_v3(exi->exi_kstats,
1648                                             which);
1649                                         break;
1650                                 default:
1651                                         ASSERT(0);
1652                                         break;
1653                                 }
1654                         }
1655 
1656                         if (exi_ksp != NULL) {
1657                                 mutex_enter(exi_ksp->ks_lock);
1658                                 kstat_runq_enter(KSTAT_IO_PTR(exi_ksp));
1659                                 mutex_exit(exi_ksp->ks_lock);
1660                         } else {
1661                                 rw_exit(&ne->exported_lock);
1662                         }
1663 
1664                         publicfh_ok = PUBLICFH_CHECK(ne, disp, exi, fsid, xfid);
1665                         /*
1666                          * Don't allow non-V4 clients access
1667                          * to pseudo exports
1668                          */
1669                         if (PSEUDO(exi)) {
1670                                 svcerr_weakauth(xprt);
1671                                 error++;
1672                                 goto done;
1673                         }
1674 
1675                         authres = checkauth(exi, req, cr, anon_ok, publicfh_ok,
1676                             &ro);
1677                         /*
1678                          * authres >  0: authentication OK - proceed
1679                          * authres == 0: authentication weak - return error
1680                          * authres <  0: authentication timeout - drop
1681                          */
1682                         if (authres <= 0) {
1683                                 if (authres == 0) {
1684                                         svcerr_weakauth(xprt);
 
1756                                 SVC_FREERES(xprt);
1757                         error++;
1758                         goto done;
1759                 }
1760         }
1761 
1762         if (auth_tooweak(req, res)) {
1763                 svcerr_weakauth(xprt);
1764                 error++;
1765                 goto done;
1766         }
1767 
1768         /*
1769          * Check to see if logging has been enabled on the server.
1770          * If so, then obtain the export info struct to be used for
1771          * the later writing of the log record.  This is done for
1772          * the case that a lookup is done across a non-logged public
1773          * file system.
1774          */
1775         if (nfslog_buffer_list != NULL) {
1776                 nfslog_exi = nfslog_get_exi(ne, exi, req, res, &nfslog_rec_id);
1777                 /*
1778                  * Is logging enabled?
1779                  */
1780                 logging_enabled = (nfslog_exi != NULL);
1781 
1782                 /*
1783                  * Copy the netbuf for logging purposes, before it is
1784                  * freed by svc_sendreply().
1785                  */
1786                 if (logging_enabled) {
1787                         NFSLOG_COPY_NETBUF(nfslog_exi, xprt, &nb);
1788                         /*
1789                          * If RPC_MAPRESP flag set (i.e. in V2 ops) the
1790                          * res gets copied directly into the mbuf and
1791                          * may be freed soon after the sendreply. So we
1792                          * must copy it here to a safe place...
1793                          */
1794                         if (res != (char *)&res_buf) {
1795                                 bcopy(res, (char *)&res_buf, disp->dis_ressz);
1796                         }
1797                 }
1798         }
1799 
1800         /*
1801          * Serialize and send results struct
1802          */
1803 #ifdef DEBUG
1804         if (rfs_no_fast_xdrres == 0 && res != (char *)&res_buf)
1805 #else
1806         if (res != (char *)&res_buf)
1807 #endif
1808         {
1809                 if (!svc_sendreply(xprt, disp->dis_fastxdrres, res)) {
1810                         cmn_err(CE_NOTE, "%s: bad sendreply", pgmname);
1811                         svcerr_systemerr(xprt);
1812                         error++;
1813                 } else {
1814                         rlen = xdr_sizeof(disp->dis_fastxdrres, res);
1815                         rsent = TRUE;
1816                 }
1817         } else {
1818                 if (!svc_sendreply(xprt, disp->dis_xdrres, res)) {
1819                         cmn_err(CE_NOTE, "%s: bad sendreply", pgmname);
1820                         svcerr_systemerr(xprt);
1821                         error++;
1822                 } else {
1823                         rlen = xdr_sizeof(disp->dis_xdrres, res);
1824                         rsent = TRUE;
1825                 }
1826         }
1827 
1828         /*
1829          * Log if needed
1830          */
1831         if (logging_enabled) {
1832                 nfslog_write_record(nfslog_exi, req, args, (char *)&res_buf,
1833                     cr, &nb, nfslog_rec_id, NFSLOG_ONE_BUFFER);
1834                 exi_rele(&nfslog_exi);
1835                 kmem_free((&nb)->buf, (&nb)->len);
1836         }
1837 
1838         /*
1839          * Free results struct. With the addition of NFS V4 we can
1840          * have non-idempotent procedures with functions.
1841          */
1842         if (disp->dis_resfree != nullfree && dupcached == FALSE) {
1843                 (*disp->dis_resfree)(res);
1844         }
1845 
1846 done:
1847         if (ksp != NULL || exi_ksp != NULL) {
1848                 pos = XDR_GETPOS(&xprt->xp_xdrin) - pos;
1849         }
1850 
1851         /*
1852          * Free arguments struct
1853          */
1854         if (disp) {
1855                 if (!SVC_FREEARGS(xprt, disp->dis_xdrargs, args)) {
1856                         cmn_err(CE_NOTE, "%s: bad freeargs", pgmname);
1857                         error++;
1858                 }
1859         } else {
1860                 if (!SVC_FREEARGS(xprt, (xdrproc_t)0, (caddr_t)0)) {
1861                         cmn_err(CE_NOTE, "%s: bad freeargs", pgmname);
1862                         error++;
1863                 }
1864         }
1865 
1866         if (exi_ksp != NULL) {
1867                 mutex_enter(exi_ksp->ks_lock);
1868                 KSTAT_IO_PTR(exi_ksp)->nwritten += pos;
1869                 KSTAT_IO_PTR(exi_ksp)->writes++;
1870                 if (rsent) {
1871                         KSTAT_IO_PTR(exi_ksp)->nread += rlen;
1872                         KSTAT_IO_PTR(exi_ksp)->reads++;
1873                 }
1874                 kstat_runq_exit(KSTAT_IO_PTR(exi_ksp));
1875                 mutex_exit(exi_ksp->ks_lock);
1876 
1877                 rw_exit(&ne->exported_lock);
1878         }
1879 
1880         if (exi != NULL)
1881                 exi_rele(&exi);
1882 
1883         if (ksp != NULL) {
1884                 mutex_enter(ksp->ks_lock);
1885                 KSTAT_IO_PTR(ksp)->nwritten += pos;
1886                 KSTAT_IO_PTR(ksp)->writes++;
1887                 if (rsent) {
1888                         KSTAT_IO_PTR(ksp)->nread += rlen;
1889                         KSTAT_IO_PTR(ksp)->reads++;
1890                 }
1891                 kstat_runq_exit(KSTAT_IO_PTR(ksp));
1892                 mutex_exit(ksp->ks_lock);
1893         }
1894 
1895         global_svstat_ptr[req->rq_vers][NFS_BADCALLS].value.ui64 += error;
1896 
1897         global_svstat_ptr[req->rq_vers][NFS_CALLS].value.ui64++;
1898 }
1899 
     /*
      * Dispatch routine for NFS_PROGRAM, referenced from the callout tables.
      * Forwards the request to common_dispatch() with the NFS version range,
      * the program name used in its log messages and the NFS dispatch
      * descriptors.
      */
1900 static void
1901 rfs_dispatch(struct svc_req *req, SVCXPRT *xprt)
1902 {
1903         common_dispatch(req, xprt, NFS_VERSMIN, NFS_VERSMAX,
1904             "NFS", rfs_disptable);
1905 }
1906 
     /*
      * Procedure names for the first entry of acl_disptable, indexed by
      * procedure number.  common_dispatch() prints the matching name when
      * argument decoding fails.
      */
1907 static char *aclcallnames_v2[] = {
1908         "ACL2_NULL",
1909         "ACL2_GETACL",
1910         "ACL2_SETACL",
1911         "ACL2_GETATTR",
1912         "ACL2_ACCESS",
1913         "ACL2_GETXATTRDIR"
1914 };
 
2000             acl3_getacl_getfh},
2001 
2002         /* ACL3_SETACL = 2 */
2003         {acl3_setacl,
2004             xdr_SETACL3args, NULL_xdrproc_t, sizeof (SETACL3args),
2005             xdr_SETACL3res, NULL_xdrproc_t, sizeof (SETACL3res),
2006             nullfree, 0,
2007             acl3_setacl_getfh},
2008 
2009         /* ACL3_GETXATTRDIR = 3 */
2010         {acl3_getxattrdir,
2011             xdr_GETXATTRDIR3args, NULL_xdrproc_t, sizeof (GETXATTRDIR3args),
2012             xdr_GETXATTRDIR3res, NULL_xdrproc_t, sizeof (GETXATTRDIR3res),
2013             nullfree, RPC_IDEMPOTENT,
2014             acl3_getxattrdir_getfh},
2015 };
2016 
     /*
      * Per-version dispatch descriptors for the NFS_ACL program, in the
      * order v2, v3.  common_dispatch() indexes this array with the request
      * version minus min_vers.  Each entry holds the number of procedures,
      * the procedure name table, pointers to the per-procedure call counters
      * and I/O kstats, and the procedure dispatch table.
      */
2017 static struct rpc_disptable acl_disptable[] = {
2018         {sizeof (acldisptab_v2) / sizeof (acldisptab_v2[0]),
2019                 aclcallnames_v2,
2020                 &aclproccnt_v2_ptr, &aclprocio_v2_ptr, acldisptab_v2},
2021         {sizeof (acldisptab_v3) / sizeof (acldisptab_v3[0]),
2022                 aclcallnames_v3,
2023                 &aclproccnt_v3_ptr, &aclprocio_v3_ptr, acldisptab_v3},
2024 };
2025 
     /*
      * Dispatch routine for NFS_ACL_PROGRAM, referenced from the callout
      * tables.  Forwards the request to common_dispatch() with the NFS_ACL
      * version range, the program name used in its log messages and the
      * ACL dispatch descriptors.
      */
2026 static void
2027 acl_dispatch(struct svc_req *req, SVCXPRT *xprt)
2028 {
2029         common_dispatch(req, xprt, NFS_ACL_VERSMIN, NFS_ACL_VERSMAX,
2030             "ACL", acl_disptable);
2031 }
2032 
2033 int
2034 checkwin(int flavor, int window, struct svc_req *req)
2035 {
2036         struct authdes_cred *adc;
2037 
2038         switch (flavor) {
2039         case AUTH_DES:
2040                 adc = (struct authdes_cred *)req->rq_clntcred;
2041                 CTASSERT(sizeof (struct authdes_cred) <= RQCRED_SIZE);
2042                 if (adc->adc_fullname.window > window)
2043                         return (0);
 
2597 
2598         } else {
2599 
2600                 /*
2601                  * No IP address to print. If there was a host name
2602                  * printed, then we print a space.
2603                  */
2604                 (void) sprintf(buf, frontspace);
2605         }
2606 
2607         return (buf);
2608 }
2609 
2610 /*
2611  * NFS Server initialization routine.  This routine should only be called
2612  * once.  It performs the following tasks:
2613  *      - Call sub-initialization routines (localize access to variables)
2614  *      - Initialize all locks
2615  *      - initialize the version 3 write verifier
2616  */
2617 void
2618 nfs_srvinit(void)
2619 {
2620         /* NFS server zone-specific global variables */
2621         zone_key_create(&nfssrv_zone_key, nfs_srv_zone_init,
2622             NULL, nfs_srv_zone_fini);
2623 
2624         nfs_exportinit();
2625         rfs_srvrinit();
2626         rfs3_srvrinit();
2627         rfs4_srvrinit();
2628         nfsauth_init();
2629 }
2630 
2631 /*
2632  * NFS Server finalization routine. This routine is called to cleanup the
2633  * initialization work previously performed if the NFS server module could
2634  * not be loaded correctly.
2635  */
2636 void
2637 nfs_srvfini(void)
2638 {
2639         nfsauth_fini();
2640         rfs4_srvrfini();
2641         rfs3_srvrfini();
2642         rfs_srvrfini();
2643         nfs_exportfini();
2644 
2645         (void) zone_key_delete(nfssrv_zone_key);
2646 }
2647 
2648 /* ARGSUSED */
2649 static void *
2650 nfs_srv_zone_init(zoneid_t zoneid)
2651 {
2652         nfs_globals_t *ng;
2653 
2654         ng = kmem_zalloc(sizeof (*ng), KM_SLEEP);
2655 
2656         ng->nfs_versmin = NFS_VERSMIN_DEFAULT;
2657         ng->nfs_versmax = NFS_VERSMAX_DEFAULT;
2658 
2659         /* Init the stuff to control start/stop */
2660         ng->nfs_server_upordown = NFS_SERVER_STOPPED;
2661         mutex_init(&ng->nfs_server_upordown_lock, NULL, MUTEX_DEFAULT, NULL);
2662         cv_init(&ng->nfs_server_upordown_cv, NULL, CV_DEFAULT, NULL);
2663         mutex_init(&ng->rdma_wait_mutex, NULL, MUTEX_DEFAULT, NULL);
2664         cv_init(&ng->rdma_wait_cv, NULL, CV_DEFAULT, NULL);
2665 
2666         return (ng);
2667 }
2668 
2669 /* ARGSUSED */
2670 static void
2671 nfs_srv_zone_fini(zoneid_t zoneid, void *data)
2672 {
2673         nfs_globals_t *ng;
2674 
2675         ng = (nfs_globals_t *)data;
2676         mutex_destroy(&ng->nfs_server_upordown_lock);
2677         cv_destroy(&ng->nfs_server_upordown_cv);
2678         mutex_destroy(&ng->rdma_wait_mutex);
2679         cv_destroy(&ng->rdma_wait_cv);
2680 
2681         kmem_free(ng, sizeof (*ng));
2682 }
2683 
2684 /*
2685  * Set up an iovec array of up to cnt pointers.
2686  */
2687 void
2688 mblk_to_iov(mblk_t *m, int cnt, struct iovec *iovp)
2689 {
2690         while (m != NULL && cnt-- > 0) {
2691                 iovp->iov_base = (caddr_t)m->b_rptr;
2692                 iovp->iov_len = (m->b_wptr - m->b_rptr);
2693                 iovp++;
2694                 m = m->b_cont;
2695         }
2696 }
2697 
2698 /*
2699  * Common code between NFS Version 2 and NFS Version 3 for the public
2700  * filehandle multicomponent lookups.
2701  */
2702 
2703 /*
2704  * Public filehandle evaluation of a multi-component lookup, following
2705  * symbolic links, if necessary. This may result in a vnode in another
2706  * filesystem, which is OK as long as the other filesystem is exported.
 
2903                     mc_dvp, cr, NATIVEPATH);
2904 
2905                 if (error == ENOENT) {
2906                         *vpp = tvp;
2907                         mc_dvp = tmc_dvp;
2908                         error = 0;
2909                 } else {        /* ok or error other than ENOENT */
2910                         if (tmc_dvp)
2911                                 VN_RELE(tmc_dvp);
2912                         if (error)
2913                                 goto publicfh_done;
2914 
2915                         /*
2916                          * Found a valid vp for index "filename". Sanity check
2917                          * for odd case where a directory is provided as index
2918                          * option argument and leads us to another filesystem
2919                          */
2920 
2921                         /* Release the reference on the old exi value */
2922                         ASSERT(*exi != NULL);
2923                         exi_rele(exi);
2924 
2925                         if (error = nfs_check_vpexi(mc_dvp, *vpp, kcred, exi)) {
2926                                 VN_RELE(*vpp);
2927                                 goto publicfh_done;
2928                         }
2929                 }
2930         }
2931 
2932 publicfh_done:
2933         if (mc_dvp)
2934                 VN_RELE(mc_dvp);
2935 
2936         return (error);
2937 }
2938 
2939 /*
2940  * Evaluate a multi-component path
2941  */
2942 int
2943 rfs_pathname(
2944         char *path,                     /* pathname to evaluate */
2945         vnode_t **dirvpp,               /* ret for ptr to parent dir vnode */
2946         vnode_t **compvpp,              /* ret for ptr to component vnode */
2947         vnode_t *startdvp,              /* starting vnode */
2948         cred_t *cr,                     /* user's credential */
2949         int pathflag)                   /* flag to identify path, e.g. URL */
2950 {
2951         char namebuf[TYPICALMAXPATHLEN];
2952         struct pathname pn;
2953         int error;
2954 
2955         /*
2956          * If pathname starts with '/', then set startdvp to root.
2957          */
2958         if (*path == '/') {
2959                 while (*path == '/')
2960                         path++;
2961 
2962                 startdvp = ZONE_ROOTVP();
2963         }
2964 
2965         error = pn_get_buf(path, UIO_SYSSPACE, &pn, namebuf, sizeof (namebuf));
2966         if (error == 0) {
2967                 /*
2968                  * Call the URL parser for URL paths to modify the original
2969                  * string to handle any '%' encoded characters that exist.
2970                  * Done here to avoid an extra bcopy in the lookup.
2971                  * We need to be careful about pathlen's. We know that
2972                  * rfs_pathname() is called with a non-empty path. However,
2973                  * it could be emptied due to the path simply being all /'s,
2974                  * which is valid to proceed with the lookup, or due to the
2975                  * URL parser finding an encoded null character at the
2976                  * beginning of path which should not proceed with the lookup.
2977                  */
2978                 if (pn.pn_pathlen != 0 && pathflag == URLPATH) {
2979                         URLparse(pn.pn_path);
2980                         if ((pn.pn_pathlen = strlen(pn.pn_path)) == 0)
2981                                 return (ENOENT);
2982                 }
2983                 VN_HOLD(startdvp);
2984                 error = lookuppnvp(&pn, NULL, NO_FOLLOW, dirvpp, compvpp,
2985                     ZONE_ROOTVP(), startdvp, cr);
2986         }
2987         if (error == ENAMETOOLONG) {
2988                 /*
2989                  * This thread used a pathname > TYPICALMAXPATHLEN bytes long.
2990                  */
2991                 if (error = pn_get(path, UIO_SYSSPACE, &pn))
2992                         return (error);
2993                 if (pn.pn_pathlen != 0 && pathflag == URLPATH) {
2994                         URLparse(pn.pn_path);
2995                         if ((pn.pn_pathlen = strlen(pn.pn_path)) == 0) {
2996                                 pn_free(&pn);
2997                                 return (ENOENT);
2998                         }
2999                 }
3000                 VN_HOLD(startdvp);
3001                 error = lookuppnvp(&pn, NULL, NO_FOLLOW, dirvpp, compvpp,
3002                     ZONE_ROOTVP(), startdvp, cr);
3003                 pn_free(&pn);
3004         }
3005 
3006         return (error);
3007 }
3008 
3009 /*
3010  * Adapt the multicomponent lookup path depending on the pathtype
3011  */
3012 static int
3013 MCLpath(char **path)
3014 {
3015         unsigned char c = (unsigned char)**path;
3016 
3017         /*
3018          * If the MCL path is between 0x20 and 0x7E (graphic printable
3019          * character of the US-ASCII coded character set), its a URL path,
3020          * per RFC 1738.
3021          */
3022         if (c >= 0x20 && c <= 0x7E)
 
3086         int walk;
3087         int error = 0;
3088 
3089         *exi = nfs_vptoexi(mc_dvp, vp, cr, &walk, NULL, FALSE);
3090         if (*exi == NULL)
3091                 error = EACCES;
3092         else {
3093                 /*
3094                  * If nosub is set for this export then
3095                  * a lookup relative to the public fh
3096                  * must not terminate below the
3097                  * exported directory.
3098                  */
3099                 if ((*exi)->exi_export.ex_flags & EX_NOSUB && walk > 0)
3100                         error = EACCES;
3101         }
3102 
3103         return (error);
3104 }
3105 
3106 /*
3107  * Used by NFSv3 and NFSv4 server to query label of
3108  * a pathname component during lookup/access ops.
3109  */
3110 ts_label_t *
3111 nfs_getflabel(vnode_t *vp, struct exportinfo *exi)
3112 {
3113         zone_t *zone;
3114         ts_label_t *zone_label;
3115         char *path;
3116 
3117         mutex_enter(&vp->v_lock);
3118         if (vp->v_path != vn_vpath_empty) {
3119                 zone = zone_find_by_any_path(vp->v_path, B_FALSE);
3120                 mutex_exit(&vp->v_lock);
3121         } else {
3122                 /*
3123                  * v_path not cached. Fall back on pathname of exported
3124                  * file system as we rely on pathname from which we can
3125                  * derive a label. The exported file system portion of
 
 |