Print this page
    
11083 support NFS server in zone
Portions contributed by: Dan Kruchinin <dan.kruchinin@nexenta.com>
Portions contributed by: Stepan Zastupov <stepan.zastupov@gmail.com>
Portions contributed by: Joyce McIntosh <joyce.mcintosh@nexenta.com>
Portions contributed by: Mike Zeller <mike@mikezeller.net>
Portions contributed by: Dan McDonald <danmcd@joyent.com>
Portions contributed by: Gordon Ross <gordon.w.ross@gmail.com>
Portions contributed by: Vitaliy Gusev <gusev.vitaliy@gmail.com>
Reviewed by: Rick McNeal <rick.mcneal@nexenta.com>
Reviewed by: Rob Gittins <rob.gittins@nexenta.com>
Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com>
Reviewed by: Jason King <jbk@joyent.com>
Reviewed by: C Fraire <cfraire@me.com>
Change-Id: I22f289d357503f9b48a0bc2482cc4328a6d43d16
    
      
        | Split | 
	Close | 
      
      | Expand all | 
      | Collapse all | 
    
    
          --- old/usr/src/uts/common/fs/nfs/nfs_server.c
          +++ new/usr/src/uts/common/fs/nfs/nfs_server.c
   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  
    | 
      ↓ open down ↓ | 
    14 lines elided | 
    
      ↑ open up ↑ | 
  
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  /*
  22   22   * Copyright (c) 1990, 2010, Oracle and/or its affiliates. All rights reserved.
  23   23   * Copyright (c) 2011 Bayard G. Bell. All rights reserved.
  24   24   * Copyright (c) 2013 by Delphix. All rights reserved.
  25      - * Copyright 2014 Nexenta Systems, Inc.  All rights reserved.
  26   25   * Copyright (c) 2017 Joyent Inc
       26 + * Copyright 2019 Nexenta by DDN, Inc.
  27   27   */
  28   28  
  29   29  /*
  30   30   *      Copyright (c) 1983,1984,1985,1986,1987,1988,1989  AT&T.
  31   31   *      All rights reserved.
  32   32   *      Use is subject to license terms.
  33   33   */
  34   34  
  35   35  #include <sys/param.h>
  36   36  #include <sys/types.h>
  37   37  #include <sys/systm.h>
  38   38  #include <sys/cred.h>
  39   39  #include <sys/proc.h>
  40   40  #include <sys/user.h>
  41   41  #include <sys/buf.h>
  42   42  #include <sys/vfs.h>
  43   43  #include <sys/vnode.h>
  44   44  #include <sys/pathname.h>
  45   45  #include <sys/uio.h>
  46   46  #include <sys/file.h>
  47   47  #include <sys/stat.h>
  48   48  #include <sys/errno.h>
  49   49  #include <sys/socket.h>
  50   50  #include <sys/sysmacros.h>
  51   51  #include <sys/siginfo.h>
  52   52  #include <sys/tiuser.h>
  53   53  #include <sys/statvfs.h>
  54   54  #include <sys/stream.h>
  55   55  #include <sys/strsun.h>
  56   56  #include <sys/strsubr.h>
  57   57  #include <sys/stropts.h>
  58   58  #include <sys/timod.h>
  59   59  #include <sys/t_kuser.h>
  60   60  #include <sys/kmem.h>
  61   61  #include <sys/kstat.h>
  62   62  #include <sys/dirent.h>
  63   63  #include <sys/cmn_err.h>
  64   64  #include <sys/debug.h>
  65   65  #include <sys/unistd.h>
  66   66  #include <sys/vtrace.h>
  67   67  #include <sys/mode.h>
  68   68  #include <sys/acl.h>
  69   69  #include <sys/sdt.h>
  70   70  #include <sys/debug.h>
  71   71  
  72   72  #include <rpc/types.h>
  73   73  #include <rpc/auth.h>
  74   74  #include <rpc/auth_unix.h>
  75   75  #include <rpc/auth_des.h>
  
    | 
      ↓ open down ↓ | 
    39 lines elided | 
    
      ↑ open up ↑ | 
  
  76   76  #include <rpc/svc.h>
  77   77  #include <rpc/xdr.h>
  78   78  #include <rpc/rpc_rdma.h>
  79   79  
  80   80  #include <nfs/nfs.h>
  81   81  #include <nfs/export.h>
  82   82  #include <nfs/nfssys.h>
  83   83  #include <nfs/nfs_clnt.h>
  84   84  #include <nfs/nfs_acl.h>
  85   85  #include <nfs/nfs_log.h>
  86      -#include <nfs/nfs_cmd.h>
  87   86  #include <nfs/lm.h>
  88   87  #include <nfs/nfs_dispatch.h>
  89   88  #include <nfs/nfs4_drc.h>
  90   89  
  91   90  #include <sys/modctl.h>
  92   91  #include <sys/cladm.h>
  93   92  #include <sys/clconf.h>
  94   93  
  95   94  #include <sys/tsol/label.h>
  96   95  
  97   96  #define MAXHOST 32
  98   97  const char *kinet_ntop6(uchar_t *, char *, size_t);
  99   98  
 100   99  /*
 101  100   * Module linkage information.
  
    | 
      ↓ open down ↓ | 
    5 lines elided | 
    
      ↑ open up ↑ | 
  
 102  101   */
 103  102  
 104  103  static struct modlmisc modlmisc = {
 105  104          &mod_miscops, "NFS server module"
 106  105  };
 107  106  
 108  107  static struct modlinkage modlinkage = {
 109  108          MODREV_1, (void *)&modlmisc, NULL
 110  109  };
 111  110  
      111 +zone_key_t      nfssrv_zone_key;
      112 +list_t          nfssrv_globals_list;
      113 +krwlock_t       nfssrv_globals_rwl;
      114 +
 112  115  kmem_cache_t *nfs_xuio_cache;
 113  116  int nfs_loaned_buffers = 0;
 114  117  
 115  118  int
 116  119  _init(void)
 117  120  {
 118  121          int status;
 119  122  
 120      -        if ((status = nfs_srvinit()) != 0) {
 121      -                cmn_err(CE_WARN, "_init: nfs_srvinit failed");
 122      -                return (status);
 123      -        }
      123 +        nfs_srvinit();
 124  124  
 125  125          status = mod_install((struct modlinkage *)&modlinkage);
 126  126          if (status != 0) {
 127  127                  /*
 128  128                   * Could not load module, cleanup previous
 129  129                   * initialization work.
 130  130                   */
 131  131                  nfs_srvfini();
 132  132  
 133  133                  return (status);
 134  134          }
 135  135  
 136  136          /*
 137  137           * Initialise some placeholders for nfssys() calls. These have
 138  138           * to be declared by the nfs module, since that handles nfssys()
 139  139           * calls - also used by NFS clients - but are provided by this
 140  140           * nfssrv module. These also then serve as confirmation to the
 141  141           * relevant code in nfs that nfssrv has been loaded, as they're
 142  142           * initially NULL.
 143  143           */
 144  144          nfs_srv_quiesce_func = nfs_srv_quiesce_all;
 145  145          nfs_srv_dss_func = rfs4_dss_setpaths;
 146  146  
  147  147          /* set up DSS paths here; must be done before initial server startup */
 148  148          rfs4_dss_paths = rfs4_dss_oldpaths = NULL;
 149  149  
 150  150          /* initialize the copy reduction caches */
 151  151  
 152  152          nfs_xuio_cache = kmem_cache_create("nfs_xuio_cache",
 153  153              sizeof (nfs_xuio_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
 154  154  
 155  155          return (status);
 156  156  }
 157  157  
 158  158  int
 159  159  _fini()
 160  160  {
 161  161          return (EBUSY);
 162  162  }
 163  163  
 164  164  int
 165  165  _info(struct modinfo *modinfop)
 166  166  {
 167  167          return (mod_info(&modlinkage, modinfop));
 168  168  }
 169  169  
  
    | 
      ↓ open down ↓ | 
    36 lines elided | 
    
      ↑ open up ↑ | 
  
 170  170  /*
 171  171   * PUBLICFH_CHECK() checks if the dispatch routine supports
 172  172   * RPC_PUBLICFH_OK, if the filesystem is exported public, and if the
 173  173   * incoming request is using the public filehandle. The check duplicates
 174  174   * the exportmatch() call done in checkexport(), and we should consider
 175  175   * modifying those routines to avoid the duplication. For now, we optimize
 176  176   * by calling exportmatch() only after checking that the dispatch routine
 177  177   * supports RPC_PUBLICFH_OK, and if the filesystem is explicitly exported
 178  178   * public (i.e., not the placeholder).
 179  179   */
 180      -#define PUBLICFH_CHECK(disp, exi, fsid, xfid) \
      180 +#define PUBLICFH_CHECK(ne, disp, exi, fsid, xfid) \
 181  181                  ((disp->dis_flags & RPC_PUBLICFH_OK) && \
 182  182                  ((exi->exi_export.ex_flags & EX_PUBLIC) || \
 183      -                (exi == exi_public && exportmatch(exi_root, \
      183 +                (exi == ne->exi_public && exportmatch(ne->exi_root, \
 184  184                  fsid, xfid))))
 185  185  
 186  186  static void     nfs_srv_shutdown_all(int);
 187      -static void     rfs4_server_start(int);
      187 +static void     rfs4_server_start(nfs_globals_t *, int);
 188  188  static void     nullfree(void);
 189  189  static void     rfs_dispatch(struct svc_req *, SVCXPRT *);
 190  190  static void     acl_dispatch(struct svc_req *, SVCXPRT *);
 191      -static void     common_dispatch(struct svc_req *, SVCXPRT *,
 192      -                rpcvers_t, rpcvers_t, char *,
 193      -                struct rpc_disptable *);
 194      -static void     hanfsv4_failover(void);
 195  191  static  int     checkauth(struct exportinfo *, struct svc_req *, cred_t *, int,
 196  192                  bool_t, bool_t *);
 197  193  static char     *client_name(struct svc_req *req);
 198  194  static char     *client_addr(struct svc_req *req, char *buf);
 199  195  extern  int     sec_svc_getcred(struct svc_req *, cred_t *cr, char **, int *);
 200  196  extern  bool_t  sec_svc_inrootlist(int, caddr_t, int, caddr_t *);
      197 +static void     *nfs_server_zone_init(zoneid_t);
      198 +static void     nfs_server_zone_fini(zoneid_t, void *);
      199 +static void     nfs_server_zone_shutdown(zoneid_t, void *);
 201  200  
 202  201  #define NFSLOG_COPY_NETBUF(exi, xprt, nb)       {               \
 203  202          (nb)->maxlen = (xprt)->xp_rtaddr.maxlen;                \
 204  203          (nb)->len = (xprt)->xp_rtaddr.len;                      \
 205  204          (nb)->buf = kmem_alloc((nb)->len, KM_SLEEP);            \
 206  205          bcopy((xprt)->xp_rtaddr.buf, (nb)->buf, (nb)->len);     \
 207  206          }
 208  207  
 209  208  /*
 210  209   * Public Filehandle common nfs routines
 211  210   */
 212  211  static int      MCLpath(char **);
 213  212  static void     URLparse(char *);
 214  213  
 215  214  /*
 216  215   * NFS callout table.
 217  216   * This table is used by svc_getreq() to dispatch a request with
 218  217   * a given prog/vers pair to an appropriate service provider
 219  218   * dispatch routine.
 220  219   *
 221  220   * NOTE: ordering is relied upon below when resetting the version min/max
 222  221   * for NFS_PROGRAM.  Careful, if this is ever changed.
 223  222   */
 224  223  static SVC_CALLOUT __nfs_sc_clts[] = {
 225  224          { NFS_PROGRAM,     NFS_VERSMIN,     NFS_VERSMAX,        rfs_dispatch },
 226  225          { NFS_ACL_PROGRAM, NFS_ACL_VERSMIN, NFS_ACL_VERSMAX,    acl_dispatch }
 227  226  };
 228  227  
 229  228  static SVC_CALLOUT_TABLE nfs_sct_clts = {
 230  229          sizeof (__nfs_sc_clts) / sizeof (__nfs_sc_clts[0]), FALSE,
 231  230          __nfs_sc_clts
 232  231  };
 233  232  
 234  233  static SVC_CALLOUT __nfs_sc_cots[] = {
 235  234          { NFS_PROGRAM,     NFS_VERSMIN,     NFS_VERSMAX,        rfs_dispatch },
 236  235          { NFS_ACL_PROGRAM, NFS_ACL_VERSMIN, NFS_ACL_VERSMAX,    acl_dispatch }
 237  236  };
 238  237  
 239  238  static SVC_CALLOUT_TABLE nfs_sct_cots = {
 240  239          sizeof (__nfs_sc_cots) / sizeof (__nfs_sc_cots[0]), FALSE, __nfs_sc_cots
  
    | 
      ↓ open down ↓ | 
    30 lines elided | 
    
      ↑ open up ↑ | 
  
 241  240  };
 242  241  
 243  242  static SVC_CALLOUT __nfs_sc_rdma[] = {
 244  243          { NFS_PROGRAM,     NFS_VERSMIN,     NFS_VERSMAX,        rfs_dispatch },
 245  244          { NFS_ACL_PROGRAM, NFS_ACL_VERSMIN, NFS_ACL_VERSMAX,    acl_dispatch }
 246  245  };
 247  246  
 248  247  static SVC_CALLOUT_TABLE nfs_sct_rdma = {
 249  248          sizeof (__nfs_sc_rdma) / sizeof (__nfs_sc_rdma[0]), FALSE, __nfs_sc_rdma
 250  249  };
 251      -rpcvers_t nfs_versmin = NFS_VERSMIN_DEFAULT;
 252      -rpcvers_t nfs_versmax = NFS_VERSMAX_DEFAULT;
 253  250  
 254  251  /*
 255      - * Used to track the state of the server so that initialization
 256      - * can be done properly.
 257      - */
 258      -typedef enum {
 259      -        NFS_SERVER_STOPPED,     /* server state destroyed */
 260      -        NFS_SERVER_STOPPING,    /* server state being destroyed */
 261      -        NFS_SERVER_RUNNING,
 262      -        NFS_SERVER_QUIESCED,    /* server state preserved */
 263      -        NFS_SERVER_OFFLINE      /* server pool offline */
 264      -} nfs_server_running_t;
 265      -
 266      -static nfs_server_running_t nfs_server_upordown;
 267      -static kmutex_t nfs_server_upordown_lock;
 268      -static  kcondvar_t nfs_server_upordown_cv;
 269      -
 270      -/*
 271  252   * DSS: distributed stable storage
 272  253   * lists of all DSS paths: current, and before last warmstart
 273  254   */
 274  255  nvlist_t *rfs4_dss_paths, *rfs4_dss_oldpaths;
 275  256  
 276  257  int rfs4_dispatch(struct rpcdisp *, struct svc_req *, SVCXPRT *, char *);
 277  258  bool_t rfs4_minorvers_mismatch(struct svc_req *, SVCXPRT *, void *);
 278  259  
 279  260  /*
 280      - * RDMA wait variables.
      261 + * Stash NFS zone globals in TSD to avoid some lock contention
      262 + * from frequent zone_getspecific calls.
 281  263   */
 282      -static kcondvar_t rdma_wait_cv;
 283      -static kmutex_t rdma_wait_mutex;
      264 +static uint_t nfs_server_tsd_key;
 284  265  
      266 +nfs_globals_t *
      267 +nfs_srv_getzg(void)
      268 +{
      269 +        nfs_globals_t *ng;
      270 +
      271 +        ng = tsd_get(nfs_server_tsd_key);
      272 +        if (ng == NULL) {
      273 +                ng = zone_getspecific(nfssrv_zone_key, curzone);
      274 +                (void) tsd_set(nfs_server_tsd_key, ng);
      275 +        }
      276 +
      277 +        return (ng);
      278 +}
      279 +
 285  280  /*
 286  281   * Will be called at the point the server pool is being unregistered
 287  282   * from the pool list. From that point onwards, the pool is waiting
 288  283   * to be drained and as such the server state is stale and pertains
 289  284   * to the old instantiation of the NFS server pool.
 290  285   */
 291  286  void
 292  287  nfs_srv_offline(void)
 293  288  {
 294      -        mutex_enter(&nfs_server_upordown_lock);
 295      -        if (nfs_server_upordown == NFS_SERVER_RUNNING) {
 296      -                nfs_server_upordown = NFS_SERVER_OFFLINE;
      289 +        nfs_globals_t *ng;
      290 +
      291 +        ng = nfs_srv_getzg();
      292 +
      293 +        mutex_enter(&ng->nfs_server_upordown_lock);
      294 +        if (ng->nfs_server_upordown == NFS_SERVER_RUNNING) {
      295 +                ng->nfs_server_upordown = NFS_SERVER_OFFLINE;
 297  296          }
 298      -        mutex_exit(&nfs_server_upordown_lock);
      297 +        mutex_exit(&ng->nfs_server_upordown_lock);
 299  298  }
 300  299  
 301  300  /*
 302  301   * Will be called at the point the server pool is being destroyed so
 303  302   * all transports have been closed and no service threads are in
 304  303   * existence.
 305  304   *
 306  305   * If we quiesce the server, we're shutting it down without destroying the
 307  306   * server state. This allows it to warm start subsequently.
 308  307   */
 309  308  void
 310  309  nfs_srv_stop_all(void)
 311  310  {
 312  311          int quiesce = 0;
 313  312          nfs_srv_shutdown_all(quiesce);
 314  313  }
 315  314  
 316  315  /*
  
    | 
      ↓ open down ↓ | 
    8 lines elided | 
    
      ↑ open up ↑ | 
  
 317  316   * This alternative shutdown routine can be requested via nfssys()
 318  317   */
 319  318  void
 320  319  nfs_srv_quiesce_all(void)
 321  320  {
 322  321          int quiesce = 1;
 323  322          nfs_srv_shutdown_all(quiesce);
 324  323  }
 325  324  
 326  325  static void
 327      -nfs_srv_shutdown_all(int quiesce) {
 328      -        mutex_enter(&nfs_server_upordown_lock);
      326 +nfs_srv_shutdown_all(int quiesce)
      327 +{
      328 +        nfs_globals_t *ng = nfs_srv_getzg();
      329 +
      330 +        mutex_enter(&ng->nfs_server_upordown_lock);
 329  331          if (quiesce) {
 330      -                if (nfs_server_upordown == NFS_SERVER_RUNNING ||
 331      -                        nfs_server_upordown == NFS_SERVER_OFFLINE) {
 332      -                        nfs_server_upordown = NFS_SERVER_QUIESCED;
 333      -                        cv_signal(&nfs_server_upordown_cv);
      332 +                if (ng->nfs_server_upordown == NFS_SERVER_RUNNING ||
      333 +                    ng->nfs_server_upordown == NFS_SERVER_OFFLINE) {
      334 +                        ng->nfs_server_upordown = NFS_SERVER_QUIESCED;
      335 +                        cv_signal(&ng->nfs_server_upordown_cv);
 334  336  
 335      -                        /* reset DSS state, for subsequent warm restart */
      337 +                        /* reset DSS state */
 336  338                          rfs4_dss_numnewpaths = 0;
 337  339                          rfs4_dss_newpaths = NULL;
 338  340  
 339  341                          cmn_err(CE_NOTE, "nfs_server: server is now quiesced; "
 340  342                              "NFSv4 state has been preserved");
 341  343                  }
 342  344          } else {
 343      -                if (nfs_server_upordown == NFS_SERVER_OFFLINE) {
 344      -                        nfs_server_upordown = NFS_SERVER_STOPPING;
 345      -                        mutex_exit(&nfs_server_upordown_lock);
 346      -                        rfs4_state_fini();
 347      -                        rfs4_fini_drc(nfs4_drc);
 348      -                        mutex_enter(&nfs_server_upordown_lock);
 349      -                        nfs_server_upordown = NFS_SERVER_STOPPED;
 350      -                        cv_signal(&nfs_server_upordown_cv);
      345 +                if (ng->nfs_server_upordown == NFS_SERVER_OFFLINE) {
      346 +                        ng->nfs_server_upordown = NFS_SERVER_STOPPING;
      347 +                        mutex_exit(&ng->nfs_server_upordown_lock);
      348 +                        rfs4_state_zone_fini();
      349 +                        rfs4_fini_drc();
      350 +                        mutex_enter(&ng->nfs_server_upordown_lock);
      351 +                        ng->nfs_server_upordown = NFS_SERVER_STOPPED;
      352 +
      353 +                        /* reset DSS state */
      354 +                        rfs4_dss_numnewpaths = 0;
      355 +                        rfs4_dss_newpaths = NULL;
      356 +
      357 +                        cv_signal(&ng->nfs_server_upordown_cv);
 351  358                  }
 352  359          }
 353      -        mutex_exit(&nfs_server_upordown_lock);
      360 +        mutex_exit(&ng->nfs_server_upordown_lock);
 354  361  }
 355  362  
 356  363  static int
 357  364  nfs_srv_set_sc_versions(struct file *fp, SVC_CALLOUT_TABLE **sctpp,
 358      -                        rpcvers_t versmin, rpcvers_t versmax)
      365 +    rpcvers_t versmin, rpcvers_t versmax)
 359  366  {
 360  367          struct strioctl strioc;
 361  368          struct T_info_ack tinfo;
 362  369          int             error, retval;
 363  370  
 364  371          /*
 365  372           * Find out what type of transport this is.
 366  373           */
 367  374          strioc.ic_cmd = TI_GETINFO;
 368  375          strioc.ic_timout = -1;
 369  376          strioc.ic_len = sizeof (tinfo);
 370  377          strioc.ic_dp = (char *)&tinfo;
 371  378          tinfo.PRIM_type = T_INFO_REQ;
 372  379  
 373  380          error = strioctl(fp->f_vnode, I_STR, (intptr_t)&strioc, 0, K_TO_K,
 374  381              CRED(), &retval);
 375  382          if (error || retval)
 376  383                  return (error);
 377  384  
 378  385          /*
 379  386           * Based on our query of the transport type...
 380  387           *
 381  388           * Reset the min/max versions based on the caller's request
 382  389           * NOTE: This assumes that NFS_PROGRAM is first in the array!!
 383  390           * And the second entry is the NFS_ACL_PROGRAM.
 384  391           */
 385  392          switch (tinfo.SERV_type) {
 386  393          case T_CLTS:
 387  394                  if (versmax == NFS_V4)
 388  395                          return (EINVAL);
 389  396                  __nfs_sc_clts[0].sc_versmin = versmin;
 390  397                  __nfs_sc_clts[0].sc_versmax = versmax;
 391  398                  __nfs_sc_clts[1].sc_versmin = versmin;
 392  399                  __nfs_sc_clts[1].sc_versmax = versmax;
 393  400                  *sctpp = &nfs_sct_clts;
 394  401                  break;
 395  402          case T_COTS:
 396  403          case T_COTS_ORD:
 397  404                  __nfs_sc_cots[0].sc_versmin = versmin;
 398  405                  __nfs_sc_cots[0].sc_versmax = versmax;
 399  406                  /* For the NFS_ACL program, check the max version */
 400  407                  if (versmax > NFS_ACL_VERSMAX)
 401  408                          versmax = NFS_ACL_VERSMAX;
 402  409                  __nfs_sc_cots[1].sc_versmin = versmin;
 403  410                  __nfs_sc_cots[1].sc_versmax = versmax;
 404  411                  *sctpp = &nfs_sct_cots;
 405  412                  break;
 406  413          default:
 407  414                  error = EINVAL;
 408  415          }
 409  416  
 410  417          return (error);
  
    | 
      ↓ open down ↓ | 
    42 lines elided | 
    
      ↑ open up ↑ | 
  
 411  418  }
 412  419  
 413  420  /*
 414  421   * NFS Server system call.
  415  422   * Does all of the work of running an NFS server.
 416  423   * uap->fd is the fd of an open transport provider
 417  424   */
 418  425  int
 419  426  nfs_svc(struct nfs_svc_args *arg, model_t model)
 420  427  {
      428 +        nfs_globals_t *ng;
 421  429          file_t *fp;
 422  430          SVCMASTERXPRT *xprt;
 423  431          int error;
 424  432          int readsize;
 425  433          char buf[KNC_STRSIZE];
 426  434          size_t len;
 427  435          STRUCT_HANDLE(nfs_svc_args, uap);
 428  436          struct netbuf addrmask;
 429  437          SVC_CALLOUT_TABLE *sctp = NULL;
 430  438  
 431  439  #ifdef lint
 432  440          model = model;          /* STRUCT macros don't always refer to it */
 433  441  #endif
 434  442  
      443 +        ng = nfs_srv_getzg();
 435  444          STRUCT_SET_HANDLE(uap, model, arg);
 436  445  
 437  446          /* Check privileges in nfssys() */
 438  447  
 439  448          if ((fp = getf(STRUCT_FGET(uap, fd))) == NULL)
 440  449                  return (EBADF);
 441  450  
       451 +        /* Set up the global file handle in nfs_export */
      452 +        if ((error = nfs_export_get_rootfh(ng)) != 0)
      453 +                return (error);
      454 +
 442  455          /*
 443  456           * Set read buffer size to rsize
 444  457           * and add room for RPC headers.
 445  458           */
 446  459          readsize = nfs3tsize() + (RPC_MAXDATASIZE - NFS_MAXDATA);
 447  460          if (readsize < RPC_MAXDATASIZE)
 448  461                  readsize = RPC_MAXDATASIZE;
 449  462  
 450  463          error = copyinstr((const char *)STRUCT_FGETP(uap, netid), buf,
 451  464              KNC_STRSIZE, &len);
 452  465          if (error) {
 453  466                  releasef(STRUCT_FGET(uap, fd));
 454  467                  return (error);
 455  468          }
 456  469  
 457  470          addrmask.len = STRUCT_FGET(uap, addrmask.len);
  
    | 
      ↓ open down ↓ | 
    6 lines elided | 
    
      ↑ open up ↑ | 
  
 458  471          addrmask.maxlen = STRUCT_FGET(uap, addrmask.maxlen);
 459  472          addrmask.buf = kmem_alloc(addrmask.maxlen, KM_SLEEP);
 460  473          error = copyin(STRUCT_FGETP(uap, addrmask.buf), addrmask.buf,
 461  474              addrmask.len);
 462  475          if (error) {
 463  476                  releasef(STRUCT_FGET(uap, fd));
 464  477                  kmem_free(addrmask.buf, addrmask.maxlen);
 465  478                  return (error);
 466  479          }
 467  480  
 468      -        nfs_versmin = STRUCT_FGET(uap, versmin);
 469      -        nfs_versmax = STRUCT_FGET(uap, versmax);
      481 +        ng->nfs_versmin = STRUCT_FGET(uap, versmin);
      482 +        ng->nfs_versmax = STRUCT_FGET(uap, versmax);
 470  483  
 471  484          /* Double check the vers min/max ranges */
 472      -        if ((nfs_versmin > nfs_versmax) ||
 473      -            (nfs_versmin < NFS_VERSMIN) ||
 474      -            (nfs_versmax > NFS_VERSMAX)) {
 475      -                nfs_versmin = NFS_VERSMIN_DEFAULT;
 476      -                nfs_versmax = NFS_VERSMAX_DEFAULT;
      485 +        if ((ng->nfs_versmin > ng->nfs_versmax) ||
      486 +            (ng->nfs_versmin < NFS_VERSMIN) ||
      487 +            (ng->nfs_versmax > NFS_VERSMAX)) {
      488 +                ng->nfs_versmin = NFS_VERSMIN_DEFAULT;
      489 +                ng->nfs_versmax = NFS_VERSMAX_DEFAULT;
 477  490          }
 478  491  
 479      -        if (error =
 480      -            nfs_srv_set_sc_versions(fp, &sctp, nfs_versmin, nfs_versmax)) {
      492 +        if (error = nfs_srv_set_sc_versions(fp, &sctp, ng->nfs_versmin,
      493 +            ng->nfs_versmax)) {
 481  494                  releasef(STRUCT_FGET(uap, fd));
 482  495                  kmem_free(addrmask.buf, addrmask.maxlen);
 483  496                  return (error);
 484  497          }
 485  498  
 486  499          /* Initialize nfsv4 server */
 487      -        if (nfs_versmax == (rpcvers_t)NFS_V4)
 488      -                rfs4_server_start(STRUCT_FGET(uap, delegation));
      500 +        if (ng->nfs_versmax == (rpcvers_t)NFS_V4)
      501 +                rfs4_server_start(ng, STRUCT_FGET(uap, delegation));
 489  502  
 490  503          /* Create a transport handle. */
 491  504          error = svc_tli_kcreate(fp, readsize, buf, &addrmask, &xprt,
 492  505              sctp, NULL, NFS_SVCPOOL_ID, TRUE);
 493  506  
 494  507          if (error)
 495  508                  kmem_free(addrmask.buf, addrmask.maxlen);
 496  509  
 497  510          releasef(STRUCT_FGET(uap, fd));
 498  511  
 499  512          /* HA-NFSv4: save the cluster nodeid */
 500  513          if (cluster_bootflags & CLUSTER_BOOTED)
 501  514                  lm_global_nlmid = clconf_get_nodeid();
 502  515  
 503  516          return (error);
 504  517  }
 505  518  
 506  519  static void
 507      -rfs4_server_start(int nfs4_srv_delegation)
      520 +rfs4_server_start(nfs_globals_t *ng, int nfs4_srv_delegation)
 508  521  {
 509  522          /*
 510  523           * Determine if the server has previously been "started" and
 511  524           * if not, do the per instance initialization
 512  525           */
 513      -        mutex_enter(&nfs_server_upordown_lock);
      526 +        mutex_enter(&ng->nfs_server_upordown_lock);
 514  527  
 515      -        if (nfs_server_upordown != NFS_SERVER_RUNNING) {
      528 +        if (ng->nfs_server_upordown != NFS_SERVER_RUNNING) {
 516  529                  /* Do we need to stop and wait on the previous server? */
 517      -                while (nfs_server_upordown == NFS_SERVER_STOPPING ||
 518      -                    nfs_server_upordown == NFS_SERVER_OFFLINE)
 519      -                        cv_wait(&nfs_server_upordown_cv,
 520      -                            &nfs_server_upordown_lock);
      530 +                while (ng->nfs_server_upordown == NFS_SERVER_STOPPING ||
      531 +                    ng->nfs_server_upordown == NFS_SERVER_OFFLINE)
      532 +                        cv_wait(&ng->nfs_server_upordown_cv,
      533 +                            &ng->nfs_server_upordown_lock);
 521  534  
 522      -                if (nfs_server_upordown != NFS_SERVER_RUNNING) {
      535 +                if (ng->nfs_server_upordown != NFS_SERVER_RUNNING) {
 523  536                          (void) svc_pool_control(NFS_SVCPOOL_ID,
 524  537                              SVCPSET_UNREGISTER_PROC, (void *)&nfs_srv_offline);
 525  538                          (void) svc_pool_control(NFS_SVCPOOL_ID,
 526  539                              SVCPSET_SHUTDOWN_PROC, (void *)&nfs_srv_stop_all);
 527  540  
 528      -                        /* is this an nfsd warm start? */
 529      -                        if (nfs_server_upordown == NFS_SERVER_QUIESCED) {
 530      -                                cmn_err(CE_NOTE, "nfs_server: "
 531      -                                    "server was previously quiesced; "
 532      -                                    "existing NFSv4 state will be re-used");
      541 +                        rfs4_do_server_start(ng->nfs_server_upordown,
      542 +                            nfs4_srv_delegation,
      543 +                            cluster_bootflags & CLUSTER_BOOTED);
 533  544  
 534      -                                /*
 535      -                                 * HA-NFSv4: this is also the signal
 536      -                                 * that a Resource Group failover has
 537      -                                 * occurred.
 538      -                                 */
 539      -                                if (cluster_bootflags & CLUSTER_BOOTED)
 540      -                                        hanfsv4_failover();
 541      -                        } else {
 542      -                                /* cold start */
 543      -                                rfs4_state_init();
 544      -                                nfs4_drc = rfs4_init_drc(nfs4_drc_max,
 545      -                                    nfs4_drc_hash);
 546      -                        }
 547      -
 548      -                        /*
 549      -                         * Check to see if delegation is to be
 550      -                         * enabled at the server
 551      -                         */
 552      -                        if (nfs4_srv_delegation != FALSE)
 553      -                                rfs4_set_deleg_policy(SRV_NORMAL_DELEGATE);
 554      -
 555      -                        nfs_server_upordown = NFS_SERVER_RUNNING;
      545 +                        ng->nfs_server_upordown = NFS_SERVER_RUNNING;
 556  546                  }
 557      -                cv_signal(&nfs_server_upordown_cv);
      547 +                cv_signal(&ng->nfs_server_upordown_cv);
 558  548          }
 559      -        mutex_exit(&nfs_server_upordown_lock);
      549 +        mutex_exit(&ng->nfs_server_upordown_lock);
 560  550  }
 561  551  
 562  552  /*
 563  553   * If RDMA device available,
 564  554   * start RDMA listener.
 565  555   */
 566  556  int
 567  557  rdma_start(struct rdma_svc_args *rsa)
 568  558  {
      559 +        nfs_globals_t *ng;
 569  560          int error;
 570  561          rdma_xprt_group_t started_rdma_xprts;
 571  562          rdma_stat stat;
 572  563          int svc_state = 0;
 573  564  
 574  565          /* Double check the vers min/max ranges */
 575  566          if ((rsa->nfs_versmin > rsa->nfs_versmax) ||
 576  567              (rsa->nfs_versmin < NFS_VERSMIN) ||
 577  568              (rsa->nfs_versmax > NFS_VERSMAX)) {
 578  569                  rsa->nfs_versmin = NFS_VERSMIN_DEFAULT;
 579  570                  rsa->nfs_versmax = NFS_VERSMAX_DEFAULT;
 580  571          }
 581      -        nfs_versmin = rsa->nfs_versmin;
 582      -        nfs_versmax = rsa->nfs_versmax;
 583  572  
      573 +        ng = nfs_srv_getzg();
      574 +        ng->nfs_versmin = rsa->nfs_versmin;
      575 +        ng->nfs_versmax = rsa->nfs_versmax;
      576 +
 584  577          /* Set the versions in the callout table */
 585  578          __nfs_sc_rdma[0].sc_versmin = rsa->nfs_versmin;
 586  579          __nfs_sc_rdma[0].sc_versmax = rsa->nfs_versmax;
 587  580          /* For the NFS_ACL program, check the max version */
 588  581          __nfs_sc_rdma[1].sc_versmin = rsa->nfs_versmin;
 589  582          if (rsa->nfs_versmax > NFS_ACL_VERSMAX)
 590  583                  __nfs_sc_rdma[1].sc_versmax = NFS_ACL_VERSMAX;
 591  584          else
 592  585                  __nfs_sc_rdma[1].sc_versmax = rsa->nfs_versmax;
 593  586  
 594  587          /* Initialize nfsv4 server */
 595  588          if (rsa->nfs_versmax == (rpcvers_t)NFS_V4)
 596      -                rfs4_server_start(rsa->delegation);
      589 +                rfs4_server_start(ng, rsa->delegation);
 597  590  
 598  591          started_rdma_xprts.rtg_count = 0;
 599  592          started_rdma_xprts.rtg_listhead = NULL;
 600  593          started_rdma_xprts.rtg_poolid = rsa->poolid;
 601  594  
 602  595  restart:
 603  596          error = svc_rdma_kcreate(rsa->netid, &nfs_sct_rdma, rsa->poolid,
 604  597              &started_rdma_xprts);
 605  598  
 606  599          svc_state = !error;
 607  600  
 608  601          while (!error) {
 609  602  
 610  603                  /*
 611  604                   * wait till either interrupted by a signal on
 612  605                   * nfs service stop/restart or signalled by a
 613      -                 * rdma plugin attach/detach.
      606 +                 * rdma attach/detach.
 614  607                   */
 615  608  
 616  609                  stat = rdma_kwait();
 617  610  
 618  611                  /*
 619  612                   * stop services if running -- either on a HCA detach event
 620  613                   * or if the nfs service is stopped/restarted.
 621  614                   */
 622  615  
 623  616                  if ((stat == RDMA_HCA_DETACH || stat == RDMA_INTR) &&
 624  617                      svc_state) {
 625  618                          rdma_stop(&started_rdma_xprts);
 626  619                          svc_state = 0;
 627  620                  }
 628  621  
 629  622                  /*
 630  623                   * nfs service stop/restart, break out of the
 631  624                   * wait loop and return;
 632  625                   */
 633  626                  if (stat == RDMA_INTR)
 634  627                          return (0);
 635  628  
 636  629                  /*
 637  630                   * restart stopped services on a HCA attach event
 638  631                   * (if not already running)
 639  632                   */
 640  633  
 641  634                  if ((stat == RDMA_HCA_ATTACH) && (svc_state == 0))
 642  635                          goto restart;
 643  636  
 644  637                  /*
 645  638                   * loop until a nfs service stop/restart
 646  639                   */
 647  640          }
 648  641  
 649  642          return (error);
 650  643  }
 651  644  
 652  645  /* ARGSUSED */
 653  646  void
  
    | 
      ↓ open down ↓ | 
    30 lines elided | 
    
      ↑ open up ↑ | 
  
 654  647  rpc_null(caddr_t *argp, caddr_t *resp, struct exportinfo *exi,
 655  648      struct svc_req *req, cred_t *cr, bool_t ro)
 656  649  {
 657  650  }
 658  651  
 659  652  /* ARGSUSED */
 660  653  void
 661  654  rpc_null_v3(caddr_t *argp, caddr_t *resp, struct exportinfo *exi,
 662  655      struct svc_req *req, cred_t *cr, bool_t ro)
 663  656  {
 664      -        DTRACE_NFSV3_3(op__null__start, struct svc_req *, req,
 665      -            cred_t *, cr, vnode_t *, NULL);
 666      -        DTRACE_NFSV3_3(op__null__done, struct svc_req *, req,
 667      -            cred_t *, cr, vnode_t *, NULL);
      657 +        DTRACE_NFSV3_4(op__null__start, struct svc_req *, req,
      658 +            cred_t *, cr, vnode_t *, NULL, struct exportinfo *, exi);
      659 +        DTRACE_NFSV3_4(op__null__done, struct svc_req *, req,
      660 +            cred_t *, cr, vnode_t *, NULL, struct exportinfo *, exi);
 668  661  }
 669  662  
 670  663  /* ARGSUSED */
 671  664  static void
 672  665  rfs_error(caddr_t *argp, caddr_t *resp, struct exportinfo *exi,
 673  666      struct svc_req *req, cred_t *cr, bool_t ro)
 674  667  {
 675  668          /* return (EOPNOTSUPP); */
 676  669  }
 677  670  
 678  671  static void
 679  672  nullfree(void)
 680  673  {
 681  674  }
 682  675  
 683  676  static char *rfscallnames_v2[] = {
 684  677          "RFS2_NULL",
 685  678          "RFS2_GETATTR",
 686  679          "RFS2_SETATTR",
 687  680          "RFS2_ROOT",
 688  681          "RFS2_LOOKUP",
 689  682          "RFS2_READLINK",
 690  683          "RFS2_READ",
 691  684          "RFS2_WRITECACHE",
 692  685          "RFS2_WRITE",
 693  686          "RFS2_CREATE",
 694  687          "RFS2_REMOVE",
 695  688          "RFS2_RENAME",
 696  689          "RFS2_LINK",
 697  690          "RFS2_SYMLINK",
 698  691          "RFS2_MKDIR",
 699  692          "RFS2_RMDIR",
 700  693          "RFS2_READDIR",
 701  694          "RFS2_STATFS"
 702  695  };
 703  696  
 704  697  static struct rpcdisp rfsdisptab_v2[] = {
 705  698          /*
 706  699           * NFS VERSION 2
 707  700           */
 708  701  
 709  702          /* RFS_NULL = 0 */
 710  703          {rpc_null,
 711  704              xdr_void, NULL_xdrproc_t, 0,
 712  705              xdr_void, NULL_xdrproc_t, 0,
 713  706              nullfree, RPC_IDEMPOTENT,
 714  707              0},
 715  708  
 716  709          /* RFS_GETATTR = 1 */
 717  710          {rfs_getattr,
 718  711              xdr_fhandle, xdr_fastfhandle, sizeof (fhandle_t),
 719  712              xdr_attrstat, xdr_fastattrstat, sizeof (struct nfsattrstat),
 720  713              nullfree, RPC_IDEMPOTENT|RPC_ALLOWANON|RPC_MAPRESP,
 721  714              rfs_getattr_getfh},
 722  715  
 723  716          /* RFS_SETATTR = 2 */
 724  717          {rfs_setattr,
 725  718              xdr_saargs, NULL_xdrproc_t, sizeof (struct nfssaargs),
 726  719              xdr_attrstat, xdr_fastattrstat, sizeof (struct nfsattrstat),
 727  720              nullfree, RPC_MAPRESP,
 728  721              rfs_setattr_getfh},
 729  722  
 730  723          /* RFS_ROOT = 3 *** NO LONGER SUPPORTED *** */
 731  724          {rfs_error,
 732  725              xdr_void, NULL_xdrproc_t, 0,
 733  726              xdr_void, NULL_xdrproc_t, 0,
 734  727              nullfree, RPC_IDEMPOTENT,
 735  728              0},
 736  729  
 737  730          /* RFS_LOOKUP = 4 */
 738  731          {rfs_lookup,
 739  732              xdr_diropargs, NULL_xdrproc_t, sizeof (struct nfsdiropargs),
 740  733              xdr_diropres, xdr_fastdiropres, sizeof (struct nfsdiropres),
 741  734              nullfree, RPC_IDEMPOTENT|RPC_MAPRESP|RPC_PUBLICFH_OK,
 742  735              rfs_lookup_getfh},
 743  736  
 744  737          /* RFS_READLINK = 5 */
 745  738          {rfs_readlink,
 746  739              xdr_fhandle, xdr_fastfhandle, sizeof (fhandle_t),
 747  740              xdr_rdlnres, NULL_xdrproc_t, sizeof (struct nfsrdlnres),
 748  741              rfs_rlfree, RPC_IDEMPOTENT,
 749  742              rfs_readlink_getfh},
 750  743  
 751  744          /* RFS_READ = 6 */
 752  745          {rfs_read,
 753  746              xdr_readargs, NULL_xdrproc_t, sizeof (struct nfsreadargs),
 754  747              xdr_rdresult, NULL_xdrproc_t, sizeof (struct nfsrdresult),
 755  748              rfs_rdfree, RPC_IDEMPOTENT,
 756  749              rfs_read_getfh},
 757  750  
 758  751          /* RFS_WRITECACHE = 7 *** NO LONGER SUPPORTED *** */
 759  752          {rfs_error,
 760  753              xdr_void, NULL_xdrproc_t, 0,
 761  754              xdr_void, NULL_xdrproc_t, 0,
 762  755              nullfree, RPC_IDEMPOTENT,
 763  756              0},
 764  757  
 765  758          /* RFS_WRITE = 8 */
 766  759          {rfs_write,
 767  760              xdr_writeargs, NULL_xdrproc_t, sizeof (struct nfswriteargs),
 768  761              xdr_attrstat, xdr_fastattrstat, sizeof (struct nfsattrstat),
 769  762              nullfree, RPC_MAPRESP,
 770  763              rfs_write_getfh},
 771  764  
 772  765          /* RFS_CREATE = 9 */
 773  766          {rfs_create,
 774  767              xdr_creatargs, NULL_xdrproc_t, sizeof (struct nfscreatargs),
 775  768              xdr_diropres, xdr_fastdiropres, sizeof (struct nfsdiropres),
 776  769              nullfree, RPC_MAPRESP,
 777  770              rfs_create_getfh},
 778  771  
 779  772          /* RFS_REMOVE = 10 */
 780  773          {rfs_remove,
 781  774              xdr_diropargs, NULL_xdrproc_t, sizeof (struct nfsdiropargs),
 782  775  #ifdef _LITTLE_ENDIAN
 783  776              xdr_enum, xdr_fastenum, sizeof (enum nfsstat),
 784  777  #else
 785  778              xdr_enum, NULL_xdrproc_t, sizeof (enum nfsstat),
 786  779  #endif
 787  780              nullfree, RPC_MAPRESP,
 788  781              rfs_remove_getfh},
 789  782  
 790  783          /* RFS_RENAME = 11 */
 791  784          {rfs_rename,
 792  785              xdr_rnmargs, NULL_xdrproc_t, sizeof (struct nfsrnmargs),
 793  786  #ifdef _LITTLE_ENDIAN
 794  787              xdr_enum, xdr_fastenum, sizeof (enum nfsstat),
 795  788  #else
 796  789              xdr_enum, NULL_xdrproc_t, sizeof (enum nfsstat),
 797  790  #endif
 798  791              nullfree, RPC_MAPRESP,
 799  792              rfs_rename_getfh},
 800  793  
 801  794          /* RFS_LINK = 12 */
 802  795          {rfs_link,
 803  796              xdr_linkargs, NULL_xdrproc_t, sizeof (struct nfslinkargs),
 804  797  #ifdef _LITTLE_ENDIAN
 805  798              xdr_enum, xdr_fastenum, sizeof (enum nfsstat),
 806  799  #else
 807  800              xdr_enum, NULL_xdrproc_t, sizeof (enum nfsstat),
 808  801  #endif
 809  802              nullfree, RPC_MAPRESP,
 810  803              rfs_link_getfh},
 811  804  
 812  805          /* RFS_SYMLINK = 13 */
 813  806          {rfs_symlink,
 814  807              xdr_slargs, NULL_xdrproc_t, sizeof (struct nfsslargs),
 815  808  #ifdef _LITTLE_ENDIAN
 816  809              xdr_enum, xdr_fastenum, sizeof (enum nfsstat),
 817  810  #else
 818  811              xdr_enum, NULL_xdrproc_t, sizeof (enum nfsstat),
 819  812  #endif
 820  813              nullfree, RPC_MAPRESP,
 821  814              rfs_symlink_getfh},
 822  815  
 823  816          /* RFS_MKDIR = 14 */
 824  817          {rfs_mkdir,
 825  818              xdr_creatargs, NULL_xdrproc_t, sizeof (struct nfscreatargs),
 826  819              xdr_diropres, xdr_fastdiropres, sizeof (struct nfsdiropres),
 827  820              nullfree, RPC_MAPRESP,
 828  821              rfs_mkdir_getfh},
 829  822  
 830  823          /* RFS_RMDIR = 15 */
 831  824          {rfs_rmdir,
 832  825              xdr_diropargs, NULL_xdrproc_t, sizeof (struct nfsdiropargs),
 833  826  #ifdef _LITTLE_ENDIAN
 834  827              xdr_enum, xdr_fastenum, sizeof (enum nfsstat),
 835  828  #else
 836  829              xdr_enum, NULL_xdrproc_t, sizeof (enum nfsstat),
 837  830  #endif
 838  831              nullfree, RPC_MAPRESP,
 839  832              rfs_rmdir_getfh},
 840  833  
 841  834          /* RFS_READDIR = 16 */
 842  835          {rfs_readdir,
 843  836              xdr_rddirargs, NULL_xdrproc_t, sizeof (struct nfsrddirargs),
 844  837              xdr_putrddirres, NULL_xdrproc_t, sizeof (struct nfsrddirres),
 845  838              rfs_rddirfree, RPC_IDEMPOTENT,
 846  839              rfs_readdir_getfh},
 847  840  
 848  841          /* RFS_STATFS = 17 */
 849  842          {rfs_statfs,
 850  843              xdr_fhandle, xdr_fastfhandle, sizeof (fhandle_t),
 851  844              xdr_statfs, xdr_faststatfs, sizeof (struct nfsstatfs),
 852  845              nullfree, RPC_IDEMPOTENT|RPC_ALLOWANON|RPC_MAPRESP,
 853  846              rfs_statfs_getfh},
 854  847  };
 855  848  
 856  849  static char *rfscallnames_v3[] = {
 857  850          "RFS3_NULL",
 858  851          "RFS3_GETATTR",
 859  852          "RFS3_SETATTR",
 860  853          "RFS3_LOOKUP",
 861  854          "RFS3_ACCESS",
 862  855          "RFS3_READLINK",
 863  856          "RFS3_READ",
 864  857          "RFS3_WRITE",
 865  858          "RFS3_CREATE",
 866  859          "RFS3_MKDIR",
 867  860          "RFS3_SYMLINK",
 868  861          "RFS3_MKNOD",
 869  862          "RFS3_REMOVE",
 870  863          "RFS3_RMDIR",
 871  864          "RFS3_RENAME",
 872  865          "RFS3_LINK",
 873  866          "RFS3_READDIR",
 874  867          "RFS3_READDIRPLUS",
 875  868          "RFS3_FSSTAT",
 876  869          "RFS3_FSINFO",
 877  870          "RFS3_PATHCONF",
 878  871          "RFS3_COMMIT"
 879  872  };
 880  873  
 881  874  static struct rpcdisp rfsdisptab_v3[] = {
 882  875          /*
 883  876           * NFS VERSION 3
 884  877           */
 885  878  
 886  879          /* RFS_NULL = 0 */
 887  880          {rpc_null_v3,
 888  881              xdr_void, NULL_xdrproc_t, 0,
 889  882              xdr_void, NULL_xdrproc_t, 0,
 890  883              nullfree, RPC_IDEMPOTENT,
 891  884              0},
 892  885  
 893  886          /* RFS3_GETATTR = 1 */
 894  887          {rfs3_getattr,
 895  888              xdr_nfs_fh3_server, NULL_xdrproc_t, sizeof (GETATTR3args),
 896  889              xdr_GETATTR3res, NULL_xdrproc_t, sizeof (GETATTR3res),
 897  890              nullfree, (RPC_IDEMPOTENT | RPC_ALLOWANON),
 898  891              rfs3_getattr_getfh},
 899  892  
 900  893          /* RFS3_SETATTR = 2 */
 901  894          {rfs3_setattr,
 902  895              xdr_SETATTR3args, NULL_xdrproc_t, sizeof (SETATTR3args),
 903  896              xdr_SETATTR3res, NULL_xdrproc_t, sizeof (SETATTR3res),
 904  897              nullfree, 0,
 905  898              rfs3_setattr_getfh},
 906  899  
 907  900          /* RFS3_LOOKUP = 3 */
 908  901          {rfs3_lookup,
 909  902              xdr_diropargs3, NULL_xdrproc_t, sizeof (LOOKUP3args),
 910  903              xdr_LOOKUP3res, NULL_xdrproc_t, sizeof (LOOKUP3res),
 911  904              nullfree, (RPC_IDEMPOTENT | RPC_PUBLICFH_OK),
 912  905              rfs3_lookup_getfh},
 913  906  
 914  907          /* RFS3_ACCESS = 4 */
 915  908          {rfs3_access,
 916  909              xdr_ACCESS3args, NULL_xdrproc_t, sizeof (ACCESS3args),
 917  910              xdr_ACCESS3res, NULL_xdrproc_t, sizeof (ACCESS3res),
 918  911              nullfree, RPC_IDEMPOTENT,
 919  912              rfs3_access_getfh},
 920  913  
 921  914          /* RFS3_READLINK = 5 */
 922  915          {rfs3_readlink,
 923  916              xdr_nfs_fh3_server, NULL_xdrproc_t, sizeof (READLINK3args),
 924  917              xdr_READLINK3res, NULL_xdrproc_t, sizeof (READLINK3res),
 925  918              rfs3_readlink_free, RPC_IDEMPOTENT,
 926  919              rfs3_readlink_getfh},
 927  920  
 928  921          /* RFS3_READ = 6 */
 929  922          {rfs3_read,
 930  923              xdr_READ3args, NULL_xdrproc_t, sizeof (READ3args),
 931  924              xdr_READ3res, NULL_xdrproc_t, sizeof (READ3res),
 932  925              rfs3_read_free, RPC_IDEMPOTENT,
 933  926              rfs3_read_getfh},
 934  927  
 935  928          /* RFS3_WRITE = 7 */
 936  929          {rfs3_write,
 937  930              xdr_WRITE3args, NULL_xdrproc_t, sizeof (WRITE3args),
 938  931              xdr_WRITE3res, NULL_xdrproc_t, sizeof (WRITE3res),
 939  932              nullfree, 0,
 940  933              rfs3_write_getfh},
 941  934  
 942  935          /* RFS3_CREATE = 8 */
 943  936          {rfs3_create,
 944  937              xdr_CREATE3args, NULL_xdrproc_t, sizeof (CREATE3args),
 945  938              xdr_CREATE3res, NULL_xdrproc_t, sizeof (CREATE3res),
 946  939              nullfree, 0,
 947  940              rfs3_create_getfh},
 948  941  
 949  942          /* RFS3_MKDIR = 9 */
 950  943          {rfs3_mkdir,
 951  944              xdr_MKDIR3args, NULL_xdrproc_t, sizeof (MKDIR3args),
 952  945              xdr_MKDIR3res, NULL_xdrproc_t, sizeof (MKDIR3res),
 953  946              nullfree, 0,
 954  947              rfs3_mkdir_getfh},
 955  948  
 956  949          /* RFS3_SYMLINK = 10 */
 957  950          {rfs3_symlink,
 958  951              xdr_SYMLINK3args, NULL_xdrproc_t, sizeof (SYMLINK3args),
 959  952              xdr_SYMLINK3res, NULL_xdrproc_t, sizeof (SYMLINK3res),
 960  953              nullfree, 0,
 961  954              rfs3_symlink_getfh},
 962  955  
 963  956          /* RFS3_MKNOD = 11 */
 964  957          {rfs3_mknod,
 965  958              xdr_MKNOD3args, NULL_xdrproc_t, sizeof (MKNOD3args),
 966  959              xdr_MKNOD3res, NULL_xdrproc_t, sizeof (MKNOD3res),
 967  960              nullfree, 0,
 968  961              rfs3_mknod_getfh},
 969  962  
 970  963          /* RFS3_REMOVE = 12 */
 971  964          {rfs3_remove,
 972  965              xdr_diropargs3, NULL_xdrproc_t, sizeof (REMOVE3args),
 973  966              xdr_REMOVE3res, NULL_xdrproc_t, sizeof (REMOVE3res),
 974  967              nullfree, 0,
 975  968              rfs3_remove_getfh},
 976  969  
 977  970          /* RFS3_RMDIR = 13 */
 978  971          {rfs3_rmdir,
 979  972              xdr_diropargs3, NULL_xdrproc_t, sizeof (RMDIR3args),
 980  973              xdr_RMDIR3res, NULL_xdrproc_t, sizeof (RMDIR3res),
 981  974              nullfree, 0,
 982  975              rfs3_rmdir_getfh},
 983  976  
 984  977          /* RFS3_RENAME = 14 */
 985  978          {rfs3_rename,
 986  979              xdr_RENAME3args, NULL_xdrproc_t, sizeof (RENAME3args),
 987  980              xdr_RENAME3res, NULL_xdrproc_t, sizeof (RENAME3res),
 988  981              nullfree, 0,
 989  982              rfs3_rename_getfh},
 990  983  
 991  984          /* RFS3_LINK = 15 */
 992  985          {rfs3_link,
 993  986              xdr_LINK3args, NULL_xdrproc_t, sizeof (LINK3args),
 994  987              xdr_LINK3res, NULL_xdrproc_t, sizeof (LINK3res),
 995  988              nullfree, 0,
 996  989              rfs3_link_getfh},
 997  990  
 998  991          /* RFS3_READDIR = 16 */
 999  992          {rfs3_readdir,
1000  993              xdr_READDIR3args, NULL_xdrproc_t, sizeof (READDIR3args),
1001  994              xdr_READDIR3res, NULL_xdrproc_t, sizeof (READDIR3res),
1002  995              rfs3_readdir_free, RPC_IDEMPOTENT,
1003  996              rfs3_readdir_getfh},
1004  997  
1005  998          /* RFS3_READDIRPLUS = 17 */
1006  999          {rfs3_readdirplus,
1007 1000              xdr_READDIRPLUS3args, NULL_xdrproc_t, sizeof (READDIRPLUS3args),
1008 1001              xdr_READDIRPLUS3res, NULL_xdrproc_t, sizeof (READDIRPLUS3res),
1009 1002              rfs3_readdirplus_free, RPC_AVOIDWORK,
1010 1003              rfs3_readdirplus_getfh},
1011 1004  
1012 1005          /* RFS3_FSSTAT = 18 */
1013 1006          {rfs3_fsstat,
1014 1007              xdr_nfs_fh3_server, NULL_xdrproc_t, sizeof (FSSTAT3args),
1015 1008              xdr_FSSTAT3res, NULL_xdrproc_t, sizeof (FSSTAT3res),
1016 1009              nullfree, RPC_IDEMPOTENT,
1017 1010              rfs3_fsstat_getfh},
1018 1011  
1019 1012          /* RFS3_FSINFO = 19 */
1020 1013          {rfs3_fsinfo,
1021 1014              xdr_nfs_fh3_server, NULL_xdrproc_t, sizeof (FSINFO3args),
1022 1015              xdr_FSINFO3res, NULL_xdrproc_t, sizeof (FSINFO3res),
1023 1016              nullfree, RPC_IDEMPOTENT|RPC_ALLOWANON,
1024 1017              rfs3_fsinfo_getfh},
1025 1018  
1026 1019          /* RFS3_PATHCONF = 20 */
1027 1020          {rfs3_pathconf,
1028 1021              xdr_nfs_fh3_server, NULL_xdrproc_t, sizeof (PATHCONF3args),
1029 1022              xdr_PATHCONF3res, NULL_xdrproc_t, sizeof (PATHCONF3res),
1030 1023              nullfree, RPC_IDEMPOTENT,
1031 1024              rfs3_pathconf_getfh},
1032 1025  
1033 1026          /* RFS3_COMMIT = 21 */
1034 1027          {rfs3_commit,
1035 1028              xdr_COMMIT3args, NULL_xdrproc_t, sizeof (COMMIT3args),
1036 1029              xdr_COMMIT3res, NULL_xdrproc_t, sizeof (COMMIT3res),
1037 1030              nullfree, RPC_IDEMPOTENT,
1038 1031              rfs3_commit_getfh},
1039 1032  };
1040 1033  
1041 1034  static char *rfscallnames_v4[] = {
1042 1035          "RFS4_NULL",
1043 1036          "RFS4_COMPOUND",
1044 1037          "RFS4_NULL",
1045 1038          "RFS4_NULL",
1046 1039          "RFS4_NULL",
1047 1040          "RFS4_NULL",
1048 1041          "RFS4_NULL",
1049 1042          "RFS4_NULL",
1050 1043          "RFS4_CREATE"
1051 1044  };
1052 1045  
1053 1046  static struct rpcdisp rfsdisptab_v4[] = {
1054 1047          /*
1055 1048           * NFS VERSION 4
1056 1049           */
1057 1050  
1058 1051          /* RFS_NULL = 0 */
1059 1052          {rpc_null,
1060 1053              xdr_void, NULL_xdrproc_t, 0,
1061 1054              xdr_void, NULL_xdrproc_t, 0,
1062 1055              nullfree, RPC_IDEMPOTENT, 0},
1063 1056  
1064 1057          /* RFS4_COMPOUND = 1 */
1065 1058          {rfs4_compound,
1066 1059              xdr_COMPOUND4args_srv, NULL_xdrproc_t, sizeof (COMPOUND4args),
1067 1060              xdr_COMPOUND4res_srv, NULL_xdrproc_t, sizeof (COMPOUND4res),
1068 1061              rfs4_compound_free, 0, 0},
1069 1062  };
1070 1063  
1071 1064  union rfs_args {
1072 1065          /*
1073 1066           * NFS VERSION 2
1074 1067           */
1075 1068  
1076 1069          /* RFS_NULL = 0 */
1077 1070  
1078 1071          /* RFS_GETATTR = 1 */
1079 1072          fhandle_t nfs2_getattr_args;
1080 1073  
1081 1074          /* RFS_SETATTR = 2 */
1082 1075          struct nfssaargs nfs2_setattr_args;
1083 1076  
1084 1077          /* RFS_ROOT = 3 *** NO LONGER SUPPORTED *** */
1085 1078  
1086 1079          /* RFS_LOOKUP = 4 */
1087 1080          struct nfsdiropargs nfs2_lookup_args;
1088 1081  
1089 1082          /* RFS_READLINK = 5 */
1090 1083          fhandle_t nfs2_readlink_args;
1091 1084  
1092 1085          /* RFS_READ = 6 */
1093 1086          struct nfsreadargs nfs2_read_args;
1094 1087  
1095 1088          /* RFS_WRITECACHE = 7 *** NO LONGER SUPPORTED *** */
1096 1089  
1097 1090          /* RFS_WRITE = 8 */
1098 1091          struct nfswriteargs nfs2_write_args;
1099 1092  
1100 1093          /* RFS_CREATE = 9 */
1101 1094          struct nfscreatargs nfs2_create_args;
1102 1095  
1103 1096          /* RFS_REMOVE = 10 */
1104 1097          struct nfsdiropargs nfs2_remove_args;
1105 1098  
1106 1099          /* RFS_RENAME = 11 */
1107 1100          struct nfsrnmargs nfs2_rename_args;
1108 1101  
1109 1102          /* RFS_LINK = 12 */
1110 1103          struct nfslinkargs nfs2_link_args;
1111 1104  
1112 1105          /* RFS_SYMLINK = 13 */
1113 1106          struct nfsslargs nfs2_symlink_args;
1114 1107  
1115 1108          /* RFS_MKDIR = 14 */
1116 1109          struct nfscreatargs nfs2_mkdir_args;
1117 1110  
1118 1111          /* RFS_RMDIR = 15 */
1119 1112          struct nfsdiropargs nfs2_rmdir_args;
1120 1113  
1121 1114          /* RFS_READDIR = 16 */
1122 1115          struct nfsrddirargs nfs2_readdir_args;
1123 1116  
1124 1117          /* RFS_STATFS = 17 */
1125 1118          fhandle_t nfs2_statfs_args;
1126 1119  
1127 1120          /*
1128 1121           * NFS VERSION 3
1129 1122           */
1130 1123  
1131 1124          /* RFS_NULL = 0 */
1132 1125  
1133 1126          /* RFS3_GETATTR = 1 */
1134 1127          GETATTR3args nfs3_getattr_args;
1135 1128  
1136 1129          /* RFS3_SETATTR = 2 */
1137 1130          SETATTR3args nfs3_setattr_args;
1138 1131  
1139 1132          /* RFS3_LOOKUP = 3 */
1140 1133          LOOKUP3args nfs3_lookup_args;
1141 1134  
1142 1135          /* RFS3_ACCESS = 4 */
1143 1136          ACCESS3args nfs3_access_args;
1144 1137  
1145 1138          /* RFS3_READLINK = 5 */
1146 1139          READLINK3args nfs3_readlink_args;
1147 1140  
1148 1141          /* RFS3_READ = 6 */
1149 1142          READ3args nfs3_read_args;
1150 1143  
1151 1144          /* RFS3_WRITE = 7 */
1152 1145          WRITE3args nfs3_write_args;
1153 1146  
1154 1147          /* RFS3_CREATE = 8 */
1155 1148          CREATE3args nfs3_create_args;
1156 1149  
1157 1150          /* RFS3_MKDIR = 9 */
1158 1151          MKDIR3args nfs3_mkdir_args;
1159 1152  
1160 1153          /* RFS3_SYMLINK = 10 */
1161 1154          SYMLINK3args nfs3_symlink_args;
1162 1155  
1163 1156          /* RFS3_MKNOD = 11 */
1164 1157          MKNOD3args nfs3_mknod_args;
1165 1158  
1166 1159          /* RFS3_REMOVE = 12 */
1167 1160          REMOVE3args nfs3_remove_args;
1168 1161  
1169 1162          /* RFS3_RMDIR = 13 */
1170 1163          RMDIR3args nfs3_rmdir_args;
1171 1164  
1172 1165          /* RFS3_RENAME = 14 */
1173 1166          RENAME3args nfs3_rename_args;
1174 1167  
1175 1168          /* RFS3_LINK = 15 */
1176 1169          LINK3args nfs3_link_args;
1177 1170  
1178 1171          /* RFS3_READDIR = 16 */
1179 1172          READDIR3args nfs3_readdir_args;
1180 1173  
1181 1174          /* RFS3_READDIRPLUS = 17 */
1182 1175          READDIRPLUS3args nfs3_readdirplus_args;
1183 1176  
1184 1177          /* RFS3_FSSTAT = 18 */
1185 1178          FSSTAT3args nfs3_fsstat_args;
1186 1179  
1187 1180          /* RFS3_FSINFO = 19 */
1188 1181          FSINFO3args nfs3_fsinfo_args;
1189 1182  
1190 1183          /* RFS3_PATHCONF = 20 */
1191 1184          PATHCONF3args nfs3_pathconf_args;
1192 1185  
1193 1186          /* RFS3_COMMIT = 21 */
1194 1187          COMMIT3args nfs3_commit_args;
1195 1188  
1196 1189          /*
1197 1190           * NFS VERSION 4
1198 1191           */
1199 1192  
1200 1193          /* RFS_NULL = 0 */
1201 1194  
1202 1195          /* RFS4_COMPOUND = 1 */
1203 1196          COMPOUND4args nfs4_compound_args;
1204 1197  };
1205 1198  
1206 1199  union rfs_res {
1207 1200          /*
1208 1201           * NFS VERSION 2
1209 1202           */
1210 1203  
1211 1204          /* RFS_NULL = 0 */
1212 1205  
1213 1206          /* RFS_GETATTR = 1 */
1214 1207          struct nfsattrstat nfs2_getattr_res;
1215 1208  
1216 1209          /* RFS_SETATTR = 2 */
1217 1210          struct nfsattrstat nfs2_setattr_res;
1218 1211  
1219 1212          /* RFS_ROOT = 3 *** NO LONGER SUPPORTED *** */
1220 1213  
1221 1214          /* RFS_LOOKUP = 4 */
1222 1215          struct nfsdiropres nfs2_lookup_res;
1223 1216  
1224 1217          /* RFS_READLINK = 5 */
1225 1218          struct nfsrdlnres nfs2_readlink_res;
1226 1219  
1227 1220          /* RFS_READ = 6 */
1228 1221          struct nfsrdresult nfs2_read_res;
1229 1222  
1230 1223          /* RFS_WRITECACHE = 7 *** NO LONGER SUPPORTED *** */
1231 1224  
1232 1225          /* RFS_WRITE = 8 */
1233 1226          struct nfsattrstat nfs2_write_res;
1234 1227  
1235 1228          /* RFS_CREATE = 9 */
1236 1229          struct nfsdiropres nfs2_create_res;
1237 1230  
1238 1231          /* RFS_REMOVE = 10 */
1239 1232          enum nfsstat nfs2_remove_res;
1240 1233  
1241 1234          /* RFS_RENAME = 11 */
1242 1235          enum nfsstat nfs2_rename_res;
1243 1236  
1244 1237          /* RFS_LINK = 12 */
1245 1238          enum nfsstat nfs2_link_res;
1246 1239  
1247 1240          /* RFS_SYMLINK = 13 */
1248 1241          enum nfsstat nfs2_symlink_res;
1249 1242  
1250 1243          /* RFS_MKDIR = 14 */
1251 1244          struct nfsdiropres nfs2_mkdir_res;
1252 1245  
1253 1246          /* RFS_RMDIR = 15 */
1254 1247          enum nfsstat nfs2_rmdir_res;
1255 1248  
1256 1249          /* RFS_READDIR = 16 */
1257 1250          struct nfsrddirres nfs2_readdir_res;
1258 1251  
1259 1252          /* RFS_STATFS = 17 */
1260 1253          struct nfsstatfs nfs2_statfs_res;
1261 1254  
1262 1255          /*
1263 1256           * NFS VERSION 3
1264 1257           */
1265 1258  
1266 1259          /* RFS_NULL = 0 */
1267 1260  
1268 1261          /* RFS3_GETATTR = 1 */
1269 1262          GETATTR3res nfs3_getattr_res;
1270 1263  
1271 1264          /* RFS3_SETATTR = 2 */
1272 1265          SETATTR3res nfs3_setattr_res;
1273 1266  
1274 1267          /* RFS3_LOOKUP = 3 */
1275 1268          LOOKUP3res nfs3_lookup_res;
1276 1269  
1277 1270          /* RFS3_ACCESS = 4 */
1278 1271          ACCESS3res nfs3_access_res;
1279 1272  
1280 1273          /* RFS3_READLINK = 5 */
1281 1274          READLINK3res nfs3_readlink_res;
1282 1275  
1283 1276          /* RFS3_READ = 6 */
1284 1277          READ3res nfs3_read_res;
1285 1278  
1286 1279          /* RFS3_WRITE = 7 */
1287 1280          WRITE3res nfs3_write_res;
1288 1281  
1289 1282          /* RFS3_CREATE = 8 */
1290 1283          CREATE3res nfs3_create_res;
1291 1284  
1292 1285          /* RFS3_MKDIR = 9 */
1293 1286          MKDIR3res nfs3_mkdir_res;
1294 1287  
1295 1288          /* RFS3_SYMLINK = 10 */
1296 1289          SYMLINK3res nfs3_symlink_res;
1297 1290  
1298 1291          /* RFS3_MKNOD = 11 */
1299 1292          MKNOD3res nfs3_mknod_res;
1300 1293  
1301 1294          /* RFS3_REMOVE = 12 */
1302 1295          REMOVE3res nfs3_remove_res;
1303 1296  
1304 1297          /* RFS3_RMDIR = 13 */
1305 1298          RMDIR3res nfs3_rmdir_res;
1306 1299  
1307 1300          /* RFS3_RENAME = 14 */
1308 1301          RENAME3res nfs3_rename_res;
1309 1302  
1310 1303          /* RFS3_LINK = 15 */
1311 1304          LINK3res nfs3_link_res;
1312 1305  
1313 1306          /* RFS3_READDIR = 16 */
1314 1307          READDIR3res nfs3_readdir_res;
1315 1308  
1316 1309          /* RFS3_READDIRPLUS = 17 */
1317 1310          READDIRPLUS3res nfs3_readdirplus_res;
1318 1311  
1319 1312          /* RFS3_FSSTAT = 18 */
1320 1313          FSSTAT3res nfs3_fsstat_res;
1321 1314  
1322 1315          /* RFS3_FSINFO = 19 */
1323 1316          FSINFO3res nfs3_fsinfo_res;
1324 1317  
1325 1318          /* RFS3_PATHCONF = 20 */
1326 1319          PATHCONF3res nfs3_pathconf_res;
1327 1320  
1328 1321          /* RFS3_COMMIT = 21 */
1329 1322          COMMIT3res nfs3_commit_res;
1330 1323  
1331 1324          /*
1332 1325           * NFS VERSION 4
1333 1326           */
1334 1327  
  
    | 
      ↓ open down ↓ | 
    657 lines elided | 
    
      ↑ open up ↑ | 
  
1335 1328          /* RFS_NULL = 0 */
1336 1329  
1337 1330          /* RFS4_COMPOUND = 1 */
1338 1331          COMPOUND4res nfs4_compound_res;
1339 1332  
1340 1333  };
1341 1334  
1342 1335  static struct rpc_disptable rfs_disptable[] = {
1343 1336          {sizeof (rfsdisptab_v2) / sizeof (rfsdisptab_v2[0]),
1344 1337              rfscallnames_v2,
1345      -            &rfsproccnt_v2_ptr, rfsdisptab_v2},
     1338 +            rfsdisptab_v2},
1346 1339          {sizeof (rfsdisptab_v3) / sizeof (rfsdisptab_v3[0]),
1347 1340              rfscallnames_v3,
1348      -            &rfsproccnt_v3_ptr, rfsdisptab_v3},
     1341 +            rfsdisptab_v3},
1349 1342          {sizeof (rfsdisptab_v4) / sizeof (rfsdisptab_v4[0]),
1350 1343              rfscallnames_v4,
1351      -            &rfsproccnt_v4_ptr, rfsdisptab_v4},
     1344 +            rfsdisptab_v4},
1352 1345  };
1353 1346  
1354 1347  /*
1355 1348   * If nfs_portmon is set, then clients are required to use privileged
1356 1349   * ports (ports < IPPORT_RESERVED) in order to get NFS services.
1357 1350   *
1358 1351   * N.B.: this attempt to carry forward the already ill-conceived notion
1359 1352   * of privileged ports for TCP/UDP is really quite ineffectual.  Not only
1360 1353   * is it transport-dependent, it's laughably easy to spoof.  If you're
1361 1354   * really interested in security, you must start with secure RPC instead.
1362 1355   */
1363 1356  static int nfs_portmon = 0;
1364 1357  
1365 1358  #ifdef DEBUG
1366 1359  static int cred_hits = 0;
1367 1360  static int cred_misses = 0;
1368 1361  #endif
1369 1362  
1370      -
1371 1363  #ifdef DEBUG
1372 1364  /*
1373 1365   * Debug code to allow disabling of rfs_dispatch() use of
1374 1366   * fastxdrargs() and fastxdrres() calls for testing purposes.
1375 1367   */
1376 1368  static int rfs_no_fast_xdrargs = 0;
1377 1369  static int rfs_no_fast_xdrres = 0;
1378 1370  #endif
1379 1371  
1380 1372  union acl_args {
1381 1373          /*
1382 1374           * ACL VERSION 2
1383 1375           */
1384 1376  
1385 1377          /* ACL2_NULL = 0 */
1386 1378  
1387 1379          /* ACL2_GETACL = 1 */
1388 1380          GETACL2args acl2_getacl_args;
1389 1381  
1390 1382          /* ACL2_SETACL = 2 */
1391 1383          SETACL2args acl2_setacl_args;
1392 1384  
1393 1385          /* ACL2_GETATTR = 3 */
1394 1386          GETATTR2args acl2_getattr_args;
1395 1387  
1396 1388          /* ACL2_ACCESS = 4 */
1397 1389          ACCESS2args acl2_access_args;
1398 1390  
1399 1391          /* ACL2_GETXATTRDIR = 5 */
1400 1392          GETXATTRDIR2args acl2_getxattrdir_args;
1401 1393  
1402 1394          /*
1403 1395           * ACL VERSION 3
1404 1396           */
1405 1397  
1406 1398          /* ACL3_NULL = 0 */
1407 1399  
1408 1400          /* ACL3_GETACL = 1 */
1409 1401          GETACL3args acl3_getacl_args;
1410 1402  
1411 1403          /* ACL3_SETACL = 2 */
1412 1404          SETACL3args acl3_setacl;
1413 1405  
1414 1406          /* ACL3_GETXATTRDIR = 3 */
1415 1407          GETXATTRDIR3args acl3_getxattrdir_args;
1416 1408  
1417 1409  };
1418 1410  
1419 1411  union acl_res {
1420 1412          /*
1421 1413           * ACL VERSION 2
1422 1414           */
1423 1415  
1424 1416          /* ACL2_NULL = 0 */
1425 1417  
1426 1418          /* ACL2_GETACL = 1 */
1427 1419          GETACL2res acl2_getacl_res;
1428 1420  
1429 1421          /* ACL2_SETACL = 2 */
1430 1422          SETACL2res acl2_setacl_res;
1431 1423  
1432 1424          /* ACL2_GETATTR = 3 */
1433 1425          GETATTR2res acl2_getattr_res;
1434 1426  
1435 1427          /* ACL2_ACCESS = 4 */
1436 1428          ACCESS2res acl2_access_res;
1437 1429  
1438 1430          /* ACL2_GETXATTRDIR = 5 */
1439 1431          GETXATTRDIR2args acl2_getxattrdir_res;
1440 1432  
1441 1433          /*
1442 1434           * ACL VERSION 3
1443 1435           */
1444 1436  
1445 1437          /* ACL3_NULL = 0 */
1446 1438  
1447 1439          /* ACL3_GETACL = 1 */
1448 1440          GETACL3res acl3_getacl_res;
1449 1441  
1450 1442          /* ACL3_SETACL = 2 */
1451 1443          SETACL3res acl3_setacl_res;
1452 1444  
1453 1445          /* ACL3_GETXATTRDIR = 3 */
1454 1446          GETXATTRDIR3res acl3_getxattrdir_res;
1455 1447  
1456 1448  };
1457 1449  
1458 1450  static bool_t
1459 1451  auth_tooweak(struct svc_req *req, char *res)
1460 1452  {
1461 1453  
1462 1454          if (req->rq_vers == NFS_VERSION && req->rq_proc == RFS_LOOKUP) {
1463 1455                  struct nfsdiropres *dr = (struct nfsdiropres *)res;
  
    | 
      ↓ open down ↓ | 
    83 lines elided | 
    
      ↑ open up ↑ | 
  
1464 1456                  if ((enum wnfsstat)dr->dr_status == WNFSERR_CLNT_FLAVOR)
1465 1457                          return (TRUE);
1466 1458          } else if (req->rq_vers == NFS_V3 && req->rq_proc == NFSPROC3_LOOKUP) {
1467 1459                  LOOKUP3res *resp = (LOOKUP3res *)res;
1468 1460                  if ((enum wnfsstat)resp->status == WNFSERR_CLNT_FLAVOR)
1469 1461                          return (TRUE);
1470 1462          }
1471 1463          return (FALSE);
1472 1464  }
1473 1465  
1474      -
1475 1466  static void
1476 1467  common_dispatch(struct svc_req *req, SVCXPRT *xprt, rpcvers_t min_vers,
1477      -                rpcvers_t max_vers, char *pgmname,
1478      -                struct rpc_disptable *disptable)
     1468 +    rpcvers_t max_vers, char *pgmname, struct rpc_disptable *disptable)
1479 1469  {
1480 1470          int which;
1481 1471          rpcvers_t vers;
1482 1472          char *args;
1483 1473          union {
1484 1474                          union rfs_args ra;
1485 1475                          union acl_args aa;
1486 1476                  } args_buf;
1487 1477          char *res;
1488 1478          union {
1489 1479                          union rfs_res rr;
1490 1480                          union acl_res ar;
1491 1481                  } res_buf;
1492 1482          struct rpcdisp *disp = NULL;
1493 1483          int dis_flags = 0;
1494 1484          cred_t *cr;
1495 1485          int error = 0;
1496 1486          int anon_ok;
1497 1487          struct exportinfo *exi = NULL;
1498 1488          unsigned int nfslog_rec_id;
1499 1489          int dupstat;
1500 1490          struct dupreq *dr;
  
    | 
      ↓ open down ↓ | 
    12 lines elided | 
    
      ↑ open up ↑ | 
  
1501 1491          int authres;
1502 1492          bool_t publicfh_ok = FALSE;
1503 1493          enum_t auth_flavor;
1504 1494          bool_t dupcached = FALSE;
1505 1495          struct netbuf   nb;
1506 1496          bool_t logging_enabled = FALSE;
1507 1497          struct exportinfo *nfslog_exi = NULL;
1508 1498          char **procnames;
1509 1499          char cbuf[INET6_ADDRSTRLEN];    /* to hold both IPv4 and IPv6 addr */
1510 1500          bool_t ro = FALSE;
     1501 +        nfs_globals_t *ng = nfs_srv_getzg();
     1502 +        nfs_export_t *ne = ng->nfs_export;
     1503 +        kstat_named_t *svstat, *procstat;
1511 1504  
     1505 +        ASSERT(req->rq_prog == NFS_PROGRAM || req->rq_prog == NFS_ACL_PROGRAM);
     1506 +
1512 1507          vers = req->rq_vers;
1513 1508  
     1509 +        svstat = ng->svstat[req->rq_vers];
     1510 +        procstat = (req->rq_prog == NFS_PROGRAM) ?
     1511 +            ng->rfsproccnt[vers] : ng->aclproccnt[vers];
     1512 +
1514 1513          if (vers < min_vers || vers > max_vers) {
1515 1514                  svcerr_progvers(req->rq_xprt, min_vers, max_vers);
1516 1515                  error++;
1517 1516                  cmn_err(CE_NOTE, "%s: bad version number %u", pgmname, vers);
1518 1517                  goto done;
1519 1518          }
1520 1519          vers -= min_vers;
1521 1520  
1522 1521          which = req->rq_proc;
1523 1522          if (which < 0 || which >= disptable[(int)vers].dis_nprocs) {
1524 1523                  svcerr_noproc(req->rq_xprt);
1525 1524                  error++;
1526 1525                  goto done;
1527 1526          }
1528 1527  
1529      -        (*(disptable[(int)vers].dis_proccntp))[which].value.ui64++;
     1528 +        procstat[which].value.ui64++;
1530 1529  
1531 1530          disp = &disptable[(int)vers].dis_table[which];
1532 1531          procnames = disptable[(int)vers].dis_procnames;
1533 1532  
1534 1533          auth_flavor = req->rq_cred.oa_flavor;
1535 1534  
1536 1535          /*
1537 1536           * Deserialize into the args struct.
1538 1537           */
1539 1538          args = (char *)&args_buf;
1540 1539  
1541 1540  #ifdef DEBUG
1542 1541          if (rfs_no_fast_xdrargs || (auth_flavor == RPCSEC_GSS) ||
1543 1542              disp->dis_fastxdrargs == NULL_xdrproc_t ||
1544 1543              !SVC_GETARGS(xprt, disp->dis_fastxdrargs, (char *)&args))
1545 1544  #else
1546 1545          if ((auth_flavor == RPCSEC_GSS) ||
1547 1546              disp->dis_fastxdrargs == NULL_xdrproc_t ||
1548 1547              !SVC_GETARGS(xprt, disp->dis_fastxdrargs, (char *)&args))
1549 1548  #endif
1550 1549          {
1551 1550                  bzero(args, disp->dis_argsz);
1552 1551                  if (!SVC_GETARGS(xprt, disp->dis_xdrargs, args)) {
1553 1552                          error++;
1554 1553                          /*
1555 1554                           * Check if we are outside our capabilities.
1556 1555                           */
1557 1556                          if (rfs4_minorvers_mismatch(req, xprt, (void *)args))
1558 1557                                  goto done;
1559 1558  
1560 1559                          svcerr_decode(xprt);
1561 1560                          cmn_err(CE_NOTE,
1562 1561                              "Failed to decode arguments for %s version %u "
1563 1562                              "procedure %s client %s%s",
1564 1563                              pgmname, vers + min_vers, procnames[which],
1565 1564                              client_name(req), client_addr(req, cbuf));
1566 1565                          goto done;
1567 1566                  }
1568 1567          }
1569 1568  
1570 1569          /*
1571 1570           * If Version 4 use that specific dispatch function.
1572 1571           */
1573 1572          if (req->rq_vers == 4) {
1574 1573                  error += rfs4_dispatch(disp, req, xprt, args);
1575 1574                  goto done;
1576 1575          }
1577 1576  
1578 1577          dis_flags = disp->dis_flags;
1579 1578  
1580 1579          /*
1581 1580           * Find export information and check authentication,
1582 1581           * setting the credential if everything is ok.
1583 1582           */
1584 1583          if (disp->dis_getfh != NULL) {
1585 1584                  void *fh;
1586 1585                  fsid_t *fsid;
1587 1586                  fid_t *fid, *xfid;
1588 1587                  fhandle_t *fh2;
1589 1588                  nfs_fh3 *fh3;
1590 1589  
1591 1590                  fh = (*disp->dis_getfh)(args);
1592 1591                  switch (req->rq_vers) {
1593 1592                  case NFS_VERSION:
1594 1593                          fh2 = (fhandle_t *)fh;
1595 1594                          fsid = &fh2->fh_fsid;
1596 1595                          fid = (fid_t *)&fh2->fh_len;
1597 1596                          xfid = (fid_t *)&fh2->fh_xlen;
1598 1597                          break;
1599 1598                  case NFS_V3:
1600 1599                          fh3 = (nfs_fh3 *)fh;
1601 1600                          fsid = &fh3->fh3_fsid;
1602 1601                          fid = FH3TOFIDP(fh3);
1603 1602                          xfid = FH3TOXFIDP(fh3);
1604 1603                          break;
1605 1604                  }
1606 1605  
1607 1606                  /*
1608 1607                   * Fix for bug 1038302 - corbin
1609 1608                   * There is a problem here if anonymous access is
1610 1609                   * disallowed.  If the current request is part of the
1611 1610                   * client's mount process for the requested filesystem,
1612 1611                   * then it will carry root (uid 0) credentials on it, and
1613 1612                   * will be denied by checkauth if that client does not
1614 1613                   * have explicit root=0 permission.  This will cause the
1615 1614                   * client's mount operation to fail.  As a work-around,
1616 1615                   * we check here to see if the request is a getattr or
1617 1616                   * statfs operation on the exported vnode itself, and
1618 1617                   * pass a flag to checkauth with the result of this test.
1619 1618                   *
1620 1619                   * The filehandle refers to the mountpoint itself if
1621 1620                   * the fh_data and fh_xdata portions of the filehandle
1622 1621                   * are equal.
1623 1622                   *
1624 1623                   * Added anon_ok argument to checkauth().
  
    | 
      ↓ open down ↓ | 
    85 lines elided | 
    
      ↑ open up ↑ | 
  
1625 1624                   */
1626 1625  
1627 1626                  if ((dis_flags & RPC_ALLOWANON) && EQFID(fid, xfid))
1628 1627                          anon_ok = 1;
1629 1628                  else
1630 1629                          anon_ok = 0;
1631 1630  
1632 1631                  cr = xprt->xp_cred;
1633 1632                  ASSERT(cr != NULL);
1634 1633  #ifdef DEBUG
1635      -                if (crgetref(cr) != 1) {
1636      -                        crfree(cr);
1637      -                        cr = crget();
1638      -                        xprt->xp_cred = cr;
1639      -                        cred_misses++;
1640      -                } else
1641      -                        cred_hits++;
     1634 +                {
     1635 +                        if (crgetref(cr) != 1) {
     1636 +                                crfree(cr);
     1637 +                                cr = crget();
     1638 +                                xprt->xp_cred = cr;
     1639 +                                cred_misses++;
     1640 +                        } else
     1641 +                                cred_hits++;
     1642 +                }
1642 1643  #else
1643 1644                  if (crgetref(cr) != 1) {
1644 1645                          crfree(cr);
1645 1646                          cr = crget();
1646 1647                          xprt->xp_cred = cr;
1647 1648                  }
1648 1649  #endif
1649 1650  
1650 1651                  exi = checkexport(fsid, xfid);
1651 1652  
1652 1653                  if (exi != NULL) {
1653      -                        publicfh_ok = PUBLICFH_CHECK(disp, exi, fsid, xfid);
     1654 +                        publicfh_ok = PUBLICFH_CHECK(ne, disp, exi, fsid, xfid);
1654 1655  
1655 1656                          /*
1656 1657                           * Don't allow non-V4 clients access
1657 1658                           * to pseudo exports
1658 1659                           */
1659 1660                          if (PSEUDO(exi)) {
1660 1661                                  svcerr_weakauth(xprt);
1661 1662                                  error++;
1662 1663                                  goto done;
1663 1664                          }
1664 1665  
1665 1666                          authres = checkauth(exi, req, cr, anon_ok, publicfh_ok,
1666 1667                              &ro);
1667 1668                          /*
1668 1669                           * authres >  0: authentication OK - proceed
1669 1670                           * authres == 0: authentication weak - return error
1670 1671                           * authres <  0: authentication timeout - drop
1671 1672                           */
1672 1673                          if (authres <= 0) {
1673 1674                                  if (authres == 0) {
1674 1675                                          svcerr_weakauth(xprt);
1675 1676                                          error++;
1676 1677                                  }
1677 1678                                  goto done;
1678 1679                          }
1679 1680                  }
1680 1681          } else
1681 1682                  cr = NULL;
1682 1683  
1683 1684          if ((dis_flags & RPC_MAPRESP) && (auth_flavor != RPCSEC_GSS)) {
1684 1685                  res = (char *)SVC_GETRES(xprt, disp->dis_ressz);
1685 1686                  if (res == NULL)
1686 1687                          res = (char *)&res_buf;
1687 1688          } else
1688 1689                  res = (char *)&res_buf;
1689 1690  
1690 1691          if (!(dis_flags & RPC_IDEMPOTENT)) {
1691 1692                  dupstat = SVC_DUP_EXT(xprt, req, res, disp->dis_ressz, &dr,
1692 1693                      &dupcached);
1693 1694  
1694 1695                  switch (dupstat) {
1695 1696                  case DUP_ERROR:
1696 1697                          svcerr_systemerr(xprt);
1697 1698                          error++;
1698 1699                          goto done;
1699 1700                          /* NOTREACHED */
1700 1701                  case DUP_INPROGRESS:
1701 1702                          if (res != (char *)&res_buf)
1702 1703                                  SVC_FREERES(xprt);
1703 1704                          error++;
1704 1705                          goto done;
1705 1706                          /* NOTREACHED */
1706 1707                  case DUP_NEW:
1707 1708                  case DUP_DROP:
1708 1709                          curthread->t_flag |= T_DONTPEND;
1709 1710  
1710 1711                          (*disp->dis_proc)(args, res, exi, req, cr, ro);
1711 1712  
1712 1713                          curthread->t_flag &= ~T_DONTPEND;
1713 1714                          if (curthread->t_flag & T_WOULDBLOCK) {
1714 1715                                  curthread->t_flag &= ~T_WOULDBLOCK;
1715 1716                                  SVC_DUPDONE_EXT(xprt, dr, res, NULL,
1716 1717                                      disp->dis_ressz, DUP_DROP);
1717 1718                                  if (res != (char *)&res_buf)
1718 1719                                          SVC_FREERES(xprt);
1719 1720                                  error++;
1720 1721                                  goto done;
1721 1722                          }
1722 1723                          if (dis_flags & RPC_AVOIDWORK) {
1723 1724                                  SVC_DUPDONE_EXT(xprt, dr, res, NULL,
1724 1725                                      disp->dis_ressz, DUP_DROP);
1725 1726                          } else {
1726 1727                                  SVC_DUPDONE_EXT(xprt, dr, res,
1727 1728                                      disp->dis_resfree == nullfree ? NULL :
1728 1729                                      disp->dis_resfree,
1729 1730                                      disp->dis_ressz, DUP_DONE);
1730 1731                                  dupcached = TRUE;
1731 1732                          }
1732 1733                          break;
1733 1734                  case DUP_DONE:
1734 1735                          break;
1735 1736                  }
1736 1737  
1737 1738          } else {
1738 1739                  curthread->t_flag |= T_DONTPEND;
1739 1740  
1740 1741                  (*disp->dis_proc)(args, res, exi, req, cr, ro);
1741 1742  
1742 1743                  curthread->t_flag &= ~T_DONTPEND;
1743 1744                  if (curthread->t_flag & T_WOULDBLOCK) {
1744 1745                          curthread->t_flag &= ~T_WOULDBLOCK;
1745 1746                          if (res != (char *)&res_buf)
1746 1747                                  SVC_FREERES(xprt);
1747 1748                          error++;
1748 1749                          goto done;
1749 1750                  }
1750 1751          }
1751 1752  
1752 1753          if (auth_tooweak(req, res)) {
1753 1754                  svcerr_weakauth(xprt);
1754 1755                  error++;
1755 1756                  goto done;
  
    | 
      ↓ open down ↓ | 
    92 lines elided | 
    
      ↑ open up ↑ | 
  
1756 1757          }
1757 1758  
1758 1759          /*
1759 1760           * Check to see if logging has been enabled on the server.
1760 1761           * If so, then obtain the export info struct to be used for
1761 1762           * the later writing of the log record.  This is done for
1762 1763           * the case that a lookup is done across a non-logged public
1763 1764           * file system.
1764 1765           */
1765 1766          if (nfslog_buffer_list != NULL) {
1766      -                nfslog_exi = nfslog_get_exi(exi, req, res, &nfslog_rec_id);
     1767 +                nfslog_exi = nfslog_get_exi(ne, exi, req, res, &nfslog_rec_id);
1767 1768                  /*
1768 1769                   * Is logging enabled?
1769 1770                   */
1770 1771                  logging_enabled = (nfslog_exi != NULL);
1771 1772  
1772 1773                  /*
1773 1774                   * Copy the netbuf for logging purposes, before it is
1774 1775                   * freed by svc_sendreply().
1775 1776                   */
1776 1777                  if (logging_enabled) {
1777 1778                          NFSLOG_COPY_NETBUF(nfslog_exi, xprt, &nb);
1778 1779                          /*
1779 1780                           * If RPC_MAPRESP flag set (i.e. in V2 ops) the
1780 1781                           * res gets copied directly into the mbuf and
1781 1782                           * may be freed soon after the sendreply. So we
1782 1783                           * must copy it here to a safe place...
1783 1784                           */
1784 1785                          if (res != (char *)&res_buf) {
1785 1786                                  bcopy(res, (char *)&res_buf, disp->dis_ressz);
1786 1787                          }
1787 1788                  }
1788 1789          }
1789 1790  
1790 1791          /*
1791 1792           * Serialize and send results struct
1792 1793           */
1793 1794  #ifdef DEBUG
1794 1795          if (rfs_no_fast_xdrres == 0 && res != (char *)&res_buf)
1795 1796  #else
1796 1797          if (res != (char *)&res_buf)
1797 1798  #endif
1798 1799          {
1799 1800                  if (!svc_sendreply(xprt, disp->dis_fastxdrres, res)) {
1800 1801                          cmn_err(CE_NOTE, "%s: bad sendreply", pgmname);
1801 1802                          svcerr_systemerr(xprt);
1802 1803                          error++;
1803 1804                  }
1804 1805          } else {
1805 1806                  if (!svc_sendreply(xprt, disp->dis_xdrres, res)) {
1806 1807                          cmn_err(CE_NOTE, "%s: bad sendreply", pgmname);
1807 1808                          svcerr_systemerr(xprt);
1808 1809                          error++;
1809 1810                  }
1810 1811          }
1811 1812  
1812 1813          /*
1813 1814           * Log if needed
1814 1815           */
1815 1816          if (logging_enabled) {
1816 1817                  nfslog_write_record(nfslog_exi, req, args, (char *)&res_buf,
1817 1818                      cr, &nb, nfslog_rec_id, NFSLOG_ONE_BUFFER);
1818 1819                  exi_rele(nfslog_exi);
1819 1820                  kmem_free((&nb)->buf, (&nb)->len);
1820 1821          }
1821 1822  
1822 1823          /*
1823 1824           * Free results struct. With the addition of NFS V4 we can
1824 1825           * have non-idempotent procedures with functions.
1825 1826           */
1826 1827          if (disp->dis_resfree != nullfree && dupcached == FALSE) {
1827 1828                  (*disp->dis_resfree)(res);
1828 1829          }
1829 1830  
1830 1831  done:
1831 1832          /*
1832 1833           * Free arguments struct
1833 1834           */
1834 1835          if (disp) {
1835 1836                  if (!SVC_FREEARGS(xprt, disp->dis_xdrargs, args)) {
1836 1837                          cmn_err(CE_NOTE, "%s: bad freeargs", pgmname);
1837 1838                          error++;
1838 1839                  }
  
    | 
      ↓ open down ↓ | 
    62 lines elided | 
    
      ↑ open up ↑ | 
  
1839 1840          } else {
1840 1841                  if (!SVC_FREEARGS(xprt, (xdrproc_t)0, (caddr_t)0)) {
1841 1842                          cmn_err(CE_NOTE, "%s: bad freeargs", pgmname);
1842 1843                          error++;
1843 1844                  }
1844 1845          }
1845 1846  
1846 1847          if (exi != NULL)
1847 1848                  exi_rele(exi);
1848 1849  
1849      -        global_svstat_ptr[req->rq_vers][NFS_BADCALLS].value.ui64 += error;
1850      -
1851      -        global_svstat_ptr[req->rq_vers][NFS_CALLS].value.ui64++;
     1850 +        svstat[NFS_BADCALLS].value.ui64 += error;
     1851 +        svstat[NFS_CALLS].value.ui64++;
1852 1852  }
1853 1853  
1854 1854  static void
1855 1855  rfs_dispatch(struct svc_req *req, SVCXPRT *xprt)
1856 1856  {
1857 1857          common_dispatch(req, xprt, NFS_VERSMIN, NFS_VERSMAX,
1858 1858              "NFS", rfs_disptable);
1859 1859  }
1860 1860  
1861 1861  static char *aclcallnames_v2[] = {
1862 1862          "ACL2_NULL",
1863 1863          "ACL2_GETACL",
1864 1864          "ACL2_SETACL",
1865 1865          "ACL2_GETATTR",
1866 1866          "ACL2_ACCESS",
1867 1867          "ACL2_GETXATTRDIR"
1868 1868  };
1869 1869  
1870 1870  static struct rpcdisp acldisptab_v2[] = {
1871 1871          /*
1872 1872           * ACL VERSION 2
1873 1873           */
1874 1874  
1875 1875          /* ACL2_NULL = 0 */
1876 1876          {rpc_null,
1877 1877              xdr_void, NULL_xdrproc_t, 0,
1878 1878              xdr_void, NULL_xdrproc_t, 0,
1879 1879              nullfree, RPC_IDEMPOTENT,
1880 1880              0},
1881 1881  
1882 1882          /* ACL2_GETACL = 1 */
1883 1883          {acl2_getacl,
1884 1884              xdr_GETACL2args, xdr_fastGETACL2args, sizeof (GETACL2args),
1885 1885              xdr_GETACL2res, NULL_xdrproc_t, sizeof (GETACL2res),
1886 1886              acl2_getacl_free, RPC_IDEMPOTENT,
1887 1887              acl2_getacl_getfh},
1888 1888  
1889 1889          /* ACL2_SETACL = 2 */
1890 1890          {acl2_setacl,
1891 1891              xdr_SETACL2args, NULL_xdrproc_t, sizeof (SETACL2args),
1892 1892  #ifdef _LITTLE_ENDIAN
1893 1893              xdr_SETACL2res, xdr_fastSETACL2res, sizeof (SETACL2res),
1894 1894  #else
1895 1895              xdr_SETACL2res, NULL_xdrproc_t, sizeof (SETACL2res),
1896 1896  #endif
1897 1897              nullfree, RPC_MAPRESP,
1898 1898              acl2_setacl_getfh},
1899 1899  
1900 1900          /* ACL2_GETATTR = 3 */
1901 1901          {acl2_getattr,
1902 1902              xdr_GETATTR2args, xdr_fastGETATTR2args, sizeof (GETATTR2args),
1903 1903  #ifdef _LITTLE_ENDIAN
1904 1904              xdr_GETATTR2res, xdr_fastGETATTR2res, sizeof (GETATTR2res),
1905 1905  #else
1906 1906              xdr_GETATTR2res, NULL_xdrproc_t, sizeof (GETATTR2res),
1907 1907  #endif
1908 1908              nullfree, RPC_IDEMPOTENT|RPC_ALLOWANON|RPC_MAPRESP,
1909 1909              acl2_getattr_getfh},
1910 1910  
1911 1911          /* ACL2_ACCESS = 4 */
1912 1912          {acl2_access,
1913 1913              xdr_ACCESS2args, xdr_fastACCESS2args, sizeof (ACCESS2args),
1914 1914  #ifdef _LITTLE_ENDIAN
1915 1915              xdr_ACCESS2res, xdr_fastACCESS2res, sizeof (ACCESS2res),
1916 1916  #else
1917 1917              xdr_ACCESS2res, NULL_xdrproc_t, sizeof (ACCESS2res),
1918 1918  #endif
1919 1919              nullfree, RPC_IDEMPOTENT|RPC_MAPRESP,
1920 1920              acl2_access_getfh},
1921 1921  
1922 1922          /* ACL2_GETXATTRDIR = 5 */
1923 1923          {acl2_getxattrdir,
1924 1924              xdr_GETXATTRDIR2args, NULL_xdrproc_t, sizeof (GETXATTRDIR2args),
1925 1925              xdr_GETXATTRDIR2res, NULL_xdrproc_t, sizeof (GETXATTRDIR2res),
1926 1926              nullfree, RPC_IDEMPOTENT,
1927 1927              acl2_getxattrdir_getfh},
1928 1928  };
1929 1929  
1930 1930  static char *aclcallnames_v3[] = {
1931 1931          "ACL3_NULL",
1932 1932          "ACL3_GETACL",
1933 1933          "ACL3_SETACL",
1934 1934          "ACL3_GETXATTRDIR"
1935 1935  };
1936 1936  
1937 1937  static struct rpcdisp acldisptab_v3[] = {
1938 1938          /*
1939 1939           * ACL VERSION 3
1940 1940           */
1941 1941  
1942 1942          /* ACL3_NULL = 0 */
1943 1943          {rpc_null,
1944 1944              xdr_void, NULL_xdrproc_t, 0,
1945 1945              xdr_void, NULL_xdrproc_t, 0,
1946 1946              nullfree, RPC_IDEMPOTENT,
1947 1947              0},
1948 1948  
1949 1949          /* ACL3_GETACL = 1 */
1950 1950          {acl3_getacl,
1951 1951              xdr_GETACL3args, NULL_xdrproc_t, sizeof (GETACL3args),
1952 1952              xdr_GETACL3res, NULL_xdrproc_t, sizeof (GETACL3res),
1953 1953              acl3_getacl_free, RPC_IDEMPOTENT,
1954 1954              acl3_getacl_getfh},
1955 1955  
1956 1956          /* ACL3_SETACL = 2 */
1957 1957          {acl3_setacl,
1958 1958              xdr_SETACL3args, NULL_xdrproc_t, sizeof (SETACL3args),
1959 1959              xdr_SETACL3res, NULL_xdrproc_t, sizeof (SETACL3res),
1960 1960              nullfree, 0,
1961 1961              acl3_setacl_getfh},
1962 1962  
1963 1963          /* ACL3_GETXATTRDIR = 3 */
  
    | 
      ↓ open down ↓ | 
    102 lines elided | 
    
      ↑ open up ↑ | 
  
1964 1964          {acl3_getxattrdir,
1965 1965              xdr_GETXATTRDIR3args, NULL_xdrproc_t, sizeof (GETXATTRDIR3args),
1966 1966              xdr_GETXATTRDIR3res, NULL_xdrproc_t, sizeof (GETXATTRDIR3res),
1967 1967              nullfree, RPC_IDEMPOTENT,
1968 1968              acl3_getxattrdir_getfh},
1969 1969  };
1970 1970  
1971 1971  static struct rpc_disptable acl_disptable[] = {
1972 1972          {sizeof (acldisptab_v2) / sizeof (acldisptab_v2[0]),
1973 1973                  aclcallnames_v2,
1974      -                &aclproccnt_v2_ptr, acldisptab_v2},
     1974 +                acldisptab_v2},
1975 1975          {sizeof (acldisptab_v3) / sizeof (acldisptab_v3[0]),
1976 1976                  aclcallnames_v3,
1977      -                &aclproccnt_v3_ptr, acldisptab_v3},
     1977 +                acldisptab_v3},
1978 1978  };
1979 1979  
1980 1980  static void
1981 1981  acl_dispatch(struct svc_req *req, SVCXPRT *xprt)
1982 1982  {
1983 1983          common_dispatch(req, xprt, NFS_ACL_VERSMIN, NFS_ACL_VERSMAX,
1984 1984              "ACL", acl_disptable);
1985 1985  }
1986 1986  
1987 1987  int
1988 1988  checkwin(int flavor, int window, struct svc_req *req)
1989 1989  {
1990 1990          struct authdes_cred *adc;
1991 1991  
1992 1992          switch (flavor) {
1993 1993          case AUTH_DES:
1994 1994                  adc = (struct authdes_cred *)req->rq_clntcred;
1995 1995                  CTASSERT(sizeof (struct authdes_cred) <= RQCRED_SIZE);
1996 1996                  if (adc->adc_fullname.window > window)
1997 1997                          return (0);
1998 1998                  break;
1999 1999  
2000 2000          default:
2001 2001                  break;
2002 2002          }
2003 2003          return (1);
2004 2004  }
2005 2005  
2006 2006  
2007 2007  /*
2008 2008   * checkauth() will check the access permission against the export
2009 2009   * information.  Then map root uid/gid to appropriate uid/gid.
2010 2010   *
2011 2011   * This routine is used by NFS V3 and V2 code.
2012 2012   */
2013 2013  static int
2014 2014  checkauth(struct exportinfo *exi, struct svc_req *req, cred_t *cr, int anon_ok,
2015 2015      bool_t publicfh_ok, bool_t *ro)
2016 2016  {
2017 2017          int i, nfsflavor, rpcflavor, stat, access;
2018 2018          struct secinfo *secp;
2019 2019          caddr_t principal;
2020 2020          char buf[INET6_ADDRSTRLEN]; /* to hold both IPv4 and IPv6 addr */
2021 2021          int anon_res = 0;
2022 2022  
2023 2023          uid_t uid;
2024 2024          gid_t gid;
2025 2025          uint_t ngids;
2026 2026          gid_t *gids;
2027 2027  
2028 2028          /*
2029 2029           * Check for privileged port number
2030 2030           * N.B.:  this assumes that we know the format of a netbuf.
2031 2031           */
2032 2032          if (nfs_portmon) {
2033 2033                  struct sockaddr *ca;
2034 2034                  ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2035 2035  
2036 2036                  if (ca == NULL)
2037 2037                          return (0);
2038 2038  
2039 2039                  if ((ca->sa_family == AF_INET &&
2040 2040                      ntohs(((struct sockaddr_in *)ca)->sin_port) >=
2041 2041                      IPPORT_RESERVED) ||
2042 2042                      (ca->sa_family == AF_INET6 &&
2043 2043                      ntohs(((struct sockaddr_in6 *)ca)->sin6_port) >=
2044 2044                      IPPORT_RESERVED)) {
2045 2045                          cmn_err(CE_NOTE,
2046 2046                              "nfs_server: client %s%ssent NFS request from "
2047 2047                              "unprivileged port",
2048 2048                              client_name(req), client_addr(req, buf));
2049 2049                          return (0);
2050 2050                  }
2051 2051          }
2052 2052  
2053 2053          /*
2054 2054           * sec_svc_getcred() returns 1 on success or 0 on failure
2055 2055           */
2056 2056          stat = sec_svc_getcred(req, cr, &principal, &nfsflavor);
2057 2057  
2058 2058          /*
2059 2059           * A failed AUTH_UNIX sec_svc_getcred() implies we couldn't set
2060 2060           * the credentials; below we map that to anonymous.
2061 2061           */
2062 2062          if (!stat && nfsflavor != AUTH_UNIX) {
2063 2063                  cmn_err(CE_NOTE,
2064 2064                      "nfs_server: couldn't get unix cred for %s",
2065 2065                      client_name(req));
2066 2066                  return (0);
2067 2067          }
2068 2068  
2069 2069          /*
2070 2070           * Short circuit checkauth() on operations that support the
2071 2071           * public filehandle, and if the request for that operation
2072 2072           * is using the public filehandle. Note that we must call
2073 2073           * sec_svc_getcred() first so that xp_cookie is set to the
2074 2074           * right value. Normally xp_cookie is just the RPC flavor
2075 2075           * of the request, but in the case of RPCSEC_GSS it
2076 2076           * could be a pseudo flavor.
2077 2077           */
2078 2078          if (publicfh_ok)
2079 2079                  return (1);
2080 2080  
2081 2081          rpcflavor = req->rq_cred.oa_flavor;
2082 2082          /*
2083 2083           * Check if the auth flavor is valid for this export
2084 2084           */
2085 2085          access = nfsauth_access(exi, req, cr, &uid, &gid, &ngids, &gids);
2086 2086          if (access & NFSAUTH_DROP)
2087 2087                  return (-1);    /* drop the request */
2088 2088  
2089 2089          if (access & NFSAUTH_RO)
2090 2090                  *ro = TRUE;
2091 2091  
2092 2092          if (access & NFSAUTH_DENIED) {
2093 2093                  /*
2094 2094                   * If anon_ok == 1 and we got NFSAUTH_DENIED, it was
2095 2095                   * probably due to the flavor not matching during
2096 2096                   * the mount attempt. So map the flavor to AUTH_NONE
2097 2097                   * so that the credentials get mapped to the anonymous
2098 2098                   * user.
2099 2099                   */
2100 2100                  if (anon_ok == 1)
2101 2101                          rpcflavor = AUTH_NONE;
2102 2102                  else
2103 2103                          return (0);     /* deny access */
2104 2104  
2105 2105          } else if (access & NFSAUTH_MAPNONE) {
2106 2106                  /*
2107 2107                   * Access was granted even though the flavor mismatched
2108 2108                   * because AUTH_NONE was one of the exported flavors.
2109 2109                   */
2110 2110                  rpcflavor = AUTH_NONE;
2111 2111  
2112 2112          } else if (access & NFSAUTH_WRONGSEC) {
2113 2113                  /*
2114 2114                   * NFSAUTH_WRONGSEC is used for NFSv4. If we get here,
2115 2115                   * it means a client ignored the list of allowed flavors
2116 2116                   * returned via the MOUNT protocol. So we just disallow it!
2117 2117                   */
2118 2118                  return (0);
2119 2119          }
2120 2120  
2121 2121          if (rpcflavor != AUTH_SYS)
2122 2122                  kmem_free(gids, ngids * sizeof (gid_t));
2123 2123  
2124 2124          switch (rpcflavor) {
2125 2125          case AUTH_NONE:
2126 2126                  anon_res = crsetugid(cr, exi->exi_export.ex_anon,
2127 2127                      exi->exi_export.ex_anon);
2128 2128                  (void) crsetgroups(cr, 0, NULL);
2129 2129                  break;
2130 2130  
2131 2131          case AUTH_UNIX:
2132 2132                  if (!stat || crgetuid(cr) == 0 && !(access & NFSAUTH_UIDMAP)) {
2133 2133                          anon_res = crsetugid(cr, exi->exi_export.ex_anon,
2134 2134                              exi->exi_export.ex_anon);
2135 2135                          (void) crsetgroups(cr, 0, NULL);
2136 2136                  } else if (crgetuid(cr) == 0 && access & NFSAUTH_ROOT) {
2137 2137                          /*
2138 2138                           * It is root, so apply rootid to get real UID
2139 2139                           * Find the secinfo structure.  We should be able
2140 2140                           * to find it by the time we reach here.
2141 2141                           * nfsauth_access() has done the checking.
2142 2142                           */
2143 2143                          secp = NULL;
2144 2144                          for (i = 0; i < exi->exi_export.ex_seccnt; i++) {
2145 2145                                  struct secinfo *sptr;
2146 2146                                  sptr = &exi->exi_export.ex_secinfo[i];
2147 2147                                  if (sptr->s_secinfo.sc_nfsnum == nfsflavor) {
2148 2148                                          secp = sptr;
2149 2149                                          break;
2150 2150                                  }
2151 2151                          }
2152 2152                          if (secp != NULL) {
2153 2153                                  (void) crsetugid(cr, secp->s_rootid,
2154 2154                                      secp->s_rootid);
2155 2155                                  (void) crsetgroups(cr, 0, NULL);
2156 2156                          }
2157 2157                  } else if (crgetuid(cr) != uid || crgetgid(cr) != gid) {
2158 2158                          if (crsetugid(cr, uid, gid) != 0)
2159 2159                                  anon_res = crsetugid(cr,
2160 2160                                      exi->exi_export.ex_anon,
2161 2161                                      exi->exi_export.ex_anon);
2162 2162                          (void) crsetgroups(cr, 0, NULL);
2163 2163                  } else if (access & NFSAUTH_GROUPS) {
2164 2164                          (void) crsetgroups(cr, ngids, gids);
2165 2165                  }
2166 2166  
2167 2167                  kmem_free(gids, ngids * sizeof (gid_t));
2168 2168  
2169 2169                  break;
2170 2170  
2171 2171          case AUTH_DES:
2172 2172          case RPCSEC_GSS:
2173 2173                  /*
2174 2174                   *  Find the secinfo structure.  We should be able
2175 2175                   *  to find it by the time we reach here.
2176 2176                   *  nfsauth_access() has done the checking.
2177 2177                   */
2178 2178                  secp = NULL;
2179 2179                  for (i = 0; i < exi->exi_export.ex_seccnt; i++) {
2180 2180                          if (exi->exi_export.ex_secinfo[i].s_secinfo.sc_nfsnum ==
2181 2181                              nfsflavor) {
2182 2182                                  secp = &exi->exi_export.ex_secinfo[i];
2183 2183                                  break;
2184 2184                          }
2185 2185                  }
2186 2186  
2187 2187                  if (!secp) {
2188 2188                          cmn_err(CE_NOTE, "nfs_server: client %s%shad "
2189 2189                              "no secinfo data for flavor %d",
2190 2190                              client_name(req), client_addr(req, buf),
2191 2191                              nfsflavor);
2192 2192                          return (0);
2193 2193                  }
2194 2194  
2195 2195                  if (!checkwin(rpcflavor, secp->s_window, req)) {
2196 2196                          cmn_err(CE_NOTE,
2197 2197                              "nfs_server: client %s%sused invalid "
2198 2198                              "auth window value",
2199 2199                              client_name(req), client_addr(req, buf));
2200 2200                          return (0);
2201 2201                  }
2202 2202  
2203 2203                  /*
2204 2204                   * Map root principals listed in the share's root= list to root,
2205 2205                   * and map any other principals that were mapped to root by RPC
2206 2206                   * to anon.
2207 2207                   */
2208 2208                  if (principal && sec_svc_inrootlist(rpcflavor, principal,
2209 2209                      secp->s_rootcnt, secp->s_rootnames)) {
2210 2210                          if (crgetuid(cr) == 0 && secp->s_rootid == 0)
2211 2211                                  return (1);
2212 2212  
2213 2213  
2214 2214                          (void) crsetugid(cr, secp->s_rootid, secp->s_rootid);
2215 2215  
2216 2216                          /*
2217 2217                           * NOTE: If and when kernel-land privilege tracing is
2218 2218                           * added this may have to be replaced with code that
2219 2219                           * retrieves root's supplementary groups (e.g., using
2220 2220                           * kgss_get_group_info()).  In the meantime principals
2221 2221                           * mapped to uid 0 get all privileges, so setting cr's
2222 2222                           * supplementary groups for them does nothing.
2223 2223                           */
2224 2224                          (void) crsetgroups(cr, 0, NULL);
2225 2225  
2226 2226                          return (1);
2227 2227                  }
2228 2228  
2229 2229                  /*
2230 2230                   * Not a root princ, or not in root list, map UID 0/nobody to
2231 2231                   * the anon ID for the share.  (RPC sets cr's UIDs and GIDs to
2232 2232                   * UID_NOBODY and GID_NOBODY, respectively.)
2233 2233                   */
2234 2234                  if (crgetuid(cr) != 0 &&
2235 2235                      (crgetuid(cr) != UID_NOBODY || crgetgid(cr) != GID_NOBODY))
2236 2236                          return (1);
2237 2237  
2238 2238                  anon_res = crsetugid(cr, exi->exi_export.ex_anon,
2239 2239                      exi->exi_export.ex_anon);
2240 2240                  (void) crsetgroups(cr, 0, NULL);
2241 2241                  break;
2242 2242          default:
2243 2243                  return (0);
2244 2244          } /* switch on rpcflavor */
2245 2245  
2246 2246          /*
2247 2247           * Even if anon access is disallowed via ex_anon == -1, we allow
2248 2248           * this access if anon_ok is set.  So set creds to the default
2249 2249           * "nobody" id.
2250 2250           */
2251 2251          if (anon_res != 0) {
2252 2252                  if (anon_ok == 0) {
2253 2253                          cmn_err(CE_NOTE,
2254 2254                              "nfs_server: client %s%ssent wrong "
2255 2255                              "authentication for %s",
2256 2256                              client_name(req), client_addr(req, buf),
2257 2257                              exi->exi_export.ex_path ?
2258 2258                              exi->exi_export.ex_path : "?");
2259 2259                          return (0);
2260 2260                  }
2261 2261  
2262 2262                  if (crsetugid(cr, UID_NOBODY, GID_NOBODY) != 0)
2263 2263                          return (0);
2264 2264          }
2265 2265  
2266 2266          return (1);
2267 2267  }
2268 2268  
2269 2269  /*
2270 2270   * returns 0 on failure, -1 on a drop, -2 on wrong security flavor,
2271 2271   * and 1 on success
2272 2272   */
2273 2273  int
2274 2274  checkauth4(struct compound_state *cs, struct svc_req *req)
2275 2275  {
2276 2276          int i, rpcflavor, access;
2277 2277          struct secinfo *secp;
2278 2278          char buf[MAXHOST + 1];
2279 2279          int anon_res = 0, nfsflavor;
2280 2280          struct exportinfo *exi;
2281 2281          cred_t  *cr;
2282 2282          caddr_t principal;
2283 2283  
2284 2284          uid_t uid;
2285 2285          gid_t gid;
2286 2286          uint_t ngids;
2287 2287          gid_t *gids;
2288 2288  
2289 2289          exi = cs->exi;
2290 2290          cr = cs->cr;
2291 2291          principal = cs->principal;
2292 2292          nfsflavor = cs->nfsflavor;
2293 2293  
2294 2294          ASSERT(cr != NULL);
2295 2295  
2296 2296          rpcflavor = req->rq_cred.oa_flavor;
2297 2297          cs->access &= ~CS_ACCESS_LIMITED;
2298 2298  
2299 2299          /*
2300 2300           * Check for privileged port number
2301 2301           * N.B.:  this assumes that we know the format of a netbuf.
2302 2302           */
2303 2303          if (nfs_portmon) {
2304 2304                  struct sockaddr *ca;
2305 2305                  ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2306 2306  
2307 2307                  if (ca == NULL)
2308 2308                          return (0);
2309 2309  
2310 2310                  if ((ca->sa_family == AF_INET &&
2311 2311                      ntohs(((struct sockaddr_in *)ca)->sin_port) >=
2312 2312                      IPPORT_RESERVED) ||
2313 2313                      (ca->sa_family == AF_INET6 &&
2314 2314                      ntohs(((struct sockaddr_in6 *)ca)->sin6_port) >=
2315 2315                      IPPORT_RESERVED)) {
2316 2316                          cmn_err(CE_NOTE,
2317 2317                              "nfs_server: client %s%ssent NFSv4 request from "
2318 2318                              "unprivileged port",
2319 2319                              client_name(req), client_addr(req, buf));
2320 2320                          return (0);
2321 2321                  }
2322 2322          }
2323 2323  
2324 2324          /*
2325 2325           * Check the access right per auth flavor on the vnode of
2326 2326           * this export for the given request.
2327 2327           */
2328 2328          access = nfsauth4_access(cs->exi, cs->vp, req, cr, &uid, &gid, &ngids,
2329 2329              &gids);
2330 2330  
2331 2331          if (access & NFSAUTH_WRONGSEC)
2332 2332                  return (-2);    /* no access for this security flavor */
2333 2333  
2334 2334          if (access & NFSAUTH_DROP)
2335 2335                  return (-1);    /* drop the request */
2336 2336  
2337 2337          if (access & NFSAUTH_DENIED) {
2338 2338  
2339 2339                  if (exi->exi_export.ex_seccnt > 0)
2340 2340                          return (0);     /* deny access */
2341 2341  
2342 2342          } else if (access & NFSAUTH_LIMITED) {
2343 2343  
2344 2344                  cs->access |= CS_ACCESS_LIMITED;
2345 2345  
2346 2346          } else if (access & NFSAUTH_MAPNONE) {
2347 2347                  /*
2348 2348                   * Access was granted even though the flavor mismatched
2349 2349                   * because AUTH_NONE was one of the exported flavors.
2350 2350                   */
2351 2351                  rpcflavor = AUTH_NONE;
2352 2352          }
2353 2353  
2354 2354          /*
2355 2355           * XXX probably need to redo some of it for nfsv4?
2356 2356           * return 1 on success or 0 on failure
2357 2357           */
2358 2358  
2359 2359          if (rpcflavor != AUTH_SYS)
2360 2360                  kmem_free(gids, ngids * sizeof (gid_t));
2361 2361  
2362 2362          switch (rpcflavor) {
2363 2363          case AUTH_NONE:
2364 2364                  anon_res = crsetugid(cr, exi->exi_export.ex_anon,
2365 2365                      exi->exi_export.ex_anon);
2366 2366                  (void) crsetgroups(cr, 0, NULL);
2367 2367                  break;
2368 2368  
2369 2369          case AUTH_UNIX:
2370 2370                  if (crgetuid(cr) == 0 && !(access & NFSAUTH_UIDMAP)) {
2371 2371                          anon_res = crsetugid(cr, exi->exi_export.ex_anon,
2372 2372                              exi->exi_export.ex_anon);
2373 2373                          (void) crsetgroups(cr, 0, NULL);
2374 2374                  } else if (crgetuid(cr) == 0 && access & NFSAUTH_ROOT) {
2375 2375                          /*
2376 2376                           * It is root, so apply rootid to get real UID
2377 2377                           * Find the secinfo structure.  We should be able
2378 2378                           * to find it by the time we reach here.
2379 2379                           * nfsauth_access() has done the checking.
2380 2380                           */
2381 2381                          secp = NULL;
2382 2382                          for (i = 0; i < exi->exi_export.ex_seccnt; i++) {
2383 2383                                  struct secinfo *sptr;
2384 2384                                  sptr = &exi->exi_export.ex_secinfo[i];
2385 2385                                  if (sptr->s_secinfo.sc_nfsnum == nfsflavor) {
2386 2386                                          secp = &exi->exi_export.ex_secinfo[i];
2387 2387                                          break;
2388 2388                                  }
2389 2389                          }
2390 2390                          if (secp != NULL) {
2391 2391                                  (void) crsetugid(cr, secp->s_rootid,
2392 2392                                      secp->s_rootid);
2393 2393                                  (void) crsetgroups(cr, 0, NULL);
2394 2394                          }
2395 2395                  } else if (crgetuid(cr) != uid || crgetgid(cr) != gid) {
2396 2396                          if (crsetugid(cr, uid, gid) != 0)
2397 2397                                  anon_res = crsetugid(cr,
2398 2398                                      exi->exi_export.ex_anon,
2399 2399                                      exi->exi_export.ex_anon);
2400 2400                          (void) crsetgroups(cr, 0, NULL);
2401 2401                  } if (access & NFSAUTH_GROUPS) {
2402 2402                          (void) crsetgroups(cr, ngids, gids);
2403 2403                  }
2404 2404  
2405 2405                  kmem_free(gids, ngids * sizeof (gid_t));
2406 2406  
2407 2407                  break;
2408 2408  
2409 2409          default:
2410 2410                  /*
2411 2411                   *  Find the secinfo structure.  We should be able
2412 2412                   *  to find it by the time we reach here.
2413 2413                   *  nfsauth_access() has done the checking.
2414 2414                   */
2415 2415                  secp = NULL;
2416 2416                  for (i = 0; i < exi->exi_export.ex_seccnt; i++) {
2417 2417                          if (exi->exi_export.ex_secinfo[i].s_secinfo.sc_nfsnum ==
2418 2418                              nfsflavor) {
2419 2419                                  secp = &exi->exi_export.ex_secinfo[i];
2420 2420                                  break;
2421 2421                          }
2422 2422                  }
2423 2423  
2424 2424                  if (!secp) {
2425 2425                          cmn_err(CE_NOTE, "nfs_server: client %s%shad "
2426 2426                              "no secinfo data for flavor %d",
2427 2427                              client_name(req), client_addr(req, buf),
2428 2428                              nfsflavor);
2429 2429                          return (0);
2430 2430                  }
2431 2431  
2432 2432                  if (!checkwin(rpcflavor, secp->s_window, req)) {
2433 2433                          cmn_err(CE_NOTE,
2434 2434                              "nfs_server: client %s%sused invalid "
2435 2435                              "auth window value",
2436 2436                              client_name(req), client_addr(req, buf));
2437 2437                          return (0);
2438 2438                  }
2439 2439  
2440 2440                  /*
2441 2441                   * Map root principals listed in the share's root= list to root,
2442 2442                   * and map any other principals that were mapped to root by RPC
2443 2443                   * to anon. If not going to anon, set to rootid (root_mapping).
2444 2444                   */
2445 2445                  if (principal && sec_svc_inrootlist(rpcflavor, principal,
2446 2446                      secp->s_rootcnt, secp->s_rootnames)) {
2447 2447                          if (crgetuid(cr) == 0 && secp->s_rootid == 0)
2448 2448                                  return (1);
2449 2449  
2450 2450                          (void) crsetugid(cr, secp->s_rootid, secp->s_rootid);
2451 2451  
2452 2452                          /*
2453 2453                           * NOTE: If and when kernel-land privilege tracing is
2454 2454                           * added this may have to be replaced with code that
2455 2455                           * retrieves root's supplementary groups (e.g., using
2456 2456                           * kgss_get_group_info()).  In the meantime principals
2457 2457                           * mapped to uid 0 get all privileges, so setting cr's
2458 2458                           * supplementary groups for them does nothing.
2459 2459                           */
2460 2460                          (void) crsetgroups(cr, 0, NULL);
2461 2461  
2462 2462                          return (1);
2463 2463                  }
2464 2464  
2465 2465                  /*
2466 2466                   * Not a root princ, or not in root list, map UID 0/nobody to
2467 2467                   * the anon ID for the share.  (RPC sets cr's UIDs and GIDs to
2468 2468                   * UID_NOBODY and GID_NOBODY, respectively.)
2469 2469                   */
2470 2470                  if (crgetuid(cr) != 0 &&
2471 2471                      (crgetuid(cr) != UID_NOBODY || crgetgid(cr) != GID_NOBODY))
2472 2472                          return (1);
2473 2473  
2474 2474                  anon_res = crsetugid(cr, exi->exi_export.ex_anon,
2475 2475                      exi->exi_export.ex_anon);
2476 2476                  (void) crsetgroups(cr, 0, NULL);
2477 2477                  break;
2478 2478          } /* switch on rpcflavor */
2479 2479  
2480 2480          /*
2481 2481           * Unlike checkauth(), there is no anon_ok override here.  If the
2482 2482           * creds could not be set to the share's anon ID (e.g. anon access
2483 2483           * is disallowed via ex_anon == -1), log the failure and deny access.
2484 2484           */
2485 2485  
2486 2486          if (anon_res != 0) {
2487 2487                  cmn_err(CE_NOTE,
2488 2488                      "nfs_server: client %s%ssent wrong "
2489 2489                      "authentication for %s",
2490 2490                      client_name(req), client_addr(req, buf),
2491 2491                      exi->exi_export.ex_path ?
2492 2492                      exi->exi_export.ex_path : "?");
2493 2493                  return (0);
2494 2494          }
2495 2495  
2496 2496          return (1);
2497 2497  }
2498 2498  
2499 2499  
2500 2500  static char *
2501 2501  client_name(struct svc_req *req)
2502 2502  {
2503 2503          char *hostname = NULL;
2504 2504  
2505 2505          /*
2506 2506           * If it's a Unix cred then use the
2507 2507           * hostname from the credential.
2508 2508           */
2509 2509          if (req->rq_cred.oa_flavor == AUTH_UNIX) {
2510 2510                  hostname = ((struct authunix_parms *)
2511 2511                      req->rq_clntcred)->aup_machname;
2512 2512          }
2513 2513          if (hostname == NULL)
2514 2514                  hostname = "";
2515 2515  
2516 2516          return (hostname);
2517 2517  }
2518 2518  
2519 2519  static char *
2520 2520  client_addr(struct svc_req *req, char *buf)
2521 2521  {
2522 2522          struct sockaddr *ca;
2523 2523          uchar_t *b;
2524 2524          char *frontspace = "";
2525 2525  
2526 2526          /*
2527 2527           * We assume we are called in tandem with client_name and the
2528 2528           * format string looks like "...client %s%sblah blah..."
2529 2529           *
2530 2530           * If it's a Unix cred then client_name returned
2531 2531           * a host name, so we need to insert a space between host name
2532 2532           * and IP address.
2533 2533           */
2534 2534          if (req->rq_cred.oa_flavor == AUTH_UNIX)
2535 2535                  frontspace = " ";
2536 2536  
2537 2537          /*
2538 2538           * Convert the caller's IP address to a dotted string
2539 2539           */
2540 2540          ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2541 2541  
2542 2542          if (ca->sa_family == AF_INET) {
2543 2543                  b = (uchar_t *)&((struct sockaddr_in *)ca)->sin_addr;
2544 2544                  (void) sprintf(buf, "%s(%d.%d.%d.%d) ", frontspace,
2545 2545                      b[0] & 0xFF, b[1] & 0xFF, b[2] & 0xFF, b[3] & 0xFF);
2546 2546          } else if (ca->sa_family == AF_INET6) {
2547 2547                  struct sockaddr_in6 *sin6;
2548 2548                  sin6 = (struct sockaddr_in6 *)ca;
2549 2549                  (void) kinet_ntop6((uchar_t *)&sin6->sin6_addr,
2550 2550                      buf, INET6_ADDRSTRLEN);
2551 2551  
2552 2552          } else {
2553 2553  
2554 2554                  /*
2555 2555                   * No IP address to print. If there was a host name
2556 2556                   * printed, then we print a space.
2557 2557                   */
2558 2558                  (void) sprintf(buf, frontspace);
2559 2559          }
2560 2560  
  
    | 
      ↓ open down ↓ | 
    573 lines elided | 
    
      ↑ open up ↑ | 
  
2561 2561          return (buf);
2562 2562  }
2563 2563  
2564 2564  /*
2565 2565   * NFS Server initialization routine.  This routine should only be called
2566 2566   * once.  It performs the following tasks:
2567 2567   *      - Call sub-initialization routines (localize access to variables)
2568 2568   *      - Initialize all locks
2569 2569   *      - initialize the version 3 write verifier
2570 2570   */
2571      -int
     2571 +void
2572 2572  nfs_srvinit(void)
2573 2573  {
2574      -        int error;
2575 2574  
2576      -        error = nfs_exportinit();
2577      -        if (error != 0)
2578      -                return (error);
2579      -        error = rfs4_srvrinit();
2580      -        if (error != 0) {
2581      -                nfs_exportfini();
2582      -                return (error);
2583      -        }
     2575 +        /* Truly global stuff in this module (not per zone) */
     2576 +        rw_init(&nfssrv_globals_rwl, NULL, RW_DEFAULT, NULL);
     2577 +        list_create(&nfssrv_globals_list, sizeof (nfs_globals_t),
     2578 +            offsetof(nfs_globals_t, nfs_g_link));
     2579 +        tsd_create(&nfs_server_tsd_key, NULL);
     2580 +
     2581 +        /* The order here is important */
     2582 +        nfs_exportinit();
2584 2583          rfs_srvrinit();
2585 2584          rfs3_srvrinit();
     2585 +        rfs4_srvrinit();
2586 2586          nfsauth_init();
2587 2587  
2588      -        /* Init the stuff to control start/stop */
2589      -        nfs_server_upordown = NFS_SERVER_STOPPED;
2590      -        mutex_init(&nfs_server_upordown_lock, NULL, MUTEX_DEFAULT, NULL);
2591      -        cv_init(&nfs_server_upordown_cv, NULL, CV_DEFAULT, NULL);
2592      -        mutex_init(&rdma_wait_mutex, NULL, MUTEX_DEFAULT, NULL);
2593      -        cv_init(&rdma_wait_cv, NULL, CV_DEFAULT, NULL);
2594      -
2595      -        return (0);
     2588 +        /*
     2589 +         * NFS server zone-specific global variables
     2590 +         * Note the zone_init is called for the GZ here.
     2591 +         */
     2592 +        zone_key_create(&nfssrv_zone_key, nfs_server_zone_init,
     2593 +            nfs_server_zone_shutdown, nfs_server_zone_fini);
2596 2594  }
2597 2595  
2598 2596  /*
2599 2597   * NFS Server finalization routine. This routine is called to cleanup the
2600 2598   * initialization work previously performed if the NFS server module could
2601 2599   * not be loaded correctly.
2602 2600   */
2603 2601  void
2604 2602  nfs_srvfini(void)
2605 2603  {
     2604 +
     2605 +        /*
     2606 +         * NFS server zone-specific global variables
     2607 +         * Note the zone_fini is called for the GZ here.
     2608 +         */
     2609 +        (void) zone_key_delete(nfssrv_zone_key);
     2610 +
     2611 +        /* The order here is important (reverse of init) */
2606 2612          nfsauth_fini();
     2613 +        rfs4_srvrfini();
2607 2614          rfs3_srvrfini();
2608 2615          rfs_srvrfini();
2609 2616          nfs_exportfini();
2610 2617  
2611      -        mutex_destroy(&nfs_server_upordown_lock);
2612      -        cv_destroy(&nfs_server_upordown_cv);
2613      -        mutex_destroy(&rdma_wait_mutex);
2614      -        cv_destroy(&rdma_wait_cv);
     2618 +        /* Truly global stuff in this module (not per zone) */
     2619 +        tsd_destroy(&nfs_server_tsd_key);
     2620 +        list_destroy(&nfssrv_globals_list);
     2621 +        rw_destroy(&nfssrv_globals_rwl);
2615 2622  }
2616 2623  
2617 2624  /*
2618      - * Set up an iovec array of up to cnt pointers.
     2625 + * Zone init, shutdown, fini functions for the NFS server
     2626 + *
     2627 + * This design is careful to create the entire hierarchy of
     2628 + * NFS server "globals" (including those created by various
     2629 + * per-module *_zone_init functions, etc.) so that all these
     2630 + * objects have exactly the same lifetime.
     2631 + *
     2632 + * These objects are also kept on a list for two reasons:
     2633 + * 1: It makes finding these in mdb _much_ easier.
     2634 + * 2: It allows operating across all zone globals for
     2635 + *    functions like nfs_auth.c:exi_cache_reclaim
2619 2636   */
     2637 +static void *
     2638 +nfs_server_zone_init(zoneid_t zoneid)
     2639 +{
     2640 +        nfs_globals_t *ng;
2620 2641  
     2642 +        ng = kmem_zalloc(sizeof (*ng), KM_SLEEP);
     2643 +
     2644 +        ng->nfs_versmin = NFS_VERSMIN_DEFAULT;
     2645 +        ng->nfs_versmax = NFS_VERSMAX_DEFAULT;
     2646 +
     2647 +        /* Init the stuff to control start/stop */
     2648 +        ng->nfs_server_upordown = NFS_SERVER_STOPPED;
     2649 +        mutex_init(&ng->nfs_server_upordown_lock, NULL, MUTEX_DEFAULT, NULL);
     2650 +        cv_init(&ng->nfs_server_upordown_cv, NULL, CV_DEFAULT, NULL);
     2651 +        mutex_init(&ng->rdma_wait_mutex, NULL, MUTEX_DEFAULT, NULL);
     2652 +        cv_init(&ng->rdma_wait_cv, NULL, CV_DEFAULT, NULL);
     2653 +
     2654 +        ng->nfs_zoneid = zoneid;
     2655 +
     2656 +        /*
     2657 +         * Order here is important.
     2658 +         * export init must precede srv init calls.
     2659 +         */
     2660 +        nfs_export_zone_init(ng);
     2661 +        rfs_stat_zone_init(ng);
     2662 +        rfs_srv_zone_init(ng);
     2663 +        rfs3_srv_zone_init(ng);
     2664 +        rfs4_srv_zone_init(ng);
     2665 +        nfsauth_zone_init(ng);
     2666 +
     2667 +        rw_enter(&nfssrv_globals_rwl, RW_WRITER);
     2668 +        list_insert_tail(&nfssrv_globals_list, ng);
     2669 +        rw_exit(&nfssrv_globals_rwl);
     2670 +
     2671 +        return (ng);
     2672 +}
     2673 +
     2674 +/* ARGSUSED */
     2675 +static void
     2676 +nfs_server_zone_shutdown(zoneid_t zoneid, void *data)
     2677 +{
     2678 +        nfs_globals_t *ng;
     2679 +
     2680 +        ng = (nfs_globals_t *)data;
     2681 +
     2682 +        /*
     2683 +         * Order is like _fini, but only
     2684 +         * some modules need this hook.
     2685 +         */
     2686 +        nfsauth_zone_shutdown(ng);
     2687 +        nfs_export_zone_shutdown(ng);
     2688 +}
     2689 +
     2690 +/* ARGSUSED */
     2691 +static void
     2692 +nfs_server_zone_fini(zoneid_t zoneid, void *data)
     2693 +{
     2694 +        nfs_globals_t *ng;
     2695 +
     2696 +        ng = (nfs_globals_t *)data;
     2697 +
     2698 +        rw_enter(&nfssrv_globals_rwl, RW_WRITER);
     2699 +        list_remove(&nfssrv_globals_list, ng);
     2700 +        rw_exit(&nfssrv_globals_rwl);
     2701 +
     2702 +        /*
     2703 +         * Order here is important.
     2704 +         * reverse order from init
     2705 +         */
     2706 +        nfsauth_zone_fini(ng);
     2707 +        rfs4_srv_zone_fini(ng);
     2708 +        rfs3_srv_zone_fini(ng);
     2709 +        rfs_srv_zone_fini(ng);
     2710 +        rfs_stat_zone_fini(ng);
     2711 +        nfs_export_zone_fini(ng);
     2712 +
     2713 +        mutex_destroy(&ng->nfs_server_upordown_lock);
     2714 +        cv_destroy(&ng->nfs_server_upordown_cv);
     2715 +        mutex_destroy(&ng->rdma_wait_mutex);
     2716 +        cv_destroy(&ng->rdma_wait_cv);
     2717 +
     2718 +        kmem_free(ng, sizeof (*ng));
     2719 +}
     2720 +
     2721 +/*
     2722 + * Set up an iovec array of up to cnt pointers.
     2723 + */
2621 2724  void
2622 2725  mblk_to_iov(mblk_t *m, int cnt, struct iovec *iovp)
2623 2726  {
2624 2727          while (m != NULL && cnt-- > 0) {
2625 2728                  iovp->iov_base = (caddr_t)m->b_rptr;
2626 2729                  iovp->iov_len = (m->b_wptr - m->b_rptr);
2627 2730                  iovp++;
2628 2731                  m = m->b_cont;
2629 2732          }
2630 2733  }
2631 2734  
2632 2735  /*
2633 2736   * Common code between NFS Version 2 and NFS Version 3 for the public
2634 2737   * filehandle multicomponent lookups.
2635 2738   */
2636 2739  
2637 2740  /*
2638 2741   * Public filehandle evaluation of a multi-component lookup, following
2639 2742   * symbolic links, if necessary. This may result in a vnode in another
2640 2743   * filesystem, which is OK as long as the other filesystem is exported.
2641 2744   *
2642 2745   * Note that the exi will be set either to NULL or a new reference to the
2643 2746   * exportinfo struct that corresponds to the vnode of the multi-component path.
2644 2747   * It is the caller's responsibility to release this reference.
2645 2748   */
2646 2749  int
2647 2750  rfs_publicfh_mclookup(char *p, vnode_t *dvp, cred_t *cr, vnode_t **vpp,
2648 2751      struct exportinfo **exi, struct sec_ol *sec)
2649 2752  {
2650 2753          int pathflag;
2651 2754          vnode_t *mc_dvp = NULL;
2652 2755          vnode_t *realvp;
2653 2756          int error;
2654 2757  
2655 2758          *exi = NULL;
2656 2759  
2657 2760          /*
2658 2761           * check if the given path is a url or native path. Since p is
2659 2762           * modified by MCLpath(), it may be empty after returning from
2660 2763           * there, and should be checked.
2661 2764           */
2662 2765          if ((pathflag = MCLpath(&p)) == -1)
2663 2766                  return (EIO);
2664 2767  
2665 2768          /*
2666 2769           * If pathflag is SECURITY_QUERY, turn the SEC_QUERY bit
2667 2770           * on in sec->sec_flags. This bit will later serve as an
2668 2771           * indication in makefh_ol() or makefh3_ol() to overload the
2669 2772           * filehandle to contain the sec modes used by the server for
2670 2773           * the path.
2671 2774           */
2672 2775          if (pathflag == SECURITY_QUERY) {
2673 2776                  if ((sec->sec_index = (uint_t)(*p)) > 0) {
2674 2777                          sec->sec_flags |= SEC_QUERY;
2675 2778                          p++;
2676 2779                          if ((pathflag = MCLpath(&p)) == -1)
2677 2780                                  return (EIO);
2678 2781                  } else {
2679 2782                          cmn_err(CE_NOTE,
2680 2783                              "nfs_server: invalid security index %d, "
2681 2784                              "violating WebNFS SNEGO protocol.", sec->sec_index);
2682 2785                          return (EIO);
2683 2786                  }
2684 2787          }
2685 2788  
2686 2789          if (p[0] == '\0') {
2687 2790                  error = ENOENT;
2688 2791                  goto publicfh_done;
2689 2792          }
2690 2793  
2691 2794          error = rfs_pathname(p, &mc_dvp, vpp, dvp, cr, pathflag);
2692 2795  
2693 2796          /*
2694 2797           * If name resolves to "/" we get EINVAL since we asked for
2695 2798           * the vnode of the directory that the file is in. Try again
2696 2799           * with NULL directory vnode.
2697 2800           */
2698 2801          if (error == EINVAL) {
2699 2802                  error = rfs_pathname(p, NULL, vpp, dvp, cr, pathflag);
2700 2803                  if (!error) {
2701 2804                          ASSERT(*vpp != NULL);
2702 2805                          if ((*vpp)->v_type == VDIR) {
2703 2806                                  VN_HOLD(*vpp);
2704 2807                                  mc_dvp = *vpp;
2705 2808                          } else {
2706 2809                                  /*
2707 2810                                   * This should not happen, the filesystem is
2708 2811                                   * in an inconsistent state. Fail the lookup
2709 2812                                   * at this point.
2710 2813                                   */
2711 2814                                  VN_RELE(*vpp);
2712 2815                                  error = EINVAL;
2713 2816                          }
2714 2817                  }
2715 2818          }
2716 2819  
2717 2820          if (error)
2718 2821                  goto publicfh_done;
2719 2822  
2720 2823          if (*vpp == NULL) {
2721 2824                  error = ENOENT;
2722 2825                  goto publicfh_done;
2723 2826          }
2724 2827  
2725 2828          ASSERT(mc_dvp != NULL);
2726 2829          ASSERT(*vpp != NULL);
2727 2830  
2728 2831          if ((*vpp)->v_type == VDIR) {
2729 2832                  do {
2730 2833                          /*
2731 2834                           * *vpp may be an AutoFS node, so we perform
2732 2835                           * a VOP_ACCESS() to trigger the mount of the intended
2733 2836                           * filesystem, so we can perform the lookup in the
2734 2837                           * intended filesystem.
2735 2838                           */
2736 2839                          (void) VOP_ACCESS(*vpp, 0, 0, cr, NULL);
2737 2840  
2738 2841                          /*
2739 2842                           * If vnode is covered, get the
2740 2843                           * topmost vnode.
2741 2844                           */
2742 2845                          if (vn_mountedvfs(*vpp) != NULL) {
2743 2846                                  error = traverse(vpp);
2744 2847                                  if (error) {
2745 2848                                          VN_RELE(*vpp);
2746 2849                                          goto publicfh_done;
2747 2850                                  }
2748 2851                          }
2749 2852  
2750 2853                          if (VOP_REALVP(*vpp, &realvp, NULL) == 0 &&
2751 2854                              realvp != *vpp) {
2752 2855                                  /*
2753 2856                                   * If realvp is different from *vpp
2754 2857                                   * then release our reference on *vpp, so that
2755 2858                                   * the export access check is performed on the
2756 2859                                   * real filesystem instead.
2757 2860                                   */
2758 2861                                  VN_HOLD(realvp);
2759 2862                                  VN_RELE(*vpp);
2760 2863                                  *vpp = realvp;
2761 2864                          } else {
2762 2865                                  break;
2763 2866                          }
2764 2867                  /* LINTED */
2765 2868                  } while (TRUE);
2766 2869  
2767 2870                  /*
2768 2871                   * Let nfs_vptexi() figure what the real parent is.
2769 2872                   */
2770 2873                  VN_RELE(mc_dvp);
2771 2874                  mc_dvp = NULL;
2772 2875  
2773 2876          } else {
2774 2877                  /*
2775 2878                   * If vnode is covered, get the
2776 2879                   * topmost vnode.
2777 2880                   */
2778 2881                  if (vn_mountedvfs(mc_dvp) != NULL) {
2779 2882                          error = traverse(&mc_dvp);
2780 2883                          if (error) {
2781 2884                                  VN_RELE(*vpp);
2782 2885                                  goto publicfh_done;
2783 2886                          }
2784 2887                  }
2785 2888  
2786 2889                  if (VOP_REALVP(mc_dvp, &realvp, NULL) == 0 &&
2787 2890                      realvp != mc_dvp) {
2788 2891                          /*
2789 2892                           * *vpp is a file, obtain realvp of the parent
2790 2893                           * directory vnode.
2791 2894                           */
2792 2895                          VN_HOLD(realvp);
2793 2896                          VN_RELE(mc_dvp);
2794 2897                          mc_dvp = realvp;
2795 2898                  }
2796 2899          }
2797 2900  
2798 2901          /*
2799 2902           * The pathname may take us from the public filesystem to another.
2800 2903           * If that's the case then just set the exportinfo to the new export
2801 2904           * and build filehandle for it. Thanks to per-access checking there's
2802 2905           * no security issues with doing this. If the client is not allowed
2803 2906           * access to this new export then it will get an access error when it
2804 2907           * tries to use the filehandle
2805 2908           */
2806 2909          if (error = nfs_check_vpexi(mc_dvp, *vpp, kcred, exi)) {
2807 2910                  VN_RELE(*vpp);
2808 2911                  goto publicfh_done;
2809 2912          }
2810 2913  
2811 2914          /*
2812 2915           * Not allowed access to pseudo exports.
2813 2916           */
2814 2917          if (PSEUDO(*exi)) {
2815 2918                  error = ENOENT;
2816 2919                  VN_RELE(*vpp);
2817 2920                  goto publicfh_done;
2818 2921          }
2819 2922  
2820 2923          /*
2821 2924           * Do a lookup for the index file. We know the index option doesn't
2822 2925           * allow paths through handling in the share command, so mc_dvp will
2823 2926           * be the parent for the index file vnode, if it's present. Use
2824 2927           * temporary pointers to preserve and reuse the vnode pointers of the
2825 2928           * original directory in case there's no index file. Note that the
2826 2929           * index file is a native path, and should not be interpreted by
2827 2930           * the URL parser in rfs_pathname()
2828 2931           */
2829 2932          if (((*exi)->exi_export.ex_flags & EX_INDEX) &&
2830 2933              ((*vpp)->v_type == VDIR) && (pathflag == URLPATH)) {
2831 2934                  vnode_t *tvp, *tmc_dvp; /* temporary vnode pointers */
2832 2935  
2833 2936                  tmc_dvp = mc_dvp;
2834 2937                  mc_dvp = tvp = *vpp;
2835 2938  
2836 2939                  error = rfs_pathname((*exi)->exi_export.ex_index, NULL, vpp,
2837 2940                      mc_dvp, cr, NATIVEPATH);
2838 2941  
2839 2942                  if (error == ENOENT) {
2840 2943                          *vpp = tvp;
2841 2944                          mc_dvp = tmc_dvp;
2842 2945                          error = 0;
2843 2946                  } else {        /* ok or error other than ENOENT */
2844 2947                          if (tmc_dvp)
2845 2948                                  VN_RELE(tmc_dvp);
2846 2949                          if (error)
2847 2950                                  goto publicfh_done;
  
    | 
      ↓ open down ↓ | 
    217 lines elided | 
    
      ↑ open up ↑ | 
  
2848 2951  
2849 2952                          /*
2850 2953                           * Found a valid vp for index "filename". Sanity check
2851 2954                           * for odd case where a directory is provided as index
2852 2955                           * option argument and leads us to another filesystem
2853 2956                           */
2854 2957  
2855 2958                          /* Release the reference on the old exi value */
2856 2959                          ASSERT(*exi != NULL);
2857 2960                          exi_rele(*exi);
     2961 +                        *exi = NULL;
2858 2962  
2859 2963                          if (error = nfs_check_vpexi(mc_dvp, *vpp, kcred, exi)) {
2860 2964                                  VN_RELE(*vpp);
2861 2965                                  goto publicfh_done;
2862 2966                          }
     2967 +                        /* Have a new *exi */
2863 2968                  }
2864 2969          }
2865 2970  
2866 2971  publicfh_done:
2867 2972          if (mc_dvp)
2868 2973                  VN_RELE(mc_dvp);
2869 2974  
2870 2975          return (error);
2871 2976  }
2872 2977  
2873 2978  /*
2874 2979   * Evaluate a multi-component path
2875 2980   */
2876 2981  int
2877 2982  rfs_pathname(
2878 2983          char *path,                     /* pathname to evaluate */
  
    | 
      ↓ open down ↓ | 
    6 lines elided | 
    
      ↑ open up ↑ | 
  
2879 2984          vnode_t **dirvpp,               /* ret for ptr to parent dir vnode */
2880 2985          vnode_t **compvpp,              /* ret for ptr to component vnode */
2881 2986          vnode_t *startdvp,              /* starting vnode */
2882 2987          cred_t *cr,                     /* user's credential */
2883 2988          int pathflag)                   /* flag to identify path, e.g. URL */
2884 2989  {
2885 2990          char namebuf[TYPICALMAXPATHLEN];
2886 2991          struct pathname pn;
2887 2992          int error;
2888 2993  
     2994 +        ASSERT3U(crgetzoneid(cr), ==, curzone->zone_id);
     2995 +
2889 2996          /*
2890 2997           * If pathname starts with '/', then set startdvp to root.
2891 2998           */
2892 2999          if (*path == '/') {
2893 3000                  while (*path == '/')
2894 3001                          path++;
2895 3002  
2896      -                startdvp = rootdir;
     3003 +                startdvp = ZONE_ROOTVP();
2897 3004          }
2898 3005  
2899 3006          error = pn_get_buf(path, UIO_SYSSPACE, &pn, namebuf, sizeof (namebuf));
2900 3007          if (error == 0) {
2901 3008                  /*
2902 3009                   * Call the URL parser for URL paths to modify the original
2903 3010                   * string to handle any '%' encoded characters that exist.
2904 3011                   * Done here to avoid an extra bcopy in the lookup.
2905 3012                   * We need to be careful about pathlen's. We know that
2906 3013                   * rfs_pathname() is called with a non-empty path. However,
2907 3014                   * it could be emptied due to the path simply being all /'s,
2908 3015                   * which is valid to proceed with the lookup, or due to the
  
    | 
      ↓ open down ↓ | 
    2 lines elided | 
    
      ↑ open up ↑ | 
  
2909 3016                   * URL parser finding an encoded null character at the
2910 3017                   * beginning of path which should not proceed with the lookup.
2911 3018                   */
2912 3019                  if (pn.pn_pathlen != 0 && pathflag == URLPATH) {
2913 3020                          URLparse(pn.pn_path);
2914 3021                          if ((pn.pn_pathlen = strlen(pn.pn_path)) == 0)
2915 3022                                  return (ENOENT);
2916 3023                  }
2917 3024                  VN_HOLD(startdvp);
2918 3025                  error = lookuppnvp(&pn, NULL, NO_FOLLOW, dirvpp, compvpp,
2919      -                    rootdir, startdvp, cr);
     3026 +                    ZONE_ROOTVP(), startdvp, cr);
2920 3027          }
2921 3028          if (error == ENAMETOOLONG) {
2922 3029                  /*
2923 3030                   * This thread used a pathname > TYPICALMAXPATHLEN bytes long.
2924 3031                   */
2925 3032                  if (error = pn_get(path, UIO_SYSSPACE, &pn))
2926 3033                          return (error);
2927 3034                  if (pn.pn_pathlen != 0 && pathflag == URLPATH) {
2928 3035                          URLparse(pn.pn_path);
2929 3036                          if ((pn.pn_pathlen = strlen(pn.pn_path)) == 0) {
2930 3037                                  pn_free(&pn);
2931 3038                                  return (ENOENT);
2932 3039                          }
2933 3040                  }
2934 3041                  VN_HOLD(startdvp);
2935 3042                  error = lookuppnvp(&pn, NULL, NO_FOLLOW, dirvpp, compvpp,
2936      -                    rootdir, startdvp, cr);
     3043 +                    ZONE_ROOTVP(), startdvp, cr);
2937 3044                  pn_free(&pn);
2938 3045          }
2939 3046  
2940 3047          return (error);
2941 3048  }
2942 3049  
2943 3050  /*
2944 3051   * Adapt the multicomponent lookup path depending on the pathtype
2945 3052   */
2946 3053  static int
2947 3054  MCLpath(char **path)
2948 3055  {
2949 3056          unsigned char c = (unsigned char)**path;
2950 3057  
2951 3058          /*
2952 3059           * If the MCL path is between 0x20 and 0x7E (graphic printable
2953 3060           * character of the US-ASCII coded character set), it's a URL path,
2954 3061           * per RFC 1738.
2955 3062           */
2956 3063          if (c >= 0x20 && c <= 0x7E)
2957 3064                  return (URLPATH);
2958 3065  
2959 3066          /*
2960 3067           * If the first octet of the MCL path is not an ASCII character
2961 3068           * then it must be interpreted as a tag value that describes the
2962 3069           * format of the remaining octets of the MCL path.
2963 3070           *
2964 3071           * If the first octet of the MCL path is 0x81 it is a query
2965 3072           * for the security info.
2966 3073           */
2967 3074          switch (c) {
2968 3075          case 0x80:      /* native path, i.e. MCL via mount protocol */
2969 3076                  (*path)++;
2970 3077                  return (NATIVEPATH);
2971 3078          case 0x81:      /* security query */
2972 3079                  (*path)++;
2973 3080                  return (SECURITY_QUERY);
2974 3081          default:
2975 3082                  return (-1);
2976 3083          }
2977 3084  }
2978 3085  
2979 3086  #define fromhex(c)  ((c >= '0' && c <= '9') ? (c - '0') : \
2980 3087                          ((c >= 'A' && c <= 'F') ? (c - 'A' + 10) :\
2981 3088                          ((c >= 'a' && c <= 'f') ? (c - 'a' + 10) : 0)))
2982 3089  
2983 3090  /*
2984 3091   * The implementation of URLparse guarantees that the final string will
2985 3092   * fit in the original one. Replaces '%' occurrences followed by 2 characters
2986 3093   * with its corresponding hexadecimal character.
2987 3094   */
2988 3095  static void
2989 3096  URLparse(char *str)
2990 3097  {
2991 3098          char *p, *q;
2992 3099  
2993 3100          p = q = str;
2994 3101          while (*p) {
2995 3102                  *q = *p;
2996 3103                  if (*p++ == '%') {
2997 3104                          if (*p) {
2998 3105                                  *q = fromhex(*p) * 16;
2999 3106                                  p++;
3000 3107                                  if (*p) {
3001 3108                                          *q += fromhex(*p);
3002 3109                                          p++;
3003 3110                                  }
3004 3111                          }
3005 3112                  }
3006 3113                  q++;
3007 3114          }
3008 3115          *q = '\0';
3009 3116  }
3010 3117  
3011 3118  
3012 3119  /*
3013 3120   * Get the export information for the lookup vnode, and verify it's
3014 3121   * usable.
3015 3122   */
3016 3123  int
3017 3124  nfs_check_vpexi(vnode_t *mc_dvp, vnode_t *vp, cred_t *cr,
3018 3125      struct exportinfo **exi)
3019 3126  {
3020 3127          int walk;
3021 3128          int error = 0;
3022 3129  
3023 3130          *exi = nfs_vptoexi(mc_dvp, vp, cr, &walk, NULL, FALSE);
3024 3131          if (*exi == NULL)
3025 3132                  error = EACCES;
3026 3133          else {
3027 3134                  /*
3028 3135                   * If nosub is set for this export then
3029 3136                   * a lookup relative to the public fh
  
    | 
      ↓ open down ↓ | 
    83 lines elided | 
    
      ↑ open up ↑ | 
  
3030 3137                   * must not terminate below the
3031 3138                   * exported directory.
3032 3139                   */
3033 3140                  if ((*exi)->exi_export.ex_flags & EX_NOSUB && walk > 0)
3034 3141                          error = EACCES;
3035 3142          }
3036 3143  
3037 3144          return (error);
3038 3145  }
3039 3146  
3040      -/*
3041      - * Do the main work of handling HA-NFSv4 Resource Group failover on
3042      - * Sun Cluster.
3043      - * We need to detect whether any RG admin paths have been added or removed,
3044      - * and adjust resources accordingly.
3045      - * Currently we're using a very inefficient algorithm, ~ 2 * O(n**2). In
3046      - * order to scale, the list and array of paths need to be held in more
3047      - * suitable data structures.
3048      - */
3049      -static void
3050      -hanfsv4_failover(void)
3051      -{
3052      -        int i, start_grace, numadded_paths = 0;
3053      -        char **added_paths = NULL;
3054      -        rfs4_dss_path_t *dss_path;
3055      -
3056      -        /*
3057      -         * Note: currently, rfs4_dss_pathlist cannot be NULL, since
3058      -         * it will always include an entry for NFS4_DSS_VAR_DIR. If we
3059      -         * make the latter dynamically specified too, the following will
3060      -         * need to be adjusted.
3061      -         */
3062      -
3063      -        /*
3064      -         * First, look for removed paths: RGs that have been failed-over
3065      -         * away from this node.
3066      -         * Walk the "currently-serving" rfs4_dss_pathlist and, for each
3067      -         * path, check if it is on the "passed-in" rfs4_dss_newpaths array
3068      -         * from nfsd. If not, that RG path has been removed.
3069      -         *
3070      -         * Note that nfsd has sorted rfs4_dss_newpaths for us, and removed
3071      -         * any duplicates.
3072      -         */
3073      -        dss_path = rfs4_dss_pathlist;
3074      -        do {
3075      -                int found = 0;
3076      -                char *path = dss_path->path;
3077      -
3078      -                /* used only for non-HA so may not be removed */
3079      -                if (strcmp(path, NFS4_DSS_VAR_DIR) == 0) {
3080      -                        dss_path = dss_path->next;
3081      -                        continue;
3082      -                }
3083      -
3084      -                for (i = 0; i < rfs4_dss_numnewpaths; i++) {
3085      -                        int cmpret;
3086      -                        char *newpath = rfs4_dss_newpaths[i];
3087      -
3088      -                        /*
3089      -                         * Since nfsd has sorted rfs4_dss_newpaths for us,
3090      -                         * once the return from strcmp is negative we know
3091      -                         * we've passed the point where "path" should be,
3092      -                         * and can stop searching: "path" has been removed.
3093      -                         */
3094      -                        cmpret = strcmp(path, newpath);
3095      -                        if (cmpret < 0)
3096      -                                break;
3097      -                        if (cmpret == 0) {
3098      -                                found = 1;
3099      -                                break;
3100      -                        }
3101      -                }
3102      -
3103      -                if (found == 0) {
3104      -                        unsigned index = dss_path->index;
3105      -                        rfs4_servinst_t *sip = dss_path->sip;
3106      -                        rfs4_dss_path_t *path_next = dss_path->next;
3107      -
3108      -                        /*
3109      -                         * This path has been removed.
3110      -                         * We must clear out the servinst reference to
3111      -                         * it, since it's now owned by another
3112      -                         * node: we should not attempt to touch it.
3113      -                         */
3114      -                        ASSERT(dss_path == sip->dss_paths[index]);
3115      -                        sip->dss_paths[index] = NULL;
3116      -
3117      -                        /* remove from "currently-serving" list, and destroy */
3118      -                        remque(dss_path);
3119      -                        /* allow for NUL */
3120      -                        kmem_free(dss_path->path, strlen(dss_path->path) + 1);
3121      -                        kmem_free(dss_path, sizeof (rfs4_dss_path_t));
3122      -
3123      -                        dss_path = path_next;
3124      -                } else {
3125      -                        /* path was found; not removed */
3126      -                        dss_path = dss_path->next;
3127      -                }
3128      -        } while (dss_path != rfs4_dss_pathlist);
3129      -
3130      -        /*
3131      -         * Now, look for added paths: RGs that have been failed-over
3132      -         * to this node.
3133      -         * Walk the "passed-in" rfs4_dss_newpaths array from nfsd and,
3134      -         * for each path, check if it is on the "currently-serving"
3135      -         * rfs4_dss_pathlist. If not, that RG path has been added.
3136      -         *
3137      -         * Note: we don't do duplicate detection here; nfsd does that for us.
3138      -         *
3139      -         * Note: numadded_paths <= rfs4_dss_numnewpaths, which gives us
3140      -         * an upper bound for the size needed for added_paths[numadded_paths].
3141      -         */
3142      -
3143      -        /* probably more space than we need, but guaranteed to be enough */
3144      -        if (rfs4_dss_numnewpaths > 0) {
3145      -                size_t sz = rfs4_dss_numnewpaths * sizeof (char *);
3146      -                added_paths = kmem_zalloc(sz, KM_SLEEP);
3147      -        }
3148      -
3149      -        /* walk the "passed-in" rfs4_dss_newpaths array from nfsd */
3150      -        for (i = 0; i < rfs4_dss_numnewpaths; i++) {
3151      -                int found = 0;
3152      -                char *newpath = rfs4_dss_newpaths[i];
3153      -
3154      -                dss_path = rfs4_dss_pathlist;
3155      -                do {
3156      -                        char *path = dss_path->path;
3157      -
3158      -                        /* used only for non-HA */
3159      -                        if (strcmp(path, NFS4_DSS_VAR_DIR) == 0) {
3160      -                                dss_path = dss_path->next;
3161      -                                continue;
3162      -                        }
3163      -
3164      -                        if (strncmp(path, newpath, strlen(path)) == 0) {
3165      -                                found = 1;
3166      -                                break;
3167      -                        }
3168      -
3169      -                        dss_path = dss_path->next;
3170      -                } while (dss_path != rfs4_dss_pathlist);
3171      -
3172      -                if (found == 0) {
3173      -                        added_paths[numadded_paths] = newpath;
3174      -                        numadded_paths++;
3175      -                }
3176      -        }
3177      -
3178      -        /* did we find any added paths? */
3179      -        if (numadded_paths > 0) {
3180      -                /* create a new server instance, and start its grace period */
3181      -                start_grace = 1;
3182      -                rfs4_servinst_create(start_grace, numadded_paths, added_paths);
3183      -
3184      -                /* read in the stable storage state from these paths */
3185      -                rfs4_dss_readstate(numadded_paths, added_paths);
3186      -
3187      -                /*
3188      -                 * Multiple failovers during a grace period will cause
3189      -                 * clients of the same resource group to be partitioned
3190      -                 * into different server instances, with different
3191      -                 * grace periods.  Since clients of the same resource
3192      -                 * group must be subject to the same grace period,
3193      -                 * we need to reset all currently active grace periods.
3194      -                 */
3195      -                rfs4_grace_reset_all();
3196      -        }
3197      -
3198      -        if (rfs4_dss_numnewpaths > 0)
3199      -                kmem_free(added_paths, rfs4_dss_numnewpaths * sizeof (char *));
3200      -}
3201      -
3202 3147  /*
3203 3148   * Used by NFSv3 and NFSv4 server to query label of
3204 3149   * a pathname component during lookup/access ops.
3205 3150   */
3206 3151  ts_label_t *
3207 3152  nfs_getflabel(vnode_t *vp, struct exportinfo *exi)
3208 3153  {
3209 3154          zone_t *zone;
3210 3155          ts_label_t *zone_label;
3211 3156          char *path;
3212 3157  
3213 3158          mutex_enter(&vp->v_lock);
3214 3159          if (vp->v_path != vn_vpath_empty) {
3215 3160                  zone = zone_find_by_any_path(vp->v_path, B_FALSE);
3216 3161                  mutex_exit(&vp->v_lock);
3217 3162          } else {
3218 3163                  /*
3219 3164                   * v_path not cached. Fall back on pathname of exported
3220 3165                   * file system as we rely on pathname from which we can
3221 3166                   * derive a label. The exported file system portion of
3222 3167                   * path is sufficient to obtain a label.
3223 3168                   */
3224 3169                  path = exi->exi_export.ex_path;
3225 3170                  if (path == NULL) {
3226 3171                          mutex_exit(&vp->v_lock);
3227 3172                          return (NULL);
3228 3173                  }
3229 3174                  zone = zone_find_by_any_path(path, B_FALSE);
3230 3175                  mutex_exit(&vp->v_lock);
3231 3176          }
3232 3177          /*
3233 3178           * Caller has verified that the file is either
3234 3179           * exported or visible. So if the path falls in
3235 3180           * global zone, admin_low is returned; otherwise
3236 3181           * the zone's label is returned.
3237 3182           */
3238 3183          zone_label = zone->zone_slabel;
3239 3184          label_hold(zone_label);
3240 3185          zone_rele(zone);
3241 3186          return (zone_label);
3242 3187  }
3243 3188  
3244 3189  /*
3245 3190   * TX NFS routine used by NFSv3 and NFSv4 to do label check
3246 3191   * on client label and server's file object label.
3247 3192   */
3248 3193  boolean_t
3249 3194  do_rfs_label_check(bslabel_t *clabel, vnode_t *vp, int flag,
3250 3195      struct exportinfo *exi)
3251 3196  {
3252 3197          bslabel_t *slabel;
3253 3198          ts_label_t *tslabel;
3254 3199          boolean_t result;
3255 3200  
3256 3201          if ((tslabel = nfs_getflabel(vp, exi)) == NULL) {
3257 3202                  return (B_FALSE);
3258 3203          }
3259 3204          slabel = label2bslabel(tslabel);
3260 3205          DTRACE_PROBE4(tx__rfs__log__info__labelcheck, char *,
3261 3206              "comparing server's file label(1) with client label(2) (vp(3))",
3262 3207              bslabel_t *, slabel, bslabel_t *, clabel, vnode_t *, vp);
3263 3208  
3264 3209          if (flag == EQUALITY_CHECK)
3265 3210                  result = blequal(clabel, slabel);
3266 3211          else
3267 3212                  result = bldominates(clabel, slabel);
3268 3213          label_rele(tslabel);
3269 3214          return (result);
3270 3215  }
3271 3216  
3272 3217  /*
3273 3218   * Callback function to return the loaned buffers.
3274 3219   * Calls VOP_RETZCBUF() only after all uio_iov[]
3275 3220   * buffers are returned. nu_ref maintains the count.
            *
            * free_arg is the nfs_xuio_t that rfs_setup_xuio() registered in
            * nu_frtn.free_arg.  Each call atomically drops one reference; the
            * call that brings nu_ref to zero returns the structure to
            * nfs_xuio_cache.
3276 3221   */
3277 3222  void
3278 3223  rfs_free_xuio(void *free_arg)
3279 3224  {
3280 3225          uint_t ref;
3281 3226          nfs_xuio_t *nfsuiop = (nfs_xuio_t *)free_arg;
3282 3227  
3283 3228          ref = atomic_dec_uint_nv(&nfsuiop->nu_ref);
3284 3229  
3285 3230          /*
3286 3231           * Call VOP_RETZCBUF() only when all the iov buffers
3287 3232           * are sent OTW.
3288 3233           */
3289 3234          if (ref != 0)
3290 3235                  return;
3291 3236  
                   /*
                    * The buffers are handed back and the vnode released only when
                    * UIO_XUIO is set in uio_extflg.
                    * NOTE(review): the matching VN_HOLD() is not visible in this
                    * part of the file; presumably the caller that loaned the
                    * buffers took it -- confirm.
                    */
3292 3237          if (((uio_t *)nfsuiop)->uio_extflg & UIO_XUIO) {
3293 3238                  (void) VOP_RETZCBUF(nfsuiop->nu_vp, (xuio_t *)free_arg, NULL,
3294 3239                      NULL);
3295 3240                  VN_RELE(nfsuiop->nu_vp);
3296 3241          }
3297 3242  
3298 3243          kmem_cache_free(nfs_xuio_cache, free_arg);
3299 3244  }
3300 3245  
           /*
            * Allocate an nfs_xuio_t from nfs_xuio_cache for vnode vp, zero it and
            * return a pointer to its nu_uio member.
            *
            * The structure starts with a reference count of 1 and its nu_frtn is
            * set up so that rfs_free_xuio() is invoked with the nfs_xuio_t itself
            * as the argument.  The allocation uses KM_SLEEP.
            *
            * NOTE(review): vp is stored without a VN_HOLD() here, while
            * rfs_free_xuio() may VN_RELE() it; presumably the caller is
            * responsible for the hold -- confirm.
            */
3301 3246  xuio_t *
3302 3247  rfs_setup_xuio(vnode_t *vp)
3303 3248  {
3304 3249          nfs_xuio_t *nfsuiop;
3305 3250  
3306 3251          nfsuiop = kmem_cache_alloc(nfs_xuio_cache, KM_SLEEP);
3307 3252  
3308 3253          bzero(nfsuiop, sizeof (nfs_xuio_t));
3309 3254          nfsuiop->nu_vp = vp;
3310 3255  
3311 3256          /*
3312 3257           * ref count set to 1. more may be added
3313 3258           * if multiple mblks refer to multiple iov's.
3314 3259           * This is done in uio_to_mblk().
3315 3260           */
3316 3261  
3317 3262          nfsuiop->nu_ref = 1;
3318 3263  
3319 3264          nfsuiop->nu_frtn.free_func = rfs_free_xuio;
3320 3265          nfsuiop->nu_frtn.free_arg = (char *)nfsuiop;
3321 3266  
3322 3267          nfsuiop->nu_uio.xu_type = UIOTYPE_ZEROCOPY;
3323 3268  
3324 3269          return (&nfsuiop->nu_uio);
3325 3270  }
3326 3271  
           /*
            * Build a chain of M_DATA mblks, one per iovec in uiop, each wrapping
            * the iovec's buffer via esballoca() with the nfs_xuio_t's nu_frtn as
            * the free routine.  b_wptr of every mblk is advanced by iov_len.
            *
            * Returns the head of the chain, or NULL when uio_iovcnt is 0.  On
            * success nu_ref is set to uio_iovcnt, i.e. one reference per mblk.
            *
            * uiop is cast to nfs_xuio_t *; this assumes it is the uio embedded
            * in an nfs_xuio_t such as the one returned by rfs_setup_xuio() --
            * TODO confirm against callers.
            *
            * NOTE(review): the esballoca() results are only checked with
            * ASSERT(); if ASSERT() is compiled out in a given build, a failed
            * allocation would be dereferenced -- confirm whether that can occur.
            */
3327 3272  mblk_t *
3328 3273  uio_to_mblk(uio_t *uiop)
3329 3274  {
3330 3275          struct iovec *iovp;
3331 3276          int i;
3332 3277          mblk_t *mp, *mp1;
3333 3278          nfs_xuio_t *nfsuiop = (nfs_xuio_t *)uiop;
3334 3279  
3335 3280          if (uiop->uio_iovcnt == 0)
3336 3281                  return (NULL);
3337 3282  
                   /* The first iovec becomes the head of the chain. */
3338 3283          iovp = uiop->uio_iov;
3339 3284          mp = mp1 = esballoca((uchar_t *)iovp->iov_base, iovp->iov_len,
3340 3285              BPRI_MED, &nfsuiop->nu_frtn);
3341 3286          ASSERT(mp != NULL);
3342 3287  
3343 3288          mp->b_wptr += iovp->iov_len;
3344 3289          mp->b_datap->db_type = M_DATA;
3345 3290  
                   /* Remaining iovecs are appended through b_cont; mp1 is the tail. */
3346 3291          for (i = 1; i < uiop->uio_iovcnt; i++) {
3347 3292                  iovp = (uiop->uio_iov + i);
3348 3293  
3349 3294                  mp1->b_cont = esballoca(
3350 3295                      (uchar_t *)iovp->iov_base, iovp->iov_len, BPRI_MED,
3351 3296                      &nfsuiop->nu_frtn);
3352 3297  
3353 3298                  mp1 = mp1->b_cont;
3354 3299                  ASSERT(mp1 != NULL);
3355 3300                  mp1->b_wptr += iovp->iov_len;
3356 3301                  mp1->b_datap->db_type = M_DATA;
3357 3302          }
3358 3303  
                   /* One reference per mblk; rfs_free_xuio() drops one per call. */
3359 3304          nfsuiop->nu_ref = uiop->uio_iovcnt;
3360 3305  
3361 3306          return (mp);
3362 3307  }
3363 3308  
3364 3309  /*
3365 3310   * Allocate memory to hold data for a read request of len bytes.
3366 3311   *
3367 3312   * We don't allocate buffers greater than kmem_max_cached in size to avoid
3368 3313   * allocating memory from the kmem_oversized arena.  If we allocate oversized
3369 3314   * buffers, we incur heavy cross-call activity when freeing these large buffers
3370 3315   * in the TCP receive path. Note that we can't set b_wptr here since the
3371 3316   * length of the data returned may differ from the length requested when
3372 3317   * reading the end of a file; we set b_wptr in rfs_rndup_mblks() once the
3373 3318   * length of the read is known.
            *
            * On return *iovcnt is howmany(len, kmem_max_cached) and *iov points to
            * an array of that many iovecs obtained from kmem_alloc().  Entry i
            * describes the data area of the i-th mblk in the returned chain:
            * iov_base is that mblk's b_rptr and iov_len is the unrounded number of
            * bytes assigned to it.  The array is not freed here; presumably the
            * caller releases it -- confirm.
            *
            * Returns the head of the mblk chain.  When len is 0 no mblk is
            * allocated and NULL is returned.
3374 3319   */
3375 3320  mblk_t *
3376 3321  rfs_read_alloc(uint_t len, struct iovec **iov, int *iovcnt)
3377 3322  {
3378 3323          struct iovec *iovarr;
                   /*
                    * mp starts out NULL so that a zero-length request, for which
                    * the loop below never runs, returns NULL rather than an
                    * uninitialized pointer.  mpp always points at the link that
                    * the next allocated mblk must be stored in.
                    */
3379 3324          mblk_t *mp = NULL, **mpp = &mp;
3380 3325          size_t mpsize;
3381 3326          uint_t remain = len;
3382 3327          int i, err = 0;
3383 3328  
3384 3329          *iovcnt = howmany(len, kmem_max_cached);
3385 3330  
3386 3331          iovarr = kmem_alloc(*iovcnt * sizeof (struct iovec), KM_SLEEP);
3387 3332          *iov = iovarr;
3388 3333  
3389 3334          for (i = 0; i < *iovcnt; remain -= mpsize, i++) {
3390 3335                  ASSERT(remain <= len);
3391 3336                  /*
3392 3337                   * We roundup the size we allocate to a multiple of
3393 3338                   * BYTES_PER_XDR_UNIT (4 bytes) so that the call to
3394 3339                   * xdrmblk_putmblk() never fails.
3395 3340                   */
3396 3341                  ASSERT(kmem_max_cached % BYTES_PER_XDR_UNIT == 0);
3397 3342                  mpsize = MIN(kmem_max_cached, remain);
3398 3343                  *mpp = allocb_wait(RNDUP(mpsize), BPRI_MED, STR_NOSIG, &err);
3399 3344                  ASSERT(*mpp != NULL);
3400 3345                  ASSERT(err == 0);
3401 3346  
3402 3347                  iovarr[i].iov_base = (caddr_t)(*mpp)->b_rptr;
3403 3348                  iovarr[i].iov_len = mpsize;
3404 3349                  mpp = &(*mpp)->b_cont;
3405 3350          }
3406 3351          return (mp);
3407 3352  }
3408 3353  
           /*
            * Finish an mblk chain holding len bytes of read data by appending the
            * P2NPHASE(len, BYTES_PER_XDR_UNIT) zero pad bytes that follow the
            * data.
            *
            * When buf_loaned is zero, b_wptr of each mblk is also set here: every
            * mblk but the last is marked full (MBLKSIZE) and the last receives
            * the rest of len followed by the pad bytes.  When buf_loaned is
            * non-zero the existing mblks are left untouched and, if padding is
            * needed, a separate mblk containing only the pad bytes is linked to
            * the end of the chain.
            */
3409 3354  void
3410 3355  rfs_rndup_mblks(mblk_t *mp, uint_t len, int buf_loaned)
3411 3356  {
3412 3357          int i;
3413 3358          int alloc_err = 0;
3414 3359          mblk_t *rmp;
3415 3360          uint_t mpsize, remainder;
3416 3361  
                   /* Number of zero pad bytes that will be appended after the data. */
3417 3362          remainder = P2NPHASE(len, BYTES_PER_XDR_UNIT);
3418 3363  
3419 3364          /*
3420 3365           * Non copy-reduction case.  This function assumes that blocks were
3421 3366           * allocated in multiples of BYTES_PER_XDR_UNIT bytes, which makes this
3422 3367           * padding safe without bounds checking.
3423 3368           */
3424 3369          if (!buf_loaned) {
3425 3370                  /*
3426 3371                   * Set the size of each mblk in the chain until we've consumed
3427 3372                   * the specified length for all but the last one.
3428 3373                   */
3429 3374                  while ((mpsize = MBLKSIZE(mp)) < len) {
3430 3375                          ASSERT(mpsize % BYTES_PER_XDR_UNIT == 0);
3431 3376                          mp->b_wptr += mpsize;
3432 3377                          len -= mpsize;
3433 3378                          mp = mp->b_cont;
3434 3379                          ASSERT(mp != NULL);
3435 3380                  }
3436 3381  
                           /*
                            * mp is now the last mblk carrying data; len is what is
                            * left for it.  Pad in place right behind the data.
                            */
3437 3382                  ASSERT(len + remainder <= mpsize);
3438 3383                  mp->b_wptr += len;
3439 3384                  for (i = 0; i < remainder; i++)
3440 3385                          *mp->b_wptr++ = '\0';
3441 3386                  return;
3442 3387          }
3443 3388  
3444 3389          /*
3445 3390           * No remainder mblk required.
3446 3391           */
3447 3392          if (remainder == 0)
3448 3393                  return;
3449 3394  
3450 3395          /*
3451 3396           * Get to the last mblk in the chain.
3452 3397           */
3453 3398          while (mp->b_cont != NULL)
3454 3399                  mp = mp->b_cont;
3455 3400  
3456 3401          /*
3457 3402           * In case of copy-reduction mblks, the size of the mblks are fixed
3458 3403           * and are of the size of the loaned buffers.  Allocate a remainder
3459 3404           * mblk and chain it to the data buffers. This is sub-optimal, but not
3460 3405           * expected to happen commonly.
3461 3406           */
3462 3407          rmp = allocb_wait(remainder, BPRI_MED, STR_NOSIG, &alloc_err);
3463 3408          ASSERT(rmp != NULL);
3464 3409          ASSERT(alloc_err == 0);
3465 3410  
3466 3411          for (i = 0; i < remainder; i++)
3467 3412                  *rmp->b_wptr++ = '\0';
3468 3413  
3469 3414          rmp->b_datap->db_type = M_DATA;
3470 3415          mp->b_cont = rmp;
3471 3416  }
  
    | 
      ↓ open down ↓ | 
    260 lines elided | 
    
      ↑ open up ↑ | 
  
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX