Print this page
11083 support NFS server in zone
Portions contributed by: Dan Kruchinin <dan.kruchinin@nexenta.com>
Portions contributed by: Stepan Zastupov <stepan.zastupov@gmail.com>
Portions contributed by: Joyce McIntosh <joyce.mcintosh@nexenta.com>
Portions contributed by: Mike Zeller <mike@mikezeller.net>
Portions contributed by: Dan McDonald <danmcd@joyent.com>
Portions contributed by: Gordon Ross <gordon.w.ross@gmail.com>
Portions contributed by: Vitaliy Gusev <gusev.vitaliy@gmail.com>
Reviewed by: Rick McNeal <rick.mcneal@nexenta.com>
Reviewed by: Rob Gittins <rob.gittins@nexenta.com>
Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com>
Reviewed by: Jason King <jbk@joyent.com>
Reviewed by: C Fraire <cfraire@me.com>
Change-Id: I22f289d357503f9b48a0bc2482cc4328a6d43d16

Split Close
Expand all
Collapse all
          --- old/usr/src/uts/common/fs/nfs/nfs4_srv.c
          +++ new/usr/src/uts/common/fs/nfs/nfs4_srv.c
↓ open down ↓ 12 lines elided ↑ open up ↑
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  
  22   22  /*
  23      - * Copyright 2016 Nexenta Systems, Inc.  All rights reserved.
  24   23   * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
  25      - * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
  26   24   */
  27   25  
  28   26  /*
  29   27   *      Copyright (c) 1983,1984,1985,1986,1987,1988,1989  AT&T.
  30   28   *      All Rights Reserved
  31   29   */
  32   30  
       31 +/*
       32 + * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
       33 + * Copyright 2019 Nexenta Systems, Inc.
       34 + * Copyright 2019 Nexenta by DDN, Inc.
       35 + */
       36 +
  33   37  #include <sys/param.h>
  34   38  #include <sys/types.h>
  35   39  #include <sys/systm.h>
  36   40  #include <sys/cred.h>
  37   41  #include <sys/buf.h>
  38   42  #include <sys/vfs.h>
  39   43  #include <sys/vfs_opreg.h>
  40   44  #include <sys/vnode.h>
  41   45  #include <sys/uio.h>
  42   46  #include <sys/errno.h>
↓ open down ↓ 16 lines elided ↑ open up ↑
  59   63  #include <sys/zone.h>
  60   64  
  61   65  #include <fs/fs_reparse.h>
  62   66  
  63   67  #include <rpc/types.h>
  64   68  #include <rpc/auth.h>
  65   69  #include <rpc/rpcsec_gss.h>
  66   70  #include <rpc/svc.h>
  67   71  
  68   72  #include <nfs/nfs.h>
       73 +#include <nfs/nfssys.h>
  69   74  #include <nfs/export.h>
  70   75  #include <nfs/nfs_cmd.h>
  71   76  #include <nfs/lm.h>
  72   77  #include <nfs/nfs4.h>
       78 +#include <nfs/nfs4_drc.h>
  73   79  
  74   80  #include <sys/strsubr.h>
  75   81  #include <sys/strsun.h>
  76   82  
  77   83  #include <inet/common.h>
  78   84  #include <inet/ip.h>
  79   85  #include <inet/ip6.h>
  80   86  
  81   87  #include <sys/tsol/label.h>
  82   88  #include <sys/tsol/tndb.h>
↓ open down ↓ 24 lines elided ↑ open up ↑
 107  113   *          sizeof smallest (padded) name (4 bytes) +
 108  114   *          sizeof bitmap4_len (12 bytes) +   NOTE: we always encode len=2 bm4
 109  115   *          sizeof attrlist4_len (4 bytes) +
 110  116   *          sizeof next boolean (4 bytes)
 111  117   *
 112  118   * RFS4_MINLEN_RDDIR4: XDR-encoded size of READDIR op reply containing
 113  119   * the smallest possible entry4 (assumes no attrs requested).
 114  120   *      sizeof nfsstat4 (4 bytes) +
 115  121   *      sizeof verifier4 (8 bytes) +
 116  122   *      sizeof entry4list bool (4 bytes) +
 117      - *      sizeof entry4   (36 bytes) +
 118      - *      sizeof eof bool  (4 bytes)
      123 + *      sizeof entry4 (36 bytes) +
      124 + *      sizeof eof bool (4 bytes)
 119  125   *
 120  126   * RFS4_MINLEN_RDDIR_BUF: minimum length of buffer server will provide to
 121  127   *      VOP_READDIR.  Its value is the size of the maximum possible dirent
 122  128   *      for solaris.  The DIRENT64_RECLEN macro returns the size of dirent
 123  129   *      required for a given name length.  MAXNAMELEN is the maximum
 124  130   *      filename length allowed in Solaris.  The first two DIRENT64_RECLEN()
 125  131   *      macros are to allow for . and .. entries -- just a minor tweak to try
 126  132   *      and guarantee that buffer we give to VOP_READDIR will be large enough
 127  133   *      to hold ., .., and the largest possible solaris dirent64.
 128  134   */
↓ open down ↓ 11 lines elided ↑ open up ↑
 140  146   *
 141  147   * dirent64: name padded to provide 8 byte struct alignment
 142  148   *      d_ino(8) + d_off(8) + d_reclen(2) + d_name(namelen + null(1) + pad)
 143  149   *
 144  150   * cookie: uint64_t   +  utf8namelen: uint_t  +   utf8name padded to 8 bytes
 145  151   *
 146  152   */
 147  153  #define DIRENT64_TO_DIRCOUNT(dp) \
 148  154          (3 * BYTES_PER_XDR_UNIT + DIRENT64_NAMELEN((dp)->d_reclen))
 149  155  
 150      -time_t rfs4_start_time;                 /* Initialized in rfs4_srvrinit */
 151  156  
 152      -static sysid_t lockt_sysid;             /* dummy sysid for all LOCKT calls */
      157 +static sysid_t          lockt_sysid;    /* dummy sysid for all LOCKT calls */
 153  158  
 154  159  u_longlong_t    nfs4_srv_caller_id;
 155  160  uint_t          nfs4_srv_vkey = 0;
 156  161  
 157      -verifier4       Write4verf;
 158      -verifier4       Readdir4verf;
 159      -
 160  162  void    rfs4_init_compound_state(struct compound_state *);
 161  163  
 162  164  static void     nullfree(caddr_t);
 163  165  static void     rfs4_op_inval(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 164      -                        struct compound_state *);
      166 +                    struct compound_state *);
 165  167  static void     rfs4_op_access(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 166      -                        struct compound_state *);
      168 +                    struct compound_state *);
 167  169  static void     rfs4_op_close(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 168      -                        struct compound_state *);
      170 +                    struct compound_state *);
 169  171  static void     rfs4_op_commit(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 170      -                        struct compound_state *);
      172 +                    struct compound_state *);
 171  173  static void     rfs4_op_create(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 172      -                        struct compound_state *);
      174 +                    struct compound_state *);
 173  175  static void     rfs4_op_create_free(nfs_resop4 *resop);
 174  176  static void     rfs4_op_delegreturn(nfs_argop4 *, nfs_resop4 *,
 175      -                        struct svc_req *, struct compound_state *);
      177 +                    struct svc_req *, struct compound_state *);
 176  178  static void     rfs4_op_delegpurge(nfs_argop4 *, nfs_resop4 *,
 177      -                        struct svc_req *, struct compound_state *);
      179 +                    struct svc_req *, struct compound_state *);
 178  180  static void     rfs4_op_getattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 179      -                        struct compound_state *);
      181 +                    struct compound_state *);
 180  182  static void     rfs4_op_getattr_free(nfs_resop4 *);
 181  183  static void     rfs4_op_getfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 182      -                        struct compound_state *);
      184 +                    struct compound_state *);
 183  185  static void     rfs4_op_getfh_free(nfs_resop4 *);
 184  186  static void     rfs4_op_illegal(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 185      -                        struct compound_state *);
      187 +                    struct compound_state *);
 186  188  static void     rfs4_op_link(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 187      -                        struct compound_state *);
      189 +                    struct compound_state *);
 188  190  static void     rfs4_op_lock(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 189      -                        struct compound_state *);
      191 +                    struct compound_state *);
 190  192  static void     lock_denied_free(nfs_resop4 *);
 191  193  static void     rfs4_op_locku(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 192      -                        struct compound_state *);
      194 +                    struct compound_state *);
 193  195  static void     rfs4_op_lockt(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 194      -                        struct compound_state *);
      196 +                    struct compound_state *);
 195  197  static void     rfs4_op_lookup(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 196      -                        struct compound_state *);
      198 +                    struct compound_state *);
 197  199  static void     rfs4_op_lookupp(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 198      -                        struct compound_state *);
      200 +                    struct compound_state *);
 199  201  static void     rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop,
 200      -                                struct svc_req *req, struct compound_state *cs);
      202 +                    struct svc_req *req, struct compound_state *cs);
 201  203  static void     rfs4_op_nverify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 202      -                        struct compound_state *);
      204 +                    struct compound_state *);
 203  205  static void     rfs4_op_open(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 204      -                        struct compound_state *);
      206 +                    struct compound_state *);
 205  207  static void     rfs4_op_open_confirm(nfs_argop4 *, nfs_resop4 *,
 206      -                        struct svc_req *, struct compound_state *);
      208 +                    struct svc_req *, struct compound_state *);
 207  209  static void     rfs4_op_open_downgrade(nfs_argop4 *, nfs_resop4 *,
 208      -                        struct svc_req *, struct compound_state *);
      210 +                    struct svc_req *, struct compound_state *);
 209  211  static void     rfs4_op_putfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 210      -                        struct compound_state *);
      212 +                    struct compound_state *);
 211  213  static void     rfs4_op_putpubfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 212      -                        struct compound_state *);
      214 +                    struct compound_state *);
 213  215  static void     rfs4_op_putrootfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 214      -                        struct compound_state *);
      216 +                    struct compound_state *);
 215  217  static void     rfs4_op_read(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 216      -                        struct compound_state *);
      218 +                    struct compound_state *);
 217  219  static void     rfs4_op_read_free(nfs_resop4 *);
 218  220  static void     rfs4_op_readdir_free(nfs_resop4 *resop);
 219  221  static void     rfs4_op_readlink(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 220      -                        struct compound_state *);
      222 +                    struct compound_state *);
 221  223  static void     rfs4_op_readlink_free(nfs_resop4 *);
 222  224  static void     rfs4_op_release_lockowner(nfs_argop4 *, nfs_resop4 *,
 223      -                        struct svc_req *, struct compound_state *);
      225 +                    struct svc_req *, struct compound_state *);
 224  226  static void     rfs4_op_remove(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 225      -                        struct compound_state *);
      227 +                    struct compound_state *);
 226  228  static void     rfs4_op_rename(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 227      -                        struct compound_state *);
      229 +                    struct compound_state *);
 228  230  static void     rfs4_op_renew(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 229      -                        struct compound_state *);
      231 +                    struct compound_state *);
 230  232  static void     rfs4_op_restorefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 231      -                        struct compound_state *);
      233 +                    struct compound_state *);
 232  234  static void     rfs4_op_savefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 233      -                        struct compound_state *);
      235 +                    struct compound_state *);
 234  236  static void     rfs4_op_setattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 235      -                        struct compound_state *);
      237 +                    struct compound_state *);
 236  238  static void     rfs4_op_verify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 237      -                        struct compound_state *);
      239 +                    struct compound_state *);
 238  240  static void     rfs4_op_write(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 239      -                        struct compound_state *);
      241 +                    struct compound_state *);
 240  242  static void     rfs4_op_setclientid(nfs_argop4 *, nfs_resop4 *,
 241      -                        struct svc_req *, struct compound_state *);
      243 +                    struct svc_req *, struct compound_state *);
 242  244  static void     rfs4_op_setclientid_confirm(nfs_argop4 *, nfs_resop4 *,
 243      -                        struct svc_req *req, struct compound_state *);
      245 +                    struct svc_req *req, struct compound_state *);
 244  246  static void     rfs4_op_secinfo(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 245      -                        struct compound_state *);
      247 +                    struct compound_state *);
 246  248  static void     rfs4_op_secinfo_free(nfs_resop4 *);
 247  249  
 248      -static nfsstat4 check_open_access(uint32_t,
 249      -                                struct compound_state *, struct svc_req *);
 250      -nfsstat4 rfs4_client_sysid(rfs4_client_t *, sysid_t *);
 251      -void rfs4_ss_clid(rfs4_client_t *);
      250 +static nfsstat4 check_open_access(uint32_t, struct compound_state *,
      251 +                    struct svc_req *);
      252 +nfsstat4        rfs4_client_sysid(rfs4_client_t *, sysid_t *);
      253 +void            rfs4_ss_clid(nfs4_srv_t *, rfs4_client_t *);
 252  254  
      255 +
 253  256  /*
 254  257   * translation table for attrs
 255  258   */
 256  259  struct nfs4_ntov_table {
 257  260          union nfs4_attr_u *na;
 258  261          uint8_t amap[NFS4_MAXNUM_ATTRS];
 259  262          int attrcnt;
 260  263          bool_t vfsstat;
 261  264  };
 262  265  
 263  266  static void     nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp);
 264  267  static void     nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp,
 265      -                                    struct nfs4_svgetit_arg *sargp);
      268 +                    struct nfs4_svgetit_arg *sargp);
 266  269  
 267  270  static nfsstat4 do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp,
 268  271                      struct compound_state *cs, struct nfs4_svgetit_arg *sargp,
 269  272                      struct nfs4_ntov_table *ntovp, nfs4_attr_cmd_t cmd);
 270  273  
      274 +static void     hanfsv4_failover(nfs4_srv_t *);
      275 +
 271  276  fem_t           *deleg_rdops;
 272  277  fem_t           *deleg_wrops;
 273  278  
 274      -rfs4_servinst_t *rfs4_cur_servinst = NULL;      /* current server instance */
 275      -kmutex_t        rfs4_servinst_lock;     /* protects linked list */
 276      -int             rfs4_seen_first_compound;       /* set first time we see one */
 277      -
 278  279  /*
 279  280   * NFS4 op dispatch table
 280  281   */
 281  282  
 282  283  struct rfsv4disp {
 283  284          void    (*dis_proc)();          /* proc to call */
 284  285          void    (*dis_resfree)();       /* frees space allocated by proc */
 285  286          int     dis_flags;              /* RPC_IDEMPOTENT, etc... */
 286  287  };
 287  288  
↓ open down ↓ 171 lines elided ↑ open up ↑
 459  460          "rfs4_op_setattr",
 460  461          "rfs4_op_setclientid",
 461  462          "rfs4_op_setclient_confirm",
 462  463          "rfs4_op_verify",
 463  464          "rfs4_op_write",
 464  465          "rfs4_op_release_lockowner",
 465  466          "rfs4_op_illegal"
 466  467  };
 467  468  #endif
 468  469  
 469      -void    rfs4_ss_chkclid(rfs4_client_t *);
      470 +void    rfs4_ss_chkclid(nfs4_srv_t *, rfs4_client_t *);
 470  471  
 471  472  extern size_t   strlcpy(char *dst, const char *src, size_t dstsize);
 472  473  
 473  474  extern void     rfs4_free_fs_locations4(fs_locations4 *);
 474  475  
 475  476  #ifdef  nextdp
 476  477  #undef nextdp
 477  478  #endif
 478  479  #define nextdp(dp)      ((struct dirent64 *)((char *)(dp) + (dp)->d_reclen))
 479  480  
↓ open down ↓ 12 lines elided ↑ open up ↑
 492  493          VOPNAME_READ,           { .femop_read = deleg_wr_read },
 493  494          VOPNAME_WRITE,          { .femop_write = deleg_wr_write },
 494  495          VOPNAME_SETATTR,        { .femop_setattr = deleg_wr_setattr },
 495  496          VOPNAME_RWLOCK,         { .femop_rwlock = deleg_wr_rwlock },
 496  497          VOPNAME_SPACE,          { .femop_space = deleg_wr_space },
 497  498          VOPNAME_SETSECATTR,     { .femop_setsecattr = deleg_wr_setsecattr },
 498  499          VOPNAME_VNEVENT,        { .femop_vnevent = deleg_wr_vnevent },
 499  500          NULL,                   NULL
 500  501  };
 501  502  
 502      -int
 503      -rfs4_srvrinit(void)
      503 +nfs4_srv_t *
      504 +nfs4_get_srv(void)
 504  505  {
      506 +        nfs_globals_t *ng = nfs_srv_getzg();
      507 +        nfs4_srv_t *srv = ng->nfs4_srv;
      508 +        ASSERT(srv != NULL);
      509 +        return (srv);
      510 +}
      511 +
      512 +void
      513 +rfs4_srv_zone_init(nfs_globals_t *ng)
      514 +{
      515 +        nfs4_srv_t *nsrv4;
 505  516          timespec32_t verf;
 506      -        int error;
 507      -        extern void rfs4_attr_init();
 508      -        extern krwlock_t rfs4_deleg_policy_lock;
 509  517  
      518 +        nsrv4 = kmem_zalloc(sizeof (*nsrv4), KM_SLEEP);
      519 +
 510  520          /*
 511  521           * The following algorithm attempts to find a unique verifier
 512  522           * to be used as the write verifier returned from the server
 513  523           * to the client.  It is important that this verifier change
 514  524           * whenever the server reboots.  Of secondary importance, it
 515  525           * is important for the verifier to be unique between two
 516  526           * different servers.
 517  527           *
 518  528           * Thus, an attempt is made to use the system hostid and the
 519  529           * current time in seconds when the nfssrv kernel module is
↓ open down ↓ 8 lines elided ↑ open up ↑
 528  538          verf.tv_sec = (time_t)zone_get_hostid(NULL);
 529  539          if (verf.tv_sec != 0) {
 530  540                  verf.tv_nsec = gethrestime_sec();
 531  541          } else {
 532  542                  timespec_t tverf;
 533  543  
 534  544                  gethrestime(&tverf);
 535  545                  verf.tv_sec = (time_t)tverf.tv_sec;
 536  546                  verf.tv_nsec = tverf.tv_nsec;
 537  547          }
      548 +        nsrv4->write4verf = *(uint64_t *)&verf;
 538  549  
 539      -        Write4verf = *(uint64_t *)&verf;
      550 +        /* Used to manage create/destroy of server state */
      551 +        nsrv4->nfs4_server_state = NULL;
      552 +        nsrv4->nfs4_cur_servinst = NULL;
      553 +        nsrv4->nfs4_deleg_policy = SRV_NEVER_DELEGATE;
      554 +        mutex_init(&nsrv4->deleg_lock, NULL, MUTEX_DEFAULT, NULL);
      555 +        mutex_init(&nsrv4->state_lock, NULL, MUTEX_DEFAULT, NULL);
      556 +        mutex_init(&nsrv4->servinst_lock, NULL, MUTEX_DEFAULT, NULL);
      557 +        rw_init(&nsrv4->deleg_policy_lock, NULL, RW_DEFAULT, NULL);
 540  558  
 541      -        rfs4_attr_init();
 542      -        mutex_init(&rfs4_deleg_lock, NULL, MUTEX_DEFAULT, NULL);
      559 +        ng->nfs4_srv = nsrv4;
      560 +}
 543  561  
 544      -        /* Used to manage create/destroy of server state */
 545      -        mutex_init(&rfs4_state_lock, NULL, MUTEX_DEFAULT, NULL);
      562 +void
      563 +rfs4_srv_zone_fini(nfs_globals_t *ng)
      564 +{
      565 +        nfs4_srv_t *nsrv4 = ng->nfs4_srv;
 546  566  
 547      -        /* Used to manage access to server instance linked list */
 548      -        mutex_init(&rfs4_servinst_lock, NULL, MUTEX_DEFAULT, NULL);
      567 +        ng->nfs4_srv = NULL;
 549  568  
 550      -        /* Used to manage access to rfs4_deleg_policy */
 551      -        rw_init(&rfs4_deleg_policy_lock, NULL, RW_DEFAULT, NULL);
      569 +        mutex_destroy(&nsrv4->deleg_lock);
      570 +        mutex_destroy(&nsrv4->state_lock);
      571 +        mutex_destroy(&nsrv4->servinst_lock);
      572 +        rw_destroy(&nsrv4->deleg_policy_lock);
 552  573  
 553      -        error = fem_create("deleg_rdops", nfs4_rd_deleg_tmpl, &deleg_rdops);
 554      -        if (error != 0) {
      574 +        kmem_free(nsrv4, sizeof (*nsrv4));
      575 +}
      576 +
      577 +void
      578 +rfs4_srvrinit(void)
      579 +{
      580 +        extern void rfs4_attr_init();
      581 +
      582 +        rfs4_attr_init();
      583 +
      584 +        if (fem_create("deleg_rdops", nfs4_rd_deleg_tmpl, &deleg_rdops) != 0) {
 555  585                  rfs4_disable_delegation();
 556      -        } else {
 557      -                error = fem_create("deleg_wrops", nfs4_wr_deleg_tmpl,
 558      -                    &deleg_wrops);
 559      -                if (error != 0) {
 560      -                        rfs4_disable_delegation();
 561      -                        fem_free(deleg_rdops);
 562      -                }
      586 +        } else if (fem_create("deleg_wrops", nfs4_wr_deleg_tmpl,
      587 +            &deleg_wrops) != 0) {
      588 +                rfs4_disable_delegation();
      589 +                fem_free(deleg_rdops);
 563  590          }
 564  591  
 565  592          nfs4_srv_caller_id = fs_new_caller_id();
 566      -
 567  593          lockt_sysid = lm_alloc_sysidt();
 568      -
 569  594          vsd_create(&nfs4_srv_vkey, NULL);
 570      -
 571      -        return (0);
      595 +        rfs4_state_g_init();
 572  596  }
 573  597  
 574  598  void
 575  599  rfs4_srvrfini(void)
 576  600  {
 577      -        extern krwlock_t rfs4_deleg_policy_lock;
 578      -
 579  601          if (lockt_sysid != LM_NOSYSID) {
 580  602                  lm_free_sysidt(lockt_sysid);
 581  603                  lockt_sysid = LM_NOSYSID;
 582  604          }
 583  605  
 584      -        mutex_destroy(&rfs4_deleg_lock);
 585      -        mutex_destroy(&rfs4_state_lock);
 586      -        rw_destroy(&rfs4_deleg_policy_lock);
      606 +        rfs4_state_g_fini();
 587  607  
 588  608          fem_free(deleg_rdops);
 589  609          fem_free(deleg_wrops);
 590  610  }
 591  611  
 592  612  void
      613 +rfs4_do_server_start(int server_upordown,
      614 +    int srv_delegation, int cluster_booted)
      615 +{
      616 +        nfs4_srv_t *nsrv4 = nfs4_get_srv();
      617 +
      618 +        /* Is this a warm start? */
      619 +        if (server_upordown == NFS_SERVER_QUIESCED) {
      620 +                cmn_err(CE_NOTE, "nfs4_srv: "
      621 +                    "server was previously quiesced; "
      622 +                    "existing NFSv4 state will be re-used");
      623 +
      624 +                /*
      625 +                 * HA-NFSv4: this is also the signal
      626 +                 * that a Resource Group failover has
      627 +                 * occurred.
      628 +                 */
      629 +                if (cluster_booted)
      630 +                        hanfsv4_failover(nsrv4);
      631 +        } else {
      632 +                /* Cold start */
      633 +                nsrv4->rfs4_start_time = 0;
      634 +                rfs4_state_zone_init(nsrv4);
      635 +                nsrv4->nfs4_drc = rfs4_init_drc(nfs4_drc_max,
      636 +                    nfs4_drc_hash);
      637 +
      638 +                /*
      639 +                 * The nfsd service was started with the -s option;
      640 +                 * we need to pull in any state from the paths indicated.
      641 +                 */
      642 +                if (curzone == global_zone && rfs4_dss_numnewpaths > 0) {
      643 +                        /* read in the stable storage state from these paths */
      644 +                        rfs4_dss_readstate(nsrv4, rfs4_dss_numnewpaths,
      645 +                            rfs4_dss_newpaths);
      646 +                }
      647 +        }
      648 +
      649 +        /* Check if delegation is to be enabled */
      650 +        if (srv_delegation != FALSE)
      651 +                rfs4_set_deleg_policy(nsrv4, SRV_NORMAL_DELEGATE);
      652 +}
      653 +
      654 +void
 593  655  rfs4_init_compound_state(struct compound_state *cs)
 594  656  {
 595  657          bzero(cs, sizeof (*cs));
 596  658          cs->cont = TRUE;
 597  659          cs->access = CS_ACCESS_DENIED;
 598  660          cs->deleg = FALSE;
 599  661          cs->mandlock = FALSE;
 600  662          cs->fh.nfs_fh4_val = cs->fhbuf;
 601  663  }
 602  664  
↓ open down ↓ 42 lines elided ↑ open up ↑
 645  707  {
 646  708          ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
 647  709  
 648  710          return (rfs4_servinst_in_grace(cp->rc_server_instance));
 649  711  }
 650  712  
 651  713  /*
 652  714   * reset all currently active grace periods
 653  715   */
 654  716  void
 655      -rfs4_grace_reset_all(void)
      717 +rfs4_grace_reset_all(nfs4_srv_t *nsrv4)
 656  718  {
 657  719          rfs4_servinst_t *sip;
 658  720  
 659      -        mutex_enter(&rfs4_servinst_lock);
 660      -        for (sip = rfs4_cur_servinst; sip != NULL; sip = sip->prev)
      721 +        mutex_enter(&nsrv4->servinst_lock);
      722 +        for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev)
 661  723                  if (rfs4_servinst_in_grace(sip))
 662  724                          rfs4_grace_start(sip);
 663      -        mutex_exit(&rfs4_servinst_lock);
      725 +        mutex_exit(&nsrv4->servinst_lock);
 664  726  }
 665  727  
 666  728  /*
 667  729   * start any new instances' grace periods
 668  730   */
 669  731  void
 670      -rfs4_grace_start_new(void)
      732 +rfs4_grace_start_new(nfs4_srv_t *nsrv4)
 671  733  {
 672  734          rfs4_servinst_t *sip;
 673  735  
 674      -        mutex_enter(&rfs4_servinst_lock);
 675      -        for (sip = rfs4_cur_servinst; sip != NULL; sip = sip->prev)
      736 +        mutex_enter(&nsrv4->servinst_lock);
      737 +        for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev)
 676  738                  if (rfs4_servinst_grace_new(sip))
 677  739                          rfs4_grace_start(sip);
 678      -        mutex_exit(&rfs4_servinst_lock);
      740 +        mutex_exit(&nsrv4->servinst_lock);
 679  741  }
 680  742  
 681  743  static rfs4_dss_path_t *
 682      -rfs4_dss_newpath(rfs4_servinst_t *sip, char *path, unsigned index)
      744 +rfs4_dss_newpath(nfs4_srv_t *nsrv4, rfs4_servinst_t *sip,
      745 +    char *path, unsigned index)
 683  746  {
 684  747          size_t len;
 685  748          rfs4_dss_path_t *dss_path;
 686  749  
 687  750          dss_path = kmem_alloc(sizeof (rfs4_dss_path_t), KM_SLEEP);
 688  751  
 689  752          /*
 690  753           * Take a copy of the string, since the original may be overwritten.
 691  754           * Sadly, no strdup() in the kernel.
 692  755           */
↓ open down ↓ 3 lines elided ↑ open up ↑
 696  759          (void) strlcpy(dss_path->path, path, len);
 697  760  
 698  761          /* associate with servinst */
 699  762          dss_path->sip = sip;
 700  763          dss_path->index = index;
 701  764  
 702  765          /*
 703  766           * Add to list of served paths.
 704  767           * No locking required, as we're only ever called at startup.
 705  768           */
 706      -        if (rfs4_dss_pathlist == NULL) {
      769 +        if (nsrv4->dss_pathlist == NULL) {
 707  770                  /* this is the first dss_path_t */
 708  771  
 709  772                  /* needed for insque/remque */
 710  773                  dss_path->next = dss_path->prev = dss_path;
 711  774  
 712      -                rfs4_dss_pathlist = dss_path;
      775 +                nsrv4->dss_pathlist = dss_path;
 713  776          } else {
 714      -                insque(dss_path, rfs4_dss_pathlist);
      777 +                insque(dss_path, nsrv4->dss_pathlist);
 715  778          }
 716  779  
 717  780          return (dss_path);
 718  781  }
 719  782  
 720  783  /*
 721  784   * Create a new server instance, and make it the currently active instance.
 722  785   * Note that starting the grace period too early will reduce the clients'
 723  786   * recovery window.
 724  787   */
 725  788  void
 726      -rfs4_servinst_create(int start_grace, int dss_npaths, char **dss_paths)
      789 +rfs4_servinst_create(nfs4_srv_t *nsrv4, int start_grace,
      790 +    int dss_npaths, char **dss_paths)
 727  791  {
 728  792          unsigned i;
 729  793          rfs4_servinst_t *sip;
 730  794          rfs4_oldstate_t *oldstate;
 731  795  
 732  796          sip = kmem_alloc(sizeof (rfs4_servinst_t), KM_SLEEP);
 733  797          rw_init(&sip->rwlock, NULL, RW_DEFAULT, NULL);
 734  798  
 735  799          sip->start_time = (time_t)0;
 736  800          sip->grace_period = (time_t)0;
↓ open down ↓ 10 lines elided ↑ open up ↑
 747  811          oldstate->next = oldstate;
 748  812          oldstate->prev = oldstate;
 749  813          sip->oldstate = oldstate;
 750  814  
 751  815  
 752  816          sip->dss_npaths = dss_npaths;
 753  817          sip->dss_paths = kmem_alloc(dss_npaths *
 754  818              sizeof (rfs4_dss_path_t *), KM_SLEEP);
 755  819  
 756  820          for (i = 0; i < dss_npaths; i++) {
 757      -                sip->dss_paths[i] = rfs4_dss_newpath(sip, dss_paths[i], i);
      821 +                sip->dss_paths[i] =
      822 +                    rfs4_dss_newpath(nsrv4, sip, dss_paths[i], i);
 758  823          }
 759  824  
 760      -        mutex_enter(&rfs4_servinst_lock);
 761      -        if (rfs4_cur_servinst != NULL) {
      825 +        mutex_enter(&nsrv4->servinst_lock);
      826 +        if (nsrv4->nfs4_cur_servinst != NULL) {
 762  827                  /* add to linked list */
 763      -                sip->prev = rfs4_cur_servinst;
 764      -                rfs4_cur_servinst->next = sip;
      828 +                sip->prev = nsrv4->nfs4_cur_servinst;
      829 +                nsrv4->nfs4_cur_servinst->next = sip;
 765  830          }
 766  831          if (start_grace)
 767  832                  rfs4_grace_start(sip);
 768  833          /* make the new instance "current" */
 769      -        rfs4_cur_servinst = sip;
      834 +        nsrv4->nfs4_cur_servinst = sip;
 770  835  
 771      -        mutex_exit(&rfs4_servinst_lock);
      836 +        mutex_exit(&nsrv4->servinst_lock);
 772  837  }
 773  838  
 774  839  /*
 775  840   * In future, we might add a rfs4_servinst_destroy(sip) but, for now, destroy
 776  841   * all instances directly.
 777  842   */
 778  843  void
 779      -rfs4_servinst_destroy_all(void)
      844 +rfs4_servinst_destroy_all(nfs4_srv_t *nsrv4)
 780  845  {
 781  846          rfs4_servinst_t *sip, *prev, *current;
 782  847  #ifdef DEBUG
 783  848          int n = 0;
 784  849  #endif
 785  850  
 786      -        mutex_enter(&rfs4_servinst_lock);
 787      -        ASSERT(rfs4_cur_servinst != NULL);
 788      -        current = rfs4_cur_servinst;
 789      -        rfs4_cur_servinst = NULL;
      851 +        mutex_enter(&nsrv4->servinst_lock);
      852 +        ASSERT(nsrv4->nfs4_cur_servinst != NULL);
      853 +        current = nsrv4->nfs4_cur_servinst;
      854 +        nsrv4->nfs4_cur_servinst = NULL;
 790  855          for (sip = current; sip != NULL; sip = prev) {
 791  856                  prev = sip->prev;
 792  857                  rw_destroy(&sip->rwlock);
 793  858                  if (sip->oldstate)
 794  859                          kmem_free(sip->oldstate, sizeof (rfs4_oldstate_t));
 795      -                if (sip->dss_paths)
      860 +                if (sip->dss_paths) {
      861 +                        int i = sip->dss_npaths;
      862 +
      863 +                        while (i > 0) {
      864 +                                i--;
      865 +                                if (sip->dss_paths[i] != NULL) {
      866 +                                        char *path = sip->dss_paths[i]->path;
      867 +
      868 +                                        if (path != NULL) {
      869 +                                                kmem_free(path,
      870 +                                                    strlen(path) + 1);
      871 +                                        }
      872 +                                        kmem_free(sip->dss_paths[i],
      873 +                                            sizeof (rfs4_dss_path_t));
      874 +                                }
      875 +                        }
 796  876                          kmem_free(sip->dss_paths,
 797  877                              sip->dss_npaths * sizeof (rfs4_dss_path_t *));
      878 +                }
 798  879                  kmem_free(sip, sizeof (rfs4_servinst_t));
 799  880  #ifdef DEBUG
 800  881                  n++;
 801  882  #endif
 802  883          }
 803      -        mutex_exit(&rfs4_servinst_lock);
      884 +        mutex_exit(&nsrv4->servinst_lock);
 804  885  }
 805  886  
 806  887  /*
 807  888   * Assign the current server instance to a client_t.
 808  889   * Should be called with cp->rc_dbe held.
 809  890   */
 810  891  void
 811      -rfs4_servinst_assign(rfs4_client_t *cp, rfs4_servinst_t *sip)
      892 +rfs4_servinst_assign(nfs4_srv_t *nsrv4, rfs4_client_t *cp,
      893 +    rfs4_servinst_t *sip)
 812  894  {
 813  895          ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
 814  896  
 815  897          /*
 816  898           * The lock ensures that if the current instance is in the process
 817  899           * of changing, we will see the new one.
 818  900           */
 819      -        mutex_enter(&rfs4_servinst_lock);
      901 +        mutex_enter(&nsrv4->servinst_lock);
 820  902          cp->rc_server_instance = sip;
 821      -        mutex_exit(&rfs4_servinst_lock);
      903 +        mutex_exit(&nsrv4->servinst_lock);
 822  904  }
 823  905  
 824  906  rfs4_servinst_t *
 825  907  rfs4_servinst(rfs4_client_t *cp)
 826  908  {
 827  909          ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
 828  910  
 829  911          return (cp->rc_server_instance);
 830  912  }
 831  913  
↓ open down ↓ 32 lines elided ↑ open up ↑
 864  946  /*
 865  947   * Used by rfs4_op_secinfo to get the security information from the
 866  948   * export structure associated with the component.
 867  949   */
 868  950  /* ARGSUSED */
 869  951  static nfsstat4
 870  952  do_rfs4_op_secinfo(struct compound_state *cs, char *nm, SECINFO4res *resp)
 871  953  {
 872  954          int error, different_export = 0;
 873  955          vnode_t *dvp, *vp;
 874      -        struct exportinfo *exi = NULL;
      956 +        struct exportinfo *exi;
 875  957          fid_t fid;
 876  958          uint_t count, i;
 877  959          secinfo4 *resok_val;
 878  960          struct secinfo *secp;
 879  961          seconfig_t *si;
 880  962          bool_t did_traverse = FALSE;
 881  963          int dotdot, walk;
      964 +        nfs_export_t *ne = nfs_get_export();
 882  965  
 883  966          dvp = cs->vp;
      967 +        exi = cs->exi;
      968 +        ASSERT(exi != NULL);
 884  969          dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');
 885  970  
 886  971          /*
 887  972           * If dotdotting, then need to check whether it's above the
 888  973           * root of a filesystem, or above an export point.
 889  974           */
 890  975          if (dotdot) {
      976 +                vnode_t *zone_rootvp = ne->exi_root->exi_vp;
 891  977  
      978 +                ASSERT3U(exi->exi_zoneid, ==, ne->exi_root->exi_zoneid);
 892  979                  /*
 893  980                   * If dotdotting at the root of a filesystem, then
 894  981                   * need to traverse back to the mounted-on filesystem
 895  982                   * and do the dotdot lookup there.
 896  983                   */
 897      -                if (cs->vp->v_flag & VROOT) {
      984 +                if ((dvp->v_flag & VROOT) || VN_CMP(dvp, zone_rootvp)) {
 898  985  
 899  986                          /*
 900  987                           * If at the system root, then can
 901  988                           * go up no further.
 902  989                           */
 903      -                        if (VN_CMP(dvp, rootdir))
      990 +                        if (VN_CMP(dvp, zone_rootvp))
 904  991                                  return (puterrno4(ENOENT));
 905  992  
 906  993                          /*
 907  994                           * Traverse back to the mounted-on filesystem
 908  995                           */
 909      -                        dvp = untraverse(cs->vp);
      996 +                        dvp = untraverse(dvp, zone_rootvp);
 910  997  
 911  998                          /*
 912  999                           * Set the different_export flag so we remember
 913 1000                           * to pick up a new exportinfo entry for
 914 1001                           * this new filesystem.
 915 1002                           */
 916 1003                          different_export = 1;
 917 1004                  } else {
 918 1005  
 919 1006                          /*
 920 1007                           * If dotdotting above an export point then set
 921 1008                           * the different_export to get new export info.
 922 1009                           */
 923      -                        different_export = nfs_exported(cs->exi, cs->vp);
     1010 +                        different_export = nfs_exported(exi, dvp);
 924 1011                  }
 925 1012          }
 926 1013  
 927 1014          /*
 928 1015           * Get the vnode for the component "nm".
 929 1016           */
 930 1017          error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cs->cr,
 931 1018              NULL, NULL, NULL);
 932 1019          if (error)
 933 1020                  return (puterrno4(error));
 934 1021  
 935 1022          /*
 936 1023           * If the vnode is in a pseudo filesystem, or if the security flavor
 937 1024           * used in the request is valid but not an explicitly shared flavor,
 938 1025           * or the access bit indicates that this is a limited access,
 939 1026           * check whether this vnode is visible.
 940 1027           */
 941 1028          if (!different_export &&
 942      -            (PSEUDO(cs->exi) || ! is_exported_sec(cs->nfsflavor, cs->exi) ||
     1029 +            (PSEUDO(exi) || !is_exported_sec(cs->nfsflavor, exi) ||
 943 1030              cs->access & CS_ACCESS_LIMITED)) {
 944      -                if (! nfs_visible(cs->exi, vp, &different_export)) {
     1031 +                if (! nfs_visible(exi, vp, &different_export)) {
 945 1032                          VN_RELE(vp);
 946 1033                          return (puterrno4(ENOENT));
 947 1034                  }
 948 1035          }
 949 1036  
 950 1037          /*
 951 1038           * If it's a mountpoint, then traverse it.
 952 1039           */
 953 1040          if (vn_ismntpt(vp)) {
 954 1041                  if ((error = traverse(&vp)) != 0) {
↓ open down ↓ 21 lines elided ↑ open up ↑
 976 1063          if (different_export) {
 977 1064  
 978 1065                  bzero(&fid, sizeof (fid));
 979 1066                  fid.fid_len = MAXFIDSZ;
 980 1067                  error = vop_fid_pseudo(vp, &fid);
 981 1068                  if (error) {
 982 1069                          VN_RELE(vp);
 983 1070                          return (puterrno4(error));
 984 1071                  }
 985 1072  
     1073 +                /* We'll need to reassign "exi". */
 986 1074                  if (dotdot)
 987 1075                          exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE);
 988 1076                  else
 989 1077                          exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp);
 990 1078  
 991 1079                  if (exi == NULL) {
 992 1080                          if (did_traverse == TRUE) {
 993 1081                                  /*
 994 1082                                   * If this vnode is a mounted-on vnode,
 995 1083                                   * but the mounted-on file system is not
 996 1084                                   * exported, send back the secinfo for
 997 1085                                   * the exported node that the mounted-on
 998 1086                                   * vnode lives in.
 999 1087                                   */
1000 1088                                  exi = cs->exi;
1001 1089                          } else {
1002 1090                                  VN_RELE(vp);
1003 1091                                  return (puterrno4(EACCES));
1004 1092                          }
1005 1093                  }
1006      -        } else {
1007      -                exi = cs->exi;
1008 1094          }
1009 1095          ASSERT(exi != NULL);
1010 1096  
1011 1097  
1012 1098          /*
1013 1099           * Create the secinfo result based on the security information
1014 1100           * from the exportinfo structure (exi).
1015 1101           *
1016 1102           * Return all flavors for a pseudo node.
1017 1103           * For a real export node, return the flavor that the client
1018 1104           * has access with.
1019 1105           */
1020      -        ASSERT(RW_LOCK_HELD(&exported_lock));
     1106 +        ASSERT(RW_LOCK_HELD(&ne->exported_lock));
1021 1107          if (PSEUDO(exi)) {
1022 1108                  count = exi->exi_export.ex_seccnt; /* total sec count */
1023 1109                  resok_val = kmem_alloc(count * sizeof (secinfo4), KM_SLEEP);
1024 1110                  secp = exi->exi_export.ex_secinfo;
1025 1111  
1026 1112                  for (i = 0; i < count; i++) {
1027 1113                          si = &secp[i].s_secinfo;
1028 1114                          resok_val[i].flavor = si->sc_rpcnum;
1029 1115                          if (resok_val[i].flavor == RPCSEC_GSS) {
1030 1116                                  rpcsec_gss_info *info;
↓ open down ↓ 342 lines elided ↑ open up ↑
1373 1459  static void
1374 1460  rfs4_op_commit(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1375 1461      struct compound_state *cs)
1376 1462  {
1377 1463          COMMIT4args *args = &argop->nfs_argop4_u.opcommit;
1378 1464          COMMIT4res *resp = &resop->nfs_resop4_u.opcommit;
1379 1465          int error;
1380 1466          vnode_t *vp = cs->vp;
1381 1467          cred_t *cr = cs->cr;
1382 1468          vattr_t va;
     1469 +        nfs4_srv_t *nsrv4;
1383 1470  
1384 1471          DTRACE_NFSV4_2(op__commit__start, struct compound_state *, cs,
1385 1472              COMMIT4args *, args);
1386 1473  
1387 1474          if (vp == NULL) {
1388 1475                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1389 1476                  goto out;
1390 1477          }
1391 1478          if (cs->access == CS_ACCESS_DENIED) {
1392 1479                  *cs->statusp = resp->status = NFS4ERR_ACCESS;
↓ open down ↓ 36 lines elided ↑ open up ↑
1429 1516                  goto out;
1430 1517          }
1431 1518  
1432 1519          error = VOP_FSYNC(vp, FSYNC, cr, NULL);
1433 1520  
1434 1521          if (error) {
1435 1522                  *cs->statusp = resp->status = puterrno4(error);
1436 1523                  goto out;
1437 1524          }
1438 1525  
     1526 +        nsrv4 = nfs4_get_srv();
1439 1527          *cs->statusp = resp->status = NFS4_OK;
1440      -        resp->writeverf = Write4verf;
     1528 +        resp->writeverf = nsrv4->write4verf;
1441 1529  out:
1442 1530          DTRACE_NFSV4_2(op__commit__done, struct compound_state *, cs,
1443 1531              COMMIT4res *, resp);
1444 1532  }
1445 1533  
1446 1534  /*
1447 1535   * rfs4_op_mknod is called from rfs4_op_create after all initial verification
1448 1536   * was completed. It does the nfsv4 create for special files.
1449 1537   */
1450 1538  /* ARGSUSED */
↓ open down ↓ 1175 lines elided ↑ open up ↑
2626 2714          }
2627 2715  
2628 2716          dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');
2629 2717  
2630 2718          /*
2631 2719           * If dotdotting, then need to check whether it's
2632 2720           * above the root of a filesystem, or above an
2633 2721           * export point.
2634 2722           */
2635 2723          if (dotdot) {
     2724 +                vnode_t *zone_rootvp;
2636 2725  
     2726 +                ASSERT(cs->exi != NULL);
     2727 +                zone_rootvp = cs->exi->exi_ne->exi_root->exi_vp;
2637 2728                  /*
2638 2729                   * If dotdotting at the root of a filesystem, then
2639 2730                   * need to traverse back to the mounted-on filesystem
2640 2731                   * and do the dotdot lookup there.
2641 2732                   */
2642      -                if (cs->vp->v_flag & VROOT) {
     2733 +                if ((cs->vp->v_flag & VROOT) || VN_CMP(cs->vp, zone_rootvp)) {
2643 2734  
2644 2735                          /*
2645 2736                           * If at the system root, then can
2646 2737                           * go up no further.
2647 2738                           */
2648      -                        if (VN_CMP(cs->vp, rootdir))
     2739 +                        if (VN_CMP(cs->vp, zone_rootvp))
2649 2740                                  return (puterrno4(ENOENT));
2650 2741  
2651 2742                          /*
2652 2743                           * Traverse back to the mounted-on filesystem
2653 2744                           */
2654      -                        cs->vp = untraverse(cs->vp);
     2745 +                        cs->vp = untraverse(cs->vp, zone_rootvp);
2655 2746  
2656 2747                          /*
2657 2748                           * Set the different_export flag so we remember
2658 2749                           * to pick up a new exportinfo entry for
2659 2750                           * this new filesystem.
2660 2751                           */
2661 2752                          different_export = 1;
2662 2753                  } else {
2663 2754  
2664 2755                          /*
↓ open down ↓ 737 lines elided ↑ open up ↑
3402 3493  /* ARGSUSED */
3403 3494  static void
3404 3495  rfs4_op_putpubfh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
3405 3496      struct compound_state *cs)
3406 3497  {
3407 3498          PUTPUBFH4res    *resp = &resop->nfs_resop4_u.opputpubfh;
3408 3499          int             error;
3409 3500          vnode_t         *vp;
3410 3501          struct exportinfo *exi, *sav_exi;
3411 3502          nfs_fh4_fmt_t   *fh_fmtp;
     3503 +        nfs_export_t *ne = nfs_get_export();
3412 3504  
3413 3505          DTRACE_NFSV4_1(op__putpubfh__start, struct compound_state *, cs);
3414 3506  
3415 3507          if (cs->vp) {
3416 3508                  VN_RELE(cs->vp);
3417 3509                  cs->vp = NULL;
3418 3510          }
3419 3511  
3420 3512          if (cs->cr)
3421 3513                  crfree(cs->cr);
3422 3514  
3423 3515          cs->cr = crdup(cs->basecr);
3424 3516  
3425      -        vp = exi_public->exi_vp;
     3517 +        vp = ne->exi_public->exi_vp;
3426 3518          if (vp == NULL) {
3427 3519                  *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
3428 3520                  goto out;
3429 3521          }
3430 3522  
3431      -        error = makefh4(&cs->fh, vp, exi_public);
     3523 +        error = makefh4(&cs->fh, vp, ne->exi_public);
3432 3524          if (error != 0) {
3433 3525                  *cs->statusp = resp->status = puterrno4(error);
3434 3526                  goto out;
3435 3527          }
3436 3528          sav_exi = cs->exi;
3437      -        if (exi_public == exi_root) {
     3529 +        if (ne->exi_public == ne->exi_root) {
3438 3530                  /*
3439 3531                   * No filesystem is actually shared public, so we default
3440 3532                   * to exi_root. In this case, we must check whether root
3441 3533                   * is exported.
3442 3534                   */
3443 3535                  fh_fmtp = (nfs_fh4_fmt_t *)cs->fh.nfs_fh4_val;
3444 3536  
3445 3537                  /*
3446 3538                   * if root filesystem is exported, the exportinfo struct that we
3447 3539                   * should use is what checkexport4 returns, because exi_root is
3448 3540                   * actually a mostly empty struct.
3449 3541                   */
3450 3542                  exi = checkexport4(&fh_fmtp->fh4_fsid,
3451 3543                      (fid_t *)&fh_fmtp->fh4_xlen, NULL);
3452      -                cs->exi = ((exi != NULL) ? exi : exi_public);
     3544 +                cs->exi = ((exi != NULL) ? exi : ne->exi_public);
3453 3545          } else {
3454 3546                  /*
3455 3547                   * it's a properly shared filesystem
3456 3548                   */
3457      -                cs->exi = exi_public;
     3549 +                cs->exi = ne->exi_public;
3458 3550          }
3459 3551  
3460 3552          if (is_system_labeled()) {
3461 3553                  bslabel_t *clabel;
3462 3554  
3463 3555                  ASSERT(req->rq_label != NULL);
3464 3556                  clabel = req->rq_label;
3465 3557                  DTRACE_PROBE2(tx__rfs4__log__info__opputpubfh__clabel, char *,
3466 3558                      "got client label from request(1)",
3467 3559                      struct svc_req *, req);
↓ open down ↓ 121 lines elided ↑ open up ↑
3589 3681                  crfree(cs->cr);
3590 3682  
3591 3683          cs->cr = crdup(cs->basecr);
3592 3684  
3593 3685          /*
3594 3686           * Using rootdir, the system root vnode,
3595 3687           * get its fid.
3596 3688           */
3597 3689          bzero(&fid, sizeof (fid));
3598 3690          fid.fid_len = MAXFIDSZ;
3599      -        error = vop_fid_pseudo(rootdir, &fid);
     3691 +        error = vop_fid_pseudo(ZONE_ROOTVP(), &fid);
3600 3692          if (error != 0) {
3601 3693                  *cs->statusp = resp->status = puterrno4(error);
3602 3694                  goto out;
3603 3695          }
3604 3696  
3605 3697          /*
3606 3698           * Then use the root fsid & fid to find out if it's exported
3607 3699           *
3608 3700           * If the server root isn't exported directly, then
3609 3701           * it should at least be a pseudo export based on
3610 3702           * one or more exports further down in the server's
3611 3703           * file tree.
3612 3704           */
3613      -        exi = checkexport4(&rootdir->v_vfsp->vfs_fsid, &fid, NULL);
     3705 +        exi = checkexport4(&ZONE_ROOTVP()->v_vfsp->vfs_fsid, &fid, NULL);
3614 3706          if (exi == NULL || exi->exi_export.ex_flags & EX_PUBLIC) {
3615 3707                  NFS4_DEBUG(rfs4_debug,
3616 3708                      (CE_WARN, "rfs4_op_putrootfh: export check failure"));
3617 3709                  *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
3618 3710                  goto out;
3619 3711          }
3620 3712  
3621 3713          /*
3622 3714           * Now make a filehandle based on the root
3623 3715           * export and root vnode.
3624 3716           */
3625      -        error = makefh4(&cs->fh, rootdir, exi);
     3717 +        error = makefh4(&cs->fh, ZONE_ROOTVP(), exi);
3626 3718          if (error != 0) {
3627 3719                  *cs->statusp = resp->status = puterrno4(error);
3628 3720                  goto out;
3629 3721          }
3630 3722  
3631 3723          sav_exi = cs->exi;
3632 3724          cs->exi = exi;
3633 3725  
3634      -        VN_HOLD(rootdir);
3635      -        cs->vp = rootdir;
     3726 +        VN_HOLD(ZONE_ROOTVP());
     3727 +        cs->vp = ZONE_ROOTVP();
3636 3728  
3637 3729          if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3638      -                VN_RELE(rootdir);
     3730 +                VN_RELE(cs->vp);
3639 3731                  cs->vp = NULL;
3640 3732                  cs->exi = sav_exi;
3641 3733                  goto out;
3642 3734          }
3643 3735  
3644 3736          *cs->statusp = resp->status = NFS4_OK;
3645 3737          cs->deleg = FALSE;
3646 3738  out:
3647 3739          DTRACE_NFSV4_2(op__putrootfh__done, struct compound_state *, cs,
3648 3740              PUTROOTFH4res *, resp);
↓ open down ↓ 65 lines elided ↑ open up ↑
3714 3806          if (MANDLOCK(vp, va.va_mode)) {
3715 3807                  *cs->statusp = resp->status = NFS4ERR_ACCESS;
3716 3808                  goto out;
3717 3809          }
3718 3810  
3719 3811          data = kmem_alloc(MAXPATHLEN + 1, KM_SLEEP);
3720 3812  
3721 3813          if (is_referral) {
3722 3814                  char *s;
3723 3815                  size_t strsz;
     3816 +                kstat_named_t *stat =
     3817 +                    cs->exi->exi_ne->ne_globals->svstat[NFS_V4];
3724 3818  
3725 3819                  /* Get an artificial symlink based on a referral */
3726 3820                  s = build_symlink(vp, cs->cr, &strsz);
3727      -                global_svstat_ptr[4][NFS_REFERLINKS].value.ui64++;
     3821 +                stat[NFS_REFERLINKS].value.ui64++;
3728 3822                  DTRACE_PROBE2(nfs4serv__func__referral__reflink,
3729 3823                      vnode_t *, vp, char *, s);
3730 3824                  if (s == NULL)
3731 3825                          error = EINVAL;
3732 3826                  else {
3733 3827                          error = 0;
3734 3828                          (void) strlcpy(data, s, MAXPATHLEN + 1);
3735 3829                          kmem_free(s, strsz);
3736 3830                  }
3737 3831  
↓ open down ↓ 426 lines elided ↑ open up ↑
4164 4258                  if (vn_ismntpt(vp)) {
4165 4259                          error = EACCES;
4166 4260                  } else {
4167 4261                          /*
4168 4262                           * System V defines rmdir to return EEXIST,
4169 4263                           * not ENOTEMPTY, if the directory is not
4170 4264                           * empty.  A System V NFS server needs to map
4171 4265                           * NFS4ERR_EXIST to NFS4ERR_NOTEMPTY to
4172 4266                           * transmit over the wire.
4173 4267                           */
4174      -                        if ((error = VOP_RMDIR(dvp, name, rootdir, cs->cr,
     4268 +                        if ((error = VOP_RMDIR(dvp, name, ZONE_ROOTVP(), cs->cr,
4175 4269                              NULL, 0)) == EEXIST)
4176 4270                                  error = ENOTEMPTY;
4177 4271                  }
4178 4272          } else {
4179 4273                  if ((error = VOP_REMOVE(dvp, name, cs->cr, NULL, 0)) == 0 &&
4180 4274                      fp != NULL) {
4181 4275                          struct vattr va;
4182 4276                          vnode_t *tvp;
4183 4277  
4184 4278                          rfs4_dbe_lock(fp->rf_dbe);
↓ open down ↓ 91 lines elided ↑ open up ↑
4276 4370  /* ARGSUSED */
4277 4371  static void
4278 4372  rfs4_op_rename(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4279 4373      struct compound_state *cs)
4280 4374  {
4281 4375          RENAME4args *args = &argop->nfs_argop4_u.oprename;
4282 4376          RENAME4res *resp = &resop->nfs_resop4_u.oprename;
4283 4377          int error;
4284 4378          vnode_t *odvp;
4285 4379          vnode_t *ndvp;
4286      -        vnode_t *srcvp, *targvp;
     4380 +        vnode_t *srcvp, *targvp, *tvp;
4287 4381          struct vattr obdva, oidva, oadva;
4288 4382          struct vattr nbdva, nidva, nadva;
4289 4383          char *onm, *nnm;
4290 4384          uint_t olen, nlen;
4291 4385          rfs4_file_t *fp, *sfp;
4292 4386          int in_crit_src, in_crit_targ;
4293 4387          int fp_rele_grant_hold, sfp_rele_grant_hold;
     4388 +        int unlinked;
4294 4389          bslabel_t *clabel;
4295 4390          struct sockaddr *ca;
4296 4391          char *converted_onm = NULL;
4297 4392          char *converted_nnm = NULL;
4298 4393          nfsstat4 status;
4299 4394  
4300 4395          DTRACE_NFSV4_2(op__rename__start, struct compound_state *, cs,
4301 4396              RENAME4args *, args);
4302 4397  
4303 4398          fp = sfp = NULL;
4304      -        srcvp = targvp = NULL;
     4399 +        srcvp = targvp = tvp = NULL;
4305 4400          in_crit_src = in_crit_targ = 0;
4306 4401          fp_rele_grant_hold = sfp_rele_grant_hold = 0;
     4402 +        unlinked = 0;
4307 4403  
4308 4404          /* CURRENT_FH: target directory */
4309 4405          ndvp = cs->vp;
4310 4406          if (ndvp == NULL) {
4311 4407                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4312 4408                  goto out;
4313 4409          }
4314 4410  
4315 4411          /* SAVED_FH: from directory */
4316 4412          odvp = cs->saved_vp;
↓ open down ↓ 152 lines elided ↑ open up ↑
4469 4565          if (fp = rfs4_lookup_and_findfile(ndvp, converted_nnm, &targvp,
4470 4566              NULL, cs->cr)) {
4471 4567                  if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE,
4472 4568                      NULL)) {
4473 4569                          *cs->statusp = resp->status = NFS4ERR_DELAY;
4474 4570                          goto err_out;
4475 4571                  }
4476 4572          }
4477 4573          fp_rele_grant_hold = 1;
4478 4574  
4479      -
4480 4575          /* Check for NBMAND lock on both source and target */
4481 4576          if (nbl_need_check(srcvp)) {
4482 4577                  nbl_start_crit(srcvp, RW_READER);
4483 4578                  in_crit_src = 1;
4484 4579                  if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL)) {
4485 4580                          *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4486 4581                          goto err_out;
4487 4582                  }
4488 4583          }
4489 4584  
↓ open down ↓ 14 lines elided ↑ open up ↑
4504 4599                  error = VOP_GETATTR(ndvp, &nbdva, 0, cs->cr, NULL);
4505 4600          }
4506 4601          if (error) {
4507 4602                  *cs->statusp = resp->status = puterrno4(error);
4508 4603                  goto err_out;
4509 4604          }
4510 4605  
4511 4606          NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.before, obdva.va_ctime)
4512 4607          NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.before, nbdva.va_ctime)
4513 4608  
4514      -        if ((error = VOP_RENAME(odvp, converted_onm, ndvp, converted_nnm,
4515      -            cs->cr, NULL, 0)) == 0 && fp != NULL) {
4516      -                struct vattr va;
4517      -                vnode_t *tvp;
     4609 +        error = VOP_RENAME(odvp, converted_onm, ndvp, converted_nnm, cs->cr,
     4610 +            NULL, 0);
4518 4611  
     4612 +        /*
     4613 +         * If target existed and was unlinked by VOP_RENAME, its state will
     4614 +         * need to be closed. To avoid deadlock, rfs4_close_all_state will be
     4615 +         * done after any necessary nbl_end_crit on srcvp and targvp.
     4616 +         */
     4617 +        if (error == 0 && fp != NULL) {
4519 4618                  rfs4_dbe_lock(fp->rf_dbe);
4520 4619                  tvp = fp->rf_vp;
4521 4620                  if (tvp)
4522 4621                          VN_HOLD(tvp);
4523 4622                  rfs4_dbe_unlock(fp->rf_dbe);
4524 4623  
4525 4624                  if (tvp) {
     4625 +                        struct vattr va;
4526 4626                          va.va_mask = AT_NLINK;
     4627 +
4527 4628                          if (!VOP_GETATTR(tvp, &va, 0, cs->cr, NULL) &&
4528 4629                              va.va_nlink == 0) {
4529      -                                /* The file is gone and so should the state */
4530      -                                if (in_crit_targ) {
4531      -                                        nbl_end_crit(targvp);
4532      -                                        in_crit_targ = 0;
     4630 +                                unlinked = 1;
     4631 +
     4632 +                                /* DEBUG data */
     4633 +                                if ((srcvp == targvp) || (tvp != targvp)) {
     4634 +                                        cmn_err(CE_WARN, "rfs4_op_rename: "
     4635 +                                            "srcvp %p, targvp: %p, tvp: %p",
     4636 +                                            (void *)srcvp, (void *)targvp,
     4637 +                                            (void *)tvp);
4533 4638                                  }
4534      -                                rfs4_close_all_state(fp);
     4639 +                        } else {
     4640 +                                VN_RELE(tvp);
4535 4641                          }
4536      -                        VN_RELE(tvp);
4537 4642                  }
4538 4643          }
4539 4644          if (error == 0)
4540 4645                  vn_renamepath(ndvp, srcvp, nnm, nlen - 1);
4541 4646  
4542 4647          if (in_crit_src)
4543 4648                  nbl_end_crit(srcvp);
4544 4649          if (srcvp)
4545 4650                  VN_RELE(srcvp);
4546 4651          if (in_crit_targ)
4547 4652                  nbl_end_crit(targvp);
4548 4653          if (targvp)
4549 4654                  VN_RELE(targvp);
4550 4655  
     4656 +        if (unlinked) {
     4657 +                ASSERT(fp != NULL);
     4658 +                ASSERT(tvp != NULL);
     4659 +
     4660 +                /* DEBUG data */
     4661 +                if (RW_READ_HELD(&tvp->v_nbllock)) {
     4662 +                        cmn_err(CE_WARN, "rfs4_op_rename: "
     4663 +                            "RW_READ_HELD(%p)", (void *)tvp);
     4664 +                }
     4665 +
     4666 +                /* The file is gone and so should the state */
     4667 +                rfs4_close_all_state(fp);
     4668 +                VN_RELE(tvp);
     4669 +        }
     4670 +
4551 4671          if (sfp) {
4552 4672                  rfs4_clear_dont_grant(sfp);
4553 4673                  rfs4_file_rele(sfp);
4554 4674          }
4555 4675          if (fp) {
4556 4676                  rfs4_clear_dont_grant(fp);
4557 4677                  rfs4_file_rele(fp);
4558 4678          }
4559 4679  
4560 4680          if (converted_onm != onm)
↓ open down ↓ 916 lines elided ↑ open up ↑
5477 5597          struct uio uio;
5478 5598          struct iovec iov[MAX_IOVECS];
5479 5599          struct iovec *iovp;
5480 5600          int iovcnt;
5481 5601          int ioflag;
5482 5602          cred_t *savecred, *cr;
5483 5603          bool_t *deleg = &cs->deleg;
5484 5604          nfsstat4 stat;
5485 5605          int in_crit = 0;
5486 5606          caller_context_t ct;
     5607 +        nfs4_srv_t *nsrv4;
5487 5608  
5488 5609          DTRACE_NFSV4_2(op__write__start, struct compound_state *, cs,
5489 5610              WRITE4args *, args);
5490 5611  
5491 5612          vp = cs->vp;
5492 5613          if (vp == NULL) {
5493 5614                  *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5494 5615                  goto out;
5495 5616          }
5496 5617          if (cs->access == CS_ACCESS_DENIED) {
↓ open down ↓ 50 lines elided ↑ open up ↑
5547 5668              (error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct))) {
5548 5669                  *cs->statusp = resp->status = puterrno4(error);
5549 5670                  goto out;
5550 5671          }
5551 5672  
5552 5673          if (MANDLOCK(vp, bva.va_mode)) {
5553 5674                  *cs->statusp = resp->status = NFS4ERR_ACCESS;
5554 5675                  goto out;
5555 5676          }
5556 5677  
     5678 +        nsrv4 = nfs4_get_srv();
5557 5679          if (args->data_len == 0) {
5558 5680                  *cs->statusp = resp->status = NFS4_OK;
5559 5681                  resp->count = 0;
5560 5682                  resp->committed = args->stable;
5561      -                resp->writeverf = Write4verf;
     5683 +                resp->writeverf = nsrv4->write4verf;
5562 5684                  goto out;
5563 5685          }
5564 5686  
5565 5687          if (args->mblk != NULL) {
5566 5688                  mblk_t *m;
5567 5689                  uint_t bytes, round_len;
5568 5690  
5569 5691                  iovcnt = 0;
5570 5692                  bytes = 0;
5571 5693                  round_len = roundup(args->data_len, BYTES_PER_XDR_UNIT);
↓ open down ↓ 75 lines elided ↑ open up ↑
5647 5769          }
5648 5770  
5649 5771          *cs->statusp = resp->status = NFS4_OK;
5650 5772          resp->count = args->data_len - uio.uio_resid;
5651 5773  
5652 5774          if (ioflag == 0)
5653 5775                  resp->committed = UNSTABLE4;
5654 5776          else
5655 5777                  resp->committed = FILE_SYNC4;
5656 5778  
5657      -        resp->writeverf = Write4verf;
     5779 +        resp->writeverf = nsrv4->write4verf;
5658 5780  
5659 5781  out:
5660 5782          if (in_crit)
5661 5783                  nbl_end_crit(vp);
5662 5784  
5663 5785          DTRACE_NFSV4_2(op__write__done, struct compound_state *, cs,
5664 5786              WRITE4res *, resp);
5665 5787  }
5666 5788  
5667 5789  
5668 5790  /* XXX put in a header file */
5669 5791  extern int      sec_svc_getcred(struct svc_req *, cred_t *,  caddr_t *, int *);
5670 5792  
5671 5793  void
5672 5794  rfs4_compound(COMPOUND4args *args, COMPOUND4res *resp, struct exportinfo *exi,
5673 5795      struct svc_req *req, cred_t *cr, int *rv)
5674 5796  {
5675 5797          uint_t i;
5676 5798          struct compound_state cs;
     5799 +        nfs4_srv_t *nsrv4;
     5800 +        nfs_export_t *ne = nfs_get_export();
5677 5801  
5678 5802          if (rv != NULL)
5679 5803                  *rv = 0;
5680 5804          rfs4_init_compound_state(&cs);
5681 5805          /*
5682      -         * Form a reply tag by copying over the reqeuest tag.
     5806 +         * Form a reply tag by copying over the request tag.
5683 5807           */
5684      -        resp->tag.utf8string_val =
5685      -            kmem_alloc(args->tag.utf8string_len, KM_SLEEP);
5686 5808          resp->tag.utf8string_len = args->tag.utf8string_len;
5687      -        bcopy(args->tag.utf8string_val, resp->tag.utf8string_val,
5688      -            resp->tag.utf8string_len);
     5809 +        if (args->tag.utf8string_len != 0) {
     5810 +                resp->tag.utf8string_val =
     5811 +                    kmem_alloc(args->tag.utf8string_len, KM_SLEEP);
     5812 +                bcopy(args->tag.utf8string_val, resp->tag.utf8string_val,
     5813 +                    resp->tag.utf8string_len);
     5814 +        } else {
     5815 +                resp->tag.utf8string_val = NULL;
     5816 +        }
5689 5817  
5690 5818          cs.statusp = &resp->status;
5691 5819          cs.req = req;
5692 5820          resp->array = NULL;
5693 5821          resp->array_len = 0;
5694 5822  
5695 5823          /*
5696 5824           * XXX for now, minorversion should be zero
5697 5825           */
5698 5826          if (args->minorversion != NFS4_MINORVERSION) {
↓ open down ↓ 25 lines elided ↑ open up ↑
5724 5852                  svcerr_badcred(req->rq_xprt);
5725 5853                  if (rv != NULL)
5726 5854                          *rv = 1;
5727 5855                  return;
5728 5856          }
5729 5857          resp->array_len = args->array_len;
5730 5858          resp->array = kmem_zalloc(args->array_len * sizeof (nfs_resop4),
5731 5859              KM_SLEEP);
5732 5860  
5733 5861          cs.basecr = cr;
     5862 +        nsrv4 = nfs4_get_srv();
5734 5863  
5735 5864          DTRACE_NFSV4_2(compound__start, struct compound_state *, &cs,
5736 5865              COMPOUND4args *, args);
5737 5866  
5738 5867          /*
5739 5868           * For now, NFS4 compound processing must be protected by
5740 5869           * exported_lock because it can access more than one exportinfo
5741 5870           * per compound and share/unshare can now change multiple
5742 5871           * exinfo structs.  The NFS2/3 code only refs 1 exportinfo
5743 5872           * per proc (excluding public exinfo), and exi_count design
5744 5873           * is sufficient to protect concurrent execution of NFS2/3
5745 5874           * ops along with unexport.  This lock will be removed as
5746 5875           * part of the NFSv4 phase 2 namespace redesign work.
5747 5876           */
5748      -        rw_enter(&exported_lock, RW_READER);
     5877 +        rw_enter(&ne->exported_lock, RW_READER);
5749 5878  
5750 5879          /*
5751 5880           * If this is the first compound we've seen, we need to start all
5752 5881           * new instances' grace periods.
5753 5882           */
5754      -        if (rfs4_seen_first_compound == 0) {
5755      -                rfs4_grace_start_new();
     5883 +        if (nsrv4->seen_first_compound == 0) {
     5884 +                rfs4_grace_start_new(nsrv4);
5756 5885                  /*
5757 5886                   * This must be set after rfs4_grace_start_new(), otherwise
5758 5887                   * another thread could proceed past here before the former
5759 5888                   * is finished.
5760 5889                   */
5761      -                rfs4_seen_first_compound = 1;
     5890 +                nsrv4->seen_first_compound = 1;
5762 5891          }
5763 5892  
5764 5893          for (i = 0; i < args->array_len && cs.cont; i++) {
5765 5894                  nfs_argop4 *argop;
5766 5895                  nfs_resop4 *resop;
5767 5896                  uint_t op;
     5897 +                kstat_named_t *stat = ne->ne_globals->rfsproccnt[NFS_V4];
5768 5898  
5769 5899                  argop = &args->array[i];
5770 5900                  resop = &resp->array[i];
5771 5901                  resop->resop = argop->argop;
5772 5902                  op = (uint_t)resop->resop;
5773 5903  
5774 5904                  if (op < rfsv4disp_cnt) {
5775 5905                          /*
5776 5906                           * Count the individual ops here; NULL and COMPOUND
5777 5907                           * are counted in common_dispatch()
5778 5908                           */
5779      -                        rfsproccnt_v4_ptr[op].value.ui64++;
     5909 +                        stat[op].value.ui64++;
5780 5910  
5781 5911                          NFS4_DEBUG(rfs4_debug > 1,
5782 5912                              (CE_NOTE, "Executing %s", rfs4_op_string[op]));
5783 5913                          (*rfsv4disptab[op].dis_proc)(argop, resop, req, &cs);
5784 5914                          NFS4_DEBUG(rfs4_debug > 1, (CE_NOTE, "%s returned %d",
5785 5915                              rfs4_op_string[op], *cs.statusp));
5786 5916                          if (*cs.statusp != NFS4_OK)
5787 5917                                  cs.cont = FALSE;
5788 5918                  } else {
5789 5919                          /*
5790 5920                           * This is effectively dead code since XDR code
5791 5921                           * will have already returned BADXDR if op doesn't
5792 5922                           * decode to legal value.  This is only done for a
5793 5923                           * day when XDR code doesn't verify v4 opcodes.
5794 5924                           */
5795 5925                          op = OP_ILLEGAL;
5796      -                        rfsproccnt_v4_ptr[OP_ILLEGAL_IDX].value.ui64++;
     5926 +                        stat[OP_ILLEGAL_IDX].value.ui64++;
5797 5927  
5798 5928                          rfs4_op_illegal(argop, resop, req, &cs);
5799 5929                          cs.cont = FALSE;
5800 5930                  }
5801 5931  
5802 5932                  /*
5803 5933                   * If not at last op, and if we are to stop, then
5804 5934                   * compact the results array.
5805 5935                   */
5806 5936                  if ((i + 1) < args->array_len && !cs.cont) {
↓ open down ↓ 2 lines elided ↑ open up ↑
5809 5939                          bcopy(resp->array,
5810 5940                              new_res, (i+1) * sizeof (nfs_resop4));
5811 5941                          kmem_free(resp->array,
5812 5942                              args->array_len * sizeof (nfs_resop4));
5813 5943  
5814 5944                          resp->array_len =  i + 1;
5815 5945                          resp->array = new_res;
5816 5946                  }
5817 5947          }
5818 5948  
5819      -        rw_exit(&exported_lock);
     5949 +        rw_exit(&ne->exported_lock);
5820 5950  
5821      -        DTRACE_NFSV4_2(compound__done, struct compound_state *, &cs,
5822      -            COMPOUND4res *, resp);
5823      -
     5951 +        /*
     5952 +         * Clear exportinfo and vnode fields from compound_state before dtrace
     5953 +         * probe, to avoid tracing residual values for path and share path.
     5954 +         */
5824 5955          if (cs.vp)
5825 5956                  VN_RELE(cs.vp);
5826 5957          if (cs.saved_vp)
5827 5958                  VN_RELE(cs.saved_vp);
     5959 +        cs.exi = cs.saved_exi = NULL;
     5960 +        cs.vp = cs.saved_vp = NULL;
     5961 +
     5962 +        DTRACE_NFSV4_2(compound__done, struct compound_state *, &cs,
     5963 +            COMPOUND4res *, resp);
     5964 +
5828 5965          if (cs.saved_fh.nfs_fh4_val)
5829 5966                  kmem_free(cs.saved_fh.nfs_fh4_val, NFS4_FHSIZE);
5830 5967  
5831 5968          if (cs.basecr)
5832 5969                  crfree(cs.basecr);
5833 5970          if (cs.cr)
5834 5971                  crfree(cs.cr);
5835 5972          /*
5836 5973           * done with this compound request, free the label
5837 5974           */
↓ open down ↓ 683 lines elided ↑ open up ↑
6521 6658                  cs->mandlock = MANDLOCK(cs->vp, cva.va_mode);
6522 6659  
6523 6660                  /*
6524 6661                   * Truncate the file if necessary; this would be
6525 6662                   * the case for create over an existing file.
6526 6663                   */
6527 6664  
6528 6665                  if (trunc) {
6529 6666                          int in_crit = 0;
6530 6667                          rfs4_file_t *fp;
     6668 +                        nfs4_srv_t *nsrv4;
6531 6669                          bool_t create = FALSE;
6532 6670  
6533 6671                          /*
6534 6672                           * We are writing over an existing file.
6535 6673                           * Check to see if we need to recall a delegation.
6536 6674                           */
6537      -                        rfs4_hold_deleg_policy();
     6675 +                        nsrv4 = nfs4_get_srv();
     6676 +                        rfs4_hold_deleg_policy(nsrv4);
6538 6677                          if ((fp = rfs4_findfile(vp, NULL, &create)) != NULL) {
6539 6678                                  if (rfs4_check_delegated_byfp(FWRITE, fp,
6540 6679                                      (reqsize == 0), FALSE, FALSE, &clientid)) {
6541 6680                                          rfs4_file_rele(fp);
6542      -                                        rfs4_rele_deleg_policy();
     6681 +                                        rfs4_rele_deleg_policy(nsrv4);
6543 6682                                          VN_RELE(vp);
6544 6683                                          *attrset = 0;
6545 6684                                          return (NFS4ERR_DELAY);
6546 6685                                  }
6547 6686                                  rfs4_file_rele(fp);
6548 6687                          }
6549      -                        rfs4_rele_deleg_policy();
     6688 +                        rfs4_rele_deleg_policy(nsrv4);
6550 6689  
6551 6690                          if (nbl_need_check(vp)) {
6552 6691                                  in_crit = 1;
6553 6692  
6554 6693                                  ASSERT(reqsize == 0);
6555 6694  
6556 6695                                  nbl_start_crit(vp, RW_READER);
6557 6696                                  if (nbl_conflict(vp, NBL_WRITE, 0,
6558 6697                                      cva.va_size, 0, NULL)) {
6559 6698                                          in_crit = 0;
↓ open down ↓ 1537 lines elided ↑ open up ↑
8097 8236  /*ARGSUSED*/
8098 8237  void
8099 8238  rfs4_op_setclientid_confirm(nfs_argop4 *argop, nfs_resop4 *resop,
8100 8239      struct svc_req *req, struct compound_state *cs)
8101 8240  {
8102 8241          SETCLIENTID_CONFIRM4args *args =
8103 8242              &argop->nfs_argop4_u.opsetclientid_confirm;
8104 8243          SETCLIENTID_CONFIRM4res *res =
8105 8244              &resop->nfs_resop4_u.opsetclientid_confirm;
8106 8245          rfs4_client_t *cp, *cptoclose = NULL;
     8246 +        nfs4_srv_t *nsrv4;
8107 8247  
8108 8248          DTRACE_NFSV4_2(op__setclientid__confirm__start,
8109 8249              struct compound_state *, cs,
8110 8250              SETCLIENTID_CONFIRM4args *, args);
8111 8251  
     8252 +        nsrv4 = nfs4_get_srv();
8112 8253          *cs->statusp = res->status = NFS4_OK;
8113 8254  
8114 8255          cp = rfs4_findclient_by_id(args->clientid, TRUE);
8115 8256  
8116 8257          if (cp == NULL) {
8117 8258                  *cs->statusp = res->status =
8118 8259                      rfs4_check_clientid(&args->clientid, 1);
8119 8260                  goto out;
8120 8261          }
8121 8262  
↓ open down ↓ 15 lines elided ↑ open up ↑
8137 8278          if (cp->rc_cp_confirmed) {
8138 8279                  cptoclose = cp->rc_cp_confirmed;
8139 8280                  cptoclose->rc_ss_remove = 1;
8140 8281                  cp->rc_cp_confirmed = NULL;
8141 8282          }
8142 8283  
8143 8284          /*
8144 8285           * Update the client's associated server instance, if it's changed
8145 8286           * since the client was created.
8146 8287           */
8147      -        if (rfs4_servinst(cp) != rfs4_cur_servinst)
8148      -                rfs4_servinst_assign(cp, rfs4_cur_servinst);
     8288 +        if (rfs4_servinst(cp) != nsrv4->nfs4_cur_servinst)
     8289 +                rfs4_servinst_assign(nsrv4, cp, nsrv4->nfs4_cur_servinst);
8149 8290  
8150 8291          /*
8151 8292           * Record clientid in stable storage.
8152 8293           * Must be done after server instance has been assigned.
8153 8294           */
8154      -        rfs4_ss_clid(cp);
     8295 +        rfs4_ss_clid(nsrv4, cp);
8155 8296  
8156 8297          rfs4_dbe_unlock(cp->rc_dbe);
8157 8298  
8158 8299          if (cptoclose)
8159 8300                  /* don't need to rele, client_close does it */
8160 8301                  rfs4_client_close(cptoclose);
8161 8302  
8162 8303          /* If needed, initiate CB_NULL call for callback path */
8163 8304          rfs4_deleg_cb_check(cp);
8164 8305          rfs4_update_lease(cp);
8165 8306  
8166 8307          /*
8167 8308           * Check to see if client can perform reclaims
8168 8309           */
8169      -        rfs4_ss_chkclid(cp);
     8310 +        rfs4_ss_chkclid(nsrv4, cp);
8170 8311  
8171 8312          rfs4_client_rele(cp);
8172 8313  
8173 8314  out:
8174 8315          DTRACE_NFSV4_2(op__setclientid__confirm__done,
8175 8316              struct compound_state *, cs,
8176 8317              SETCLIENTID_CONFIRM4 *, res);
8177 8318  }
8178 8319  
8179 8320  
↓ open down ↓ 1623 lines elided ↑ open up ↑
9803 9944          int is_downrev;
9804 9945  
9805 9946          ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
9806 9947          ASSERT(ca);
9807 9948          ci = rfs4_find_clntip(ca, &create);
9808 9949          if (ci == NULL)
9809 9950                  return (0);
9810 9951          is_downrev = ci->ri_no_referrals;
9811 9952          rfs4_dbe_rele(ci->ri_dbe);
9812 9953          return (is_downrev);
     9954 +}
     9955 +
     9956 +/*
     9957 + * Do the main work of handling HA-NFSv4 Resource Group failover on
     9958 + * Sun Cluster.
     9959 + * We need to detect whether any RG admin paths have been added or removed,
     9960 + * and adjust resources accordingly.
     9961 + * Currently we're using a very inefficient algorithm, ~ 2 * O(n**2). In
     9962 + * order to scale, the list and array of paths need to be held in more
     9963 + * suitable data structures.
     9964 + */
     9965 +static void
     9966 +hanfsv4_failover(nfs4_srv_t *nsrv4)
     9967 +{
     9968 +        int i, start_grace, numadded_paths = 0;
     9969 +        char **added_paths = NULL;
     9970 +        rfs4_dss_path_t *dss_path;
     9971 +
     9972 +        /*
     9973 +         * Note: currently, dss_pathlist cannot be NULL, since
     9974 +         * it will always include an entry for NFS4_DSS_VAR_DIR. If we
     9975 +         * make the latter dynamically specified too, the following will
     9976 +         * need to be adjusted.
     9977 +         */
     9978 +
     9979 +        /*
     9980 +         * First, look for removed paths: RGs that have been failed-over
     9981 +         * away from this node.
     9982 +         * Walk the "currently-serving" dss_pathlist and, for each
     9983 +         * path, check if it is on the "passed-in" rfs4_dss_newpaths array
     9984 +         * from nfsd. If not, that RG path has been removed.
     9985 +         *
     9986 +         * Note that nfsd has sorted rfs4_dss_newpaths for us, and removed
     9987 +         * any duplicates.
     9988 +         */
     9989 +        dss_path = nsrv4->dss_pathlist;
     9990 +        do {
     9991 +                int found = 0;
     9992 +                char *path = dss_path->path;
     9993 +
     9994 +                /* used only for non-HA, so it must never be removed */
     9995 +                if (strcmp(path, NFS4_DSS_VAR_DIR) == 0) {
     9996 +                        dss_path = dss_path->next;
     9997 +                        continue;
     9998 +                }
     9999 +
     10000 +                for (i = 0; i < rfs4_dss_numnewpaths; i++) {
     10001 +                        int cmpret;
     10002 +                        char *newpath = rfs4_dss_newpaths[i];
     10003 +
     10004 +                        /*
     10005 +                         * Since nfsd has sorted rfs4_dss_newpaths for us,
     10006 +                         * once the return from strcmp is negative we know
     10007 +                         * we've passed the point where "path" should be,
     10008 +                         * and can stop searching: "path" has been removed.
     10009 +                         */
     10010 +                        cmpret = strcmp(path, newpath);
     10011 +                        if (cmpret < 0)
     10012 +                                break;
     10013 +                        if (cmpret == 0) {
     10014 +                                found = 1;
     10015 +                                break;
     10016 +                        }
     10017 +                }
     10018 +
     10019 +                if (found == 0) {
     10020 +                        unsigned index = dss_path->index;
     10021 +                        rfs4_servinst_t *sip = dss_path->sip;
     10022 +                        rfs4_dss_path_t *path_next = dss_path->next;
     10023 +
     10024 +                        /*
     10025 +                         * This path has been removed.
     10026 +                         * We must clear out the servinst reference to
     10027 +                         * it, since it's now owned by another
     10028 +                         * node: we should not attempt to touch it.
     10029 +                         */
     10030 +                        ASSERT(dss_path == sip->dss_paths[index]);
     10031 +                        sip->dss_paths[index] = NULL;
     10032 +
     10033 +                        /* remove from "currently-serving" list, and destroy */
     10034 +                        remque(dss_path);
     10035 +                        /* allow for NUL */
     10036 +                        kmem_free(dss_path->path, strlen(dss_path->path) + 1);
     10037 +                        kmem_free(dss_path, sizeof (rfs4_dss_path_t));
     10038 +
     10039 +                        dss_path = path_next;
     10040 +                } else {
     10041 +                        /* path was found; not removed */
     10042 +                        dss_path = dss_path->next;
     10043 +                }
     10044 +        } while (dss_path != nsrv4->dss_pathlist);
     10045 +
     10046 +        /*
     10047 +         * Now, look for added paths: RGs that have been failed-over
     10048 +         * to this node.
     10049 +         * Walk the "passed-in" rfs4_dss_newpaths array from nfsd and,
     10050 +         * for each path, check if it is on the "currently-serving"
     10051 +         * dss_pathlist. If not, that RG path has been added.
     10052 +         *
     10053 +         * Note: we don't do duplicate detection here; nfsd does that for us.
     10054 +         *
     10055 +         * Note: numadded_paths <= rfs4_dss_numnewpaths, which gives us
     10056 +         * an upper bound for the size needed for added_paths[numadded_paths].
     10057 +         */
     10058 +
     10059 +        /* probably more space than we need, but guaranteed to be enough */
     10060 +        if (rfs4_dss_numnewpaths > 0) {
     10061 +                size_t sz = rfs4_dss_numnewpaths * sizeof (char *);
     10062 +                added_paths = kmem_zalloc(sz, KM_SLEEP);
     10063 +        }
     10064 +
     10065 +        /* walk the "passed-in" rfs4_dss_newpaths array from nfsd */
     10066 +        for (i = 0; i < rfs4_dss_numnewpaths; i++) {
     10067 +                int found = 0;
     10068 +                char *newpath = rfs4_dss_newpaths[i];
     10069 +
     10070 +                dss_path = nsrv4->dss_pathlist;
     10071 +                do {
     10072 +                        char *path = dss_path->path;
     10073 +
     10074 +                        /* used only for non-HA */
     10075 +                        if (strcmp(path, NFS4_DSS_VAR_DIR) == 0) {
     10076 +                                dss_path = dss_path->next;
     10077 +                                continue;
     10078 +                        }
     10079 +
     10080 +                        if (strncmp(path, newpath, strlen(path)) == 0) {
     10081 +                                found = 1;
     10082 +                                break;
     10083 +                        }
     10084 +
     10085 +                        dss_path = dss_path->next;
     10086 +                } while (dss_path != nsrv4->dss_pathlist);
     10087 +
     10088 +                if (found == 0) {
     10089 +                        added_paths[numadded_paths] = newpath;
     10090 +                        numadded_paths++;
     10091 +                }
     10092 +        }
     10093 +
     10094 +        /* did we find any added paths? */
     10095 +        if (numadded_paths > 0) {
     10096 +
     10097 +                /* create a new server instance, and start its grace period */
     10098 +                start_grace = 1;
     10099 +                /* CSTYLED */
     10100 +                rfs4_servinst_create(nsrv4, start_grace, numadded_paths, added_paths);
     10101 +
     10102 +                /* read in the stable storage state from these paths */
     10103 +                rfs4_dss_readstate(nsrv4, numadded_paths, added_paths);
     10104 +
     10105 +                /*
     10106 +                 * Multiple failovers during a grace period will cause
     10107 +                 * clients of the same resource group to be partitioned
     10108 +                 * into different server instances, with different
     10109 +                 * grace periods.  Since clients of the same resource
     10110 +                 * group must be subject to the same grace period,
     10111 +                 * we need to reset all currently active grace periods.
     10112 +                 */
     10113 +                rfs4_grace_reset_all(nsrv4);
     10114 +        }
     10115 +
     10116 +        if (rfs4_dss_numnewpaths > 0)
     10117 +                kmem_free(added_paths, rfs4_dss_numnewpaths * sizeof (char *));
9813 10118  }
    
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX