    
re #13613 rb4516 Tunables needs volatile keyword
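
Note: the hunk below contains only unchanged context from nfs4_vfsops.c, so the volatile-keyword change itself is not visible in this excerpt. As a rough, hypothetical sketch of what the review title describes (not taken from the diff), a run-time tunable such as nfs4_max_mount_retry, which is defined later in this file and may be patched via /etc/system or mdb, would be declared volatile so the compiler cannot cache or constant-fold reads of it:

    /*
     * Hypothetical sketch only, not the actual change under review:
     * marking a patchable tunable volatile forces every read to go to
     * memory rather than to a cached or constant-folded value.
     */
    static volatile int nfs4_max_mount_retry = 2;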
    
      
    
          --- old/usr/src/uts/common/fs/nfs/nfs4_vfsops.c
          +++ new/usr/src/uts/common/fs/nfs/nfs4_vfsops.c
   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  
  22   22  /*
  23   23   * Copyright 2015 Nexenta Systems, Inc.  All rights reserved.
  24   24   * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
  25   25   */
  26   26  
  27   27  /*
  28   28   *      Copyright (c) 1983,1984,1985,1986,1987,1988,1989  AT&T.
  29   29   *      All Rights Reserved
  30   30   */
  31   31  
  32   32  #include <sys/param.h>
  33   33  #include <sys/types.h>
  34   34  #include <sys/systm.h>
  35   35  #include <sys/cred.h>
  36   36  #include <sys/vfs.h>
  37   37  #include <sys/vfs_opreg.h>
  38   38  #include <sys/vnode.h>
  39   39  #include <sys/pathname.h>
  40   40  #include <sys/sysmacros.h>
  41   41  #include <sys/kmem.h>
  42   42  #include <sys/mkdev.h>
  43   43  #include <sys/mount.h>
  44   44  #include <sys/statvfs.h>
  45   45  #include <sys/errno.h>
  46   46  #include <sys/debug.h>
  47   47  #include <sys/cmn_err.h>
  48   48  #include <sys/utsname.h>
  49   49  #include <sys/bootconf.h>
  50   50  #include <sys/modctl.h>
  51   51  #include <sys/acl.h>
  52   52  #include <sys/flock.h>
  53   53  #include <sys/time.h>
  54   54  #include <sys/disp.h>
  55   55  #include <sys/policy.h>
  56   56  #include <sys/socket.h>
  57   57  #include <sys/netconfig.h>
  58   58  #include <sys/dnlc.h>
  59   59  #include <sys/list.h>
  60   60  #include <sys/mntent.h>
  61   61  #include <sys/tsol/label.h>
  62   62  
  63   63  #include <rpc/types.h>
  64   64  #include <rpc/auth.h>
  65   65  #include <rpc/rpcsec_gss.h>
  66   66  #include <rpc/clnt.h>
  67   67  
  68   68  #include <nfs/nfs.h>
  69   69  #include <nfs/nfs_clnt.h>
  70   70  #include <nfs/mount.h>
  71   71  #include <nfs/nfs_acl.h>
  72   72  
  73   73  #include <fs/fs_subr.h>
  74   74  
  75   75  #include <nfs/nfs4.h>
  76   76  #include <nfs/rnode4.h>
  77   77  #include <nfs/nfs4_clnt.h>
  78   78  #include <sys/fs/autofs.h>
  79   79  
  80   80  #include <sys/sdt.h>
  81   81  
  82   82  
  83   83  /*
  84   84   * Arguments passed to thread to free data structures from forced unmount.
  85   85   */
  86   86  
  87   87  typedef struct {
  88   88          vfs_t   *fm_vfsp;
  89   89          int     fm_flag;
  90   90          cred_t  *fm_cr;
  91   91  } freemountargs_t;
  92   92  
  93   93  static void     async_free_mount(vfs_t *, int, cred_t *);
  94   94  static void     nfs4_free_mount(vfs_t *, int, cred_t *);
  95   95  static void     nfs4_free_mount_thread(freemountargs_t *);
  96   96  static int nfs4_chkdup_servinfo4(servinfo4_t *, servinfo4_t *);
  97   97  
  98   98  /*
  99   99   * From rpcsec module (common/rpcsec).
 100  100   */
 101  101  extern int sec_clnt_loadinfo(struct sec_data *, struct sec_data **, model_t);
 102  102  extern void sec_clnt_freeinfo(struct sec_data *);
 103  103  
 104  104  /*
 105  105   * The order and contents of this structure must be kept in sync with that of
 106  106   * rfsreqcnt_v4_tmpl in nfs_stats.c
 107  107   */
 108  108  static char *rfsnames_v4[] = {
 109  109          "null", "compound", "reserved", "access", "close", "commit", "create",
 110  110          "delegpurge", "delegreturn", "getattr", "getfh", "link", "lock",
 111  111          "lockt", "locku", "lookup", "lookupp", "nverify", "open", "openattr",
 112  112          "open_confirm", "open_downgrade", "putfh", "putpubfh", "putrootfh",
 113  113          "read", "readdir", "readlink", "remove", "rename", "renew",
 114  114          "restorefh", "savefh", "secinfo", "setattr", "setclientid",
 115  115          "setclientid_confirm", "verify", "write"
 116  116  };
 117  117  
 118  118  /*
 119  119   * nfs4_max_mount_retry is the number of times the client will redrive
 120  120   * a mount compound before giving up and returning failure.  The intent
 121  121   * is to redrive mount compounds which fail NFS4ERR_STALE so that
 122  122   * if a component of the server path being mounted goes stale, it can
 123  123   * "recover" by redriving the mount compound (LOOKUP ops).  This recovery
 124  124   * code is needed outside of the recovery framework because mount is a
 125  125   * special case.  The client doesn't create vnodes/rnodes for components
 126  126   * of the server path being mounted.  The recovery code recovers real
 127  127   * client objects, not STALE FHs which map to components of the server
 128  128   * path being mounted.
 129  129   *
 130  130   * We could just fail the mount on the first time, but that would
 131  131   * instantly trigger failover (from nfs4_mount), and the client should
 132  132   * try to re-lookup the STALE FH before doing failover.  The easiest
 133  133   * way to "re-lookup" is to simply redrive the mount compound.
 134  134   */
 135  135  static int nfs4_max_mount_retry = 2;
 136  136  
 137  137  /*
 138  138   * nfs4 vfs operations.
 139  139   */
 140  140  int             nfs4_mount(vfs_t *, vnode_t *, struct mounta *, cred_t *);
 141  141  static int      nfs4_unmount(vfs_t *, int, cred_t *);
 142  142  static int      nfs4_root(vfs_t *, vnode_t **);
 143  143  static int      nfs4_statvfs(vfs_t *, struct statvfs64 *);
 144  144  static int      nfs4_sync(vfs_t *, short, cred_t *);
 145  145  static int      nfs4_vget(vfs_t *, vnode_t **, fid_t *);
 146  146  static int      nfs4_mountroot(vfs_t *, whymountroot_t);
 147  147  static void     nfs4_freevfs(vfs_t *);
 148  148  
 149  149  static int      nfs4rootvp(vnode_t **, vfs_t *, struct servinfo4 *,
 150  150                      int, cred_t *, zone_t *);
 151  151  
 152  152  vfsops_t        *nfs4_vfsops;
 153  153  
 154  154  int nfs4_vfsinit(void);
 155  155  void nfs4_vfsfini(void);
 156  156  static void nfs4setclientid_init(void);
 157  157  static void nfs4setclientid_fini(void);
 158  158  static void nfs4setclientid_otw(mntinfo4_t *, servinfo4_t *,  cred_t *,
 159  159                  struct nfs4_server *, nfs4_error_t *, int *);
 160  160  static void     destroy_nfs4_server(nfs4_server_t *);
 161  161  static void     remove_mi(nfs4_server_t *, mntinfo4_t *);
 162  162  
 163  163  extern void nfs4_ephemeral_init(void);
 164  164  extern void nfs4_ephemeral_fini(void);
 165  165  
 166  166  /* referral related routines */
 167  167  static servinfo4_t *copy_svp(servinfo4_t *);
 168  168  static void free_knconf_contents(struct knetconfig *k);
 169  169  static char *extract_referral_point(const char *, int);
 170  170  static void setup_newsvpath(servinfo4_t *, int);
 171  171  static void update_servinfo4(servinfo4_t *, fs_location4 *,
 172  172                  struct nfs_fsl_info *, char *, int);
 173  173  
 174  174  /*
 175  175   * Initialize the vfs structure
 176  176   */
 177  177  
 178  178  static int nfs4fstyp;
 179  179  
 180  180  
 181  181  /*
 182  182   * Debug variable to check for rdma based
 183  183   * transport startup and cleanup. Controlled
 184  184   * through /etc/system. Off by default.
 185  185   */
 186  186  extern int rdma_debug;
 187  187  
 188  188  int
 189  189  nfs4init(int fstyp, char *name)
 190  190  {
 191  191          static const fs_operation_def_t nfs4_vfsops_template[] = {
 192  192                  VFSNAME_MOUNT,          { .vfs_mount = nfs4_mount },
 193  193                  VFSNAME_UNMOUNT,        { .vfs_unmount = nfs4_unmount },
 194  194                  VFSNAME_ROOT,           { .vfs_root = nfs4_root },
 195  195                  VFSNAME_STATVFS,        { .vfs_statvfs = nfs4_statvfs },
 196  196                  VFSNAME_SYNC,           { .vfs_sync = nfs4_sync },
 197  197                  VFSNAME_VGET,           { .vfs_vget = nfs4_vget },
 198  198                  VFSNAME_MOUNTROOT,      { .vfs_mountroot = nfs4_mountroot },
 199  199                  VFSNAME_FREEVFS,        { .vfs_freevfs = nfs4_freevfs },
 200  200                  NULL,                   NULL
 201  201          };
 202  202          int error;
 203  203  
 204  204          nfs4_vfsops = NULL;
 205  205          nfs4_vnodeops = NULL;
 206  206          nfs4_trigger_vnodeops = NULL;
 207  207  
 208  208          error = vfs_setfsops(fstyp, nfs4_vfsops_template, &nfs4_vfsops);
 209  209          if (error != 0) {
 210  210                  zcmn_err(GLOBAL_ZONEID, CE_WARN,
 211  211                      "nfs4init: bad vfs ops template");
 212  212                  goto out;
 213  213          }
 214  214  
 215  215          error = vn_make_ops(name, nfs4_vnodeops_template, &nfs4_vnodeops);
 216  216          if (error != 0) {
 217  217                  zcmn_err(GLOBAL_ZONEID, CE_WARN,
 218  218                      "nfs4init: bad vnode ops template");
 219  219                  goto out;
 220  220          }
 221  221  
 222  222          error = vn_make_ops("nfs4_trigger", nfs4_trigger_vnodeops_template,
 223  223              &nfs4_trigger_vnodeops);
 224  224          if (error != 0) {
 225  225                  zcmn_err(GLOBAL_ZONEID, CE_WARN,
 226  226                      "nfs4init: bad trigger vnode ops template");
 227  227                  goto out;
 228  228          }
 229  229  
 230  230          nfs4fstyp = fstyp;
 231  231          (void) nfs4_vfsinit();
 232  232          (void) nfs4_init_dot_entries();
 233  233  
 234  234  out:
 235  235          if (error) {
 236  236                  if (nfs4_trigger_vnodeops != NULL)
 237  237                          vn_freevnodeops(nfs4_trigger_vnodeops);
 238  238  
 239  239                  if (nfs4_vnodeops != NULL)
 240  240                          vn_freevnodeops(nfs4_vnodeops);
 241  241  
 242  242                  (void) vfs_freevfsops_by_type(fstyp);
 243  243          }
 244  244  
 245  245          return (error);
 246  246  }
 247  247  
 248  248  void
 249  249  nfs4fini(void)
 250  250  {
 251  251          (void) nfs4_destroy_dot_entries();
 252  252          nfs4_vfsfini();
 253  253  }
 254  254  
 255  255  /*
 256  256   * Create a new sec_data structure to store AUTH_DH related data:
 257  257   * netname, syncaddr, knetconfig. There is no AUTH_F_RPCTIMESYNC
 258  258   * flag set for NFS V4 since we avoid contacting the rpcbind
 259  259   * daemon and instead use the IP time service (IPPORT_TIMESERVER).
 260  260   *
 261  261   * sec_data can be freed by sec_clnt_freeinfo().
 262  262   */
 263  263  static struct sec_data *
 264  264  create_authdh_data(char *netname, int nlen, struct netbuf *syncaddr,
 265  265      struct knetconfig *knconf)
 266  266  {
 267  267          struct sec_data *secdata;
 268  268          dh_k4_clntdata_t *data;
 269  269          char *pf, *p;
 270  270  
 271  271          if (syncaddr == NULL || syncaddr->buf == NULL || nlen == 0)
 272  272                  return (NULL);
 273  273  
 274  274          secdata = kmem_alloc(sizeof (*secdata), KM_SLEEP);
 275  275          secdata->flags = 0;
 276  276  
 277  277          data = kmem_alloc(sizeof (*data), KM_SLEEP);
 278  278  
 279  279          data->syncaddr.maxlen = syncaddr->maxlen;
 280  280          data->syncaddr.len = syncaddr->len;
 281  281          data->syncaddr.buf = (char *)kmem_alloc(syncaddr->len, KM_SLEEP);
 282  282          bcopy(syncaddr->buf, data->syncaddr.buf, syncaddr->len);
 283  283  
 284  284          /*
 285  285           * duplicate the knconf information for the
 286  286           * new opaque data.
 287  287           */
 288  288          data->knconf = kmem_alloc(sizeof (*knconf), KM_SLEEP);
 289  289          *data->knconf = *knconf;
 290  290          pf = kmem_alloc(KNC_STRSIZE, KM_SLEEP);
 291  291          p = kmem_alloc(KNC_STRSIZE, KM_SLEEP);
 292  292          bcopy(knconf->knc_protofmly, pf, KNC_STRSIZE);
 293  293          bcopy(knconf->knc_proto, p, KNC_STRSIZE);
 294  294          data->knconf->knc_protofmly = pf;
 295  295          data->knconf->knc_proto = p;
 296  296  
 297  297          /* move server netname to the sec_data structure */
 298  298          data->netname = kmem_alloc(nlen, KM_SLEEP);
 299  299          bcopy(netname, data->netname, nlen);
 300  300          data->netnamelen = (int)nlen;
 301  301  
 302  302          secdata->secmod = AUTH_DH;
 303  303          secdata->rpcflavor = AUTH_DH;
 304  304          secdata->data = (caddr_t)data;
 305  305  
 306  306          return (secdata);
 307  307  }
 308  308  
 309  309  /*
 310  310   * Returns (deep) copy of sec_data_t. Allocates all memory required; caller
 311  311   * is responsible for freeing.
 312  312   */
 313  313  sec_data_t *
 314  314  copy_sec_data(sec_data_t *fsecdata)
 315  315  {
 316  316          sec_data_t *tsecdata;
 317  317  
 318  318          if (fsecdata == NULL)
 319  319                  return (NULL);
 320  320  
 321  321          if (fsecdata->rpcflavor == AUTH_DH) {
 322  322                  dh_k4_clntdata_t *fdata = (dh_k4_clntdata_t *)fsecdata->data;
 323  323  
 324  324                  if (fdata == NULL)
 325  325                          return (NULL);
 326  326  
 327  327                  tsecdata = (sec_data_t *)create_authdh_data(fdata->netname,
 328  328                      fdata->netnamelen, &fdata->syncaddr, fdata->knconf);
 329  329  
 330  330                  return (tsecdata);
 331  331          }
 332  332  
 333  333          tsecdata = kmem_zalloc(sizeof (sec_data_t), KM_SLEEP);
 334  334  
 335  335          tsecdata->secmod = fsecdata->secmod;
 336  336          tsecdata->rpcflavor = fsecdata->rpcflavor;
 337  337          tsecdata->flags = fsecdata->flags;
 338  338          tsecdata->uid = fsecdata->uid;
 339  339  
 340  340          if (fsecdata->rpcflavor == RPCSEC_GSS) {
 341  341                  gss_clntdata_t *gcd = (gss_clntdata_t *)fsecdata->data;
 342  342  
 343  343                  tsecdata->data = (caddr_t)copy_sec_data_gss(gcd);
 344  344          } else {
 345  345                  tsecdata->data = NULL;
 346  346          }
 347  347  
 348  348          return (tsecdata);
 349  349  }
 350  350  
 351  351  gss_clntdata_t *
 352  352  copy_sec_data_gss(gss_clntdata_t *fdata)
 353  353  {
 354  354          gss_clntdata_t *tdata;
 355  355  
 356  356          if (fdata == NULL)
 357  357                  return (NULL);
 358  358  
 359  359          tdata = kmem_zalloc(sizeof (gss_clntdata_t), KM_SLEEP);
 360  360  
 361  361          tdata->mechanism.length = fdata->mechanism.length;
 362  362          tdata->mechanism.elements = kmem_zalloc(fdata->mechanism.length,
 363  363              KM_SLEEP);
 364  364          bcopy(fdata->mechanism.elements, tdata->mechanism.elements,
 365  365              fdata->mechanism.length);
 366  366  
 367  367          tdata->service = fdata->service;
 368  368  
 369  369          (void) strcpy(tdata->uname, fdata->uname);
 370  370          (void) strcpy(tdata->inst, fdata->inst);
 371  371          (void) strcpy(tdata->realm, fdata->realm);
 372  372  
 373  373          tdata->qop = fdata->qop;
 374  374  
 375  375          return (tdata);
 376  376  }
 377  377  
 378  378  static int
 379  379  nfs4_chkdup_servinfo4(servinfo4_t *svp_head, servinfo4_t *svp)
 380  380  {
 381  381          servinfo4_t *si;
 382  382  
 383  383          /*
 384  384           * Iterate over the servinfo4 list to make sure
 385  385           * we do not have a duplicate. Skip any servinfo4
 386  386           * that has been marked "NOT IN USE"
 387  387           */
 388  388          for (si = svp_head; si; si = si->sv_next) {
 389  389                  (void) nfs_rw_enter_sig(&si->sv_lock, RW_READER, 0);
 390  390                  if (si->sv_flags & SV4_NOTINUSE) {
 391  391                          nfs_rw_exit(&si->sv_lock);
 392  392                          continue;
 393  393                  }
 394  394                  nfs_rw_exit(&si->sv_lock);
 395  395                  if (si == svp)
 396  396                          continue;
 397  397                  if (si->sv_addr.len == svp->sv_addr.len &&
 398  398                      strcmp(si->sv_knconf->knc_protofmly,
 399  399                      svp->sv_knconf->knc_protofmly) == 0 &&
 400  400                      bcmp(si->sv_addr.buf, svp->sv_addr.buf,
 401  401                      si->sv_addr.len) == 0) {
 402  402                          /* it's a duplicate */
 403  403                          return (1);
 404  404                  }
 405  405          }
 406  406          /* it's not a duplicate */
 407  407          return (0);
 408  408  }
 409  409  
 410  410  void
 411  411  nfs4_free_args(struct nfs_args *nargs)
 412  412  {
 413  413          if (nargs->knconf) {
 414  414                  if (nargs->knconf->knc_protofmly)
 415  415                          kmem_free(nargs->knconf->knc_protofmly,
 416  416                              KNC_STRSIZE);
 417  417                  if (nargs->knconf->knc_proto)
 418  418                          kmem_free(nargs->knconf->knc_proto, KNC_STRSIZE);
 419  419                  kmem_free(nargs->knconf, sizeof (*nargs->knconf));
 420  420                  nargs->knconf = NULL;
 421  421          }
 422  422  
 423  423          if (nargs->fh) {
 424  424                  kmem_free(nargs->fh, strlen(nargs->fh) + 1);
 425  425                  nargs->fh = NULL;
 426  426          }
 427  427  
 428  428          if (nargs->hostname) {
 429  429                  kmem_free(nargs->hostname, strlen(nargs->hostname) + 1);
 430  430                  nargs->hostname = NULL;
 431  431          }
 432  432  
 433  433          if (nargs->addr) {
 434  434                  if (nargs->addr->buf) {
 435  435                          ASSERT(nargs->addr->len);
 436  436                          kmem_free(nargs->addr->buf, nargs->addr->len);
 437  437                  }
 438  438                  kmem_free(nargs->addr, sizeof (struct netbuf));
 439  439                  nargs->addr = NULL;
 440  440          }
 441  441  
 442  442          if (nargs->syncaddr) {
 443  443                  ASSERT(nargs->syncaddr->len);
 444  444                  if (nargs->syncaddr->buf) {
 445  445                          ASSERT(nargs->syncaddr->len);
 446  446                          kmem_free(nargs->syncaddr->buf, nargs->syncaddr->len);
 447  447                  }
 448  448                  kmem_free(nargs->syncaddr, sizeof (struct netbuf));
 449  449                  nargs->syncaddr = NULL;
 450  450          }
 451  451  
 452  452          if (nargs->netname) {
 453  453                  kmem_free(nargs->netname, strlen(nargs->netname) + 1);
 454  454                  nargs->netname = NULL;
 455  455          }
 456  456  
 457  457          if (nargs->nfs_ext_u.nfs_extA.secdata) {
 458  458                  sec_clnt_freeinfo(
 459  459                      nargs->nfs_ext_u.nfs_extA.secdata);
 460  460                  nargs->nfs_ext_u.nfs_extA.secdata = NULL;
 461  461          }
 462  462  }
 463  463  
 464  464  
 465  465  int
 466  466  nfs4_copyin(char *data, int datalen, struct nfs_args *nargs)
 467  467  {
 468  468  
 469  469          int error;
 470  470          size_t hlen;                    /* length of hostname */
 471  471          size_t nlen;                    /* length of netname */
 472  472          char netname[MAXNETNAMELEN+1];  /* server's netname */
 473  473          struct netbuf addr;             /* server's address */
 474  474          struct netbuf syncaddr;         /* AUTH_DES time sync addr */
 475  475          struct knetconfig *knconf;              /* transport structure */
 476  476          struct sec_data *secdata = NULL;        /* security data */
 477  477          STRUCT_DECL(nfs_args, args);            /* nfs mount arguments */
 478  478          STRUCT_DECL(knetconfig, knconf_tmp);
 479  479          STRUCT_DECL(netbuf, addr_tmp);
 480  480          int flags;
 481  481          char *p, *pf;
 482  482          struct pathname pn;
 483  483          char *userbufptr;
 484  484  
 485  485  
 486  486          bzero(nargs, sizeof (*nargs));
 487  487  
 488  488          STRUCT_INIT(args, get_udatamodel());
 489  489          bzero(STRUCT_BUF(args), SIZEOF_STRUCT(nfs_args, DATAMODEL_NATIVE));
 490  490          if (copyin(data, STRUCT_BUF(args), MIN(datalen,
 491  491              STRUCT_SIZE(args))))
 492  492                  return (EFAULT);
 493  493  
 494  494          nargs->wsize = STRUCT_FGET(args, wsize);
 495  495          nargs->rsize = STRUCT_FGET(args, rsize);
 496  496          nargs->timeo = STRUCT_FGET(args, timeo);
 497  497          nargs->retrans = STRUCT_FGET(args, retrans);
 498  498          nargs->acregmin = STRUCT_FGET(args, acregmin);
 499  499          nargs->acregmax = STRUCT_FGET(args, acregmax);
 500  500          nargs->acdirmin = STRUCT_FGET(args, acdirmin);
 501  501          nargs->acdirmax = STRUCT_FGET(args, acdirmax);
 502  502  
 503  503          flags = STRUCT_FGET(args, flags);
 504  504          nargs->flags = flags;
 505  505  
 506  506          addr.buf = NULL;
 507  507          syncaddr.buf = NULL;
 508  508  
 509  509  
 510  510          /*
 511  511           * Allocate space for a knetconfig structure and
 512  512           * its strings and copy in from user-land.
 513  513           */
 514  514          knconf = kmem_zalloc(sizeof (*knconf), KM_SLEEP);
 515  515          STRUCT_INIT(knconf_tmp, get_udatamodel());
 516  516          if (copyin(STRUCT_FGETP(args, knconf), STRUCT_BUF(knconf_tmp),
 517  517              STRUCT_SIZE(knconf_tmp))) {
 518  518                  kmem_free(knconf, sizeof (*knconf));
 519  519                  return (EFAULT);
 520  520          }
 521  521  
 522  522          knconf->knc_semantics = STRUCT_FGET(knconf_tmp, knc_semantics);
 523  523          knconf->knc_protofmly = STRUCT_FGETP(knconf_tmp, knc_protofmly);
 524  524          knconf->knc_proto = STRUCT_FGETP(knconf_tmp, knc_proto);
 525  525          if (get_udatamodel() != DATAMODEL_LP64) {
 526  526                  knconf->knc_rdev = expldev(STRUCT_FGET(knconf_tmp, knc_rdev));
 527  527          } else {
 528  528                  knconf->knc_rdev = STRUCT_FGET(knconf_tmp, knc_rdev);
 529  529          }
 530  530  
 531  531          pf = kmem_alloc(KNC_STRSIZE, KM_SLEEP);
 532  532          p = kmem_alloc(KNC_STRSIZE, KM_SLEEP);
 533  533          error = copyinstr(knconf->knc_protofmly, pf, KNC_STRSIZE, NULL);
 534  534          if (error) {
 535  535                  kmem_free(pf, KNC_STRSIZE);
 536  536                  kmem_free(p, KNC_STRSIZE);
 537  537                  kmem_free(knconf, sizeof (*knconf));
 538  538                  return (error);
 539  539          }
 540  540  
 541  541          error = copyinstr(knconf->knc_proto, p, KNC_STRSIZE, NULL);
 542  542          if (error) {
 543  543                  kmem_free(pf, KNC_STRSIZE);
 544  544                  kmem_free(p, KNC_STRSIZE);
 545  545                  kmem_free(knconf, sizeof (*knconf));
 546  546                  return (error);
 547  547          }
 548  548  
 549  549  
 550  550          knconf->knc_protofmly = pf;
 551  551          knconf->knc_proto = p;
 552  552  
 553  553          nargs->knconf = knconf;
 554  554  
 555  555          /*
 556  556           * Get server address
 557  557           */
 558  558          STRUCT_INIT(addr_tmp, get_udatamodel());
 559  559          if (copyin(STRUCT_FGETP(args, addr), STRUCT_BUF(addr_tmp),
 560  560              STRUCT_SIZE(addr_tmp))) {
 561  561                  error = EFAULT;
 562  562                  goto errout;
 563  563          }
 564  564  
 565  565          nargs->addr = kmem_zalloc(sizeof (struct netbuf), KM_SLEEP);
 566  566          userbufptr = STRUCT_FGETP(addr_tmp, buf);
 567  567          addr.len = STRUCT_FGET(addr_tmp, len);
 568  568          addr.buf = kmem_alloc(addr.len, KM_SLEEP);
 569  569          addr.maxlen = addr.len;
 570  570          if (copyin(userbufptr, addr.buf, addr.len)) {
 571  571                  kmem_free(addr.buf, addr.len);
 572  572                  error = EFAULT;
 573  573                  goto errout;
 574  574          }
 575  575          bcopy(&addr, nargs->addr, sizeof (struct netbuf));
 576  576  
 577  577          /*
 578  578           * Get the root fhandle
 579  579           */
 580  580          error = pn_get(STRUCT_FGETP(args, fh), UIO_USERSPACE, &pn);
 581  581          if (error)
 582  582                  goto errout;
 583  583  
 584  584          /* Volatile fh: keep server paths, so use actual-size strings */
 585  585          nargs->fh = kmem_alloc(pn.pn_pathlen + 1, KM_SLEEP);
 586  586          bcopy(pn.pn_path, nargs->fh, pn.pn_pathlen);
 587  587          nargs->fh[pn.pn_pathlen] = '\0';
 588  588          pn_free(&pn);
 589  589  
 590  590  
 591  591          /*
 592  592           * Get server's hostname
 593  593           */
 594  594          if (flags & NFSMNT_HOSTNAME) {
 595  595                  error = copyinstr(STRUCT_FGETP(args, hostname),
 596  596                      netname, sizeof (netname), &hlen);
 597  597                  if (error)
 598  598                          goto errout;
 599  599                  nargs->hostname = kmem_zalloc(hlen, KM_SLEEP);
 600  600                  (void) strcpy(nargs->hostname, netname);
 601  601  
 602  602          } else {
 603  603                  nargs->hostname = NULL;
 604  604          }
 605  605  
 606  606  
 607  607          /*
 608  608           * If there are syncaddr and netname data, load them in. This is
 609  609           * to support data needed for NFSV4 when AUTH_DH is the negotiated
 610  610           * flavor via SECINFO. (instead of using MOUNT protocol in V3).
 611  611           */
 612  612          netname[0] = '\0';
 613  613          if (flags & NFSMNT_SECURE) {
 614  614  
 615  615                  /* get syncaddr */
 616  616                  STRUCT_INIT(addr_tmp, get_udatamodel());
 617  617                  if (copyin(STRUCT_FGETP(args, syncaddr), STRUCT_BUF(addr_tmp),
 618  618                      STRUCT_SIZE(addr_tmp))) {
 619  619                          error = EINVAL;
 620  620                          goto errout;
 621  621                  }
 622  622                  userbufptr = STRUCT_FGETP(addr_tmp, buf);
 623  623                  syncaddr.len = STRUCT_FGET(addr_tmp, len);
 624  624                  syncaddr.buf = kmem_alloc(syncaddr.len, KM_SLEEP);
 625  625                  syncaddr.maxlen = syncaddr.len;
 626  626                  if (copyin(userbufptr, syncaddr.buf, syncaddr.len)) {
 627  627                          kmem_free(syncaddr.buf, syncaddr.len);
 628  628                          error = EFAULT;
 629  629                          goto errout;
 630  630                  }
 631  631  
 632  632                  nargs->syncaddr = kmem_alloc(sizeof (struct netbuf), KM_SLEEP);
 633  633                  bcopy(&syncaddr, nargs->syncaddr, sizeof (struct netbuf));
 634  634  
 635  635                  /* get server's netname */
 636  636                  if (copyinstr(STRUCT_FGETP(args, netname), netname,
 637  637                      sizeof (netname), &nlen)) {
 638  638                          error = EFAULT;
 639  639                          goto errout;
 640  640                  }
 641  641  
 642  642                  netname[nlen] = '\0';
 643  643                  nargs->netname = kmem_zalloc(nlen, KM_SLEEP);
 644  644                  (void) strcpy(nargs->netname, netname);
 645  645          }
 646  646  
 647  647          /*
 648  648           * Get the extension data which has the security data structure.
 649  649           * This includes data for AUTH_SYS as well.
 650  650           */
 651  651          if (flags & NFSMNT_NEWARGS) {
 652  652                  nargs->nfs_args_ext = STRUCT_FGET(args, nfs_args_ext);
 653  653                  if (nargs->nfs_args_ext == NFS_ARGS_EXTA ||
 654  654                      nargs->nfs_args_ext == NFS_ARGS_EXTB) {
 655  655                          /*
 656  656                           * Indicating the application is using the new
 657  657                           * sec_data structure to pass in the security
 658  658                           * data.
 659  659                           */
 660  660                          if (STRUCT_FGETP(args,
 661  661                              nfs_ext_u.nfs_extA.secdata) != NULL) {
 662  662                                  error = sec_clnt_loadinfo(
 663  663                                      (struct sec_data *)STRUCT_FGETP(args,
 664  664                                      nfs_ext_u.nfs_extA.secdata),
 665  665                                      &secdata, get_udatamodel());
 666  666                          }
 667  667                          nargs->nfs_ext_u.nfs_extA.secdata = secdata;
 668  668                  }
 669  669          }
 670  670  
 671  671          if (error)
 672  672                  goto errout;
 673  673  
 674  674          /*
 675  675           * Failover support:
 676  676           *
 677  677           * We may have a linked list of nfs_args structures,
 678  678           * which means the user is looking for failover.  If
 679  679           * the mount is not "read-only", or is "soft",
 680  680           * we want to bail out with EINVAL.
 681  681           */
 682  682          if (nargs->nfs_args_ext == NFS_ARGS_EXTB)
 683  683                  nargs->nfs_ext_u.nfs_extB.next =
 684  684                      STRUCT_FGETP(args, nfs_ext_u.nfs_extB.next);
 685  685  
 686  686  errout:
 687  687          if (error)
 688  688                  nfs4_free_args(nargs);
 689  689  
 690  690          return (error);
 691  691  }
 692  692  
 693  693  
 694  694  /*
 695  695   * nfs mount vfsop
 696  696   * Set up mount info record and attach it to vfs struct.
 697  697   */
 698  698  int
 699  699  nfs4_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr)
 700  700  {
 701  701          char *data = uap->dataptr;
 702  702          int error;
 703  703          vnode_t *rtvp;                  /* the server's root */
 704  704          mntinfo4_t *mi;                 /* mount info, pointed at by vfs */
 705  705          struct knetconfig *rdma_knconf; /* rdma transport structure */
 706  706          rnode4_t *rp;
 707  707          struct servinfo4 *svp;          /* nfs server info */
 708  708          struct servinfo4 *svp_tail = NULL; /* previous nfs server info */
 709  709          struct servinfo4 *svp_head;     /* first nfs server info */
 710  710          struct servinfo4 *svp_2ndlast;  /* 2nd last in server info list */
 711  711          struct sec_data *secdata;       /* security data */
 712  712          struct nfs_args *args = NULL;
 713  713          int flags, addr_type, removed;
 714  714          zone_t *zone = nfs_zone();
 715  715          nfs4_error_t n4e;
 716  716          zone_t *mntzone = NULL;
 717  717  
 718  718          if (secpolicy_fs_mount(cr, mvp, vfsp) != 0)
 719  719                  return (EPERM);
 720  720          if (mvp->v_type != VDIR)
 721  721                  return (ENOTDIR);
 722  722  
 723  723          /*
 724  724           * get arguments
 725  725           *
 726  726           * nfs_args is now versioned and is extensible, so
 727  727           * uap->datalen might be different from sizeof (args)
 728  728           * in a compatible situation.
 729  729           */
 730  730  more:
 731  731          if (!(uap->flags & MS_SYSSPACE)) {
 732  732                  if (args == NULL)
 733  733                          args = kmem_zalloc(sizeof (struct nfs_args), KM_SLEEP);
 734  734                  else
 735  735                          nfs4_free_args(args);
 736  736                  error = nfs4_copyin(data, uap->datalen, args);
 737  737                  if (error) {
 738  738                          if (args) {
 739  739                                  kmem_free(args, sizeof (*args));
 740  740                          }
 741  741                          return (error);
 742  742                  }
 743  743          } else {
 744  744                  args = (struct nfs_args *)data;
 745  745          }
 746  746  
 747  747          flags = args->flags;
 748  748  
 749  749          /*
 750  750           * If the request changes the locking type, disallow the remount,
 751  751           * because it's questionable whether we can transfer the
 752  752           * locking state correctly.
 753  753           */
 754  754          if (uap->flags & MS_REMOUNT) {
 755  755                  if (!(uap->flags & MS_SYSSPACE)) {
 756  756                          nfs4_free_args(args);
 757  757                          kmem_free(args, sizeof (*args));
 758  758                  }
 759  759                  if ((mi = VFTOMI4(vfsp)) != NULL) {
 760  760                          uint_t new_mi_llock;
 761  761                          uint_t old_mi_llock;
 762  762                          new_mi_llock = (flags & NFSMNT_LLOCK) ? 1 : 0;
 763  763                          old_mi_llock = (mi->mi_flags & MI4_LLOCK) ? 1 : 0;
 764  764                          if (old_mi_llock != new_mi_llock)
 765  765                                  return (EBUSY);
 766  766                  }
 767  767                  return (0);
 768  768          }
 769  769  
 770  770          /*
 771  771           * For ephemeral mount trigger stub vnodes, we have two problems
 772  772           * to solve: racing threads will likely fail the v_count check, and
 773  773           * we want only one to proceed with the mount.
 774  774           *
 775  775           * For stubs, if the mount has already occurred (via a racing thread),
 776  776           * just return success. If not, skip the v_count check and proceed.
 777  777           * Note that we are already serialised at this point.
 778  778           */
 779  779          mutex_enter(&mvp->v_lock);
 780  780          if (vn_matchops(mvp, nfs4_trigger_vnodeops)) {
 781  781                  /* mntpt is a v4 stub vnode */
 782  782                  ASSERT(RP_ISSTUB(VTOR4(mvp)));
 783  783                  ASSERT(!(uap->flags & MS_OVERLAY));
 784  784                  ASSERT(!(mvp->v_flag & VROOT));
 785  785                  if (vn_mountedvfs(mvp) != NULL) {
 786  786                          /* ephemeral mount has already occurred */
 787  787                          ASSERT(uap->flags & MS_SYSSPACE);
 788  788                          mutex_exit(&mvp->v_lock);
 789  789                          return (0);
 790  790                  }
 791  791          } else {
 792  792                  /* mntpt is a non-v4 or v4 non-stub vnode */
 793  793                  if (!(uap->flags & MS_OVERLAY) &&
 794  794                      (mvp->v_count != 1 || (mvp->v_flag & VROOT))) {
 795  795                          mutex_exit(&mvp->v_lock);
 796  796                          if (!(uap->flags & MS_SYSSPACE)) {
 797  797                                  nfs4_free_args(args);
 798  798                                  kmem_free(args, sizeof (*args));
 799  799                          }
 800  800                          return (EBUSY);
 801  801                  }
 802  802          }
 803  803          mutex_exit(&mvp->v_lock);
 804  804  
 805  805          /* make sure things are zeroed for errout: */
 806  806          rtvp = NULL;
 807  807          mi = NULL;
 808  808          secdata = NULL;
 809  809  
 810  810          /*
 811  811           * A valid knetconfig structure is required.
 812  812           */
 813  813          if (!(flags & NFSMNT_KNCONF) ||
 814  814              args->knconf == NULL || args->knconf->knc_protofmly == NULL ||
 815  815              args->knconf->knc_proto == NULL ||
 816  816              (strcmp(args->knconf->knc_proto, NC_UDP) == 0)) {
 817  817                  if (!(uap->flags & MS_SYSSPACE)) {
 818  818                          nfs4_free_args(args);
 819  819                          kmem_free(args, sizeof (*args));
 820  820                  }
 821  821                  return (EINVAL);
 822  822          }
 823  823  
 824  824          if ((strlen(args->knconf->knc_protofmly) >= KNC_STRSIZE) ||
 825  825              (strlen(args->knconf->knc_proto) >= KNC_STRSIZE)) {
 826  826                  if (!(uap->flags & MS_SYSSPACE)) {
 827  827                          nfs4_free_args(args);
 828  828                          kmem_free(args, sizeof (*args));
 829  829                  }
 830  830                  return (EINVAL);
 831  831          }
 832  832  
 833  833          /*
 834  834           * Allocate a servinfo4 struct.
 835  835           */
 836  836          svp = kmem_zalloc(sizeof (*svp), KM_SLEEP);
 837  837          nfs_rw_init(&svp->sv_lock, NULL, RW_DEFAULT, NULL);
 838  838          if (svp_tail) {
 839  839                  svp_2ndlast = svp_tail;
 840  840                  svp_tail->sv_next = svp;
 841  841          } else {
 842  842                  svp_head = svp;
 843  843                  svp_2ndlast = svp;
 844  844          }
 845  845  
 846  846          svp_tail = svp;
 847  847          svp->sv_knconf = args->knconf;
 848  848          args->knconf = NULL;
 849  849  
 850  850          /*
 851  851           * Get server address
 852  852           */
 853  853          if (args->addr == NULL || args->addr->buf == NULL) {
 854  854                  error = EINVAL;
 855  855                  goto errout;
 856  856          }
 857  857  
 858  858          svp->sv_addr.maxlen = args->addr->maxlen;
 859  859          svp->sv_addr.len = args->addr->len;
 860  860          svp->sv_addr.buf = args->addr->buf;
 861  861          args->addr->buf = NULL;
 862  862  
 863  863          /*
 864  864           * Get the root fhandle
 865  865           */
 866  866          if (args->fh == NULL || (strlen(args->fh) >= MAXPATHLEN)) {
 867  867                  error = EINVAL;
 868  868                  goto errout;
 869  869          }
 870  870  
 871  871          svp->sv_path = args->fh;
 872  872          svp->sv_pathlen = strlen(args->fh) + 1;
 873  873          args->fh = NULL;
 874  874  
 875  875          /*
 876  876           * Get server's hostname
 877  877           */
 878  878          if (flags & NFSMNT_HOSTNAME) {
 879  879                  if (args->hostname == NULL || (strlen(args->hostname) >
 880  880                      MAXNETNAMELEN)) {
 881  881                          error = EINVAL;
 882  882                          goto errout;
 883  883                  }
 884  884                  svp->sv_hostnamelen = strlen(args->hostname) + 1;
 885  885                  svp->sv_hostname = args->hostname;
 886  886                  args->hostname = NULL;
 887  887          } else {
 888  888                  char *p = "unknown-host";
 889  889                  svp->sv_hostnamelen = strlen(p) + 1;
 890  890                  svp->sv_hostname = kmem_zalloc(svp->sv_hostnamelen, KM_SLEEP);
 891  891                  (void) strcpy(svp->sv_hostname, p);
 892  892          }
 893  893  
 894  894          /*
 895  895           * RDMA MOUNT SUPPORT FOR NFS v4.
 896  896           * Determine whether it is possible to use RDMA; if so, overload the
 897  897           * knconf with an RDMA-specific knconf and free the original knconf.
 898  898           */
 899  899          if ((flags & NFSMNT_TRYRDMA) || (flags & NFSMNT_DORDMA)) {
 900  900                  /*
 901  901                   * Determine the addr type for RDMA, IPv4 or v6.
 902  902                   */
 903  903                  if (strcmp(svp->sv_knconf->knc_protofmly, NC_INET) == 0)
 904  904                          addr_type = AF_INET;
 905  905                  else if (strcmp(svp->sv_knconf->knc_protofmly, NC_INET6) == 0)
 906  906                          addr_type = AF_INET6;
 907  907  
 908  908                  if (rdma_reachable(addr_type, &svp->sv_addr,
 909  909                      &rdma_knconf) == 0) {
 910  910                          /*
 911  911                           * If successful, hijack the original knconf and
 912  912                           * replace with the new one, depending on the flags.
 913  913                           */
 914  914                          svp->sv_origknconf = svp->sv_knconf;
 915  915                          svp->sv_knconf = rdma_knconf;
 916  916                  } else {
 917  917                          if (flags & NFSMNT_TRYRDMA) {
 918  918  #ifdef  DEBUG
 919  919                                  if (rdma_debug)
 920  920                                          zcmn_err(getzoneid(), CE_WARN,
 921  921                                              "no RDMA onboard, revert\n");
 922  922  #endif
 923  923                          }
 924  924  
 925  925                          if (flags & NFSMNT_DORDMA) {
 926  926                                  /*
 927  927                                   * If proto=rdma is specified and no RDMA
 928  928                                   * path to this server is available then
 929  929                                   * ditch this server.
 930  930                                   * This is not included in the mountable
 931  931                                   * server list or the replica list.
 932  932                                   * Check whether more servers are specified
 933  933                                   * (failover); otherwise bail out of the mount.
 934  934                                   */
 935  935                                  if (args->nfs_args_ext == NFS_ARGS_EXTB &&
 936  936                                      args->nfs_ext_u.nfs_extB.next != NULL) {
 937  937                                          data = (char *)
 938  938                                              args->nfs_ext_u.nfs_extB.next;
 939  939                                          if (uap->flags & MS_RDONLY &&
 940  940                                              !(flags & NFSMNT_SOFT)) {
 941  941                                                  if (svp_head->sv_next == NULL) {
 942  942                                                          svp_tail = NULL;
 943  943                                                          svp_2ndlast = NULL;
 944  944                                                          sv4_free(svp_head);
 945  945                                                          goto more;
 946  946                                                  } else {
 947  947                                                          svp_tail = svp_2ndlast;
 948  948                                                          svp_2ndlast->sv_next =
 949  949                                                              NULL;
 950  950                                                          sv4_free(svp);
 951  951                                                          goto more;
 952  952                                                  }
 953  953                                          }
 954  954                                  } else {
 955  955                                          /*
 956  956                                           * This is the last server specified
 957  957                                           * in the nfs_args list passed down
 958  958                                           * and it's not RDMA capable.
 959  959                                           */
 960  960                                          if (svp_head->sv_next == NULL) {
 961  961                                                  /*
 962  962                                                   * Is this the only one?
 963  963                                                   */
 964  964                                                  error = EINVAL;
 965  965  #ifdef  DEBUG
 966  966                                                  if (rdma_debug)
 967  967                                                          zcmn_err(getzoneid(),
 968  968                                                              CE_WARN,
 969  969                                                              "No RDMA srv");
 970  970  #endif
 971  971                                                  goto errout;
 972  972                                          } else {
 973  973                                                  /*
 974  974                                                   * There is a list, since some
 975  975                                                   * servers specified before this
 976  976                                                   * one passed all requirements.
 977  977                                                   */
 978  978                                                  svp_tail = svp_2ndlast;
 979  979                                                  svp_2ndlast->sv_next = NULL;
 980  980                                                  sv4_free(svp);
 981  981                                                  goto proceed;
 982  982                                          }
 983  983                                  }
 984  984                          }
 985  985                  }
 986  986          }
 987  987  
 988  988          /*
 989  989           * If there are syncaddr and netname data, load them in. This is
 990  990           * to support data needed for NFSV4 when AUTH_DH is the negotiated
 991  991           * flavor via SECINFO. (instead of using MOUNT protocol in V3).
 992  992           */
 993  993          if (args->flags & NFSMNT_SECURE) {
 994  994                  svp->sv_dhsec = create_authdh_data(args->netname,
 995  995                      strlen(args->netname),
 996  996                      args->syncaddr, svp->sv_knconf);
 997  997          }
 998  998  
 999  999          /*
1000 1000           * Get the extension data which has the security data structure.
1001 1001           * This includes data for AUTH_SYS as well.
1002 1002           */
1003 1003          if (flags & NFSMNT_NEWARGS) {
1004 1004                  switch (args->nfs_args_ext) {
1005 1005                  case NFS_ARGS_EXTA:
1006 1006                  case NFS_ARGS_EXTB:
1007 1007                          /*
1008 1008                           * Indicating the application is using the new
1009 1009                           * sec_data structure to pass in the security
1010 1010                           * data.
1011 1011                           */
1012 1012                          secdata = args->nfs_ext_u.nfs_extA.secdata;
1013 1013                          if (secdata == NULL) {
1014 1014                                  error = EINVAL;
1015 1015                          } else if (uap->flags & MS_SYSSPACE) {
1016 1016                                  /*
1017 1017                                   * Need to validate the flavor here if
1018 1018                                   * sysspace; userspace was already
1019 1019                                   * validated in the nfs_copyin function.
1020 1020                                   */
1021 1021                                  switch (secdata->rpcflavor) {
1022 1022                                  case AUTH_NONE:
1023 1023                                  case AUTH_UNIX:
1024 1024                                  case AUTH_LOOPBACK:
1025 1025                                  case AUTH_DES:
1026 1026                                  case RPCSEC_GSS:
1027 1027                                          break;
1028 1028                                  default:
1029 1029                                          error = EINVAL;
1030 1030                                          goto errout;
1031 1031                                  }
1032 1032                          }
1033 1033                          args->nfs_ext_u.nfs_extA.secdata = NULL;
1034 1034                          break;
1035 1035  
1036 1036                  default:
1037 1037                          error = EINVAL;
1038 1038                          break;
1039 1039                  }
1040 1040  
1041 1041          } else if (flags & NFSMNT_SECURE) {
1042 1042                  /*
1043 1043                   * NFSMNT_SECURE is deprecated but we keep it
1044 1044                   * to support the rogue user-generated application
1045 1045                   * that may use this undocumented interface to do
1046 1046                   * AUTH_DH security, e.g. our own rexd.
1047 1047                   *
1048 1048                   * Also note that NFSMNT_SECURE is used for passing
1049 1049                   * AUTH_DH info to be used in negotiation.
1050 1050                   */
1051 1051                  secdata = create_authdh_data(args->netname,
1052 1052                      strlen(args->netname), args->syncaddr, svp->sv_knconf);
1053 1053  
1054 1054          } else {
1055 1055                  secdata = kmem_alloc(sizeof (*secdata), KM_SLEEP);
1056 1056                  secdata->secmod = secdata->rpcflavor = AUTH_SYS;
1057 1057                  secdata->data = NULL;
1058 1058          }
1059 1059  
1060 1060          svp->sv_secdata = secdata;
1061 1061  
1062 1062          /*
1063 1063           * The user does not explicitly specify a flavor, and a user-
1064 1064           * defined default flavor is passed down.
1065 1065           */
1066 1066          if (flags & NFSMNT_SECDEFAULT) {
1067 1067                  (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0);
1068 1068                  svp->sv_flags |= SV4_TRYSECDEFAULT;
1069 1069                  nfs_rw_exit(&svp->sv_lock);
1070 1070          }
1071 1071  
1072 1072          /*
1073 1073           * Failover support:
1074 1074           *
1075 1075           * We may have a linked list of nfs_args structures,
1076 1076           * which means the user is looking for failover.  If
1077 1077           * the mount is not "read-only", or is "soft",
1078 1078           * we want to bail out with EINVAL.
1079 1079           */
1080 1080          if (args->nfs_args_ext == NFS_ARGS_EXTB &&
1081 1081              args->nfs_ext_u.nfs_extB.next != NULL) {
1082 1082                  if (uap->flags & MS_RDONLY && !(flags & NFSMNT_SOFT)) {
1083 1083                          data = (char *)args->nfs_ext_u.nfs_extB.next;
1084 1084                          goto more;
1085 1085                  }
1086 1086                  error = EINVAL;
1087 1087                  goto errout;
1088 1088          }
1089 1089  
1090 1090          /*
1091 1091           * Determine the zone we're being mounted into.
1092 1092           */
1093 1093          zone_hold(mntzone = zone);              /* start with this assumption */
1094 1094          if (getzoneid() == GLOBAL_ZONEID) {
1095 1095                  zone_rele(mntzone);
1096 1096                  mntzone = zone_find_by_path(refstr_value(vfsp->vfs_mntpt));
1097 1097                  ASSERT(mntzone != NULL);
1098 1098                  if (mntzone != zone) {
1099 1099                          error = EBUSY;
1100 1100                          goto errout;
1101 1101                  }
1102 1102          }
1103 1103  
1104 1104          if (is_system_labeled()) {
1105 1105                  error = nfs_mount_label_policy(vfsp, &svp->sv_addr,
1106 1106                      svp->sv_knconf, cr);
1107 1107  
1108 1108                  if (error > 0)
1109 1109                          goto errout;
1110 1110  
1111 1111                  if (error == -1) {
1112 1112                          /* change mount to read-only to prevent write-down */
1113 1113                          vfs_setmntopt(vfsp, MNTOPT_RO, NULL, 0);
1114 1114                  }
1115 1115          }
1116 1116  
1117 1117          /*
1118 1118           * Stop the mount from going any further if the zone is going away.
1119 1119           */
1120 1120          if (zone_status_get(mntzone) >= ZONE_IS_SHUTTING_DOWN) {
1121 1121                  error = EBUSY;
1122 1122                  goto errout;
1123 1123          }
1124 1124  
1125 1125          /*
1126 1126           * Get root vnode.
1127 1127           */
1128 1128  proceed:
1129 1129          error = nfs4rootvp(&rtvp, vfsp, svp_head, flags, cr, mntzone);
1130 1130          if (error) {
1131 1131                  /* if nfs4rootvp failed, it will free svp_head */
1132 1132                  svp_head = NULL;
1133 1133                  goto errout;
1134 1134          }
1135 1135  
1136 1136          mi = VTOMI4(rtvp);
1137 1137  
1138 1138          /*
1139 1139           * Send client id to the server, if necessary
1140 1140           */
1141 1141          nfs4_error_zinit(&n4e);
1142 1142          nfs4setclientid(mi, cr, FALSE, &n4e);
1143 1143  
1144 1144          error = n4e.error;
1145 1145  
1146 1146          if (error)
1147 1147                  goto errout;
1148 1148  
1149 1149          /*
1150 1150           * Set option fields in the mount info record
1151 1151           */
1152 1152  
1153 1153          if (svp_head->sv_next) {
1154 1154                  mutex_enter(&mi->mi_lock);
1155 1155                  mi->mi_flags |= MI4_LLOCK;
1156 1156                  mutex_exit(&mi->mi_lock);
1157 1157          }
1158 1158          error = nfs4_setopts(rtvp, DATAMODEL_NATIVE, args);
1159 1159          if (error)
1160 1160                  goto errout;
1161 1161  
1162 1162          /*
1163 1163           * Time to tie in the mirror mount info at last!
1164 1164           */
1165 1165          if (flags & NFSMNT_EPHEMERAL)
1166 1166                  error = nfs4_record_ephemeral_mount(mi, mvp);
1167 1167  
1168 1168  errout:
1169 1169          if (error) {
1170 1170                  if (rtvp != NULL) {
1171 1171                          rp = VTOR4(rtvp);
1172 1172                          if (rp->r_flags & R4HASHED)
1173 1173                                  rp4_rmhash(rp);
1174 1174                  }
1175 1175                  if (mi != NULL) {
1176 1176                          nfs4_async_stop(vfsp);
1177 1177                          nfs4_async_manager_stop(vfsp);
1178 1178                          nfs4_remove_mi_from_server(mi, NULL);
1179 1179                          if (rtvp != NULL)
1180 1180                                  VN_RELE(rtvp);
1181 1181                          if (mntzone != NULL)
1182 1182                                  zone_rele(mntzone);
1183 1183                          /* need to remove it from the zone */
1184 1184                          removed = nfs4_mi_zonelist_remove(mi);
1185 1185                          if (removed)
1186 1186                                  zone_rele_ref(&mi->mi_zone_ref,
1187 1187                                      ZONE_REF_NFSV4);
1188 1188                          MI4_RELE(mi);
1189 1189                          if (!(uap->flags & MS_SYSSPACE) && args) {
1190 1190                                  nfs4_free_args(args);
1191 1191                                  kmem_free(args, sizeof (*args));
1192 1192                          }
1193 1193                          return (error);
1194 1194                  }
1195 1195                  if (svp_head)
1196 1196                          sv4_free(svp_head);
1197 1197          }
1198 1198  
1199 1199          if (!(uap->flags & MS_SYSSPACE) && args) {
1200 1200                  nfs4_free_args(args);
1201 1201                  kmem_free(args, sizeof (*args));
1202 1202          }
1203 1203          if (rtvp != NULL)
1204 1204                  VN_RELE(rtvp);
1205 1205  
1206 1206          if (mntzone != NULL)
1207 1207                  zone_rele(mntzone);
1208 1208  
1209 1209          return (error);
1210 1210  }
1211 1211  
1212 1212  #ifdef  DEBUG
1213 1213  #define VERS_MSG        "NFS4 server "
1214 1214  #else
1215 1215  #define VERS_MSG        "NFS server "
1216 1216  #endif
1217 1217  
1218 1218  #define READ_MSG        \
1219 1219          VERS_MSG "%s returned 0 for read transfer size"
1220 1220  #define WRITE_MSG       \
1221 1221          VERS_MSG "%s returned 0 for write transfer size"
1222 1222  #define SIZE_MSG        \
1223 1223          VERS_MSG "%s returned 0 for maximum file size"
1224 1224  
1225 1225  /*
1226 1226   * Get the symbolic link text from the server for a given filehandle
1227 1227   * of that symlink.
1228 1228   *
1229 1229   *      (get symlink text) PUTFH READLINK
1230 1230   */
1231 1231  static int
1232 1232  getlinktext_otw(mntinfo4_t *mi, nfs_fh4 *fh, char **linktextp, cred_t *cr,
1233 1233      int flags)
1234 1234  {
1235 1235          COMPOUND4args_clnt args;
1236 1236          COMPOUND4res_clnt res;
1237 1237          int doqueue;
1238 1238          nfs_argop4 argop[2];
1239 1239          nfs_resop4 *resop;
1240 1240          READLINK4res *lr_res;
1241 1241          uint_t len;
1242 1242          bool_t needrecov = FALSE;
1243 1243          nfs4_recov_state_t recov_state;
1244 1244          nfs4_sharedfh_t *sfh;
1245 1245          nfs4_error_t e;
1246 1246          int num_retry = nfs4_max_mount_retry;
1247 1247          int recovery = !(flags & NFS4_GETFH_NEEDSOP);
1248 1248  
1249 1249          sfh = sfh4_get(fh, mi);
1250 1250          recov_state.rs_flags = 0;
1251 1251          recov_state.rs_num_retry_despite_err = 0;
1252 1252  
1253 1253  recov_retry:
1254 1254          nfs4_error_zinit(&e);
1255 1255  
1256 1256          args.array_len = 2;
1257 1257          args.array = argop;
1258 1258          args.ctag = TAG_GET_SYMLINK;
1259 1259  
1260 1260          if (! recovery) {
1261 1261                  e.error = nfs4_start_op(mi, NULL, NULL, &recov_state);
1262 1262                  if (e.error) {
1263 1263                          sfh4_rele(&sfh);
1264 1264                          return (e.error);
1265 1265                  }
1266 1266          }
1267 1267  
1268 1268          /* 0. putfh symlink fh */
1269 1269          argop[0].argop = OP_CPUTFH;
1270 1270          argop[0].nfs_argop4_u.opcputfh.sfh = sfh;
1271 1271  
1272 1272          /* 1. readlink */
1273 1273          argop[1].argop = OP_READLINK;
1274 1274  
1275 1275          doqueue = 1;
1276 1276  
1277 1277          rfs4call(mi, &args, &res, cr, &doqueue, 0, &e);
1278 1278  
1279 1279          needrecov = nfs4_needs_recovery(&e, FALSE, mi->mi_vfsp);
1280 1280  
1281 1281          if (needrecov && !recovery && num_retry-- > 0) {
1282 1282  
1283 1283                  NFS4_DEBUG(nfs4_client_recov_debug, (CE_NOTE,
1284 1284                      "getlinktext_otw: initiating recovery\n"));
1285 1285  
1286 1286                  if (nfs4_start_recovery(&e, mi, NULL, NULL, NULL, NULL,
1287 1287                      OP_READLINK, NULL, NULL, NULL) == FALSE) {
1288 1288                          nfs4_end_op(mi, NULL, NULL, &recov_state, needrecov);
1289 1289                          if (!e.error)
1290 1290                                  xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
1291 1291                          goto recov_retry;
1292 1292                  }
1293 1293          }
1294 1294  
1295 1295          /*
1296 1296           * If non-NFS4 pcol error and/or we weren't able to recover.
1297 1297           */
1298 1298          if (e.error != 0) {
1299 1299                  if (! recovery)
1300 1300                          nfs4_end_op(mi, NULL, NULL, &recov_state, needrecov);
1301 1301                  sfh4_rele(&sfh);
1302 1302                  return (e.error);
1303 1303          }
1304 1304  
1305 1305          if (res.status) {
1306 1306                  e.error = geterrno4(res.status);
1307 1307                  xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
1308 1308                  if (! recovery)
1309 1309                          nfs4_end_op(mi, NULL, NULL, &recov_state, needrecov);
1310 1310                  sfh4_rele(&sfh);
1311 1311                  return (e.error);
1312 1312          }
1313 1313  
1314 1314          /* res.status == NFS4_OK */
1315 1315          ASSERT(res.status == NFS4_OK);
1316 1316  
1317 1317          resop = &res.array[1];  /* readlink res */
1318 1318          lr_res = &resop->nfs_resop4_u.opreadlink;
1319 1319  
1320 1320          /* treat symlink name as data */
1321 1321          *linktextp = utf8_to_str((utf8string *)&lr_res->link, &len, NULL);
1322 1322  
1323 1323          if (! recovery)
1324 1324                  nfs4_end_op(mi, NULL, NULL, &recov_state, needrecov);
1325 1325          sfh4_rele(&sfh);
1326 1326          xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
1327 1327          return (0);
1328 1328  }
1329 1329  
1330 1330  /*
1331 1331   * Skip over consecutive slashes and "/./" in a pathname.
1332 1332   */
1333 1333  void
1334 1334  pathname_skipslashdot(struct pathname *pnp)
1335 1335  {
1336 1336          char *c1, *c2;
1337 1337  
1338 1338          while (pnp->pn_pathlen > 0 && *pnp->pn_path == '/') {
1339 1339  
1340 1340                  c1 = pnp->pn_path + 1;
1341 1341                  c2 = pnp->pn_path + 2;
1342 1342  
1343 1343                  if (*c1 == '.' && (*c2 == '/' || *c2 == '\0')) {
1344 1344                          pnp->pn_path = pnp->pn_path + 2; /* skip "/." */
1345 1345                          pnp->pn_pathlen = pnp->pn_pathlen - 2;
1346 1346                  } else {
1347 1347                          pnp->pn_path++;
1348 1348                          pnp->pn_pathlen--;
1349 1349                  }
1350 1350          }
1351 1351  }
1352 1352  
1353 1353  /*
1354 1354   * Resolve a symbolic link path. The symlink is in the nth component of
1355 1355   * svp->sv_path and has an nfs4 file handle "fh".
1356 1356   * Upon return, the sv_path will point to the new path that has the nth
1357 1357   * component resolved to its symlink text.
1358 1358   */
1359 1359  int
1360 1360  resolve_sympath(mntinfo4_t *mi, servinfo4_t *svp, int nth, nfs_fh4 *fh,
1361 1361      cred_t *cr, int flags)
1362 1362  {
1363 1363          char *oldpath;
1364 1364          char *symlink, *newpath;
1365 1365          struct pathname oldpn, newpn;
1366 1366          char component[MAXNAMELEN];
1367 1367          int i, addlen, error = 0;
1368 1368          int oldpathlen;
1369 1369  
1370 1370          /* Get the symbolic link text over the wire. */
1371 1371          error = getlinktext_otw(mi, fh, &symlink, cr, flags);
1372 1372  
1373 1373          if (error || symlink == NULL || strlen(symlink) == 0)
1374 1374                  return (error);
1375 1375  
1376 1376          /*
1377 1377           * Compose the new pathname.
1378 1378           * Note:
1379 1379           *    - only the nth component is resolved for the pathname.
1380 1380           *    - pathname.pn_pathlen does not count the ending null byte.
1381 1381           */
1382 1382          (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0);
1383 1383          oldpath = svp->sv_path;
1384 1384          oldpathlen = svp->sv_pathlen;
1385 1385          if (error = pn_get(oldpath, UIO_SYSSPACE, &oldpn)) {
1386 1386                  nfs_rw_exit(&svp->sv_lock);
1387 1387                  kmem_free(symlink, strlen(symlink) + 1);
1388 1388                  return (error);
1389 1389          }
1390 1390          nfs_rw_exit(&svp->sv_lock);
1391 1391          pn_alloc(&newpn);
1392 1392  
1393 1393          /*
1394 1394           * Skip over previous components from the oldpath so that the
1395 1395           * oldpn.pn_path will point to the symlink component. Skip
1396 1396           * leading slashes and "/./" (no OP_LOOKUP on ".") so that
1397 1397           * pn_getcomponent can get the component.
1398 1398           */
1399 1399          for (i = 1; i < nth; i++) {
1400 1400                  pathname_skipslashdot(&oldpn);
1401 1401                  error = pn_getcomponent(&oldpn, component);
1402 1402                  if (error)
1403 1403                          goto out;
1404 1404          }
1405 1405  
1406 1406          /*
1407 1407           * Copy the old path up to the component right before the symlink
1408 1408           * if the symlink is not an absolute path.
1409 1409           */
1410 1410          if (symlink[0] != '/') {
1411 1411                  addlen = oldpn.pn_path - oldpn.pn_buf;
1412 1412                  bcopy(oldpn.pn_buf, newpn.pn_path, addlen);
1413 1413                  newpn.pn_pathlen += addlen;
1414 1414                  newpn.pn_path += addlen;
1415 1415                  newpn.pn_buf[newpn.pn_pathlen] = '/';
1416 1416                  newpn.pn_pathlen++;
1417 1417                  newpn.pn_path++;
1418 1418          }
1419 1419  
1420 1420          /* copy the resolved symbolic link text */
1421 1421          addlen = strlen(symlink);
1422 1422          if (newpn.pn_pathlen + addlen >= newpn.pn_bufsize) {
1423 1423                  error = ENAMETOOLONG;
1424 1424                  goto out;
1425 1425          }
1426 1426          bcopy(symlink, newpn.pn_path, addlen);
1427 1427          newpn.pn_pathlen += addlen;
1428 1428          newpn.pn_path += addlen;
1429 1429  
1430 1430          /*
1431 1431           * Check if there is any remaining path after the symlink component.
1432 1432           * First, skip the symlink component.
1433 1433           */
1434 1434          pathname_skipslashdot(&oldpn);
1435 1435          if (error = pn_getcomponent(&oldpn, component))
1436 1436                  goto out;
1437 1437  
1438 1438          addlen = pn_pathleft(&oldpn); /* includes counting the slash */
1439 1439  
1440 1440          /*
1441 1441           * Copy the remaining path to the new pathname if there is any.
1442 1442           */
1443 1443          if (addlen > 0) {
1444 1444                  if (newpn.pn_pathlen + addlen >= newpn.pn_bufsize) {
1445 1445                          error = ENAMETOOLONG;
1446 1446                          goto out;
1447 1447                  }
1448 1448                  bcopy(oldpn.pn_path, newpn.pn_path, addlen);
1449 1449                  newpn.pn_pathlen += addlen;
1450 1450          }
1451 1451          newpn.pn_buf[newpn.pn_pathlen] = '\0';
1452 1452  
1453 1453          /* get the newpath and store it in the servinfo4_t */
1454 1454          newpath = kmem_alloc(newpn.pn_pathlen + 1, KM_SLEEP);
1455 1455          bcopy(newpn.pn_buf, newpath, newpn.pn_pathlen);
1456 1456          newpath[newpn.pn_pathlen] = '\0';
1457 1457  
1458 1458          (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0);
1459 1459          svp->sv_path = newpath;
1460 1460          svp->sv_pathlen = strlen(newpath) + 1;
1461 1461          nfs_rw_exit(&svp->sv_lock);
1462 1462  
1463 1463          kmem_free(oldpath, oldpathlen);
1464 1464  out:
1465 1465          kmem_free(symlink, strlen(symlink) + 1);
1466 1466          pn_free(&newpn);
1467 1467          pn_free(&oldpn);
1468 1468  
1469 1469          return (error);
1470 1470  }
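
A minimal stand-alone sketch of the path composition done by resolve_sympath above, assuming a plain user-space setting and 1-based component numbering; the function and buffer names are hypothetical and the kernel's struct pathname bookkeeping and locking are left out. It replaces the nth component with the symlink text, keeps the prefix only for a relative symlink, and appends whatever followed the symlink component.

#include <stdio.h>

/*
 * Sketch only: replace the nth '/'-separated component of oldpath
 * (1-based here) with the symlink text.  An absolute symlink discards
 * the prefix, mirroring the "symlink[0] != '/'" test in the real code.
 */
static int
resolve_sketch(const char *oldpath, int nth, const char *symlink,
    char *newpath, size_t size)
{
        const char *p = oldpath;
        const char *start = oldpath, *end = oldpath;
        int comp = 0;

        while (*p != '\0' && comp < nth) {
                while (*p == '/')                       /* skip slashes */
                        p++;
                if (*p == '\0')
                        break;
                start = p;                              /* component start */
                while (*p != '\0' && *p != '/')
                        p++;
                end = p;                                /* component end */
                comp++;
        }
        if (comp != nth)
                return (-1);
        if (symlink[0] == '/')
                start = oldpath;                        /* drop the prefix */

        return (snprintf(newpath, size, "%.*s%s%s",
            (int)(start - oldpath), oldpath, symlink, end) < (int)size ?
            0 : -1);
}

int
main(void)
{
        char newpath[1024];

        /* component 2 ("bbb") of /aaa/bbb/ccc is a symlink to rpool/data */
        if (resolve_sketch("/aaa/bbb/ccc", 2, "rpool/data",
            newpath, sizeof (newpath)) == 0)
                (void) printf("%s\n", newpath); /* /aaa/rpool/data/ccc */
        return (0);
}
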
1471 1471  
1472 1472  /*
1473 1473   * This routine updates the servinfo4 structure with the new referred server
1474 1474   * info.
1475 1475   * nfsfsloc has the location-related information.
1476 1476   * fsp has the hostname and pathname info.
1477 1477   * new path = pathname from referral + part of orig pathname(based on nth).
1478 1478   */
1479 1479  static void
1480 1480  update_servinfo4(servinfo4_t *svp, fs_location4 *fsp,
1481 1481      struct nfs_fsl_info *nfsfsloc, char *orig_path, int nth)
1482 1482  {
1483 1483          struct knetconfig *knconf, *svknconf;
1484 1484          struct netbuf *saddr;
1485 1485          sec_data_t      *secdata;
1486 1486          utf8string *host;
1487 1487          int i = 0, num_slashes = 0;
1488 1488          char *p, *spath, *op, *new_path;
1489 1489  
1490 1490          /* Update knconf */
1491 1491          knconf = svp->sv_knconf;
1492 1492          free_knconf_contents(knconf);
1493 1493          bzero(knconf, sizeof (struct knetconfig));
1494 1494          svknconf = nfsfsloc->knconf;
1495 1495          knconf->knc_semantics = svknconf->knc_semantics;
1496 1496          knconf->knc_protofmly = kmem_zalloc(KNC_STRSIZE, KM_SLEEP);
1497 1497          knconf->knc_proto = kmem_zalloc(KNC_STRSIZE, KM_SLEEP);
1498 1498          knconf->knc_rdev = svknconf->knc_rdev;
1499 1499          bcopy(svknconf->knc_protofmly, knconf->knc_protofmly, KNC_STRSIZE);
1500 1500          bcopy(svknconf->knc_proto, knconf->knc_proto, KNC_STRSIZE);
1501 1501  
1502 1502          /* Update server address */
1503 1503          saddr = &svp->sv_addr;
1504 1504          if (saddr->buf != NULL)
1505 1505                  kmem_free(saddr->buf, saddr->maxlen);
1506 1506          saddr->buf  = kmem_alloc(nfsfsloc->addr->maxlen, KM_SLEEP);
1507 1507          saddr->len = nfsfsloc->addr->len;
1508 1508          saddr->maxlen = nfsfsloc->addr->maxlen;
1509 1509          bcopy(nfsfsloc->addr->buf, saddr->buf, nfsfsloc->addr->len);
1510 1510  
1511 1511          /* Update server name */
1512 1512          host = fsp->server_val;
1513 1513          kmem_free(svp->sv_hostname, svp->sv_hostnamelen);
1514 1514          svp->sv_hostname = kmem_zalloc(host->utf8string_len + 1, KM_SLEEP);
1515 1515          bcopy(host->utf8string_val, svp->sv_hostname, host->utf8string_len);
1516 1516          svp->sv_hostname[host->utf8string_len] = '\0';
1517 1517          svp->sv_hostnamelen = host->utf8string_len + 1;
1518 1518  
1519 1519          /*
1520 1520           * Update server path.
1521 1521           * We need to set up the proper path here.
1522 1522           * For example, if we got a path name serv1:/rp/aaa/bbb
1523 1523           * where aaa is a referral and points to serv2:/rpool/aa
1524 1524           * we need to set the path to serv2:/rpool/aa/bbb.
1525 1525           * The first part of the code below generates /rpool/aa
1526 1526           * and the second part appends /bbb to the server path.
1527 1527           */
1528 1528          spath = p = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
1529 1529          *p++ = '/';
1530 1530          for (i = 0; i < fsp->rootpath.pathname4_len; i++) {
1531 1531                  component4 *comp;
1532 1532  
1533 1533                  comp = &fsp->rootpath.pathname4_val[i];
1534 1534                  /* If no space, null the string and bail */
1535 1535                  if ((p - spath) + comp->utf8string_len + 1 > MAXPATHLEN) {
1536 1536                          p = spath + MAXPATHLEN - 1;
1537 1537                          spath[0] = '\0';
1538 1538                          break;
1539 1539                  }
1540 1540                  bcopy(comp->utf8string_val, p, comp->utf8string_len);
1541 1541                  p += comp->utf8string_len;
1542 1542                  *p++ = '/';
1543 1543          }
1544 1544          if (fsp->rootpath.pathname4_len != 0)
1545 1545                  *(p - 1) = '\0';
1546 1546          else
1547 1547                  *p = '\0';
1548 1548          p = spath;
1549 1549  
1550 1550          new_path = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
1551 1551          (void) strlcpy(new_path, p, MAXPATHLEN);
1552 1552          kmem_free(p, MAXPATHLEN);
1553 1553          i = strlen(new_path);
1554 1554  
1555 1555          for (op = orig_path; *op; op++) {
1556 1556                  if (*op == '/')
1557 1557                          num_slashes++;
1558 1558                  if (num_slashes == nth + 2) {
1559 1559                          while (*op != '\0') {
1560 1560                                  new_path[i] = *op;
1561 1561                                  i++;
1562 1562                                  op++;
1563 1563                          }
1564 1564                          break;
1565 1565                  }
1566 1566          }
1567 1567          new_path[i] = '\0';
1568 1568  
1569 1569          kmem_free(svp->sv_path, svp->sv_pathlen);
1570 1570          svp->sv_pathlen = strlen(new_path) + 1;
1571 1571          svp->sv_path = kmem_alloc(svp->sv_pathlen, KM_SLEEP);
1572 1572          bcopy(new_path, svp->sv_path, svp->sv_pathlen);
1573 1573          kmem_free(new_path, MAXPATHLEN);
1574 1574  
1575 1575          /*
1576 1576           * All the security data is specific to the old server.
1577 1577           * Clean it up except secdata which deals with mount options.
1578 1578           * We need to inherit that data. Copy secdata into our new servinfo4.
1579 1579           */
1580 1580          if (svp->sv_dhsec) {
1581 1581                  sec_clnt_freeinfo(svp->sv_dhsec);
1582 1582                  svp->sv_dhsec = NULL;
1583 1583          }
1584 1584          if (svp->sv_save_secinfo &&
1585 1585              svp->sv_save_secinfo != svp->sv_secinfo) {
1586 1586                  secinfo_free(svp->sv_save_secinfo);
1587 1587                  svp->sv_save_secinfo = NULL;
1588 1588          }
1589 1589          if (svp->sv_secinfo) {
1590 1590                  secinfo_free(svp->sv_secinfo);
1591 1591                  svp->sv_secinfo = NULL;
1592 1592          }
1593 1593          svp->sv_currsec = NULL;
1594 1594  
1595 1595          secdata = kmem_alloc(sizeof (*secdata), KM_SLEEP);
1596 1596          *secdata = *svp->sv_secdata;
1597 1597          secdata->data = NULL;
1598 1598          if (svp->sv_secdata) {
1599 1599                  sec_clnt_freeinfo(svp->sv_secdata);
1600 1600                  svp->sv_secdata = NULL;
1601 1601          }
1602 1602          svp->sv_secdata = secdata;
1603 1603  }
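
A minimal stand-alone sketch of the server-path reconstruction performed above, assuming plain C strings instead of utf8string components; names, the buffer size, and the value passed for nth are illustrative assumptions. It joins the referral's rootpath components into a path such as /rpool/aa, then grafts on the tail of the original path found by counting slashes up to nth + 2, so serv1:/rp/aaa/bbb with aaa referring to serv2:/rpool/aa yields /rpool/aa/bbb as in the comment above.

#include <stdio.h>

/*
 * Sketch only: build the referred server path from its components and
 * append the tail of the original path, using the same "nth + 2"
 * slash count as update_servinfo4 above.
 */
static void
referral_path_sketch(const char *const comps[], int ncomps,
    const char *orig_path, int nth, char *out, size_t size)
{
        const char *op;
        size_t used = 0;
        int slashes = 0;
        int i;

        out[0] = '\0';
        for (i = 0; i < ncomps && used < size; i++)
                used += (size_t)snprintf(out + used, size - used, "/%s",
                    comps[i]);
        if (ncomps == 0)
                used = (size_t)snprintf(out, size, "/");
        if (used >= size)
                used = size - 1;        /* output was truncated */

        /* append the original path from its (nth + 2)'th slash onward */
        for (op = orig_path; *op != '\0'; op++) {
                if (*op == '/')
                        slashes++;
                if (slashes == nth + 2) {
                        (void) snprintf(out + used, size - used, "%s", op);
                        break;
                }
        }
}

int
main(void)
{
        const char *comps[] = { "rpool", "aa" };
        char out[1024];

        /* "aaa" in serv1:/rp/aaa/bbb refers to serv2:/rpool/aa */
        referral_path_sketch(comps, 2, "/rp/aaa/bbb", 1, out, sizeof (out));
        (void) printf("%s\n", out);     /* prints /rpool/aa/bbb */
        return (0);
}
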
1604 1604  
1605 1605  /*
1606 1606   * Resolve a referral. The referral is in the n+1th component of
1607 1607   * svp->sv_path and has a parent nfs4 file handle "fh".
1608 1608   * Upon return, the sv_path will point to the new path that has the referral
1609 1609   * component resolved to its referred path and part of the original path.
1610 1610   * Hostname and other address information is also updated.
1611 1611   */
1612 1612  int
1613 1613  resolve_referral(mntinfo4_t *mi, servinfo4_t *svp, cred_t *cr, int nth,
1614 1614      nfs_fh4 *fh)
1615 1615  {
1616 1616          nfs4_sharedfh_t *sfh;
1617 1617          struct nfs_fsl_info nfsfsloc;
1618 1618          nfs4_ga_res_t garp;
1619 1619          COMPOUND4res_clnt callres;
1620 1620          fs_location4    *fsp;
1621 1621          char *nm, *orig_path;
1622 1622          int orig_pathlen = 0, ret = -1, index;
1623 1623  
1624 1624          if (svp->sv_pathlen <= 0)
1625 1625                  return (ret);
1626 1626  
1627 1627          (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0);
1628 1628          orig_pathlen = svp->sv_pathlen;
1629 1629          orig_path = kmem_alloc(orig_pathlen, KM_SLEEP);
1630 1630          bcopy(svp->sv_path, orig_path, orig_pathlen);
1631 1631          nm = extract_referral_point(svp->sv_path, nth);
1632 1632          setup_newsvpath(svp, nth);
1633 1633          nfs_rw_exit(&svp->sv_lock);
1634 1634  
1635 1635          sfh = sfh4_get(fh, mi);
1636 1636          index = nfs4_process_referral(mi, sfh, nm, cr,
1637 1637              &garp, &callres, &nfsfsloc);
1638 1638          sfh4_rele(&sfh);
1639 1639          kmem_free(nm, MAXPATHLEN);
1640 1640          if (index < 0) {
1641 1641                  kmem_free(orig_path, orig_pathlen);
1642 1642                  return (index);
1643 1643          }
1644 1644  
1645 1645          fsp =  &garp.n4g_ext_res->n4g_fslocations.locations_val[index];
1646 1646          (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0);
1647 1647          update_servinfo4(svp, fsp, &nfsfsloc, orig_path, nth);
1648 1648          nfs_rw_exit(&svp->sv_lock);
1649 1649  
1650 1650          mutex_enter(&mi->mi_lock);
1651 1651          mi->mi_vfs_referral_loop_cnt++;
1652 1652          mutex_exit(&mi->mi_lock);
1653 1653  
1654 1654          ret = 0;
1655 1655  bad:
1656 1656          /* Free up XDR memory allocated in nfs4_process_referral() */
1657 1657          xdr_free(xdr_nfs_fsl_info, (char *)&nfsfsloc);
1658 1658          xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&callres);
1659 1659          kmem_free(orig_path, orig_pathlen);
1660 1660  
1661 1661          return (ret);
1662 1662  }
1663 1663  
1664 1664  /*
1665 1665   * Get the root filehandle for the given filesystem and server, and update
1666 1666   * svp.
1667 1667   *
1668 1668   * If NFS4_GETFH_NEEDSOP is set, then use nfs4_start_fop and nfs4_end_fop
1669 1669   * to coordinate with recovery.  Otherwise, the caller is assumed to be
1670 1670   * the recovery thread or have already done a start_fop.
1671 1671   *
1672 1672   * Errors are returned by the nfs4_error_t parameter.
1673 1673   */
1674 1674  static void
1675 1675  nfs4getfh_otw(struct mntinfo4 *mi, servinfo4_t *svp, vtype_t *vtp,
1676 1676      int flags, cred_t *cr, nfs4_error_t *ep)
1677 1677  {
1678 1678          COMPOUND4args_clnt args;
1679 1679          COMPOUND4res_clnt res;
1680 1680          int doqueue = 1;
1681 1681          nfs_argop4 *argop;
1682 1682          nfs_resop4 *resop;
1683 1683          nfs4_ga_res_t *garp;
1684 1684          int num_argops;
1685 1685          lookup4_param_t lookuparg;
1686 1686          nfs_fh4 *tmpfhp;
1687 1687          nfs_fh4 *resfhp;
1688 1688          bool_t needrecov = FALSE;
1689 1689          nfs4_recov_state_t recov_state;
1690 1690          int llndx;
1691 1691          int nthcomp;
1692 1692          int recovery = !(flags & NFS4_GETFH_NEEDSOP);
1693 1693  
1694 1694          (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0);
1695 1695          ASSERT(svp->sv_path != NULL);
1696 1696          if (svp->sv_path[0] == '\0') {
1697 1697                  nfs_rw_exit(&svp->sv_lock);
1698 1698                  nfs4_error_init(ep, EINVAL);
1699 1699                  return;
1700 1700          }
1701 1701          nfs_rw_exit(&svp->sv_lock);
1702 1702  
1703 1703          recov_state.rs_flags = 0;
1704 1704          recov_state.rs_num_retry_despite_err = 0;
1705 1705  
1706 1706  recov_retry:
1707 1707          if (mi->mi_vfs_referral_loop_cnt >= NFS4_REFERRAL_LOOP_MAX) {
1708 1708                  DTRACE_PROBE3(nfs4clnt__debug__referral__loop, mntinfo4 *,
1709 1709                      mi, servinfo4_t *, svp, char *, "nfs4getfh_otw");
1710 1710                  nfs4_error_init(ep, EINVAL);
1711 1711                  return;
1712 1712          }
1713 1713          nfs4_error_zinit(ep);
1714 1714  
1715 1715          if (!recovery) {
1716 1716                  ep->error = nfs4_start_fop(mi, NULL, NULL, OH_MOUNT,
1717 1717                      &recov_state, NULL);
1718 1718  
1719 1719                  /*
1720 1720                   * If recovery has been started and this request was
1721 1721                   * initiated by a mount, then we must wait for recovery
1722 1722                   * to finish before proceeding; otherwise, the error
1723 1723                   * cleanup would remove data structures needed by the
1724 1724                   * recovery thread.
1725 1725                   */
1726 1726                  if (ep->error) {
1727 1727                          mutex_enter(&mi->mi_lock);
1728 1728                          if (mi->mi_flags & MI4_MOUNTING) {
1729 1729                                  mi->mi_flags |= MI4_RECOV_FAIL;
1730 1730                                  mi->mi_error = EIO;
1731 1731  
1732 1732                                  NFS4_DEBUG(nfs4_client_recov_debug, (CE_NOTE,
1733 1733                                      "nfs4getfh_otw: waiting 4 recovery\n"));
1734 1734  
1735 1735                                  while (mi->mi_flags & MI4_RECOV_ACTIV)
1736 1736                                          cv_wait(&mi->mi_failover_cv,
1737 1737                                              &mi->mi_lock);
1738 1738                          }
1739 1739                          mutex_exit(&mi->mi_lock);
1740 1740                          return;
1741 1741                  }
1742 1742  
1743 1743                  /*
1744 1744                   * If the client does not specify a specific flavor to use
1745 1745                   * and has not gotten a secinfo list from the server yet,
1746 1746                   * retrieve the secinfo list from the server and use a
1747 1747                   * flavor from the list to mount.
1748 1748                   *
1749 1749                   * If fail to get the secinfo list from the server, then
1750 1750                   * try the default flavor.
1751 1751                   */
1752 1752                  if ((svp->sv_flags & SV4_TRYSECDEFAULT) &&
1753 1753                      svp->sv_secinfo == NULL) {
1754 1754                          (void) nfs4_secinfo_path(mi, cr, FALSE);
1755 1755                  }
1756 1756          }
1757 1757  
1758 1758          if (recovery)
1759 1759                  args.ctag = TAG_REMAP_MOUNT;
1760 1760          else
1761 1761                  args.ctag = TAG_MOUNT;
1762 1762  
1763 1763          lookuparg.l4_getattrs = LKP4_ALL_ATTRIBUTES;
1764 1764          lookuparg.argsp = &args;
1765 1765          lookuparg.resp = &res;
1766 1766          lookuparg.header_len = 2;       /* Putrootfh, getfh */
1767 1767          lookuparg.trailer_len = 0;
1768 1768          lookuparg.ga_bits = FATTR4_FSINFO_MASK;
1769 1769          lookuparg.mi = mi;
1770 1770  
1771 1771          (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0);
1772 1772          ASSERT(svp->sv_path != NULL);
1773 1773          llndx = nfs4lookup_setup(svp->sv_path, &lookuparg, 0);
1774 1774          nfs_rw_exit(&svp->sv_lock);
1775 1775  
1776 1776          argop = args.array;
1777 1777          num_argops = args.array_len;
1778 1778  
1779 1779          /* choose public or root filehandle */
1780 1780          if (flags & NFS4_GETFH_PUBLIC)
1781 1781                  argop[0].argop = OP_PUTPUBFH;
1782 1782          else
1783 1783                  argop[0].argop = OP_PUTROOTFH;
1784 1784  
1785 1785          /* get fh */
1786 1786          argop[1].argop = OP_GETFH;
1787 1787  
1788 1788          NFS4_DEBUG(nfs4_client_call_debug, (CE_NOTE,
1789 1789              "nfs4getfh_otw: %s call, mi 0x%p",
1790 1790              needrecov ? "recov" : "first", (void *)mi));
1791 1791  
1792 1792          rfs4call(mi, &args, &res, cr, &doqueue, RFSCALL_SOFT, ep);
1793 1793  
1794 1794          needrecov = nfs4_needs_recovery(ep, FALSE, mi->mi_vfsp);
1795 1795  
1796 1796          if (needrecov) {
1797 1797                  bool_t abort;
1798 1798  
1799 1799                  if (recovery) {
1800 1800                          nfs4args_lookup_free(argop, num_argops);
1801 1801                          kmem_free(argop,
1802 1802                              lookuparg.arglen * sizeof (nfs_argop4));
1803 1803                          if (!ep->error)
1804 1804                                  xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
1805 1805                          return;
1806 1806                  }
1807 1807  
1808 1808                  NFS4_DEBUG(nfs4_client_recov_debug,
1809 1809                      (CE_NOTE, "nfs4getfh_otw: initiating recovery\n"));
1810 1810  
1811 1811                  abort = nfs4_start_recovery(ep, mi, NULL,
1812 1812                      NULL, NULL, NULL, OP_GETFH, NULL, NULL, NULL);
1813 1813                  if (!ep->error) {
1814 1814                          ep->error = geterrno4(res.status);
1815 1815                          xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
1816 1816                  }
1817 1817                  nfs4args_lookup_free(argop, num_argops);
1818 1818                  kmem_free(argop, lookuparg.arglen * sizeof (nfs_argop4));
1819 1819                  nfs4_end_fop(mi, NULL, NULL, OH_MOUNT, &recov_state, needrecov);
1820 1820                  /* have another go? */
1821 1821                  if (abort == FALSE)
1822 1822                          goto recov_retry;
1823 1823                  return;
1824 1824          }
1825 1825  
1826 1826          /*
1827 1827           * No recovery, but check if error is set.
1828 1828           */
1829 1829          if (ep->error)  {
1830 1830                  nfs4args_lookup_free(argop, num_argops);
1831 1831                  kmem_free(argop, lookuparg.arglen * sizeof (nfs_argop4));
1832 1832                  if (!recovery)
1833 1833                          nfs4_end_fop(mi, NULL, NULL, OH_MOUNT, &recov_state,
1834 1834                              needrecov);
1835 1835                  return;
1836 1836          }
1837 1837  
1838 1838  is_link_err:
1839 1839  
1840 1840          /* for non-recovery errors */
1841 1841          if (res.status && res.status != NFS4ERR_SYMLINK &&
1842 1842              res.status != NFS4ERR_MOVED) {
1843 1843                  if (!recovery) {
1844 1844                          nfs4_end_fop(mi, NULL, NULL, OH_MOUNT, &recov_state,
1845 1845                              needrecov);
1846 1846                  }
1847 1847                  nfs4args_lookup_free(argop, num_argops);
1848 1848                  kmem_free(argop, lookuparg.arglen * sizeof (nfs_argop4));
1849 1849                  xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
1850 1850                  return;
1851 1851          }
1852 1852  
1853 1853          /*
1854 1854           * If any intermediate component in the path is a symbolic link,
1855 1855           * resolve the symlink, then try mount again using the new path.
1856 1856           */
1857 1857          if (res.status == NFS4ERR_SYMLINK || res.status == NFS4ERR_MOVED) {
1858 1858                  int where;
1859 1859  
1860 1860                  /*
1861 1861                   * Need to call nfs4_end_op before resolve_sympath to avoid
1862 1862                   * potential nfs4_start_op deadlock.
1863 1863                   */
1864 1864                  if (!recovery)
1865 1865                          nfs4_end_fop(mi, NULL, NULL, OH_MOUNT, &recov_state,
1866 1866                              needrecov);
1867 1867  
1868 1868                  /*
1869 1869                   * This must be from OP_LOOKUP failure. The (cfh) for this
1870 1870                   * OP_LOOKUP is a symlink node. Find out where the
1871 1871                   * OP_GETFH is for the (cfh) that is a symlink node.
1872 1872                   *
1873 1873                   * Example:
1874 1874                   * (mount) PUTROOTFH, GETFH, LOOKUP comp1, GETFH, GETATTR,
1875 1875                   * LOOKUP comp2, GETFH, GETATTR, LOOKUP comp3, GETFH, GETATTR
1876 1876                   *
1877 1877                   * LOOKUP comp3 fails with SYMLINK because comp2 is a symlink.
1878 1878                   * In this case, where = 7, nthcomp = 2.
1879 1879                   */
1880 1880                  where = res.array_len - 2;
1881 1881                  ASSERT(where > 0);
1882 1882  
1883 1883                  if (res.status == NFS4ERR_SYMLINK) {
1884 1884  
1885 1885                          resop = &res.array[where - 1];
1886 1886                          ASSERT(resop->resop == OP_GETFH);
1887 1887                          tmpfhp = &resop->nfs_resop4_u.opgetfh.object;
1888 1888                          nthcomp = res.array_len/3 - 1;
1889 1889                          ep->error = resolve_sympath(mi, svp, nthcomp,
1890 1890                              tmpfhp, cr, flags);
1891 1891  
1892 1892                  } else if (res.status == NFS4ERR_MOVED) {
1893 1893  
1894 1894                          resop = &res.array[where - 2];
1895 1895                          ASSERT(resop->resop == OP_GETFH);
1896 1896                          tmpfhp = &resop->nfs_resop4_u.opgetfh.object;
1897 1897                          nthcomp = res.array_len/3 - 1;
1898 1898                          ep->error = resolve_referral(mi, svp, cr, nthcomp,
1899 1899                              tmpfhp);
1900 1900                  }
1901 1901  
1902 1902                  nfs4args_lookup_free(argop, num_argops);
1903 1903                  kmem_free(argop, lookuparg.arglen * sizeof (nfs_argop4));
1904 1904                  xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
1905 1905  
1906 1906                  if (ep->error)
1907 1907                          return;
1908 1908  
1909 1909                  goto recov_retry;
1910 1910          }
1911 1911  
1912 1912          /* getfh */
1913 1913          resop = &res.array[res.array_len - 2];
1914 1914          ASSERT(resop->resop == OP_GETFH);
1915 1915          resfhp = &resop->nfs_resop4_u.opgetfh.object;
1916 1916  
1917 1917          /* getattr fsinfo res */
1918 1918          resop++;
1919 1919          garp = &resop->nfs_resop4_u.opgetattr.ga_res;
1920 1920  
1921 1921          *vtp = garp->n4g_va.va_type;
1922 1922  
1923 1923          mi->mi_fh_expire_type = garp->n4g_ext_res->n4g_fet;
1924 1924  
1925 1925          mutex_enter(&mi->mi_lock);
1926 1926          if (garp->n4g_ext_res->n4g_pc4.pc4_link_support)
1927 1927                  mi->mi_flags |= MI4_LINK;
1928 1928          if (garp->n4g_ext_res->n4g_pc4.pc4_symlink_support)
1929 1929                  mi->mi_flags |= MI4_SYMLINK;
1930 1930          if (garp->n4g_ext_res->n4g_suppattrs & FATTR4_ACL_MASK)
1931 1931                  mi->mi_flags |= MI4_ACL;
1932 1932          mutex_exit(&mi->mi_lock);
1933 1933  
1934 1934          if (garp->n4g_ext_res->n4g_maxread == 0)
1935 1935                  mi->mi_tsize =
1936 1936                      MIN(MAXBSIZE, mi->mi_tsize);
1937 1937          else
1938 1938                  mi->mi_tsize =
1939 1939                      MIN(garp->n4g_ext_res->n4g_maxread,
1940 1940                      mi->mi_tsize);
1941 1941  
1942 1942          if (garp->n4g_ext_res->n4g_maxwrite == 0)
1943 1943                  mi->mi_stsize =
1944 1944                      MIN(MAXBSIZE, mi->mi_stsize);
1945 1945          else
1946 1946                  mi->mi_stsize =
1947 1947                      MIN(garp->n4g_ext_res->n4g_maxwrite,
1948 1948                      mi->mi_stsize);
1949 1949  
1950 1950          if (garp->n4g_ext_res->n4g_maxfilesize != 0)
1951 1951                  mi->mi_maxfilesize =
1952 1952                      MIN(garp->n4g_ext_res->n4g_maxfilesize,
1953 1953                      mi->mi_maxfilesize);
1954 1954  
1955 1955          /*
1956 1956           * If the final component is a symbolic link, resolve the symlink,
1957 1957           * then try mount again using the new path.
1958 1958           *
1959 1959           * Assume no symbolic link for the root filesystem "/".
1960 1960           */
1961 1961          if (*vtp == VLNK) {
1962 1962                  /*
1963 1963                   * nthcomp is the total result length minus
1964 1964                   * the 1st 2 OPs (PUTROOTFH, GETFH),
1965 1965                   * then divided by 3 (LOOKUP,GETFH,GETATTR)
1966 1966                   *
1967 1967                   * e.g. PUTROOTFH GETFH LOOKUP 1st-comp GETFH GETATTR
1968 1968                   *      LOOKUP 2nd-comp GETFH GETATTR
1969 1969                   *
1970 1970                   *      (8 - 2)/3 = 2
1971 1971                   */
1972 1972                  nthcomp = (res.array_len - 2)/3;
1973 1973  
1974 1974                  /*
1975 1975                   * Need to call nfs4_end_op before resolve_sympath to avoid
1976 1976                   * potential nfs4_start_op deadlock. See RFE 4777612.
1977 1977                   */
1978 1978                  if (!recovery)
1979 1979                          nfs4_end_fop(mi, NULL, NULL, OH_MOUNT, &recov_state,
1980 1980                              needrecov);
1981 1981  
1982 1982                  ep->error = resolve_sympath(mi, svp, nthcomp, resfhp, cr,
1983 1983                      flags);
1984 1984  
1985 1985                  nfs4args_lookup_free(argop, num_argops);
1986 1986                  kmem_free(argop, lookuparg.arglen * sizeof (nfs_argop4));
1987 1987                  xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
1988 1988  
1989 1989                  if (ep->error)
1990 1990                          return;
1991 1991  
1992 1992                  goto recov_retry;
1993 1993          }
1994 1994  
1995 1995          /*
1996 1996           * We need to figure out where in the compound the getfh
1997 1997           * for the parent directory is. If the object to be mounted is
1998 1998           * the root, then there is no lookup at all:
1999 1999           * PUTROOTFH, GETFH.
2000 2000           * If the object to be mounted is in the root, then the compound is:
2001 2001           * PUTROOTFH, GETFH, LOOKUP, GETFH, GETATTR.
2002 2002           * In either of these cases, the index of the GETFH is 1.
2003 2003           * If it is not at the root, then it's something like:
2004 2004           * PUTROOTFH, GETFH, LOOKUP, GETFH, GETATTR,
2005 2005           * LOOKUP, GETFH, GETATTR
2006 2006           * In this case, the index is llndx (last lookup index) - 2.
2007 2007           */
2008 2008          if (llndx == -1 || llndx == 2)
2009 2009                  resop = &res.array[1];
2010 2010          else {
2011 2011                  ASSERT(llndx > 2);
2012 2012                  resop = &res.array[llndx-2];
2013 2013          }
2014 2014  
2015 2015          ASSERT(resop->resop == OP_GETFH);
2016 2016          tmpfhp = &resop->nfs_resop4_u.opgetfh.object;
2017 2017  
2018 2018          /* save the filehandles for the replica */
2019 2019          (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0);
2020 2020          ASSERT(tmpfhp->nfs_fh4_len <= NFS4_FHSIZE);
2021 2021          svp->sv_pfhandle.fh_len = tmpfhp->nfs_fh4_len;
2022 2022          bcopy(tmpfhp->nfs_fh4_val, svp->sv_pfhandle.fh_buf,
2023 2023              tmpfhp->nfs_fh4_len);
2024 2024          ASSERT(resfhp->nfs_fh4_len <= NFS4_FHSIZE);
2025 2025          svp->sv_fhandle.fh_len = resfhp->nfs_fh4_len;
2026 2026          bcopy(resfhp->nfs_fh4_val, svp->sv_fhandle.fh_buf, resfhp->nfs_fh4_len);
2027 2027  
2028 2028          /* initialize fsid and supp_attrs for server fs */
2029 2029          svp->sv_fsid = garp->n4g_fsid;
2030 2030          svp->sv_supp_attrs =
2031 2031              garp->n4g_ext_res->n4g_suppattrs | FATTR4_MANDATTR_MASK;
2032 2032  
2033 2033          nfs_rw_exit(&svp->sv_lock);
2034 2034          nfs4args_lookup_free(argop, num_argops);
2035 2035          kmem_free(argop, lookuparg.arglen * sizeof (nfs_argop4));
2036 2036          xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
2037 2037          if (!recovery)
2038 2038                  nfs4_end_fop(mi, NULL, NULL, OH_MOUNT, &recov_state, needrecov);
2039 2039  }
2040 2040  
2041 2041  /*
2042 2042   * Save a copy of the servinfo4_t structure.
2043 2043   * We might need it when getting the file handle fails in the referral
2044 2044   * case, to replace the servinfo4 struct and try again.
2045 2045   */
2046 2046  static struct servinfo4 *
2047 2047  copy_svp(servinfo4_t *nsvp)
2048 2048  {
2049 2049          servinfo4_t *svp = NULL;
2050 2050          struct knetconfig *sknconf, *tknconf;
2051 2051          struct netbuf *saddr, *taddr;
2052 2052  
2053 2053          svp = kmem_zalloc(sizeof (*svp), KM_SLEEP);
2054 2054          nfs_rw_init(&svp->sv_lock, NULL, RW_DEFAULT, NULL);
2055 2055          svp->sv_flags = nsvp->sv_flags;
2056 2056          svp->sv_fsid = nsvp->sv_fsid;
2057 2057          svp->sv_hostnamelen = nsvp->sv_hostnamelen;
2058 2058          svp->sv_pathlen = nsvp->sv_pathlen;
2059 2059          svp->sv_supp_attrs = nsvp->sv_supp_attrs;
2060 2060  
2061 2061          svp->sv_path = kmem_alloc(svp->sv_pathlen, KM_SLEEP);
2062 2062          svp->sv_hostname = kmem_alloc(svp->sv_hostnamelen, KM_SLEEP);
2063 2063          bcopy(nsvp->sv_hostname, svp->sv_hostname, svp->sv_hostnamelen);
2064 2064          bcopy(nsvp->sv_path, svp->sv_path, svp->sv_pathlen);
2065 2065  
2066 2066          saddr = &nsvp->sv_addr;
2067 2067          taddr = &svp->sv_addr;
2068 2068          taddr->maxlen = saddr->maxlen;
2069 2069          taddr->len = saddr->len;
2070 2070          if (saddr->len > 0) {
2071 2071                  taddr->buf = kmem_zalloc(saddr->maxlen, KM_SLEEP);
2072 2072                  bcopy(saddr->buf, taddr->buf, saddr->len);
2073 2073          }
2074 2074  
2075 2075          svp->sv_knconf = kmem_zalloc(sizeof (struct knetconfig), KM_SLEEP);
2076 2076          sknconf = nsvp->sv_knconf;
2077 2077          tknconf = svp->sv_knconf;
2078 2078          tknconf->knc_semantics = sknconf->knc_semantics;
2079 2079          tknconf->knc_rdev = sknconf->knc_rdev;
2080 2080          if (sknconf->knc_proto != NULL) {
2081 2081                  tknconf->knc_proto = kmem_zalloc(KNC_STRSIZE, KM_SLEEP);
2082 2082                  bcopy(sknconf->knc_proto, (char *)tknconf->knc_proto,
2083 2083                      KNC_STRSIZE);
2084 2084          }
2085 2085          if (sknconf->knc_protofmly != NULL) {
2086 2086                  tknconf->knc_protofmly = kmem_zalloc(KNC_STRSIZE, KM_SLEEP);
2087 2087                  bcopy(sknconf->knc_protofmly, (char *)tknconf->knc_protofmly,
2088 2088                      KNC_STRSIZE);
2089 2089          }
2090 2090  
2091 2091          if (nsvp->sv_origknconf != NULL) {
2092 2092                  svp->sv_origknconf = kmem_zalloc(sizeof (struct knetconfig),
2093 2093                      KM_SLEEP);
2094 2094                  sknconf = nsvp->sv_origknconf;
2095 2095                  tknconf = svp->sv_origknconf;
2096 2096                  tknconf->knc_semantics = sknconf->knc_semantics;
2097 2097                  tknconf->knc_rdev = sknconf->knc_rdev;
2098 2098                  if (sknconf->knc_proto != NULL) {
2099 2099                          tknconf->knc_proto = kmem_zalloc(KNC_STRSIZE, KM_SLEEP);
2100 2100                          bcopy(sknconf->knc_proto, (char *)tknconf->knc_proto,
2101 2101                              KNC_STRSIZE);
2102 2102                  }
2103 2103                  if (sknconf->knc_protofmly != NULL) {
2104 2104                          tknconf->knc_protofmly = kmem_zalloc(KNC_STRSIZE,
2105 2105                              KM_SLEEP);
2106 2106                          bcopy(sknconf->knc_protofmly,
2107 2107                              (char *)tknconf->knc_protofmly, KNC_STRSIZE);
2108 2108                  }
2109 2109          }
2110 2110  
2111 2111          svp->sv_secdata = copy_sec_data(nsvp->sv_secdata);
2112 2112          svp->sv_dhsec = copy_sec_data(svp->sv_dhsec);
2113 2113          /*
2114 2114           * Rest of the security information is not copied as they are built
2115 2115           * with the information available from secdata and dhsec.
2116 2116           */
2117 2117          svp->sv_next = NULL;
2118 2118  
2119 2119          return (svp);
2120 2120  }
2121 2121  
2122 2122  servinfo4_t *
2123 2123  restore_svp(mntinfo4_t *mi, servinfo4_t *svp, servinfo4_t *origsvp)
2124 2124  {
2125 2125          servinfo4_t *srvnext, *tmpsrv;
2126 2126  
2127 2127          if (strcmp(svp->sv_hostname, origsvp->sv_hostname) != 0) {
2128 2128                  /*
2129 2129                   * Since the hostname changed, we must be dealing
2130 2130                   * with a referral, and the lookup failed.  We will
2131 2131                   * restore the whole servinfo4_t to what it was before.
2132 2132                   */
2133 2133                  srvnext = svp->sv_next;
2134 2134                  svp->sv_next = NULL;
2135 2135                  tmpsrv = copy_svp(origsvp);
2136 2136                  sv4_free(svp);
2137 2137                  svp = tmpsrv;
2138 2138                  svp->sv_next = srvnext;
2139 2139                  mutex_enter(&mi->mi_lock);
2140 2140                  mi->mi_servers = svp;
2141 2141                  mi->mi_curr_serv = svp;
2142 2142                  mutex_exit(&mi->mi_lock);
2143 2143  
2144 2144          } else if (origsvp->sv_pathlen != svp->sv_pathlen) {
2145 2145  
2146 2146                  /*
2147 2147                   * For the symlink case: restore the original path because
2148 2148                   * it might have contained symlinks that were
2149 2149                   * expanded by nfs4getfh_otw before the failure occurred.
2150 2150                   */
2151 2151                  (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0);
2152 2152                  kmem_free(svp->sv_path, svp->sv_pathlen);
2153 2153                  svp->sv_path =
2154 2154                      kmem_alloc(origsvp->sv_pathlen, KM_SLEEP);
2155 2155                  svp->sv_pathlen = origsvp->sv_pathlen;
2156 2156                  bcopy(origsvp->sv_path, svp->sv_path,
2157 2157                      origsvp->sv_pathlen);
2158 2158                  nfs_rw_exit(&svp->sv_lock);
2159 2159          }
2160 2160          return (svp);
2161 2161  }
2162 2162  
2163      -static ushort_t nfs4_max_threads = 8;   /* max number of active async threads */
2164      -uint_t nfs4_bsize = 32 * 1024;  /* client `block' size */
2165      -static uint_t nfs4_async_clusters = 1;  /* # of reqs from each async queue */
2166      -static uint_t nfs4_cots_timeo = NFS_COTS_TIMEO;
     2163 +volatile ushort_t nfs4_max_threads = 8; /* max number of active async threads */
     2164 +volatile uint_t nfs4_bsize = 32 * 1024; /* client `block' size */
     2165 +volatile uint_t nfs4_async_clusters = 1; /* # of reqs from each async queue */
     2166 +volatile uint_t nfs4_cots_timeo = NFS_COTS_TIMEO;
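
These four declarations are the substance of the change: the variables are tunables that can be patched outside the compiler's view (for example with mdb -kw on a live kernel, or from /etc/system at boot), and without volatile the compiler may fold or cache the static initializer so a patched value is never re-read. Below is a minimal stand-alone illustration of the pattern, with a hypothetical tunable name; it is not code from this file.

#include <stdio.h>

/* hypothetical tunable; a debugger may overwrite it while this runs */
volatile unsigned int sketch_max_threads = 8;

int
main(void)
{
        unsigned int i;

        /*
         * volatile forces each evaluation of the loop condition to
         * reload sketch_max_threads from memory rather than reuse a
         * value folded in at compile time.
         */
        for (i = 0; i < sketch_max_threads; i++)
                (void) printf("worker %u of %u\n", i + 1,
                    sketch_max_threads);
        return (0);
}

Marking the existing tunables volatile, as the diff does, keeps their behavior but forbids that optimization, so runtime changes are always observed.
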
2167 2167  
2168 2168  /*
2169 2169   * Remap the root filehandle for the given filesystem.
2170 2170   *
2171 2171   * results returned via the nfs4_error_t parameter.
2172 2172   */
2173 2173  void
2174 2174  nfs4_remap_root(mntinfo4_t *mi, nfs4_error_t *ep, int flags)
2175 2175  {
2176 2176          struct servinfo4 *svp, *origsvp;
2177 2177          vtype_t vtype;
2178 2178          nfs_fh4 rootfh;
2179 2179          int getfh_flags;
2180 2180          int num_retry;
2181 2181  
2182 2182          mutex_enter(&mi->mi_lock);
2183 2183  
2184 2184  remap_retry:
2185 2185          svp = mi->mi_curr_serv;
2186 2186          getfh_flags =
2187 2187              (flags & NFS4_REMAP_NEEDSOP) ? NFS4_GETFH_NEEDSOP : 0;
2188 2188          getfh_flags |=
2189 2189              (mi->mi_flags & MI4_PUBLIC) ? NFS4_GETFH_PUBLIC : 0;
2190 2190          mutex_exit(&mi->mi_lock);
2191 2191  
2192 2192          /*
2193 2193           * Just in case server path being mounted contains
2194 2194           * symlinks and fails w/STALE, save the initial sv_path
2195 2195           * so we can redrive the initial mount compound with the
2196 2196           * initial sv_path -- not a symlink-expanded version.
2197 2197           *
2198 2198           * This could only happen if a symlink was expanded
2199 2199           * and the expanded mount compound failed stale.  Because
2200 2200           * it could be the case that the symlink was removed at
2201 2201           * the server (and replaced with another symlink/dir),
2202 2202           * we need to use the initial sv_path when attempting
2203 2203           * to re-lookup everything and recover.
2204 2204           */
2205 2205          (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0);
2206 2206          origsvp = copy_svp(svp);
2207 2207          nfs_rw_exit(&svp->sv_lock);
2208 2208  
2209 2209          num_retry = nfs4_max_mount_retry;
2210 2210  
2211 2211          do {
2212 2212                  /*
2213 2213                   * Get the root fh from the server.  Retry nfs4_max_mount_retry
2214 2214                   * (2) times if it fails with STALE since the recovery
2215 2215                   * infrastructure doesn't do STALE recovery for components
2216 2216                   * of the server path to the object being mounted.
2217 2217                   */
2218 2218                  nfs4getfh_otw(mi, svp, &vtype, getfh_flags, CRED(), ep);
2219 2219  
2220 2220                  if (ep->error == 0 && ep->stat == NFS4_OK)
2221 2221                          break;
2222 2222  
2223 2223                  /*
2224 2224                   * For some reason, the mount compound failed.  Before
2225 2225                   * retrying, we need to restore original conditions.
2226 2226                   */
2227 2227                  svp = restore_svp(mi, svp, origsvp);
2228 2228  
2229 2229          } while (num_retry-- > 0);
2230 2230  
2231 2231          sv4_free(origsvp);
2232 2232  
2233 2233          if (ep->error != 0 || ep->stat != 0) {
2234 2234                  return;
2235 2235          }
2236 2236  
2237 2237          if (vtype != VNON && vtype != mi->mi_type) {
2238 2238                  /* shouldn't happen */
2239 2239                  zcmn_err(mi->mi_zone->zone_id, CE_WARN,
2240 2240                      "nfs4_remap_root: server root vnode type (%d) doesn't "
2241 2241                      "match mount info (%d)", vtype, mi->mi_type);
2242 2242          }
2243 2243  
2244 2244          (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0);
2245 2245          rootfh.nfs_fh4_val = svp->sv_fhandle.fh_buf;
2246 2246          rootfh.nfs_fh4_len = svp->sv_fhandle.fh_len;
2247 2247          nfs_rw_exit(&svp->sv_lock);
2248 2248          sfh4_update(mi->mi_rootfh, &rootfh);
2249 2249  
2250 2250          /*
2251 2251           * It's possible that recovery took place on the filesystem
2252 2252           * and the server has been updated between the time we did
2253 2253           * the nfs4getfh_otw and now. Re-drive the otw operation
2254 2254           * to make sure we have a good fh.
2255 2255           */
2256 2256          mutex_enter(&mi->mi_lock);
2257 2257          if (mi->mi_curr_serv != svp)
2258 2258                  goto remap_retry;
2259 2259  
2260 2260          mutex_exit(&mi->mi_lock);
2261 2261  }
2262 2262  
2263 2263  static int
2264 2264  nfs4rootvp(vnode_t **rtvpp, vfs_t *vfsp, struct servinfo4 *svp_head,
2265 2265      int flags, cred_t *cr, zone_t *zone)
2266 2266  {
2267 2267          vnode_t *rtvp = NULL;
2268 2268          mntinfo4_t *mi;
2269 2269          dev_t nfs_dev;
2270 2270          int error = 0;
2271 2271          rnode4_t *rp;
2272 2272          int i, len;
2273 2273          struct vattr va;
2274 2274          vtype_t vtype = VNON;
2275 2275          vtype_t tmp_vtype = VNON;
2276 2276          struct servinfo4 *firstsvp = NULL, *svp = svp_head;
2277 2277          nfs4_oo_hash_bucket_t *bucketp;
2278 2278          nfs_fh4 fh;
2279 2279          char *droptext = "";
2280 2280          struct nfs_stats *nfsstatsp;
2281 2281          nfs4_fname_t *mfname;
2282 2282          nfs4_error_t e;
2283 2283          int num_retry, removed;
2284 2284          cred_t *lcr = NULL, *tcr = cr;
2285 2285          struct servinfo4 *origsvp;
2286 2286          char *resource;
2287 2287  
2288 2288          nfsstatsp = zone_getspecific(nfsstat_zone_key, nfs_zone());
2289 2289          ASSERT(nfsstatsp != NULL);
2290 2290  
2291 2291          ASSERT(nfs_zone() == zone);
2292 2292          ASSERT(crgetref(cr));
2293 2293  
2294 2294          /*
2295 2295           * Create a mount record and link it to the vfs struct.
2296 2296           */
2297 2297          mi = kmem_zalloc(sizeof (*mi), KM_SLEEP);
2298 2298          mutex_init(&mi->mi_lock, NULL, MUTEX_DEFAULT, NULL);
2299 2299          nfs_rw_init(&mi->mi_recovlock, NULL, RW_DEFAULT, NULL);
2300 2300          nfs_rw_init(&mi->mi_rename_lock, NULL, RW_DEFAULT, NULL);
2301 2301          nfs_rw_init(&mi->mi_fh_lock, NULL, RW_DEFAULT, NULL);
2302 2302  
2303 2303          if (!(flags & NFSMNT_SOFT))
2304 2304                  mi->mi_flags |= MI4_HARD;
2305 2305          if ((flags & NFSMNT_NOPRINT))
2306 2306                  mi->mi_flags |= MI4_NOPRINT;
2307 2307          if (flags & NFSMNT_INT)
2308 2308                  mi->mi_flags |= MI4_INT;
2309 2309          if (flags & NFSMNT_PUBLIC)
2310 2310                  mi->mi_flags |= MI4_PUBLIC;
2311 2311          if (flags & NFSMNT_MIRRORMOUNT)
2312 2312                  mi->mi_flags |= MI4_MIRRORMOUNT;
2313 2313          if (flags & NFSMNT_REFERRAL)
2314 2314                  mi->mi_flags |= MI4_REFERRAL;
2315 2315          mi->mi_retrans = NFS_RETRIES;
2316 2316          if (svp->sv_knconf->knc_semantics == NC_TPI_COTS_ORD ||
2317 2317              svp->sv_knconf->knc_semantics == NC_TPI_COTS)
2318 2318                  mi->mi_timeo = nfs4_cots_timeo;
2319 2319          else
2320 2320                  mi->mi_timeo = NFS_TIMEO;
2321 2321          mi->mi_prog = NFS_PROGRAM;
2322 2322          mi->mi_vers = NFS_V4;
2323 2323          mi->mi_rfsnames = rfsnames_v4;
2324 2324          mi->mi_reqs = nfsstatsp->nfs_stats_v4.rfsreqcnt_ptr;
2325 2325          cv_init(&mi->mi_failover_cv, NULL, CV_DEFAULT, NULL);
2326 2326          mi->mi_servers = svp;
2327 2327          mi->mi_curr_serv = svp;
2328 2328          mi->mi_acregmin = SEC2HR(ACREGMIN);
2329 2329          mi->mi_acregmax = SEC2HR(ACREGMAX);
2330 2330          mi->mi_acdirmin = SEC2HR(ACDIRMIN);
2331 2331          mi->mi_acdirmax = SEC2HR(ACDIRMAX);
2332 2332          mi->mi_fh_expire_type = FH4_PERSISTENT;
2333 2333          mi->mi_clientid_next = NULL;
2334 2334          mi->mi_clientid_prev = NULL;
2335 2335          mi->mi_srv = NULL;
2336 2336          mi->mi_grace_wait = 0;
2337 2337          mi->mi_error = 0;
2338 2338          mi->mi_srvsettime = 0;
2339 2339          mi->mi_srvset_cnt = 0;
2340 2340  
2341 2341          mi->mi_count = 1;
2342 2342  
2343 2343          mi->mi_tsize = nfs4_tsize(svp->sv_knconf);
2344 2344          mi->mi_stsize = mi->mi_tsize;
2345 2345  
2346 2346          if (flags & NFSMNT_DIRECTIO)
2347 2347                  mi->mi_flags |= MI4_DIRECTIO;
2348 2348  
2349 2349          mi->mi_flags |= MI4_MOUNTING;
2350 2350  
2351 2351          /*
2352 2352           * Make a vfs struct for nfs.  We do this here instead of below
2353 2353           * because rtvp needs a vfs before we can do a getattr on it.
2354 2354           *
2355 2355           * Assign a unique device id to the mount
2356 2356           */
2357 2357          mutex_enter(&nfs_minor_lock);
2358 2358          do {
2359 2359                  nfs_minor = (nfs_minor + 1) & MAXMIN32;
2360 2360                  nfs_dev = makedevice(nfs_major, nfs_minor);
2361 2361          } while (vfs_devismounted(nfs_dev));
2362 2362          mutex_exit(&nfs_minor_lock);
2363 2363  
2364 2364          vfsp->vfs_dev = nfs_dev;
2365 2365          vfs_make_fsid(&vfsp->vfs_fsid, nfs_dev, nfs4fstyp);
2366 2366          vfsp->vfs_data = (caddr_t)mi;
2367 2367          vfsp->vfs_fstype = nfsfstyp;
2368 2368          vfsp->vfs_bsize = nfs4_bsize;
2369 2369  
2370 2370          /*
2371 2371           * Initialize fields used to support async putpage operations.
2372 2372           */
2373 2373          for (i = 0; i < NFS4_ASYNC_TYPES; i++)
2374 2374                  mi->mi_async_clusters[i] = nfs4_async_clusters;
2375 2375          mi->mi_async_init_clusters = nfs4_async_clusters;
2376 2376          mi->mi_async_curr[NFS4_ASYNC_QUEUE] =
2377 2377              mi->mi_async_curr[NFS4_ASYNC_PGOPS_QUEUE] = &mi->mi_async_reqs[0];
2378 2378          mi->mi_max_threads = nfs4_max_threads;
2379 2379          mutex_init(&mi->mi_async_lock, NULL, MUTEX_DEFAULT, NULL);
2380 2380          cv_init(&mi->mi_async_reqs_cv, NULL, CV_DEFAULT, NULL);
2381 2381          cv_init(&mi->mi_async_work_cv[NFS4_ASYNC_QUEUE], NULL, CV_DEFAULT,
2382 2382              NULL);
2383 2383          cv_init(&mi->mi_async_work_cv[NFS4_ASYNC_PGOPS_QUEUE], NULL,
2384 2384              CV_DEFAULT, NULL);
2385 2385          cv_init(&mi->mi_async_cv, NULL, CV_DEFAULT, NULL);
2386 2386          cv_init(&mi->mi_inact_req_cv, NULL, CV_DEFAULT, NULL);
2387 2387  
2388 2388          mi->mi_vfsp = vfsp;
2389 2389          mi->mi_zone = zone;
2390 2390          zone_init_ref(&mi->mi_zone_ref);
2391 2391          zone_hold_ref(zone, &mi->mi_zone_ref, ZONE_REF_NFSV4);
2392 2392          nfs4_mi_zonelist_add(mi);
2393 2393  
2394 2394          /*
2395 2395           * Initialize the <open owner/cred> hash table.
2396 2396           */
2397 2397          for (i = 0; i < NFS4_NUM_OO_BUCKETS; i++) {
2398 2398                  bucketp = &(mi->mi_oo_list[i]);
2399 2399                  mutex_init(&bucketp->b_lock, NULL, MUTEX_DEFAULT, NULL);
2400 2400                  list_create(&bucketp->b_oo_hash_list,
2401 2401                      sizeof (nfs4_open_owner_t),
2402 2402                      offsetof(nfs4_open_owner_t, oo_hash_node));
2403 2403          }
2404 2404  
2405 2405          /*
2406 2406           * Initialize the freed open owner list.
2407 2407           */
2408 2408          mi->mi_foo_num = 0;
2409 2409          mi->mi_foo_max = NFS4_NUM_FREED_OPEN_OWNERS;
2410 2410          list_create(&mi->mi_foo_list, sizeof (nfs4_open_owner_t),
2411 2411              offsetof(nfs4_open_owner_t, oo_foo_node));
2412 2412  
2413 2413          list_create(&mi->mi_lost_state, sizeof (nfs4_lost_rqst_t),
2414 2414              offsetof(nfs4_lost_rqst_t, lr_node));
2415 2415  
2416 2416          list_create(&mi->mi_bseqid_list, sizeof (nfs4_bseqid_entry_t),
2417 2417              offsetof(nfs4_bseqid_entry_t, bs_node));
2418 2418  
2419 2419          /*
2420 2420           * Initialize the msg buffer.
2421 2421           */
2422 2422          list_create(&mi->mi_msg_list, sizeof (nfs4_debug_msg_t),
2423 2423              offsetof(nfs4_debug_msg_t, msg_node));
2424 2424          mi->mi_msg_count = 0;
2425 2425          mutex_init(&mi->mi_msg_list_lock, NULL, MUTEX_DEFAULT, NULL);
2426 2426  
2427 2427          /*
2428 2428           * Initialize kstats
2429 2429           */
2430 2430          nfs4_mnt_kstat_init(vfsp);
2431 2431  
2432 2432          /*
2433 2433           * Initialize the shared filehandle pool.
2434 2434           */
2435 2435          sfh4_createtab(&mi->mi_filehandles);
2436 2436  
2437 2437          /*
2438 2438           * Save server path we're attempting to mount.
2439 2439           */
2440 2440          (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0);
2441 2441          origsvp = copy_svp(svp);
2442 2442          nfs_rw_exit(&svp->sv_lock);
2443 2443  
2444 2444          /*
2445 2445           * Make the GETFH call to get root fh for each replica.
2446 2446           */
2447 2447          if (svp_head->sv_next)
2448 2448                  droptext = ", dropping replica";
2449 2449  
2450 2450          /*
2451 2451           * If the uid is set then set the creds for secure mounts
2452 2452           * by proxy processes such as automountd.
2453 2453           */
2454 2454          (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0);
2455 2455          if (svp->sv_secdata->uid != 0 &&
2456 2456              svp->sv_secdata->rpcflavor == RPCSEC_GSS) {
2457 2457                  lcr = crdup(cr);
2458 2458                  (void) crsetugid(lcr, svp->sv_secdata->uid, crgetgid(cr));
2459 2459                  tcr = lcr;
2460 2460          }
2461 2461          nfs_rw_exit(&svp->sv_lock);
2462 2462          for (svp = svp_head; svp; svp = svp->sv_next) {
2463 2463                  if (nfs4_chkdup_servinfo4(svp_head, svp)) {
2464 2464                          nfs_cmn_err(error, CE_WARN,
2465 2465                              VERS_MSG "Host %s is a duplicate%s",
2466 2466                              svp->sv_hostname, droptext);
2467 2467                          (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0);
2468 2468                          svp->sv_flags |= SV4_NOTINUSE;
2469 2469                          nfs_rw_exit(&svp->sv_lock);
2470 2470                          continue;
2471 2471                  }
2472 2472                  mi->mi_curr_serv = svp;
2473 2473  
2474 2474                  /*
2475 2475                   * Just in case the server path being mounted contains
2476 2476                   * symlinks and fails w/STALE, save the initial sv_path
2477 2477                   * so we can redrive the initial mount compound with the
2478 2478                   * initial sv_path -- not a symlink-expanded version.
2479 2479                   *
2480 2480                   * This could only happen if a symlink was expanded
2481 2481                   * and the expanded mount compound failed with STALE.
2482 2482                   * Because the symlink could have been removed at the
2483 2483                   * server (and replaced with another symlink/dir), we
2484 2484                   * need to use the initial sv_path when attempting to
2485 2485                   * re-lookup everything and recover.
2486 2486                   *
2487 2487                   * Other mount errors should eventually be handled here also
2488 2488                   * (NFS4ERR_DELAY, NFS4ERR_RESOURCE).  For now, all mount
2489 2489                   * failures will result in mount being redriven a few times.
2490 2490                   */
2491 2491                  num_retry = nfs4_max_mount_retry;
2492 2492                  do {
2493 2493                          nfs4getfh_otw(mi, svp, &tmp_vtype,
2494 2494                              ((flags & NFSMNT_PUBLIC) ? NFS4_GETFH_PUBLIC : 0) |
2495 2495                              NFS4_GETFH_NEEDSOP, tcr, &e);
2496 2496  
2497 2497                          if (e.error == 0 && e.stat == NFS4_OK)
2498 2498                                  break;
2499 2499  
2500 2500                          /*
2501 2501                           * For some reason, the mount compound failed.  Before
2502 2502                           * retrying, we need to restore original conditions.
2503 2503                           */
2504 2504                          svp = restore_svp(mi, svp, origsvp);
2505 2505                          svp_head = svp;
2506 2506  
2507 2507                  } while (num_retry-- > 0);
2508 2508                  error = e.error ? e.error : geterrno4(e.stat);
2509 2509                  if (error) {
2510 2510                          nfs_cmn_err(error, CE_WARN,
2511 2511                              VERS_MSG "initial call to %s failed%s: %m",
2512 2512                              svp->sv_hostname, droptext);
2513 2513                          (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0);
2514 2514                          svp->sv_flags |= SV4_NOTINUSE;
2515 2515                          nfs_rw_exit(&svp->sv_lock);
2516 2516                          mi->mi_flags &= ~MI4_RECOV_FAIL;
2517 2517                          mi->mi_error = 0;
2518 2518                          continue;
2519 2519                  }
2520 2520  
2521 2521                  if (tmp_vtype == VBAD) {
2522 2522                          zcmn_err(mi->mi_zone->zone_id, CE_WARN,
2523 2523                              VERS_MSG "%s returned a bad file type for "
2524 2524                              "root%s", svp->sv_hostname, droptext);
2525 2525                          (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0);
2526 2526                          svp->sv_flags |= SV4_NOTINUSE;
2527 2527                          nfs_rw_exit(&svp->sv_lock);
2528 2528                          continue;
2529 2529                  }
2530 2530  
2531 2531                  if (vtype == VNON) {
2532 2532                          vtype = tmp_vtype;
2533 2533                  } else if (vtype != tmp_vtype) {
2534 2534                          zcmn_err(mi->mi_zone->zone_id, CE_WARN,
2535 2535                              VERS_MSG "%s returned a different file type "
2536 2536                              "for root%s", svp->sv_hostname, droptext);
2537 2537                          (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0);
2538 2538                          svp->sv_flags |= SV4_NOTINUSE;
2539 2539                          nfs_rw_exit(&svp->sv_lock);
2540 2540                          continue;
2541 2541                  }
2542 2542                  if (firstsvp == NULL)
2543 2543                          firstsvp = svp;
2544 2544          }
2545 2545  
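
Editor's sketch (not part of the diff): the per-replica retry above (num_retry = nfs4_max_mount_retry; do { ... } while (num_retry-- > 0)) attempts the mount compound up to nfs4_max_mount_retry + 1 times, restoring the original servinfo4 between attempts. Stripped of the NFS specifics, the control flow looks like the following; my_try_mount() and my_restore_state() are purely illustrative stubs.

/* Returns 0 on success, non-zero error otherwise. */
static int my_try_mount(void) { return (0); }	/* illustrative stub */
static void my_restore_state(void) { }		/* illustrative stub */

int
my_mount_with_retries(int max_retry)
{
	int err, num_retry = max_retry;

	do {
		err = my_try_mount();
		if (err == 0)
			break;
		/* Failed: restore original conditions before retrying. */
		my_restore_state();
	} while (num_retry-- > 0);

	return (err);
}
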
2546 2546          if (firstsvp == NULL) {
2547 2547                  if (error == 0)
2548 2548                          error = ENOENT;
2549 2549                  goto bad;
2550 2550          }
2551 2551  
2552 2552          mi->mi_curr_serv = svp = firstsvp;
2553 2553          (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0);
2554 2554          ASSERT((mi->mi_curr_serv->sv_flags & SV4_NOTINUSE) == 0);
2555 2555          fh.nfs_fh4_len = svp->sv_fhandle.fh_len;
2556 2556          fh.nfs_fh4_val = svp->sv_fhandle.fh_buf;
2557 2557          mi->mi_rootfh = sfh4_get(&fh, mi);
2558 2558          fh.nfs_fh4_len = svp->sv_pfhandle.fh_len;
2559 2559          fh.nfs_fh4_val = svp->sv_pfhandle.fh_buf;
2560 2560          mi->mi_srvparentfh = sfh4_get(&fh, mi);
2561 2561          nfs_rw_exit(&svp->sv_lock);
2562 2562  
2563 2563          /*
2564 2564           * Get the fname for filesystem root.
2565 2565           */
2566 2566          mi->mi_fname = fn_get(NULL, ".", mi->mi_rootfh);
2567 2567          mfname = mi->mi_fname;
2568 2568          fn_hold(mfname);
2569 2569  
2570 2570          /*
2571 2571           * Make the root vnode without attributes.
2572 2572           */
2573 2573          rtvp = makenfs4node_by_fh(mi->mi_rootfh, NULL,
2574 2574              &mfname, NULL, mi, cr, gethrtime());
2575 2575          rtvp->v_type = vtype;
2576 2576  
2577 2577          mi->mi_curread = mi->mi_tsize;
2578 2578          mi->mi_curwrite = mi->mi_stsize;
2579 2579  
2580 2580          /*
2581 2581           * Start the manager thread responsible for handling async worker
2582 2582           * threads.
2583 2583           */
2584 2584          MI4_HOLD(mi);
2585 2585          VFS_HOLD(vfsp); /* add reference for thread */
2586 2586          mi->mi_manager_thread = zthread_create(NULL, 0, nfs4_async_manager,
2587 2587              vfsp, 0, minclsyspri);
2588 2588          ASSERT(mi->mi_manager_thread != NULL);
2589 2589  
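
Editor's sketch (not part of the diff): both zthread_create() calls above are preceded by MI4_HOLD()/VFS_HOLD(), so the new thread starts life with its own reference to the structures it is handed and is the one that drops it on exit. A userland rendering of that ordering with pthreads and a simple reference count (all names illustrative):

#include <pthread.h>
#include <stdatomic.h>

struct my_obj {
	atomic_int	refcnt;
};

static void my_obj_hold(struct my_obj *o) { atomic_fetch_add(&o->refcnt, 1); }
static void my_obj_rele(struct my_obj *o) { atomic_fetch_sub(&o->refcnt, 1); }

static void *
my_worker(void *arg)
{
	struct my_obj *o = arg;

	/* ... work against o ... */
	my_obj_rele(o);		/* drop the reference taken on our behalf */
	return (NULL);
}

int
my_start_worker(struct my_obj *o)
{
	pthread_t tid;

	my_obj_hold(o);		/* hold *before* handing o to the thread */
	if (pthread_create(&tid, NULL, my_worker, o) != 0) {
		my_obj_rele(o);	/* creation failed: undo the hold */
		return (-1);
	}
	(void) pthread_detach(tid);
	return (0);
}
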
2590 2590          /*
2591 2591           * Create the thread that handles over-the-wire calls for
2592 2592           * VOP_INACTIVE.
2593 2593           * This needs to happen after the manager thread is created.
2594 2594           */
2595 2595          MI4_HOLD(mi);
2596 2596          mi->mi_inactive_thread = zthread_create(NULL, 0, nfs4_inactive_thread,
2597 2597              mi, 0, minclsyspri);
2598 2598          ASSERT(mi->mi_inactive_thread != NULL);
2599 2599  
2600 2600          /* If we didn't get a type, get one now */
2601 2601          if (rtvp->v_type == VNON) {
2602 2602                  va.va_mask = AT_TYPE;
2603 2603                  error = nfs4getattr(rtvp, &va, tcr);
2604 2604                  if (error)
2605 2605                          goto bad;
2606 2606                  rtvp->v_type = va.va_type;
2607 2607          }
2608 2608  
2609 2609          mi->mi_type = rtvp->v_type;
2610 2610  
2611 2611          mutex_enter(&mi->mi_lock);
2612 2612          mi->mi_flags &= ~MI4_MOUNTING;
2613 2613          mutex_exit(&mi->mi_lock);
2614 2614  
2615 2615          /* Update VFS with new server and path info */
2616 2616          if ((strcmp(svp->sv_hostname, origsvp->sv_hostname) != 0) ||
2617 2617              (strcmp(svp->sv_path, origsvp->sv_path) != 0)) {
2618 2618                  len = svp->sv_hostnamelen + svp->sv_pathlen;
2619 2619                  resource = kmem_zalloc(len, KM_SLEEP);
2620 2620                  (void) strcat(resource, svp->sv_hostname);
2621 2621                  (void) strcat(resource, ":");
2622 2622                  (void) strcat(resource, svp->sv_path);
2623 2623                  vfs_setresource(vfsp, resource, 0);
2624 2624                  kmem_free(resource, len);
2625 2625          }
2626 2626  
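
Editor's note on the sizing above: sv_hostnamelen and sv_pathlen each already include their terminating NUL, so their sum leaves exactly one spare byte for the ':' separator plus one for the final NUL of the "host:path" string, and kmem_zalloc() guarantees the buffer starts empty for strcat(). A userland equivalent that makes the accounting explicit (illustrative sketch only):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Build "host:path"; caller frees the result. */
char *
my_make_resource(const char *host, const char *path)
{
	/* strlen()+1 per part mirrors sv_hostnamelen and sv_pathlen. */
	size_t len = (strlen(host) + 1) + (strlen(path) + 1);
	char *res = calloc(1, len);

	if (res != NULL)
		(void) snprintf(res, len, "%s:%s", host, path);
	return (res);
}
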
2627 2627          sv4_free(origsvp);
2628 2628          *rtvpp = rtvp;
2629 2629          if (lcr != NULL)
2630 2630                  crfree(lcr);
2631 2631  
2632 2632          return (0);
2633 2633  bad:
2634 2634          /*
2635 2635           * An error occurred somewhere, need to clean up...
2636 2636           */
2637 2637          if (lcr != NULL)
2638 2638                  crfree(lcr);
2639 2639  
2640 2640          if (rtvp != NULL) {
2641 2641                  /*
2642 2642                   * We need to release our reference to the root vnode and
2643 2643                   * destroy the mntinfo4 struct that we just created.
2644 2644                   */
2645 2645                  rp = VTOR4(rtvp);
2646 2646                  if (rp->r_flags & R4HASHED)
2647 2647                          rp4_rmhash(rp);
2648 2648                  VN_RELE(rtvp);
2649 2649          }
2650 2650          nfs4_async_stop(vfsp);
2651 2651          nfs4_async_manager_stop(vfsp);
2652 2652          removed = nfs4_mi_zonelist_remove(mi);
2653 2653          if (removed)
2654 2654                  zone_rele_ref(&mi->mi_zone_ref, ZONE_REF_NFSV4);
2655 2655  
2656 2656          /*
2657 2657           * This releases the initial "hold" of the mi since it will never
2658 2658           * be referenced by the vfsp.  Also, when mount returns to vfs.c
2659 2659           * with an error, the vfsp will be destroyed, not rele'd.
2660 2660           */
2661 2661          MI4_RELE(mi);
2662 2662  
2663 2663          if (origsvp != NULL)
2664 2664                  sv4_free(origsvp);
2665 2665  
2666 2666          *rtvpp = NULL;
2667 2667          return (error);
2668 2668  }
2669 2669  
2670 2670  /*
2671 2671   * vfs operations
2672 2672   */
2673 2673  static int
2674 2674  nfs4_unmount(vfs_t *vfsp, int flag, cred_t *cr)
2675 2675  {
2676 2676          mntinfo4_t              *mi;
2677 2677          ushort_t                omax;
2678 2678          int                     removed;
2679 2679  
2680 2680          bool_t                  must_unlock;
2681 2681  
2682 2682          nfs4_ephemeral_tree_t   *eph_tree;
2683 2683  
2684 2684          if (secpolicy_fs_unmount(cr, vfsp) != 0)
2685 2685                  return (EPERM);
2686 2686  
2687 2687          mi = VFTOMI4(vfsp);
2688 2688  
2689 2689          if (flag & MS_FORCE) {
2690 2690                  vfsp->vfs_flag |= VFS_UNMOUNTED;
2691 2691                  if (nfs_zone() != mi->mi_zone) {
2692 2692                          /*
2693 2693                           * If the request is coming from the wrong zone,
2694 2694                           * we don't want to create any new threads, and
2695 2695                           * performance is not a concern.  Do everything
2696 2696                           * inline.
2697 2697                           */
2698 2698                          NFS4_DEBUG(nfs4_client_zone_debug, (CE_NOTE,
2699 2699                              "nfs4_unmount x-zone forced unmount of vfs %p\n",
2700 2700                              (void *)vfsp));
2701 2701                          nfs4_free_mount(vfsp, flag, cr);
2702 2702                  } else {
2703 2703                          /*
2704 2704                           * Free data structures asynchronously, to avoid
2705 2705                           * blocking the current thread (for performance
2706 2706                           * reasons only).
2707 2707                           */
2708 2708                          async_free_mount(vfsp, flag, cr);
2709 2709                  }
2710 2710  
2711 2711                  return (0);
2712 2712          }
2713 2713  
2714 2714          /*
2715 2715           * Wait until all asynchronous putpage operations on
2716 2716           * this file system are complete before flushing rnodes
2717 2717           * from the cache.
2718 2718           */
2719 2719          omax = mi->mi_max_threads;
2720 2720          if (nfs4_async_stop_sig(vfsp))
2721 2721                  return (EINTR);
2722 2722  
2723 2723          r4flush(vfsp, cr);
2724 2724  
2725 2725          /*
2726 2726           * About the only reason that this would fail would be
2727 2727           * that the harvester is already busy tearing down this
2728 2728           * node. So we fail back to the caller and let them try
2729 2729           * again when needed.
2730 2730           */
2731 2731          if (nfs4_ephemeral_umount(mi, flag, cr,
2732 2732              &must_unlock, &eph_tree)) {
2733 2733                  ASSERT(must_unlock == FALSE);
2734 2734                  mutex_enter(&mi->mi_async_lock);
2735 2735                  mi->mi_max_threads = omax;
2736 2736                  mutex_exit(&mi->mi_async_lock);
2737 2737  
2738 2738                  return (EBUSY);
2739 2739          }
2740 2740  
2741 2741          /*
2742 2742           * If there are any active vnodes on this file system,
2743 2743           * then the file system is busy and can't be unmounted.
2744 2744           */
2745 2745          if (check_rtable4(vfsp)) {
2746 2746                  nfs4_ephemeral_umount_unlock(&must_unlock, &eph_tree);
2747 2747  
2748 2748                  mutex_enter(&mi->mi_async_lock);
2749 2749                  mi->mi_max_threads = omax;
2750 2750                  mutex_exit(&mi->mi_async_lock);
2751 2751  
2752 2752                  return (EBUSY);
2753 2753          }
2754 2754  
2755 2755          /*
2756 2756           * The unmount can't fail from now on, so record any
2757 2757           * ephemeral changes.
2758 2758           */
2759 2759          nfs4_ephemeral_umount_activate(mi, &must_unlock, &eph_tree);
2760 2760  
2761 2761          /*
2762 2762           * There are no active files that could require over-the-wire
2763 2763           * calls to the server, so stop the async manager and the
2764 2764           * inactive thread.
2765 2765           */
2766 2766          nfs4_async_manager_stop(vfsp);
2767 2767  
2768 2768          /*
2769 2769           * Destroy all rnodes belonging to this file system from the
2770 2770           * rnode hash queues and purge any resources allocated to
2771 2771           * them.
2772 2772           */
2773 2773          destroy_rtable4(vfsp, cr);
2774 2774          vfsp->vfs_flag |= VFS_UNMOUNTED;
2775 2775  
2776 2776          nfs4_remove_mi_from_server(mi, NULL);
2777 2777          removed = nfs4_mi_zonelist_remove(mi);
2778 2778          if (removed)
2779 2779                  zone_rele_ref(&mi->mi_zone_ref, ZONE_REF_NFSV4);
2780 2780  
2781 2781          return (0);
2782 2782  }
2783 2783  
2784 2784  /*
2785 2785   * find root of nfs
2786 2786   */
2787 2787  static int
2788 2788  nfs4_root(vfs_t *vfsp, vnode_t **vpp)
2789 2789  {
2790 2790          mntinfo4_t *mi;
2791 2791          vnode_t *vp;
2792 2792          nfs4_fname_t *mfname;
2793 2793          servinfo4_t *svp;
2794 2794  
2795 2795          mi = VFTOMI4(vfsp);
2796 2796  
2797 2797          if (nfs_zone() != mi->mi_zone)
2798 2798                  return (EPERM);
2799 2799  
2800 2800          svp = mi->mi_curr_serv;
2801 2801          if (svp) {
2802 2802                  (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0);
2803 2803                  if (svp->sv_flags & SV4_ROOT_STALE) {
2804 2804                          nfs_rw_exit(&svp->sv_lock);
2805 2805  
2806 2806                          (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0);
2807 2807                          if (svp->sv_flags & SV4_ROOT_STALE) {
2808 2808                                  svp->sv_flags &= ~SV4_ROOT_STALE;
2809 2809                                  nfs_rw_exit(&svp->sv_lock);
2810 2810                                  return (ENOENT);
2811 2811                          }
2812 2812                          nfs_rw_exit(&svp->sv_lock);
2813 2813                  } else
2814 2814                          nfs_rw_exit(&svp->sv_lock);
2815 2815          }
2816 2816  
2817 2817          mfname = mi->mi_fname;
2818 2818          fn_hold(mfname);
2819 2819          vp = makenfs4node_by_fh(mi->mi_rootfh, NULL, &mfname, NULL,
2820 2820              VFTOMI4(vfsp), CRED(), gethrtime());
2821 2821  
2822 2822          if (VTOR4(vp)->r_flags & R4STALE) {
2823 2823                  VN_RELE(vp);
2824 2824                  return (ENOENT);
2825 2825          }
2826 2826  
2827 2827          ASSERT(vp->v_type == VNON || vp->v_type == mi->mi_type);
2828 2828  
2829 2829          vp->v_type = mi->mi_type;
2830 2830  
2831 2831          *vpp = vp;
2832 2832  
2833 2833          return (0);
2834 2834  }
2835 2835  
2836 2836  static int
2837 2837  nfs4_statfs_otw(vnode_t *vp, struct statvfs64 *sbp, cred_t *cr)
2838 2838  {
2839 2839          int error;
2840 2840          nfs4_ga_res_t gar;
2841 2841          nfs4_ga_ext_res_t ger;
2842 2842  
2843 2843          gar.n4g_ext_res = &ger;
2844 2844  
2845 2845          if (error = nfs4_attr_otw(vp, TAG_FSINFO, &gar,
2846 2846              NFS4_STATFS_ATTR_MASK, cr))
2847 2847                  return (error);
2848 2848  
2849 2849          *sbp = gar.n4g_ext_res->n4g_sb;
2850 2850  
2851 2851          return (0);
2852 2852  }
2853 2853  
2854 2854  /*
2855 2855   * Get file system statistics.
2856 2856   */
2857 2857  static int
2858 2858  nfs4_statvfs(vfs_t *vfsp, struct statvfs64 *sbp)
2859 2859  {
2860 2860          int error;
2861 2861          vnode_t *vp;
2862 2862          cred_t *cr;
2863 2863  
2864 2864          error = nfs4_root(vfsp, &vp);
2865 2865          if (error)
2866 2866                  return (error);
2867 2867  
2868 2868          cr = CRED();
2869 2869  
2870 2870          error = nfs4_statfs_otw(vp, sbp, cr);
2871 2871          if (!error) {
2872 2872                  (void) strncpy(sbp->f_basetype,
2873 2873                      vfssw[vfsp->vfs_fstype].vsw_name, FSTYPSZ);
2874 2874                  sbp->f_flag = vf_to_stf(vfsp->vfs_flag);
2875 2875          } else {
2876 2876                  nfs4_purge_stale_fh(error, vp, cr);
2877 2877          }
2878 2878  
2879 2879          VN_RELE(vp);
2880 2880  
2881 2881          return (error);
2882 2882  }
2883 2883  
2884 2884  static kmutex_t nfs4_syncbusy;
2885 2885  
2886 2886  /*
2887 2887   * Flush dirty nfs files for file system vfsp.
2888 2888   * If vfsp == NULL, all nfs files are flushed.
2889 2889   *
2890 2890   * SYNC_CLOSE in flag is passed to us to
2891 2891   * indicate that we are shutting down and/or
2892 2892   * rebooting.
2893 2893   */
2894 2894  static int
2895 2895  nfs4_sync(vfs_t *vfsp, short flag, cred_t *cr)
2896 2896  {
2897 2897          /*
2898 2898           * Cross-zone calls are OK here, since this translates to a
2899 2899           * VOP_PUTPAGE(B_ASYNC), which gets picked up by the right zone.
2900 2900           */
2901 2901          if (!(flag & SYNC_ATTR) && mutex_tryenter(&nfs4_syncbusy) != 0) {
2902 2902                  r4flush(vfsp, cr);
2903 2903                  mutex_exit(&nfs4_syncbusy);
2904 2904          }
2905 2905  
2906 2906          /*
2907 2907           * if SYNC_CLOSE is set then we know that
2908 2908           * If SYNC_CLOSE is set then we know that
2909 2909           * the system is rebooting; mark the mntinfo
2910 2910           * for later examination.
2911 2911          if (vfsp && (flag & SYNC_CLOSE)) {
2912 2912                  mntinfo4_t *mi;
2913 2913  
2914 2914                  mi = VFTOMI4(vfsp);
2915 2915                  if (!(mi->mi_flags & MI4_SHUTDOWN)) {
2916 2916                          mutex_enter(&mi->mi_lock);
2917 2917                          mi->mi_flags |= MI4_SHUTDOWN;
2918 2918                          mutex_exit(&mi->mi_lock);
2919 2919                  }
2920 2920          }
2921 2921          return (0);
2922 2922  }
2923 2923  
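
Editor's sketch (not part of the diff): nfs4_sync() above uses mutex_tryenter() so that only one thread flushes at a time and any other caller simply returns; the kernel mutex_tryenter() returns non-zero on success. The same single-flusher idea with pthread_mutex_trylock(), whose sense is inverted (0 means the lock was acquired); names are illustrative.

#include <pthread.h>

static pthread_mutex_t my_syncbusy = PTHREAD_MUTEX_INITIALIZER;

static void my_flush(void) { }	/* illustrative stand-in for r4flush() */

void
my_sync(void)
{
	/* pthread_mutex_trylock() returns 0 when the lock is taken. */
	if (pthread_mutex_trylock(&my_syncbusy) == 0) {
		my_flush();
		pthread_mutex_unlock(&my_syncbusy);
	}
	/* Otherwise someone else is already flushing; nothing to do. */
}
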
2924 2924  /*
2925 2925   * vget is difficult, if not impossible, to support in v4 because we don't
2926 2926   * know the parent directory or name, which makes it impossible to create a
2927 2927   * useful shadow vnode.  And we need the shadow vnode for things like
2928 2928   * OPEN.
2929 2929   */
2930 2930  
2931 2931  /* ARGSUSED */
2932 2932  /*
2933 2933   * XXX Check nfs4_vget_pseudo() for dependency.
2934 2934   */
2935 2935  static int
2936 2936  nfs4_vget(vfs_t *vfsp, vnode_t **vpp, fid_t *fidp)
2937 2937  {
2938 2938          return (EREMOTE);
2939 2939  }
2940 2940  
2941 2941  /*
2942 2942   * nfs4_mountroot gets called in the case where we are diskless booting.  All
2943 2943   * we need from here is the ability to get the server info and from there we
2944 2944   * can simply call nfs4_rootvp.
2945 2945   */
2946 2946  /* ARGSUSED */
2947 2947  static int
2948 2948  nfs4_mountroot(vfs_t *vfsp, whymountroot_t why)
2949 2949  {
2950 2950          vnode_t *rtvp;
2951 2951          char root_hostname[SYS_NMLN+1];
2952 2952          struct servinfo4 *svp;
2953 2953          int error;
2954 2954          int vfsflags;
2955 2955          size_t size;
2956 2956          char *root_path;
2957 2957          struct pathname pn;
2958 2958          char *name;
2959 2959          cred_t *cr;
2960 2960          mntinfo4_t *mi;
2961 2961          struct nfs_args args;           /* nfs mount arguments */
2962 2962          static char token[10];
2963 2963          nfs4_error_t n4e;
2964 2964  
2965 2965          bzero(&args, sizeof (args));
2966 2966  
2967 2967          /* do this BEFORE getfile which causes xid stamps to be initialized */
2968 2968          clkset(-1L);            /* hack for now - until we get time svc? */
2969 2969  
2970 2970          if (why == ROOT_REMOUNT) {
2971 2971                  /*
2972 2972                   * Shouldn't happen.
2973 2973                   */
2974 2974                  panic("nfs4_mountroot: why == ROOT_REMOUNT");
2975 2975          }
2976 2976  
2977 2977          if (why == ROOT_UNMOUNT) {
2978 2978                  /*
2979 2979                   * Nothing to do for NFS.
2980 2980                   */
2981 2981                  return (0);
2982 2982          }
2983 2983  
2984 2984          /*
2985 2985           * why == ROOT_INIT
2986 2986           */
2987 2987  
2988 2988          name = token;
2989 2989          *name = 0;
2990 2990          (void) getfsname("root", name, sizeof (token));
2991 2991  
2992 2992          pn_alloc(&pn);
2993 2993          root_path = pn.pn_path;
2994 2994  
2995 2995          svp = kmem_zalloc(sizeof (*svp), KM_SLEEP);
2996 2996          nfs_rw_init(&svp->sv_lock, NULL, RW_DEFAULT, NULL);
2997 2997          svp->sv_knconf = kmem_zalloc(sizeof (*svp->sv_knconf), KM_SLEEP);
2998 2998          svp->sv_knconf->knc_protofmly = kmem_alloc(KNC_STRSIZE, KM_SLEEP);
2999 2999          svp->sv_knconf->knc_proto = kmem_alloc(KNC_STRSIZE, KM_SLEEP);
3000 3000  
3001 3001          /*
3002 3002           * Get server address
3003 3003           * Get the root path
3004 3004           * Get server's transport
3005 3005           * Get server's hostname
3006 3006           * Get options
3007 3007           */
3008 3008          args.addr = &svp->sv_addr;
3009 3009          (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0);
3010 3010          args.fh = (char *)&svp->sv_fhandle;
3011 3011          args.knconf = svp->sv_knconf;
3012 3012          args.hostname = root_hostname;
3013 3013          vfsflags = 0;
3014 3014          if (error = mount_root(*name ? name : "root", root_path, NFS_V4,
3015 3015              &args, &vfsflags)) {
3016 3016                  if (error == EPROTONOSUPPORT)
3017 3017                          nfs_cmn_err(error, CE_WARN, "nfs4_mountroot: "
3018 3018                              "mount_root failed: server doesn't support NFS V4");
3019 3019                  else
3020 3020                          nfs_cmn_err(error, CE_WARN,
3021 3021                              "nfs4_mountroot: mount_root failed: %m");
3022 3022                  nfs_rw_exit(&svp->sv_lock);
3023 3023                  sv4_free(svp);
3024 3024                  pn_free(&pn);
3025 3025                  return (error);
3026 3026          }
3027 3027          nfs_rw_exit(&svp->sv_lock);
3028 3028          svp->sv_hostnamelen = (int)(strlen(root_hostname) + 1);
3029 3029          svp->sv_hostname = kmem_alloc(svp->sv_hostnamelen, KM_SLEEP);
3030 3030          (void) strcpy(svp->sv_hostname, root_hostname);
3031 3031  
3032 3032          svp->sv_pathlen = (int)(strlen(root_path) + 1);
3033 3033          svp->sv_path = kmem_alloc(svp->sv_pathlen, KM_SLEEP);
3034 3034          (void) strcpy(svp->sv_path, root_path);
3035 3035  
3036 3036          /*
3037 3037           * Force root partition to always be mounted with AUTH_UNIX for now
3038 3038           */
3039 3039          svp->sv_secdata = kmem_alloc(sizeof (*svp->sv_secdata), KM_SLEEP);
3040 3040          svp->sv_secdata->secmod = AUTH_UNIX;
3041 3041          svp->sv_secdata->rpcflavor = AUTH_UNIX;
3042 3042          svp->sv_secdata->data = NULL;
3043 3043  
3044 3044          cr = crgetcred();
3045 3045          rtvp = NULL;
3046 3046  
3047 3047          error = nfs4rootvp(&rtvp, vfsp, svp, args.flags, cr, global_zone);
3048 3048  
3049 3049          if (error) {
3050 3050                  crfree(cr);
3051 3051                  pn_free(&pn);
3052 3052                  sv4_free(svp);
3053 3053                  return (error);
3054 3054          }
3055 3055  
3056 3056          mi = VTOMI4(rtvp);
3057 3057  
3058 3058          /*
3059 3059           * Send client id to the server, if necessary
3060 3060           */
3061 3061          nfs4_error_zinit(&n4e);
3062 3062          nfs4setclientid(mi, cr, FALSE, &n4e);
3063 3063          error = n4e.error;
3064 3064  
3065 3065          crfree(cr);
3066 3066  
3067 3067          if (error) {
3068 3068                  pn_free(&pn);
3069 3069                  goto errout;
3070 3070          }
3071 3071  
3072 3072          error = nfs4_setopts(rtvp, DATAMODEL_NATIVE, &args);
3073 3073          if (error) {
3074 3074                  nfs_cmn_err(error, CE_WARN,
3075 3075                      "nfs4_mountroot: invalid root mount options");
3076 3076                  pn_free(&pn);
3077 3077                  goto errout;
3078 3078          }
3079 3079  
3080 3080          (void) vfs_lock_wait(vfsp);
3081 3081          vfs_add(NULL, vfsp, vfsflags);
3082 3082          vfs_unlock(vfsp);
3083 3083  
3084 3084          size = strlen(svp->sv_hostname);
3085 3085          (void) strcpy(rootfs.bo_name, svp->sv_hostname);
3086 3086          rootfs.bo_name[size] = ':';
3087 3087          (void) strcpy(&rootfs.bo_name[size + 1], root_path);
3088 3088  
3089 3089          pn_free(&pn);
3090 3090  
3091 3091  errout:
3092 3092          if (error) {
3093 3093                  sv4_free(svp);
3094 3094                  nfs4_async_stop(vfsp);
3095 3095                  nfs4_async_manager_stop(vfsp);
3096 3096          }
3097 3097  
3098 3098          if (rtvp != NULL)
3099 3099                  VN_RELE(rtvp);
3100 3100  
3101 3101          return (error);
3102 3102  }
3103 3103  
3104 3104  /*
3105 3105   * Initialization routine for VFS routines.  Should only be called once
3106 3106   */
3107 3107  int
3108 3108  nfs4_vfsinit(void)
3109 3109  {
3110 3110          mutex_init(&nfs4_syncbusy, NULL, MUTEX_DEFAULT, NULL);
3111 3111          nfs4setclientid_init();
3112 3112          nfs4_ephemeral_init();
3113 3113          return (0);
3114 3114  }
3115 3115  
3116 3116  void
3117 3117  nfs4_vfsfini(void)
3118 3118  {
3119 3119          nfs4_ephemeral_fini();
3120 3120          nfs4setclientid_fini();
3121 3121          mutex_destroy(&nfs4_syncbusy);
3122 3122  }
3123 3123  
3124 3124  void
3125 3125  nfs4_freevfs(vfs_t *vfsp)
3126 3126  {
3127 3127          mntinfo4_t *mi;
3128 3128  
3129 3129          /* need to release the initial hold */
3130 3130          mi = VFTOMI4(vfsp);
3131 3131  
3132 3132          /*
3133 3133           * At this point, we can no longer reference the vfs
3134 3134           * and need to inform other holders of the reference
3135 3135           * to the mntinfo4_t.
3136 3136           */
3137 3137          mi->mi_vfsp = NULL;
3138 3138  
3139 3139          MI4_RELE(mi);
3140 3140  }
3141 3141  
3142 3142  /*
3143 3143   * Client side SETCLIENTID and SETCLIENTID_CONFIRM
3144 3144   */
3145 3145  struct nfs4_server nfs4_server_lst =
3146 3146          { &nfs4_server_lst, &nfs4_server_lst };
3147 3147  
3148 3148  kmutex_t nfs4_server_lst_lock;
3149 3149  
3150 3150  static void
3151 3151  nfs4setclientid_init(void)
3152 3152  {
3153 3153          mutex_init(&nfs4_server_lst_lock, NULL, MUTEX_DEFAULT, NULL);
3154 3154  }
3155 3155  
3156 3156  static void
3157 3157  nfs4setclientid_fini(void)
3158 3158  {
3159 3159          mutex_destroy(&nfs4_server_lst_lock);
3160 3160  }
3161 3161  
3162 3162  int nfs4_retry_sclid_delay = NFS4_RETRY_SCLID_DELAY;
3163 3163  int nfs4_num_sclid_retries = NFS4_NUM_SCLID_RETRIES;
3164 3164  
3165 3165  /*
3166 3166   * Set the clientid for the server for "mi".  No-op if the clientid is
3167 3167   * already set.
3168 3168   *
3169 3169   * The recovery boolean should be set to TRUE if this function was called
3170 3170   * by the recovery code, and FALSE otherwise.  This is used to determine
3171 3171   * if we need to call nfs4_start/end_op as well as grab the mi_recovlock
3172 3172   * for adding a mntinfo4_t to a nfs4_server_t.
3173 3173   *
3174 3174   * Error is returned via 'n4ep'.  If there was a 'n4ep->stat' error, then
3175 3175   * 'n4ep->error' is set to geterrno4(n4ep->stat).
3176 3176   */
3177 3177  void
3178 3178  nfs4setclientid(mntinfo4_t *mi, cred_t *cr, bool_t recovery, nfs4_error_t *n4ep)
3179 3179  {
3180 3180          struct nfs4_server *np;
3181 3181          struct servinfo4 *svp = mi->mi_curr_serv;
3182 3182          nfs4_recov_state_t recov_state;
3183 3183          int num_retries = 0;
3184 3184          bool_t retry;
3185 3185          cred_t *lcr = NULL;
3186 3186          int retry_inuse = 1; /* only retry once on NFS4ERR_CLID_INUSE */
3187 3187          time_t lease_time = 0;
3188 3188  
3189 3189          recov_state.rs_flags = 0;
3190 3190          recov_state.rs_num_retry_despite_err = 0;
3191 3191          ASSERT(n4ep != NULL);
3192 3192  
3193 3193  recov_retry:
3194 3194          retry = FALSE;
3195 3195          nfs4_error_zinit(n4ep);
3196 3196          if (!recovery)
3197 3197                  (void) nfs_rw_enter_sig(&mi->mi_recovlock, RW_READER, 0);
3198 3198  
3199 3199          mutex_enter(&nfs4_server_lst_lock);
3200 3200          np = servinfo4_to_nfs4_server(svp); /* This locks np if it is found */
3201 3201          mutex_exit(&nfs4_server_lst_lock);
3202 3202          if (!np) {
3203 3203                  struct nfs4_server *tnp;
3204 3204                  np = new_nfs4_server(svp, cr);
3205 3205                  mutex_enter(&np->s_lock);
3206 3206  
3207 3207                  mutex_enter(&nfs4_server_lst_lock);
3208 3208                  tnp = servinfo4_to_nfs4_server(svp);
3209 3209                  if (tnp) {
3210 3210                          /*
3211 3211                           * another thread snuck in and put server on list.
3212 3212                           * since we aren't adding it to the nfs4_server_list
3213 3213                           * we need to set the ref count to 0 and destroy it.
3214 3214                           */
3215 3215                          np->s_refcnt = 0;
3216 3216                          destroy_nfs4_server(np);
3217 3217                          np = tnp;
3218 3218                  } else {
3219 3219                          /*
3220 3220                           * do not give list a reference until everything
3221 3221                           * succeeds
3222 3222                           */
3223 3223                          insque(np, &nfs4_server_lst);
3224 3224                  }
3225 3225                  mutex_exit(&nfs4_server_lst_lock);
3226 3226          }
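
Editor's sketch (not part of the diff): the block above is the classic check, allocate outside the lock, then re-check pattern. servinfo4_to_nfs4_server() is consulted again under nfs4_server_lst_lock after new_nfs4_server() runs, and the freshly built entry is discarded if another thread won the race. A compact userland rendering of the same shape (all types and names illustrative):

#include <pthread.h>
#include <stdlib.h>

struct my_server {
	struct my_server	*next;
	int			key;
};

static pthread_mutex_t my_lst_lock = PTHREAD_MUTEX_INITIALIZER;
static struct my_server *my_lst;

static struct my_server *
my_lookup_locked(int key)
{
	struct my_server *sp;

	for (sp = my_lst; sp != NULL; sp = sp->next)
		if (sp->key == key)
			return (sp);
	return (NULL);
}

struct my_server *
my_find_or_insert(int key)
{
	struct my_server *np, *tnp;

	pthread_mutex_lock(&my_lst_lock);
	np = my_lookup_locked(key);
	pthread_mutex_unlock(&my_lst_lock);
	if (np != NULL)
		return (np);

	/* Not found: build a candidate entry outside the list lock. */
	np = calloc(1, sizeof (*np));
	if (np == NULL)
		return (NULL);
	np->key = key;

	pthread_mutex_lock(&my_lst_lock);
	tnp = my_lookup_locked(key);
	if (tnp != NULL) {
		/* Another thread snuck in; discard our candidate. */
		free(np);
		np = tnp;
	} else {
		np->next = my_lst;
		my_lst = np;
	}
	pthread_mutex_unlock(&my_lst_lock);

	return (np);
}
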
3227 3227          ASSERT(MUTEX_HELD(&np->s_lock));
3228 3228          /*
3229 3229           * If we find the server already has N4S_CLIENTID_SET, then
3230 3230           * just return, we've already done SETCLIENTID to that server
3231 3231           */
3232 3232          if (np->s_flags & N4S_CLIENTID_SET) {
3233 3233                  /* add mi to np's mntinfo4_list */
3234 3234                  nfs4_add_mi_to_server(np, mi);
3235 3235                  if (!recovery)
3236 3236                          nfs_rw_exit(&mi->mi_recovlock);
3237 3237                  mutex_exit(&np->s_lock);
3238 3238                  nfs4_server_rele(np);
3239 3239                  return;
3240 3240          }
3241 3241          mutex_exit(&np->s_lock);
3242 3242  
3243 3243  
3244 3244          /*
3245 3245           * Drop the mi_recovlock since nfs4_start_op will
3246 3246           * acquire it again for us.
3247 3247           */
3248 3248          if (!recovery) {
3249 3249                  nfs_rw_exit(&mi->mi_recovlock);
3250 3250  
3251 3251                  n4ep->error = nfs4_start_op(mi, NULL, NULL, &recov_state);
3252 3252                  if (n4ep->error) {
3253 3253                          nfs4_server_rele(np);
3254 3254                          return;
3255 3255                  }
3256 3256          }
3257 3257  
3258 3258          mutex_enter(&np->s_lock);
3259 3259          while (np->s_flags & N4S_CLIENTID_PEND) {
3260 3260                  if (!cv_wait_sig(&np->s_clientid_pend, &np->s_lock)) {
3261 3261                          mutex_exit(&np->s_lock);
3262 3262                          nfs4_server_rele(np);
3263 3263                          if (!recovery)
3264 3264                                  nfs4_end_op(mi, NULL, NULL, &recov_state,
3265 3265                                      recovery);
3266 3266                          n4ep->error = EINTR;
3267 3267                          return;
3268 3268                  }
3269 3269          }
3270 3270  
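
Editor's sketch (not part of the diff): the while loop above is the standard condition-variable idiom, re-testing N4S_CLIENTID_PEND after every wakeup while holding s_lock; cv_wait_sig() additionally returns zero when interrupted by a signal, which the code maps to EINTR. The uninterruptible userland shape of that loop, with illustrative types:

#include <pthread.h>
#include <stdbool.h>

struct my_server {
	pthread_mutex_t	lock;
	pthread_cond_t	pend_cv;
	bool		pending;
};

void
my_wait_not_pending(struct my_server *np)
{
	pthread_mutex_lock(&np->lock);
	while (np->pending)
		pthread_cond_wait(&np->pend_cv, &np->lock);
	pthread_mutex_unlock(&np->lock);
}
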
3271 3271          if (np->s_flags & N4S_CLIENTID_SET) {
3272 3272                  /* XXX copied/pasted from above */
3273 3273                  /* add mi to np's mntinfo4_list */
3274 3274                  nfs4_add_mi_to_server(np, mi);
3275 3275                  mutex_exit(&np->s_lock);
3276 3276                  nfs4_server_rele(np);
3277 3277                  if (!recovery)
3278 3278                          nfs4_end_op(mi, NULL, NULL, &recov_state, recovery);
3279 3279                  return;
3280 3280          }
3281 3281  
3282 3282          /*
3283 3283           * Reset the N4S_CB_PINGED flag. This is used to
3284 3284           * indicate if we have received a CB_NULL from the
3285 3285           * server. Also we reset the waiter flag.
3286 3286           */
3287 3287          np->s_flags &= ~(N4S_CB_PINGED | N4S_CB_WAITER);
3288 3288          /* any failure must now clear this flag */
3289 3289          np->s_flags |= N4S_CLIENTID_PEND;
3290 3290          mutex_exit(&np->s_lock);
3291 3291          nfs4setclientid_otw(mi, svp, cr, np, n4ep, &retry_inuse);
3292 3292  
3293 3293          if (n4ep->error == EACCES) {
3294 3294                  /*
3295 3295                   * If the uid is set then set the creds for secure mounts
3296 3296                   * by proxy processes such as automountd.
3297 3297                   */
3298 3298                  (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0);
3299 3299                  if (svp->sv_secdata->uid != 0) {
3300 3300                          lcr = crdup(cr);
3301 3301                          (void) crsetugid(lcr, svp->sv_secdata->uid,
3302 3302                              crgetgid(cr));
3303 3303                  }
3304 3304                  nfs_rw_exit(&svp->sv_lock);
3305 3305  
3306 3306                  if (lcr != NULL) {
3307 3307                          mutex_enter(&np->s_lock);
3308 3308                          crfree(np->s_cred);
3309 3309                          np->s_cred = lcr;
3310 3310                          mutex_exit(&np->s_lock);
3311 3311                          nfs4setclientid_otw(mi, svp, lcr, np, n4ep,
3312 3312                              &retry_inuse);
3313 3313                  }
3314 3314          }
3315 3315          mutex_enter(&np->s_lock);
3316 3316          lease_time = np->s_lease_time;
3317 3317          np->s_flags &= ~N4S_CLIENTID_PEND;
3318 3318          mutex_exit(&np->s_lock);
3319 3319  
3320 3320          if (n4ep->error != 0 || n4ep->stat != NFS4_OK) {
3321 3321                  /*
3322 3322                   * Start recovery if failover is a possibility.  If
3323 3323                   * invoked by the recovery thread itself, then just
3324 3324                   * return and let it handle the failover first.  NB:
3325 3325                   * recovery is not allowed if the mount is in progress
3326 3326                   * since the infrastructure is not sufficiently setup
3327 3327                   * to allow it.  Just return the error (after suitable
3328 3328                   * retries).
3329 3329                   */
3330 3330                  if (FAILOVER_MOUNT4(mi) && nfs4_try_failover(n4ep)) {
3331 3331                          (void) nfs4_start_recovery(n4ep, mi, NULL,
3332 3332                              NULL, NULL, NULL, OP_SETCLIENTID, NULL, NULL, NULL);
3333 3333                          /*
3334 3334                           * Don't retry here, just return and let
3335 3335                           * recovery take over.
3336 3336                           */
3337 3337                          if (recovery)
3338 3338                                  retry = FALSE;
3339 3339                  } else if (nfs4_rpc_retry_error(n4ep->error) ||
3340 3340                      n4ep->stat == NFS4ERR_RESOURCE ||
3341 3341                      n4ep->stat == NFS4ERR_STALE_CLIENTID) {
3342 3342  
3343 3343                          retry = TRUE;
3344 3344                          /*
3345 3345                           * Always retry if in recovery or once had
3346 3346                           * contact with the server (but now it's
3347 3347                           * overloaded).
3348 3348                           */
3349 3349                          if (recovery == TRUE ||
3350 3350                              n4ep->error == ETIMEDOUT ||
3351 3351                              n4ep->error == ECONNRESET)
3352 3352                                  num_retries = 0;
3353 3353                  } else if (retry_inuse && n4ep->error == 0 &&
3354 3354                      n4ep->stat == NFS4ERR_CLID_INUSE) {
3355 3355                          retry = TRUE;
3356 3356                          num_retries = 0;
3357 3357                  }
3358 3358          } else {
3359 3359                  /*
3360 3360                   * Since everything succeeded, give the list a reference count
3361 3361                   * if it hasn't been given one by add_new_nfs4_server(), or if
3362 3362                   * this is not a recovery situation, in which case it is already on
3363 3363                   * the list.
3364 3364                   */
3365 3365                  mutex_enter(&np->s_lock);
3366 3366                  if ((np->s_flags & N4S_INSERTED) == 0) {
3367 3367                          np->s_refcnt++;
3368 3368                          np->s_flags |= N4S_INSERTED;
3369 3369                  }
3370 3370                  mutex_exit(&np->s_lock);
3371 3371          }
3372 3372  
3373 3373          if (!recovery)
3374 3374                  nfs4_end_op(mi, NULL, NULL, &recov_state, recovery);
3375 3375  
3376 3376  
3377 3377          if (retry && num_retries++ < nfs4_num_sclid_retries) {
3378 3378                  if (retry_inuse) {
3379 3379                          delay(SEC_TO_TICK(lease_time + nfs4_retry_sclid_delay));
3380 3380                          retry_inuse = 0;
3381 3381                  } else
3382 3382                          delay(SEC_TO_TICK(nfs4_retry_sclid_delay));
3383 3383  
3384 3384                  nfs4_server_rele(np);
3385 3385                  goto recov_retry;
3386 3386          }
3387 3387  
3388 3388  
3389 3389          if (n4ep->error == 0)
3390 3390                  n4ep->error = geterrno4(n4ep->stat);
3391 3391  
3392 3392          /* broadcast before release in case no other threads are waiting */
3393 3393          cv_broadcast(&np->s_clientid_pend);
3394 3394          nfs4_server_rele(np);
3395 3395  }
3396 3396  
3397 3397  int nfs4setclientid_otw_debug = 0;
3398 3398  
3399 3399  /*
3400 3400   * This function handles the recovery of STALE_CLIENTID for SETCLIENTID_CONFIRM,
3401 3401   * but nothing else; the calling function must be designed to handle those
3402 3402   * other errors.
3403 3403   */
3404 3404  static void
3405 3405  nfs4setclientid_otw(mntinfo4_t *mi, struct servinfo4 *svp,  cred_t *cr,
3406 3406      struct nfs4_server *np, nfs4_error_t *ep, int *retry_inusep)
3407 3407  {
3408 3408          COMPOUND4args_clnt args;
3409 3409          COMPOUND4res_clnt res;
3410 3410          nfs_argop4 argop[3];
3411 3411          SETCLIENTID4args *s_args;
3412 3412          SETCLIENTID4resok *s_resok;
3413 3413          int doqueue = 1;
3414 3414          nfs4_ga_res_t *garp = NULL;
3415 3415          timespec_t prop_time, after_time;
3416 3416          verifier4 verf;
3417 3417          clientid4 tmp_clientid;
3418 3418  
3419 3419          ASSERT(!MUTEX_HELD(&np->s_lock));
3420 3420  
3421 3421          args.ctag = TAG_SETCLIENTID;
3422 3422  
3423 3423          args.array = argop;
3424 3424          args.array_len = 3;
3425 3425  
3426 3426          /* PUTROOTFH */
3427 3427          argop[0].argop = OP_PUTROOTFH;
3428 3428  
3429 3429          /* GETATTR */
3430 3430          argop[1].argop = OP_GETATTR;
3431 3431          argop[1].nfs_argop4_u.opgetattr.attr_request = FATTR4_LEASE_TIME_MASK;
3432 3432          argop[1].nfs_argop4_u.opgetattr.mi = mi;
3433 3433  
3434 3434          /* SETCLIENTID */
3435 3435          argop[2].argop = OP_SETCLIENTID;
3436 3436  
3437 3437          s_args = &argop[2].nfs_argop4_u.opsetclientid;
3438 3438  
3439 3439          mutex_enter(&np->s_lock);
3440 3440  
3441 3441          s_args->client.verifier = np->clidtosend.verifier;
3442 3442          s_args->client.id_len = np->clidtosend.id_len;
3443 3443          ASSERT(s_args->client.id_len <= NFS4_OPAQUE_LIMIT);
3444 3444          s_args->client.id_val = np->clidtosend.id_val;
3445 3445  
3446 3446          /*
3447 3447           * The callback needs to happen on a non-RDMA transport.
3448 3448           * Check if we have saved the original knetconfig;
3449 3449           * if so, use that instead.
3450 3450           */
3451 3451          if (svp->sv_origknconf != NULL)
3452 3452                  nfs4_cb_args(np, svp->sv_origknconf, s_args);
3453 3453          else
3454 3454                  nfs4_cb_args(np, svp->sv_knconf, s_args);
3455 3455  
3456 3456          mutex_exit(&np->s_lock);
3457 3457  
3458 3458          rfs4call(mi, &args, &res, cr, &doqueue, 0, ep);
3459 3459  
3460 3460          if (ep->error)
3461 3461                  return;
3462 3462  
3463 3463          /* getattr lease_time res */
3464 3464          if ((res.array_len >= 2) &&
3465 3465              (res.array[1].nfs_resop4_u.opgetattr.status == NFS4_OK)) {
3466 3466                  garp = &res.array[1].nfs_resop4_u.opgetattr.ga_res;
3467 3467  
3468 3468  #ifndef _LP64
3469 3469                  /*
3470 3470                   * The 32 bit client cannot handle a lease time greater than
3471 3471                   * (INT32_MAX/1000000).  This is due to the use of the
3472 3472                   * lease_time in calls to drv_usectohz() in
3473 3473                   * nfs4_renew_lease_thread().  The problem is that
3474 3474                   * drv_usectohz() takes a time_t (which is just a long = 4
3475 3475                   * bytes) as its parameter.  The lease_time is multiplied by
3476 3476                   * 1000000 to convert seconds to usecs for the parameter.  If
3477 3477                   * a number bigger than (INT32_MAX/1000000) is used then we
3478 3478                   * overflow on the 32bit client.
3479 3479                   */
3480 3480                  if (garp->n4g_ext_res->n4g_leasetime > (INT32_MAX/1000000)) {
3481 3481                          garp->n4g_ext_res->n4g_leasetime = INT32_MAX/1000000;
3482 3482                  }
3483 3483  #endif
3484 3484  
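
Editor's note on the clamp above: INT32_MAX / 1000000 is 2147, so a 32-bit client caps the usable lease at 2147 seconds (just under 36 minutes) to keep lease_time * 1000000 from overflowing the long passed to drv_usectohz(). A trivial check of the arithmetic:

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	/* 2147483647 / 1000000 == 2147 seconds, roughly 35.8 minutes. */
	int32_t max_lease = INT32_MAX / 1000000;

	(void) printf("32-bit lease clamp: %d sec\n", (int)max_lease);
	return (0);
}
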
3485 3485                  mutex_enter(&np->s_lock);
3486 3486                  np->s_lease_time = garp->n4g_ext_res->n4g_leasetime;
3487 3487  
3488 3488                  /*
3489 3489                   * Keep track of the lease period for the mi's
3490 3490                   * mi_msg_list.  We need an appropriate time
3491 3491                   * bound to associate past facts with a current
3492 3492                   * event.  The lease period is perfect for this.
3493 3493                   */
3494 3494                  mutex_enter(&mi->mi_msg_list_lock);
3495 3495                  mi->mi_lease_period = np->s_lease_time;
3496 3496                  mutex_exit(&mi->mi_msg_list_lock);
3497 3497                  mutex_exit(&np->s_lock);
3498 3498          }
3499 3499  
3500 3500  
3501 3501          if (res.status == NFS4ERR_CLID_INUSE) {
3502 3502                  clientaddr4 *clid_inuse;
3503 3503  
3504 3504                  if (!(*retry_inusep)) {
3505 3505                          clid_inuse = &res.array->nfs_resop4_u.
3506 3506                              opsetclientid.SETCLIENTID4res_u.client_using;
3507 3507  
3508 3508                          zcmn_err(mi->mi_zone->zone_id, CE_NOTE,
3509 3509                              "NFS4 mount (SETCLIENTID failed)."
3510 3510                              "  nfs4_client_id.id is in "
3511 3511                              "use already by: r_netid<%s> r_addr<%s>",
3512 3512                              clid_inuse->r_netid, clid_inuse->r_addr);
3513 3513                  }
3514 3514  
3515 3515                  /*
3516 3516                   * XXX - The client should be more robust in its
3517 3517                   * handling of clientid in use errors (regen another
3518 3518                   * clientid and try again?)
3519 3519                   */
3520 3520                  xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
3521 3521                  return;
3522 3522          }
3523 3523  
3524 3524          if (res.status) {
3525 3525                  xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
3526 3526                  return;
3527 3527          }
3528 3528  
3529 3529          s_resok = &res.array[2].nfs_resop4_u.
3530 3530              opsetclientid.SETCLIENTID4res_u.resok4;
3531 3531  
3532 3532          tmp_clientid = s_resok->clientid;
3533 3533  
3534 3534          verf = s_resok->setclientid_confirm;
3535 3535  
3536 3536  #ifdef  DEBUG
3537 3537          if (nfs4setclientid_otw_debug) {
3538 3538                  union {
3539 3539                          clientid4       clientid;
3540 3540                          int             foo[2];
3541 3541                  } cid;
3542 3542  
3543 3543                  cid.clientid = s_resok->clientid;
3544 3544  
3545 3545                  zcmn_err(mi->mi_zone->zone_id, CE_NOTE,
3546 3546                  "nfs4setclientid_otw: OK, clientid = %x,%x, "
3547 3547                  "verifier = %" PRIx64 "\n", cid.foo[0], cid.foo[1], verf);
3548 3548          }
3549 3549  #endif
3550 3550  
3551 3551          xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
3552 3552  
3553 3553          /* Confirm the client id and get the lease_time attribute */
3554 3554  
3555 3555          args.ctag = TAG_SETCLIENTID_CF;
3556 3556  
3557 3557          args.array = argop;
3558 3558          args.array_len = 1;
3559 3559  
3560 3560          argop[0].argop = OP_SETCLIENTID_CONFIRM;
3561 3561  
3562 3562          argop[0].nfs_argop4_u.opsetclientid_confirm.clientid = tmp_clientid;
3563 3563          argop[0].nfs_argop4_u.opsetclientid_confirm.setclientid_confirm = verf;
3564 3564  
3565 3565          /* used to figure out RTT for np */
3566 3566          gethrestime(&prop_time);
3567 3567  
3568 3568          NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, "nfs4setclientid_otw: "
3569 3569              "start time: %ld sec %ld nsec", prop_time.tv_sec,
3570 3570              prop_time.tv_nsec));
3571 3571  
3572 3572          rfs4call(mi, &args, &res, cr, &doqueue, 0, ep);
3573 3573  
3574 3574          gethrestime(&after_time);
3575 3575          mutex_enter(&np->s_lock);
3576 3576          np->propagation_delay.tv_sec =
3577 3577              MAX(1, after_time.tv_sec - prop_time.tv_sec);
3578 3578          mutex_exit(&np->s_lock);
3579 3579  
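
Editor's sketch (not part of the diff): the gethrestime() pair above brackets the confirm call and records at least one second of propagation delay for later lease calculations. The same measurement in userland terms, using CLOCK_REALTIME as the wall-clock analogue of gethrestime(); my_otw_call() is an illustrative stand-in.

#include <time.h>

#define	MY_MAX(a, b)	((a) > (b) ? (a) : (b))

static void my_otw_call(void) { }	/* illustrative stand-in */

time_t
my_measure_prop_delay(void)
{
	struct timespec before, after;

	(void) clock_gettime(CLOCK_REALTIME, &before);
	my_otw_call();
	(void) clock_gettime(CLOCK_REALTIME, &after);

	/* Never report less than one second, as the code above does. */
	return (MY_MAX(1, after.tv_sec - before.tv_sec));
}
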
3580 3580          NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, "nfs4setclientid_otw: "
3581 3581              "finish time: %ld sec ", after_time.tv_sec));
3582 3582  
3583 3583          NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, "nfs4setclientid_otw: "
3584 3584              "propagation delay set to %ld sec",
3585 3585              np->propagation_delay.tv_sec));
3586 3586  
3587 3587          if (ep->error)
3588 3588                  return;
3589 3589  
3590 3590          if (res.status == NFS4ERR_CLID_INUSE) {
3591 3591                  clientaddr4 *clid_inuse;
3592 3592  
3593 3593                  if (!(*retry_inusep)) {
3594 3594                          clid_inuse = &res.array->nfs_resop4_u.
3595 3595                              opsetclientid.SETCLIENTID4res_u.client_using;
3596 3596  
3597 3597                          zcmn_err(mi->mi_zone->zone_id, CE_NOTE,
3598 3598                              "SETCLIENTID_CONFIRM failed.  "
3599 3599                              "nfs4_client_id.id is in use already by: "
3600 3600                              "r_netid<%s> r_addr<%s>",
3601 3601                              clid_inuse->r_netid, clid_inuse->r_addr);
3602 3602                  }
3603 3603  
3604 3604                  xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
3605 3605                  return;
3606 3606          }
3607 3607  
3608 3608          if (res.status) {
3609 3609                  xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
3610 3610                  return;
3611 3611          }
3612 3612  
3613 3613          mutex_enter(&np->s_lock);
3614 3614          np->clientid = tmp_clientid;
3615 3615          np->s_flags |= N4S_CLIENTID_SET;
3616 3616  
3617 3617          /* Add mi to np's mntinfo4 list */
3618 3618          nfs4_add_mi_to_server(np, mi);
3619 3619  
3620 3620          if (np->lease_valid == NFS4_LEASE_NOT_STARTED) {
3621 3621                  /*
3622 3622                   * Start lease management thread.
3623 3623                   * Keep trying until we succeed.
3624 3624                   */
3625 3625  
3626 3626                  np->s_refcnt++;         /* pass reference to thread */
3627 3627                  (void) zthread_create(NULL, 0, nfs4_renew_lease_thread, np, 0,
3628 3628                      minclsyspri);
3629 3629          }
3630 3630          mutex_exit(&np->s_lock);
3631 3631  
3632 3632          xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
3633 3633  }
3634 3634  
3635 3635  /*
3636 3636   * Add mi to sp's mntinfo4_list if it isn't already in the list.  Makes
3637 3637   * mi's clientid the same as sp's.
3638 3638   * Assumes sp is locked down.
3639 3639   */
3640 3640  void
3641 3641  nfs4_add_mi_to_server(nfs4_server_t *sp, mntinfo4_t *mi)
3642 3642  {
3643 3643          mntinfo4_t *tmi;
3644 3644          int in_list = 0;
3645 3645  
3646 3646          ASSERT(nfs_rw_lock_held(&mi->mi_recovlock, RW_READER) ||
3647 3647              nfs_rw_lock_held(&mi->mi_recovlock, RW_WRITER));
3648 3648          ASSERT(sp != &nfs4_server_lst);
3649 3649          ASSERT(MUTEX_HELD(&sp->s_lock));
3650 3650  
3651 3651          NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE,
3652 3652              "nfs4_add_mi_to_server: add mi %p to sp %p",
3653 3653              (void*)mi, (void*)sp));
3654 3654  
3655 3655          for (tmi = sp->mntinfo4_list;
3656 3656              tmi != NULL;
3657 3657              tmi = tmi->mi_clientid_next) {
3658 3658                  if (tmi == mi) {
3659 3659                          NFS4_DEBUG(nfs4_client_lease_debug,
3660 3660                              (CE_NOTE,
3661 3661                              "nfs4_add_mi_to_server: mi in list"));
3662 3662                          in_list = 1;
3663 3663                  }
3664 3664          }
3665 3665  
3666 3666          /*
3667 3667           * First put a hold on the mntinfo4's vfsp so that references via
3668 3668           * mntinfo4_list will be valid.
3669 3669           */
3670 3670          if (!in_list)
3671 3671                  VFS_HOLD(mi->mi_vfsp);
3672 3672  
3673 3673          NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, "nfs4_add_mi_to_server: "
3674 3674              "hold vfs %p for mi: %p", (void*)mi->mi_vfsp, (void*)mi));
3675 3675  
3676 3676          if (!in_list) {
3677 3677                  if (sp->mntinfo4_list)
3678 3678                          sp->mntinfo4_list->mi_clientid_prev = mi;
3679 3679                  mi->mi_clientid_next = sp->mntinfo4_list;
3680 3680                  mi->mi_srv = sp;
3681 3681                  sp->mntinfo4_list = mi;
3682 3682                  mi->mi_srvsettime = gethrestime_sec();
3683 3683                  mi->mi_srvset_cnt++;
3684 3684          }
3685 3685  
3686 3686          /* set mi's clientid to that of sp's for later matching */
3687 3687          mi->mi_clientid = sp->clientid;
3688 3688  
3689 3689          /*
3690 3690           * Update the clientid for any other mi's belonging to sp.  This
3691 3691           * must be done here while we hold sp->s_lock, so that
3692 3692           * find_nfs4_server() continues to work.
3693 3693           */
3694 3694  
3695 3695          for (tmi = sp->mntinfo4_list;
3696 3696              tmi != NULL;
3697 3697              tmi = tmi->mi_clientid_next) {
3698 3698                  if (tmi != mi) {
3699 3699                          tmi->mi_clientid = sp->clientid;
3700 3700                  }
3701 3701          }
3702 3702  }
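
           /*
            * Illustrative caller sketch (not part of this change).  Per the
            * ASSERTs above, a caller is expected to hold mi->mi_recovlock
            * (as reader or writer) and sp->s_lock across the call, roughly:
            *
            *	(void) nfs_rw_enter_sig(&mi->mi_recovlock, RW_READER, 0);
            *	mutex_enter(&sp->s_lock);
            *	nfs4_add_mi_to_server(sp, mi);
            *	mutex_exit(&sp->s_lock);
            *	nfs_rw_exit(&mi->mi_recovlock);
            */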
3703 3703  
3704 3704  /*
3705 3705   * Remove the mi from sp's mntinfo4_list and release its reference.
3706 3706   * Exception: if mi still has open files, flag it for later removal (when
3707 3707   * all the files are closed).
3708 3708   *
3709 3709   * If this is the last mntinfo4 in sp's list then tell the lease renewal
3710 3710   * thread to exit.
3711 3711   */
3712 3712  static void
3713 3713  nfs4_remove_mi_from_server_nolock(mntinfo4_t *mi, nfs4_server_t *sp)
3714 3714  {
3715 3715          NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE,
3716 3716              "nfs4_remove_mi_from_server_nolock: remove mi %p from sp %p",
3717 3717              (void*)mi, (void*)sp));
3718 3718  
3719 3719          ASSERT(sp != NULL);
3720 3720          ASSERT(MUTEX_HELD(&sp->s_lock));
3721 3721          ASSERT(mi->mi_open_files >= 0);
3722 3722  
3723 3723          /*
3724 3724           * First make sure this mntinfo4 can be taken off the list,
3725 3725           * i.e. it doesn't have any open files remaining.
3726 3726           */
3727 3727          if (mi->mi_open_files > 0) {
3728 3728                  NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE,
3729 3729                      "nfs4_remove_mi_from_server_nolock: don't "
3730 3730                      "remove mi since it still has files open"));
3731 3731  
3732 3732                  mutex_enter(&mi->mi_lock);
3733 3733                  mi->mi_flags |= MI4_REMOVE_ON_LAST_CLOSE;
3734 3734                  mutex_exit(&mi->mi_lock);
3735 3735                  return;
3736 3736          }
3737 3737  
3738 3738          VFS_HOLD(mi->mi_vfsp);
3739 3739          remove_mi(sp, mi);
3740 3740          VFS_RELE(mi->mi_vfsp);
3741 3741  
3742 3742          if (sp->mntinfo4_list == NULL) {
3743 3743                  /* last fs unmounted, kill the thread */
3744 3744                  NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE,
3745 3745                      "nfs4_remove_mi_from_server_nolock: kill the thread"));
3746 3746                  nfs4_mark_srv_dead(sp);
3747 3747          }
3748 3748  }
3749 3749  
3750 3750  /*
3751 3751   * Remove mi from sp's mntinfo4_list and release the vfs reference.
3752 3752   */
3753 3753  static void
3754 3754  remove_mi(nfs4_server_t *sp, mntinfo4_t *mi)
3755 3755  {
3756 3756          ASSERT(MUTEX_HELD(&sp->s_lock));
3757 3757  
3758 3758          /*
3759 3759           * We release a reference, and the caller must still have a
3760 3760           * reference.
3761 3761           */
3762 3762          ASSERT(mi->mi_vfsp->vfs_count >= 2);
3763 3763  
3764 3764          if (mi->mi_clientid_prev) {
3765 3765                  mi->mi_clientid_prev->mi_clientid_next = mi->mi_clientid_next;
3766 3766          } else {
3767 3767                  /* This is the first mi in sp's mntinfo4_list */
3768 3768                  /*
3769 3769                   * Make sure the first mntinfo4 in the list is the actual
3770 3770                   * mntinfo4 passed in.
3771 3771                   */
3772 3772                  ASSERT(sp->mntinfo4_list == mi);
3773 3773  
3774 3774                  sp->mntinfo4_list = mi->mi_clientid_next;
3775 3775          }
3776 3776          if (mi->mi_clientid_next)
3777 3777                  mi->mi_clientid_next->mi_clientid_prev = mi->mi_clientid_prev;
3778 3778  
3779 3779          /* Now mark the mntinfo4's links as being removed */
3780 3780          mi->mi_clientid_prev = mi->mi_clientid_next = NULL;
3781 3781          mi->mi_srv = NULL;
3782 3782          mi->mi_srvset_cnt++;
3783 3783  
3784 3784          VFS_RELE(mi->mi_vfsp);
3785 3785  }
3786 3786  
3787 3787  /*
3788 3788   * Free all the entries in sp's mntinfo4_list.
3789 3789   */
3790 3790  static void
3791 3791  remove_all_mi(nfs4_server_t *sp)
3792 3792  {
3793 3793          mntinfo4_t *mi;
3794 3794  
3795 3795          ASSERT(MUTEX_HELD(&sp->s_lock));
3796 3796  
3797 3797          while (sp->mntinfo4_list != NULL) {
3798 3798                  mi = sp->mntinfo4_list;
3799 3799                  /*
3800 3800                   * Grab a reference in case there is only one left (which
3801 3801                   * remove_mi() frees).
3802 3802                   */
3803 3803                  VFS_HOLD(mi->mi_vfsp);
3804 3804                  remove_mi(sp, mi);
3805 3805                  VFS_RELE(mi->mi_vfsp);
3806 3806          }
3807 3807  }
3808 3808  
3809 3809  /*
3810 3810   * Remove the mi from sp's mntinfo4_list as above, and rele the vfs.
3811 3811   *
3812 3812   * This version can be called with a null nfs4_server_t arg,
3813 3813   * and will either find the right one and handle locking, or
3814 3814   * do nothing because the mi wasn't added to an sp's mntinfo4_list.
3815 3815   */
3816 3816  void
3817 3817  nfs4_remove_mi_from_server(mntinfo4_t *mi, nfs4_server_t *esp)
3818 3818  {
3819 3819          nfs4_server_t   *sp;
3820 3820  
3821 3821          if (esp) {
3822 3822                  nfs4_remove_mi_from_server_nolock(mi, esp);
3823 3823                  return;
3824 3824          }
3825 3825  
3826 3826          (void) nfs_rw_enter_sig(&mi->mi_recovlock, RW_READER, 0);
3827 3827          if (sp = find_nfs4_server_all(mi, 1)) {
3828 3828                  nfs4_remove_mi_from_server_nolock(mi, sp);
3829 3829                  mutex_exit(&sp->s_lock);
3830 3830                  nfs4_server_rele(sp);
3831 3831          }
3832 3832          nfs_rw_exit(&mi->mi_recovlock);
3833 3833  }
3834 3834  
3835 3835  /*
3836 3836   * Return TRUE if the given server has any non-unmounted filesystems.
3837 3837   */
3838 3838  
3839 3839  bool_t
3840 3840  nfs4_fs_active(nfs4_server_t *sp)
3841 3841  {
3842 3842          mntinfo4_t *mi;
3843 3843  
3844 3844          ASSERT(MUTEX_HELD(&sp->s_lock));
3845 3845  
3846 3846          for (mi = sp->mntinfo4_list; mi != NULL; mi = mi->mi_clientid_next) {
3847 3847                  if (!(mi->mi_vfsp->vfs_flag & VFS_UNMOUNTED))
3848 3848                          return (TRUE);
3849 3849          }
3850 3850  
3851 3851          return (FALSE);
3852 3852  }
3853 3853  
3854 3854  /*
3855 3855   * Mark sp as finished and notify any waiters.
3856 3856   */
3857 3857  
3858 3858  void
3859 3859  nfs4_mark_srv_dead(nfs4_server_t *sp)
3860 3860  {
3861 3861          ASSERT(MUTEX_HELD(&sp->s_lock));
3862 3862  
3863 3863          sp->s_thread_exit = NFS4_THREAD_EXIT;
3864 3864          cv_broadcast(&sp->cv_thread_exit);
3865 3865  }
3866 3866  
3867 3867  /*
3868 3868   * Create a new nfs4_server_t structure.
3869 3869   * Returns new node unlocked and not in list, but with a reference count of
3870 3870   * 1.
3871 3871   */
3872 3872  struct nfs4_server *
3873 3873  new_nfs4_server(struct servinfo4 *svp, cred_t *cr)
3874 3874  {
3875 3875          struct nfs4_server *np;
3876 3876          timespec_t tt;
3877 3877          union {
3878 3878                  struct {
3879 3879                          uint32_t sec;
3880 3880                          uint32_t subsec;
3881 3881                  } un_curtime;
3882 3882                  verifier4       un_verifier;
3883 3883          } nfs4clientid_verifier;
3884 3884          /*
3885 3885           * We change this ID string carefully and with the Solaris
3886 3886           * NFS server behaviour in mind.  "+referrals" indicates
3887 3887           * a client that can handle an NFSv4 referral.
3888 3888           */
3889 3889          char id_val[] = "Solaris: %s, NFSv4 kernel client +referrals";
3890 3890          int len;
3891 3891  
3892 3892          np = kmem_zalloc(sizeof (struct nfs4_server), KM_SLEEP);
3893 3893          np->saddr.len = svp->sv_addr.len;
3894 3894          np->saddr.maxlen = svp->sv_addr.maxlen;
3895 3895          np->saddr.buf = kmem_alloc(svp->sv_addr.maxlen, KM_SLEEP);
3896 3896          bcopy(svp->sv_addr.buf, np->saddr.buf, svp->sv_addr.len);
3897 3897          np->s_refcnt = 1;
3898 3898  
3899 3899          /*
3900 3900           * Build the nfs_client_id4 for this server mount.  Ensure
3901 3901           * the verifier is useful and that the identification is
3902 3902           * based in part on the server's address for the case of
3903 3903           * multi-homed servers.
3904 3904           */
3905 3905          nfs4clientid_verifier.un_verifier = 0;
3906 3906          gethrestime(&tt);
3907 3907          nfs4clientid_verifier.un_curtime.sec = (uint32_t)tt.tv_sec;
3908 3908          nfs4clientid_verifier.un_curtime.subsec = (uint32_t)tt.tv_nsec;
3909 3909          np->clidtosend.verifier = nfs4clientid_verifier.un_verifier;
3910 3910  
3911 3911          /*
3912 3912           * calculate the length of the opaque identifier.  Subtract 2
3913 3913           * for the "%s" and add the traditional +1 for null
3914 3914           * termination.
3915 3915           */
3916 3916          len = strlen(id_val) - 2 + strlen(uts_nodename()) + 1;
3917 3917          np->clidtosend.id_len = len + np->saddr.maxlen;
3918 3918  
3919 3919          np->clidtosend.id_val = kmem_alloc(np->clidtosend.id_len, KM_SLEEP);
3920 3920          (void) sprintf(np->clidtosend.id_val, id_val, uts_nodename());
3921 3921          bcopy(np->saddr.buf, &np->clidtosend.id_val[len], np->saddr.len);
3922 3922  
3923 3923          np->s_flags = 0;
3924 3924          np->mntinfo4_list = NULL;
3925 3925          /* save cred for issuing rfs4calls inside the renew thread */
3926 3926          crhold(cr);
3927 3927          np->s_cred = cr;
3928 3928          cv_init(&np->cv_thread_exit, NULL, CV_DEFAULT, NULL);
3929 3929          mutex_init(&np->s_lock, NULL, MUTEX_DEFAULT, NULL);
3930 3930          nfs_rw_init(&np->s_recovlock, NULL, RW_DEFAULT, NULL);
3931 3931          list_create(&np->s_deleg_list, sizeof (rnode4_t),
3932 3932              offsetof(rnode4_t, r_deleg_link));
3933 3933          np->s_thread_exit = 0;
3934 3934          np->state_ref_count = 0;
3935 3935          np->lease_valid = NFS4_LEASE_NOT_STARTED;
3936 3936          cv_init(&np->s_cv_otw_count, NULL, CV_DEFAULT, NULL);
3937 3937          cv_init(&np->s_clientid_pend, NULL, CV_DEFAULT, NULL);
3938 3938          np->s_otw_call_count = 0;
3939 3939          cv_init(&np->wait_cb_null, NULL, CV_DEFAULT, NULL);
3940 3940          np->zoneid = getzoneid();
3941 3941          np->zone_globals = nfs4_get_callback_globals();
3942 3942          ASSERT(np->zone_globals != NULL);
3943 3943          return (np);
3944 3944  }
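
           /*
            * Worked example of the client ID construction above
            * (illustrative only; "host1" is a made-up node name).
            * With uts_nodename() == "host1":
            *
            *	len = strlen(id_val) - 2 + strlen("host1") + 1
            *	    = 43 - 2 + 5 + 1 = 47
            *
            * clidtosend.id_val then holds the 46-character string
            * "Solaris: host1, NFSv4 kernel client +referrals" plus its NUL
            * terminator in bytes 0..46, immediately followed by the raw
            * server address bytes starting at offset len (see the
            * multi-homed-server note above); clidtosend.id_len is
            * len + saddr.maxlen.
            */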
3945 3945  
3946 3946  /*
3947 3947   * Create a new nfs4_server_t structure and add it to the list.
3948 3948   * Returns new node locked; reference must eventually be freed.
3949 3949   */
3950 3950  static struct nfs4_server *
3951 3951  add_new_nfs4_server(struct servinfo4 *svp, cred_t *cr)
3952 3952  {
3953 3953          nfs4_server_t *sp;
3954 3954  
3955 3955          ASSERT(MUTEX_HELD(&nfs4_server_lst_lock));
3956 3956          sp = new_nfs4_server(svp, cr);
3957 3957          mutex_enter(&sp->s_lock);
3958 3958          insque(sp, &nfs4_server_lst);
3959 3959          sp->s_refcnt++;                 /* list gets a reference */
3960 3960          sp->s_flags |= N4S_INSERTED;
3961 3961          sp->clientid = 0;
3962 3962          return (sp);
3963 3963  }
3964 3964  
3965 3965  int nfs4_server_t_debug = 0;
3966 3966  
3967 3967  #ifdef lint
3968 3968  extern void
3969 3969  dumpnfs4slist(char *, mntinfo4_t *, clientid4, servinfo4_t *);
3970 3970  #endif
3971 3971  
3972 3972  #ifndef lint
3973 3973  #ifdef DEBUG
3974 3974  void
3975 3975  dumpnfs4slist(char *txt, mntinfo4_t *mi, clientid4 clientid, servinfo4_t *srv_p)
3976 3976  {
3977 3977          int hash16(void *p, int len);
3978 3978          nfs4_server_t *np;
3979 3979  
3980 3980          NFS4_DEBUG(nfs4_server_t_debug, (CE_NOTE,
3981 3981              "dumping nfs4_server_t list in %s", txt));
3982 3982          NFS4_DEBUG(nfs4_server_t_debug, (CE_CONT,
3983 3983              "mi 0x%p, want clientid %llx, addr %d/%04X",
3984 3984              mi, (longlong_t)clientid, srv_p->sv_addr.len,
3985 3985              hash16((void *)srv_p->sv_addr.buf, srv_p->sv_addr.len)));
3986 3986          for (np = nfs4_server_lst.forw; np != &nfs4_server_lst;
3987 3987              np = np->forw) {
3988 3988                  NFS4_DEBUG(nfs4_server_t_debug, (CE_CONT,
3989 3989                      "node 0x%p,    clientid %llx, addr %d/%04X, cnt %d",
3990 3990                      np, (longlong_t)np->clientid, np->saddr.len,
3991 3991                      hash16((void *)np->saddr.buf, np->saddr.len),
3992 3992                      np->state_ref_count));
3993 3993                  if (np->saddr.len == srv_p->sv_addr.len &&
3994 3994                      bcmp(np->saddr.buf, srv_p->sv_addr.buf,
3995 3995                      np->saddr.len) == 0)
3996 3996                          NFS4_DEBUG(nfs4_server_t_debug, (CE_CONT,
3997 3997                              " - address matches"));
3998 3998                  if (np->clientid == clientid || np->clientid == 0)
3999 3999                          NFS4_DEBUG(nfs4_server_t_debug, (CE_CONT,
4000 4000                              " - clientid matches"));
4001 4001                  if (np->s_thread_exit != NFS4_THREAD_EXIT)
4002 4002                          NFS4_DEBUG(nfs4_server_t_debug, (CE_CONT,
4003 4003                              " - thread not exiting"));
4004 4004          }
4005 4005          delay(hz);
4006 4006  }
4007 4007  #endif
4008 4008  #endif
4009 4009  
4010 4010  
4011 4011  /*
4012 4012   * Move a mntinfo4_t from one server list to another.
4013 4013   * Locking of the two nfs4_server_t nodes will be done in list order.
4014 4014   *
4015 4015   * Returns NULL if the current nfs4_server_t for the filesystem could not
4016 4016   * be found (e.g., due to forced unmount).  Otherwise returns a reference
4017 4017   * to the new nfs4_server_t, which must eventually be freed.
4018 4018   */
4019 4019  nfs4_server_t *
4020 4020  nfs4_move_mi(mntinfo4_t *mi, servinfo4_t *old, servinfo4_t *new)
4021 4021  {
4022 4022          nfs4_server_t *p, *op = NULL, *np = NULL;
4023 4023          int num_open;
4024 4024          zoneid_t zoneid = nfs_zoneid();
4025 4025  
4026 4026          ASSERT(nfs_zone() == mi->mi_zone);
4027 4027  
4028 4028          mutex_enter(&nfs4_server_lst_lock);
4029 4029  #ifdef DEBUG
4030 4030          if (nfs4_server_t_debug)
4031 4031                  dumpnfs4slist("nfs4_move_mi", mi, (clientid4)0, new);
4032 4032  #endif
4033 4033          for (p = nfs4_server_lst.forw; p != &nfs4_server_lst; p = p->forw) {
4034 4034                  if (p->zoneid != zoneid)
4035 4035                          continue;
4036 4036                  if (p->saddr.len == old->sv_addr.len &&
4037 4037                      bcmp(p->saddr.buf, old->sv_addr.buf, p->saddr.len) == 0 &&
4038 4038                      p->s_thread_exit != NFS4_THREAD_EXIT) {
4039 4039                          op = p;
4040 4040                          mutex_enter(&op->s_lock);
4041 4041                          op->s_refcnt++;
4042 4042                  }
4043 4043                  if (p->saddr.len == new->sv_addr.len &&
4044 4044                      bcmp(p->saddr.buf, new->sv_addr.buf, p->saddr.len) == 0 &&
4045 4045                      p->s_thread_exit != NFS4_THREAD_EXIT) {
4046 4046                          np = p;
4047 4047                          mutex_enter(&np->s_lock);
4048 4048                  }
4049 4049                  if (op != NULL && np != NULL)
4050 4050                          break;
4051 4051          }
4052 4052          if (op == NULL) {
4053 4053                  /*
4054 4054                   * Filesystem has been forcibly unmounted.  Bail out.
4055 4055                   */
4056 4056                  if (np != NULL)
4057 4057                          mutex_exit(&np->s_lock);
4058 4058                  mutex_exit(&nfs4_server_lst_lock);
4059 4059                  return (NULL);
4060 4060          }
4061 4061          if (np != NULL) {
4062 4062                  np->s_refcnt++;
4063 4063          } else {
4064 4064  #ifdef DEBUG
4065 4065                  NFS4_DEBUG(nfs4_client_failover_debug, (CE_NOTE,
4066 4066                      "nfs4_move_mi: no target nfs4_server, will create."));
4067 4067  #endif
4068 4068                  np = add_new_nfs4_server(new, kcred);
4069 4069          }
4070 4070          mutex_exit(&nfs4_server_lst_lock);
4071 4071  
4072 4072          NFS4_DEBUG(nfs4_client_failover_debug, (CE_NOTE,
4073 4073              "nfs4_move_mi: for mi 0x%p, "
4074 4074              "old servinfo4 0x%p, new servinfo4 0x%p, "
4075 4075              "old nfs4_server 0x%p, new nfs4_server 0x%p, ",
4076 4076              (void*)mi, (void*)old, (void*)new,
4077 4077              (void*)op, (void*)np));
4078 4078          ASSERT(op != NULL && np != NULL);
4079 4079  
4080 4080          /* discard any delegations */
4081 4081          nfs4_deleg_discard(mi, op);
4082 4082  
4083 4083          num_open = mi->mi_open_files;
4084 4084          mi->mi_open_files = 0;
4085 4085          op->state_ref_count -= num_open;
4086 4086          ASSERT(op->state_ref_count >= 0);
4087 4087          np->state_ref_count += num_open;
4088 4088          nfs4_remove_mi_from_server_nolock(mi, op);
4089 4089          mi->mi_open_files = num_open;
4090 4090          NFS4_DEBUG(nfs4_client_failover_debug, (CE_NOTE,
4091 4091              "nfs4_move_mi: mi_open_files %d, op->cnt %d, np->cnt %d",
4092 4092              mi->mi_open_files, op->state_ref_count, np->state_ref_count));
4093 4093  
4094 4094          nfs4_add_mi_to_server(np, mi);
4095 4095  
4096 4096          mutex_exit(&op->s_lock);
4097 4097          mutex_exit(&np->s_lock);
4098 4098          nfs4_server_rele(op);
4099 4099  
4100 4100          return (np);
4101 4101  }
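
           /*
            * Minimal, hypothetical usage sketch; "oldsvp" and "newsvp" are
            * made-up names and this is not the actual failover caller.
            * mi->mi_recovlock is assumed to be held, as the ASSERTs in the
            * helpers above require:
            *
            *	nfs4_server_t *nsp;
            *
            *	nsp = nfs4_move_mi(mi, oldsvp, newsvp);
            *	if (nsp == NULL) {
            *		 ... filesystem was forcibly unmounted; give up ...
            *	} else {
            *		 ... use nsp as needed ...
            *		nfs4_server_rele(nsp);	 drop the returned reference
            *	}
            */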
4102 4102  
4103 4103  /*
4104 4104   * The caller must hold nfs4_server_lst_lock.
4105 4105   * Search the nfs4_server list to find a match on this servinfo4
4106 4106   * based on its address.
4107 4107   *
4108 4108   * Returns NULL if no match is found.  Otherwise returns a reference (which
4109 4109   * must eventually be freed) to a locked nfs4_server.
4110 4110   */
4111 4111  nfs4_server_t *
4112 4112  servinfo4_to_nfs4_server(servinfo4_t *srv_p)
4113 4113  {
4114 4114          nfs4_server_t *np;
4115 4115          zoneid_t zoneid = nfs_zoneid();
4116 4116  
4117 4117          ASSERT(MUTEX_HELD(&nfs4_server_lst_lock));
4118 4118          for (np = nfs4_server_lst.forw; np != &nfs4_server_lst; np = np->forw) {
4119 4119                  if (np->zoneid == zoneid &&
4120 4120                      np->saddr.len == srv_p->sv_addr.len &&
4121 4121                      bcmp(np->saddr.buf, srv_p->sv_addr.buf,
4122 4122                      np->saddr.len) == 0 &&
4123 4123                      np->s_thread_exit != NFS4_THREAD_EXIT) {
4124 4124                          mutex_enter(&np->s_lock);
4125 4125                          np->s_refcnt++;
4126 4126                          return (np);
4127 4127                  }
4128 4128          }
4129 4129          return (NULL);
4130 4130  }
4131 4131  
4132 4132  /*
4133 4133   * Locks the nfs4_server down if it is found and returns a reference that
4134 4134   * must eventually be freed.
4135 4135   */
4136 4136  static nfs4_server_t *
4137 4137  lookup_nfs4_server(nfs4_server_t *sp, int any_state)
4138 4138  {
4139 4139          nfs4_server_t *np;
4140 4140  
4141 4141          mutex_enter(&nfs4_server_lst_lock);
4142 4142          for (np = nfs4_server_lst.forw; np != &nfs4_server_lst; np = np->forw) {
4143 4143                  mutex_enter(&np->s_lock);
4144 4144                  if (np == sp && np->s_refcnt > 0 &&
4145 4145                      (np->s_thread_exit != NFS4_THREAD_EXIT || any_state)) {
4146 4146                          mutex_exit(&nfs4_server_lst_lock);
4147 4147                          np->s_refcnt++;
4148 4148                          return (np);
4149 4149                  }
4150 4150                  mutex_exit(&np->s_lock);
4151 4151          }
4152 4152          mutex_exit(&nfs4_server_lst_lock);
4153 4153  
4154 4154          return (NULL);
4155 4155  }
4156 4156  
4157 4157  /*
4158 4158   * The caller should be holding mi->mi_recovlock, and it should continue to
4159 4159   * hold the lock until done with the returned nfs4_server_t.  Once
4160 4160   * mi->mi_recovlock is released, there is no guarantee that the returned
4161 4161   * nfs4_server_t will continue to correspond to mi.
4162 4162   */
4163 4163  nfs4_server_t *
4164 4164  find_nfs4_server(mntinfo4_t *mi)
4165 4165  {
4166 4166          ASSERT(nfs_rw_lock_held(&mi->mi_recovlock, RW_READER) ||
4167 4167              nfs_rw_lock_held(&mi->mi_recovlock, RW_WRITER));
4168 4168  
4169 4169          return (lookup_nfs4_server(mi->mi_srv, 0));
4170 4170  }
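
           /*
            * Illustrative caller pattern, mirroring the callers elsewhere in
            * this file (e.g. nfs4_remove_mi_from_server() and
            * nfs4_free_mount()).  The returned nfs4_server_t comes back
            * locked (s_lock held) and with an extra reference; the caller
            * must drop both:
            *
            *	(void) nfs_rw_enter_sig(&mi->mi_recovlock, RW_READER, 0);
            *	sp = find_nfs4_server(mi);
            *	if (sp != NULL) {
            *		 ... sp->s_lock is held here ...
            *		mutex_exit(&sp->s_lock);
            *		nfs4_server_rele(sp);
            *	}
            *	nfs_rw_exit(&mi->mi_recovlock);
            */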
4171 4171  
4172 4172  /*
4173 4173   * Same as above, but takes an "any_state" parameter which can be
4174 4174   * set to 1 if the caller wishes to find nfs4_server_t's which
4175 4175   * have been marked for termination by the exit of the renew
4176 4176   * thread.  This should only be used by operations which are
4177 4177   * cleaning up and will not cause an OTW op.
4178 4178   */
4179 4179  nfs4_server_t *
4180 4180  find_nfs4_server_all(mntinfo4_t *mi, int any_state)
4181 4181  {
4182 4182          ASSERT(nfs_rw_lock_held(&mi->mi_recovlock, RW_READER) ||
4183 4183              nfs_rw_lock_held(&mi->mi_recovlock, RW_WRITER));
4184 4184  
4185 4185          return (lookup_nfs4_server(mi->mi_srv, any_state));
4186 4186  }
4187 4187  
4188 4188  /*
4189 4189   * Lock sp, but only if it's still active (in the list and hasn't been
4190 4190   * flagged as exiting) or 'any_state' is non-zero.
4191 4191   * Returns TRUE if sp got locked and adds a reference to sp.
4192 4192   */
4193 4193  bool_t
4194 4194  nfs4_server_vlock(nfs4_server_t *sp, int any_state)
4195 4195  {
4196 4196          return (lookup_nfs4_server(sp, any_state) != NULL);
4197 4197  }
4198 4198  
4199 4199  /*
4200 4200   * Release the reference to sp and destroy it if that's the last one.
4201 4201   */
4202 4202  
4203 4203  void
4204 4204  nfs4_server_rele(nfs4_server_t *sp)
4205 4205  {
4206 4206          mutex_enter(&sp->s_lock);
4207 4207          ASSERT(sp->s_refcnt > 0);
4208 4208          sp->s_refcnt--;
4209 4209          if (sp->s_refcnt > 0) {
4210 4210                  mutex_exit(&sp->s_lock);
4211 4211                  return;
4212 4212          }
4213 4213          mutex_exit(&sp->s_lock);
4214 4214  
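                   /*
                    * s_lock was dropped above with s_refcnt at zero, so
                    * another thread may have found sp on nfs4_server_lst and
                    * taken a new reference in the meantime.  Re-take the
                    * locks in list-then-server order and re-check the count
                    * before unlinking and destroying sp.
                    */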
4215 4215          mutex_enter(&nfs4_server_lst_lock);
4216 4216          mutex_enter(&sp->s_lock);
4217 4217          if (sp->s_refcnt > 0) {
4218 4218                  mutex_exit(&sp->s_lock);
4219 4219                  mutex_exit(&nfs4_server_lst_lock);
4220 4220                  return;
4221 4221          }
4222 4222          remque(sp);
4223 4223          sp->forw = sp->back = NULL;
4224 4224          mutex_exit(&nfs4_server_lst_lock);
4225 4225          destroy_nfs4_server(sp);
4226 4226  }
4227 4227  
4228 4228  static void
4229 4229  destroy_nfs4_server(nfs4_server_t *sp)
4230 4230  {
4231 4231          ASSERT(MUTEX_HELD(&sp->s_lock));
4232 4232          ASSERT(sp->s_refcnt == 0);
4233 4233          ASSERT(sp->s_otw_call_count == 0);
4234 4234  
4235 4235          remove_all_mi(sp);
4236 4236  
4237 4237          crfree(sp->s_cred);
4238 4238          kmem_free(sp->saddr.buf, sp->saddr.maxlen);
4239 4239          kmem_free(sp->clidtosend.id_val, sp->clidtosend.id_len);
4240 4240          mutex_exit(&sp->s_lock);
4241 4241  
4242 4242          /* destroy the nfs4_server */
4243 4243          nfs4callback_destroy(sp);
4244 4244          list_destroy(&sp->s_deleg_list);
4245 4245          mutex_destroy(&sp->s_lock);
4246 4246          cv_destroy(&sp->cv_thread_exit);
4247 4247          cv_destroy(&sp->s_cv_otw_count);
4248 4248          cv_destroy(&sp->s_clientid_pend);
4249 4249          cv_destroy(&sp->wait_cb_null);
4250 4250          nfs_rw_destroy(&sp->s_recovlock);
4251 4251          kmem_free(sp, sizeof (*sp));
4252 4252  }
4253 4253  
4254 4254  /*
4255 4255   * Fork off a thread to free the data structures for a mount.
4256 4256   */
4257 4257  
4258 4258  static void
4259 4259  async_free_mount(vfs_t *vfsp, int flag, cred_t *cr)
4260 4260  {
4261 4261          freemountargs_t *args;
4262 4262          args = kmem_alloc(sizeof (freemountargs_t), KM_SLEEP);
4263 4263          args->fm_vfsp = vfsp;
4264 4264          VFS_HOLD(vfsp);
4265 4265          MI4_HOLD(VFTOMI4(vfsp));
4266 4266          args->fm_flag = flag;
4267 4267          args->fm_cr = cr;
4268 4268          crhold(cr);
4269 4269          (void) zthread_create(NULL, 0, nfs4_free_mount_thread, args, 0,
4270 4270              minclsyspri);
4271 4271  }
4272 4272  
4273 4273  static void
4274 4274  nfs4_free_mount_thread(freemountargs_t *args)
4275 4275  {
4276 4276          mntinfo4_t *mi;
4277 4277          nfs4_free_mount(args->fm_vfsp, args->fm_flag, args->fm_cr);
4278 4278          mi = VFTOMI4(args->fm_vfsp);
4279 4279          crfree(args->fm_cr);
4280 4280          VFS_RELE(args->fm_vfsp);
4281 4281          MI4_RELE(mi);
4282 4282          kmem_free(args, sizeof (freemountargs_t));
4283 4283          zthread_exit();
4284 4284          /* NOTREACHED */
4285 4285  }
4286 4286  
4287 4287  /*
4288 4288   * Free the data structures for a given filesystem (directly or in a thread).
4289 4289   */
4290 4290  static void
4291 4291  nfs4_free_mount(vfs_t *vfsp, int flag, cred_t *cr)
4292 4292  {
4293 4293          mntinfo4_t              *mi = VFTOMI4(vfsp);
4294 4294          nfs4_server_t           *sp;
4295 4295          callb_cpr_t             cpr_info;
4296 4296          kmutex_t                cpr_lock;
4297 4297          boolean_t               async_thread;
4298 4298          int                     removed;
4299 4299  
4300 4300          bool_t                  must_unlock;
4301 4301          nfs4_ephemeral_tree_t   *eph_tree;
4302 4302  
4303 4303          /*
4304 4304           * We need to participate in the CPR framework if this is a kernel
4305 4305           * thread.
4306 4306           */
4307 4307          async_thread = (curproc == nfs_zone()->zone_zsched);
4308 4308          if (async_thread) {
4309 4309                  mutex_init(&cpr_lock, NULL, MUTEX_DEFAULT, NULL);
4310 4310                  CALLB_CPR_INIT(&cpr_info, &cpr_lock, callb_generic_cpr,
4311 4311                      "nfsv4AsyncUnmount");
4312 4312          }
4313 4313  
4314 4314          /*
4315 4315           * We need to wait for all outstanding OTW calls
4316 4316           * and recovery to finish before we remove the mi
4317 4317           * from the nfs4_server_t, as current pending
4318 4318           * calls might still need this linkage (in order
4319 4319           * to find a nfs4_server_t from a mntinfo4_t).
4320 4320           */
4321 4321          (void) nfs_rw_enter_sig(&mi->mi_recovlock, RW_READER, FALSE);
4322 4322          sp = find_nfs4_server(mi);
4323 4323          nfs_rw_exit(&mi->mi_recovlock);
4324 4324  
4325 4325          if (sp) {
4326 4326                  while (sp->s_otw_call_count != 0) {
4327 4327                          if (async_thread) {
4328 4328                                  mutex_enter(&cpr_lock);
4329 4329                                  CALLB_CPR_SAFE_BEGIN(&cpr_info);
4330 4330                                  mutex_exit(&cpr_lock);
4331 4331                          }
4332 4332                          cv_wait(&sp->s_cv_otw_count, &sp->s_lock);
4333 4333                          if (async_thread) {
4334 4334                                  mutex_enter(&cpr_lock);
4335 4335                                  CALLB_CPR_SAFE_END(&cpr_info, &cpr_lock);
4336 4336                                  mutex_exit(&cpr_lock);
4337 4337                          }
4338 4338                  }
4339 4339                  mutex_exit(&sp->s_lock);
4340 4340                  nfs4_server_rele(sp);
4341 4341                  sp = NULL;
4342 4342          }
4343 4343  
4344 4344          mutex_enter(&mi->mi_lock);
4345 4345          while (mi->mi_in_recovery != 0) {
4346 4346                  if (async_thread) {
4347 4347                          mutex_enter(&cpr_lock);
4348 4348                          CALLB_CPR_SAFE_BEGIN(&cpr_info);
4349 4349                          mutex_exit(&cpr_lock);
4350 4350                  }
4351 4351                  cv_wait(&mi->mi_cv_in_recov, &mi->mi_lock);
4352 4352                  if (async_thread) {
4353 4353                          mutex_enter(&cpr_lock);
4354 4354                          CALLB_CPR_SAFE_END(&cpr_info, &cpr_lock);
4355 4355                          mutex_exit(&cpr_lock);
4356 4356                  }
4357 4357          }
4358 4358          mutex_exit(&mi->mi_lock);
4359 4359  
4360 4360          /*
4361 4361           * If we got an error, then do not nuke the
4362 4362           * tree. Either the harvester is busy reclaiming
4363 4363           * this node or we ran into some busy condition.
4364 4364           *
4365 4365           * The harvester will eventually come along and clean up.
4366 4366           * The only problem would be the root mount point.
4367 4367           *
4368 4368           * Since the busy node can occur for a variety
4369 4369           * of reasons and can result in an entry staying
4370 4370           * in df output but no longer accessible from the
4371 4371           * directory tree, we are okay.
4372 4372           */
4373 4373          if (!nfs4_ephemeral_umount(mi, flag, cr,
4374 4374              &must_unlock, &eph_tree))
4375 4375                  nfs4_ephemeral_umount_activate(mi, &must_unlock,
4376 4376                      &eph_tree);
4377 4377  
4378 4378          /*
4379 4379           * The original purge of the dnlc via 'dounmount'
4380 4380           * doesn't guarantee that another dnlc entry was not
4381 4381           * added while we waited for all outstanding OTW
4382 4382           * and recovery calls to finish.  So re-purge the
4383 4383           * dnlc now.
4384 4384           */
4385 4385          (void) dnlc_purge_vfsp(vfsp, 0);
4386 4386  
4387 4387          /*
4388 4388           * We need to explicitly stop the manager thread; the async worker
4389 4389           * threads can time out and exit on their own.
4390 4390           */
4391 4391          mutex_enter(&mi->mi_async_lock);
4392 4392          mi->mi_max_threads = 0;
4393 4393          NFS4_WAKEALL_ASYNC_WORKERS(mi->mi_async_work_cv);
4394 4394          mutex_exit(&mi->mi_async_lock);
4395 4395          if (mi->mi_manager_thread)
4396 4396                  nfs4_async_manager_stop(vfsp);
4397 4397  
4398 4398          destroy_rtable4(vfsp, cr);
4399 4399  
4400 4400          nfs4_remove_mi_from_server(mi, NULL);
4401 4401  
4402 4402          if (async_thread) {
4403 4403                  mutex_enter(&cpr_lock);
4404 4404                  CALLB_CPR_EXIT(&cpr_info);      /* drops cpr_lock */
4405 4405                  mutex_destroy(&cpr_lock);
4406 4406          }
4407 4407  
4408 4408          removed = nfs4_mi_zonelist_remove(mi);
4409 4409          if (removed)
4410 4410                  zone_rele_ref(&mi->mi_zone_ref, ZONE_REF_NFSV4);
4411 4411  }
4412 4412  
4413 4413  /* Referral-related subroutines */
4414 4414  
4415 4415  /* Free up the knetconfig contents */
4416 4416  static void
4417 4417  free_knconf_contents(struct knetconfig *k)
4418 4418  {
4419 4419          if (k == NULL)
4420 4420                  return;
4421 4421          if (k->knc_protofmly)
4422 4422                  kmem_free(k->knc_protofmly, KNC_STRSIZE);
4423 4423          if (k->knc_proto)
4424 4424                  kmem_free(k->knc_proto, KNC_STRSIZE);
4425 4425  }
4426 4426  
4427 4427  /*
4428 4428   * This returns, in a newly allocated buffer, the name component of
4429 4429   * the path that gave us an NFS4ERR_MOVED error, selected by nth.
4430 4430   * If the path is /rp/aaa/bbb and the nth value is 1, aaa is returned.
4431 4431   */
4432 4432  static char *
4433 4433  extract_referral_point(const char *svp, int nth)
4434 4434  {
4435 4435          int num_slashes = 0;
4436 4436          const char *p;
4437 4437          char *newpath = NULL;
4438 4438          int i = 0;
4439 4439  
4440 4440          newpath = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
4441 4441          for (p = svp; *p; p++) {
4442 4442                  if (*p == '/')
4443 4443                          num_slashes++;
4444 4444                  if (num_slashes == nth + 1) {
4445 4445                          p++;
4446 4446                          while (*p != '/') {
4447 4447                                  if (*p == '\0')
4448 4448                                          break;
4449 4449                                  newpath[i] = *p;
4450 4450                                  i++;
4451 4451                                  p++;
4452 4452                          }
4453 4453                          newpath[i++] = '\0';
4454 4454                          break;
4455 4455                  }
4456 4456          }
4457 4457          return (newpath);
4458 4458  }
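
           /*
            * Illustrative use only; the real caller is not in the lines
            * shown here.  For the path that triggered NFS4ERR_MOVED:
            *
            *	char *comp;
            *
            *	comp = extract_referral_point("/rp/aaa/bbb", 1);	 "aaa"
            *	...
            *	kmem_free(comp, MAXPATHLEN);
            *
            * The kmem_free() size follows from the kmem_zalloc(MAXPATHLEN,
            * KM_SLEEP) above: the returned buffer is always MAXPATHLEN bytes.
            */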
4459 4459  
4460 4460  /*
4461 4461   * This sets up a new path in sv_path to do a lookup of the referral point.
4462 4462   * If the path is /rp/aaa/bbb and the referral point is aaa,
4463 4463   * sv_path is updated to /rp/aaa.  This path will be used to get the
4464 4464   * referral location.
4465 4465   */
4466 4466  static void
4467 4467  setup_newsvpath(servinfo4_t *svp, int nth)
4468 4468  {
4469 4469          int num_slashes = 0, pathlen, i = 0;
4470 4470          char *newpath, *p;
4471 4471  
4472 4472          newpath = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
4473 4473          for (p = svp->sv_path; *p; p++) {
4474 4474                  newpath[i] = *p;
4475 4475                  if (*p == '/')
4476 4476                          num_slashes++;
4477 4477                  if (num_slashes == nth + 1) {
4478 4478                          newpath[i] = '\0';
4479 4479                          pathlen = strlen(newpath) + 1;
4480 4480                          kmem_free(svp->sv_path, svp->sv_pathlen);
4481 4481                          svp->sv_path = kmem_alloc(pathlen, KM_SLEEP);
4482 4482                          svp->sv_pathlen = pathlen;
4483 4483                          bcopy(newpath, svp->sv_path, pathlen);
4484 4484                          break;
4485 4485                  }
4486 4486                  i++;
4487 4487          }
4488 4488          kmem_free(newpath, MAXPATHLEN);
4489 4489  }
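
           /*
            * Worked trace of the scan above.  With svp->sv_path ==
            * "/rp/aaa/bbb" and nth == 2, the copy stops at the third '/'
            * (the one before "bbb"), so sv_path becomes "/rp/aaa" and
            * sv_pathlen becomes 8 (seven characters plus the terminating
            * NUL).
            */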
  