Print this page
    
Fix NFS design problems re. multiple zone keys
Make NFS server zone-specific data all have the same lifetime
Fix rfs4_clean_state_exi
Fix exi_cache_reclaim
Fix mistakes in zone keys work
More fixes re. exi_zoneid and exi_tree
(danmcd -> Keep some ASSERT()s around for readability.)
    
      
        | Split | 
	Close | 
      
      | Expand all | 
      | Collapse all | 
    
    
          --- old/usr/src/uts/common/fs/nfs/nfs_export.c
          +++ new/usr/src/uts/common/fs/nfs/nfs_export.c
   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  
  22   22  /*
  23   23   * Copyright (c) 1990, 2010, Oracle and/or its affiliates. All rights reserved.
  24   24   */
  25   25  
  26   26  /*
  27   27   *      Copyright 1983, 1984, 1985, 1986, 1987, 1988, 1989  AT&T.
  28   28   *              All rights reserved.
  29   29   */
  30   30  
  31   31  /*
  32   32   * Copyright 2018 Nexenta Systems, Inc.
  33   33   */
  34   34  
  35   35  #include <sys/types.h>
  36   36  #include <sys/param.h>
  37   37  #include <sys/time.h>
  38   38  #include <sys/vfs.h>
  39   39  #include <sys/vnode.h>
  40   40  #include <sys/socket.h>
  41   41  #include <sys/errno.h>
  42   42  #include <sys/uio.h>
  43   43  #include <sys/proc.h>
  44   44  #include <sys/user.h>
  45   45  #include <sys/file.h>
  46   46  #include <sys/tiuser.h>
  47   47  #include <sys/kmem.h>
  48   48  #include <sys/pathname.h>
  49   49  #include <sys/debug.h>
  50   50  #include <sys/vtrace.h>
  51   51  #include <sys/cmn_err.h>
  52   52  #include <sys/acl.h>
  53   53  #include <sys/utsname.h>
  54   54  #include <sys/sdt.h>
  55   55  #include <netinet/in.h>
  56   56  #include <sys/avl.h>
  57   57  
  58   58  #include <rpc/types.h>
  59   59  #include <rpc/auth.h>
  60   60  #include <rpc/svc.h>
  
    | 
      ↓ open down ↓ | 
    60 lines elided | 
    
      ↑ open up ↑ | 
  
  61   61  
  62   62  #include <nfs/nfs.h>
  63   63  #include <nfs/export.h>
  64   64  #include <nfs/nfssys.h>
  65   65  #include <nfs/nfs_clnt.h>
  66   66  #include <nfs/nfs_acl.h>
  67   67  #include <nfs/nfs_log.h>
  68   68  #include <nfs/lm.h>
  69   69  #include <sys/sunddi.h>
  70   70  
  71      -static zone_key_t nfs_export_key;
  72      -
  73   71  /*
  74   72   * exi_id support
  75   73   *
  76   74   * exi_id_next          The next exi_id available.
  77   75   * exi_id_overflow      The exi_id_next already overflowed, so we should
  78   76   *                      thoroughly check for duplicates.
  79   77   * exi_id_tree          AVL tree indexed by exi_id.
  80   78   * nfs_exi_id_lock      Lock to protect the export ID list
  81   79   *
  82   80   * All exi_id_next, exi_id_overflow, and exi_id_tree are protected by
  83   81   * nfs_exi_id_lock.
  84   82   */
  85   83  static int exi_id_next;
  86   84  static bool_t exi_id_overflow;
  87   85  avl_tree_t exi_id_tree;
  88   86  kmutex_t nfs_exi_id_lock;
  89   87  
  90   88  static int      unexport(nfs_export_t *, exportinfo_t *);
  91   89  static void     exportfree(exportinfo_t *);
  92   90  static int      loadindex(exportdata_t *);
  93   91  
  94   92  extern void     nfsauth_cache_free(exportinfo_t *);
  95   93  extern int      sec_svc_loadrootnames(int, int, caddr_t **, model_t);
  96   94  extern void     sec_svc_freerootnames(int, int, caddr_t *);
  97   95  
  98   96  static int      build_seclist_nodups(exportdata_t *, secinfo_t *, int);
  99   97  static void     srv_secinfo_add(secinfo_t **, int *, secinfo_t *, int, int);
 100   98  static void     srv_secinfo_remove(secinfo_t **, int *, secinfo_t *, int);
 101   99  static void     srv_secinfo_treeclimb(nfs_export_t *, exportinfo_t *,
 102  100                      secinfo_t *, int, bool_t);
 103  101  
 104  102  #ifdef VOLATILE_FH_TEST
 105  103  static struct ex_vol_rename *find_volrnm_fh(exportinfo_t *, nfs_fh4 *);
 106  104  static uint32_t find_volrnm_fh_id(exportinfo_t *, nfs_fh4 *);
 107  105  static void     free_volrnm_list(exportinfo_t *);
 108  106  #endif /* VOLATILE_FH_TEST */
 109  107  
 110  108  fhandle_t nullfh2;      /* for comparing V2 filehandles */
 111  109  
 112  110  /*
 113  111   * macro for static dtrace probes to trace server namespace ref count mods.
 114  112   */
  
    | 
      ↓ open down ↓ | 
    32 lines elided | 
    
      ↑ open up ↑ | 
  
 115  113  #define SECREF_TRACE(seclist, tag, flav, aftcnt) \
 116  114          DTRACE_PROBE4(nfss__i__nmspc__secref, struct secinfo *, (seclist), \
 117  115                  char *, (tag), int, (int)(flav), int, (int)(aftcnt))
 118  116  
 119  117  
 120  118  #define exptablehash(fsid, fid) (nfs_fhhash((fsid), (fid)) & (EXPTABLESIZE - 1))
 121  119  
 122  120  extern nfs_export_t *
 123  121  nfs_get_export(void)
 124  122  {
 125      -        return (zone_getspecific(nfs_export_key, curzone));
      123 +        nfs_globals_t *ng = zone_getspecific(nfssrv_zone_key, curzone);
      124 +        nfs_export_t *ne = ng->nfs_export;
      125 +        ASSERT(ne != NULL);
      126 +        return (ne);
 126  127  }
 127  128  
 128  129  static uint8_t
 129  130  xor_hash(uint8_t *data, int len)
 130  131  {
 131  132          uint8_t h = 0;
 132  133  
 133  134          while (len--)
 134  135                  h ^= *data++;
 135  136  
 136  137          return (h);
 137  138  }
 138  139  
 139  140  /*
 140  141   * File handle hash function, XOR over all bytes in fsid and fid.
 141  142   */
 142  143  static unsigned
 143  144  nfs_fhhash(fsid_t *fsid, fid_t *fid)
 144  145  {
 145  146          int len;
 146  147          uint8_t h;
 147  148  
 148  149          h = xor_hash((uint8_t *)fsid, sizeof (fsid_t));
 149  150  
 150  151          /*
 151  152           * Sanity check the length before using it
 152  153           * blindly in case the client trashed it.
 153  154           */
 154  155          len = fid->fid_len > NFS_FH4MAXDATA ? 0 : fid->fid_len;
 155  156          h ^= xor_hash((uint8_t *)fid->fid_data, len);
 156  157  
 157  158          return ((unsigned)h);
 158  159  }
 159  160  
 160  161  /*
 161  162   * Free the memory allocated within a secinfo entry.
 162  163   */
 163  164  void
 164  165  srv_secinfo_entry_free(struct secinfo *secp)
 165  166  {
 166  167          if (secp->s_rootcnt > 0 && secp->s_rootnames != NULL) {
 167  168                  sec_svc_freerootnames(secp->s_secinfo.sc_rpcnum,
 168  169                      secp->s_rootcnt, secp->s_rootnames);
 169  170                  secp->s_rootcnt = 0;
 170  171          }
 171  172  
 172  173          if ((secp->s_secinfo.sc_rpcnum == RPCSEC_GSS) &&
 173  174              (secp->s_secinfo.sc_gss_mech_type)) {
 174  175                  kmem_free(secp->s_secinfo.sc_gss_mech_type->elements,
 175  176                      secp->s_secinfo.sc_gss_mech_type->length);
 176  177                  kmem_free(secp->s_secinfo.sc_gss_mech_type,
 177  178                      sizeof (rpc_gss_OID_desc));
 178  179                  secp->s_secinfo.sc_gss_mech_type = NULL;
 179  180          }
 180  181  }
 181  182  
 182  183  /*
 183  184   * Free a list of secinfo allocated in the exportdata structure.
 184  185   */
 185  186  void
 186  187  srv_secinfo_list_free(struct secinfo *secinfo, int cnt)
 187  188  {
 188  189          int i;
 189  190  
 190  191          if (cnt == 0)
 191  192                  return;
 192  193  
 193  194          for (i = 0; i < cnt; i++)
 194  195                  srv_secinfo_entry_free(&secinfo[i]);
 195  196  
 196  197          kmem_free(secinfo, cnt * sizeof (struct secinfo));
 197  198  }
 198  199  
 199  200  /*
 200  201   * Allocate and copy secinfo data from "from" to "to".
 201  202   *
 202  203   * This routine is used by srv_secinfo_add() to add a new flavor to an
 203  204   * ancestor's export node. The rootnames are not copied because the
 204  205   * allowable rootname access only applies to the explicit exported node,
 205  206   * not its ancestor's.
 206  207   *
 207  208   * "to" should have already been allocated and zeroed before calling
 208  209   * this routine.
 209  210   *
 210  211   * This routine is used under the protection of exported_lock (RW_WRITER).
 211  212   */
 212  213  void
 213  214  srv_secinfo_copy(struct secinfo *from, struct secinfo *to)
 214  215  {
 215  216          to->s_secinfo.sc_nfsnum = from->s_secinfo.sc_nfsnum;
 216  217          to->s_secinfo.sc_rpcnum = from->s_secinfo.sc_rpcnum;
 217  218  
 218  219          if (from->s_secinfo.sc_rpcnum == RPCSEC_GSS) {
 219  220                  to->s_secinfo.sc_service = from->s_secinfo.sc_service;
 220  221                  bcopy(from->s_secinfo.sc_name, to->s_secinfo.sc_name,
 221  222                      strlen(from->s_secinfo.sc_name));
 222  223                  bcopy(from->s_secinfo.sc_gss_mech, to->s_secinfo.sc_gss_mech,
 223  224                      strlen(from->s_secinfo.sc_gss_mech));
 224  225  
 225  226                  /* copy mechanism oid */
 226  227                  to->s_secinfo.sc_gss_mech_type =
 227  228                      kmem_alloc(sizeof (rpc_gss_OID_desc), KM_SLEEP);
 228  229                  to->s_secinfo.sc_gss_mech_type->length =
 229  230                      from->s_secinfo.sc_gss_mech_type->length;
 230  231                  to->s_secinfo.sc_gss_mech_type->elements =
 231  232                      kmem_alloc(from->s_secinfo.sc_gss_mech_type->length,
 232  233                      KM_SLEEP);
 233  234                  bcopy(from->s_secinfo.sc_gss_mech_type->elements,
 234  235                      to->s_secinfo.sc_gss_mech_type->elements,
 235  236                      from->s_secinfo.sc_gss_mech_type->length);
 236  237          }
 237  238  
 238  239          to->s_refcnt = from->s_refcnt;
 239  240          to->s_window = from->s_window;
 240  241          /* no need to copy the mode bits - s_flags */
 241  242  }
 242  243  
 243  244  /*
 244  245   * Create a secinfo array without duplicates.  The condensed
 245  246   * flavor list is used to propagate flavor ref counts to an
 246  247   * export's ancestor pseudonodes.
 247  248   */
 248  249  static int
 249  250  build_seclist_nodups(exportdata_t *exd, secinfo_t *nodups, int exponly)
 250  251  {
 251  252          int ccnt, c;
 252  253          int ncnt, n;
 253  254          struct secinfo *cursec;
 254  255  
 255  256          ncnt = 0;
 256  257          ccnt = exd->ex_seccnt;
 257  258          cursec = exd->ex_secinfo;
 258  259  
 259  260          for (c = 0; c < ccnt; c++) {
 260  261  
 261  262                  if (exponly && ! SEC_REF_EXPORTED(&cursec[c]))
 262  263                          continue;
 263  264  
 264  265                  for (n = 0; n < ncnt; n++) {
 265  266                          if (nodups[n].s_secinfo.sc_nfsnum ==
 266  267                              cursec[c].s_secinfo.sc_nfsnum)
 267  268                                  break;
 268  269                  }
 269  270  
 270  271                  /*
 271  272                   * The structure copy below also copies ptrs embedded
 272  273                   * within struct secinfo.  The ptrs are copied but
 273  274                   * they are never freed from the nodups array.  If
 274  275                   * an ancestor's secinfo array doesn't contain one
 275  276                   * of the nodups flavors, then the entry is properly
 276  277                   * copied into the ancestor's secinfo array.
 277  278                   * (see srv_secinfo_copy)
 278  279                   */
 279  280                  if (n == ncnt) {
 280  281                          nodups[n] = cursec[c];
 281  282                          ncnt++;
 282  283                  }
 283  284          }
 284  285          return (ncnt);
 285  286  }
 286  287  
 287  288  /*
 288  289   * Add the new security flavors from newdata to the current list, pcursec.
 289  290   * Upon return, *pcursec has the newly merged secinfo list.
 290  291   *
 291  292   * There should be at least 1 secinfo entry in newsec.
 292  293   *
 293  294   * This routine is used under the protection of exported_lock (RW_WRITER).
 294  295   */
 295  296  static void
 296  297  srv_secinfo_add(secinfo_t **pcursec, int *pcurcnt, secinfo_t *newsec,
 297  298      int newcnt, int is_pseudo)
 298  299  {
 299  300          int ccnt, c;            /* sec count in current data - curdata */
 300  301          int n;                  /* index for newsec  - newsecinfo */
 301  302          int tcnt;               /* total sec count after merge */
 302  303          int mcnt;               /* total sec count after merge */
 303  304          struct secinfo *msec;   /* merged secinfo list */
 304  305          struct secinfo *cursec;
 305  306  
 306  307          cursec = *pcursec;
 307  308          ccnt = *pcurcnt;
 308  309  
 309  310          ASSERT(newcnt > 0);
 310  311          tcnt = ccnt + newcnt;
 311  312  
 312  313          for (n = 0; n < newcnt; n++) {
 313  314                  for (c = 0; c < ccnt; c++) {
 314  315                          if (newsec[n].s_secinfo.sc_nfsnum ==
 315  316                              cursec[c].s_secinfo.sc_nfsnum) {
 316  317                                  cursec[c].s_refcnt += newsec[n].s_refcnt;
 317  318                                  SECREF_TRACE(cursec, "add_ref",
 318  319                                      cursec[c].s_secinfo.sc_nfsnum,
 319  320                                      cursec[c].s_refcnt);
 320  321                                  tcnt--;
 321  322                                  break;
 322  323                          }
 323  324                  }
 324  325          }
 325  326  
 326  327          if (tcnt == ccnt)
 327  328                  return; /* no change; no new flavors */
 328  329  
 329  330          msec = kmem_zalloc(tcnt * sizeof (struct secinfo), KM_SLEEP);
 330  331  
 331  332          /* move current secinfo list data to the new list */
 332  333          for (c = 0; c < ccnt; c++)
 333  334                  msec[c] = cursec[c];
 334  335  
 335  336          /* Add the flavor that's not in the current data */
 336  337          mcnt = ccnt;
 337  338          for (n = 0; n < newcnt; n++) {
 338  339                  for (c = 0; c < ccnt; c++) {
 339  340                          if (newsec[n].s_secinfo.sc_nfsnum ==
 340  341                              cursec[c].s_secinfo.sc_nfsnum)
 341  342                                  break;
 342  343                  }
 343  344  
 344  345                  /* This is the one. Add it. */
 345  346                  if (c == ccnt) {
 346  347                          srv_secinfo_copy(&newsec[n], &msec[mcnt]);
 347  348  
 348  349                          if (is_pseudo)
 349  350                                  msec[mcnt].s_flags = M_RO;
 350  351  
 351  352                          SECREF_TRACE(msec, "new_ref",
 352  353                              msec[mcnt].s_secinfo.sc_nfsnum,
 353  354                              msec[mcnt].s_refcnt);
 354  355                          mcnt++;
 355  356                  }
 356  357          }
 357  358  
 358  359          ASSERT(mcnt == tcnt);
 359  360  
 360  361          /*
 361  362           * Done. Update curdata. Free the old secinfo list in
 362  363           * curdata and return the new sec array info
 363  364           */
 364  365          if (ccnt > 0)
 365  366                  kmem_free(cursec, ccnt * sizeof (struct secinfo));
 366  367          *pcurcnt = tcnt;
 367  368          *pcursec = msec;
 368  369  }
 369  370  
 370  371  /*
 371  372   * For NFS V4.
 372  373   * Remove the security data of the unexported node from its ancestors.
 373  374   * Assume there is at least one flavor entry in the current sec list
 374  375   * (pcursec).
 375  376   *
 376  377   * This routine is used under the protection of exported_lock (RW_WRITER).
 377  378   *
 378  379   * Every element of remsec is an explicitly exported flavor.  If
 379  380   * srv_secinfo_remove() is called from an exportfs error path, then
 380  381   * the flavor list was derived from the user's share cmdline,
 381  382   * and all flavors are explicit.  If it was called from the unshare path,
 382  383   * build_seclist_nodups() was called with the exponly flag.
 383  384   */
 384  385  static void
 385  386  srv_secinfo_remove(secinfo_t **pcursec, int *pcurcnt, secinfo_t *remsec,
 386  387      int remcnt)
 387  388  {
 388  389          int ccnt, c;            /* sec count in current data - cursec */
 389  390          int r;                  /* sec count in removal data - remsec */
 390  391          int tcnt, mcnt;         /* total sec count after removing */
 391  392          struct secinfo *msec;   /* final secinfo list after removing */
 392  393          struct secinfo *cursec;
 393  394  
 394  395          cursec = *pcursec;
 395  396          ccnt = *pcurcnt;
 396  397          tcnt = ccnt;
 397  398  
 398  399          for (r = 0; r < remcnt; r++) {
 399  400                  /*
 400  401                   * At unshare/reshare time, only explicitly shared flavor ref
 401  402                   * counts are decremented and propagated to ancestors.
 402  403                   * Implicit flavor refs came from shared descendants, and
 403  404                   * they must be kept.
 404  405                   */
 405  406                  if (! SEC_REF_EXPORTED(&remsec[r]))
 406  407                          continue;
 407  408  
 408  409                  for (c = 0; c < ccnt; c++) {
 409  410                          if (remsec[r].s_secinfo.sc_nfsnum ==
 410  411                              cursec[c].s_secinfo.sc_nfsnum) {
 411  412  
 412  413                                  /*
 413  414                                   * Decrement secinfo reference count by 1.
 414  415                                   * If this entry is invalid after decrementing
 415  416                                   * the count (i.e. count < 1), this entry will
 416  417                                   * be removed.
 417  418                                   */
 418  419                                  cursec[c].s_refcnt--;
 419  420  
 420  421                                  SECREF_TRACE(cursec, "del_ref",
 421  422                                      cursec[c].s_secinfo.sc_nfsnum,
 422  423                                      cursec[c].s_refcnt);
 423  424  
 424  425                                  ASSERT(cursec[c].s_refcnt >= 0);
 425  426  
 426  427                                  if (SEC_REF_INVALID(&cursec[c]))
 427  428                                          tcnt--;
 428  429                                  break;
 429  430                          }
 430  431                  }
 431  432          }
 432  433  
 433  434          ASSERT(tcnt >= 0);
 434  435          if (tcnt == ccnt)
 435  436                  return; /* no change; no flavors to remove */
 436  437  
 437  438          if (tcnt == 0) {
 438  439                  srv_secinfo_list_free(cursec, ccnt);
 439  440                  *pcurcnt = 0;
 440  441                  *pcursec = NULL;
 441  442                  return;
 442  443          }
 443  444  
 444  445          msec = kmem_zalloc(tcnt * sizeof (struct secinfo), KM_SLEEP);
 445  446  
 446  447          /* walk thru the given secinfo list to remove the flavors */
 447  448          mcnt = 0;
 448  449          for (c = 0; c < ccnt; c++) {
 449  450                  if (SEC_REF_INVALID(&cursec[c])) {
 450  451                          srv_secinfo_entry_free(&cursec[c]);
 451  452                  } else {
 452  453                          msec[mcnt] = cursec[c];
 453  454                          mcnt++;
 454  455                  }
 455  456          }
 456  457  
 457  458          ASSERT(mcnt == tcnt);
 458  459          /*
 459  460           * Done. Update curdata.
 460  461           * Free the existing secinfo list in curdata. All pointers
 461  462           * within the list have either been moved to msec or freed
 462  463           * if it's invalid.
 463  464           */
 464  465          kmem_free(*pcursec, ccnt * sizeof (struct secinfo));
 465  466          *pcursec = msec;
 466  467          *pcurcnt = tcnt;
 467  468  }
 468  469  
 469  470  
 470  471  /*
 471  472   * For the reshare case, sec flavor accounting happens in 3 steps:
 472  473   * 1) propagate addition of new flavor refs up the ancestor tree
 473  474   * 2) transfer flavor refs of descendants to new/reshared exportdata
 474  475   * 3) propagate removal of old flavor refs up the ancestor tree
 475  476   *
 476  477   * srv_secinfo_exp2exp() implements step 2 of a reshare.  At this point,
 477  478   * the new flavor list has already been propagated up through the
 478  479   * ancestor tree via srv_secinfo_treeclimb().
 479  480   *
 480  481   * If there is more than 1 export reference to an old flavor (i.e. some
 481  482   * of its children shared with this flavor), this flavor information
 482  483   * needs to be transferred to the new exportdata struct.  A flavor in
 483  484   * the old exportdata has descendant refs when its s_refcnt > 1 or it
 484  485   * is implicitly shared (M_SEC4_EXPORTED not set in s_flags).
 485  486   *
 486  487   * SEC_REF_EXPORTED() is only true when M_SEC4_EXPORTED is set
 487  488   * SEC_REF_SELF() is only true when both M_SEC4_EXPORTED is set and s_refcnt==1
 488  489   *
 489  490   * Transferring descendant flavor refcnts happens in 2 passes:
 490  491   * a) flavors used before (oldsecinfo) and after (curdata->ex_secinfo) reshare
 491  492   * b) flavors used before but not after reshare
 492  493   *
 493  494   * This routine is used under the protection of exported_lock (RW_WRITER).
 494  495   */
 495  496  void
 496  497  srv_secinfo_exp2exp(exportdata_t *curdata, secinfo_t *oldsecinfo, int ocnt)
 497  498  {
 498  499          int ccnt, c;            /* sec count in current data - curdata */
 499  500          int o;                  /* sec count in old data - oldsecinfo */
 500  501          int tcnt, mcnt;         /* total sec count after the transfer */
 501  502          struct secinfo *msec;   /* merged secinfo list */
 502  503  
 503  504          ccnt = curdata->ex_seccnt;
 504  505  
 505  506          ASSERT(ocnt > 0);
 506  507          ASSERT(!(curdata->ex_flags & EX_PSEUDO));
 507  508  
 508  509          /*
 509  510           * If the oldsecinfo has flavors with more than 1 reference count
 510  511           * and the flavor is specified in the reshare, transfer the flavor
 511  512           * refs to the new seclist (curdata.ex_secinfo).
 512  513           */
 513  514          tcnt = ccnt + ocnt;
 514  515  
 515  516          for (o = 0; o < ocnt; o++) {
 516  517  
 517  518                  if (SEC_REF_SELF(&oldsecinfo[o])) {
 518  519                          tcnt--;
 519  520                          continue;
 520  521                  }
 521  522  
 522  523                  for (c = 0; c < ccnt; c++) {
 523  524                          if (oldsecinfo[o].s_secinfo.sc_nfsnum ==
 524  525                              curdata->ex_secinfo[c].s_secinfo.sc_nfsnum) {
 525  526  
 526  527                                  /*
 527  528                                   * add old reference to the current
 528  529                                   * secinfo count
 529  530                                   */
 530  531                                  curdata->ex_secinfo[c].s_refcnt +=
 531  532                                      oldsecinfo[o].s_refcnt;
 532  533  
 533  534                                  /*
 534  535                                   * Delete the old export flavor
 535  536                                   * reference.  The initial reference
 536  537                                   * was created during srv_secinfo_add,
 537  538                                   * and the count is decremented below
 538  539                                   * to account for the initial reference.
 539  540                                   */
 540  541                                  if (SEC_REF_EXPORTED(&oldsecinfo[o]))
 541  542                                          curdata->ex_secinfo[c].s_refcnt--;
 542  543  
 543  544                                  SECREF_TRACE(curdata->ex_path,
 544  545                                      "reshare_xfer_common_child_refs",
 545  546                                      curdata->ex_secinfo[c].s_secinfo.sc_nfsnum,
 546  547                                      curdata->ex_secinfo[c].s_refcnt);
 547  548  
 548  549                                  ASSERT(curdata->ex_secinfo[c].s_refcnt >= 0);
 549  550  
 550  551                                  tcnt--;
 551  552                                  break;
 552  553                          }
 553  554                  }
 554  555          }
 555  556  
 556  557          if (tcnt == ccnt)
 557  558                  return; /* no more transfer to do */
 558  559  
 559  560          /*
 560  561           * oldsecinfo has flavors referenced by its children that are not
 561  562           * in the current (new) export flavor list.  Add these flavors.
 562  563           */
 563  564          msec = kmem_zalloc(tcnt * sizeof (struct secinfo), KM_SLEEP);
 564  565  
 565  566          /* move current secinfo list data to the new list */
 566  567          for (c = 0; c < ccnt; c++)
 567  568                  msec[c] = curdata->ex_secinfo[c];
 568  569  
 569  570          /*
 570  571           * Add the flavor that's not in the new export, but still
 571  572           * referenced by its children.
 572  573           */
 573  574          mcnt = ccnt;
 574  575          for (o = 0; o < ocnt; o++) {
 575  576                  if (! SEC_REF_SELF(&oldsecinfo[o])) {
 576  577                          for (c = 0; c < ccnt; c++) {
 577  578                                  if (oldsecinfo[o].s_secinfo.sc_nfsnum ==
 578  579                                      curdata->ex_secinfo[c].s_secinfo.sc_nfsnum)
 579  580                                          break;
 580  581                          }
 581  582  
 582  583                          /*
 583  584                           * This is the one. Add it. Decrement the ref count
 584  585                           * by 1 if the flavor is an explicitly shared flavor
 585  586                           * for the oldsecinfo export node.
 586  587                           */
 587  588                          if (c == ccnt) {
 588  589                                  srv_secinfo_copy(&oldsecinfo[o], &msec[mcnt]);
 589  590                                  if (SEC_REF_EXPORTED(&oldsecinfo[o]))
 590  591                                          msec[mcnt].s_refcnt--;
 591  592  
 592  593                                  SECREF_TRACE(curdata,
 593  594                                      "reshare_xfer_implicit_child_refs",
 594  595                                      msec[mcnt].s_secinfo.sc_nfsnum,
 595  596                                      msec[mcnt].s_refcnt);
 596  597  
 597  598                                  ASSERT(msec[mcnt].s_refcnt >= 0);
 598  599                                  mcnt++;
 599  600                          }
 600  601                  }
 601  602          }
 602  603  
 603  604          ASSERT(mcnt == tcnt);
 604  605          /*
 605  606           * Done. Update curdata, free the existing secinfo list in
 606  607           * curdata and set the new value.
 607  608           */
 608  609          if (ccnt > 0)
 609  610                  kmem_free(curdata->ex_secinfo, ccnt * sizeof (struct secinfo));
 610  611          curdata->ex_seccnt = tcnt;
 611  612          curdata->ex_secinfo = msec;
 612  613  }
 613  614  
 614  615  /*
 615  616   * When unsharing an old export node and the old node becomes a pseudo node,
 616  617   * if there is more than 1 export reference to an old flavor (i.e. some of
 617  618   * its children shared with this flavor), this flavor information needs to
 618  619   * be transferred to the new shared node.
 619  620   *
 620  621   * This routine is used under the protection of exported_lock (RW_WRITER).
 621  622   */
 622  623  void
 623  624  srv_secinfo_exp2pseu(exportdata_t *curdata, exportdata_t *olddata)
 624  625  {
 625  626          int ocnt, o;            /* sec count in transfer data - trandata */
 626  627          int tcnt, mcnt;         /* total sec count after transfer */
 627  628          struct secinfo *msec;   /* merged secinfo list */
 628  629  
 629  630          ASSERT(curdata->ex_flags & EX_PSEUDO);
 630  631          ASSERT(curdata->ex_seccnt == 0);
 631  632  
 632  633          ocnt = olddata->ex_seccnt;
 633  634  
 634  635          /*
 635  636           * If the olddata has flavors with more than 1 reference count,
 636  637           * transfer the information to the curdata.
 637  638           */
 638  639          tcnt = ocnt;
 639  640  
 640  641          for (o = 0; o < ocnt; o++) {
 641  642                  if (SEC_REF_SELF(&olddata->ex_secinfo[o]))
 642  643                          tcnt--;
 643  644          }
 644  645  
 645  646          if (tcnt == 0)
 646  647                  return; /* no transfer to do */
 647  648  
 648  649          msec = kmem_zalloc(tcnt * sizeof (struct secinfo), KM_SLEEP);
 649  650  
 650  651          mcnt = 0;
 651  652          for (o = 0; o < ocnt; o++) {
 652  653                  if (! SEC_REF_SELF(&olddata->ex_secinfo[o])) {
 653  654  
 654  655                          /*
 655  656                           * Decrement the reference count by 1 if the flavor is
 656  657                           * an explicitly shared flavor for the olddata export
 657  658                           * node.
 658  659                           */
 659  660                          srv_secinfo_copy(&olddata->ex_secinfo[o], &msec[mcnt]);
 660  661                          msec[mcnt].s_flags = M_RO;
 661  662                          if (SEC_REF_EXPORTED(&olddata->ex_secinfo[o]))
 662  663                                  msec[mcnt].s_refcnt--;
 663  664  
 664  665                          SECREF_TRACE(curdata, "unshare_morph_pseudo",
 665  666                              msec[mcnt].s_secinfo.sc_nfsnum,
 666  667                              msec[mcnt].s_refcnt);
 667  668  
 668  669                          ASSERT(msec[mcnt].s_refcnt >= 0);
 669  670                          mcnt++;
 670  671                  }
 671  672          }
 672  673  
 673  674          ASSERT(mcnt == tcnt);
 674  675          /*
 675  676           * Done. Update curdata.
 676  677           * Free up the existing secinfo list in curdata and
 677  678           * set the new value.
 678  679           */
 679  680          curdata->ex_seccnt = tcnt;
 680  681          curdata->ex_secinfo = msec;
 681  682  }
 682  683  
 683  684  /*
 684  685   * Find for given treenode the exportinfo which has its
 685  686   * exp_visible linked on its exi_visible list.
 686  687   *
 687  688   * Note: We could add new pointer either to treenode or
 688  689   * to exp_visible, which will point there directly.
 689  690   * This would buy some speed for some memory.
 690  691   */
 691  692  exportinfo_t *
 692  693  vis2exi(treenode_t *tnode)
 693  694  {
 694  695          exportinfo_t *exi_ret = NULL;
 695  696  #ifdef DEBUG
 696  697          zone_t *zone = NULL;
 697  698  #endif
 698  699  
 699  700          for (;;) {
 700  701                  tnode = tnode->tree_parent;
 701  702  #ifdef DEBUG
 702  703                  if (zone == NULL && tnode->tree_exi != NULL)
 703  704                          zone = tnode->tree_exi->exi_zone;
 704  705  #endif
 705  706                  if (TREE_ROOT(tnode)) {
 706  707                          ASSERT3P(zone, ==, tnode->tree_exi->exi_zone);
 707  708                          exi_ret = tnode->tree_exi;
 708  709                          break;
 709  710                  }
 710  711          }
 711  712  
 712  713          ASSERT(exi_ret); /* Every visible should have its home exportinfo */
 713  714          return (exi_ret);
 714  715  }
  
    | 
      ↓ open down ↓ | 
    579 lines elided | 
    
      ↑ open up ↑ | 
  
 715  716  
 716  717  /*
 717  718   * For NFS V4.
 718  719   * Add or remove the newly exported or unexported security flavors of the
 719  720   * given exportinfo from its ancestors up to the system root.
 720  721   */
 721  722  void
 722  723  srv_secinfo_treeclimb(nfs_export_t *ne, exportinfo_t *exip, secinfo_t *sec,
 723  724      int seccnt, bool_t isadd)
 724  725  {
 725      -        treenode_t *tnode = exip->exi_tree;
      726 +        treenode_t *tnode;
 726  727  
 727  728          ASSERT(RW_WRITE_HELD(&ne->exported_lock));
 728      -        ASSERT(tnode != NULL);
 729  729  
      730 +        /*
      731 +         * exi_tree can be null for the zone root
      732 +         * which means we're already at the "top"
      733 +         * and there's nothing more to "climb".
      734 +         */
      735 +        tnode = exip->exi_tree;
      736 +        if (tnode == NULL) {
      737 +                /* Should only happen for the zone root export */
      738 +                ASSERT(exip == ne->exi_root);
      739 +                return;
      740 +        }
      741 +
 730  742          if (seccnt == 0)
 731  743                  return;
 732  744  
 733  745          /*
 734  746           * If flavors are being added and the new export root isn't
 735  747           * also VROOT, its implicitly allowed flavors are inherited from
 736  748           * its pseudonode.
 737  749           * Note - for VROOT exports the implicitly allowed flavors were
 738  750           * transferred from the PSEUDO export in exportfs()
 739  751           */
 740  752          if (isadd && !(exip->exi_vp->v_flag & VROOT) &&
 741  753              !VN_IS_CURZONEROOT(exip->exi_vp) &&
 742  754              tnode->tree_vis->vis_seccnt > 0) {
 743  755                  srv_secinfo_add(&exip->exi_export.ex_secinfo,
 744  756                      &exip->exi_export.ex_seccnt, tnode->tree_vis->vis_secinfo,
 745  757                      tnode->tree_vis->vis_seccnt, FALSE);
 746  758          }
 747  759  
 748  760          /*
 749  761           * Move to parent node and propagate sec flavor
 750  762           * to exportinfo and to visible structures.
 751  763           */
 752  764          tnode = tnode->tree_parent;
 753  765  
 754  766          while (tnode != NULL) {
 755  767  
 756  768                  /* If there is exportinfo, update it */
 757  769                  if (tnode->tree_exi != NULL) {
 758  770                          secinfo_t **pxsec =
 759  771                              &tnode->tree_exi->exi_export.ex_secinfo;
 760  772                          int *pxcnt = &tnode->tree_exi->exi_export.ex_seccnt;
 761  773                          int is_pseudo = PSEUDO(tnode->tree_exi);
 762  774                          if (isadd)
 763  775                                  srv_secinfo_add(pxsec, pxcnt, sec, seccnt,
 764  776                                      is_pseudo);
 765  777                          else
 766  778                                  srv_secinfo_remove(pxsec, pxcnt, sec, seccnt);
 767  779                  }
 768  780  
 769  781                  /* Update every visible - only root node has no visible */
 770  782                  if (tnode->tree_vis != NULL) {
 771  783                          secinfo_t **pxsec = &tnode->tree_vis->vis_secinfo;
 772  784                          int *pxcnt = &tnode->tree_vis->vis_seccnt;
 773  785                          if (isadd)
 774  786                                  srv_secinfo_add(pxsec, pxcnt, sec, seccnt,
 775  787                                      FALSE);
 776  788                          else
 777  789                                  srv_secinfo_remove(pxsec, pxcnt, sec, seccnt);
 778  790                  }
 779  791                  tnode = tnode->tree_parent;
 780  792          }
 781  793  }
 782  794  
 783  795  /* hash_name is a text substitution for either fid_hash or path_hash */
 784  796  #define exp_hash_unlink(exi, hash_name) \
 785  797          if (*(exi)->hash_name.bckt == (exi)) \
 786  798                  *(exi)->hash_name.bckt = (exi)->hash_name.next; \
 787  799          if ((exi)->hash_name.prev) \
 788  800                  (exi)->hash_name.prev->hash_name.next = (exi)->hash_name.next; \
 789  801          if ((exi)->hash_name.next) \
 790  802                  (exi)->hash_name.next->hash_name.prev = (exi)->hash_name.prev; \
 791  803          (exi)->hash_name.bckt = NULL;
 792  804  
 793  805  #define exp_hash_link(exi, hash_name, bucket) \
 794  806          (exi)->hash_name.bckt = (bucket); \
 795  807          (exi)->hash_name.prev = NULL; \
 796  808          (exi)->hash_name.next = *(bucket); \
  
    | 
      ↓ open down ↓ | 
    57 lines elided | 
    
      ↑ open up ↑ | 
  
 797  809          if ((exi)->hash_name.next) \
 798  810                  (exi)->hash_name.next->hash_name.prev = (exi); \
 799  811          *(bucket) = (exi);
 800  812  
 801  813  void
 802  814  export_link(nfs_export_t *ne, exportinfo_t *exi)
 803  815  {
 804  816          exportinfo_t **bckt;
 805  817  
 806  818          ASSERT(RW_WRITE_HELD(&ne->exported_lock));
      819 +        ASSERT(exi->exi_zoneid == ne->ne_globals->nfs_zoneid);
 807  820  
 808  821          bckt = &ne->exptable[exptablehash(&exi->exi_fsid, &exi->exi_fid)];
 809  822          exp_hash_link(exi, fid_hash, bckt);
 810  823  
 811  824          bckt = &ne->exptable_path_hash[pkp_tab_hash(exi->exi_export.ex_path,
 812  825              strlen(exi->exi_export.ex_path))];
 813  826          exp_hash_link(exi, path_hash, bckt);
 814  827  }
 815  828  
 816  829  /*
 817  830   * Helper functions for exi_id handling
 818  831   */
 819  832  static int
 820  833  exi_id_compar(const void *v1, const void *v2)
 821  834  {
 822  835          const struct exportinfo *e1 = v1;
 823  836          const struct exportinfo *e2 = v2;
 824  837  
 825  838          if (e1->exi_id < e2->exi_id)
 826  839                  return (-1);
 827  840          if (e1->exi_id > e2->exi_id)
 828  841                  return (1);
 829  842  
 830  843          return (0);
 831  844  }
 832  845  
 833  846  int
 834  847  exi_id_get_next()
 835  848  {
 836  849          struct exportinfo e;
 837  850          int ret = exi_id_next;
 838  851  
 839  852          ASSERT(MUTEX_HELD(&nfs_exi_id_lock));
 840  853  
 841  854          do {
 842  855                  exi_id_next++;
 843  856                  if (exi_id_next == 0)
 844  857                          exi_id_overflow = TRUE;
 845  858  
 846  859                  if (!exi_id_overflow)
 847  860                          break;
  
    | 
      ↓ open down ↓ | 
    31 lines elided | 
    
      ↑ open up ↑ | 
  
 848  861  
 849  862                  if (exi_id_next == ret)
 850  863                          cmn_err(CE_PANIC, "exi_id exhausted");
 851  864  
 852  865                  e.exi_id = exi_id_next;
 853  866          } while (avl_find(&exi_id_tree, &e, NULL) != NULL);
 854  867  
 855  868          return (ret);
 856  869  }
 857  870  
 858      -/*ARGSUSED*/
 859      -static void *
 860      -nfs_export_zone_init(zoneid_t zoneid)
      871 +/*
      872 + * Get the root file handle for this zone.
      873 + * Called when nfs_svc() starts
      874 + */
      875 +int
      876 +nfs_export_get_rootfh(nfs_globals_t *g)
 861  877  {
      878 +        nfs_export_t *ne = g->nfs_export;
      879 +        int err;
      880 +
      881 +        ne->exi_rootfid.fid_len = MAXFIDSZ;
      882 +        err = vop_fid_pseudo(ne->exi_root->exi_vp, &ne->exi_rootfid);
      883 +        if (err != 0) {
      884 +                ne->exi_rootfid.fid_len = 0;
      885 +                return (err);
      886 +        }
      887 +
      888 +        /* Setup the fhandle template exi_fh */
      889 +        ne->exi_root->exi_fh.fh_fsid = rootdir->v_vfsp->vfs_fsid;
      890 +        ne->exi_root->exi_fh.fh_xlen = ne->exi_rootfid.fid_len;
      891 +        bcopy(ne->exi_rootfid.fid_data, ne->exi_root->exi_fh.fh_xdata,
      892 +            ne->exi_rootfid.fid_len);
      893 +        ne->exi_root->exi_fh.fh_len = sizeof (ne->exi_root->exi_fh.fh_data);
      894 +
      895 +        return (0);
      896 +}
      897 +
      898 +void
      899 +nfs_export_zone_init(nfs_globals_t *ng)
      900 +{
 862  901          int i;
 863  902          nfs_export_t *ne;
 864  903  
 865  904          ne = kmem_zalloc(sizeof (*ne), KM_SLEEP);
 866  905  
 867  906          rw_init(&ne->exported_lock, NULL, RW_DEFAULT, NULL);
 868  907  
      908 +        ne->ne_globals = ng; /* "up" pointer */
      909 +
 869  910          /*
 870  911           * Allocate the place holder for the public file handle, which
 871  912           * is all zeroes. It is initially set to the root filesystem.
 872  913           */
 873  914          ne->exi_root = kmem_zalloc(sizeof (*ne->exi_root), KM_SLEEP);
 874  915          ne->exi_public = ne->exi_root;
 875  916  
 876  917          ne->exi_root->exi_export.ex_flags = EX_PUBLIC;
 877  918          ne->exi_root->exi_export.ex_pathlen = 1;        /* length of "/" */
 878  919          ne->exi_root->exi_export.ex_path =
 879  920              kmem_alloc(ne->exi_root->exi_export.ex_pathlen + 1, KM_SLEEP);
 880  921          ne->exi_root->exi_export.ex_path[0] = '/';
 881  922          ne->exi_root->exi_export.ex_path[1] = '\0';
 882  923  
 883  924          ne->exi_root->exi_count = 1;
 884  925          mutex_init(&ne->exi_root->exi_lock, NULL, MUTEX_DEFAULT, NULL);
 885  926  
 886      -        ne->exi_root->exi_zone = zone_find_by_id_nolock(zoneid);
 887      -        ne->exi_root->exi_vp = ne->exi_root->exi_zone->zone_rootvp;
 888      -        ne->exi_rootfid.fid_len = MAXFIDSZ;
 889      -        if (vop_fid_pseudo(ne->exi_root->exi_vp, &ne->exi_rootfid) != 0) {
 890      -                mutex_destroy(&ne->exi_root->exi_lock);
 891      -                kmem_free(ne->exi_root->exi_export.ex_path,
 892      -                    ne->exi_root->exi_export.ex_pathlen + 1);
 893      -                kmem_free(ne->exi_root, sizeof (*ne->exi_root));
 894      -                return (NULL);
 895      -        }
      927 +        ASSERT(curzone->zone_id == ng->nfs_zoneid);
      928 +        ne->exi_root->exi_vp = ZONE_ROOTVP();
      929 +        ne->exi_root->exi_zoneid = ng->nfs_zoneid;
 896  930  
      931 +        /*
      932 +         * Fill in ne->exi_rootfid later, in nfs_export_get_rootfh
      933 +         * because we can't correctly return errors here.
      934 +         */
      935 +
 897  936          /* Initialize auth cache and auth cache lock */
 898  937          for (i = 0; i < AUTH_TABLESIZE; i++) {
 899  938                  ne->exi_root->exi_cache[i] = kmem_alloc(sizeof (avl_tree_t),
 900  939                      KM_SLEEP);
 901  940                  avl_create(ne->exi_root->exi_cache[i],
 902  941                      nfsauth_cache_clnt_compar, sizeof (struct auth_cache_clnt),
 903  942                      offsetof(struct auth_cache_clnt, authc_link));
 904  943          }
 905  944          rw_init(&ne->exi_root->exi_cache_lock, NULL, RW_DEFAULT, NULL);
 906  945  
 907      -        /* Setup the fhandle template */
 908      -        ne->exi_root->exi_fh.fh_fsid = rootdir->v_vfsp->vfs_fsid;
 909      -        ne->exi_root->exi_fh.fh_xlen = ne->exi_rootfid.fid_len;
 910      -        bcopy(ne->exi_rootfid.fid_data, ne->exi_root->exi_fh.fh_xdata,
 911      -            ne->exi_rootfid.fid_len);
 912      -        ne->exi_root->exi_fh.fh_len = sizeof (ne->exi_root->exi_fh.fh_data);
      946 +        /* setup exi_fh later, in nfs_export_get_rootfh */
 913  947  
 914  948          rw_enter(&ne->exported_lock, RW_WRITER);
 915  949  
 916  950          /* Publish the exportinfo in the hash table */
 917  951          export_link(ne, ne->exi_root);
 918  952  
 919  953          /* Initialize exi_id and exi_kstats */
 920  954          mutex_enter(&nfs_exi_id_lock);
 921  955          ne->exi_root->exi_id = exi_id_get_next();
 922  956          avl_add(&exi_id_tree, ne->exi_root);
 923  957          mutex_exit(&nfs_exi_id_lock);
 924  958  
 925  959          rw_exit(&ne->exported_lock);
 926  960          ne->ns_root = NULL;
 927  961  
 928      -        return (ne);
      962 +        ng->nfs_export = ne;
 929  963  }
 930  964  
 931      -/*ARGSUSED*/
 932      -static void
 933      -nfs_export_zone_fini(zoneid_t zoneid, void *data)
      965 +/*
      966 + * During zone shutdown, remove exports
      967 + */
      968 +void
      969 +nfs_export_zone_shutdown(nfs_globals_t *ng)
 934  970  {
      971 +        nfs_export_t *ne = ng->nfs_export;
      972 +        struct exportinfo *exi, *nexi;
      973 +        int i, errors;
      974 +
      975 +        rw_enter(&ne->exported_lock, RW_READER);
      976 +
      977 +        errors = 0;
      978 +        for (i = 0; i < EXPTABLESIZE; i++) {
      979 +
      980 +                exi = ne->exptable[i];
      981 +                if (exi != NULL)
      982 +                        exi_hold(exi);
      983 +
      984 +                while (exi != NULL) {
      985 +
      986 +                        /*
      987 +                         * Get and hold next export before
      988 +                         * dropping the rwlock and unexport
      989 +                         */
      990 +                        nexi = exi->fid_hash.next;
      991 +                        if (nexi != NULL)
      992 +                                exi_hold(nexi);
      993 +
      994 +                        rw_exit(&ne->exported_lock);
      995 +
      996 +                        /*
      997 +                         * Skip ne->exi_root which gets special
      998 +                         * create/destroy handling.
      999 +                         */
     1000 +                        if (exi != ne->exi_root &&
     1001 +                            unexport(ne, exi) != 0)
     1002 +                                errors++;
     1003 +                        exi_rele(exi);
     1004 +
     1005 +                        rw_enter(&ne->exported_lock, RW_READER);
     1006 +                        exi = nexi;
     1007 +                }
     1008 +        }
     1009 +        if (errors > 0) {
     1010 +                cmn_err(CE_NOTE,
     1011 +                    "NFS: failed un-exports in zone %d",
     1012 +                    (int) ng->nfs_zoneid);
     1013 +        }
     1014 +
     1015 +        rw_exit(&ne->exported_lock);
     1016 +}
     1017 +
     1018 +void
     1019 +nfs_export_zone_fini(nfs_globals_t *ng)
     1020 +{
 935 1021          int i;
 936      -        nfs_export_t *ne = data;
     1022 +        nfs_export_t *ne = ng->nfs_export;
 937 1023          struct exportinfo *exi;
 938 1024  
     1025 +        ng->nfs_export = NULL;
     1026 +
 939 1027          rw_enter(&ne->exported_lock, RW_WRITER);
 940      -        mutex_enter(&nfs_exi_id_lock);
 941 1028  
     1029 +        mutex_enter(&nfs_exi_id_lock);
 942 1030          avl_remove(&exi_id_tree, ne->exi_root);
     1031 +        mutex_exit(&nfs_exi_id_lock);
     1032 +
 943 1033          export_unlink(ne, ne->exi_root);
 944 1034  
 945      -        mutex_exit(&nfs_exi_id_lock);
 946 1035          rw_exit(&ne->exported_lock);
 947 1036  
 948 1037          /* Deallocate the place holder for the public file handle */
 949 1038          srv_secinfo_list_free(ne->exi_root->exi_export.ex_secinfo,
 950 1039              ne->exi_root->exi_export.ex_seccnt);
 951 1040          mutex_destroy(&ne->exi_root->exi_lock);
 952 1041  
 953 1042          rw_destroy(&ne->exi_root->exi_cache_lock);
 954 1043          for (i = 0; i < AUTH_TABLESIZE; i++) {
 955 1044                  avl_destroy(ne->exi_root->exi_cache[i]);
 956 1045                  kmem_free(ne->exi_root->exi_cache[i], sizeof (avl_tree_t));
 957 1046          }
 958 1047  
 959 1048          kmem_free(ne->exi_root->exi_export.ex_path,
 960 1049              ne->exi_root->exi_export.ex_pathlen + 1);
 961 1050          kmem_free(ne->exi_root, sizeof (*ne->exi_root));
 962 1051  
     1052 +        /*
     1053 +         * The shutdown hook should have left the exi_id_tree
     1054 +         * with nothing belonging to this zone.
     1055 +         */
     1056 +        mutex_enter(&nfs_exi_id_lock);
     1057 +        i = 0;
 963 1058          exi = avl_first(&exi_id_tree);
 964 1059          while (exi != NULL) {
 965      -                struct exportinfo *nexi = AVL_NEXT(&exi_id_tree, exi);
 966      -                if (zoneid == exi->exi_zoneid)
 967      -                        (void) unexport(ne, exi);
 968      -                exi = nexi;
     1060 +                if (exi->exi_zoneid == ng->nfs_zoneid)
     1061 +                        i++;
     1062 +                exi = AVL_NEXT(&exi_id_tree, exi);
 969 1063          }
 970      -
     1064 +        mutex_exit(&nfs_exi_id_lock);
     1065 +        if (i > 0) {
     1066 +                cmn_err(CE_NOTE,
     1067 +                    "NFS: zone %d has %d export IDs left after shutdown",
     1068 +                    (int) ng->nfs_zoneid, i);
     1069 +        }
 971 1070          rw_destroy(&ne->exported_lock);
 972 1071          kmem_free(ne, sizeof (*ne));
 973 1072  }
 974 1073  
 975 1074  /*
 976 1075   * Initialization routine for export routines.
 977 1076   * Should only be called once.
 978 1077   */
 979 1078  void
 980 1079  nfs_exportinit(void)
 981 1080  {
 982 1081          mutex_init(&nfs_exi_id_lock, NULL, MUTEX_DEFAULT, NULL);
 983 1082  
 984 1083          /* exi_id handling initialization */
 985 1084          exi_id_next = 0;
 986 1085          exi_id_overflow = FALSE;
 987 1086          avl_create(&exi_id_tree, exi_id_compar, sizeof (struct exportinfo),
 988 1087              offsetof(struct exportinfo, exi_id_link));
 989 1088  
 990      -        zone_key_create(&nfs_export_key, nfs_export_zone_init,
 991      -            NULL, nfs_export_zone_fini);
 992      -
 993 1089          nfslog_init();
 994 1090  }
 995 1091  
 996 1092  /*
 997 1093   * Finalization routine for export routines.
 998 1094   */
 999 1095  void
1000 1096  nfs_exportfini(void)
1001 1097  {
1002      -        (void) zone_key_delete(nfs_export_key);
1003 1098          avl_destroy(&exi_id_tree);
1004 1099          mutex_destroy(&nfs_exi_id_lock);
1005 1100  }
1006 1101  
1007 1102  /*
1008 1103   *  Check if 2 gss mechanism identifiers are the same.
1009 1104   *
1010 1105   *  return FALSE if not the same.
1011 1106   *  return TRUE if the same.
1012 1107   */
1013 1108  static bool_t
1014 1109  nfs_mech_equal(rpc_gss_OID mech1, rpc_gss_OID mech2)
1015 1110  {
1016 1111          if ((mech1->length == 0) && (mech2->length == 0))
1017 1112                  return (TRUE);
1018 1113  
1019 1114          if (mech1->length != mech2->length)
1020 1115                  return (FALSE);
1021 1116  
1022 1117          return (bcmp(mech1->elements, mech2->elements, mech1->length) == 0);
1023 1118  }
1024 1119  
1025 1120  /*
1026 1121   *  This routine is used by rpc to map rpc security number
1027 1122   *  to nfs specific security flavor number.
1028 1123   *
1029 1124   *  The gss callback prototype is
1030 1125   *  callback(struct svc_req *, gss_cred_id_t *, gss_ctx_id_t *,
1031 1126   *                              rpc_gss_lock_t *, void **),
1032 1127   *  since nfs does not use the gss_cred_id_t/gss_ctx_id_t arguments
1033 1128   *  we cast them to void.
1034 1129   */
1035 1130  /*ARGSUSED*/
1036 1131  bool_t
1037 1132  rfs_gsscallback(struct svc_req *req, gss_cred_id_t deleg, void *gss_context,
1038 1133      rpc_gss_lock_t *lock, void **cookie)
1039 1134  {
1040 1135          int i, j;
1041 1136          rpc_gss_rawcred_t *raw_cred;
1042 1137          struct exportinfo *exi;
1043 1138          nfs_export_t *ne = nfs_get_export();
1044 1139  
1045 1140          /*
1046 1141           * We don't deal with delegated credentials.
1047 1142           */
1048 1143          if (deleg != GSS_C_NO_CREDENTIAL)
1049 1144                  return (FALSE);
1050 1145  
1051 1146          raw_cred = lock->raw_cred;
1052 1147          *cookie = NULL;
1053 1148  
1054 1149          rw_enter(&ne->exported_lock, RW_READER);
1055 1150  
1056 1151          for (i = 0; i < EXPTABLESIZE; i++) {
1057 1152                  exi = ne->exptable[i];
1058 1153                  while (exi) {
1059 1154                          if (exi->exi_export.ex_seccnt > 0) {
1060 1155                                  struct secinfo *secp;
1061 1156                                  seconfig_t *se;
1062 1157                                  int seccnt;
1063 1158  
1064 1159                                  secp = exi->exi_export.ex_secinfo;
1065 1160                                  seccnt = exi->exi_export.ex_seccnt;
1066 1161                                  for (j = 0; j < seccnt; j++) {
1067 1162                                          /*
1068 1163                                           *  If there is a map of the triplet
1069 1164                                           *  (mechanism, service, qop) between
1070 1165                                           *  raw_cred and the exported flavor,
1071 1166                                           *  get the pseudo flavor number.
1072 1167                                           *  Also qop should not be NULL, it
1073 1168                                           *  should be "default" or something
1074 1169                                           *  else.
1075 1170                                           */
1076 1171                                          se = &secp[j].s_secinfo;
1077 1172                                          if ((se->sc_rpcnum == RPCSEC_GSS) &&
1078 1173  
1079 1174                                              (nfs_mech_equal(
1080 1175                                              se->sc_gss_mech_type,
1081 1176                                              raw_cred->mechanism)) &&
1082 1177  
1083 1178                                              (se->sc_service ==
1084 1179                                              raw_cred->service) &&
1085 1180                                              (raw_cred->qop == se->sc_qop)) {
1086 1181  
1087 1182                                                  *cookie = (void *)(uintptr_t)
1088 1183                                                      se->sc_nfsnum;
1089 1184                                                  goto done;
1090 1185                                          }
1091 1186                                  }
1092 1187                          }
1093 1188                          exi = exi->fid_hash.next;
1094 1189                  }
1095 1190          }
1096 1191  done:
1097 1192          rw_exit(&ne->exported_lock);
1098 1193  
1099 1194          /*
1100 1195           * If no nfs pseudo number mapping can be found in the export
1101 1196           * table, assign the nfsflavor to NFS_FLAVOR_NOMAP. In V4, we may
1102 1197           * recover the flavor mismatch from NFS layer (NFS4ERR_WRONGSEC).
1103 1198           *
1104 1199           * For example:
1105 1200           *      server first shares with krb5i;
1106 1201           *      client mounts with krb5i;
1107 1202           *      server re-shares with krb5p;
1108 1203           *      client tries with krb5i, but no mapping can be found;
1109 1204           *      rpcsec_gss module calls this routine to do the mapping,
1110 1205           *              if this routine fails, request is rejected from
1111 1206           *              the rpc layer.
1112 1207           *      What we need is to let the nfs layer reject the request.
1113 1208           *      For V4, we can reject with NFS4ERR_WRONGSEC and the client
1114 1209           *      may recover from it by getting the new flavor via SECINFO.
1115 1210           *
1116 1211           * nfs pseudo number for RPCSEC_GSS mapping (see nfssec.conf)
1117 1212           * is owned by IANA (see RFC 2623).
1118 1213           *
1119 1214           * XXX NFS_FLAVOR_NOMAP is defined in Solaris to work around
1120 1215           * the implementation issue. This number should not overlap with
1121 1216           * any new IANA defined pseudo flavor numbers.
1122 1217           */
1123 1218          if (*cookie == NULL)
1124 1219                  *cookie = (void *)NFS_FLAVOR_NOMAP;
1125 1220  
1126 1221          lock->locked = TRUE;
1127 1222  
1128 1223          return (TRUE);
1129 1224  }
1130 1225  
1131 1226  
1132 1227  /*
1133 1228   * Exportfs system call; credentials should be checked before
1134 1229   * calling this function.
1135 1230   */
1136 1231  int
1137 1232  exportfs(struct exportfs_args *args, model_t model, cred_t *cr)
1138 1233  {
1139 1234          vnode_t *vp;
1140 1235          vnode_t *dvp;
1141 1236          struct exportdata *kex;
1142 1237          struct exportinfo *exi = NULL;
1143 1238          struct exportinfo *ex, *ex1, *ex2;
1144 1239          fid_t fid;
1145 1240          fsid_t fsid;
1146 1241          int error;
1147 1242          size_t allocsize;
1148 1243          struct secinfo *sp;
1149 1244          struct secinfo *exs;
1150 1245          rpc_gss_callback_t cb;
1151 1246          char *pathbuf;
1152 1247          char *log_buffer;
1153 1248          char *tagbuf;
1154 1249          int callback;
1155 1250          int allocd_seccnt;
1156 1251          STRUCT_HANDLE(exportfs_args, uap);
1157 1252          STRUCT_DECL(exportdata, uexi);
1158 1253          struct secinfo newsec[MAX_FLAVORS];
1159 1254          int newcnt;
1160 1255          struct secinfo oldsec[MAX_FLAVORS];
1161 1256          int oldcnt;
1162 1257          int i;
1163 1258          struct pathname lookpn;
1164 1259          nfs_export_t *ne = nfs_get_export();
1165 1260  
1166 1261          STRUCT_SET_HANDLE(uap, model, args);
1167 1262  
1168 1263          /* Read in pathname from userspace */
1169 1264          if (error = pn_get(STRUCT_FGETP(uap, dname), UIO_USERSPACE, &lookpn))
1170 1265                  return (error);
1171 1266  
1172 1267          /* Walk the export list looking for that pathname */
1173 1268          rw_enter(&ne->exported_lock, RW_READER);
1174 1269          DTRACE_PROBE(nfss__i__exported_lock1_start);
1175 1270          for (ex1 = ne->exptable_path_hash[pkp_tab_hash(lookpn.pn_path,
1176 1271              strlen(lookpn.pn_path))]; ex1; ex1 = ex1->path_hash.next) {
1177 1272                  if (ex1 != ne->exi_root && 0 ==
1178 1273                      strcmp(ex1->exi_export.ex_path, lookpn.pn_path)) {
1179 1274                          exi_hold(ex1);
1180 1275                          break;
1181 1276                  }
1182 1277          }
1183 1278          DTRACE_PROBE(nfss__i__exported_lock1_stop);
1184 1279          rw_exit(&ne->exported_lock);
1185 1280  
1186 1281          /* Is this an unshare? */
1187 1282          if (STRUCT_FGETP(uap, uex) == NULL) {
1188 1283                  pn_free(&lookpn);
1189 1284                  if (ex1 == NULL)
1190 1285                          return (EINVAL);
1191 1286                  error = unexport(ne, ex1);
1192 1287                  exi_rele(ex1);
1193 1288                  return (error);
1194 1289          }
1195 1290  
1196 1291          /* It is a share or a re-share */
1197 1292          error = lookupname(STRUCT_FGETP(uap, dname), UIO_USERSPACE,
1198 1293              FOLLOW, &dvp, &vp);
1199 1294          if (error == EINVAL) {
1200 1295                  /*
1201 1296                   * if fname resolves to / we get EINVAL error
1202 1297                   * since we wanted the parent vnode. Try again
1203 1298                   * with NULL dvp.
1204 1299                   */
1205 1300                  error = lookupname(STRUCT_FGETP(uap, dname), UIO_USERSPACE,
1206 1301                      FOLLOW, NULL, &vp);
1207 1302                  dvp = NULL;
1208 1303          }
1209 1304          if (!error && vp == NULL) {
1210 1305                  /* Last component of fname not found */
1211 1306                  if (dvp != NULL)
1212 1307                          VN_RELE(dvp);
1213 1308                  error = ENOENT;
1214 1309          }
1215 1310          if (error) {
1216 1311                  pn_free(&lookpn);
1217 1312                  if (ex1)
1218 1313                          exi_rele(ex1);
1219 1314                  return (error);
1220 1315          }
1221 1316  
1222 1317          /*
1223 1318           * 'vp' may be an AUTOFS node, so we perform a
1224 1319           * VOP_ACCESS() to trigger the mount of the
1225 1320           * intended filesystem, so we can share the intended
1226 1321           * filesystem instead of the AUTOFS filesystem.
1227 1322           */
1228 1323          (void) VOP_ACCESS(vp, 0, 0, cr, NULL);
1229 1324  
1230 1325          /*
1231 1326           * We're interested in the top most filesystem.
1232 1327           * This is specially important when uap->dname is a trigger
1233 1328           * AUTOFS node, since we're really interested in sharing the
1234 1329           * filesystem AUTOFS mounted as result of the VOP_ACCESS()
1235 1330           * call not the AUTOFS node itself.
1236 1331           */
1237 1332          if (vn_mountedvfs(vp) != NULL) {
1238 1333                  if (error = traverse(&vp)) {
1239 1334                          VN_RELE(vp);
1240 1335                          if (dvp != NULL)
1241 1336                                  VN_RELE(dvp);
1242 1337                          pn_free(&lookpn);
1243 1338                          if (ex1)
1244 1339                                  exi_rele(ex1);
1245 1340                          return (error);
1246 1341                  }
1247 1342          }
1248 1343  
1249 1344          /* Do not allow sharing another vnode for already shared path */
1250 1345          if (ex1 && !PSEUDO(ex1) && !VN_CMP(ex1->exi_vp, vp)) {
1251 1346                  VN_RELE(vp);
1252 1347                  if (dvp != NULL)
1253 1348                          VN_RELE(dvp);
1254 1349                  pn_free(&lookpn);
1255 1350                  exi_rele(ex1);
1256 1351                  return (EEXIST);
1257 1352          }
1258 1353          if (ex1)
1259 1354                  exi_rele(ex1);
1260 1355  
1261 1356          /*
1262 1357           * Get the vfs id
1263 1358           */
1264 1359          bzero(&fid, sizeof (fid));
1265 1360          fid.fid_len = MAXFIDSZ;
1266 1361          error = VOP_FID(vp, &fid, NULL);
1267 1362          fsid = vp->v_vfsp->vfs_fsid;
1268 1363  
1269 1364          if (error) {
1270 1365                  VN_RELE(vp);
1271 1366                  if (dvp != NULL)
1272 1367                          VN_RELE(dvp);
1273 1368                  /*
1274 1369                   * If VOP_FID returns ENOSPC then the fid supplied
1275 1370                   * is too small.  For now we simply return EREMOTE.
1276 1371                   */
1277 1372                  if (error == ENOSPC)
1278 1373                          error = EREMOTE;
1279 1374                  pn_free(&lookpn);
1280 1375                  return (error);
1281 1376          }
1282 1377  
1283 1378          /*
1284 1379           * Do not allow re-sharing a shared vnode under a different path
1285 1380           * PSEUDO export has ex_path fabricated, e.g. "/tmp (pseudo)", skip it.
1286 1381           */
1287 1382          rw_enter(&ne->exported_lock, RW_READER);
1288 1383          DTRACE_PROBE(nfss__i__exported_lock2_start);
1289 1384          for (ex2 = ne->exptable[exptablehash(&fsid, &fid)]; ex2;
1290 1385              ex2 = ex2->fid_hash.next) {
1291 1386                  if (ex2 != ne->exi_root && !PSEUDO(ex2) &&
1292 1387                      VN_CMP(ex2->exi_vp, vp) &&
1293 1388                      strcmp(ex2->exi_export.ex_path, lookpn.pn_path) != 0) {
1294 1389                          DTRACE_PROBE(nfss__i__exported_lock2_stop);
1295 1390                          rw_exit(&ne->exported_lock);
1296 1391                          VN_RELE(vp);
1297 1392                          if (dvp != NULL)
1298 1393                                  VN_RELE(dvp);
1299 1394                          pn_free(&lookpn);
1300 1395                          return (EEXIST);
1301 1396                  }
1302 1397          }
1303 1398          DTRACE_PROBE(nfss__i__exported_lock2_stop);
1304 1399          rw_exit(&ne->exported_lock);
1305 1400          pn_free(&lookpn);
1306 1401  
1307 1402          exi = kmem_zalloc(sizeof (*exi), KM_SLEEP);
1308 1403          exi->exi_fsid = fsid;
1309 1404          exi->exi_fid = fid;
1310 1405          exi->exi_vp = vp;
1311 1406          exi->exi_count = 1;
1312 1407          exi->exi_zone = crgetzone(cr);
1313 1408          ASSERT(exi->exi_zone != NULL);          /* XXX KEBE ASKS... */
1314 1409          ASSERT3P(exi->exi_zone, ==, curzone);   /* ... are these legit? */
1315 1410          exi->exi_volatile_dev = (vfssw[vp->v_vfsp->vfs_fstype].vsw_flag &
1316 1411              VSW_VOLATILEDEV) ? 1 : 0;
1317 1412          mutex_init(&exi->exi_lock, NULL, MUTEX_DEFAULT, NULL);
1318 1413          exi->exi_dvp = dvp;
1319 1414  
1320 1415          /*
1321 1416           * Initialize auth cache and auth cache lock
1322 1417           */
1323 1418          for (i = 0; i < AUTH_TABLESIZE; i++) {
1324 1419                  exi->exi_cache[i] = kmem_alloc(sizeof (avl_tree_t), KM_SLEEP);
1325 1420                  avl_create(exi->exi_cache[i], nfsauth_cache_clnt_compar,
1326 1421                      sizeof (struct auth_cache_clnt),
1327 1422                      offsetof(struct auth_cache_clnt, authc_link));
1328 1423          }
1329 1424          rw_init(&exi->exi_cache_lock, NULL, RW_DEFAULT, NULL);
1330 1425  
1331 1426          /*
1332 1427           * Build up the template fhandle
1333 1428           */
1334 1429          exi->exi_fh.fh_fsid = fsid;
1335 1430          if (exi->exi_fid.fid_len > sizeof (exi->exi_fh.fh_xdata)) {
1336 1431                  error = EREMOTE;
1337 1432                  goto out1;
1338 1433          }
1339 1434          exi->exi_fh.fh_xlen = exi->exi_fid.fid_len;
1340 1435          bcopy(exi->exi_fid.fid_data, exi->exi_fh.fh_xdata,
1341 1436              exi->exi_fid.fid_len);
1342 1437  
1343 1438          exi->exi_fh.fh_len = sizeof (exi->exi_fh.fh_data);
1344 1439  
1345 1440          kex = &exi->exi_export;
1346 1441  
1347 1442          /*
1348 1443           * Load in everything, and do sanity checking
1349 1444           */
1350 1445          STRUCT_INIT(uexi, model);
1351 1446          if (copyin(STRUCT_FGETP(uap, uex), STRUCT_BUF(uexi),
1352 1447              STRUCT_SIZE(uexi))) {
1353 1448                  error = EFAULT;
1354 1449                  goto out1;
1355 1450          }
1356 1451  
1357 1452          kex->ex_version = STRUCT_FGET(uexi, ex_version);
1358 1453          if (kex->ex_version != EX_CURRENT_VERSION) {
1359 1454                  error = EINVAL;
1360 1455                  cmn_err(CE_WARN,
1361 1456                      "NFS: exportfs requires export struct version 2 - got %d\n",
1362 1457                      kex->ex_version);
1363 1458                  goto out1;
1364 1459          }
1365 1460  
1366 1461          /*
1367 1462           * Must have at least one security entry
1368 1463           */
1369 1464          kex->ex_seccnt = STRUCT_FGET(uexi, ex_seccnt);
1370 1465          if (kex->ex_seccnt < 1) {
1371 1466                  error = EINVAL;
1372 1467                  goto out1;
1373 1468          }
1374 1469  
1375 1470          kex->ex_path = STRUCT_FGETP(uexi, ex_path);
1376 1471          kex->ex_pathlen = STRUCT_FGET(uexi, ex_pathlen);
1377 1472          kex->ex_flags = STRUCT_FGET(uexi, ex_flags);
1378 1473          kex->ex_anon = STRUCT_FGET(uexi, ex_anon);
1379 1474          kex->ex_secinfo = STRUCT_FGETP(uexi, ex_secinfo);
1380 1475          kex->ex_index = STRUCT_FGETP(uexi, ex_index);
1381 1476          kex->ex_log_buffer = STRUCT_FGETP(uexi, ex_log_buffer);
1382 1477          kex->ex_log_bufferlen = STRUCT_FGET(uexi, ex_log_bufferlen);
1383 1478          kex->ex_tag = STRUCT_FGETP(uexi, ex_tag);
1384 1479          kex->ex_taglen = STRUCT_FGET(uexi, ex_taglen);
1385 1480  
1386 1481          /*
1387 1482           * Copy the exported pathname into
1388 1483           * an appropriately sized buffer.
1389 1484           */
1390 1485          pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
1391 1486          if (copyinstr(kex->ex_path, pathbuf, MAXPATHLEN, &kex->ex_pathlen)) {
1392 1487                  kmem_free(pathbuf, MAXPATHLEN);
1393 1488                  error = EFAULT;
1394 1489                  goto out1;
1395 1490          }
1396 1491          kex->ex_path = kmem_alloc(kex->ex_pathlen + 1, KM_SLEEP);
1397 1492          bcopy(pathbuf, kex->ex_path, kex->ex_pathlen);
1398 1493          kex->ex_path[kex->ex_pathlen] = '\0';
1399 1494          kmem_free(pathbuf, MAXPATHLEN);
1400 1495  
1401 1496          /*
1402 1497           * Get the path to the logging buffer and the tag
1403 1498           */
1404 1499          if (kex->ex_flags & EX_LOG) {
1405 1500                  log_buffer = kmem_alloc(MAXPATHLEN, KM_SLEEP);
1406 1501                  if (copyinstr(kex->ex_log_buffer, log_buffer, MAXPATHLEN,
1407 1502                      &kex->ex_log_bufferlen)) {
1408 1503                          kmem_free(log_buffer, MAXPATHLEN);
1409 1504                          error = EFAULT;
1410 1505                          goto out2;
1411 1506                  }
1412 1507                  kex->ex_log_buffer =
1413 1508                      kmem_alloc(kex->ex_log_bufferlen + 1, KM_SLEEP);
1414 1509                  bcopy(log_buffer, kex->ex_log_buffer, kex->ex_log_bufferlen);
1415 1510                  kex->ex_log_buffer[kex->ex_log_bufferlen] = '\0';
1416 1511                  kmem_free(log_buffer, MAXPATHLEN);
1417 1512  
1418 1513                  tagbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
1419 1514                  if (copyinstr(kex->ex_tag, tagbuf, MAXPATHLEN,
1420 1515                      &kex->ex_taglen)) {
1421 1516                          kmem_free(tagbuf, MAXPATHLEN);
1422 1517                          error = EFAULT;
1423 1518                          goto out3;
1424 1519                  }
1425 1520                  kex->ex_tag = kmem_alloc(kex->ex_taglen + 1, KM_SLEEP);
1426 1521                  bcopy(tagbuf, kex->ex_tag, kex->ex_taglen);
1427 1522                  kex->ex_tag[kex->ex_taglen] = '\0';
1428 1523                  kmem_free(tagbuf, MAXPATHLEN);
1429 1524          }
1430 1525  
1431 1526          /*
1432 1527           * Load the security information for each flavor
1433 1528           */
1434 1529          allocsize = kex->ex_seccnt * SIZEOF_STRUCT(secinfo, model);
1435 1530          sp = kmem_zalloc(allocsize, KM_SLEEP);
1436 1531          if (copyin(kex->ex_secinfo, sp, allocsize)) {
1437 1532                  kmem_free(sp, allocsize);
1438 1533                  error = EFAULT;
1439 1534                  goto out4;
1440 1535          }
1441 1536  
1442 1537          /*
1443 1538           * All of these nested structures need to be converted to
1444 1539           * the kernel native format.
1445 1540           */
1446 1541          if (model != DATAMODEL_NATIVE) {
1447 1542                  size_t allocsize2;
1448 1543                  struct secinfo *sp2;
1449 1544  
1450 1545                  allocsize2 = kex->ex_seccnt * sizeof (struct secinfo);
1451 1546                  sp2 = kmem_zalloc(allocsize2, KM_SLEEP);
1452 1547  
1453 1548                  for (i = 0; i < kex->ex_seccnt; i++) {
1454 1549                          STRUCT_HANDLE(secinfo, usi);
1455 1550  
1456 1551                          STRUCT_SET_HANDLE(usi, model,
1457 1552                              (struct secinfo *)((caddr_t)sp +
1458 1553                              (i * SIZEOF_STRUCT(secinfo, model))));
1459 1554                          bcopy(STRUCT_FGET(usi, s_secinfo.sc_name),
1460 1555                              sp2[i].s_secinfo.sc_name, MAX_NAME_LEN);
1461 1556                          sp2[i].s_secinfo.sc_nfsnum =
1462 1557                              STRUCT_FGET(usi, s_secinfo.sc_nfsnum);
1463 1558                          sp2[i].s_secinfo.sc_rpcnum =
1464 1559                              STRUCT_FGET(usi, s_secinfo.sc_rpcnum);
1465 1560                          bcopy(STRUCT_FGET(usi, s_secinfo.sc_gss_mech),
1466 1561                              sp2[i].s_secinfo.sc_gss_mech, MAX_NAME_LEN);
1467 1562                          sp2[i].s_secinfo.sc_gss_mech_type =
1468 1563                              STRUCT_FGETP(usi, s_secinfo.sc_gss_mech_type);
1469 1564                          sp2[i].s_secinfo.sc_qop =
1470 1565                              STRUCT_FGET(usi, s_secinfo.sc_qop);
1471 1566                          sp2[i].s_secinfo.sc_service =
1472 1567                              STRUCT_FGET(usi, s_secinfo.sc_service);
1473 1568  
1474 1569                          sp2[i].s_flags = STRUCT_FGET(usi, s_flags);
1475 1570                          sp2[i].s_window = STRUCT_FGET(usi, s_window);
1476 1571                          sp2[i].s_rootid = STRUCT_FGET(usi, s_rootid);
1477 1572                          sp2[i].s_rootcnt = STRUCT_FGET(usi, s_rootcnt);
1478 1573                          sp2[i].s_rootnames = STRUCT_FGETP(usi, s_rootnames);
1479 1574                  }
1480 1575                  kmem_free(sp, allocsize);
1481 1576                  sp = sp2;
1482 1577                  allocsize = allocsize2;
1483 1578          }
1484 1579  
1485 1580          kex->ex_secinfo = sp;
1486 1581  
1487 1582          /*
1488 1583           * And now copy rootnames for each individual secinfo.
1489 1584           */
1490 1585          callback = 0;
1491 1586          allocd_seccnt = 0;
1492 1587          while (allocd_seccnt < kex->ex_seccnt) {
1493 1588  
1494 1589                  exs = &sp[allocd_seccnt];
1495 1590                  if (exs->s_rootcnt > 0) {
1496 1591                          if (!sec_svc_loadrootnames(exs->s_secinfo.sc_rpcnum,
1497 1592                              exs->s_rootcnt, &exs->s_rootnames, model)) {
1498 1593                                  error = EFAULT;
1499 1594                                  goto out5;
1500 1595                          }
1501 1596                  }
1502 1597  
1503 1598                  if (exs->s_secinfo.sc_rpcnum == RPCSEC_GSS) {
1504 1599                          rpc_gss_OID mech_tmp;
1505 1600                          STRUCT_DECL(rpc_gss_OID_s, umech_tmp);
1506 1601                          caddr_t elements_tmp;
1507 1602  
1508 1603                          /* Copyin mechanism type */
1509 1604                          STRUCT_INIT(umech_tmp, model);
1510 1605                          mech_tmp = kmem_alloc(sizeof (*mech_tmp), KM_SLEEP);
1511 1606                          if (copyin(exs->s_secinfo.sc_gss_mech_type,
1512 1607                              STRUCT_BUF(umech_tmp), STRUCT_SIZE(umech_tmp))) {
1513 1608                                  kmem_free(mech_tmp, sizeof (*mech_tmp));
1514 1609                                  error = EFAULT;
1515 1610                                  goto out5;
1516 1611                          }
1517 1612                          mech_tmp->length = STRUCT_FGET(umech_tmp, length);
1518 1613                          mech_tmp->elements = STRUCT_FGETP(umech_tmp, elements);
1519 1614  
1520 1615                          elements_tmp = kmem_alloc(mech_tmp->length, KM_SLEEP);
1521 1616                          if (copyin(mech_tmp->elements, elements_tmp,
1522 1617                              mech_tmp->length)) {
1523 1618                                  kmem_free(elements_tmp, mech_tmp->length);
1524 1619                                  kmem_free(mech_tmp, sizeof (*mech_tmp));
1525 1620                                  error = EFAULT;
1526 1621                                  goto out5;
1527 1622                          }
1528 1623                          mech_tmp->elements = elements_tmp;
1529 1624                          exs->s_secinfo.sc_gss_mech_type = mech_tmp;
1530 1625                          allocd_seccnt++;
1531 1626  
1532 1627                          callback = 1;
1533 1628                  } else
1534 1629                          allocd_seccnt++;
1535 1630          }
1536 1631  
1537 1632          /*
1538 1633           * Init the secinfo reference count and mark these flavors
1539 1634           * explicitly exported flavors.
1540 1635           */
1541 1636          for (i = 0; i < kex->ex_seccnt; i++) {
1542 1637                  kex->ex_secinfo[i].s_flags |= M_4SEC_EXPORTED;
1543 1638                  kex->ex_secinfo[i].s_refcnt = 1;
1544 1639          }
1545 1640  
1546 1641          /*
1547 1642           *  Set up rpcsec_gss callback routine entry if any.
1548 1643           */
1549 1644          if (callback) {
1550 1645                  cb.callback = rfs_gsscallback;
1551 1646                  cb.program = NFS_ACL_PROGRAM;
1552 1647                  for (cb.version = NFS_ACL_VERSMIN;
1553 1648                      cb.version <= NFS_ACL_VERSMAX; cb.version++) {
1554 1649                          (void) sec_svc_control(RPC_SVC_SET_GSS_CALLBACK,
1555 1650                              (void *)&cb);
1556 1651                  }
1557 1652  
1558 1653                  cb.program = NFS_PROGRAM;
1559 1654                  for (cb.version = NFS_VERSMIN;
1560 1655                      cb.version <= NFS_VERSMAX; cb.version++) {
1561 1656                          (void) sec_svc_control(RPC_SVC_SET_GSS_CALLBACK,
1562 1657                              (void *)&cb);
1563 1658                  }
1564 1659          }
1565 1660  
1566 1661          /*
1567 1662           * Check the index flag. Do this here to avoid holding the
1568 1663           * lock while dealing with the index option (as we do with
1569 1664           * the public option).
1570 1665           */
1571 1666          if (kex->ex_flags & EX_INDEX) {
1572 1667                  if (!kex->ex_index) {   /* sanity check */
1573 1668                          error = EINVAL;
1574 1669                          goto out5;
1575 1670                  }
1576 1671                  if (error = loadindex(kex))
1577 1672                          goto out5;
1578 1673          }
1579 1674  
1580 1675          if (kex->ex_flags & EX_LOG) {
1581 1676                  if (error = nfslog_setup(exi))
1582 1677                          goto out6;
1583 1678          }
1584 1679  
1585 1680          /*
1586 1681           * Insert the new entry at the front of the export list
1587 1682           */
1588 1683          rw_enter(&ne->exported_lock, RW_WRITER);
1589 1684          DTRACE_PROBE(nfss__i__exported_lock3_start);
1590 1685  
1591 1686          export_link(ne, exi);
1592 1687  
1593 1688          /*
1594 1689           * Check the rest of the list for an old entry for the fs.
1595 1690           * If one is found then unlink it, wait until this is the
1596 1691           * only reference and then free it.
1597 1692           */
1598 1693          for (ex = exi->fid_hash.next; ex != NULL; ex = ex->fid_hash.next) {
1599 1694                  if (ex != ne->exi_root && VN_CMP(ex->exi_vp, vp)) {
1600 1695                          mutex_enter(&nfs_exi_id_lock);
1601 1696                          avl_remove(&exi_id_tree, ex);
1602 1697                          mutex_exit(&nfs_exi_id_lock);
1603 1698                          export_unlink(ne, ex);
1604 1699                          break;
1605 1700                  }
1606 1701          }
1607 1702  
1608 1703          /*
1609 1704           * If the public filehandle is pointing at the
1610 1705           * old entry, then point it back at the root.
1611 1706           */
1612 1707          if (ex != NULL && ex == ne->exi_public)
1613 1708                  ne->exi_public = ne->exi_root;
1614 1709  
1615 1710          /*
1616 1711           * If the public flag is on, make the global exi_public
1617 1712           * point to this entry and turn off the public bit so that
1618 1713           * we can distinguish it from the place holder export.
1619 1714           */
1620 1715          if (kex->ex_flags & EX_PUBLIC) {
1621 1716                  ne->exi_public = exi;
1622 1717                  kex->ex_flags &= ~EX_PUBLIC;
1623 1718          }
1624 1719  
1625 1720  #ifdef VOLATILE_FH_TEST
1626 1721          /*
1627 1722           * Set up the volatile_id value if volatile on share.
1628 1723           * The list of volatile renamed filehandles is always destroyed,
1629 1724           * if the fs was reshared.
1630 1725           */
1631 1726          if (kex->ex_flags & EX_VOLFH)
1632 1727                  exi->exi_volatile_id = gethrestime_sec();
1633 1728  
1634 1729          mutex_init(&exi->exi_vol_rename_lock, NULL, MUTEX_DEFAULT, NULL);
1635 1730  #endif /* VOLATILE_FH_TEST */
1636 1731  
1637 1732          /*
1638 1733           * If this is a new export, then climb up
1639 1734           * the tree and check if any pseudo exports
1640 1735           * need to be created to provide a path for
1641 1736           * NFS v4 clients.
1642 1737           */
1643 1738          if (ex == NULL) {
1644 1739                  error = treeclimb_export(exi);
1645 1740                  if (error)
1646 1741                          goto out7;
1647 1742          } else {
1648 1743                  /* If it's a re-export update namespace tree */
1649 1744                  exi->exi_tree = ex->exi_tree;
1650 1745                  exi->exi_tree->tree_exi = exi;
1651 1746  
1652 1747                  /* Update the change timestamp */
1653 1748                  tree_update_change(ne, exi->exi_tree, NULL);
1654 1749          }
1655 1750  
1656 1751          /*
1657 1752           * build a unique flavor list from the flavors specified
1658 1753           * in the share cmd.  unique means that each flavor only
1659 1754           * appears once in the secinfo list -- no duplicates allowed.
1660 1755           */
1661 1756          newcnt = build_seclist_nodups(&exi->exi_export, newsec, FALSE);
1662 1757  
1663 1758          srv_secinfo_treeclimb(ne, exi, newsec, newcnt, TRUE);
1664 1759  
1665 1760          /*
1666 1761           * If re-sharing an old export entry, update the secinfo data
1667 1762           * depending on if the old entry is a pseudo node or not.
1668 1763           */
1669 1764          if (ex != NULL) {
1670 1765                  oldcnt = build_seclist_nodups(&ex->exi_export, oldsec, FALSE);
1671 1766                  if (PSEUDO(ex)) {
1672 1767                          /*
1673 1768                           * The dir being shared is a pseudo export root (which
1674 1769                           * will be transformed into a real export root).  The
1675 1770                           * flavor(s) of the new share were propagated to the
1676 1771                           * ancestors by srv_secinfo_treeclimb() above.  Now
1677 1772                           * transfer the implicit flavor refs from the old
1678 1773                           * pseudo export root to the new (real) export root.
1679 1774                           */
1680 1775                          srv_secinfo_add(&exi->exi_export.ex_secinfo,
1681 1776                              &exi->exi_export.ex_seccnt, oldsec, oldcnt, TRUE);
1682 1777                  } else {
1683 1778                          /*
1684 1779                           * First transfer implicit flavor refs to new export.
1685 1780                           * Remove old flavor refs last.
1686 1781                           */
1687 1782                          srv_secinfo_exp2exp(&exi->exi_export, oldsec, oldcnt);
1688 1783                          srv_secinfo_treeclimb(ne, ex, oldsec, oldcnt, FALSE);
1689 1784                  }
1690 1785          }
1691 1786  
1692 1787          /*
1693 1788           * If it's a re-export and the old entry has a pseudonode list,
1694 1789           * transfer it to the new export.
1695 1790           */
1696 1791          if (ex != NULL && (ex->exi_visible != NULL)) {
1697 1792                  exi->exi_visible = ex->exi_visible;
1698 1793                  ex->exi_visible = NULL;
1699 1794          }
1700 1795  
1701 1796          /*
1702 1797           * Initialize exi_id and exi_kstats
1703 1798           */
1704 1799          if (ex != NULL) {
1705 1800                  exi->exi_id = ex->exi_id;
1706 1801          } else {
1707 1802                  mutex_enter(&nfs_exi_id_lock);
1708 1803                  exi->exi_id = exi_id_get_next();
1709 1804                  mutex_exit(&nfs_exi_id_lock);
1710 1805          }
1711 1806          mutex_enter(&nfs_exi_id_lock);
1712 1807          avl_add(&exi_id_tree, exi);
1713 1808          mutex_exit(&nfs_exi_id_lock);
1714 1809  
1715 1810          DTRACE_PROBE(nfss__i__exported_lock3_stop);
1716 1811          rw_exit(&ne->exported_lock);
1717 1812  
1718 1813          if (ne->exi_public == exi || kex->ex_flags & EX_LOG) {
1719 1814                  /*
1720 1815                   * Log share operation to this buffer only.
1721 1816                   */
1722 1817                  nfslog_share_record(exi, cr);
1723 1818          }
1724 1819  
1725 1820          if (ex != NULL)
1726 1821                  exi_rele(ex);
1727 1822  
1728 1823          return (0);
1729 1824  
1730 1825  out7:
1731 1826          /* Unlink the new export in exptable. */
1732 1827          export_unlink(ne, exi);
1733 1828          DTRACE_PROBE(nfss__i__exported_lock3_stop);
1734 1829          rw_exit(&ne->exported_lock);
1735 1830  out6:
1736 1831          if (kex->ex_flags & EX_INDEX)
1737 1832                  kmem_free(kex->ex_index, strlen(kex->ex_index) + 1);
1738 1833  out5:
1739 1834          /* free partially completed allocation */
1740 1835          while (--allocd_seccnt >= 0) {
1741 1836                  exs = &kex->ex_secinfo[allocd_seccnt];
1742 1837                  srv_secinfo_entry_free(exs);
1743 1838          }
1744 1839  
1745 1840          if (kex->ex_secinfo) {
1746 1841                  kmem_free(kex->ex_secinfo,
1747 1842                      kex->ex_seccnt * sizeof (struct secinfo));
1748 1843          }
1749 1844  
1750 1845  out4:
1751 1846          if ((kex->ex_flags & EX_LOG) && kex->ex_tag != NULL)
1752 1847                  kmem_free(kex->ex_tag, kex->ex_taglen + 1);
1753 1848  out3:
1754 1849          if ((kex->ex_flags & EX_LOG) && kex->ex_log_buffer != NULL)
1755 1850                  kmem_free(kex->ex_log_buffer, kex->ex_log_bufferlen + 1);
1756 1851  out2:
1757 1852          kmem_free(kex->ex_path, kex->ex_pathlen + 1);
1758 1853  out1:
1759 1854          VN_RELE(vp);
1760 1855          if (dvp != NULL)
1761 1856                  VN_RELE(dvp);
1762 1857          mutex_destroy(&exi->exi_lock);
1763 1858          rw_destroy(&exi->exi_cache_lock);
1764 1859          for (i = 0; i < AUTH_TABLESIZE; i++) {
1765 1860                  avl_destroy(exi->exi_cache[i]);
1766 1861                  kmem_free(exi->exi_cache[i], sizeof (avl_tree_t));
1767 1862          }
1768 1863  
1769 1864          kmem_free(exi, sizeof (*exi));
1770 1865  
1771 1866          return (error);
1772 1867  }
1773 1868  
1774 1869  /*
1775 1870   * Remove the exportinfo from the export list
            *
            * Unlinks 'exi' from the two hash chains named fid_hash and path_hash.
            * The caller must already hold ne->exported_lock as writer; that is
            * asserted on entry.  The body performs only the two exp_hash_unlink()
            * calls: callers in this file take care of removing the entry from
            * exi_id_tree and of calling exi_rele() separately where needed.
1776 1871   */
1777 1872  void
1778 1873  export_unlink(nfs_export_t *ne, struct exportinfo *exi)
1779 1874  {
                   /* The export table must be write-locked by the caller. */
1780 1875          ASSERT(RW_WRITE_HELD(&ne->exported_lock));
1781 1876  
1782 1877          exp_hash_unlink(exi, fid_hash);
1783 1878          exp_hash_unlink(exi, path_hash);
1784 1879  }
1785 1880  
1786 1881  /*
1787 1882   * Unexport an exported filesystem
            *
            * With ne->exported_lock held as writer, 'exi' is removed from
            * exi_id_tree and from the export hash chains, and its security flavors
            * are removed through srv_secinfo_treeclimb().  If 'exi' carries a
            * visible list, a pseudo export created by pseudo_exportfs() takes over
            * that list and the namespace tree node; otherwise treeclimb_unexport()
            * is called.
            *
            * After the lock is dropped, rfs4_clean_state_exi() and lm_unexport()
            * are called, the public filehandle is pointed back at ne->exi_root if
            * it referred to 'exi', share/unshare log records are written where
            * applicable, and finally exi_rele(exi) is called.
            *
            * Returns EINVAL if 'exi' is no longer linked in the export table or is
            * a pseudo export; returns 0 otherwise.
1788 1883   */
1789 1884  static int
1790 1885  unexport(nfs_export_t *ne, struct exportinfo *exi)
1791 1886  {
1792 1887          struct secinfo cursec[MAX_FLAVORS];
1793 1888          int curcnt;
1794 1889  
1795 1890          rw_enter(&ne->exported_lock, RW_WRITER);
1796 1891  
1797 1892          /* Check if exi is still linked in the export table */
                   /* Pseudo exports are rejected with EINVAL here as well. */
1798 1893          if (!EXP_LINKED(exi) || PSEUDO(exi)) {
1799 1894                  rw_exit(&ne->exported_lock);
1800 1895                  return (EINVAL);
1801 1896          }
1802 1897  
                   /* Drop exi from the id tree, then from the fid/path hash chains. */
1803 1898          mutex_enter(&nfs_exi_id_lock);
1804 1899          avl_remove(&exi_id_tree, exi);
1805 1900          mutex_exit(&nfs_exi_id_lock);
1806 1901          export_unlink(ne, exi);
1807 1902  
1808 1903          /*
1809 1904           * Remove security flavors before treeclimb_unexport() is called
1810 1905           * because srv_secinfo_treeclimb needs the namespace tree
1811 1906           */
1812 1907          curcnt = build_seclist_nodups(&exi->exi_export, cursec, TRUE);
1813 1908          srv_secinfo_treeclimb(ne, exi, cursec, curcnt, FALSE);
1814 1909  
1815 1910          /*
1816 1911           * If there's a visible list, then need to leave
1817 1912           * a pseudo export here to retain the visible list
1818 1913           * for paths to exports below.
1819 1914           */
1820 1915          if (exi->exi_visible != NULL) {
1821 1916                  struct exportinfo *newexi;
1822 1917  
1823 1918                  newexi = pseudo_exportfs(ne, exi->exi_vp, &exi->exi_fid,
1824 1919                      exi->exi_visible, &exi->exi_export);
                           /* The visible list was handed to newexi above. */
1825 1920                  exi->exi_visible = NULL;
1826 1921  
1827 1922                  /* interconnect the existing treenode with the new exportinfo */
1828 1923                  newexi->exi_zone = exi->exi_zone;
1829 1924                  newexi->exi_tree = exi->exi_tree;
1830 1925                  newexi->exi_tree->tree_exi = newexi;
1831 1926  
1832 1927                  /* Update the change timestamp */
1833 1928                  tree_update_change(ne, exi->exi_tree, NULL);
1834 1929          } else {
1835 1930                  treeclimb_unexport(ne, exi);
  
    | 
      ↓ open down ↓ | 
    823 lines elided | 
    
      ↑ open up ↑ | 
  
1836 1931          }
1837 1932  
1838 1933          rw_exit(&ne->exported_lock);
1839 1934  
1840 1935          /*
1841 1936           * Need to call into the NFSv4 server and release all data
1842 1937           * held on this particular export.  This is important since
1843 1938           * the v4 server may be holding file locks or vnodes under
1844 1939           * this export.
1845 1940           */
1846      -        rfs4_clean_state_exi(exi);
     1941 +        rfs4_clean_state_exi(ne, exi);
1847 1942  
1848 1943          /*
1849 1944           * Notify the lock manager that the filesystem is being
1850 1945           * unexported.
1851 1946           */
1852 1947          lm_unexport(exi);
1853 1948  
1854 1949          /*
1855 1950           * If this was a public export, restore
1856 1951           * the public filehandle to the root.
1857 1952           */
1858 1953  
1859 1954          /*
1860 1955           * XXX KEBE ASKS --> Should CRED() instead be
1861 1956           * exi->exi_zone->zone_kcred?
1862 1957           */
                   /*
                    * NOTE(review): ne->exi_public is read and rewritten here after
                    * ne->exported_lock was released by the rw_exit() above -- confirm
                    * that this is safe against concurrent share/unshare.
                    */
1863 1958          if (exi == ne->exi_public) {
1864 1959                  ne->exi_public = ne->exi_root;
1865 1960  
1866 1961                  nfslog_share_record(ne->exi_public, CRED());
1867 1962          }
1868 1963  
1869 1964          if (exi->exi_export.ex_flags & EX_LOG)
1870 1965                  nfslog_unshare_record(exi, CRED());
1871 1966  
1872 1967          exi_rele(exi);
1873 1968          return (0);
1874 1969  }
1875 1970  
1876 1971  /*
1877 1972   * Get file handle system call.
1878 1973   * Takes file name and returns a file handle for it.
1879 1974   * Credentials must be verified before calling.
1880 1975   */
1881 1976  int
1882 1977  nfs_getfh(struct nfs_getfh_args *args, model_t model, cred_t *cr)
1883 1978  {
1884 1979          nfs_fh3 fh;
1885 1980          char buf[NFS3_MAXFHSIZE];
1886 1981          char *logptr, logbuf[NFS3_MAXFHSIZE];
1887 1982          int l = NFS3_MAXFHSIZE;
1888 1983          vnode_t *vp;
1889 1984          vnode_t *dvp;
1890 1985          struct exportinfo *exi;
1891 1986          int error;
1892 1987          int vers;
1893 1988          STRUCT_HANDLE(nfs_getfh_args, uap);
1894 1989  
1895 1990  #ifdef lint
1896 1991          model = model;          /* STRUCT macros don't always use it */
1897 1992  #endif
1898 1993  
1899 1994          STRUCT_SET_HANDLE(uap, model, args);
1900 1995  
1901 1996          error = lookupname(STRUCT_FGETP(uap, fname), UIO_USERSPACE,
1902 1997              FOLLOW, &dvp, &vp);
1903 1998          if (error == EINVAL) {
1904 1999                  /*
1905 2000                   * if fname resolves to / we get EINVAL error
1906 2001                   * since we wanted the parent vnode. Try again
1907 2002                   * with NULL dvp.
1908 2003                   */
1909 2004                  error = lookupname(STRUCT_FGETP(uap, fname), UIO_USERSPACE,
1910 2005                      FOLLOW, NULL, &vp);
1911 2006                  dvp = NULL;
1912 2007          }
1913 2008          if (!error && vp == NULL) {
1914 2009                  /*
1915 2010                   * Last component of fname not found
1916 2011                   */
1917 2012                  if (dvp != NULL) {
1918 2013                          VN_RELE(dvp);
1919 2014                  }
1920 2015                  error = ENOENT;
1921 2016          }
1922 2017          if (error)
1923 2018                  return (error);
1924 2019  
1925 2020          /*
1926 2021           * 'vp' may be an AUTOFS node, so we perform a
1927 2022           * VOP_ACCESS() to trigger the mount of the
1928 2023           * intended filesystem, so we can share the intended
1929 2024           * filesystem instead of the AUTOFS filesystem.
1930 2025           */
1931 2026          (void) VOP_ACCESS(vp, 0, 0, cr, NULL);
1932 2027  
1933 2028          /*
1934 2029           * We're interested in the top most filesystem.
1935 2030           * This is especially important when uap->fname is a trigger
1936 2031           * AUTOFS node, since we're really interested in sharing the
1937 2032           * filesystem AUTOFS mounted as result of the VOP_ACCESS()
1938 2033           * call not the AUTOFS node itself.
1939 2034           */
1940 2035          if (vn_mountedvfs(vp) != NULL) {
1941 2036                  if (error = traverse(&vp)) {
1942 2037                          VN_RELE(vp);
1943 2038                          if (dvp != NULL)
1944 2039                                  VN_RELE(dvp);
1945 2040                          return (error);
1946 2041                  }
1947 2042          }
1948 2043  
1949 2044          vers = STRUCT_FGET(uap, vers);
1950 2045          exi = nfs_vptoexi(dvp, vp, cr, NULL, &error, FALSE);
1951 2046          if (!error) {
1952 2047                  if (vers == NFS_VERSION) {
1953 2048                          error = makefh((fhandle_t *)buf, vp, exi);
1954 2049                          l = NFS_FHSIZE;
1955 2050                          logptr = buf;
1956 2051                  } else if (vers == NFS_V3) {
1957 2052                          int i, sz, pad;
1958 2053  
1959 2054                          error = makefh3(&fh, vp, exi);
1960 2055                          l = RNDUP(fh.fh3_length);
1961 2056                          if (!error && (l > sizeof (fhandle3_t)))
1962 2057                                  error = EREMOTE;
1963 2058                          logptr = logbuf;
1964 2059                          if (!error) {
1965 2060                                  i = 0;
1966 2061                                  sz = sizeof (fsid_t);
1967 2062                                  bcopy(&fh.fh3_fsid, &buf[i], sz);
1968 2063                                  i += sz;
1969 2064  
1970 2065                                  /*
1971 2066                                   * For backwards compatibility, the
1972 2067                                   * fid length may be less than
1973 2068                                   * NFS_FHMAXDATA, but it was always
1974 2069                                   * encoded as NFS_FHMAXDATA bytes.
1975 2070                                   */
1976 2071  
1977 2072                                  sz = sizeof (ushort_t);
1978 2073                                  bcopy(&fh.fh3_len, &buf[i], sz);
1979 2074                                  i += sz;
1980 2075                                  bcopy(fh.fh3_data, &buf[i], fh.fh3_len);
1981 2076                                  i += fh.fh3_len;
1982 2077                                  pad = (NFS_FHMAXDATA - fh.fh3_len);
1983 2078                                  if (pad > 0) {
1984 2079                                          bzero(&buf[i], pad);
1985 2080                                          i += pad;
1986 2081                                          l += pad;
1987 2082                                  }
1988 2083  
1989 2084                                  sz = sizeof (ushort_t);
1990 2085                                  bcopy(&fh.fh3_xlen, &buf[i], sz);
1991 2086                                  i += sz;
1992 2087                                  bcopy(fh.fh3_xdata, &buf[i], fh.fh3_xlen);
1993 2088                                  i += fh.fh3_xlen;
1994 2089                                  pad = (NFS_FHMAXDATA - fh.fh3_xlen);
1995 2090                                  if (pad > 0) {
1996 2091                                          bzero(&buf[i], pad);
1997 2092                                          i += pad;
1998 2093                                          l += pad;
1999 2094                                  }
2000 2095                          }
2001 2096                          /*
2002 2097                           * If we need to do NFS logging, the filehandle
2003 2098                           * must be downsized to 32 bytes.
2004 2099                           */
2005 2100                          if (!error && exi->exi_export.ex_flags & EX_LOG) {
2006 2101                                  i = 0;
2007 2102                                  sz = sizeof (fsid_t);
2008 2103                                  bcopy(&fh.fh3_fsid, &logbuf[i], sz);
2009 2104                                  i += sz;
2010 2105                                  sz = sizeof (ushort_t);
2011 2106                                  bcopy(&fh.fh3_len, &logbuf[i], sz);
2012 2107                                  i += sz;
2013 2108                                  sz = NFS_FHMAXDATA;
2014 2109                                  bcopy(fh.fh3_data, &logbuf[i], sz);
2015 2110                                  i += sz;
2016 2111                                  sz = sizeof (ushort_t);
2017 2112                                  bcopy(&fh.fh3_xlen, &logbuf[i], sz);
2018 2113                                  i += sz;
2019 2114                                  sz = NFS_FHMAXDATA;
2020 2115                                  bcopy(fh.fh3_xdata, &logbuf[i], sz);
2021 2116                                  i += sz;
2022 2117                          }
2023 2118                  }
2024 2119                  if (!error && exi->exi_export.ex_flags & EX_LOG) {
2025 2120                          nfslog_getfh(exi, (fhandle_t *)logptr,
2026 2121                              STRUCT_FGETP(uap, fname), UIO_USERSPACE, cr);
2027 2122                  }
2028 2123                  exi_rele(exi);
2029 2124                  if (!error) {
2030 2125                          if (copyout(&l, STRUCT_FGETP(uap, lenp), sizeof (int)))
2031 2126                                  error = EFAULT;
2032 2127                          if (copyout(buf, STRUCT_FGETP(uap, fhp), l))
2033 2128                                  error = EFAULT;
2034 2129                  }
2035 2130          }
2036 2131          VN_RELE(vp);
2037 2132          if (dvp != NULL) {
2038 2133                  VN_RELE(dvp);
2039 2134          }
2040 2135          return (error);
2041 2136  }
2042 2137  
2043 2138  /*
2044 2139   * Strategy: if vp is in the export list, then
2045 2140   * return the associated file handle. Otherwise, ".."
2046 2141   * once up the vp and try again, until the root of the
2047 2142   * filesystem is reached.
2048 2143   */
2049 2144  struct   exportinfo *
2050 2145  nfs_vptoexi(vnode_t *dvp, vnode_t *vp, cred_t *cr, int *walk,
2051 2146      int *err, bool_t v4srv)
2052 2147  {
2053 2148          fid_t fid;
2054 2149          int error;
2055 2150          struct exportinfo *exi;
2056 2151  
2057 2152          ASSERT(vp);
2058 2153          VN_HOLD(vp);
2059 2154          if (dvp != NULL) {
2060 2155                  VN_HOLD(dvp);
2061 2156          }
2062 2157          if (walk != NULL)
2063 2158                  *walk = 0;
2064 2159  
2065 2160          for (;;) {
2066 2161                  bzero(&fid, sizeof (fid));
2067 2162                  fid.fid_len = MAXFIDSZ;
2068 2163                  error = vop_fid_pseudo(vp, &fid);
2069 2164                  if (error) {
2070 2165                          /*
2071 2166                           * If vop_fid_pseudo returns ENOSPC then the fid
2072 2167                           * supplied is too small. For now we simply
2073 2168                           * return EREMOTE.
2074 2169                           */
2075 2170                          if (error == ENOSPC)
2076 2171                                  error = EREMOTE;
2077 2172                          break;
2078 2173                  }
2079 2174  
2080 2175                  if (v4srv)
2081 2176                          exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp);
2082 2177                  else
2083 2178                          exi = checkexport(&vp->v_vfsp->vfs_fsid, &fid);
2084 2179  
2085 2180                  if (exi != NULL) {
2086 2181                          /*
2087 2182                           * Found the export info
2088 2183                           */
2089 2184                          break;
2090 2185                  }
2091 2186  
2092 2187                  /*
2093 2188                   * We have just failed finding a matching export.
2094 2189                   * If we're at the root of this filesystem, then
2095 2190                   * it's time to stop (with failure).
2096 2191                   */
2097 2192                  if ((vp->v_flag & VROOT) || VN_IS_CURZONEROOT(vp)) {
2098 2193                          error = EINVAL;
2099 2194                          break;
2100 2195                  }
2101 2196  
2102 2197                  if (walk != NULL)
2103 2198                          (*walk)++;
2104 2199  
2105 2200                  /*
2106 2201                   * Now, do a ".." up vp. If dvp is supplied, use it,
2107 2202                   * otherwise, look it up.
2108 2203                   */
2109 2204                  if (dvp == NULL) {
2110 2205                          error = VOP_LOOKUP(vp, "..", &dvp, NULL, 0, NULL, cr,
2111 2206                              NULL, NULL, NULL);
2112 2207                          if (error)
2113 2208                                  break;
2114 2209                  }
2115 2210                  VN_RELE(vp);
2116 2211                  vp = dvp;
2117 2212                  dvp = NULL;
2118 2213          }
2119 2214          VN_RELE(vp);
2120 2215          if (dvp != NULL) {
2121 2216                  VN_RELE(dvp);
2122 2217          }
2123 2218          if (error != 0) {
2124 2219                  if (err != NULL)
2125 2220                          *err = error;
2126 2221                  return (NULL);
2127 2222          }
2128 2223          return (exi);
2129 2224  }
2130 2225  
2131 2226  int
2132 2227  chk_clnt_sec(exportinfo_t *exi, struct svc_req *req)
2133 2228  {
2134 2229          int i, nfsflavor;
2135 2230          struct secinfo *sp;
2136 2231  
2137 2232          /*
2138 2233           *  Get the nfs flavor number from xprt.
2139 2234           */
2140 2235          nfsflavor = (int)(uintptr_t)req->rq_xprt->xp_cookie;
2141 2236  
2142 2237          sp = exi->exi_export.ex_secinfo;
2143 2238          for (i = 0; i < exi->exi_export.ex_seccnt; i++) {
2144 2239                  if ((nfsflavor == sp[i].s_secinfo.sc_nfsnum) &&
2145 2240                      SEC_REF_EXPORTED(sp + i))
2146 2241                          return (TRUE);
2147 2242          }
2148 2243          return (FALSE);
2149 2244  }
2150 2245  
2151 2246  /*
2152 2247   * Make an fhandle from a vnode
2153 2248   */
2154 2249  int
2155 2250  makefh(fhandle_t *fh, vnode_t *vp, exportinfo_t *exi)
2156 2251  {
2157 2252          int error;
2158 2253  
2159 2254          *fh = exi->exi_fh;      /* struct copy */
2160 2255  
2161 2256          error = VOP_FID(vp, (fid_t *)&fh->fh_len, NULL);
2162 2257          if (error) {
2163 2258                  /*
2164 2259                   * Should be something other than EREMOTE
2165 2260                   */
2166 2261                  return (EREMOTE);
2167 2262          }
2168 2263          return (0);
2169 2264  }
2170 2265  
2171 2266  /*
2172 2267   * This routine makes an overloaded V2 fhandle which contains
2173 2268   * sec modes.
2174 2269   *
2175 2270   * Note that the first four octets contain the length octet,
2176 2271   * the status octet, and two padded octets to make them XDR
2177 2272   * four-octet aligned.
2178 2273   *
2179 2274   *   1   2   3   4                                          32
2180 2275   * +---+---+---+---+---+---+---+---+   +---+---+---+---+   +---+
2181 2276   * | l | s |   |   |     sec_1     |...|     sec_n     |...|   |
2182 2277   * +---+---+---+---+---+---+---+---+   +---+---+---+---+   +---+
2183 2278   *
2184 2279   * where
2185 2280   *
2186 2281   *   the status octet s indicates whether there are more security
2187 2282   *   flavors (1 means yes, 0 means no) that require the client to
2188 2283   *   perform another 0x81 LOOKUP to get them,
2189 2284   *
2190 2285   *   the length octet l is the length describing the number of
2191 2286   *   valid octets that follow.  (l = 4 * n, where n is the number
2192 2287   *   of security flavors sent in the current overloaded filehandle.)
2193 2288   *
2194 2289   *   sec_index should always be in the inclusive range: [1 - ex_seccnt],
2195 2290   *   and it tells server where to start within the secinfo array.
2196 2291   *   Usually it will always be 1; however, if more flavors are used
2197 2292   *   for the public export than can be encoded in the overloaded FH
2198 2293   *   (7 for NFS2), subsequent SNEGO MCLs will have a larger index
2199 2294   *   so the server will pick up where it left off from the previous
2200 2295   *   MCL reply.
2201 2296   *
2202 2297   *   With NFS4 support, implicitly allowed flavors are also in
2203 2298   *   the secinfo array; however, they should not be returned in
2204 2299   *   SNEGO MCL replies.
2205 2300   */
2206 2301  int
2207 2302  makefh_ol(fhandle_t *fh, exportinfo_t *exi, uint_t sec_index)
2208 2303  {
2209 2304          secinfo_t sec[MAX_FLAVORS];
2210 2305          int totalcnt, i, *ipt, cnt, seccnt, secidx, fh_max_cnt;
2211 2306          char *c;
2212 2307  
2213 2308          if (fh == NULL || exi == NULL || sec_index < 1)
2214 2309                  return (EREMOTE);
2215 2310  
2216 2311          /*
2217 2312           * WebNFS clients need to know the unique set of explicitly
2218 2313           * shared flavors in used for the public export. When
2219 2314           * "TRUE" is passed to build_seclist_nodups(), only explicitly
2220 2315           * shared flavors are included in the list.
2221 2316           */
2222 2317          seccnt = build_seclist_nodups(&exi->exi_export, sec, TRUE);
2223 2318          if (sec_index > seccnt)
2224 2319                  return (EREMOTE);
2225 2320  
2226 2321          fh_max_cnt = (NFS_FHSIZE / sizeof (int)) - 1;
2227 2322          totalcnt = seccnt - sec_index + 1;
2228 2323          cnt = totalcnt > fh_max_cnt ? fh_max_cnt : totalcnt;
2229 2324  
2230 2325          c = (char *)fh;
2231 2326          /*
2232 2327           * Encode the length octet representing the number of
2233 2328           * security flavors (in bytes) in this overloaded fh.
2234 2329           */
2235 2330          *c = cnt * sizeof (int);
2236 2331  
2237 2332          /*
2238 2333           * Encode the status octet that indicates whether there
2239 2334           * are more security flavors the client needs to get.
2240 2335           */
2241 2336          *(c + 1) = totalcnt > fh_max_cnt;
2242 2337  
2243 2338          /*
2244 2339           * put security flavors in the overloaded fh
2245 2340           */
2246 2341          ipt = (int *)(c + sizeof (int32_t));
2247 2342          secidx = sec_index - 1;
2248 2343          for (i = 0; i < cnt; i++) {
2249 2344                  ipt[i] = htonl(sec[i + secidx].s_secinfo.sc_nfsnum);
2250 2345          }
2251 2346          return (0);
2252 2347  }
2253 2348  
2254 2349  /*
2255 2350   * Make an nfs_fh3 from a vnode
2256 2351   */
2257 2352  int
2258 2353  makefh3(nfs_fh3 *fh, vnode_t *vp, struct exportinfo *exi)
2259 2354  {
2260 2355          int error;
2261 2356          fid_t fid;
2262 2357  
2263 2358          bzero(&fid, sizeof (fid));
2264 2359          fid.fid_len = sizeof (fh->fh3_data);
2265 2360          error = VOP_FID(vp, &fid, NULL);
2266 2361          if (error)
2267 2362                  return (EREMOTE);
2268 2363  
2269 2364          bzero(fh, sizeof (nfs_fh3));
2270 2365          fh->fh3_fsid = exi->exi_fsid;
2271 2366          fh->fh3_len = fid.fid_len;
2272 2367          bcopy(fid.fid_data, fh->fh3_data, fh->fh3_len);
2273 2368  
2274 2369          fh->fh3_xlen = exi->exi_fid.fid_len;
2275 2370          ASSERT(fh->fh3_xlen <= sizeof (fh->fh3_xdata));
2276 2371          bcopy(exi->exi_fid.fid_data, fh->fh3_xdata, fh->fh3_xlen);
2277 2372  
2278 2373          fh->fh3_length = sizeof (fh->fh3_fsid)
2279 2374              + sizeof (fh->fh3_len) + fh->fh3_len
2280 2375              + sizeof (fh->fh3_xlen) + fh->fh3_xlen;
2281 2376          fh->fh3_flags = 0;
2282 2377  
2283 2378          return (0);
2284 2379  }
2285 2380  
2286 2381  /*
2287 2382   * This routine makes an overloaded V3 fhandle which contains
2288 2383   * sec modes.
2289 2384   *
2290 2385   *  1        4
2291 2386   * +--+--+--+--+
2292 2387   * |    len    |
2293 2388   * +--+--+--+--+
2294 2389   *                                               up to 64
2295 2390   * +--+--+--+--+--+--+--+--+--+--+--+--+     +--+--+--+--+
2296 2391   * |s |  |  |  |   sec_1   |   sec_2   | ... |   sec_n   |
2297 2392   * +--+--+--+--+--+--+--+--+--+--+--+--+     +--+--+--+--+
2298 2393   *
2299 2394   * len = 4 * (n+1), where n is the number of security flavors
2300 2395   * sent in the current overloaded filehandle.
2301 2396   *
2302 2397   * the status octet s indicates whether there are more security
2303 2398   * mechanisms (1 means yes, 0 means no) that require the client
2304 2399   * to perform another 0x81 LOOKUP to get them.
2305 2400   *
2306 2401   * Three octets are padded after the status octet.
2307 2402   */
2308 2403  int
2309 2404  makefh3_ol(nfs_fh3 *fh, struct exportinfo *exi, uint_t sec_index)
2310 2405  {
2311 2406          secinfo_t sec[MAX_FLAVORS];
2312 2407          int totalcnt, cnt, *ipt, i, seccnt, fh_max_cnt, secidx;
2313 2408          char *c;
2314 2409  
2315 2410          if (fh == NULL || exi == NULL || sec_index < 1)
2316 2411                  return (EREMOTE);
2317 2412  
2318 2413          /*
2319 2414           * WebNFS clients need to know the unique set of explicitly
2320 2415           * shared flavors in used for the public export. When
2321 2416           * "TRUE" is passed to build_seclist_nodups(), only explicitly
2322 2417           * shared flavors are included in the list.
2323 2418           */
2324 2419          seccnt = build_seclist_nodups(&exi->exi_export, sec, TRUE);
2325 2420  
2326 2421          if (sec_index > seccnt)
2327 2422                  return (EREMOTE);
2328 2423  
2329 2424          fh_max_cnt = (NFS3_FHSIZE / sizeof (int)) - 1;
2330 2425          totalcnt = seccnt - sec_index + 1;
2331 2426          cnt = totalcnt > fh_max_cnt ? fh_max_cnt : totalcnt;
2332 2427  
2333 2428          /*
2334 2429           * Place the length in fh3_length representing the number
2335 2430           * of security flavors (in bytes) in this overloaded fh.
2336 2431           */
2337 2432          fh->fh3_flags = FH_WEBNFS;
2338 2433          fh->fh3_length = (cnt+1) * sizeof (int32_t);
2339 2434  
2340 2435          c = (char *)&fh->fh3_u.nfs_fh3_i.fh3_i;
2341 2436          /*
2342 2437           * Encode the status octet that indicates whether there
2343 2438           * are more security flavors the client needs to get.
2344 2439           */
2345 2440          *c = totalcnt > fh_max_cnt;
2346 2441  
2347 2442          /*
2348 2443           * put security flavors in the overloaded fh
2349 2444           */
2350 2445          secidx = sec_index - 1;
2351 2446          ipt = (int *)(c + sizeof (int32_t));
2352 2447          for (i = 0; i < cnt; i++) {
2353 2448                  ipt[i] = htonl(sec[i + secidx].s_secinfo.sc_nfsnum);
2354 2449          }
2355 2450          return (0);
2356 2451  }
2357 2452  
2358 2453  /*
2359 2454   * Make an nfs_fh4 from a vnode
2360 2455   */
2361 2456  int
2362 2457  makefh4(nfs_fh4 *fh, vnode_t *vp, struct exportinfo *exi)
2363 2458  {
2364 2459          int error;
2365 2460          nfs_fh4_fmt_t *fh_fmtp = (nfs_fh4_fmt_t *)fh->nfs_fh4_val;
2366 2461          fid_t fid;
2367 2462  
2368 2463          bzero(&fid, sizeof (fid));
2369 2464          fid.fid_len = MAXFIDSZ;
2370 2465          /*
2371 2466           * vop_fid_pseudo() is used to set up NFSv4 namespace, so
2372 2467           * use vop_fid_pseudo() here to get the fid instead of VOP_FID.
2373 2468           */
2374 2469          error = vop_fid_pseudo(vp, &fid);
2375 2470          if (error)
2376 2471                  return (error);
2377 2472  
2378 2473          fh->nfs_fh4_len = NFS_FH4_LEN;
2379 2474  
2380 2475          fh_fmtp->fh4_i.fhx_fsid = exi->exi_fh.fh_fsid;
2381 2476          fh_fmtp->fh4_i.fhx_xlen = exi->exi_fh.fh_xlen;
2382 2477  
2383 2478          bzero(fh_fmtp->fh4_i.fhx_data, sizeof (fh_fmtp->fh4_i.fhx_data));
2384 2479          bzero(fh_fmtp->fh4_i.fhx_xdata, sizeof (fh_fmtp->fh4_i.fhx_xdata));
2385 2480          ASSERT(exi->exi_fh.fh_xlen <= sizeof (fh_fmtp->fh4_i.fhx_xdata));
2386 2481          bcopy(exi->exi_fh.fh_xdata, fh_fmtp->fh4_i.fhx_xdata,
2387 2482              exi->exi_fh.fh_xlen);
2388 2483  
2389 2484          fh_fmtp->fh4_len = fid.fid_len;
2390 2485          ASSERT(fid.fid_len <= sizeof (fh_fmtp->fh4_data));
2391 2486          bcopy(fid.fid_data, fh_fmtp->fh4_data, fid.fid_len);
2392 2487          fh_fmtp->fh4_flag = 0;
2393 2488  
2394 2489  #ifdef VOLATILE_FH_TEST
2395 2490          /*
2396 2491           * XXX (temporary?)
2397 2492           * Use the rnode volatile_id value to add volatility to the fh.
2398 2493           *
2399 2494           * For testing purposes there are currently two scenarios, based
2400 2495           * on whether the filesystem was shared with "volatile_fh"
2401 2496           * or "expire_on_rename". In the first case, use the value of
2402 2497           * export struct share_time as the volatile_id. In the second
2403 2498           * case use the vnode volatile_id value (which is set to the
2404 2499           * time in which the file was renamed).
2405 2500           *
2406 2501           * Note that the above are temporary constructs for testing only
2407 2502           * XXX
2408 2503           */
2409 2504          if (exi->exi_export.ex_flags & EX_VOLRNM) {
2410 2505                  fh_fmtp->fh4_volatile_id = find_volrnm_fh_id(exi, fh);
2411 2506          } else if (exi->exi_export.ex_flags & EX_VOLFH) {
2412 2507                  fh_fmtp->fh4_volatile_id = exi->exi_volatile_id;
2413 2508          } else {
2414 2509                  fh_fmtp->fh4_volatile_id = 0;
2415 2510          }
2416 2511  #endif /* VOLATILE_FH_TEST */
2417 2512  
2418 2513          return (0);
2419 2514  }
2420 2515  
2421 2516  /*
2422 2517   * Convert an fhandle into a vnode.
2423 2518   * Uses the file id (fh_len + fh_data) in the fhandle to get the vnode.
2424 2519   * WARNING: users of this routine must do a VN_RELE on the vnode when they
2425 2520   * are done with it.
2426 2521   */
2427 2522  vnode_t *
2428 2523  nfs_fhtovp(fhandle_t *fh, struct exportinfo *exi)
2429 2524  {
2430 2525          vfs_t *vfsp;
2431 2526          vnode_t *vp;
2432 2527          int error;
2433 2528          fid_t *fidp;
2434 2529  
2435 2530          TRACE_0(TR_FAC_NFS, TR_FHTOVP_START,
2436 2531              "fhtovp_start");
2437 2532  
2438 2533          if (exi == NULL) {
2439 2534                  TRACE_1(TR_FAC_NFS, TR_FHTOVP_END,
2440 2535                      "fhtovp_end:(%S)", "exi NULL");
2441 2536                  return (NULL);  /* not exported */
2442 2537          }
2443 2538  
2444 2539          ASSERT(exi->exi_vp != NULL);
2445 2540  
2446 2541          if (PUBLIC_FH2(fh)) {
2447 2542                  if (exi->exi_export.ex_flags & EX_PUBLIC) {
2448 2543                          TRACE_1(TR_FAC_NFS, TR_FHTOVP_END,
2449 2544                              "fhtovp_end:(%S)", "root not exported");
2450 2545                          return (NULL);
2451 2546                  }
2452 2547                  vp = exi->exi_vp;
2453 2548                  VN_HOLD(vp);
2454 2549                  return (vp);
2455 2550          }
2456 2551  
2457 2552          vfsp = exi->exi_vp->v_vfsp;
2458 2553          ASSERT(vfsp != NULL);
2459 2554          fidp = (fid_t *)&fh->fh_len;
2460 2555  
2461 2556          error = VFS_VGET(vfsp, &vp, fidp);
2462 2557          if (error || vp == NULL) {
2463 2558                  TRACE_1(TR_FAC_NFS, TR_FHTOVP_END,
2464 2559                      "fhtovp_end:(%S)", "VFS_GET failed or vp NULL");
2465 2560                  return (NULL);
2466 2561          }
2467 2562          TRACE_1(TR_FAC_NFS, TR_FHTOVP_END,
2468 2563              "fhtovp_end:(%S)", "end");
2469 2564          return (vp);
2470 2565  }
2471 2566  
2472 2567  /*
2473 2568   * Convert an nfs_fh3 into a vnode.
2474 2569   * Uses the file id (fh_len + fh_data) in the file handle to get the vnode.
2475 2570   * WARNING: users of this routine must do a VN_RELE on the vnode when they
2476 2571   * are done with it.
2477 2572   */
2478 2573  vnode_t *
2479 2574  nfs3_fhtovp(nfs_fh3 *fh, struct exportinfo *exi)
2480 2575  {
2481 2576          vfs_t *vfsp;
2482 2577          vnode_t *vp;
2483 2578          int error;
2484 2579          fid_t *fidp;
2485 2580  
2486 2581          if (exi == NULL)
2487 2582                  return (NULL);  /* not exported */
2488 2583  
2489 2584          ASSERT(exi->exi_vp != NULL);
2490 2585  
2491 2586          if (PUBLIC_FH3(fh)) {
2492 2587                  if (exi->exi_export.ex_flags & EX_PUBLIC)
2493 2588                          return (NULL);
2494 2589                  vp = exi->exi_vp;
2495 2590                  VN_HOLD(vp);
2496 2591                  return (vp);
2497 2592          }
2498 2593  
2499 2594          if (fh->fh3_length < NFS3_OLDFHSIZE ||
2500 2595              fh->fh3_length > NFS3_MAXFHSIZE)
2501 2596                  return (NULL);
2502 2597  
2503 2598          vfsp = exi->exi_vp->v_vfsp;
2504 2599          ASSERT(vfsp != NULL);
2505 2600          fidp = FH3TOFIDP(fh);
2506 2601  
2507 2602          error = VFS_VGET(vfsp, &vp, fidp);
2508 2603          if (error || vp == NULL)
2509 2604                  return (NULL);
2510 2605  
2511 2606          return (vp);
2512 2607  }
2513 2608  
2514 2609  /*
2515 2610   * Convert an nfs_fh4 into a vnode.
2516 2611   * Uses the file id (fh_len + fh_data) in the file handle to get the vnode.
2517 2612   * WARNING: users of this routine must do a VN_RELE on the vnode when they
2518 2613   * are done with it.
2519 2614   */
2520 2615  vnode_t *
2521 2616  nfs4_fhtovp(nfs_fh4 *fh, struct exportinfo *exi, nfsstat4 *statp)
2522 2617  {
2523 2618          vfs_t *vfsp;
2524 2619          vnode_t *vp = NULL;
2525 2620          int error;
2526 2621          fid_t *fidp;
2527 2622          nfs_fh4_fmt_t *fh_fmtp;
2528 2623  #ifdef VOLATILE_FH_TEST
2529 2624          uint32_t volatile_id = 0;
2530 2625  #endif /* VOLATILE_FH_TEST */
2531 2626  
2532 2627          if (exi == NULL) {
2533 2628                  *statp = NFS4ERR_STALE;
2534 2629                  return (NULL);  /* not exported */
2535 2630          }
2536 2631          ASSERT(exi->exi_vp != NULL);
2537 2632  
2538 2633          /* caller should have checked this */
2539 2634          ASSERT(fh->nfs_fh4_len >= NFS_FH4_LEN);
2540 2635  
2541 2636          fh_fmtp = (nfs_fh4_fmt_t *)fh->nfs_fh4_val;
2542 2637          vfsp = exi->exi_vp->v_vfsp;
2543 2638          ASSERT(vfsp != NULL);
2544 2639          fidp = (fid_t *)&fh_fmtp->fh4_len;
2545 2640  
2546 2641  #ifdef VOLATILE_FH_TEST
2547 2642          /* XXX check if volatile - should be changed later */
2548 2643          if (exi->exi_export.ex_flags & (EX_VOLRNM | EX_VOLFH)) {
2549 2644                  /*
2550 2645                   * Filesystem is shared with volatile filehandles
2551 2646                   */
2552 2647                  if (exi->exi_export.ex_flags & EX_VOLRNM)
2553 2648                          volatile_id = find_volrnm_fh_id(exi, fh);
2554 2649                  else
2555 2650                          volatile_id = exi->exi_volatile_id;
2556 2651  
2557 2652                  if (fh_fmtp->fh4_volatile_id != volatile_id) {
2558 2653                          *statp = NFS4ERR_FHEXPIRED;
2559 2654                          return (NULL);
2560 2655                  }
2561 2656          }
2562 2657          /*
2563 2658           * XXX even if test_volatile_fh false, the fh may contain a
2564 2659           * volatile id if obtained when the test was set.
2565 2660           */
2566 2661          fh_fmtp->fh4_volatile_id = (uchar_t)0;
2567 2662  #endif /* VOLATILE_FH_TEST */
2568 2663  
2569 2664          error = VFS_VGET(vfsp, &vp, fidp);
2570 2665          /*
2571 2666           * If we can not get vp from VFS_VGET, perhaps this is
2572 2667           * an nfs v2/v3/v4 node in an nfsv4 pseudo filesystem.
2573 2668           * Check it out.
2574 2669           */
2575 2670          if (error && PSEUDO(exi))
2576 2671                  error = nfs4_vget_pseudo(exi, &vp, fidp);
2577 2672  
2578 2673          if (error || vp == NULL) {
2579 2674                  *statp = NFS4ERR_STALE;
2580 2675                  return (NULL);
2581 2676          }
2582 2677          /* XXX - disgusting hack */
2583 2678          if (vp->v_type == VNON && vp->v_flag & V_XATTRDIR)
2584 2679                  vp->v_type = VDIR;
2585 2680          *statp = NFS4_OK;
2586 2681          return (vp);
2587 2682  }
2588 2683  
2589 2684  /*
2590 2685   * Find the export structure associated with the given filesystem.
2591 2686   * If found, then increment the ref count (exi_count).
2592 2687   */
2593 2688  struct exportinfo *
2594 2689  checkexport(fsid_t *fsid, fid_t *fid)
2595 2690  {
2596 2691          struct exportinfo *exi;
2597 2692          nfs_export_t *ne = nfs_get_export();
2598 2693  
2599 2694          rw_enter(&ne->exported_lock, RW_READER);
2600 2695          for (exi = ne->exptable[exptablehash(fsid, fid)];
2601 2696              exi != NULL;
2602 2697              exi = exi->fid_hash.next) {
2603 2698                  if (exportmatch(exi, fsid, fid)) {
2604 2699                          /*
2605 2700                           * If this is the place holder for the
2606 2701                           * public file handle, then return the
2607 2702                           * real export entry for the public file
2608 2703                           * handle.
2609 2704                           */
2610 2705                          if (exi->exi_export.ex_flags & EX_PUBLIC) {
2611 2706                                  exi = ne->exi_public;
2612 2707                          }
2613 2708  
2614 2709                          exi_hold(exi);
2615 2710                          rw_exit(&ne->exported_lock);
2616 2711                          return (exi);
2617 2712                  }
2618 2713          }
2619 2714          rw_exit(&ne->exported_lock);
2620 2715          return (NULL);
2621 2716  }
2622 2717  
2623 2718  
2624 2719  /*
2625 2720   * "old school" version of checkexport() for NFS4.  NFS4
2626 2721   * rfs4_compound holds exported_lock for duration of compound
2627 2722   * processing.  This version doesn't manipulate exi_count
2628 2723   * since NFS4 breaks fundamental assumptions in the exi_count
2629 2724   * design.
            *
            * The caller must already hold ne->exported_lock; the lock is
            * neither acquired nor released here.
            *
            * Returns the export entry whose (fsid, fid) matches and, when vp is
            * non-NULL, whose exi_vp is vp.  Returns NULL if no entry qualifies.
            * No hold is placed on the returned entry.
2630 2725   */
2631 2726  struct exportinfo *
2632 2727  checkexport4(fsid_t *fsid, fid_t *fid, vnode_t *vp)
2633 2728  {
2634 2729          struct exportinfo *exi;
2635 2730          nfs_export_t *ne = nfs_get_export();
2636 2731  
2637 2732          ASSERT(RW_LOCK_HELD(&ne->exported_lock));
2638 2733  
                   /* Walk the hash chain selected by exptablehash(fsid, fid). */
2639 2734          for (exi = ne->exptable[exptablehash(fsid, fid)];
2640 2735              exi != NULL;
2641 2736              exi = exi->fid_hash.next) {
2642 2737                  if (exportmatch(exi, fsid, fid)) {
2643 2738                          /*
2644 2739                           * If this is the place holder for the
2645 2740                           * public file handle, then return the
2646 2741                           * real export entry for the public file
2647 2742                           * handle.
                                    *
                                    * NOTE(review): exi is overwritten here, so
                                    * if the vp test below fails the loop goes
                                    * on from ne->exi_public's fid_hash.next
                                    * rather than from the entry that matched.
                                    * Confirm that this is intended.
2648 2743                           */
2649 2744                          if (exi->exi_export.ex_flags & EX_PUBLIC) {
2650 2745                                  exi = ne->exi_public;
2651 2746                          }
2652 2747  
2653 2748                          /*
2654 2749                           * If vp is given, check if vp is the
2655 2750                           * same vnode as the exported node.
2656 2751                           *
2657 2752                           * Since VOP_FID of a lofs node returns the
2658 2753                           * fid of its real node (ufs), the exported
2659 2754                           * node for lofs and (pseudo) ufs may have
2660 2755                           * the same fsid and fid.
2661 2756                           */
2662 2757                          if (vp == NULL || vp == exi->exi_vp)
2663 2758                                  return (exi);
2664 2759                  }
2665 2760          }
2666 2761  
2667 2762          return (NULL);
2668 2763  }
2669 2764  
2670 2765  /*
2671 2766   * Free an entire export list node
            *
            * Drops the vnode holds on exi_vp and (if set) exi_dvp, frees every
            * buffer and cache hanging off the exportinfo, destroys its locks
            * and per-bucket AVL trees, and finally frees the exportinfo itself.
            * exi must not be referenced by the caller after this returns.
2672 2767   */
2673 2768  void
2674 2769  exportfree(struct exportinfo *exi)
2675 2770  {
2676 2771          struct exportdata *ex;
2677 2772          struct charset_cache *cache;
2678 2773          int i;
2679 2774  
2680 2775          ex = &exi->exi_export;
2681 2776  
                   /* An entry flagged EX_PUBLIC must never be freed through here. */
2682 2777          ASSERT(exi->exi_vp != NULL && !(exi->exi_export.ex_flags & EX_PUBLIC));
2683 2778          VN_RELE(exi->exi_vp);
2684 2779          if (exi->exi_dvp != NULL)
2685 2780                  VN_RELE(exi->exi_dvp);
2686 2781  
                   /*
                    * The size is recomputed from the string itself, so ex_index
                    * must have been allocated as exactly strlen + 1 bytes (this
                    * is what loadindex() does).
                    */
2687 2782          if (ex->ex_flags & EX_INDEX)
2688 2783                  kmem_free(ex->ex_index, strlen(ex->ex_index) + 1);
2689 2784  
2690 2785          kmem_free(ex->ex_path, ex->ex_pathlen + 1);
2691 2786          nfsauth_cache_free(exi);
2692 2787  
2693 2788          /*
2694 2789           * if there is a character set mapping cached, clean it up.
                    * Each pass unlinks and frees the current list head, so the
                    * loop re-reads exi->exi_charset instead of following
                    * cache->next after the free.
2695 2790           */
2696 2791          for (cache = exi->exi_charset; cache != NULL;
2697 2792              cache = exi->exi_charset) {
2698 2793                  if (cache->inbound != (kiconv_t)-1)
2699 2794                          (void) kiconv_close(cache->inbound);
2700 2795                  if (cache->outbound != (kiconv_t)-1)
2701 2796                          (void) kiconv_close(cache->outbound);
2702 2797                  exi->exi_charset = cache->next;
2703 2798                  kmem_free(cache, sizeof (struct charset_cache));
2704 2799          }
2705 2800  
2706 2801          if (exi->exi_logbuffer != NULL)
2707 2802                  nfslog_disable(exi);
2708 2803  
                   /* The log buffer name and tag are only present with EX_LOG. */
2709 2804          if (ex->ex_flags & EX_LOG) {
2710 2805                  kmem_free(ex->ex_log_buffer, ex->ex_log_bufferlen + 1);
2711 2806                  kmem_free(ex->ex_tag, ex->ex_taglen + 1);
2712 2807          }
2713 2808  
2714 2809          if (exi->exi_visible)
2715 2810                  free_visible(exi->exi_visible);
2716 2811  
2717 2812          srv_secinfo_list_free(ex->ex_secinfo, ex->ex_seccnt);
2718 2813  
2719 2814  #ifdef VOLATILE_FH_TEST
2720 2815          free_volrnm_list(exi);
2721 2816          mutex_destroy(&exi->exi_vol_rename_lock);
2722 2817  #endif /* VOLATILE_FH_TEST */
2723 2818  
2724 2819          mutex_destroy(&exi->exi_lock);
2725 2820          rw_destroy(&exi->exi_cache_lock);
2726 2821          /*
2727 2822           * All nodes in the exi_cache AVL trees were removed and freed in the
2728 2823           * nfsauth_cache_free() call above.  We will just destroy and free the
2729 2824           * empty AVL trees here.
2730 2825           */
2731 2826          for (i = 0; i < AUTH_TABLESIZE; i++) {
2732 2827                  avl_destroy(exi->exi_cache[i]);
2733 2828                  kmem_free(exi->exi_cache[i], sizeof (avl_tree_t));
2734 2829          }
2735 2830  
2736 2831          kmem_free(exi, sizeof (*exi));
2737 2832  }
2738 2833  
2739 2834  /*
2740 2835   * load the index file from user space into kernel space.
            *
            * On entry kex->ex_index is the source address handed to copyinstr().
            * On success it is replaced by a freshly allocated kernel copy of the
            * string, sized to hold the string plus its terminating NUL, and 0 is
            * returned.  On failure the copyinstr() error is returned and
            * kex->ex_index is left unchanged.
2741 2836   */
2742 2837  static int
2743 2838  loadindex(struct exportdata *kex)
2744 2839  {
2745 2840          int error;
2746 2841          char index[MAXNAMELEN+1];
2747 2842          size_t len;
2748 2843  
2749 2844          /*
2750 2845           * copyinstr copies the complete string including the NULL and
2751 2846           * returns the len with the NULL byte included in the calculation
2752 2847           * as long as the max length is not exceeded.
2753 2848           */
2754 2849          if (error = copyinstr(kex->ex_index, index, sizeof (index), &len))
2755 2850                  return (error);
2756 2851  
                   /* len already counts the NUL, so no extra byte is added here. */
2757 2852          kex->ex_index = kmem_alloc(len, KM_SLEEP);
2758 2853          bcopy(index, kex->ex_index, len);
2759 2854  
2760 2855          return (0);
2761 2856  }
2762 2857  
           /*
            * Take a reference on exi: increment exi_count while holding exi_lock.
            * Each exi_hold() is expected to be paired with an exi_rele().
            */
2763 2858  void
2764 2859  exi_hold(struct exportinfo *exi)
2765 2860  {
2766 2861          mutex_enter(&exi->exi_lock);
2767 2862          exi->exi_count++;
2768 2863          mutex_exit(&exi->exi_lock);
2769 2864  }
2770 2865  
2771 2866  /*
2772 2867   * When a thread completes using exi, it should call exi_rele().
2773 2868   * exi_rele() decrements exi_count. It releases exi if exi_count == 0, i.e.
2774 2869   * if this is the last user of exi and exi is not on exportinfo list anymore
            *
            * exi must not be touched by the caller after the call, since this
            * may have freed it.
2775 2870   */
2776 2871  void
2777 2872  exi_rele(struct exportinfo *exi)
2778 2873  {
2779 2874          mutex_enter(&exi->exi_lock);
2780 2875          exi->exi_count--;
2781 2876          if (exi->exi_count == 0) {
                           /*
                            * exi_lock has to be dropped before exportfree(),
                            * which destroys that mutex and frees exi.
                            */
2782 2877                  mutex_exit(&exi->exi_lock);
2783 2878                  exportfree(exi);
2784 2879          } else
2785 2880                  mutex_exit(&exi->exi_lock);
2786 2881  }
2787 2882  
2788 2883  #ifdef VOLATILE_FH_TEST
2789 2884  /*
2790 2885   * Test for volatile fh's - add file handle to list and set its volatile id
2791 2886   * to time it was renamed. If EX_VOLFH is also on and the fs is reshared,
2792 2887   * the vol_rename queue is purged.
2793 2888   *
2794 2889   * XXX This code is for unit testing purposes only... To correctly use it, it
2795 2890   * needs to tie a rename list to the export struct and (more
2796 2891   * important), protect access to the exi rename list using a write lock.
2797 2892   */
2798 2893  
2799 2894  /*
2800 2895   * get the fh vol record if it's in the volatile on rename list. Don't check
2801 2896   * volatile_id in the file handle - compare only the file handles.
            *
            * The caller must hold exi->exi_vol_rename_lock.  Returns the matching
            * list entry, or NULL if the handle length is not NFS_FH4_LEN or no
            * entry matches.
2802 2897   */
2803 2898  static struct ex_vol_rename *
2804 2899  find_volrnm_fh(struct exportinfo *exi, nfs_fh4 *fh4p)
2805 2900  {
2806 2901          struct ex_vol_rename *p = NULL;
2807 2902          fhandle4_t *fhp;
2808 2903  
2809 2904          /* XXX shouldn't we assert &exported_lock held? */
2810 2905          ASSERT(MUTEX_HELD(&exi->exi_vol_rename_lock));
2811 2906  
2812 2907          if (fh4p->nfs_fh4_len != NFS_FH4_LEN) {
2813 2908                  return (NULL);
2814 2909          }
                   /* Compare only the fh4_i portion of each handle. */
2815 2910          fhp = &((nfs_fh4_fmt_t *)fh4p->nfs_fh4_val)->fh4_i;
2816 2911          for (p = exi->exi_vol_rename; p != NULL; p = p->vrn_next) {
2817 2912                  if (bcmp(fhp, &p->vrn_fh_fmt.fh4_i,
2818 2913                      sizeof (fhandle4_t)) == 0)
2819 2914                          break;
2820 2915          }
                   /* p is NULL here when the list was exhausted without a match. */
2821 2916          return (p);
2822 2917  }
2823 2918  
2824 2919  /*
2825 2920   * get the volatile id for the fh if it is on the rename list; otherwise
            * return the export's own exi_volatile_id. Ignore the
2826 2921   * volatile_id in the file handle - compare only the file handles.
            *
            * Takes and releases exi->exi_vol_rename_lock around the lookup.
2827 2922   */
2828 2923  static uint32_t
2829 2924  find_volrnm_fh_id(struct exportinfo *exi, nfs_fh4 *fh4p)
2830 2925  {
2831 2926          struct ex_vol_rename *p;
2832 2927          uint32_t volatile_id;
2833 2928  
2834 2929          mutex_enter(&exi->exi_vol_rename_lock);
2835 2930          p = find_volrnm_fh(exi, fh4p);
                   /* Read the id while the lock still protects the list entry. */
2836 2931          volatile_id = (p ? p->vrn_fh_fmt.fh4_volatile_id :
2837 2932              exi->exi_volatile_id);
2838 2933          mutex_exit(&exi->exi_vol_rename_lock);
2839 2934          return (volatile_id);
2840 2935  }
2841 2936  
2842 2937  /*
2843 2938   * Free the volatile on rename list - will be called if a filesystem is
2844 2939   * unshared or reshared without EX_VOLRNM
            *
            * Frees every entry and leaves exi->exi_vol_rename set to NULL.
2845 2940   */
2846 2941  static void
2847 2942  free_volrnm_list(struct exportinfo *exi)
2848 2943  {
2849 2944          struct ex_vol_rename *p, *pnext;
2850 2945  
2851 2946          /* no need to hold mutex lock - this one is called from exportfree */
2852 2947          for (p = exi->exi_vol_rename; p != NULL; p = pnext) {
                           /* Save the successor before p is freed. */
2853 2948                  pnext = p->vrn_next;
2854 2949                  kmem_free(p, sizeof (*p));
2855 2950          }
2856 2951          exi->exi_vol_rename = NULL;
2857 2952  }
2858 2953  
2859 2954  /*
2860 2955   * Add a file handle to the volatile on rename list.
            *
            * Builds the NFSv4 file handle for vp with makefh4() and records it on
            * exi's rename list if it is not already there.  In either case the
            * entry's volatile id is set to the current time in seconds.  Returns
            * silently, changing nothing, if the handle cannot be built or does
            * not have the expected length.
2861 2956   */
2862 2957  void
2863 2958  add_volrnm_fh(struct exportinfo *exi, vnode_t *vp)
2864 2959  {
2865 2960          struct ex_vol_rename *p;
2866 2961          char fhbuf[NFS4_FHSIZE];
2867 2962          nfs_fh4 fh4;
2868 2963          int error;
2869 2964  
2870 2965          fh4.nfs_fh4_val = fhbuf;
2871 2966          error = makefh4(&fh4, vp, exi);
                   /* sizeof (p->vrn_fh_fmt) is a compile-time size; p is not read. */
2872 2967          if ((error) || (fh4.nfs_fh4_len != sizeof (p->vrn_fh_fmt))) {
2873 2968                  return;
2874 2969          }
2875 2970  
2876 2971          mutex_enter(&exi->exi_vol_rename_lock);
2877 2972  
2878 2973          p = find_volrnm_fh(exi, &fh4);
2879 2974  
2880 2975          if (p == NULL) {
                           /*
                            * Not on the list yet: allocate a record and push it
                            * onto the head.  Note the KM_SLEEP allocation is made
                            * with exi_vol_rename_lock held.
                            */
2881 2976                  p = kmem_alloc(sizeof (*p), KM_SLEEP);
2882 2977                  bcopy(fh4.nfs_fh4_val, &p->vrn_fh_fmt, sizeof (p->vrn_fh_fmt));
2883 2978                  p->vrn_next = exi->exi_vol_rename;
2884 2979                  exi->exi_vol_rename = p;
2885 2980          }
2886 2981  
                   /* New or existing, stamp the entry with the current time. */
2887 2982          p->vrn_fh_fmt.fh4_volatile_id = gethrestime_sec();
2888 2983          mutex_exit(&exi->exi_vol_rename_lock);
2889 2984  }
2890 2985  
2891 2986  #endif /* VOLATILE_FH_TEST */
  
    | 
      ↓ open down ↓ | 
    1035 lines elided | 
    
      ↑ open up ↑ | 
  
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX