Print this page
    
11083 support NFS server in zone
Portions contributed by: Dan Kruchinin <dan.kruchinin@nexenta.com>
Portions contributed by: Stepan Zastupov <stepan.zastupov@gmail.com>
Portions contributed by: Joyce McIntosh <joyce.mcintosh@nexenta.com>
Portions contributed by: Mike Zeller <mike@mikezeller.net>
Portions contributed by: Dan McDonald <danmcd@joyent.com>
Portions contributed by: Gordon Ross <gordon.w.ross@gmail.com>
Portions contributed by: Vitaliy Gusev <gusev.vitaliy@gmail.com>
Reviewed by: Rick McNeal <rick.mcneal@nexenta.com>
Reviewed by: Rob Gittins <rob.gittins@nexenta.com>
Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com>
Reviewed by: Jason King <jbk@joyent.com>
Reviewed by: C Fraire <cfraire@me.com>
Change-Id: I22f289d357503f9b48a0bc2482cc4328a6d43d16
    
      
        | Split | 
	Close | 
      
      | Expand all | 
      | Collapse all | 
    
    
          --- old/usr/src/uts/common/nfs/export.h
          +++ new/usr/src/uts/common/nfs/export.h
   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  
    | 
      ↓ open down ↓ | 
    15 lines elided | 
    
      ↑ open up ↑ | 
  
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  
  22   22  /*
  23   23   * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
  24   24   * Copyright 2016 Nexenta Systems, Inc.  All rights reserved.
  25   25   * Copyright 2016 Jason King.
       26 + * Copyright 2018 Nexenta Systems, Inc.  All rights reserved.
  26   27   */
  27   28  
  28   29  /*      Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
  29   30  /*        All Rights Reserved   */
  30   31  
  31   32  #ifndef _NFS_EXPORT_H
  32   33  #define _NFS_EXPORT_H
  33   34  
  34   35  #include <nfs/nfs_sec.h>
  35   36  #include <nfs/auth.h>
  36   37  #include <sys/vnode.h>
  37   38  #include <nfs/nfs4.h>
  38   39  #include <sys/kiconv.h>
  39   40  #include <sys/avl.h>
       41 +#include <sys/zone.h>
  40   42  
       43 +#ifdef _KERNEL
       44 +#include <sys/pkp_hash.h> /* for PKP_HASH_SIZE */
       45 +#endif /* _KERNEL */
       46 +
  41   47  #ifdef  __cplusplus
  42   48  extern "C" {
  43   49  #endif
  44   50  
  45   51  /*
  46   52   * nfs pseudo flavor number is owned by IANA. Need to make sure the
  47   53   * Solaris specific NFS_FLAVOR_NOMAP number will not overlap with any
  48   54   * new IANA defined pseudo flavor numbers. The chance for the overlap
  49   55   * is very small since the growth of new flavor numbers is expected
  50   56   * to be limited.
  51   57   */
  52   58  #define NFS_FLAVOR_NOMAP        999999  /* no nfs flavor mapping */
  53   59  
  54   60  /*
  55   61   * As duplicate flavors can be passed into exportfs in the arguments, we
  56   62   * allocate a cleaned up array with non duplicate flavors on the stack.
  57   63   * So we need to know how much to allocate.
  58   64   */
   59   65  #define MAX_FLAVORS             6       /* none, sys, dh, krb5, krb5i, krb5p */
  60   66  
  61   67  /*
  62   68   * Note: exported_lock is currently used to ensure the integrity of
  63   69   * the secinfo fields.
  64   70   */
  65   71  struct secinfo {
  66   72          seconfig_t      s_secinfo;      /* /etc/nfssec.conf entry */
  67   73          unsigned int    s_flags;        /* flags (see below) */
  68   74          int32_t         s_refcnt;       /* reference count for tracking */
  69   75                                          /* how many children (self included) */
  70   76                                          /* use this flavor. */
  71   77          int             s_window;       /* window */
  72   78          uint_t          s_rootid;       /* UID to use for authorized roots */
  73   79          int             s_rootcnt;      /* count of root names */
  74   80          caddr_t         *s_rootnames;   /* array of root names */
  75   81                                          /* they are strings for AUTH_DES and */
  76   82                                          /* rpc_gss_principal_t for RPCSEC_GSS */
  77   83  };
  78   84  
  79   85  #ifdef _SYSCALL32
  80   86  struct secinfo32 {
  81   87          seconfig32_t    s_secinfo;      /* /etc/nfssec.conf entry */
  82   88          uint32_t        s_flags;        /* flags (see below) */
  83   89          int32_t         s_refcnt;       /* reference count for tracking */
  84   90                                          /* how many children (self included) */
  85   91                                          /* use this flavor. */
  86   92          int32_t         s_window;       /* window */
  87   93          uint32_t        s_rootid;       /* UID to use for authorized roots */
  88   94          int32_t         s_rootcnt;      /* count of root names */
  89   95          caddr32_t       s_rootnames;    /* array of root names */
  90   96                                          /* they are strings for AUTH_DES and */
  91   97                                          /* rpc_gss_principal_t for RPCSEC_GSS */
  92   98  };
  93   99  #endif /* _SYSCALL32 */
  94  100  
  95  101  /*
  96  102   * security negotiation related
  97  103   */
  98  104  
  99  105  #define SEC_QUERY       0x01    /* query sec modes */
 100  106  
 101  107  struct sec_ol {
 102  108          int             sec_flags;      /* security nego flags */
 103  109          uint_t          sec_index;      /* index into sec flavor array */
 104  110  };
 105  111  
 106  112  /*
 107  113   * Per-mode flags (secinfo.s_flags)
 108  114   */
 109  115  #define M_RO            0x01    /* exported ro to all */
 110  116  #define M_ROL           0x02    /* exported ro to all listed */
 111  117  #define M_RW            0x04    /* exported rw to all */
  112  118  #define M_RWL           0x08    /* exported rw to all listed */
 113  119  #define M_ROOT          0x10    /* root list is defined */
 114  120  #define M_4SEC_EXPORTED 0x20    /* this is an explicitly shared flavor */
 115  121  #define M_NONE          0x40    /* none list is defined */
 116  122  #define M_MAP           0x80    /* uidmap and/or gidmap is defined */
 117  123  
 118  124  /* invalid secinfo reference count */
 119  125  #define SEC_REF_INVALID(p) ((p)->s_refcnt < 1)
 120  126  
 121  127  /* last secinfo reference */
 122  128  #define SEC_REF_LAST(p) ((p)->s_refcnt == 1)
 123  129  
 124  130  /* sec flavor explicitly shared for the exported node */
 125  131  #define SEC_REF_EXPORTED(p) ((p)->s_flags & M_4SEC_EXPORTED)
 126  132  
 127  133  /* the only reference count left is for referring itself */
 128  134  #define SEC_REF_SELF(p) (SEC_REF_LAST(p) && SEC_REF_EXPORTED(p))
 129  135  
 130  136  /*
 131  137   * The export information passed to exportfs() (Version 2)
 132  138   */
 133  139  #define EX_CURRENT_VERSION 2    /* current version of exportdata struct */
 134  140  
 135  141  struct exportdata {
 136  142          int             ex_version;     /* structure version */
 137  143          char            *ex_path;       /* exported path */
 138  144          size_t          ex_pathlen;     /* path length */
 139  145          int             ex_flags;       /* flags */
 140  146          unsigned int    ex_anon;        /* uid for unauthenticated requests */
 141  147          int             ex_seccnt;      /* count of security modes */
 142  148          struct secinfo  *ex_secinfo;    /* security mode info */
 143  149          char            *ex_index;      /* index file for public filesystem */
 144  150          char            *ex_log_buffer; /* path to logging buffer file */
 145  151          size_t          ex_log_bufferlen;       /* buffer file path len */
 146  152          char            *ex_tag;        /* tag used to identify log config */
 147  153          size_t          ex_taglen;      /* tag length */
 148  154  };
 149  155  
 150  156  #ifdef _SYSCALL32
 151  157  struct exportdata32 {
 152  158          int32_t         ex_version;     /* structure version */
 153  159          caddr32_t       ex_path;        /* exported path */
 154  160          int32_t         ex_pathlen;     /* path length */
 155  161          int32_t         ex_flags;       /* flags */
 156  162          uint32_t        ex_anon;        /* uid for unauthenticated requests */
 157  163          int32_t         ex_seccnt;      /* count of security modes */
 158  164          caddr32_t       ex_secinfo;     /* security mode info */
 159  165          caddr32_t       ex_index;       /* index file for public filesystem */
 160  166          caddr32_t       ex_log_buffer;  /* path to logging buffer file */
 161  167          int32_t         ex_log_bufferlen;       /* buffer file path len */
 162  168          caddr32_t       ex_tag;         /* tag used to identify log config */
 163  169          int32_t         ex_taglen;      /* tag length */
 164  170  };
 165  171  #endif /* _SYSCALL32 */
 166  172  
 167  173  /*
 168  174   * exported vfs flags.
 169  175   */
 170  176  
 171  177  #define EX_NOSUID       0x01    /* exported with unsetable set[ug]ids */
 172  178  #define EX_ACLOK        0x02    /* exported with maximal access if acl exists */
 173  179  #define EX_PUBLIC       0x04    /* exported with public filehandle */
 174  180  #define EX_NOSUB        0x08    /* no nfs_getfh or MCL below export point */
 175  181  #define EX_INDEX        0x10    /* exported with index file specified */
 176  182  #define EX_LOG          0x20    /* logging enabled */
 177  183  #define EX_LOG_ALLOPS   0x40    /* logging of all RPC operations enabled */
 178  184                                  /* by default only operations which affect */
 179  185                                  /* transaction logging are enabled */
 180  186  #define EX_PSEUDO       0x80    /* pseudo filesystem export */
 181  187  #ifdef VOLATILE_FH_TEST
 182  188  #define EX_VOLFH        0x100   /* XXX nfsv4 fh may expire anytime */
 183  189  #define EX_VOLRNM       0x200   /* XXX nfsv4 fh expire at rename */
 184  190  #define EX_VOLMIG       0x400   /* XXX nfsv4 fh expire at migration */
 185  191  #define EX_NOEXPOPEN    0x800   /* XXX nfsv4 fh no expire with open */
 186  192  #endif /* VOLATILE_FH_TEST */
 187  193  
 188  194  #define EX_CHARMAP      0x1000  /* NFS may need a character set conversion */
 189  195  #define EX_NOACLFAB     0x2000  /* If set, NFSv2 and v3 servers won't */
 190  196                                  /* fabricate an aclent_t ACL on file systems */
 191  197                                  /* that don't support aclent_t ACLs */
 192  198  #define EX_NOHIDE       0x4000  /* traversable from exported parent */
 193  199  
 194  200  #ifdef  _KERNEL
 195  201  
 196  202  #define RPC_IDEMPOTENT  0x1     /* idempotent or not */
 197  203  /*
 198  204   * Be very careful about which NFS procedures get the RPC_ALLOWANON bit.
 199  205   * Right now, if this bit is on, we ignore the results of per NFS request
 200  206   * access control.
 201  207   */
 202  208  #define RPC_ALLOWANON   0x2     /* allow anonymous access */
 203  209  #define RPC_MAPRESP     0x4     /* use mapped response buffer */
 204  210  #define RPC_AVOIDWORK   0x8     /* do work avoidance for dups */
 205  211  #define RPC_PUBLICFH_OK 0x10    /* allow use of public filehandle */
 206  212  
 207  213  /*
  208  214   * RPC_ALL is an OR of the above bits, except RPC_MAPRESP, to be used
  209  215   * with "don't care" nfsv4 ops. The flags of an nfsv4 request is the
  210  216   * bit-AND of the per-op flags.
 211  217   */
 212  218  #define RPC_ALL (RPC_IDEMPOTENT|RPC_ALLOWANON|RPC_AVOIDWORK|RPC_PUBLICFH_OK)
 213  219  
 214  220  
 215  221  #ifdef VOLATILE_FH_TEST
 216  222  struct ex_vol_rename {
 217  223          nfs_fh4_fmt_t vrn_fh_fmt;
 218  224          struct ex_vol_rename *vrn_next;
 219  225  };
 220  226  #endif /* VOLATILE_FH_TEST */
 221  227  
 222  228  /*
 223  229   * An auth cache client entry.  This is the umbrella structure and contains all
 224  230   * related auth_cache entries in the authc_tree AVL tree.
 225  231   */
 226  232  struct auth_cache_clnt {
 227  233          avl_node_t              authc_link;
 228  234          struct netbuf           authc_addr;     /* address of the client */
 229  235          krwlock_t               authc_lock;     /* protects authc_tree */
 230  236          avl_tree_t              authc_tree;     /* auth_cache entries */
 231  237  };
 232  238  
 233  239  /*
 234  240   * An auth cache entry can exist in 6 states.
 235  241   *
 236  242   * A NEW entry was recently allocated and added to the cache.  It does not
 237  243   * contain the valid auth state yet.
 238  244   *
 239  245   * A WAITING entry is one which is actively engaging the user land mountd code
 240  246   * to authenticate or re-authenticate it.  The auth state might not be valid
 241  247   * yet.  The other threads should wait on auth_cv until the retrieving thread
 242  248   * finishes the retrieval and changes the auth cache entry to FRESH, or NEW (in
  243  249   * case this entry had no valid auth state yet).
 244  250   *
 245  251   * A REFRESHING entry is one which is actively engaging the user land mountd
 246  252   * code to re-authenticate the cache entry.  There is currently no other thread
 247  253   * waiting for the results of the refresh.
 248  254   *
 249  255   * A FRESH entry is one which is valid (it is either newly retrieved or has
 250  256   * been refreshed at least once).
 251  257   *
 252  258   * A STALE entry is one which has been detected to be too old.  The transition
 253  259   * from FRESH to STALE prevents multiple threads from submitting refresh
 254  260   * requests.
 255  261   *
 256  262   * An INVALID entry is one which was either STALE or REFRESHING and was deleted
 257  263   * out of the encapsulating exi.  Since we can't delete it yet, we mark it as
 258  264   * INVALID, which lets the refresh thread know not to work on it and free it
 259  265   * instead.
 260  266   *
 261  267   * Note that the auth state of the entry is valid, even if the entry is STALE.
 262  268   * Just as you can eat stale bread, you can consume a stale cache entry. The
 263  269   * only time the contents change could be during the transition from REFRESHING
 264  270   * or WAITING to FRESH.
 265  271   *
 266  272   * Valid state transitions:
 267  273   *
 268  274   *          alloc
 269  275   *            |
 270  276   *            v
 271  277   *         +-----+
 272  278   *    +--->| NEW |------>free
 273  279   *    |    +-----+
 274  280   *    |       |
 275  281   *    |       v
 276  282   *    |  +---------+
 277  283   *    +<-| WAITING |
 278  284   *    ^  +---------+
 279  285   *    |       |
 280  286   *    |       v
 281  287   *    |       +<--------------------------+<---------------+
 282  288   *    |       |                           ^                |
 283  289   *    |       v                           |                |
 284  290   *    |   +-------+    +-------+    +------------+    +---------+
 285  291   *    +---| FRESH |--->| STALE |--->| REFRESHING |--->| WAITING |
 286  292   *        +-------+    +-------+    +------------+    +---------+
 287  293   *            |            |              |
 288  294   *            |            v              |
 289  295   *            v       +---------+         |
 290  296   *          free<-----| INVALID |<--------+
 291  297   *                    +---------+
 292  298   */
  293  299  typedef enum auth_state {
  294  300          NFS_AUTH_FRESH,         /* valid: newly retrieved or refreshed */
  295  301          NFS_AUTH_STALE,         /* detected to be too old */
  296  302          NFS_AUTH_REFRESHING,    /* re-authentication via mountd underway */
  297  303          NFS_AUTH_INVALID,       /* deleted from exi; to be freed */
  298  304          NFS_AUTH_NEW,           /* just allocated; no valid auth state */
  299  305          NFS_AUTH_WAITING        /* retrieval underway; others wait on auth_cv */
  300  306  } auth_state_t;
 301  307  
 302  308  /*
 303  309   * An authorization cache entry
 304  310   *
 305  311   * Either the state in auth_state will protect the
 306  312   * contents or auth_lock must be held.
 307  313   */
  308  314  struct auth_cache {
  309  315          avl_node_t              auth_link;      /* AVL tree linkage */
  310  316          struct auth_cache_clnt  *auth_clnt;     /* associated client entry */
  311  317          int                     auth_flavor;
  312  318          cred_t                  *auth_clnt_cred;
  313  319          uid_t                   auth_srv_uid;
  314  320          gid_t                   auth_srv_gid;
  315  321          uint_t                  auth_srv_ngids; /* presumably # of auth_srv_gids */
  316  322          gid_t                   *auth_srv_gids;
  317  323          int                     auth_access;
  318  324          time_t                  auth_time;
  319  325          time_t                  auth_freshness;
  320  326          auth_state_t            auth_state;     /* see auth_state_t */
  321  327          kmutex_t                auth_lock;      /* held unless auth_state protects */
  322  328          kcondvar_t              auth_cv;        /* waited on while entry is WAITING */
  323  329  };
 324  330  
 325  331  #define AUTH_TABLESIZE  32
 326  332  
 327  333  /*
 328  334   * Structure containing log file meta-data.
 329  335   */
 330  336  struct log_file {
 331  337          unsigned int    lf_flags;       /* flags (see below) */
 332  338          int             lf_writers;     /* outstanding writers */
 333  339          int             lf_refcnt;      /* references to this struct */
 334  340          caddr_t         lf_path;        /* buffer file location */
 335  341          vnode_t         *lf_vp;         /* vnode for the buffer file */
 336  342          kmutex_t        lf_lock;
 337  343          kcondvar_t      lf_cv_waiters;
 338  344  };
 339  345  
 340  346  /*
 341  347   * log_file and log_buffer flags.
 342  348   */
 343  349  #define L_WAITING       0x01            /* flush of in-core data to stable */
 344  350                                          /* storage in progress */
 345  351  #define L_PRINTED       0x02            /* error message printed to console */
 346  352  #define L_ERROR         0x04            /* error condition detected */
 347  353  
 348  354  /*
 349  355   * The logging buffer information.
 350  356   * This structure may be shared by multiple exportinfo structures,
 351  357   * if they share the same buffer file.
 352  358   * This structure contains the basic information about the buffer, such
  353  359   * as its location in the filesystem.
 354  360   *
 355  361   * 'lb_lock' protects all the fields in this structure except for 'lb_path',
 356  362   * and 'lb_next'.
 357  363   * 'lb_path' is a write-once/read-many field which needs no locking, it is
 358  364   * set before the structure is linked to any exportinfo structure.
 359  365   * 'lb_next' is protected by the log_buffer_list_lock.
 360  366   */
 361  367  struct log_buffer {
 362  368          unsigned int    lb_flags;       /* L_ONLIST set? */
 363  369          int             lb_refcnt;      /* references to this struct */
 364  370          unsigned int    lb_rec_id;      /* used to generate unique id */
 365  371          caddr_t         lb_path;        /* buffer file pathname */
 366  372          struct log_file *lb_logfile;    /* points to log_file structure */
 367  373          kmutex_t        lb_lock;
 368  374          struct log_buffer       *lb_next;
 369  375          kcondvar_t      lb_cv_waiters;
 370  376          caddr_t         lb_records;     /* linked list of records to write */
 371  377          int             lb_num_recs;    /* # of records to write */
 372  378          ssize_t         lb_size_queued; /* number of bytes queued for write */
 373  379  };
 374  380  
 375  381  #define LOG_BUFFER_HOLD(lbp)    { \
 376  382          mutex_enter(&(lbp)->lb_lock); \
 377  383          (lbp)->lb_refcnt++; \
 378  384          mutex_exit(&(lbp)->lb_lock); \
 379  385  }
 380  386  
 381  387  #define LOG_BUFFER_RELE(lbp)    { \
 382  388          log_buffer_rele(lbp); \
 383  389  }
 384  390  
 385  391  /*
 386  392   * Structure for character set conversion mapping based on client address.
 387  393   */
  388  394  struct charset_cache {
  389  395          struct charset_cache *next;     /* next entry in the list */
  390  396          kiconv_t        inbound;        /* kiconv handle (TODO confirm direction) */
  391  397          kiconv_t        outbound;       /* kiconv handle (TODO confirm direction) */
  392  398          struct sockaddr client_addr;    /* client address this mapping is for */
  393  399  };
 394  400  
 395  401  /* Forward declarations */
 396  402  struct exportinfo;
 397  403  struct exp_visible;
 398  404  struct svc_req;
 399  405  
 400  406  /*
 401  407   * Treenodes are used to build tree representing every node which is part
 402  408   * of nfs server pseudo namespace. They are connected with both exportinfo
 403  409   * and exp_visible struct. They were introduced to avoid lookup of ".."
 404  410   * in the underlying file system during unshare, which was failing if the
 405  411   * file system was forcibly unmounted or if the directory was removed.
 406  412   * One exp_visible_t can be shared via several treenode_t, i.e.
 407  413   * different tree_vis can point to the same exp_visible_t.
 408  414   * This will happen if some directory is on two different shared paths:
 409  415   * E.g. after share /tmp/a/b1 and share /tmp/a/b2 there will be two treenodes
 410  416   * corresponding to /tmp/a and both will have same value in tree_vis.
 411  417   *
 412  418   *
 413  419   *
 414  420   *     NEW DATA STRUCT         ORIGINAL DATA STRUCT
 415  421   *
 416  422   * ns_root +---+               +----------+
 417  423   *         | / |               |PSEUDO EXP|-->+---+   +---+   +---+
 418  424   *         +---+---------  ----+----------+   | a |-->| k |-->| b |
 419  425   *          /\                                +---+   +---+   +---+
 420  426   *         /  \                                .       .       .
 421  427   *     +---+...\.........  .....................       .       .
 422  428   *    *| a |    \              +----------+            .       .
 423  429   *     +---+-----\-------  ----|REAL EXP a|            .       .
 424  430   *       /        \            +----------+            .       .
 425  431   *      /        +===+...  .............................       .
 426  432   *     /        *| k |         +----------+                    .
 427  433   *    /          +===+---  ----|REAL EXP k|                    .
 428  434   *   /                         +----------+                    .
 429  435   *  +===+................  .....................................
 430  436   * *| b |                      +----------+
 431  437   *  +===+----------------  ----|REAL EXP b|-->+---+
 432  438   *     \                       +----------+   | d |
 433  439   *     +===+.............  ...................+---+
 434  440   *     | d |                   +----------+
 435  441   *     +===+-------------  ----|PSEUDO EXP|-->+---+   +---+
 436  442   *     /                       +----------+   | e |-->| g |
 437  443   * +---+.................  ...................+---+   +---+
 438  444   * | e |                                              .
 439  445   * +---+                                              .
 440  446   *    \                                               .
 441  447   *    +---+..............  ............................
 442  448   *   *| g |                    +----------+
 443  449   *    +---+--------------  ----|REAL EXP g|
 444  450   *                             +----------+
 445  451   *
 446  452   *
 447  453   *
 448  454   * +===+               +---+                    +---+
 449  455   * | b |..mountpoint   | e |..directory/file   *| a |..node is shared
 450  456   * +===+  (VROOT)      +---+                    +---+
 451  457   *
 452  458   *
 453  459   * Bi-directional interconnect:
 454  460   * treenode_t::tree_exi ---------  exportinfo_t::exi_tree
 455  461   * One-way direction connection:
 456  462   * treenode_t::tree_vis .........> exp_visible_t
 457  463   */
 458  464  /* Access to treenode_t is under protection of exported_lock RW_LOCK */
 459  465  typedef struct treenode {
  
    | 
      ↓ open down ↓ | 
    409 lines elided | 
    
      ↑ open up ↑ | 
  
 460  466          /* support for generic n-ary trees */
 461  467          struct treenode *tree_parent;
 462  468          struct treenode *tree_child_first;
 463  469          struct treenode *tree_sibling; /* next sibling */
 464  470          /* private, nfs specific part */
 465  471          struct exportinfo  *tree_exi;
 466  472          struct exp_visible *tree_vis;
 467  473  } treenode_t;
 468  474  
 469  475  /*
 470      - * TREE_ROOT checks if the node corresponds to a filesystem root
      476 + * Now that we have links to chase, we can get the zone rootvp just from
      477 + * an export.  No current-zone-context needed.
      478 + */
      479 +#define EXI_TO_ZONEROOTVP(exi) ((exi)->exi_ne->exi_root->exi_vp)
      480 +
      481 +/*
      482 + * TREE_ROOT checks if the node corresponds to a filesystem root or
      483 + * the zone's root directory.
 471  484   * TREE_EXPORTED checks if the node is explicitly shared
 472  485   */
 473  486  
 474  487  #define TREE_ROOT(t) \
 475      -        ((t)->tree_exi && (t)->tree_exi->exi_vp->v_flag & VROOT)
      488 +        ((t)->tree_exi != NULL && \
      489 +        (((t)->tree_exi->exi_vp->v_flag & VROOT) || \
      490 +        VN_CMP(EXI_TO_ZONEROOTVP((t)->tree_exi), (t)->tree_exi->exi_vp)))
 476  491  
 477  492  #define TREE_EXPORTED(t) \
 478  493          ((t)->tree_exi && !PSEUDO((t)->tree_exi))
 479  494  
 480      -/* Root of nfs pseudo namespace */
 481      -extern treenode_t *ns_root;
 482      -
 483  495  #define EXPTABLESIZE   256
 484  496  
 485  497  struct exp_hash {
 486  498          struct exportinfo       *prev;  /* ptr to the previous exportinfo */
 487  499          struct exportinfo       *next;  /* ptr to the next exportinfo */
 488  500          struct exportinfo       **bckt; /* backpointer to the hash bucket */
 489  501  };
 490  502  
 491  503  /*
 492  504   * A node associated with an export entry on the
 493  505   * list of exported filesystems.
 494  506   *
 495  507   * exi_count+exi_lock protects an individual exportinfo from being freed
 496  508   * when in use.
 497  509   *
 498  510   * You must have the writer lock on exported_lock to add/delete an exportinfo
 499  511   * structure to/from the list.
 500  512   *
 501  513   * exi_volatile_dev maps to VSW_VOLATILEDEV.  It means that the
 502  514   * underlying fs devno can change on each mount.  When set, the server
 503  515   * should not use va_fsid for a GETATTR(FATTR4_FSID) reply.  It must
 504  516   * use exi_fsid because it is guaranteed to be persistent.  This isn't
 505  517   * in any way related to NFS4 volatile filehandles.
 506  518   *
 507  519   * The exi_cache_lock protects the exi_cache AVL trees.
 508  520   */
 509  521  struct exportinfo {
  
    | 
      ↓ open down ↓ | 
    17 lines elided | 
    
      ↑ open up ↑ | 
  
 510  522          struct exportdata       exi_export;
 511  523          fsid_t                  exi_fsid;
 512  524          struct fid              exi_fid;
 513  525          struct exp_hash         fid_hash;
 514  526          struct exp_hash         path_hash;
 515  527          struct treenode         *exi_tree;
 516  528          fhandle_t               exi_fh;
 517  529          krwlock_t               exi_cache_lock;
 518  530          kmutex_t                exi_lock;
 519  531          uint_t                  exi_count;
      532 +        zoneid_t                exi_zoneid;
 520  533          vnode_t                 *exi_vp;
 521  534          vnode_t                 *exi_dvp;
 522  535          avl_tree_t              *exi_cache[AUTH_TABLESIZE];
 523  536          struct log_buffer       *exi_logbuffer;
 524  537          struct exp_visible      *exi_visible;
 525  538          struct charset_cache    *exi_charset;
 526  539          unsigned                exi_volatile_dev:1;
 527  540          unsigned                exi_moved:1;
      541 +        int                     exi_id;
      542 +        avl_node_t              exi_id_link;
      543 +        /*
      544 +         * Soft-reference/backpointer to zone's nfs_export_t.
      545 +         * This allows us access to the zone's rootvp (stored in
      546 +         * exi_ne->exi_root->exi_vp) even if the current thread isn't in
      547 +         * same-zone context.
      548 +         */
      549 +        struct nfs_export       *exi_ne;
 528  550  #ifdef VOLATILE_FH_TEST
 529  551          uint32_t                exi_volatile_id;
 530  552          struct ex_vol_rename    *exi_vol_rename;
 531  553          kmutex_t                exi_vol_rename_lock;
 532      -#endif /* VOLATILE_FH_TEST */
      554 +#endif /* VOLATILE_FH_TEST -- keep last! */
 533  555  };
 534  556  
 535  557  typedef struct exportinfo exportinfo_t;
 536  558  typedef struct exportdata exportdata_t;
 537  559  typedef struct secinfo secinfo_t;
 538  560  
 539  561  /*
 540  562   * exp_visible is a visible list per filesystem. It is for filesystems
 541  563   * that may need a limited view of its contents. A pseudo export and
 542  564   * a real export at the mount point (VROOT) which has a subtree shared
 543  565   * has a visible list.
 544  566   *
 545  567   * The exi_visible field is NULL for normal, non-pseudo filesystems
 546  568   * which do not have any subtree exported. If the field is non-null,
 547  569   * it points to a list of visible entries, identified by vis_fid and/or
 548  570   * vis_ino. The presence of a "visible" list means that if this export
 549  571   * can only have a limited view, it can only view the entries in the
 550  572   * exp_visible list. The directories in the fid list comprise paths that
 551  573   * lead to exported directories.
 552  574   *
 553  575   * The vis_count field records the number of paths in this filesystem
 554  576   * that use this directory. The vis_exported field is non-zero if the
 555  577   * entry is an exported directory (leaf node).
 556  578   *
 557  579   * exp_visible itself is not reference counted. Each exp_visible is
 558  580   * referenced twice:
 559  581   * 1) from treenode::tree_vis
 560  582   * 2) linked from exportinfo::exi_visible
 561  583   * The 'owner' of exp_visible is the exportinfo structure. exp_visible should
  562  584   * always be freed only from exportinfo_t, never from treenode::tree_vis.
 563  585   */
 564  586  
  565  587  struct exp_visible {
  566  588          vnode_t                 *vis_vp;
  567  589          fid_t                   vis_fid;        /* identifies the entry */
  568  590          u_longlong_t            vis_ino;        /* identifies the entry */
  569  591          int                     vis_count;      /* # of paths using this dir */
  570  592          int                     vis_exported;   /* non-zero if exported dir */
  571  593          struct exp_visible      *vis_next;      /* next visible entry */
  572  594          struct secinfo          *vis_secinfo;
  573  595          int                     vis_seccnt;     /* presumably # of vis_secinfo */
  574  596          timespec_t              vis_change;
  575  597  };
  576  598  typedef struct exp_visible exp_visible_t;
 577  599  
 578  600  #define PSEUDO(exi)     ((exi)->exi_export.ex_flags & EX_PSEUDO)
 579  601  #define EXP_LINKED(exi) ((exi)->fid_hash.bckt != NULL)
 580  602  
 581  603  #define EQFSID(fsidp1, fsidp2)  \
 582  604          (((fsidp1)->val[0] == (fsidp2)->val[0]) && \
 583  605              ((fsidp1)->val[1] == (fsidp2)->val[1]))
 584  606  
 585  607  #define EQFID(fidp1, fidp2)     \
 586  608          ((fidp1)->fid_len == (fidp2)->fid_len && \
 587  609              bcmp((char *)(fidp1)->fid_data, (char *)(fidp2)->fid_data, \
 588  610              (uint_t)(fidp1)->fid_len) == 0)
 589  611  
 590  612  #define exportmatch(exi, fsid, fid)     \
 591  613          (EQFSID(&(exi)->exi_fsid, (fsid)) && EQFID(&(exi)->exi_fid, (fid)))
 592  614  
 593  615  /*
 594  616   * Returns true iff exported filesystem is read-only to the given host.
 595  617   *
 596  618   * Note:  this macro should be as fast as possible since it's called
 597  619   * on each NFS modification request.
 598  620   */
 599  621  #define rdonly(ro, vp)  ((ro) || vn_is_readonly(vp))
 600  622  #define rdonly4(req, cs)  \
  
    | 
      ↓ open down ↓ | 
    58 lines elided | 
    
      ↑ open up ↑ | 
  
 601  623          (vn_is_readonly((cs)->vp) || \
 602  624              (nfsauth4_access((cs)->exi, (cs)->vp, (req), (cs)->basecr, NULL, \
 603  625              NULL, NULL, NULL) & (NFSAUTH_RO | NFSAUTH_LIMITED)))
 604  626  
 605  627  extern int      nfsauth4_access(struct exportinfo *, vnode_t *,
 606  628      struct svc_req *, cred_t *, uid_t *, gid_t *, uint_t *, gid_t **);
 607  629  extern int      nfsauth4_secinfo_access(struct exportinfo *,
 608  630      struct svc_req *, int, int, cred_t *);
 609  631  extern int      nfsauth_cache_clnt_compar(const void *, const void *);
 610  632  extern int      nfs_fhbcmp(char *, char *, int);
 611      -extern int      nfs_exportinit(void);
      633 +extern void     nfs_exportinit(void);
 612  634  extern void     nfs_exportfini(void);
      635 +extern void     nfs_export_zone_init(nfs_globals_t *);
      636 +extern void     nfs_export_zone_fini(nfs_globals_t *);
      637 +extern void     nfs_export_zone_shutdown(nfs_globals_t *);
      638 +extern int      nfs_export_get_rootfh(nfs_globals_t *);
 613  639  extern int      chk_clnt_sec(struct exportinfo *, struct svc_req *);
 614  640  extern int      makefh(fhandle_t *, struct vnode *, struct exportinfo *);
 615  641  extern int      makefh_ol(fhandle_t *, struct exportinfo *, uint_t);
 616  642  extern int      makefh3(nfs_fh3 *, struct vnode *, struct exportinfo *);
 617  643  extern int      makefh3_ol(nfs_fh3 *, struct exportinfo *, uint_t);
 618  644  extern vnode_t *nfs_fhtovp(fhandle_t *, struct exportinfo *);
 619  645  extern vnode_t *nfs3_fhtovp(nfs_fh3 *, struct exportinfo *);
 620  646  extern struct   exportinfo *checkexport(fsid_t *, struct fid *);
 621  647  extern struct   exportinfo *checkexport4(fsid_t *, struct fid *, vnode_t *);
 622  648  extern void     exi_hold(struct exportinfo *);
 623  649  extern void     exi_rele(struct exportinfo *);
 624  650  extern struct exportinfo *nfs_vptoexi(vnode_t *, vnode_t *, cred_t *, int *,
 625  651      int *, bool_t);
 626  652  extern int      nfs_check_vpexi(vnode_t *, vnode_t *, cred_t *,
 627  653                          struct exportinfo **);
 628      -extern void     export_link(struct exportinfo *);
 629      -extern void     export_unlink(struct exportinfo *);
 630      -extern vnode_t *untraverse(vnode_t *);
      654 +extern vnode_t *untraverse(vnode_t *, vnode_t *);
 631  655  extern int      vn_is_nfs_reparse(vnode_t *, cred_t *);
 632  656  extern int      client_is_downrev(struct svc_req *);
 633  657  extern char    *build_symlink(vnode_t *, cred_t *, size_t *);
 634  658  
      659 +extern fhandle_t nullfh2;       /* for comparing V2 filehandles */
      660 +
      661 +typedef struct nfs_export {
      662 +        /* Root of nfs pseudo namespace */
      663 +        treenode_t *ns_root;
      664 +
      665 +        nfs_globals_t           *ne_globals;    /* "up" pointer */
      666 +
      667 +        struct exportinfo *exptable_path_hash[PKP_HASH_SIZE]; /* presumably hashed by export path; confirm */
      668 +        struct exportinfo *exptable[EXPTABLESIZE]; /* presumably hashed by fsid/fid; confirm */
      669 +
      670 +        /*
      671 +         * Read/Write lock that protects the exportinfo list.  This lock
      672 +         * must be held when searching or modifying the exportinfo list.
      673 +         */
      674 +        krwlock_t exported_lock;
      675 +
      676 +        /* "public" and default (root) location for public filehandle */
      677 +        struct exportinfo *exi_public;
      678 +        struct exportinfo *exi_root;
      679 +        /* For checking default public file handle */
      680 +        fid_t exi_rootfid;
      681 +        /* For comparing V2 filehandles */
      682 +        fhandle_t nullfh2;
      683 +
      684 +        /* The change attribute value of the root of nfs pseudo namespace */
      685 +        timespec_t ns_root_change;
      686 +} nfs_export_t;
      687 +
 635  688  /*
 636  689   * Functions that handle the NFSv4 server namespace
 637  690   */
 638  691  extern exportinfo_t *vis2exi(treenode_t *);
 639  692  extern int      treeclimb_export(struct exportinfo *);
 640      -extern void     treeclimb_unexport(struct exportinfo *);
      693 +extern void     treeclimb_unexport(nfs_export_t *, struct exportinfo *);
 641  694  extern int      nfs_visible(struct exportinfo *, vnode_t *, int *);
 642  695  extern int      nfs_visible_inode(struct exportinfo *, ino64_t,
 643      -    struct exp_visible **);
      696 +                    struct exp_visible **);
 644  697  extern int      has_visible(struct exportinfo *, vnode_t *);
 645  698  extern void     free_visible(struct exp_visible *);
 646  699  extern int      nfs_exported(struct exportinfo *, vnode_t *);
 647      -extern struct exportinfo *pseudo_exportfs(vnode_t *, fid_t *,
 648      -    struct exp_visible *, struct exportdata *);
      700 +extern struct exportinfo *pseudo_exportfs(nfs_export_t *, vnode_t *, fid_t *,
      701 +                    struct exp_visible *, struct exportdata *);
 649  702  extern int      vop_fid_pseudo(vnode_t *, fid_t *);
 650  703  extern int      nfs4_vget_pseudo(struct exportinfo *, vnode_t **, fid_t *);
 651  704  extern bool_t   nfs_visible_change(struct exportinfo *, vnode_t *,
 652      -    timespec_t *);
 653      -extern void     tree_update_change(treenode_t *, timespec_t *);
      705 +                    timespec_t *);
      706 +extern void     tree_update_change(nfs_export_t *, treenode_t *, timespec_t *);
      707 +extern void     rfs4_clean_state_exi(nfs_export_t *, struct exportinfo *);
      708 +
 654  709  /*
 655  710   * Functions that handle the NFSv4 server namespace security flavors
 656  711   * information.
 657  712   */
 658  713  extern void     srv_secinfo_exp2pseu(struct exportdata *, struct exportdata *);
 659  714  extern void     srv_secinfo_list_free(struct secinfo *, int);
 660  715  
      716 +extern nfs_export_t *nfs_get_export(void);
      717 +extern void     export_link(nfs_export_t *, struct exportinfo *);
      718 +extern void     export_unlink(nfs_export_t *, struct exportinfo *);
      719 +
 661  720  /*
 662      - * "public" and default (root) location for public filehandle
      721 + * exi_id support
 663  722   */
 664      -extern struct exportinfo *exi_public, *exi_root;
 665      -extern fhandle_t nullfh2;       /* for comparing V2 filehandles */
 666      -extern krwlock_t exported_lock;
 667      -extern struct exportinfo *exptable[];
      723 +extern kmutex_t  nfs_exi_id_lock;
      724 +extern avl_tree_t exi_id_tree;
      725 +extern int exi_id_get_next(void);
 668  726  
 669  727  /*
 670  728   * Two macros for identifying public filehandles.
 671  729   * A v2 public filehandle is 32 zero bytes.
 672  730   * A v3 public filehandle is zero length.
 673  731   */
 674  732  #define PUBLIC_FH2(fh) \
 675  733          ((fh)->fh_fsid.val[1] == 0 && \
 676  734          bcmp((fh), &nullfh2, sizeof (fhandle_t)) == 0)
 677  735  
 678  736  #define PUBLIC_FH3(fh) \
 679  737          ((fh)->fh3_length == 0)
 680  738  
 681  739  extern int      makefh4(nfs_fh4 *, struct vnode *, struct exportinfo *);
 682  740  extern vnode_t *nfs4_fhtovp(nfs_fh4 *, struct exportinfo *, nfsstat4 *);
 683  741  
 684  742  #endif /* _KERNEL */
 685  743  
 686  744  #ifdef  __cplusplus
 687  745  }
 688  746  #endif
 689  747  
 690  748  #endif  /* _NFS_EXPORT_H */
  
    | 
      ↓ open down ↓ | 
    13 lines elided | 
    
      ↑ open up ↑ | 
  
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX