Print this page
NEX-16917 Need to reduce the impact of NFS per-share kstats on failover
Reviewed by: Yuri Pankov <yuri.pankov@nexenta.com>
Reviewed by: Evan Layton <evan.layton@nexenta.com>
Reviewed by: Rick McNeal <rick.mcneal@nexenta.com>
NEX-15279 support NFS server in zone
NEX-15520 online NFS shares cause zoneadm halt to hang in nfs_export_zone_fini
Portions contributed by: Dan Kruchinin dan.kruchinin@nexenta.com
Portions contributed by: Stepan Zastupov stepan.zastupov@gmail.com
Reviewed by: Joyce McIntosh <joyce.mcintosh@nexenta.com>
Reviewed by: Rob Gittins <rob.gittins@nexenta.com>
Reviewed by: Gordon Ross <gordon.ross@nexenta.com>
NEX-6778 NFS kstats leak and cause system to hang
Revert "NEX-4261 Per-client NFS server IOPS, bandwidth, and latency kstats"
This reverts commit 586c3ab1927647487f01c337ddc011c642575a52.
Revert "NEX-5354 Aggregated IOPS, bandwidth, and latency kstats for NFS server"
This reverts commit c91d7614da8618ef48018102b077f60ecbbac8c2.
Revert "NEX-5667 nfssrv_stats_flags does not work for aggregated kstats"
This reverts commit 3dcf42618be7dd5f408c327f429c81e07ca08e74.
Revert "NEX-5750 Time values for aggregated NFS server kstats should be normalized"
This reverts commit 1f4d4f901153b0191027969fa4a8064f9d3b9ee1.
Revert "NEX-5942 Panic in rfs4_minorvers_mismatch() with NFSv4.1 client"
This reverts commit 40766417094a162f5e4cc8786c0fa0a7e5871cd9.
Revert "NEX-5752 NFS server: namespace collision in kstats"
This reverts commit ae81e668db86050da8e483264acb0cce0444a132.
Reviewed by: Rob Gittins <rob.gittins@nexenta.com>
Reviewed by: Yuri Pankov <yuri.pankov@nexenta.com>
NEX-5354 Aggregated IOPS, bandwidth, and latency kstats for NFS server
Reviewed by: Steve Peng <steve.peng@nexenta.com>
NEX-4261 Per-client NFS server IOPS, bandwidth, and latency kstats
Reviewed by: Kevin Crowe <kevin.crowe@nexenta.com>
Reviewed by: Roman Strashkin <roman.strashkin@nexenta.com>
NEX-3097 IOPS, bandwidth, and latency kstats for NFS server
Reviewed by: Josef 'Jeff' Sipek <josef.sipek@nexenta.com>
NEX-3524 CLONE - Port NEX-3505 "wrong authentication" messages with root=@0.0.0.0/0 set, result in loss of client access
Reviewed by: Marcel Telka <marcel.telka@nexenta.com>
NEX-3533 CLONE - Port NEX-3019 NFSv3 writes underneath mounted filesystem to directory
Reviewed by: Dan Fields <dan.fields@nexenta.com>
NEX-1974 Support for more than 16 groups with AUTH_SYS
Reviewed by: Gordon Ross <gordon.ross@nexenta.com>
Reviewed by: Josef 'Jeff' Sipek <josef.sipek@nexenta.com>
NEX-1128 NFS server: Generic uid and gid remapping for AUTH_SYS
Reviewed by: Jan Kryl <jan.kryl@nexenta.com>
OS-20 share_nfs(1m) charset handling is unreliable
OS-22 Page fault at nfscmd_dropped_entrysize+0x1e()
OS-23 NFSv2/3/4: READDIR responses are inconsistent when charset conversion fails
OS-24 rfs3_readdir(): Issues related to nfscmd_convdirent()
Reviewed by: Jan Kryl <jan.kryl@nexenta.com>
Reviewed by: Gordon Ross <gordon.ross@nexenta.com>
re #13613 rb4516 Tunables needs volatile keyword
closes #12112 rb3823 - nfs-nohide: lookup("..") for submount should be correct
re #3541 rb11254 - nfs nohide - "nfssrv: need ability to go to submounts for v3 and v2 protocols"

@@ -18,23 +18,24 @@
  *
  * CDDL HEADER END
  */
 
 /*
- * Copyright 2014 Nexenta Systems, Inc.  All rights reserved.
  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2013 by Delphix. All rights reserved.
  */
 
 /*      Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T     */
 /*        All Rights Reserved   */
 
+/*
+ * Copyright 2018 Nexenta Systems, Inc.
+ * Copyright (c) 2013 by Delphix. All rights reserved.
+ */
+
 #ifndef _NFS_NFS_H
 #define _NFS_NFS_H
 
-/*      nfs.h 2.38 88/08/19 SMI         */
-
 #include <sys/isa_defs.h>
 #include <sys/vfs.h>
 #include <sys/stream.h>
 #include <rpc/types.h>
 #include <sys/types32.h>

@@ -70,14 +71,40 @@
  * Used to determine registration and service handling of versions
  */
 #define NFS_VERSMIN_DEFAULT     ((rpcvers_t)2)
 #define NFS_VERSMAX_DEFAULT     ((rpcvers_t)4)
 
-extern rpcvers_t nfs_versmin;
-extern rpcvers_t nfs_versmax;
+/*
+ * Used to track the state of the server so that initialization
+ * can be done properly.
+ */
+typedef enum {
+        NFS_SERVER_STOPPED,     /* server state destroyed */
+        NFS_SERVER_STOPPING,    /* server state being destroyed */
+        NFS_SERVER_RUNNING,     /* server running */
+        NFS_SERVER_QUIESCED,    /* server state preserved */
+        NFS_SERVER_OFFLINE      /* server pool offline */
+} nfs_server_running_t;
 
 /*
+ * Zone-global variables of the NFS server
+ */
+typedef struct nfs_globals {
+        rpcvers_t               nfs_versmin;
+        rpcvers_t               nfs_versmax;
+
+        /* NFS server locks and state */
+        nfs_server_running_t    nfs_server_upordown;
+        kmutex_t                nfs_server_upordown_lock;
+        kcondvar_t              nfs_server_upordown_cv;
+
+        /* RDMA wait variables */
+        kcondvar_t              rdma_wait_cv;
+        kmutex_t                rdma_wait_mutex;
+} nfs_globals_t;
+
+/*
  * Default delegation setting for the server ==> "on"
  */
 #define NFS_SERVER_DELEGATION_DEFAULT   (TRUE)
 
 /* Maximum size of data portion of a remote request */

@@ -241,11 +268,11 @@
  * nfsv4 allows for negative values in the protocol, and has a 64-bit
  * time field, so nfs_allow_preepoch_time can be ignored.
  */
 #ifdef _KERNEL
 
-extern bool_t           nfs_allow_preepoch_time;
+extern volatile bool_t  nfs_allow_preepoch_time;
 
 #ifdef _LP64
 
 /*
  * If no negative otw values are allowed, may use the full 32-bits of the

@@ -613,34 +640,47 @@
         uint32_t rda_offset;    /* offset in directory (opaque) */
         uint32_t rda_count;     /* number of directory bytes to read */
 };
 
 /*
+ * One entry of a readdir result; entries are chained through nextentry
+ */
+struct nfsentry {
+        uint32_t fileid;
+        char *name;
+        uint32_t cookie;
+        struct nfsentry *nextentry;     /* next entry in the chain */
+};
+
+/*
  * NFS_OK part of readdir result
  */
 struct nfsrdok {
+        struct nfsentry *rdok_entries;  /* linked list of struct nfsentry */
+        bool_t rdok_eof;                /* true if last entry is in result */
+
         uint32_t rdok_offset;           /* next offset (opaque) */
         uint32_t rdok_size;             /* size in bytes of entries */
-        bool_t  rdok_eof;               /* true if last entry is in result */
-        struct dirent64 *rdok_entries;  /* variable number of entries */
+        struct dirent64 *rdok_dirents;  /* variable number of dirent64s */
 };
 
 /*
  * Readdir result
  */
 struct nfsrddirres {
         nfsstat rd_status;
-        uint_t          rd_bufsize;     /* client request size (not xdr'ed) */
         union {
                 struct nfsrdok rd_rdok_u;
         } rd_u;
 };
 #define rd_rdok         rd_u.rd_rdok_u
+#define rd_entries      rd_u.rd_rdok_u.rdok_entries
+#define rd_eof          rd_u.rd_rdok_u.rdok_eof
+
 #define rd_offset       rd_u.rd_rdok_u.rdok_offset
 #define rd_size         rd_u.rd_rdok_u.rdok_size
-#define rd_eof          rd_u.rd_rdok_u.rdok_eof
-#define rd_entries      rd_u.rd_rdok_u.rdok_entries
+#define rd_dirents      rd_u.rd_rdok_u.rdok_dirents
 
 
 /*
  * Arguments for directory operations
  */

@@ -902,11 +942,11 @@
 extern void     nfs_async_stop(struct vfs *);
 extern int      nfs_async_stop_sig(struct vfs *);
 extern int      nfs_clntinit(void);
 extern void     nfs_clntfini(void);
 extern int      nfstsize(void);
-extern int      nfs_srvinit(void);
+extern void     nfs_srvinit(void);
 extern void     nfs_srvfini(void);
 extern int      vattr_to_sattr(struct vattr *, struct nfssattr *);
 extern void     setdiropargs(struct nfsdiropargs *, char *, vnode_t *);
 extern int      setdirgid(vnode_t *, gid_t *, cred_t *);
 extern int      setdirmode(vnode_t *, mode_t *, cred_t *);

@@ -955,13 +995,16 @@
 extern time_t   rfs4_grace_period;
 extern nvlist_t *rfs4_dss_paths, *rfs4_dss_oldpaths;
 
 extern kstat_named_t    *global_svstat_ptr[];
 
+extern zone_key_t       rfs4_zone_key;
+extern zone_key_t       nfssrv_zone_key;
 extern krwlock_t        rroklock;
 extern vtype_t          nf_to_vt[];
 extern kstat_named_t    *rfsproccnt_v2_ptr;
+extern kstat_t          **rfsprocio_v2_ptr;
 extern kmutex_t         nfs_minor_lock;
 extern int              nfs_major;
 extern int              nfs_minor;
 extern vfsops_t         *nfs_vfsops;
 extern struct vnodeops  *nfs_vnodeops;

@@ -969,17 +1012,21 @@
 extern int              nfsfstyp;
 extern void             (*nfs_srv_quiesce_func)(void);
 extern int              (*nfs_srv_dss_func)(char *, size_t);
 
 /*
- * Per-zone stats as consumed by nfsstat(1m)
+ * Per-zone stats
  */
 struct nfs_version_stats {
         kstat_named_t   *aclreqcnt_ptr;         /* nfs_acl:0:aclreqcnt_v? */
         kstat_named_t   *aclproccnt_ptr;        /* nfs_acl:0:aclproccnt_v? */
+        kstat_t         **aclprocio_ptr;        /* nfs_acl:0:aclprocio_v?_* */
+        kmutex_t        aclprocio_lock;         /* protects aclprocio */
         kstat_named_t   *rfsreqcnt_ptr;         /* nfs:0:rfsreqcnt_v? */
         kstat_named_t   *rfsproccnt_ptr;        /* nfs:0:rfsproccnt_v? */
+        kstat_t         **rfsprocio_ptr;        /* nfs:0:rfsprocio_v?_* */
+        kmutex_t        rfsprocio_lock;         /* protects rfsprocio */
 };
 
 /*
  * A bit of asymmetry: nfs:0:nfs_client isn't part of this structure.
  */

@@ -999,10 +1046,35 @@
  * Zone callback functions.
  */
 extern void     *nfsstat_zone_init(zoneid_t);
 extern void     nfsstat_zone_fini(zoneid_t, void *);
 
+/*
+ * Per-exportinfo stats
+ */
+struct exp_kstats {
+        kstat_t         *share_kstat;           /* Generic share kstat */
+        struct {
+                kstat_named_t   path;           /* Shared path */
+                kstat_named_t   filesystem;     /* pseudo|real */
+        } share_kstat_data;                     /* Generic share kstat data */
+        char            *share_path;            /* Shared path string */
+        kstat_t         **rfsshr_v3_ptr;        /* NFS v3 per share stats */
+        kstat_t         **rfsshr_v4_ptr;        /* NFS v4 per share stats */
+        kmutex_t        procio_lock;            /* protects all exp_kstats */
+};
+
+extern struct exp_kstats *exp_kstats_init(zoneid_t, int, const char *, size_t,
+    bool_t);
+extern void exp_kstats_delete(struct exp_kstats *);
+extern void exp_kstats_fini(struct exp_kstats *);
+extern void exp_kstats_reset(struct exp_kstats *, const char *, size_t, bool_t);
+
+extern kstat_t *exp_kstats_v2(struct exp_kstats *, uint_t);
+extern kstat_t *exp_kstats_v3(struct exp_kstats *, uint_t);
+extern kstat_t *exp_kstats_v4(struct exp_kstats *, uint_t);
+
 #endif  /* _KERNEL */
 
 /*
  * Version 3 declarations and definitions.
  */

@@ -1819,14 +1891,10 @@
 
 struct READDIR3resok {
         post_op_attr dir_attributes;
         cookieverf3 cookieverf;
         dirlist3 reply;
-        uint_t size;
-        uint_t count;
-        uint_t freecount;
-        cookie3 cookie;
 };
 typedef struct READDIR3resok READDIR3resok;
 
 struct READDIR3resfail {
         post_op_attr dir_attributes;

@@ -1879,25 +1947,14 @@
         entryplus3 *entries;
         bool_t eof;
 };
 typedef struct dirlistplus3 dirlistplus3;
 
-struct entryplus3_info {
-        post_op_attr attr;
-        post_op_fh3 fh;
-        uint_t namelen;
-};
-typedef struct entryplus3_info entryplus3_info;
-
 struct READDIRPLUS3resok {
         post_op_attr dir_attributes;
         cookieverf3 cookieverf;
         dirlistplus3 reply;
-        uint_t size;
-        uint_t count;
-        uint_t maxcount;
-        entryplus3_info *infop;
 };
 typedef struct READDIRPLUS3resok READDIRPLUS3resok;
 
 struct READDIRPLUS3resfail {
         post_op_attr dir_attributes;

@@ -2276,13 +2333,16 @@
 extern void     mblk_to_iov(mblk_t *, int, struct iovec *);
 extern int      rfs_publicfh_mclookup(char *, vnode_t *, cred_t *, vnode_t **,
     struct exportinfo **, struct sec_ol *);
 extern int      rfs_pathname(char *, vnode_t **, vnode_t **, vnode_t *,
     cred_t *, int);
+extern int      rfs_cross_mnt(vnode_t **, struct exportinfo **);
+extern int      rfs_climb_crossmnt(vnode_t **, struct exportinfo **, cred_t *);
 
 extern vtype_t          nf3_to_vt[];
 extern kstat_named_t    *rfsproccnt_v3_ptr;
+extern kstat_t          **rfsprocio_v3_ptr;
 extern vfsops_t         *nfs3_vfsops;
 extern struct vnodeops  *nfs3_vnodeops;
 extern const struct fs_operation_def nfs3_vnodeops_template[];
 
 /*

@@ -2289,11 +2349,11 @@
  * Some servers do not properly update the attributes of the
  * directory when changes are made.  To allow interoperability
  * with these broken servers, the nfs_disable_rddir_cache
  * parameter can be used to disable readdir response caching.
  */
-extern int              nfs_disable_rddir_cache;
+extern volatile int     nfs_disable_rddir_cache;
 
 /*
  * External functions called by the v2/v3 code into the v4 code
  */
 extern void     nfs4_clnt_init(void);

@@ -2308,15 +2368,15 @@
  * call back to the delegated client to get attributes for AT_MTIME and
  * AT_SIZE. Invoke VOP_GETATTR to get all other attributes or all attributes
  * if no delegation is present.
  */
 extern int      rfs4_delegated_getattr(vnode_t *, vattr_t *, int, cred_t *);
-extern void     rfs4_hold_deleg_policy(void);
-extern void     rfs4_rele_deleg_policy(void);
 
 extern int      do_xattr_exists_check(vnode_t *, ulong_t *, cred_t *);
 
+extern int      protect_zfs_mntpt(vnode_t *);
+
 extern ts_label_t *nfs_getflabel(vnode_t *, struct exportinfo *);
 extern boolean_t do_rfs_label_check(bslabel_t *, vnode_t *, int,
     struct exportinfo *);
 
 /*