Print this page
NEX-16917 Need to reduce the impact of NFS per-share kstats on failover
Reviewed by: Yuri Pankov <yuri.pankov@nexenta.com>
Reviewed by: Evan Layton <evan.layton@nexenta.com>
Reviewed by: Rick McNeal <rick.mcneal@nexenta.com>
NEX-15279 support NFS server in zone
NEX-15520 online NFS shares cause zoneadm halt to hang in nfs_export_zone_fini
Portions contributed by: Dan Kruchinin <dan.kruchinin@nexenta.com>
Portions contributed by: Stepan Zastupov <stepan.zastupov@gmail.com>
Reviewed by: Joyce McIntosh <joyce.mcintosh@nexenta.com>
Reviewed by: Rob Gittins <rob.gittins@nexenta.com>
Reviewed by: Gordon Ross <gordon.ross@nexenta.com>
NEX-6778 NFS kstats leak and cause system to hang
Revert "NEX-4261 Per-client NFS server IOPS, bandwidth, and latency kstats"
This reverts commit 586c3ab1927647487f01c337ddc011c642575a52.
Revert "NEX-5354 Aggregated IOPS, bandwidth, and latency kstats for NFS server"
This reverts commit c91d7614da8618ef48018102b077f60ecbbac8c2.
Revert "NEX-5667 nfssrv_stats_flags does not work for aggregated kstats"
This reverts commit 3dcf42618be7dd5f408c327f429c81e07ca08e74.
Revert "NEX-5750 Time values for aggregated NFS server kstats should be normalized"
This reverts commit 1f4d4f901153b0191027969fa4a8064f9d3b9ee1.
Revert "NEX-5942 Panic in rfs4_minorvers_mismatch() with NFSv4.1 client"
This reverts commit 40766417094a162f5e4cc8786c0fa0a7e5871cd9.
Revert "NEX-5752 NFS server: namespace collision in kstats"
This reverts commit ae81e668db86050da8e483264acb0cce0444a132.
Reviewed by: Rob Gittins <rob.gittins@nexenta.com>
Reviewed by: Yuri Pankov <yuri.pankov@nexenta.com>
NEX-5354 Aggregated IOPS, bandwidth, and latency kstats for NFS server
Reviewed by: Steve Peng <steve.peng@nexenta.com>
NEX-4261 Per-client NFS server IOPS, bandwidth, and latency kstats
Reviewed by: Kevin Crowe <kevin.crowe@nexenta.com>
Reviewed by: Roman Strashkin <roman.strashkin@nexenta.com>
NEX-3097 IOPS, bandwidth, and latency kstats for NFS server
Reviewed by: Josef 'Jeff' Sipek <josef.sipek@nexenta.com>
NEX-3524 CLONE - Port NEX-3505 "wrong authentication" messages with root=@0.0.0.0/0 set, result in loss of client access
Reviewed by: Marcel Telka <marcel.telka@nexenta.com>
NEX-3533 CLONE - Port NEX-3019 NFSv3 writes underneath mounted filesystem to directory
Reviewed by: Dan Fields <dan.fields@nexenta.com>
NEX-1974 Support for more than 16 groups with AUTH_SYS
Reviewed by: Gordon Ross <gordon.ross@nexenta.com>
Reviewed by: Josef 'Jeff' Sipek <josef.sipek@nexenta.com>
NEX-1128 NFS server: Generic uid and gid remapping for AUTH_SYS
Reviewed by: Jan Kryl <jan.kryl@nexenta.com>
OS-20 share_nfs(1m) charset handling is unreliable
OS-22 Page fault at nfscmd_dropped_entrysize+0x1e()
OS-23 NFSv2/3/4: READDIR responses are inconsistent when charset conversion fails
OS-24 rfs3_readdir(): Issues related to nfscmd_convdirent()
Reviewed by: Jan Kryl <jan.kryl@nexenta.com>
Reviewed by: Gordon Ross <gordon.ross@nexenta.com>
re #13613 rb4516 Tunables needs volatile keyword
closes #12112 rb3823 - nfs-nohide: lookup("..") for submount should be correct
re #3541 rb11254 - nfs nohide - "nfssrv: need ability to go to submounts for v3 and v2 protocols"
@@ -18,23 +18,24 @@
*
* CDDL HEADER END
*/
/*
- * Copyright 2014 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2013 by Delphix. All rights reserved.
*/
/* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
/* All Rights Reserved */
+/*
+ * Copyright 2018 Nexenta Systems, Inc.
+ * Copyright (c) 2013 by Delphix. All rights reserved.
+ */
+
#ifndef _NFS_NFS_H
#define _NFS_NFS_H
-/* nfs.h 2.38 88/08/19 SMI */
-
#include <sys/isa_defs.h>
#include <sys/vfs.h>
#include <sys/stream.h>
#include <rpc/types.h>
#include <sys/types32.h>
@@ -70,14 +71,40 @@
* Used to determine registration and service handling of versions
*/
#define NFS_VERSMIN_DEFAULT ((rpcvers_t)2)
#define NFS_VERSMAX_DEFAULT ((rpcvers_t)4)
-extern rpcvers_t nfs_versmin;
-extern rpcvers_t nfs_versmax;
+/*
+ * Used to track the state of the server so that initialization
+ * can be done properly.
+ */
+typedef enum {
+ NFS_SERVER_STOPPED, /* server state destroyed */
+ NFS_SERVER_STOPPING, /* server state being destroyed */
+ NFS_SERVER_RUNNING,
+ NFS_SERVER_QUIESCED, /* server state preserved */
+ NFS_SERVER_OFFLINE /* server pool offline */
+} nfs_server_running_t;
/*
+ * Zone-global variables of the NFS server
+ */
+typedef struct nfs_globals {
+ rpcvers_t nfs_versmin;
+ rpcvers_t nfs_versmax;
+
+ /* NFS server locks and state */
+ nfs_server_running_t nfs_server_upordown;
+ kmutex_t nfs_server_upordown_lock;
+ kcondvar_t nfs_server_upordown_cv;
+
+ /* RDMA wait variables */
+ kcondvar_t rdma_wait_cv;
+ kmutex_t rdma_wait_mutex;
+} nfs_globals_t;
+
+/*
* Default delegation setting for the server ==> "on"
*/
#define NFS_SERVER_DELEGATION_DEFAULT (TRUE)
/* Maximum size of data portion of a remote request */
@@ -241,11 +268,11 @@
* nfsv4 allows for negative values in the protocol, and has a 64-bit
* time field, so nfs_allow_preepoch_time can be ignored.
*/
#ifdef _KERNEL
-extern bool_t nfs_allow_preepoch_time;
+extern volatile bool_t nfs_allow_preepoch_time;
#ifdef _LP64
/*
* If no negative otw values are allowed, may use the full 32-bits of the
@@ -613,34 +640,47 @@
uint32_t rda_offset; /* offset in directory (opaque) */
uint32_t rda_count; /* number of directory bytes to read */
};
/*
+ * Directory entry structure used in the readdir result
+ */
+struct nfsentry {
+ uint32_t fileid;
+ char *name;
+ uint32_t cookie;
+ struct nfsentry *nextentry;
+};
+
+/*
* NFS_OK part of readdir result
*/
struct nfsrdok {
+ struct nfsentry *rdok_entries; /* variable number of entries */
+ bool_t rdok_eof; /* true if last entry is in result */
+
uint32_t rdok_offset; /* next offset (opaque) */
uint32_t rdok_size; /* size in bytes of entries */
- bool_t rdok_eof; /* true if last entry is in result */
- struct dirent64 *rdok_entries; /* variable number of entries */
+ struct dirent64 *rdok_dirents; /* variable number of entries */
};
/*
* Readdir result
*/
struct nfsrddirres {
nfsstat rd_status;
- uint_t rd_bufsize; /* client request size (not xdr'ed) */
union {
struct nfsrdok rd_rdok_u;
} rd_u;
};
#define rd_rdok rd_u.rd_rdok_u
+#define rd_entries rd_u.rd_rdok_u.rdok_entries
+#define rd_eof rd_u.rd_rdok_u.rdok_eof
+
#define rd_offset rd_u.rd_rdok_u.rdok_offset
#define rd_size rd_u.rd_rdok_u.rdok_size
-#define rd_eof rd_u.rd_rdok_u.rdok_eof
-#define rd_entries rd_u.rd_rdok_u.rdok_entries
+#define rd_dirents rd_u.rd_rdok_u.rdok_dirents
/*
* Arguments for directory operations
*/
@@ -902,11 +942,11 @@
extern void nfs_async_stop(struct vfs *);
extern int nfs_async_stop_sig(struct vfs *);
extern int nfs_clntinit(void);
extern void nfs_clntfini(void);
extern int nfstsize(void);
-extern int nfs_srvinit(void);
+extern void nfs_srvinit(void);
extern void nfs_srvfini(void);
extern int vattr_to_sattr(struct vattr *, struct nfssattr *);
extern void setdiropargs(struct nfsdiropargs *, char *, vnode_t *);
extern int setdirgid(vnode_t *, gid_t *, cred_t *);
extern int setdirmode(vnode_t *, mode_t *, cred_t *);
@@ -955,13 +995,16 @@
extern time_t rfs4_grace_period;
extern nvlist_t *rfs4_dss_paths, *rfs4_dss_oldpaths;
extern kstat_named_t *global_svstat_ptr[];
+extern zone_key_t rfs4_zone_key;
+extern zone_key_t nfssrv_zone_key;
extern krwlock_t rroklock;
extern vtype_t nf_to_vt[];
extern kstat_named_t *rfsproccnt_v2_ptr;
+extern kstat_t **rfsprocio_v2_ptr;
extern kmutex_t nfs_minor_lock;
extern int nfs_major;
extern int nfs_minor;
extern vfsops_t *nfs_vfsops;
extern struct vnodeops *nfs_vnodeops;
@@ -969,17 +1012,21 @@
extern int nfsfstyp;
extern void (*nfs_srv_quiesce_func)(void);
extern int (*nfs_srv_dss_func)(char *, size_t);
/*
- * Per-zone stats as consumed by nfsstat(1m)
+ * Per-zone stats
*/
struct nfs_version_stats {
kstat_named_t *aclreqcnt_ptr; /* nfs_acl:0:aclreqcnt_v? */
kstat_named_t *aclproccnt_ptr; /* nfs_acl:0:aclproccnt_v? */
+ kstat_t **aclprocio_ptr; /* nfs_acl:0:aclprocio_v?_* */
+ kmutex_t aclprocio_lock; /* protects aclprocio */
kstat_named_t *rfsreqcnt_ptr; /* nfs:0:rfsreqcnt_v? */
kstat_named_t *rfsproccnt_ptr; /* nfs:0:rfsproccnt_v? */
+ kstat_t **rfsprocio_ptr; /* nfs:0:rfsprocio_v?_* */
+ kmutex_t rfsprocio_lock; /* protects rfsprocio */
};
/*
* A bit of asymmetry: nfs:0:nfs_client isn't part of this structure.
*/
@@ -999,10 +1046,35 @@
* Zone callback functions.
*/
extern void *nfsstat_zone_init(zoneid_t);
extern void nfsstat_zone_fini(zoneid_t, void *);
+/*
+ * Per-exportinfo stats
+ */
+struct exp_kstats {
+ kstat_t *share_kstat; /* Generic share kstat */
+ struct {
+ kstat_named_t path; /* Shared path */
+ kstat_named_t filesystem; /* pseudo|real */
+ } share_kstat_data; /* Generic share kstat data */
+ char *share_path; /* Shared path string */
+ kstat_t **rfsshr_v3_ptr; /* NFS v3 per share stats */
+ kstat_t **rfsshr_v4_ptr; /* NFS v4 per share stats */
+ kmutex_t procio_lock; /* protects all exp_kstats */
+};
+
+extern struct exp_kstats *exp_kstats_init(zoneid_t, int, const char *, size_t,
+ bool_t);
+extern void exp_kstats_delete(struct exp_kstats *);
+extern void exp_kstats_fini(struct exp_kstats *);
+extern void exp_kstats_reset(struct exp_kstats *, const char *, size_t, bool_t);
+
+extern kstat_t *exp_kstats_v2(struct exp_kstats *, uint_t);
+extern kstat_t *exp_kstats_v3(struct exp_kstats *, uint_t);
+extern kstat_t *exp_kstats_v4(struct exp_kstats *, uint_t);
+
#endif /* _KERNEL */
/*
* Version 3 declarations and definitions.
*/
@@ -1819,14 +1891,10 @@
struct READDIR3resok {
post_op_attr dir_attributes;
cookieverf3 cookieverf;
dirlist3 reply;
- uint_t size;
- uint_t count;
- uint_t freecount;
- cookie3 cookie;
};
typedef struct READDIR3resok READDIR3resok;
struct READDIR3resfail {
post_op_attr dir_attributes;
@@ -1879,25 +1947,14 @@
entryplus3 *entries;
bool_t eof;
};
typedef struct dirlistplus3 dirlistplus3;
-struct entryplus3_info {
- post_op_attr attr;
- post_op_fh3 fh;
- uint_t namelen;
-};
-typedef struct entryplus3_info entryplus3_info;
-
struct READDIRPLUS3resok {
post_op_attr dir_attributes;
cookieverf3 cookieverf;
dirlistplus3 reply;
- uint_t size;
- uint_t count;
- uint_t maxcount;
- entryplus3_info *infop;
};
typedef struct READDIRPLUS3resok READDIRPLUS3resok;
struct READDIRPLUS3resfail {
post_op_attr dir_attributes;
@@ -2276,13 +2333,16 @@
extern void mblk_to_iov(mblk_t *, int, struct iovec *);
extern int rfs_publicfh_mclookup(char *, vnode_t *, cred_t *, vnode_t **,
struct exportinfo **, struct sec_ol *);
extern int rfs_pathname(char *, vnode_t **, vnode_t **, vnode_t *,
cred_t *, int);
+extern int rfs_cross_mnt(vnode_t **, struct exportinfo **);
+extern int rfs_climb_crossmnt(vnode_t **, struct exportinfo **, cred_t *);
extern vtype_t nf3_to_vt[];
extern kstat_named_t *rfsproccnt_v3_ptr;
+extern kstat_t **rfsprocio_v3_ptr;
extern vfsops_t *nfs3_vfsops;
extern struct vnodeops *nfs3_vnodeops;
extern const struct fs_operation_def nfs3_vnodeops_template[];
/*
@@ -2289,11 +2349,11 @@
* Some servers do not properly update the attributes of the
* directory when changes are made. To allow interoperability
* with these broken servers, the nfs_disable_rddir_cache
* parameter can be used to disable readdir response caching.
*/
-extern int nfs_disable_rddir_cache;
+extern volatile int nfs_disable_rddir_cache;
/*
* External functions called by the v2/v3 code into the v4 code
*/
extern void nfs4_clnt_init(void);
@@ -2308,15 +2368,15 @@
* call back to the delegated client to get attributes for AT_MTIME and
* AT_SIZE. Invoke VOP_GETATTR to get all other attributes or all attributes
* if no delegation is present.
*/
extern int rfs4_delegated_getattr(vnode_t *, vattr_t *, int, cred_t *);
-extern void rfs4_hold_deleg_policy(void);
-extern void rfs4_rele_deleg_policy(void);
extern int do_xattr_exists_check(vnode_t *, ulong_t *, cred_t *);
+extern int protect_zfs_mntpt(vnode_t *);
+
extern ts_label_t *nfs_getflabel(vnode_t *, struct exportinfo *);
extern boolean_t do_rfs_label_check(bslabel_t *, vnode_t *, int,
struct exportinfo *);
/*