11083 support NFS server in zone
Portions contributed by: Dan Kruchinin <dan.kruchinin@nexenta.com>
Portions contributed by: Stepan Zastupov <stepan.zastupov@gmail.com>
Portions contributed by: Joyce McIntosh <joyce.mcintosh@nexenta.com>
Portions contributed by: Mike Zeller <mike@mikezeller.net>
Portions contributed by: Dan McDonald <danmcd@joyent.com>
Portions contributed by: Gordon Ross <gordon.w.ross@gmail.com>
Portions contributed by: Vitaliy Gusev <gusev.vitaliy@gmail.com>
Reviewed by: Rick McNeal <rick.mcneal@nexenta.com>
Reviewed by: Rob Gittins <rob.gittins@nexenta.com>
Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com>
Reviewed by: Jason King <jbk@joyent.com>
Reviewed by: C Fraire <cfraire@me.com>
Change-Id: I22f289d357503f9b48a0bc2482cc4328a6d43d16
*** 20,31 ****
*/
/*
* Copyright (c) 1990, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011 Bayard G. Bell. All rights reserved.
* Copyright (c) 2013 by Delphix. All rights reserved.
- * Copyright 2014 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2017 Joyent Inc
*/
/*
* Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T.
* All rights reserved.
--- 20,31 ----
*/
/*
* Copyright (c) 1990, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011 Bayard G. Bell. All rights reserved.
* Copyright (c) 2013 by Delphix. All rights reserved.
* Copyright (c) 2017 Joyent Inc
+ * Copyright 2019 Nexenta by DDN, Inc.
*/
/*
* Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T.
* All rights reserved.
*** 81,91 ****
#include <nfs/export.h>
#include <nfs/nfssys.h>
#include <nfs/nfs_clnt.h>
#include <nfs/nfs_acl.h>
#include <nfs/nfs_log.h>
- #include <nfs/nfs_cmd.h>
#include <nfs/lm.h>
#include <nfs/nfs_dispatch.h>
#include <nfs/nfs4_drc.h>
#include <sys/modctl.h>
--- 81,90 ----
*** 107,128 ****
static struct modlinkage modlinkage = {
MODREV_1, (void *)&modlmisc, NULL
};
kmem_cache_t *nfs_xuio_cache;
int nfs_loaned_buffers = 0;
int
_init(void)
{
int status;
! if ((status = nfs_srvinit()) != 0) {
! cmn_err(CE_WARN, "_init: nfs_srvinit failed");
! return (status);
! }
status = mod_install((struct modlinkage *)&modlinkage);
if (status != 0) {
/*
* Could not load module, cleanup previous
--- 106,128 ----
static struct modlinkage modlinkage = {
MODREV_1, (void *)&modlmisc, NULL
};
+ zone_key_t nfssrv_zone_key;
+ list_t nfssrv_globals_list;
+ krwlock_t nfssrv_globals_rwl;
+
kmem_cache_t *nfs_xuio_cache;
int nfs_loaned_buffers = 0;
int
_init(void)
{
int status;
! nfs_srvinit();
status = mod_install((struct modlinkage *)&modlinkage);
if (status != 0) {
/*
* Could not load module, cleanup previous
*** 175,205 ****
* modifying those routines to avoid the duplication. For now, we optimize
* by calling exportmatch() only after checking that the dispatch routine
* supports RPC_PUBLICFH_OK, and if the filesystem is explicitly exported
* public (i.e., not the placeholder).
*/
! #define PUBLICFH_CHECK(disp, exi, fsid, xfid) \
((disp->dis_flags & RPC_PUBLICFH_OK) && \
((exi->exi_export.ex_flags & EX_PUBLIC) || \
! (exi == exi_public && exportmatch(exi_root, \
fsid, xfid))))
static void nfs_srv_shutdown_all(int);
! static void rfs4_server_start(int);
static void nullfree(void);
static void rfs_dispatch(struct svc_req *, SVCXPRT *);
static void acl_dispatch(struct svc_req *, SVCXPRT *);
- static void common_dispatch(struct svc_req *, SVCXPRT *,
- rpcvers_t, rpcvers_t, char *,
- struct rpc_disptable *);
- static void hanfsv4_failover(void);
static int checkauth(struct exportinfo *, struct svc_req *, cred_t *, int,
bool_t, bool_t *);
static char *client_name(struct svc_req *req);
static char *client_addr(struct svc_req *req, char *buf);
extern int sec_svc_getcred(struct svc_req *, cred_t *cr, char **, int *);
extern bool_t sec_svc_inrootlist(int, caddr_t, int, caddr_t *);
#define NFSLOG_COPY_NETBUF(exi, xprt, nb) { \
(nb)->maxlen = (xprt)->xp_rtaddr.maxlen; \
(nb)->len = (xprt)->xp_rtaddr.len; \
(nb)->buf = kmem_alloc((nb)->len, KM_SLEEP); \
--- 175,204 ----
* modifying those routines to avoid the duplication. For now, we optimize
* by calling exportmatch() only after checking that the dispatch routine
* supports RPC_PUBLICFH_OK, and if the filesystem is explicitly exported
* public (i.e., not the placeholder).
*/
! #define PUBLICFH_CHECK(ne, disp, exi, fsid, xfid) \
((disp->dis_flags & RPC_PUBLICFH_OK) && \
((exi->exi_export.ex_flags & EX_PUBLIC) || \
! (exi == ne->exi_public && exportmatch(ne->exi_root, \
fsid, xfid))))
static void nfs_srv_shutdown_all(int);
! static void rfs4_server_start(nfs_globals_t *, int);
static void nullfree(void);
static void rfs_dispatch(struct svc_req *, SVCXPRT *);
static void acl_dispatch(struct svc_req *, SVCXPRT *);
static int checkauth(struct exportinfo *, struct svc_req *, cred_t *, int,
bool_t, bool_t *);
static char *client_name(struct svc_req *req);
static char *client_addr(struct svc_req *req, char *buf);
extern int sec_svc_getcred(struct svc_req *, cred_t *cr, char **, int *);
extern bool_t sec_svc_inrootlist(int, caddr_t, int, caddr_t *);
+ static void *nfs_server_zone_init(zoneid_t);
+ static void nfs_server_zone_fini(zoneid_t, void *);
+ static void nfs_server_zone_shutdown(zoneid_t, void *);
#define NFSLOG_COPY_NETBUF(exi, xprt, nb) { \
(nb)->maxlen = (xprt)->xp_rtaddr.maxlen; \
(nb)->len = (xprt)->xp_rtaddr.len; \
(nb)->buf = kmem_alloc((nb)->len, KM_SLEEP); \
*** 246,303 ****
};
static SVC_CALLOUT_TABLE nfs_sct_rdma = {
sizeof (__nfs_sc_rdma) / sizeof (__nfs_sc_rdma[0]), FALSE, __nfs_sc_rdma
};
- rpcvers_t nfs_versmin = NFS_VERSMIN_DEFAULT;
- rpcvers_t nfs_versmax = NFS_VERSMAX_DEFAULT;
/*
- * Used to track the state of the server so that initialization
- * can be done properly.
- */
- typedef enum {
- NFS_SERVER_STOPPED, /* server state destroyed */
- NFS_SERVER_STOPPING, /* server state being destroyed */
- NFS_SERVER_RUNNING,
- NFS_SERVER_QUIESCED, /* server state preserved */
- NFS_SERVER_OFFLINE /* server pool offline */
- } nfs_server_running_t;
-
- static nfs_server_running_t nfs_server_upordown;
- static kmutex_t nfs_server_upordown_lock;
- static kcondvar_t nfs_server_upordown_cv;
-
- /*
* DSS: distributed stable storage
* lists of all DSS paths: current, and before last warmstart
*/
nvlist_t *rfs4_dss_paths, *rfs4_dss_oldpaths;
int rfs4_dispatch(struct rpcdisp *, struct svc_req *, SVCXPRT *, char *);
bool_t rfs4_minorvers_mismatch(struct svc_req *, SVCXPRT *, void *);
/*
! * RDMA wait variables.
*/
! static kcondvar_t rdma_wait_cv;
! static kmutex_t rdma_wait_mutex;
/*
* Will be called at the point the server pool is being unregistered
* from the pool list. From that point onwards, the pool is waiting
* to be drained and as such the server state is stale and pertains
* to the old instantiation of the NFS server pool.
*/
void
nfs_srv_offline(void)
{
! mutex_enter(&nfs_server_upordown_lock);
! if (nfs_server_upordown == NFS_SERVER_RUNNING) {
! nfs_server_upordown = NFS_SERVER_OFFLINE;
}
! mutex_exit(&nfs_server_upordown_lock);
}
/*
* Will be called at the point the server pool is being destroyed so
* all transports have been closed and no service threads are in
--- 245,302 ----
};
static SVC_CALLOUT_TABLE nfs_sct_rdma = {
sizeof (__nfs_sc_rdma) / sizeof (__nfs_sc_rdma[0]), FALSE, __nfs_sc_rdma
};
/*
* DSS: distributed stable storage
* lists of all DSS paths: current, and before last warmstart
*/
nvlist_t *rfs4_dss_paths, *rfs4_dss_oldpaths;
int rfs4_dispatch(struct rpcdisp *, struct svc_req *, SVCXPRT *, char *);
bool_t rfs4_minorvers_mismatch(struct svc_req *, SVCXPRT *, void *);
/*
! * Stash NFS zone globals in TSD to avoid some lock contention
! * from frequent zone_getspecific calls.
*/
! static uint_t nfs_server_tsd_key;
+ nfs_globals_t *
+ nfs_srv_getzg(void)
+ {
+ nfs_globals_t *ng;
+
+ ng = tsd_get(nfs_server_tsd_key);
+ if (ng == NULL) {
+ ng = zone_getspecific(nfssrv_zone_key, curzone);
+ (void) tsd_set(nfs_server_tsd_key, ng);
+ }
+
+ return (ng);
+ }
+
/*
* Will be called at the point the server pool is being unregistered
* from the pool list. From that point onwards, the pool is waiting
* to be drained and as such the server state is stale and pertains
* to the old instantiation of the NFS server pool.
*/
void
nfs_srv_offline(void)
{
! nfs_globals_t *ng;
!
! ng = nfs_srv_getzg();
!
! mutex_enter(&ng->nfs_server_upordown_lock);
! if (ng->nfs_server_upordown == NFS_SERVER_RUNNING) {
! ng->nfs_server_upordown = NFS_SERVER_OFFLINE;
}
! mutex_exit(&ng->nfs_server_upordown_lock);
}
/*
* Will be called at the point the server pool is being destroyed so
* all transports have been closed and no service threads are in
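The nfs_srv_getzg() helper added above caches the zone's globals pointer in thread-specific data so that zone_getspecific() is consulted at most once per service thread. Below is a minimal userland sketch of the same caching pattern, using POSIX thread-specific data in place of the kernel tsd_get()/tsd_set() interfaces; all names in the sketch are illustrative, not part of the kernel API.

	/*
	 * Userland sketch of the TSD-caching pattern in nfs_srv_getzg().
	 * Build: cc tsd_sketch.c -lpthread
	 */
	#include <pthread.h>
	#include <stdio.h>

	typedef struct my_globals {
		int	versmin;
		int	versmax;
	} my_globals_t;

	static pthread_key_t	tsd_key;
	static my_globals_t	the_globals = { 2, 4 };	/* stand-in for zone state */

	/* Stand-in for zone_getspecific(); assume this is the contended path. */
	static my_globals_t *
	slow_lookup(void)
	{
		return (&the_globals);
	}

	/* Analogue of nfs_srv_getzg(): try the per-thread cache, fill on miss. */
	static my_globals_t *
	get_globals(void)
	{
		my_globals_t *g = pthread_getspecific(tsd_key);

		if (g == NULL) {
			g = slow_lookup();
			(void) pthread_setspecific(tsd_key, g);
		}
		return (g);
	}

	int
	main(void)
	{
		(void) pthread_key_create(&tsd_key, NULL);
		printf("versmax = %d\n", get_globals()->versmax);
		return (0);
	}

The cache is sound only while the looked-up pointer outlives the thread, which holds here because a zone's globals persist until the zone itself is torn down.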
*** 322,358 ****
int quiesce = 1;
nfs_srv_shutdown_all(quiesce);
}
static void
! nfs_srv_shutdown_all(int quiesce) {
! mutex_enter(&nfs_server_upordown_lock);
if (quiesce) {
! if (nfs_server_upordown == NFS_SERVER_RUNNING ||
! nfs_server_upordown == NFS_SERVER_OFFLINE) {
! nfs_server_upordown = NFS_SERVER_QUIESCED;
! cv_signal(&nfs_server_upordown_cv);
! /* reset DSS state, for subsequent warm restart */
rfs4_dss_numnewpaths = 0;
rfs4_dss_newpaths = NULL;
cmn_err(CE_NOTE, "nfs_server: server is now quiesced; "
"NFSv4 state has been preserved");
}
} else {
! if (nfs_server_upordown == NFS_SERVER_OFFLINE) {
! nfs_server_upordown = NFS_SERVER_STOPPING;
! mutex_exit(&nfs_server_upordown_lock);
! rfs4_state_fini();
! rfs4_fini_drc(nfs4_drc);
! mutex_enter(&nfs_server_upordown_lock);
! nfs_server_upordown = NFS_SERVER_STOPPED;
! cv_signal(&nfs_server_upordown_cv);
}
}
! mutex_exit(&nfs_server_upordown_lock);
}
static int
nfs_srv_set_sc_versions(struct file *fp, SVC_CALLOUT_TABLE **sctpp,
rpcvers_t versmin, rpcvers_t versmax)
--- 321,365 ----
int quiesce = 1;
nfs_srv_shutdown_all(quiesce);
}
static void
! nfs_srv_shutdown_all(int quiesce)
! {
! nfs_globals_t *ng = nfs_srv_getzg();
!
! mutex_enter(&ng->nfs_server_upordown_lock);
if (quiesce) {
! if (ng->nfs_server_upordown == NFS_SERVER_RUNNING ||
! ng->nfs_server_upordown == NFS_SERVER_OFFLINE) {
! ng->nfs_server_upordown = NFS_SERVER_QUIESCED;
! cv_signal(&ng->nfs_server_upordown_cv);
! /* reset DSS state */
rfs4_dss_numnewpaths = 0;
rfs4_dss_newpaths = NULL;
cmn_err(CE_NOTE, "nfs_server: server is now quiesced; "
"NFSv4 state has been preserved");
}
} else {
! if (ng->nfs_server_upordown == NFS_SERVER_OFFLINE) {
! ng->nfs_server_upordown = NFS_SERVER_STOPPING;
! mutex_exit(&ng->nfs_server_upordown_lock);
! rfs4_state_zone_fini();
! rfs4_fini_drc();
! mutex_enter(&ng->nfs_server_upordown_lock);
! ng->nfs_server_upordown = NFS_SERVER_STOPPED;
!
! /* reset DSS state */
! rfs4_dss_numnewpaths = 0;
! rfs4_dss_newpaths = NULL;
!
! cv_signal(&ng->nfs_server_upordown_cv);
}
}
! mutex_exit(&ng->nfs_server_upordown_lock);
}
static int
nfs_srv_set_sc_versions(struct file *fp, SVC_CALLOUT_TABLE **sctpp,
rpcvers_t versmin, rpcvers_t versmax)
*** 416,425 ****
--- 423,433 ----
* uap->fd is the fd of an open transport provider
*/
int
nfs_svc(struct nfs_svc_args *arg, model_t model)
{
+ nfs_globals_t *ng;
file_t *fp;
SVCMASTERXPRT *xprt;
int error;
int readsize;
char buf[KNC_STRSIZE];
*** 430,446 ****
--- 438,459 ----
#ifdef lint
model = model; /* STRUCT macros don't always refer to it */
#endif
+ ng = nfs_srv_getzg();
STRUCT_SET_HANDLE(uap, model, arg);
/* Check privileges in nfssys() */
if ((fp = getf(STRUCT_FGET(uap, fd))) == NULL)
return (EBADF);
+ /* Setup global file handle in nfs_export */
+ if ((error = nfs_export_get_rootfh(ng)) != 0)
+ return (error);
+
/*
* Set read buffer size to rsize
* and add room for RPC headers.
*/
readsize = nfs3tsize() + (RPC_MAXDATASIZE - NFS_MAXDATA);
*** 463,493 ****
releasef(STRUCT_FGET(uap, fd));
kmem_free(addrmask.buf, addrmask.maxlen);
return (error);
}
! nfs_versmin = STRUCT_FGET(uap, versmin);
! nfs_versmax = STRUCT_FGET(uap, versmax);
/* Double check the vers min/max ranges */
! if ((nfs_versmin > nfs_versmax) ||
! (nfs_versmin < NFS_VERSMIN) ||
! (nfs_versmax > NFS_VERSMAX)) {
! nfs_versmin = NFS_VERSMIN_DEFAULT;
! nfs_versmax = NFS_VERSMAX_DEFAULT;
}
! if (error =
! nfs_srv_set_sc_versions(fp, &sctp, nfs_versmin, nfs_versmax)) {
releasef(STRUCT_FGET(uap, fd));
kmem_free(addrmask.buf, addrmask.maxlen);
return (error);
}
/* Initialize nfsv4 server */
! if (nfs_versmax == (rpcvers_t)NFS_V4)
! rfs4_server_start(STRUCT_FGET(uap, delegation));
/* Create a transport handle. */
error = svc_tli_kcreate(fp, readsize, buf, &addrmask, &xprt,
sctp, NULL, NFS_SVCPOOL_ID, TRUE);
--- 476,506 ----
releasef(STRUCT_FGET(uap, fd));
kmem_free(addrmask.buf, addrmask.maxlen);
return (error);
}
! ng->nfs_versmin = STRUCT_FGET(uap, versmin);
! ng->nfs_versmax = STRUCT_FGET(uap, versmax);
/* Double check the vers min/max ranges */
! if ((ng->nfs_versmin > ng->nfs_versmax) ||
! (ng->nfs_versmin < NFS_VERSMIN) ||
! (ng->nfs_versmax > NFS_VERSMAX)) {
! ng->nfs_versmin = NFS_VERSMIN_DEFAULT;
! ng->nfs_versmax = NFS_VERSMAX_DEFAULT;
}
! if (error = nfs_srv_set_sc_versions(fp, &sctp, ng->nfs_versmin,
! ng->nfs_versmax)) {
releasef(STRUCT_FGET(uap, fd));
kmem_free(addrmask.buf, addrmask.maxlen);
return (error);
}
/* Initialize nfsv4 server */
! if (ng->nfs_versmax == (rpcvers_t)NFS_V4)
! rfs4_server_start(ng, STRUCT_FGET(uap, delegation));
/* Create a transport handle. */
error = svc_tli_kcreate(fp, readsize, buf, &addrmask, &xprt,
sctp, NULL, NFS_SVCPOOL_ID, TRUE);
*** 502,573 ****
return (error);
}
static void
! rfs4_server_start(int nfs4_srv_delegation)
{
/*
* Determine if the server has previously been "started" and
* if not, do the per instance initialization
*/
! mutex_enter(&nfs_server_upordown_lock);
! if (nfs_server_upordown != NFS_SERVER_RUNNING) {
/* Do we need to stop and wait on the previous server? */
! while (nfs_server_upordown == NFS_SERVER_STOPPING ||
! nfs_server_upordown == NFS_SERVER_OFFLINE)
! cv_wait(&nfs_server_upordown_cv,
! &nfs_server_upordown_lock);
! if (nfs_server_upordown != NFS_SERVER_RUNNING) {
(void) svc_pool_control(NFS_SVCPOOL_ID,
SVCPSET_UNREGISTER_PROC, (void *)&nfs_srv_offline);
(void) svc_pool_control(NFS_SVCPOOL_ID,
SVCPSET_SHUTDOWN_PROC, (void *)&nfs_srv_stop_all);
! /* is this an nfsd warm start? */
! if (nfs_server_upordown == NFS_SERVER_QUIESCED) {
! cmn_err(CE_NOTE, "nfs_server: "
! "server was previously quiesced; "
! "existing NFSv4 state will be re-used");
! /*
! * HA-NFSv4: this is also the signal
! * that a Resource Group failover has
! * occurred.
! */
! if (cluster_bootflags & CLUSTER_BOOTED)
! hanfsv4_failover();
! } else {
! /* cold start */
! rfs4_state_init();
! nfs4_drc = rfs4_init_drc(nfs4_drc_max,
! nfs4_drc_hash);
}
!
! /*
! * Check to see if delegation is to be
! * enabled at the server
! */
! if (nfs4_srv_delegation != FALSE)
! rfs4_set_deleg_policy(SRV_NORMAL_DELEGATE);
!
! nfs_server_upordown = NFS_SERVER_RUNNING;
}
! cv_signal(&nfs_server_upordown_cv);
! }
! mutex_exit(&nfs_server_upordown_lock);
}
/*
* If RDMA device available,
* start RDMA listener.
*/
int
rdma_start(struct rdma_svc_args *rsa)
{
int error;
rdma_xprt_group_t started_rdma_xprts;
rdma_stat stat;
int svc_state = 0;
--- 515,564 ----
return (error);
}
static void
! rfs4_server_start(nfs_globals_t *ng, int nfs4_srv_delegation)
{
/*
* Determine if the server has previously been "started" and
* if not, do the per instance initialization
*/
! mutex_enter(&ng->nfs_server_upordown_lock);
! if (ng->nfs_server_upordown != NFS_SERVER_RUNNING) {
/* Do we need to stop and wait on the previous server? */
! while (ng->nfs_server_upordown == NFS_SERVER_STOPPING ||
! ng->nfs_server_upordown == NFS_SERVER_OFFLINE)
! cv_wait(&ng->nfs_server_upordown_cv,
! &ng->nfs_server_upordown_lock);
! if (ng->nfs_server_upordown != NFS_SERVER_RUNNING) {
(void) svc_pool_control(NFS_SVCPOOL_ID,
SVCPSET_UNREGISTER_PROC, (void *)&nfs_srv_offline);
(void) svc_pool_control(NFS_SVCPOOL_ID,
SVCPSET_SHUTDOWN_PROC, (void *)&nfs_srv_stop_all);
! rfs4_do_server_start(ng->nfs_server_upordown,
! nfs4_srv_delegation,
! cluster_bootflags & CLUSTER_BOOTED);
! ng->nfs_server_upordown = NFS_SERVER_RUNNING;
}
! cv_signal(&ng->nfs_server_upordown_cv);
}
! mutex_exit(&ng->nfs_server_upordown_lock);
}
/*
* If RDMA device available,
* start RDMA listener.
*/
int
rdma_start(struct rdma_svc_args *rsa)
{
+ nfs_globals_t *ng;
int error;
rdma_xprt_group_t started_rdma_xprts;
rdma_stat stat;
int svc_state = 0;
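rfs4_server_start() above serializes a restart against a prior server instance that is still draining: with the up/down lock held, it waits on the condition variable while the state is STOPPING or OFFLINE, performs per-instance initialization only if the state is still not RUNNING, and then signals other waiters. A compact userland sketch of that wait loop follows, with pthread primitives standing in for the kernel mutex and condvar; the names are illustrative.

	/* Sketch of the up/down state machine in rfs4_server_start(). */
	#include <pthread.h>

	typedef enum {
		SRV_STOPPED,	/* server state destroyed */
		SRV_STOPPING,	/* server state being destroyed */
		SRV_RUNNING,
		SRV_QUIESCED,	/* server state preserved */
		SRV_OFFLINE	/* server pool offline */
	} srv_state_t;

	static srv_state_t	state = SRV_STOPPED;
	static pthread_mutex_t	lock = PTHREAD_MUTEX_INITIALIZER;
	static pthread_cond_t	cv = PTHREAD_COND_INITIALIZER;

	void
	server_start(void)
	{
		(void) pthread_mutex_lock(&lock);
		if (state != SRV_RUNNING) {
			/* Wait for a previous instance to finish draining. */
			while (state == SRV_STOPPING || state == SRV_OFFLINE)
				(void) pthread_cond_wait(&cv, &lock);
			if (state != SRV_RUNNING) {
				/* per-instance initialization goes here */
				state = SRV_RUNNING;
			}
			(void) pthread_cond_signal(&cv);
		}
		(void) pthread_mutex_unlock(&lock);
	}

	int
	main(void)
	{
		server_start();
		return (0);
	}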
*** 576,588 ****
(rsa->nfs_versmin < NFS_VERSMIN) ||
(rsa->nfs_versmax > NFS_VERSMAX)) {
rsa->nfs_versmin = NFS_VERSMIN_DEFAULT;
rsa->nfs_versmax = NFS_VERSMAX_DEFAULT;
}
- nfs_versmin = rsa->nfs_versmin;
- nfs_versmax = rsa->nfs_versmax;
/* Set the versions in the callout table */
__nfs_sc_rdma[0].sc_versmin = rsa->nfs_versmin;
__nfs_sc_rdma[0].sc_versmax = rsa->nfs_versmax;
/* For the NFS_ACL program, check the max version */
__nfs_sc_rdma[1].sc_versmin = rsa->nfs_versmin;
--- 567,581 ----
(rsa->nfs_versmin < NFS_VERSMIN) ||
(rsa->nfs_versmax > NFS_VERSMAX)) {
rsa->nfs_versmin = NFS_VERSMIN_DEFAULT;
rsa->nfs_versmax = NFS_VERSMAX_DEFAULT;
}
+ ng = nfs_srv_getzg();
+ ng->nfs_versmin = rsa->nfs_versmin;
+ ng->nfs_versmax = rsa->nfs_versmax;
+
/* Set the versions in the callout table */
__nfs_sc_rdma[0].sc_versmin = rsa->nfs_versmin;
__nfs_sc_rdma[0].sc_versmax = rsa->nfs_versmax;
/* For the NFS_ACL program, check the max version */
__nfs_sc_rdma[1].sc_versmin = rsa->nfs_versmin;
*** 591,601 ****
else
__nfs_sc_rdma[1].sc_versmax = rsa->nfs_versmax;
/* Initialize nfsv4 server */
if (rsa->nfs_versmax == (rpcvers_t)NFS_V4)
! rfs4_server_start(rsa->delegation);
started_rdma_xprts.rtg_count = 0;
started_rdma_xprts.rtg_listhead = NULL;
started_rdma_xprts.rtg_poolid = rsa->poolid;
--- 584,594 ----
else
__nfs_sc_rdma[1].sc_versmax = rsa->nfs_versmax;
/* Initialize nfsv4 server */
if (rsa->nfs_versmax == (rpcvers_t)NFS_V4)
! rfs4_server_start(ng, rsa->delegation);
started_rdma_xprts.rtg_count = 0;
started_rdma_xprts.rtg_listhead = NULL;
started_rdma_xprts.rtg_poolid = rsa->poolid;
*** 608,618 ****
while (!error) {
/*
* wait till either interrupted by a signal on
* nfs service stop/restart or signalled by a
! * rdma plugin attach/detach.
*/
stat = rdma_kwait();
/*
--- 601,611 ----
while (!error) {
/*
* wait till either interrupted by a signal on
* nfs service stop/restart or signalled by a
! * rdma attach/detach.
*/
stat = rdma_kwait();
/*
*** 659,672 ****
/* ARGSUSED */
void
rpc_null_v3(caddr_t *argp, caddr_t *resp, struct exportinfo *exi,
struct svc_req *req, cred_t *cr, bool_t ro)
{
! DTRACE_NFSV3_3(op__null__start, struct svc_req *, req,
! cred_t *, cr, vnode_t *, NULL);
! DTRACE_NFSV3_3(op__null__done, struct svc_req *, req,
! cred_t *, cr, vnode_t *, NULL);
}
/* ARGSUSED */
static void
rfs_error(caddr_t *argp, caddr_t *resp, struct exportinfo *exi,
--- 652,665 ----
/* ARGSUSED */
void
rpc_null_v3(caddr_t *argp, caddr_t *resp, struct exportinfo *exi,
struct svc_req *req, cred_t *cr, bool_t ro)
{
! DTRACE_NFSV3_4(op__null__start, struct svc_req *, req,
! cred_t *, cr, vnode_t *, NULL, struct exportinfo *, exi);
! DTRACE_NFSV3_4(op__null__done, struct svc_req *, req,
! cred_t *, cr, vnode_t *, NULL, struct exportinfo *, exi);
}
/* ARGSUSED */
static void
rfs_error(caddr_t *argp, caddr_t *resp, struct exportinfo *exi,
*** 1340,1356 ****
};
static struct rpc_disptable rfs_disptable[] = {
{sizeof (rfsdisptab_v2) / sizeof (rfsdisptab_v2[0]),
rfscallnames_v2,
! &rfsproccnt_v2_ptr, rfsdisptab_v2},
{sizeof (rfsdisptab_v3) / sizeof (rfsdisptab_v3[0]),
rfscallnames_v3,
! &rfsproccnt_v3_ptr, rfsdisptab_v3},
{sizeof (rfsdisptab_v4) / sizeof (rfsdisptab_v4[0]),
rfscallnames_v4,
! &rfsproccnt_v4_ptr, rfsdisptab_v4},
};
/*
* If nfs_portmon is set, then clients are required to use privileged
* ports (ports < IPPORT_RESERVED) in order to get NFS services.
--- 1333,1349 ----
};
static struct rpc_disptable rfs_disptable[] = {
{sizeof (rfsdisptab_v2) / sizeof (rfsdisptab_v2[0]),
rfscallnames_v2,
! rfsdisptab_v2},
{sizeof (rfsdisptab_v3) / sizeof (rfsdisptab_v3[0]),
rfscallnames_v3,
! rfsdisptab_v3},
{sizeof (rfsdisptab_v4) / sizeof (rfsdisptab_v4[0]),
rfscallnames_v4,
! rfsdisptab_v4},
};
/*
* If nfs_portmon is set, then clients are required to use privileged
* ports (ports < IPPORT_RESERVED) in order to get NFS services.
*** 1365,1375 ****
#ifdef DEBUG
static int cred_hits = 0;
static int cred_misses = 0;
#endif
-
#ifdef DEBUG
/*
* Debug code to allow disabling of rfs_dispatch() use of
* fastxdrargs() and fastxdrres() calls for testing purposes.
*/
--- 1358,1367 ----
*** 1469,1483 ****
return (TRUE);
}
return (FALSE);
}
-
static void
common_dispatch(struct svc_req *req, SVCXPRT *xprt, rpcvers_t min_vers,
! rpcvers_t max_vers, char *pgmname,
! struct rpc_disptable *disptable)
{
int which;
rpcvers_t vers;
char *args;
union {
--- 1461,1473 ----
return (TRUE);
}
return (FALSE);
}
static void
common_dispatch(struct svc_req *req, SVCXPRT *xprt, rpcvers_t min_vers,
! rpcvers_t max_vers, char *pgmname, struct rpc_disptable *disptable)
{
int which;
rpcvers_t vers;
char *args;
union {
*** 1506,1518 ****
--- 1496,1517 ----
bool_t logging_enabled = FALSE;
struct exportinfo *nfslog_exi = NULL;
char **procnames;
char cbuf[INET6_ADDRSTRLEN]; /* to hold both IPv4 and IPv6 addr */
bool_t ro = FALSE;
+ nfs_globals_t *ng = nfs_srv_getzg();
+ nfs_export_t *ne = ng->nfs_export;
+ kstat_named_t *svstat, *procstat;
+ ASSERT(req->rq_prog == NFS_PROGRAM || req->rq_prog == NFS_ACL_PROGRAM);
+
vers = req->rq_vers;
+ svstat = ng->svstat[req->rq_vers];
+ procstat = (req->rq_prog == NFS_PROGRAM) ?
+ ng->rfsproccnt[vers] : ng->aclproccnt[vers];
+
if (vers < min_vers || vers > max_vers) {
svcerr_progvers(req->rq_xprt, min_vers, max_vers);
error++;
cmn_err(CE_NOTE, "%s: bad version number %u", pgmname, vers);
goto done;
*** 1524,1534 ****
svcerr_noproc(req->rq_xprt);
error++;
goto done;
}
! (*(disptable[(int)vers].dis_proccntp))[which].value.ui64++;
disp = &disptable[(int)vers].dis_table[which];
procnames = disptable[(int)vers].dis_procnames;
auth_flavor = req->rq_cred.oa_flavor;
--- 1523,1533 ----
svcerr_noproc(req->rq_xprt);
error++;
goto done;
}
! procstat[which].value.ui64++;
disp = &disptable[(int)vers].dis_table[which];
procnames = disptable[(int)vers].dis_procnames;
auth_flavor = req->rq_cred.oa_flavor;
*** 1630,1646 ****
--- 1629,1647 ----
anon_ok = 0;
cr = xprt->xp_cred;
ASSERT(cr != NULL);
#ifdef DEBUG
+ {
if (crgetref(cr) != 1) {
crfree(cr);
cr = crget();
xprt->xp_cred = cr;
cred_misses++;
} else
cred_hits++;
+ }
#else
if (crgetref(cr) != 1) {
crfree(cr);
cr = crget();
xprt->xp_cred = cr;
*** 1648,1658 ****
#endif
exi = checkexport(fsid, xfid);
if (exi != NULL) {
! publicfh_ok = PUBLICFH_CHECK(disp, exi, fsid, xfid);
/*
* Don't allow non-V4 clients access
* to pseudo exports
*/
--- 1649,1659 ----
#endif
exi = checkexport(fsid, xfid);
if (exi != NULL) {
! publicfh_ok = PUBLICFH_CHECK(ne, disp, exi, fsid, xfid);
/*
* Don't allow non-V4 clients access
* to pseudo exports
*/
*** 1761,1771 ****
* the later writing of the log record. This is done for
* the case that a lookup is done across a non-logged public
* file system.
*/
if (nfslog_buffer_list != NULL) {
! nfslog_exi = nfslog_get_exi(exi, req, res, &nfslog_rec_id);
/*
* Is logging enabled?
*/
logging_enabled = (nfslog_exi != NULL);
--- 1762,1772 ----
* the later writing of the log record. This is done for
* the case that a lookup is done across a non-logged public
* file system.
*/
if (nfslog_buffer_list != NULL) {
! nfslog_exi = nfslog_get_exi(ne, exi, req, res, &nfslog_rec_id);
/*
* Is logging enabled?
*/
logging_enabled = (nfslog_exi != NULL);
*** 1844,1856 ****
}
if (exi != NULL)
exi_rele(exi);
! global_svstat_ptr[req->rq_vers][NFS_BADCALLS].value.ui64 += error;
!
! global_svstat_ptr[req->rq_vers][NFS_CALLS].value.ui64++;
}
static void
rfs_dispatch(struct svc_req *req, SVCXPRT *xprt)
{
--- 1845,1856 ----
}
if (exi != NULL)
exi_rele(exi);
! svstat[NFS_BADCALLS].value.ui64 += error;
! svstat[NFS_CALLS].value.ui64++;
}
static void
rfs_dispatch(struct svc_req *req, SVCXPRT *xprt)
{
*** 1969,1982 ****
};
static struct rpc_disptable acl_disptable[] = {
{sizeof (acldisptab_v2) / sizeof (acldisptab_v2[0]),
aclcallnames_v2,
! &aclproccnt_v2_ptr, acldisptab_v2},
{sizeof (acldisptab_v3) / sizeof (acldisptab_v3[0]),
aclcallnames_v3,
! &aclproccnt_v3_ptr, acldisptab_v3},
};
static void
acl_dispatch(struct svc_req *req, SVCXPRT *xprt)
{
--- 1969,1982 ----
};
static struct rpc_disptable acl_disptable[] = {
{sizeof (acldisptab_v2) / sizeof (acldisptab_v2[0]),
aclcallnames_v2,
! acldisptab_v2},
{sizeof (acldisptab_v3) / sizeof (acldisptab_v3[0]),
aclcallnames_v3,
! acldisptab_v3},
};
static void
acl_dispatch(struct svc_req *req, SVCXPRT *xprt)
{
*** 2566,2600 ****
* once. It performs the following tasks:
* - Call sub-initialization routines (localize access to variables)
* - Initialize all locks
* - initialize the version 3 write verifier
*/
! int
nfs_srvinit(void)
{
- int error;
! error = nfs_exportinit();
! if (error != 0)
! return (error);
! error = rfs4_srvrinit();
! if (error != 0) {
! nfs_exportfini();
! return (error);
! }
rfs_srvrinit();
rfs3_srvrinit();
nfsauth_init();
! /* Init the stuff to control start/stop */
! nfs_server_upordown = NFS_SERVER_STOPPED;
! mutex_init(&nfs_server_upordown_lock, NULL, MUTEX_DEFAULT, NULL);
! cv_init(&nfs_server_upordown_cv, NULL, CV_DEFAULT, NULL);
! mutex_init(&rdma_wait_mutex, NULL, MUTEX_DEFAULT, NULL);
! cv_init(&rdma_wait_cv, NULL, CV_DEFAULT, NULL);
!
! return (0);
}
/*
* NFS Server finalization routine. This routine is called to cleanup the
* initialization work previously performed if the NFS server module could
--- 2566,2598 ----
* once. It performs the following tasks:
* - Call sub-initialization routines (localize access to variables)
* - Initialize all locks
* - initialize the version 3 write verifier
*/
! void
nfs_srvinit(void)
{
! /* Truly global stuff in this module (not per zone) */
! rw_init(&nfssrv_globals_rwl, NULL, RW_DEFAULT, NULL);
! list_create(&nfssrv_globals_list, sizeof (nfs_globals_t),
! offsetof(nfs_globals_t, nfs_g_link));
! tsd_create(&nfs_server_tsd_key, NULL);
!
! /* The order here is important */
! nfs_exportinit();
rfs_srvrinit();
rfs3_srvrinit();
+ rfs4_srvrinit();
nfsauth_init();
! /*
! * NFS server zone-specific global variables
! * Note the zone_init is called for the GZ here.
! */
! zone_key_create(&nfssrv_zone_key, nfs_server_zone_init,
! nfs_server_zone_shutdown, nfs_server_zone_fini);
}
/*
* NFS Server finalization routine. This routine is called to cleanup the
* initialization work previously performed if the NFS server module could
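nfs_srvinit() registers the zone key only after all module-wide initialization because zone_key_create() immediately runs the supplied init callback for every zone that already exists, the global zone included (hence the "zone_init is called for the GZ here" note above). A hedged userland sketch of that registration semantic follows; the zone table, callback types, and function names are invented for illustration.

	/*
	 * Sketch: registering a zone key runs the create callback for
	 * all extant zones, including the global zone (zone 0).
	 */
	#include <stdio.h>
	#include <stdlib.h>

	#define	MAXZONES	8

	typedef void *(*zone_create_cb_t)(int);

	static int	zone_ids[MAXZONES] = { 0, 3 };	/* GZ plus one NGZ */
	static int	nzones = 2;
	static void	*zone_data[MAXZONES];

	/* Analogue of zone_key_create(): invoke create for every extant zone. */
	static void
	zone_key_create_sketch(zone_create_cb_t create)
	{
		int i;

		for (i = 0; i < nzones; i++)
			zone_data[i] = create(zone_ids[i]);
	}

	/* Stand-in for nfs_server_zone_init(). */
	static void *
	nfs_zone_init_sketch(int zoneid)
	{
		printf("init NFS server globals for zone %d\n", zoneid);
		return (calloc(1, 64));		/* stand-in for nfs_globals_t */
	}

	int
	main(void)
	{
		int i;

		zone_key_create_sketch(nfs_zone_init_sketch);
		for (i = 0; i < nzones; i++)
			free(zone_data[i]);
		return (0);
	}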
*** 2601,2625 ****
* not be loaded correctly.
*/
void
nfs_srvfini(void)
{
nfsauth_fini();
rfs3_srvrfini();
rfs_srvrfini();
nfs_exportfini();
! mutex_destroy(&nfs_server_upordown_lock);
! cv_destroy(&nfs_server_upordown_cv);
! mutex_destroy(&rdma_wait_mutex);
! cv_destroy(&rdma_wait_cv);
}
/*
! * Set up an iovec array of up to cnt pointers.
*/
void
mblk_to_iov(mblk_t *m, int cnt, struct iovec *iovp)
{
while (m != NULL && cnt-- > 0) {
iovp->iov_base = (caddr_t)m->b_rptr;
--- 2599,2728 ----
* not be loaded correctly.
*/
void
nfs_srvfini(void)
{
+
+ /*
+ * NFS server zone-specific global variables
+ * Note the zone_fini is called for the GZ here.
+ */
+ (void) zone_key_delete(nfssrv_zone_key);
+
+ /* The order here is important (reverse of init) */
nfsauth_fini();
+ rfs4_srvrfini();
rfs3_srvrfini();
rfs_srvrfini();
nfs_exportfini();
! /* Truly global stuff in this module (not per zone) */
! tsd_destroy(&nfs_server_tsd_key);
! list_destroy(&nfssrv_globals_list);
! rw_destroy(&nfssrv_globals_rwl);
}
/*
! * Zone init, shutdown, fini functions for the NFS server
! *
! * This design is careful to create the entire hierarchy of
! * NFS server "globals" (including those created by various
! * per-module *_zone_init functions, etc.) so that all these
! * objects have exactly the same lifetime.
! *
! * These objects are also kept on a list for two reasons:
! * 1: It makes finding these in mdb _much_ easier.
! * 2: It allows operating across all zone globals for
! * functions like nfs_auth.c:exi_cache_reclaim
*/
+ static void *
+ nfs_server_zone_init(zoneid_t zoneid)
+ {
+ nfs_globals_t *ng;
+ ng = kmem_zalloc(sizeof (*ng), KM_SLEEP);
+
+ ng->nfs_versmin = NFS_VERSMIN_DEFAULT;
+ ng->nfs_versmax = NFS_VERSMAX_DEFAULT;
+
+ /* Init the stuff to control start/stop */
+ ng->nfs_server_upordown = NFS_SERVER_STOPPED;
+ mutex_init(&ng->nfs_server_upordown_lock, NULL, MUTEX_DEFAULT, NULL);
+ cv_init(&ng->nfs_server_upordown_cv, NULL, CV_DEFAULT, NULL);
+ mutex_init(&ng->rdma_wait_mutex, NULL, MUTEX_DEFAULT, NULL);
+ cv_init(&ng->rdma_wait_cv, NULL, CV_DEFAULT, NULL);
+
+ ng->nfs_zoneid = zoneid;
+
+ /*
+ * Order here is important.
+ * export init must precede srv init calls.
+ */
+ nfs_export_zone_init(ng);
+ rfs_stat_zone_init(ng);
+ rfs_srv_zone_init(ng);
+ rfs3_srv_zone_init(ng);
+ rfs4_srv_zone_init(ng);
+ nfsauth_zone_init(ng);
+
+ rw_enter(&nfssrv_globals_rwl, RW_WRITER);
+ list_insert_tail(&nfssrv_globals_list, ng);
+ rw_exit(&nfssrv_globals_rwl);
+
+ return (ng);
+ }
+
+ /* ARGSUSED */
+ static void
+ nfs_server_zone_shutdown(zoneid_t zoneid, void *data)
+ {
+ nfs_globals_t *ng;
+
+ ng = (nfs_globals_t *)data;
+
+ /*
+ * Order is like _fini, but only
+ * some modules need this hook.
+ */
+ nfsauth_zone_shutdown(ng);
+ nfs_export_zone_shutdown(ng);
+ }
+
+ /* ARGSUSED */
+ static void
+ nfs_server_zone_fini(zoneid_t zoneid, void *data)
+ {
+ nfs_globals_t *ng;
+
+ ng = (nfs_globals_t *)data;
+
+ rw_enter(&nfssrv_globals_rwl, RW_WRITER);
+ list_remove(&nfssrv_globals_list, ng);
+ rw_exit(&nfssrv_globals_rwl);
+
+ /*
+ * Order here is important.
+ * reverse order from init
+ */
+ nfsauth_zone_fini(ng);
+ rfs4_srv_zone_fini(ng);
+ rfs3_srv_zone_fini(ng);
+ rfs_srv_zone_fini(ng);
+ rfs_stat_zone_fini(ng);
+ nfs_export_zone_fini(ng);
+
+ mutex_destroy(&ng->nfs_server_upordown_lock);
+ cv_destroy(&ng->nfs_server_upordown_cv);
+ mutex_destroy(&ng->rdma_wait_mutex);
+ cv_destroy(&ng->rdma_wait_cv);
+
+ kmem_free(ng, sizeof (*ng));
+ }
+
+ /*
+ * Set up an iovec array of up to cnt pointers.
+ */
void
mblk_to_iov(mblk_t *m, int cnt, struct iovec *iovp)
{
while (m != NULL && cnt-- > 0) {
iovp->iov_base = (caddr_t)m->b_rptr;
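The globals list maintained by the zone init/fini hooks above exists partly so that cross-zone operations such as nfs_auth.c's exi_cache_reclaim can visit every zone's state. Below is a userland sketch of that traversal pattern: a hand-rolled intrusive list guarded by a pthread rwlock, standing in for nfssrv_globals_list and nfssrv_globals_rwl. All names and the reclaim body are illustrative; the list lock here protects only membership, and real per-zone state would take its own locks.

	/* Sketch: cross-zone walk of per-zone globals under a reader lock. */
	#include <pthread.h>
	#include <stdio.h>

	typedef struct zone_globals {
		struct zone_globals	*next;
		int			zoneid;
		int			cached_entries;	/* stand-in for a cache */
	} zone_globals_t;

	static zone_globals_t	*globals_list;
	static pthread_rwlock_t	globals_rwl = PTHREAD_RWLOCK_INITIALIZER;

	/* Insert under the writer lock, as nfs_server_zone_init() does. */
	static void
	globals_insert(zone_globals_t *zg)
	{
		(void) pthread_rwlock_wrlock(&globals_rwl);
		zg->next = globals_list;
		globals_list = zg;
		(void) pthread_rwlock_unlock(&globals_rwl);
	}

	/* Visit every zone's globals as a reader; the shape a reclaim takes. */
	static void
	reclaim_all_zones(void)
	{
		zone_globals_t *zg;

		(void) pthread_rwlock_rdlock(&globals_rwl);
		for (zg = globals_list; zg != NULL; zg = zg->next) {
			printf("zone %d: would drop %d cached entries\n",
			    zg->zoneid, zg->cached_entries);
		}
		(void) pthread_rwlock_unlock(&globals_rwl);
	}

	int
	main(void)
	{
		zone_globals_t gz = { NULL, 0, 10 };
		zone_globals_t ngz = { NULL, 3, 4 };

		globals_insert(&gz);
		globals_insert(&ngz);
		reclaim_all_zones();
		return (0);
	}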
*** 2853,2867 ****
--- 2956,2972 ----
*/
/* Release the reference on the old exi value */
ASSERT(*exi != NULL);
exi_rele(*exi);
+ *exi = NULL;
if (error = nfs_check_vpexi(mc_dvp, *vpp, kcred, exi)) {
VN_RELE(*vpp);
goto publicfh_done;
}
+ /* Have a new *exi */
}
}
publicfh_done:
if (mc_dvp)
*** 2884,2901 ****
{
char namebuf[TYPICALMAXPATHLEN];
struct pathname pn;
int error;
/*
* If pathname starts with '/', then set startdvp to root.
*/
if (*path == '/') {
while (*path == '/')
path++;
! startdvp = rootdir;
}
error = pn_get_buf(path, UIO_SYSSPACE, &pn, namebuf, sizeof (namebuf));
if (error == 0) {
/*
--- 2989,3008 ----
{
char namebuf[TYPICALMAXPATHLEN];
struct pathname pn;
int error;
+ ASSERT3U(crgetzoneid(cr), ==, curzone->zone_id);
+
/*
* If pathname starts with '/', then set startdvp to root.
*/
if (*path == '/') {
while (*path == '/')
path++;
! startdvp = ZONE_ROOTVP();
}
error = pn_get_buf(path, UIO_SYSSPACE, &pn, namebuf, sizeof (namebuf));
if (error == 0) {
/*
*** 2914,2924 ****
if ((pn.pn_pathlen = strlen(pn.pn_path)) == 0)
return (ENOENT);
}
VN_HOLD(startdvp);
error = lookuppnvp(&pn, NULL, NO_FOLLOW, dirvpp, compvpp,
! rootdir, startdvp, cr);
}
if (error == ENAMETOOLONG) {
/*
* This thread used a pathname > TYPICALMAXPATHLEN bytes long.
*/
--- 3021,3031 ----
if ((pn.pn_pathlen = strlen(pn.pn_path)) == 0)
return (ENOENT);
}
VN_HOLD(startdvp);
error = lookuppnvp(&pn, NULL, NO_FOLLOW, dirvpp, compvpp,
! ZONE_ROOTVP(), startdvp, cr);
}
if (error == ENAMETOOLONG) {
/*
* This thread used a pathname > TYPICALMAXPATHLEN bytes long.
*/
*** 2931,2941 ****
return (ENOENT);
}
}
VN_HOLD(startdvp);
error = lookuppnvp(&pn, NULL, NO_FOLLOW, dirvpp, compvpp,
! rootdir, startdvp, cr);
pn_free(&pn);
}
return (error);
}
--- 3038,3048 ----
return (ENOENT);
}
}
VN_HOLD(startdvp);
error = lookuppnvp(&pn, NULL, NO_FOLLOW, dirvpp, compvpp,
! ZONE_ROOTVP(), startdvp, cr);
pn_free(&pn);
}
return (error);
}
*** 3035,3206 ****
}
return (error);
}
- /*
- * Do the main work of handling HA-NFSv4 Resource Group failover on
- * Sun Cluster.
- * We need to detect whether any RG admin paths have been added or removed,
- * and adjust resources accordingly.
- * Currently we're using a very inefficient algorithm, ~ 2 * O(n**2). In
- * order to scale, the list and array of paths need to be held in more
- * suitable data structures.
- */
- static void
- hanfsv4_failover(void)
- {
- int i, start_grace, numadded_paths = 0;
- char **added_paths = NULL;
- rfs4_dss_path_t *dss_path;
-
- /*
- * Note: currently, rfs4_dss_pathlist cannot be NULL, since
- * it will always include an entry for NFS4_DSS_VAR_DIR. If we
- * make the latter dynamically specified too, the following will
- * need to be adjusted.
- */
-
- /*
- * First, look for removed paths: RGs that have been failed-over
- * away from this node.
- * Walk the "currently-serving" rfs4_dss_pathlist and, for each
- * path, check if it is on the "passed-in" rfs4_dss_newpaths array
- * from nfsd. If not, that RG path has been removed.
- *
- * Note that nfsd has sorted rfs4_dss_newpaths for us, and removed
- * any duplicates.
- */
- dss_path = rfs4_dss_pathlist;
- do {
- int found = 0;
- char *path = dss_path->path;
-
- /* used only for non-HA so may not be removed */
- if (strcmp(path, NFS4_DSS_VAR_DIR) == 0) {
- dss_path = dss_path->next;
- continue;
- }
-
- for (i = 0; i < rfs4_dss_numnewpaths; i++) {
- int cmpret;
- char *newpath = rfs4_dss_newpaths[i];
-
- /*
- * Since nfsd has sorted rfs4_dss_newpaths for us,
- * once the return from strcmp is negative we know
- * we've passed the point where "path" should be,
- * and can stop searching: "path" has been removed.
- */
- cmpret = strcmp(path, newpath);
- if (cmpret < 0)
- break;
- if (cmpret == 0) {
- found = 1;
- break;
- }
- }
-
- if (found == 0) {
- unsigned index = dss_path->index;
- rfs4_servinst_t *sip = dss_path->sip;
- rfs4_dss_path_t *path_next = dss_path->next;
-
- /*
- * This path has been removed.
- * We must clear out the servinst reference to
- * it, since it's now owned by another
- * node: we should not attempt to touch it.
- */
- ASSERT(dss_path == sip->dss_paths[index]);
- sip->dss_paths[index] = NULL;
-
- /* remove from "currently-serving" list, and destroy */
- remque(dss_path);
- /* allow for NUL */
- kmem_free(dss_path->path, strlen(dss_path->path) + 1);
- kmem_free(dss_path, sizeof (rfs4_dss_path_t));
-
- dss_path = path_next;
- } else {
- /* path was found; not removed */
- dss_path = dss_path->next;
- }
- } while (dss_path != rfs4_dss_pathlist);
-
- /*
- * Now, look for added paths: RGs that have been failed-over
- * to this node.
- * Walk the "passed-in" rfs4_dss_newpaths array from nfsd and,
- * for each path, check if it is on the "currently-serving"
- * rfs4_dss_pathlist. If not, that RG path has been added.
- *
- * Note: we don't do duplicate detection here; nfsd does that for us.
- *
- * Note: numadded_paths <= rfs4_dss_numnewpaths, which gives us
- * an upper bound for the size needed for added_paths[numadded_paths].
- */
-
- /* probably more space than we need, but guaranteed to be enough */
- if (rfs4_dss_numnewpaths > 0) {
- size_t sz = rfs4_dss_numnewpaths * sizeof (char *);
- added_paths = kmem_zalloc(sz, KM_SLEEP);
- }
-
- /* walk the "passed-in" rfs4_dss_newpaths array from nfsd */
- for (i = 0; i < rfs4_dss_numnewpaths; i++) {
- int found = 0;
- char *newpath = rfs4_dss_newpaths[i];
-
- dss_path = rfs4_dss_pathlist;
- do {
- char *path = dss_path->path;
-
- /* used only for non-HA */
- if (strcmp(path, NFS4_DSS_VAR_DIR) == 0) {
- dss_path = dss_path->next;
- continue;
- }
-
- if (strncmp(path, newpath, strlen(path)) == 0) {
- found = 1;
- break;
- }
-
- dss_path = dss_path->next;
- } while (dss_path != rfs4_dss_pathlist);
-
- if (found == 0) {
- added_paths[numadded_paths] = newpath;
- numadded_paths++;
- }
- }
-
- /* did we find any added paths? */
- if (numadded_paths > 0) {
- /* create a new server instance, and start its grace period */
- start_grace = 1;
- rfs4_servinst_create(start_grace, numadded_paths, added_paths);
-
- /* read in the stable storage state from these paths */
- rfs4_dss_readstate(numadded_paths, added_paths);
-
- /*
- * Multiple failovers during a grace period will cause
- * clients of the same resource group to be partitioned
- * into different server instances, with different
- * grace periods. Since clients of the same resource
- * group must be subject to the same grace period,
- * we need to reset all currently active grace periods.
- */
- rfs4_grace_reset_all();
- }
-
- if (rfs4_dss_numnewpaths > 0)
- kmem_free(added_paths, rfs4_dss_numnewpaths * sizeof (char *));
- }
-
/*
* Used by NFSv3 and NFSv4 server to query label of
* a pathname component during lookup/access ops.
*/
ts_label_t *
--- 3142,3151 ----