Print this page
11083 support NFS server in zone
Portions contributed by: Dan Kruchinin <dan.kruchinin@nexenta.com>
Portions contributed by: Stepan Zastupov <stepan.zastupov@gmail.com>
Portions contributed by: Joyce McIntosh <joyce.mcintosh@nexenta.com>
Portions contributed by: Mike Zeller <mike@mikezeller.net>
Portions contributed by: Dan McDonald <danmcd@joyent.com>
Portions contributed by: Gordon Ross <gordon.w.ross@gmail.com>
Portions contributed by: Vitaliy Gusev <gusev.vitaliy@gmail.com>
Reviewed by: Rick McNeal <rick.mcneal@nexenta.com>
Reviewed by: Rob Gittins <rob.gittins@nexenta.com>
Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com>
Reviewed by: Jason King <jbk@joyent.com>
Reviewed by: C Fraire <cfraire@me.com>
Change-Id: I22f289d357503f9b48a0bc2482cc4328a6d43d16
@@ -18,14 +18,14 @@
*
* CDDL HEADER END
*/
/*
- * Copyright 2016 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 1995, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2015 by Delphix. All rights reserved.
* Copyright (c) 2015 Joyent, Inc. All rights reserved.
+ * Copyright 2018 Nexenta Systems, Inc. All rights reserved.
*/
#include <sys/param.h>
#include <sys/errno.h>
#include <sys/vfs.h>
@@ -51,14 +51,16 @@
#include <nfs/nfs_clnt.h>
#include <nfs/auth.h>
static struct kmem_cache *exi_cache_handle;
static void exi_cache_reclaim(void *);
+static void exi_cache_reclaim_zone(nfs_globals_t *);
static void exi_cache_trim(struct exportinfo *exi);
extern pri_t minclsyspri;
+/* NFS auth cache statistics */
volatile uint_t nfsauth_cache_hit;
volatile uint_t nfsauth_cache_miss;
volatile uint_t nfsauth_cache_refresh;
volatile uint_t nfsauth_cache_reclaim;
volatile uint_t exi_cache_auth_reclaim_failed;
@@ -118,141 +120,174 @@
char *ran_netid;
list_node_t ran_node;
} refreshq_auth_node_t;
/*
- * Used to manipulate things on the refreshq_queue.
- * Note that the refresh thread will effectively
- * pop a node off of the queue, at which point it
+ * Used to manipulate things on the refreshq_queue. Note that the refresh
+ * thread will effectively pop a node off of the queue, at which point it
* will no longer need to hold the mutex.
*/
static kmutex_t refreshq_lock;
static list_t refreshq_queue;
static kcondvar_t refreshq_cv;
/*
- * If there is ever a problem with loading the
- * module, then nfsauth_fini() needs to be called
- * to remove state. In that event, since the
- * refreshq thread has been started, they need to
- * work together to get rid of state.
+ * If there is ever a problem with loading the module, then nfsauth_fini()
+ * needs to be called to remove state. In that event, since the refreshq
+ * thread has been started, they need to work together to get rid of state.
*/
typedef enum nfsauth_refreshq_thread_state {
REFRESHQ_THREAD_RUNNING,
REFRESHQ_THREAD_FINI_REQ,
- REFRESHQ_THREAD_HALTED
+ REFRESHQ_THREAD_HALTED,
+ REFRESHQ_THREAD_NEED_CREATE
} nfsauth_refreshq_thread_state_t;
-nfsauth_refreshq_thread_state_t
-refreshq_thread_state = REFRESHQ_THREAD_HALTED;
+typedef struct nfsauth_globals {
+ kmutex_t mountd_lock;
+ door_handle_t mountd_dh;
+ /*
+ * Used to manipulate things on the refreshq_queue. Note that the
+ * refresh thread will effectively pop a node off of the queue,
+ * at which point it will no longer need to hold the mutex.
+ */
+ kmutex_t refreshq_lock;
+ list_t refreshq_queue;
+ kcondvar_t refreshq_cv;
+
+ /*
+ * A list_t would be overkill. These are auth_cache entries which are
+ * no longer linked to an exi. It should be the case that all of their
+ * states are NFS_AUTH_INVALID, i.e., an entry is put on this list
+ * only if its state indicated that it had been placed on the
+ * refreshq_queue.
+ *
+ * Note that while there is no link from the exi or back to the exi,
+ * the exi can not go away until these entries are harvested.
+ */
+ struct auth_cache *refreshq_dead_entries;
+ nfsauth_refreshq_thread_state_t refreshq_thread_state;
+
+} nfsauth_globals_t;
+
static void nfsauth_free_node(struct auth_cache *);
-static void nfsauth_refresh_thread(void);
+static void nfsauth_refresh_thread(nfsauth_globals_t *);
static int nfsauth_cache_compar(const void *, const void *);
-/*
- * mountd is a server-side only daemon. This will need to be
- * revisited if the NFS server is ever made zones-aware.
- */
-kmutex_t mountd_lock;
-door_handle_t mountd_dh;
+static nfsauth_globals_t *
+nfsauth_get_zg(void)
+{
+ nfs_globals_t *ng = nfs_srv_getzg();
+ nfsauth_globals_t *nag = ng->nfs_auth;
+ ASSERT(nag != NULL);
+ return (nag);
+}
void
mountd_args(uint_t did)
{
- mutex_enter(&mountd_lock);
- if (mountd_dh != NULL)
- door_ki_rele(mountd_dh);
- mountd_dh = door_ki_lookup(did);
- mutex_exit(&mountd_lock);
+ nfsauth_globals_t *nag;
+
+ nag = nfsauth_get_zg();
+ mutex_enter(&nag->mountd_lock);
+ if (nag->mountd_dh != NULL)
+ door_ki_rele(nag->mountd_dh);
+ nag->mountd_dh = door_ki_lookup(did);
+ mutex_exit(&nag->mountd_lock);
}
void
nfsauth_init(void)
{
- /*
- * mountd can be restarted by smf(5). We need to make sure
- * the updated door handle will safely make it to mountd_dh
- */
- mutex_init(&mountd_lock, NULL, MUTEX_DEFAULT, NULL);
+ exi_cache_handle = kmem_cache_create("exi_cache_handle",
+ sizeof (struct auth_cache), 0, NULL, NULL,
+ exi_cache_reclaim, NULL, NULL, 0);
+}
- mutex_init(&refreshq_lock, NULL, MUTEX_DEFAULT, NULL);
- list_create(&refreshq_queue, sizeof (refreshq_exi_node_t),
- offsetof(refreshq_exi_node_t, ren_node));
+void
+nfsauth_fini(void)
+{
+ kmem_cache_destroy(exi_cache_handle);
+}
- cv_init(&refreshq_cv, NULL, CV_DEFAULT, NULL);
+void
+nfsauth_zone_init(nfs_globals_t *ng)
+{
+ nfsauth_globals_t *nag;
+ nag = kmem_zalloc(sizeof (*nag), KM_SLEEP);
+
/*
- * Allocate nfsauth cache handle
+ * mountd can be restarted by smf(5). We need to make sure
+ * the updated door handle will safely make it to mountd_dh.
*/
- exi_cache_handle = kmem_cache_create("exi_cache_handle",
- sizeof (struct auth_cache), 0, NULL, NULL,
- exi_cache_reclaim, NULL, NULL, 0);
+ mutex_init(&nag->mountd_lock, NULL, MUTEX_DEFAULT, NULL);
+ mutex_init(&nag->refreshq_lock, NULL, MUTEX_DEFAULT, NULL);
+ list_create(&nag->refreshq_queue, sizeof (refreshq_exi_node_t),
+ offsetof(refreshq_exi_node_t, ren_node));
+ cv_init(&nag->refreshq_cv, NULL, CV_DEFAULT, NULL);
+ nag->refreshq_thread_state = REFRESHQ_THREAD_NEED_CREATE;
- refreshq_thread_state = REFRESHQ_THREAD_RUNNING;
- (void) zthread_create(NULL, 0, nfsauth_refresh_thread,
- NULL, 0, minclsyspri);
+ ng->nfs_auth = nag;
}
-/*
- * Finalization routine for nfsauth. It is important to call this routine
- * before destroying the exported_lock.
- */
void
-nfsauth_fini(void)
+nfsauth_zone_shutdown(nfs_globals_t *ng)
{
refreshq_exi_node_t *ren;
+ nfsauth_globals_t *nag = ng->nfs_auth;
- /*
- * Prevent the nfsauth_refresh_thread from getting new
- * work.
- */
- mutex_enter(&refreshq_lock);
- if (refreshq_thread_state != REFRESHQ_THREAD_HALTED) {
- refreshq_thread_state = REFRESHQ_THREAD_FINI_REQ;
- cv_broadcast(&refreshq_cv);
+ /* Prevent the nfsauth_refresh_thread from getting new work */
+ mutex_enter(&nag->refreshq_lock);
+ if (nag->refreshq_thread_state == REFRESHQ_THREAD_RUNNING) {
+ nag->refreshq_thread_state = REFRESHQ_THREAD_FINI_REQ;
+ cv_broadcast(&nag->refreshq_cv);
- /*
- * Also, wait for nfsauth_refresh_thread() to exit.
- */
- while (refreshq_thread_state != REFRESHQ_THREAD_HALTED) {
- cv_wait(&refreshq_cv, &refreshq_lock);
+ /* Wait for nfsauth_refresh_thread() to exit */
+ while (nag->refreshq_thread_state != REFRESHQ_THREAD_HALTED)
+ cv_wait(&nag->refreshq_cv, &nag->refreshq_lock);
}
- }
- mutex_exit(&refreshq_lock);
+ mutex_exit(&nag->refreshq_lock);
/*
* Walk the exi_list and in turn, walk the auth_lists and free all
* lists. In addition, free INVALID auth_cache entries.
*/
- while ((ren = list_remove_head(&refreshq_queue))) {
+ while ((ren = list_remove_head(&nag->refreshq_queue))) {
refreshq_auth_node_t *ran;
while ((ran = list_remove_head(&ren->ren_authlist)) != NULL) {
struct auth_cache *p = ran->ran_auth;
if (p->auth_state == NFS_AUTH_INVALID)
nfsauth_free_node(p);
strfree(ran->ran_netid);
- kmem_free(ran, sizeof (refreshq_auth_node_t));
+ kmem_free(ran, sizeof (*ran));
}
list_destroy(&ren->ren_authlist);
exi_rele(ren->ren_exi);
- kmem_free(ren, sizeof (refreshq_exi_node_t));
+ kmem_free(ren, sizeof (*ren));
}
- list_destroy(&refreshq_queue);
+}
- cv_destroy(&refreshq_cv);
- mutex_destroy(&refreshq_lock);
+void
+nfsauth_zone_fini(nfs_globals_t *ng)
+{
+ nfsauth_globals_t *nag = ng->nfs_auth;
- mutex_destroy(&mountd_lock);
+ ng->nfs_auth = NULL;
- /*
- * Deallocate nfsauth cache handle
- */
- kmem_cache_destroy(exi_cache_handle);
+ list_destroy(&nag->refreshq_queue);
+ cv_destroy(&nag->refreshq_cv);
+ mutex_destroy(&nag->refreshq_lock);
+ mutex_destroy(&nag->mountd_lock);
+ /* Extra cleanup. */
+ if (nag->mountd_dh != NULL)
+ door_ki_rele(nag->mountd_dh);
+ kmem_free(nag, sizeof (*nag));
}
/*
* Convert the address in a netbuf to
* a hash index for the auth_cache table.
@@ -340,13 +375,14 @@
/*
* Callup to the mountd to get access information in the kernel.
*/
static bool_t
-nfsauth_retrieve(struct exportinfo *exi, char *req_netid, int flavor,
- struct netbuf *addr, int *access, cred_t *clnt_cred, uid_t *srv_uid,
- gid_t *srv_gid, uint_t *srv_gids_cnt, gid_t **srv_gids)
+nfsauth_retrieve(nfsauth_globals_t *nag, struct exportinfo *exi,
+ char *req_netid, int flavor, struct netbuf *addr, int *access,
+ cred_t *clnt_cred, uid_t *srv_uid, gid_t *srv_gid, uint_t *srv_gids_cnt,
+ gid_t **srv_gids)
{
varg_t varg = {0};
nfsauth_res_t res = {0};
XDR xdrs;
size_t absz;
@@ -415,15 +451,15 @@
da.desc_num = 0;
da.rbuf = NULL;
da.rsize = 1;
retry:
- mutex_enter(&mountd_lock);
- dh = mountd_dh;
+ mutex_enter(&nag->mountd_lock);
+ dh = nag->mountd_dh;
if (dh != NULL)
door_ki_hold(dh);
- mutex_exit(&mountd_lock);
+ mutex_exit(&nag->mountd_lock);
if (dh == NULL) {
/*
* The rendezvous point has not been established yet!
* This could mean that either mountd(1m) has not yet
@@ -489,16 +525,16 @@
* the (existing) door on us; we
* want to wait to give smf(5) a
* chance to restart mountd(1m)
* and establish a new door handle.
*/
- mutex_enter(&mountd_lock);
- if (dh == mountd_dh) {
- door_ki_rele(mountd_dh);
- mountd_dh = NULL;
+ mutex_enter(&nag->mountd_lock);
+ if (dh == nag->mountd_dh) {
+ door_ki_rele(nag->mountd_dh);
+ nag->mountd_dh = NULL;
}
- mutex_exit(&mountd_lock);
+ mutex_exit(&nag->mountd_lock);
delay(hz);
goto retry;
}
/*
* If the door was _not_ revoked on us,
@@ -591,11 +627,11 @@
return (TRUE);
}
static void
-nfsauth_refresh_thread(void)
+nfsauth_refresh_thread(nfsauth_globals_t *nag)
{
refreshq_exi_node_t *ren;
refreshq_auth_node_t *ran;
struct exportinfo *exi;
@@ -603,29 +639,29 @@
int access;
bool_t retrieval;
callb_cpr_t cprinfo;
- CALLB_CPR_INIT(&cprinfo, &refreshq_lock, callb_generic_cpr,
+ CALLB_CPR_INIT(&cprinfo, &nag->refreshq_lock, callb_generic_cpr,
"nfsauth_refresh");
for (;;) {
- mutex_enter(&refreshq_lock);
- if (refreshq_thread_state != REFRESHQ_THREAD_RUNNING) {
+ mutex_enter(&nag->refreshq_lock);
+ if (nag->refreshq_thread_state != REFRESHQ_THREAD_RUNNING) {
/* Keep the hold on the lock! */
break;
}
- ren = list_remove_head(&refreshq_queue);
+ ren = list_remove_head(&nag->refreshq_queue);
if (ren == NULL) {
CALLB_CPR_SAFE_BEGIN(&cprinfo);
- cv_wait(&refreshq_cv, &refreshq_lock);
- CALLB_CPR_SAFE_END(&cprinfo, &refreshq_lock);
- mutex_exit(&refreshq_lock);
+ cv_wait(&nag->refreshq_cv, &nag->refreshq_lock);
+ CALLB_CPR_SAFE_END(&cprinfo, &nag->refreshq_lock);
+ mutex_exit(&nag->refreshq_lock);
continue;
}
- mutex_exit(&refreshq_lock);
+ mutex_exit(&nag->refreshq_lock);
exi = ren->ren_exi;
ASSERT(exi != NULL);
/*
@@ -668,11 +704,12 @@
* will miss such advisory, nothing catastrophic
* happens: we will just spin longer here before the
* shutdown.
*/
if (p->auth_state == NFS_AUTH_INVALID ||
- refreshq_thread_state != REFRESHQ_THREAD_RUNNING) {
+ nag->refreshq_thread_state !=
+ REFRESHQ_THREAD_RUNNING) {
mutex_exit(&p->auth_lock);
if (p->auth_state == NFS_AUTH_INVALID)
nfsauth_free_node(p);
@@ -703,11 +740,11 @@
* the same netid. It doesn't matter. So
* when we refresh, we simply use the netid
* of the request which triggered the
* refresh attempt.
*/
- retrieval = nfsauth_retrieve(exi, netid,
+ retrieval = nfsauth_retrieve(nag, exi, netid,
p->auth_flavor, &p->auth_clnt->authc_addr, &access,
p->auth_clnt_cred, &uid, &gid, &ngids, &gids);
/*
* This can only be set in one other place
@@ -750,13 +787,14 @@
list_destroy(&ren->ren_authlist);
exi_rele(ren->ren_exi);
kmem_free(ren, sizeof (refreshq_exi_node_t));
}
- refreshq_thread_state = REFRESHQ_THREAD_HALTED;
- cv_broadcast(&refreshq_cv);
+ nag->refreshq_thread_state = REFRESHQ_THREAD_HALTED;
+ cv_broadcast(&nag->refreshq_cv);
CALLB_CPR_EXIT(&cprinfo);
+ DTRACE_PROBE(nfsauth__nfsauth__refresh__thread__exit);
zthread_exit();
}
int
nfsauth_cache_clnt_compar(const void *v1, const void *v2)
@@ -824,10 +862,11 @@
*/
static int
nfsauth_cache_get(struct exportinfo *exi, struct svc_req *req, int flavor,
cred_t *cr, uid_t *uid, gid_t *gid, uint_t *ngids, gid_t **gids)
{
+ nfsauth_globals_t *nag;
struct netbuf *taddrmask;
struct netbuf addr; /* temporary copy of client's address */
const struct netbuf *claddr;
avl_tree_t *tree;
struct auth_cache ac; /* used as a template for avl_find() */
@@ -843,10 +882,13 @@
avl_index_t where; /* used for avl_find()/avl_insert() */
ASSERT(cr != NULL);
+ ASSERT3P(curzone->zone_id, ==, exi->exi_zoneid);
+ nag = nfsauth_get_zg();
+
/*
* Now check whether this client already
* has an entry for this flavor in the cache
* for this export.
* Get the caller's address, mask off the
@@ -855,12 +897,16 @@
* it to find the chain of cache entries.
*/
claddr = svc_getrpccaller(req->rq_xprt);
addr = *claddr;
+ if (claddr->len != 0) {
addr.buf = kmem_alloc(addr.maxlen, KM_SLEEP);
bcopy(claddr->buf, addr.buf, claddr->len);
+ } else {
+ addr.buf = NULL;
+ }
SVC_GETADDRMASK(req->rq_xprt, SVC_TATTR_ADDRMASK, (void **)&taddrmask);
ASSERT(taddrmask != NULL);
addrmask(&addr, taddrmask);
@@ -1002,12 +1048,13 @@
kmem_free(addr.buf, addr.maxlen);
addr = p->auth_clnt->authc_addr;
atomic_inc_uint(&nfsauth_cache_miss);
- res = nfsauth_retrieve(exi, svc_getnetid(req->rq_xprt), flavor,
- &addr, &access, cr, &tmpuid, &tmpgid, &tmpngids, &tmpgids);
+ res = nfsauth_retrieve(nag, exi, svc_getnetid(req->rq_xprt),
+ flavor, &addr, &access, cr, &tmpuid, &tmpgid, &tmpngids,
+ &tmpgids);
p->auth_access = access;
p->auth_time = p->auth_freshness = gethrestime_sec();
if (res == TRUE) {
@@ -1088,25 +1135,37 @@
ran = kmem_alloc(sizeof (refreshq_auth_node_t),
KM_SLEEP);
ran->ran_auth = p;
ran->ran_netid = strdup(svc_getnetid(req->rq_xprt));
- mutex_enter(&refreshq_lock);
+ mutex_enter(&nag->refreshq_lock);
+
+ if (nag->refreshq_thread_state ==
+ REFRESHQ_THREAD_NEED_CREATE) {
+ /* Launch nfsauth refresh thread */
+ nag->refreshq_thread_state =
+ REFRESHQ_THREAD_RUNNING;
+ (void) zthread_create(NULL, 0,
+ nfsauth_refresh_thread, nag, 0,
+ minclsyspri);
+ }
+
/*
- * We should not add a work queue
- * item if the thread is not
- * accepting them.
+ * We should not add a work queue item if the thread
+ * is not accepting them.
*/
- if (refreshq_thread_state == REFRESHQ_THREAD_RUNNING) {
+ if (nag->refreshq_thread_state ==
+ REFRESHQ_THREAD_RUNNING) {
refreshq_exi_node_t *ren;
/*
* Is there an existing exi_list?
*/
- for (ren = list_head(&refreshq_queue);
+ for (ren = list_head(&nag->refreshq_queue);
ren != NULL;
- ren = list_next(&refreshq_queue, ren)) {
+ ren = list_next(&nag->refreshq_queue,
+ ren)) {
if (ren->ren_exi == exi) {
list_insert_tail(
&ren->ren_authlist, ran);
break;
}
@@ -1125,20 +1184,21 @@
offsetof(refreshq_auth_node_t,
ran_node));
list_insert_tail(&ren->ren_authlist,
ran);
- list_insert_tail(&refreshq_queue, ren);
+ list_insert_tail(&nag->refreshq_queue,
+ ren);
}
- cv_broadcast(&refreshq_cv);
+ cv_broadcast(&nag->refreshq_cv);
} else {
strfree(ran->ran_netid);
kmem_free(ran, sizeof (refreshq_auth_node_t));
}
- mutex_exit(&refreshq_lock);
+ mutex_exit(&nag->refreshq_lock);
} else {
mutex_exit(&p->auth_lock);
}
nach = atomic_inc_uint_nv(&nfsauth_cache_hit);
@@ -1160,12 +1220,12 @@
ASSERT(p == NULL);
atomic_inc_uint(&nfsauth_cache_miss);
- if (nfsauth_retrieve(exi, svc_getnetid(req->rq_xprt), flavor, &addr,
- &access, cr, &tmpuid, &tmpgid, &tmpngids, &tmpgids)) {
+ if (nfsauth_retrieve(nag, exi, svc_getnetid(req->rq_xprt), flavor,
+ &addr, &access, cr, &tmpuid, &tmpgid, &tmpngids, &tmpgids)) {
if (uid != NULL)
*uid = tmpuid;
if (gid != NULL)
*gid = tmpgid;
if (ngids != NULL && gids != NULL) {
@@ -1408,36 +1468,59 @@
nfsauth_free_clnt_node(node);
}
}
/*
- * Called by the kernel memory allocator when
- * memory is low. Free unused cache entries.
- * If that's not enough, the VM system will
- * call again for some more.
+ * Called by the kernel memory allocator when memory is low.
+ * Free unused cache entries. If that's not enough, the VM system
+ * will call again for some more.
+ *
+ * This needs to operate on all zones, so we take a reader lock
+ * on the list of zones and walk the list. This is OK here
+ * because exi_cache_trim doesn't block or cause new objects
+ * to be allocated (basically just frees lots of stuff).
+ * Use care if nfssrv_globals_rwl is taken as reader in any
+ * other cases because it will block nfs_server_zone_init
+ * and nfs_server_zone_fini, which enter as writer.
*/
/*ARGSUSED*/
void
exi_cache_reclaim(void *cdrarg)
{
+ nfs_globals_t *ng;
+
+ rw_enter(&nfssrv_globals_rwl, RW_READER);
+
+ ng = list_head(&nfssrv_globals_list);
+ while (ng != NULL) {
+ exi_cache_reclaim_zone(ng);
+ ng = list_next(&nfssrv_globals_list, ng);
+ }
+
+ rw_exit(&nfssrv_globals_rwl);
+}
+
+static void
+exi_cache_reclaim_zone(nfs_globals_t *ng)
+{
int i;
struct exportinfo *exi;
+ nfs_export_t *ne = ng->nfs_export;
- rw_enter(&exported_lock, RW_READER);
+ rw_enter(&ne->exported_lock, RW_READER);
for (i = 0; i < EXPTABLESIZE; i++) {
- for (exi = exptable[i]; exi; exi = exi->fid_hash.next) {
+ for (exi = ne->exptable[i]; exi; exi = exi->fid_hash.next)
exi_cache_trim(exi);
}
- }
- rw_exit(&exported_lock);
+ rw_exit(&ne->exported_lock);
atomic_inc_uint(&nfsauth_cache_reclaim);
}
-void
+static void
exi_cache_trim(struct exportinfo *exi)
{
struct auth_cache_clnt *c;
struct auth_cache_clnt *nextc;
struct auth_cache *p;