Print this page
NEX-17125 NFS: nbmand lock entered but not exited on error path
Reviewed by: Yuri Pankov <yuri.pankov@nexenta.com>
Reviewed by: Rick McNeal <rick.mcneal@nexenta.com>
Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com>
Reviewed by: Evan Layton <evan.layton@nexenta.com>
Reviewed by: Gordon Ross <gordon.ross@nexenta.com>
NEX-15279 support NFS server in zone
NEX-15520 online NFS shares cause zoneadm halt to hang in nfs_export_zone_fini
Portions contributed by: Dan Kruchinin dan.kruchinin@nexenta.com
Portions contributed by: Stepan Zastupov stepan.zastupov@gmail.com
Reviewed by: Joyce McIntosh <joyce.mcintosh@nexenta.com>
Reviewed by: Rob Gittins <rob.gittins@nexenta.com>
Reviewed by: Gordon Ross <gordon.ross@nexenta.com>
NEX-9275 Got "bad mutex" panic when run IO to nfs share from clients
Reviewed by: Yuri Pankov <yuri.pankov@nexenta.com>
Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com>
Reviewed by: Rob Gittins <rob.gittins@nexenta.com>
NEX-3524 CLONE - Port NEX-3505 "wrong authentication" messages with root=@0.0.0.0/0 set, result in loss of client access
Reviewed by: Marcel Telka <marcel.telka@nexenta.com>
NEX-3533 CLONE - Port NEX-3019 NFSv3 writes underneath mounted filesystem to directory
Reviewed by: Dan Fields <dan.fields@nexenta.com>
NEX-3095 Issues related to NFS nohide
Reviewed by: Dan Fields <dan.fields@nexenta.com>
NEX-1128 NFS server: Generic uid and gid remapping for AUTH_SYS
Reviewed by: Jan Kryl <jan.kryl@nexenta.com>
OS-20 share_nfs(1m) charset handling is unreliable
OS-22 Page fault at nfscmd_dropped_entrysize+0x1e()
OS-23 NFSv2/3/4: READDIR responses are inconsistent when charset conversion fails
OS-24 rfs3_readdir(): Issues related to nfscmd_convdirent()
Reviewed by: Jan Kryl <jan.kryl@nexenta.com>
Reviewed by: Gordon Ross <gordon.ross@nexenta.com>
re #13613 rb4516 Tunables needs volatile keyword
closes #12112 rb3823 - nfs-nohide: lookup("..") for submount should be correct
re #3541 rb11254 - nfs nohide - "nfssrv: need ability to go to submounts for v3 and v2 protocols"
*** 16,36 ****
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2014 Nexenta Systems, Inc. All rights reserved.
- * Copyright (c) 2016 by Delphix. All rights reserved.
*/
/*
* Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T.
* All rights reserved.
*/
#include <sys/param.h>
#include <sys/types.h>
#include <sys/systm.h>
#include <sys/cred.h>
#include <sys/buf.h>
--- 16,40 ----
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
+
/*
* Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved.
*/
/*
* Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T.
* All rights reserved.
*/
+ /*
+ * Copyright 2018 Nexenta Systems, Inc.
+ * Copyright (c) 2016 by Delphix. All rights reserved.
+ */
+
#include <sys/param.h>
#include <sys/types.h>
#include <sys/systm.h>
#include <sys/cred.h>
#include <sys/buf.h>
*** 67,86 ****
--- 71,108 ----
#include <vm/seg_map.h>
#include <vm/seg_kmem.h>
#include <sys/strsubr.h>
+ struct rfs_async_write_list;
+
/*
+ * Per-zone state of the NFSv2 server.
+ *
+ * An instance is allocated by rfs_zone_init(), the constructor registered
+ * for rfs_zone_key, and is looked up with zone_getspecific().
+ */
+ typedef struct nfs_srv {
+ kmutex_t async_write_lock; /* held while walking/editing async_write_head */
+ struct rfs_async_write_list *async_write_head; /* list of started write clusters */
+
+ /*
+ * Enables write clustering if == 1. When it is 0 the write path
+ * hands the request to rfs_write_sync() instead.
+ */
+ int write_async;
+ } nfs_srv_t;
+
+ /*
* These are the interface routines for the server side of the
* Network File System. See the NFS version 2 protocol specification
* for a description of this interface.
*/
static int sattr_to_vattr(struct nfssattr *, struct vattr *);
static void acl_perm(struct vnode *, struct exportinfo *, struct vattr *,
cred_t *);
+ static void *rfs_zone_init(zoneid_t zoneid);
+ static void rfs_zone_fini(zoneid_t zoneid, void *data);
+
/*
* Some "over the wire" UNIX file types. These are encoded
* into the mode. This needs to be fixed in the next rev.
*/
#define IFMT 0170000 /* type of file */
*** 87,96 ****
--- 109,119 ----
#define IFCHR 0020000 /* character special */
#define IFBLK 0060000 /* block special */
#define IFSOCK 0140000 /* socket */
u_longlong_t nfs2_srv_caller_id;
+ static zone_key_t rfs_zone_key;
/*
* Get file attributes.
* Returns the current attributes of the file with the given fhandle.
*/
*** 327,337 ****
--- 350,434 ----
rfs_setattr_getfh(struct nfssaargs *args)
{
return (&args->saa_fh);
}
+ /*
+ * Cross from *vpp into the submount found by traverse(), provided that
+ * submount is exported with EX_NOHIDE.
+ *
+ * *vpp and *exip are changed (and the previous vnode and exportinfo
+ * released) only when the switch actually happens. If the target is not
+ * exported, or is exported without EX_NOHIDE, both are left as they were
+ * and 0 is still returned.
+ *
+ * Returns 0, or the error from traverse() or VOP_FID(); on error *vpp and
+ * *exip are untouched.
+ */
+ int
+ rfs_cross_mnt(vnode_t **vpp, struct exportinfo **exip)
+ {
+ struct exportinfo *exi;
+ vnode_t *vp = *vpp;
+ fid_t fid;
+ int error;
+
+ VN_HOLD(vp); /* working hold; the caller's hold on *vpp stays intact */
+
+ if ((error = traverse(&vp)) != 0) {
+ VN_RELE(vp);
+ return (error);
+ }
+
+ bzero(&fid, sizeof (fid));
+ fid.fid_len = MAXFIDSZ;
+ error = VOP_FID(vp, &fid, NULL);
+ if (error) {
+ VN_RELE(vp);
+ return (error);
+ }
+
+ exi = checkexport(&vp->v_vfsp->vfs_fsid, &fid);
+ if (exi == NULL ||
+ (exi->exi_export.ex_flags & EX_NOHIDE) == 0) {
+ /*
+ * This is not an error: the subdirectory is simply not
+ * exported, or it is exported without "nohide". Drop the
+ * exportinfo reference (if any) and the working vnode hold,
+ * and leave the caller's *vpp and *exip alone.
+ */
+ if (exi != NULL)
+ exi_rele(&exi);
+ VN_RELE(vp);
+ } else {
+ /*
+ * Go to the submount: hand the new exportinfo and vnode to
+ * the caller and release the ones they replace.
+ */
+ exi_rele(exip);
+ *exip = exi;
+
+ VN_RELE(*vpp);
+ *vpp = vp;
+ }
+
+ return (0);
+ }
+
/*
+ * Given "*dvpp", which must be the root vnode of its filesystem (VROOT is
+ * asserted), and the matching "*exip", climb to the vnode returned by
+ * untraverse() and switch to the exportinfo nfs_vptoexi() finds for it.
+ *
+ * On success the old vnode and exportinfo are released, *dvpp and *exip
+ * are replaced, and 0 is returned.
+ * If nfs_vptoexi() finds no exportinfo, -1 is returned and *dvpp and *exip
+ * are left unchanged.
+ */
+ int
+ rfs_climb_crossmnt(vnode_t **dvpp, struct exportinfo **exip, cred_t *cr)
+ {
+ struct exportinfo *exi;
+ vnode_t *dvp = *dvpp;
+
+ ASSERT(dvp->v_flag & VROOT);
+
+ VN_HOLD(dvp); /* working hold; the caller's hold on *dvpp stays intact */
+ dvp = untraverse(dvp);
+ exi = nfs_vptoexi(NULL, dvp, cr, NULL, NULL, FALSE);
+ if (exi == NULL) {
+ VN_RELE(dvp);
+ return (-1);
+ }
+
+ exi_rele(exip); /* swap in the new exportinfo ... */
+ *exip = exi;
+ VN_RELE(*dvpp); /* ... and the new directory vnode */
+ *dvpp = dvp;
+
+ return (0);
+ }
+ /*
* Directory lookup.
* Returns an fhandle and file attributes for file name in a directory.
*/
/* ARGSUSED */
void
*** 369,407 ****
/*
* Allow lookups from the root - the default
* location of the public filehandle.
*/
if (exi != NULL && (exi->exi_export.ex_flags & EX_PUBLIC)) {
! dvp = rootdir;
VN_HOLD(dvp);
} else {
dvp = nfs_fhtovp(fhp, exi);
if (dvp == NULL) {
dr->dr_status = NFSERR_STALE;
return;
}
}
/*
* Not allow lookup beyond root.
* If the filehandle matches a filehandle of the exi,
* then the ".." refers beyond the root of an exported filesystem.
*/
if (strcmp(da->da_name, "..") == 0 &&
EQFID(&exi->exi_fid, (fid_t *)&fhp->fh_len)) {
! VN_RELE(dvp);
! dr->dr_status = NFSERR_NOENT;
! return;
}
ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
name = nfscmd_convname(ca, exi, da->da_name, NFSCMD_CONV_INBOUND,
MAXPATHLEN);
if (name == NULL) {
! dr->dr_status = NFSERR_ACCES;
! return;
}
/*
* If the public filehandle is used then allow
* a multi-component lookup, i.e. evaluate
--- 466,516 ----
/*
* Allow lookups from the root - the default
* location of the public filehandle.
*/
if (exi != NULL && (exi->exi_export.ex_flags & EX_PUBLIC)) {
! dvp = ZONE_ROOTVP();
VN_HOLD(dvp);
} else {
dvp = nfs_fhtovp(fhp, exi);
if (dvp == NULL) {
dr->dr_status = NFSERR_STALE;
return;
}
}
+ exi_hold(exi);
+
/*
* Not allow lookup beyond root.
* If the filehandle matches a filehandle of the exi,
* then the ".." refers beyond the root of an exported filesystem.
*/
if (strcmp(da->da_name, "..") == 0 &&
EQFID(&exi->exi_fid, (fid_t *)&fhp->fh_len)) {
! if ((exi->exi_export.ex_flags & EX_NOHIDE) &&
! (dvp->v_flag & VROOT)) {
! /*
! * special case for ".." and a 'nohide' exported root
! */
! if (rfs_climb_crossmnt(&dvp, &exi, cr) != 0) {
! error = NFSERR_ACCES;
! goto out;
}
+ } else {
+ error = NFSERR_NOENT;
+ goto out;
+ }
+ }
ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
name = nfscmd_convname(ca, exi, da->da_name, NFSCMD_CONV_INBOUND,
MAXPATHLEN);
if (name == NULL) {
! error = NFSERR_ACCES;
! goto out;
}
/*
* If the public filehandle is used then allow
* a multi-component lookup, i.e. evaluate
*** 411,420 ****
--- 520,532 ----
* This may result in a vnode in another filesystem
* which is OK as long as the filesystem is exported.
*/
if (PUBLIC_FH2(fhp)) {
publicfh_flag = TRUE;
+
+ exi_rele(&exi);
+
error = rfs_publicfh_mclookup(name, dvp, cr, &vp, &exi,
&sec);
} else {
/*
* Do a normal single component lookup.
*** 424,433 ****
--- 536,550 ----
}
if (name != da->da_name)
kmem_free(name, MAXPATHLEN);
+ if (error == 0 && vn_ismntpt(vp)) {
+ error = rfs_cross_mnt(&vp, &exi);
+ if (error)
+ VN_RELE(vp);
+ }
if (!error) {
va.va_mask = AT_ALL; /* we want everything */
error = rfs4_delegated_getattr(vp, &va, 0, cr);
*** 450,469 ****
}
}
VN_RELE(vp);
}
VN_RELE(dvp);
! /*
! * If publicfh_flag is true then we have called rfs_publicfh_mclookup
! * and have obtained a new exportinfo in exi which needs to be
! * released. Note the the original exportinfo pointed to by exi
! * will be released by the caller, comon_dispatch.
! */
! if (publicfh_flag && exi != NULL)
! exi_rele(exi);
/*
* If it's public fh, no 0x81, and client's flavor is
* invalid, set WebNFS status to WNFSERR_CLNT_FLAVOR now.
* Then set RPC status to AUTH_TOOWEAK in common_dispatch.
--- 567,581 ----
}
}
VN_RELE(vp);
}
+ out:
VN_RELE(dvp);
! if (exi != NULL)
! exi_rele(&exi);
/*
* If it's public fh, no 0x81, and client's flavor is
* invalid, set WebNFS status to WNFSERR_CLNT_FLAVOR now.
* Then set RPC status to AUTH_TOOWEAK in common_dispatch.
*** 683,692 ****
--- 795,806 ----
error = VOP_RWLOCK(vp, V_WRITELOCK_FALSE, &ct);
/* check if a monitor detected a delegation conflict */
if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
+ if (in_crit)
+ nbl_end_crit(vp);
VN_RELE(vp);
/* mark as wouldblock so response is dropped */
curthread->t_flag |= T_WOULDBLOCK;
rr->rr_data = NULL;
*** 1008,1021 ****
error = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct);
/* check if a monitor detected a delegation conflict */
if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
! VN_RELE(vp);
! /* mark as wouldblock so response is dropped */
! curthread->t_flag |= T_WOULDBLOCK;
! return;
}
if (wa->wa_data || wa->wa_rlist) {
/* Do the RDMA thing if necessary */
if (wa->wa_rlist) {
--- 1122,1132 ----
error = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct);
/* check if a monitor detected a delegation conflict */
if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
! goto out;
}
if (wa->wa_data || wa->wa_rlist) {
/* Do the RDMA thing if necessary */
if (wa->wa_rlist) {
*** 1051,1060 ****
--- 1162,1172 ----
savecred = curthread->t_cred;
curthread->t_cred = cr;
error = VOP_WRITE(vp, &uio, FSYNC, cr, &ct);
curthread->t_cred = savecred;
} else {
+
iovcnt = 0;
for (m = wa->wa_mblk; m != NULL; m = m->b_cont)
iovcnt++;
if (iovcnt <= MAX_IOVECS) {
#ifdef DEBUG
*** 1149,1159 ****
struct rfs_async_write_list *next;
};
static struct rfs_async_write_list *rfs_async_write_head = NULL;
static kmutex_t rfs_async_write_lock;
! static int rfs_write_async = 1; /* enables write clustering if == 1 */
#define MAXCLIOVECS 42
#define RFSWRITE_INITVAL (enum nfsstat) -1
#ifdef DEBUG
--- 1261,1271 ----
struct rfs_async_write_list *next;
};
static struct rfs_async_write_list *rfs_async_write_head = NULL;
static kmutex_t rfs_async_write_lock;
! volatile int rfs_write_async = 1; /* enables write clustering if == 1 */
#define MAXCLIOVECS 42
#define RFSWRITE_INITVAL (enum nfsstat) -1
#ifdef DEBUG
*** 1194,1205 ****
struct rfs_async_write_list nlpsp;
ushort_t t_flag;
cred_t *savecred;
int in_crit = 0;
caller_context_t ct;
! if (!rfs_write_async) {
rfs_write_sync(wa, ns, exi, req, cr, ro);
return;
}
/*
--- 1306,1319 ----
struct rfs_async_write_list nlpsp;
ushort_t t_flag;
cred_t *savecred;
int in_crit = 0;
caller_context_t ct;
+ nfs_srv_t *nsrv;
! nsrv = zone_getspecific(rfs_zone_key, curzone);
! if (!nsrv->write_async) {
rfs_write_sync(wa, ns, exi, req, cr, ro);
return;
}
/*
*** 1220,1231 ****
/*
* Look to see if there is already a cluster started
* for this file.
*/
! mutex_enter(&rfs_async_write_lock);
! for (lp = rfs_async_write_head; lp != NULL; lp = lp->next) {
if (bcmp(&wa->wa_fhandle, lp->fhp,
sizeof (fhandle_t)) == 0)
break;
}
--- 1334,1345 ----
/*
* Look to see if there is already a cluster started
* for this file.
*/
! mutex_enter(&nsrv->async_write_lock);
! for (lp = nsrv->async_write_head; lp != NULL; lp = lp->next) {
if (bcmp(&wa->wa_fhandle, lp->fhp,
sizeof (fhandle_t)) == 0)
break;
}
*** 1247,1258 ****
if (trp == NULL)
lp->list = nrp;
else
trp->list = nrp;
while (nrp->ns->ns_status == RFSWRITE_INITVAL)
! cv_wait(&lp->cv, &rfs_async_write_lock);
! mutex_exit(&rfs_async_write_lock);
return;
}
/*
--- 1361,1372 ----
if (trp == NULL)
lp->list = nrp;
else
trp->list = nrp;
while (nrp->ns->ns_status == RFSWRITE_INITVAL)
! cv_wait(&lp->cv, &nsrv->async_write_lock);
! mutex_exit(&nsrv->async_write_lock);
return;
}
/*
*** 1265,1295 ****
nlp->fhp = &wa->wa_fhandle;
cv_init(&nlp->cv, NULL, CV_DEFAULT, NULL);
nlp->list = nrp;
nlp->next = NULL;
! if (rfs_async_write_head == NULL) {
! rfs_async_write_head = nlp;
} else {
! lp = rfs_async_write_head;
while (lp->next != NULL)
lp = lp->next;
lp->next = nlp;
}
! mutex_exit(&rfs_async_write_lock);
/*
* Convert the file handle common to all of the requests
* in this cluster to a vnode.
*/
vp = nfs_fhtovp(&wa->wa_fhandle, exi);
if (vp == NULL) {
! mutex_enter(&rfs_async_write_lock);
! if (rfs_async_write_head == nlp)
! rfs_async_write_head = nlp->next;
else {
! lp = rfs_async_write_head;
while (lp->next != nlp)
lp = lp->next;
lp->next = nlp->next;
}
t_flag = curthread->t_flag & T_WOULDBLOCK;
--- 1379,1409 ----
nlp->fhp = &wa->wa_fhandle;
cv_init(&nlp->cv, NULL, CV_DEFAULT, NULL);
nlp->list = nrp;
nlp->next = NULL;
! if (nsrv->async_write_head == NULL) {
! nsrv->async_write_head = nlp;
} else {
! lp = nsrv->async_write_head;
while (lp->next != NULL)
lp = lp->next;
lp->next = nlp;
}
! mutex_exit(&nsrv->async_write_lock);
/*
* Convert the file handle common to all of the requests
* in this cluster to a vnode.
*/
vp = nfs_fhtovp(&wa->wa_fhandle, exi);
if (vp == NULL) {
! mutex_enter(&nsrv->async_write_lock);
! if (nsrv->async_write_head == nlp)
! nsrv->async_write_head = nlp->next;
else {
! lp = nsrv->async_write_head;
while (lp->next != nlp)
lp = lp->next;
lp->next = nlp->next;
}
t_flag = curthread->t_flag & T_WOULDBLOCK;
*** 1296,1306 ****
for (rp = nlp->list; rp != NULL; rp = rp->list) {
rp->ns->ns_status = NFSERR_STALE;
rp->thread->t_flag |= t_flag;
}
cv_broadcast(&nlp->cv);
! mutex_exit(&rfs_async_write_lock);
return;
}
/*
--- 1410,1420 ----
for (rp = nlp->list; rp != NULL; rp = rp->list) {
rp->ns->ns_status = NFSERR_STALE;
rp->thread->t_flag |= t_flag;
}
cv_broadcast(&nlp->cv);
! mutex_exit(&nsrv->async_write_lock);
return;
}
/*
*** 1307,1321 ****
* Can only write regular files. Attempts to write any
* other file types fail with EISDIR.
*/
if (vp->v_type != VREG) {
VN_RELE(vp);
! mutex_enter(&rfs_async_write_lock);
! if (rfs_async_write_head == nlp)
! rfs_async_write_head = nlp->next;
else {
! lp = rfs_async_write_head;
while (lp->next != nlp)
lp = lp->next;
lp->next = nlp->next;
}
t_flag = curthread->t_flag & T_WOULDBLOCK;
--- 1421,1435 ----
* Can only write regular files. Attempts to write any
* other file types fail with EISDIR.
*/
if (vp->v_type != VREG) {
VN_RELE(vp);
! mutex_enter(&nsrv->async_write_lock);
! if (nsrv->async_write_head == nlp)
! nsrv->async_write_head = nlp->next;
else {
! lp = nsrv->async_write_head;
while (lp->next != nlp)
lp = lp->next;
lp->next = nlp->next;
}
t_flag = curthread->t_flag & T_WOULDBLOCK;
*** 1322,1332 ****
for (rp = nlp->list; rp != NULL; rp = rp->list) {
rp->ns->ns_status = NFSERR_ISDIR;
rp->thread->t_flag |= t_flag;
}
cv_broadcast(&nlp->cv);
! mutex_exit(&rfs_async_write_lock);
return;
}
/*
--- 1436,1446 ----
for (rp = nlp->list; rp != NULL; rp = rp->list) {
rp->ns->ns_status = NFSERR_ISDIR;
rp->thread->t_flag |= t_flag;
}
cv_broadcast(&nlp->cv);
! mutex_exit(&nsrv->async_write_lock);
return;
}
/*
*** 1354,1368 ****
if (in_crit)
nbl_end_crit(vp);
VN_RELE(vp);
/* mark as wouldblock so response is dropped */
curthread->t_flag |= T_WOULDBLOCK;
! mutex_enter(&rfs_async_write_lock);
! if (rfs_async_write_head == nlp)
! rfs_async_write_head = nlp->next;
else {
! lp = rfs_async_write_head;
while (lp->next != nlp)
lp = lp->next;
lp->next = nlp->next;
}
for (rp = nlp->list; rp != NULL; rp = rp->list) {
--- 1468,1482 ----
if (in_crit)
nbl_end_crit(vp);
VN_RELE(vp);
/* mark as wouldblock so response is dropped */
curthread->t_flag |= T_WOULDBLOCK;
! mutex_enter(&nsrv->async_write_lock);
! if (nsrv->async_write_head == nlp)
! nsrv->async_write_head = nlp->next;
else {
! lp = nsrv->async_write_head;
while (lp->next != nlp)
lp = lp->next;
lp->next = nlp->next;
}
for (rp = nlp->list; rp != NULL; rp = rp->list) {
*** 1370,1380 ****
rp->ns->ns_status = puterrno(error);
rp->thread->t_flag |= T_WOULDBLOCK;
}
}
cv_broadcast(&nlp->cv);
! mutex_exit(&rfs_async_write_lock);
return;
}
/*
--- 1484,1494 ----
rp->ns->ns_status = puterrno(error);
rp->thread->t_flag |= T_WOULDBLOCK;
}
}
cv_broadcast(&nlp->cv);
! mutex_exit(&nsrv->async_write_lock);
return;
}
/*
*** 1392,1411 ****
* a new cluster and be blocked in VOP_RWLOCK while
* the first request is being processed. This delay
* will allow more requests to be clustered in this
* second cluster.
*/
! mutex_enter(&rfs_async_write_lock);
! if (rfs_async_write_head == nlp)
! rfs_async_write_head = nlp->next;
else {
! lp = rfs_async_write_head;
while (lp->next != nlp)
lp = lp->next;
lp->next = nlp->next;
}
! mutex_exit(&rfs_async_write_lock);
/*
* Step through the list of requests in this cluster.
* We need to check permissions to make sure that all
* of the requests have sufficient permission to write
--- 1506,1525 ----
* a new cluster and be blocked in VOP_RWLOCK while
* the first request is being processed. This delay
* will allow more requests to be clustered in this
* second cluster.
*/
! mutex_enter(&nsrv->async_write_lock);
! if (nsrv->async_write_head == nlp)
! nsrv->async_write_head = nlp->next;
else {
! lp = nsrv->async_write_head;
while (lp->next != nlp)
lp = lp->next;
lp->next = nlp->next;
}
! mutex_exit(&nsrv->async_write_lock);
/*
* Step through the list of requests in this cluster.
* We need to check permissions to make sure that all
* of the requests have sufficient permission to write
*** 1646,1664 ****
if (in_crit)
nbl_end_crit(vp);
VN_RELE(vp);
t_flag = curthread->t_flag & T_WOULDBLOCK;
! mutex_enter(&rfs_async_write_lock);
for (rp = nlp->list; rp != NULL; rp = rp->list) {
if (rp->ns->ns_status == RFSWRITE_INITVAL) {
rp->ns->ns_status = puterrno(error);
rp->thread->t_flag |= t_flag;
}
}
cv_broadcast(&nlp->cv);
! mutex_exit(&rfs_async_write_lock);
}
void *
rfs_write_getfh(struct nfswriteargs *wa)
--- 1760,1778 ----
if (in_crit)
nbl_end_crit(vp);
VN_RELE(vp);
t_flag = curthread->t_flag & T_WOULDBLOCK;
! mutex_enter(&nsrv->async_write_lock);
for (rp = nlp->list; rp != NULL; rp = rp->list) {
if (rp->ns->ns_status == RFSWRITE_INITVAL) {
rp->ns->ns_status = puterrno(error);
rp->thread->t_flag |= t_flag;
}
}
cv_broadcast(&nlp->cv);
! mutex_exit(&nsrv->async_write_lock);
}
void *
rfs_write_getfh(struct nfswriteargs *wa)
*** 1716,1725 ****
--- 1830,1845 ----
VN_RELE(dvp);
dr->dr_status = NFSERR_INVAL;
return;
}
+ if (protect_zfs_mntpt(dvp) != 0) {
+ VN_RELE(dvp);
+ dr->dr_status = NFSERR_ACCES;
+ return;
+ }
+
/*
* This is a completely gross hack to make mknod
* work over the wire until we can wack the protocol
*/
if ((va.va_mode & IFMT) == IFCHR) {
*** 2055,2065 ****
if (to_exi == NULL) {
VN_RELE(fromvp);
*status = NFSERR_ACCES;
return;
}
! exi_rele(to_exi);
if (to_exi != exi) {
VN_RELE(fromvp);
*status = NFSERR_XDEV;
return;
--- 2175,2185 ----
if (to_exi == NULL) {
VN_RELE(fromvp);
*status = NFSERR_ACCES;
return;
}
! exi_rele(&to_exi);
if (to_exi != exi) {
VN_RELE(fromvp);
*status = NFSERR_XDEV;
return;
*** 2095,2104 ****
--- 2215,2231 ----
VN_RELE(fromvp);
*status = NFSERR_ROFS;
return;
}
+ if (protect_zfs_mntpt(tovp) != 0) {
+ VN_RELE(tovp);
+ VN_RELE(fromvp);
+ *status = NFSERR_ACCES;
+ return;
+ }
+
/*
* Check for a conflict with a non-blocking mandatory share reservation.
*/
error = VOP_LOOKUP(fromvp, args->rna_from.da_name, &srcvp, NULL, 0,
NULL, cr, NULL, NULL, NULL);
*** 2119,2129 ****
return;
}
/* Check for delegation on the file being renamed over, if it exists */
! if (rfs4_deleg_policy != SRV_NEVER_DELEGATE &&
VOP_LOOKUP(tovp, args->rna_to.da_name, &targvp, NULL, 0, NULL, cr,
NULL, NULL, NULL) == 0) {
if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
VN_RELE(tovp);
--- 2246,2256 ----
return;
}
/* Check for delegation on the file being renamed over, if it exists */
! if (nfs4_get_deleg_policy() != SRV_NEVER_DELEGATE &&
VOP_LOOKUP(tovp, args->rna_to.da_name, &targvp, NULL, 0, NULL, cr,
NULL, NULL, NULL) == 0) {
if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
VN_RELE(tovp);
*** 2201,2211 ****
if (to_exi == NULL) {
VN_RELE(fromvp);
*status = NFSERR_ACCES;
return;
}
! exi_rele(to_exi);
if (to_exi != exi) {
VN_RELE(fromvp);
*status = NFSERR_XDEV;
return;
--- 2328,2338 ----
if (to_exi == NULL) {
VN_RELE(fromvp);
*status = NFSERR_ACCES;
return;
}
! exi_rele(&to_exi);
if (to_exi != exi) {
VN_RELE(fromvp);
*status = NFSERR_XDEV;
return;
*** 2239,2248 ****
--- 2366,2382 ----
VN_RELE(fromvp);
*status = NFSERR_ROFS;
return;
}
+ if (protect_zfs_mntpt(tovp) != 0) {
+ VN_RELE(tovp);
+ VN_RELE(fromvp);
+ *status = NFSERR_ACCES;
+ return;
+ }
+
error = VOP_LINK(tovp, fromvp, args->la_to.da_name, cr, NULL, 0);
/*
* Force modified data and metadata out to stable storage.
*/
*** 2261,2271 ****
return (args->la_from);
}
/*
* Symbolicly link to a file.
! * Create a file (to) with the given attributes which is a symbolic link
* to the given path name (to).
*/
void
rfs_symlink(struct nfsslargs *args, enum nfsstat *status,
struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
--- 2395,2405 ----
return (args->la_from);
}
/*
* Symbolicly link to a file.
! * Create a file (from) with the given attributes which is a symbolic link
* to the given path name (to).
*/
void
rfs_symlink(struct nfsslargs *args, enum nfsstat *status,
struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
*** 2309,2318 ****
--- 2443,2458 ----
VN_RELE(vp);
*status = NFSERR_INVAL;
return;
}
+ if (protect_zfs_mntpt(vp) != 0) {
+ VN_RELE(vp);
+ *status = NFSERR_ACCES;
+ return;
+ }
+
ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
name = nfscmd_convname(ca, exi, args->sla_tnm,
NFSCMD_CONV_INBOUND, MAXPATHLEN);
if (name == NULL) {
*** 2401,2410 ****
--- 2541,2556 ----
VN_RELE(vp);
dr->dr_status = NFSERR_INVAL;
return;
}
+ if (protect_zfs_mntpt(vp) != 0) {
+ VN_RELE(vp);
+ dr->dr_status = NFSERR_ACCES;
+ return;
+ }
+
va.va_type = VDIR;
va.va_mask |= AT_TYPE;
error = VOP_MKDIR(vp, name, &va, &dvp, cr, NULL, 0, NULL);
*** 2486,2496 ****
* Of course, NFS servers have no idea what their
* clients' current directories are. We fake it by
* supplying a vnode known to exist and illegal to
* remove.
*/
! error = VOP_RMDIR(vp, da->da_name, rootdir, cr, NULL, 0);
/*
* Force modified data and metadata out to stable storage.
*/
(void) VOP_FSYNC(vp, 0, cr, NULL);
--- 2632,2642 ----
* Of course, NFS servers have no idea what their
* clients' current directories are. We fake it by
* supplying a vnode known to exist and illegal to
* remove.
*/
! error = VOP_RMDIR(vp, da->da_name, ZONE_ROOTVP(), cr, NULL, 0);
/*
* Force modified data and metadata out to stable storage.
*/
(void) VOP_FSYNC(vp, 0, cr, NULL);
*** 2513,2635 ****
rfs_rmdir_getfh(struct nfsdiropargs *da)
{
return (da->da_fhandle);
}
/* ARGSUSED */
void
rfs_readdir(struct nfsrddirargs *rda, struct nfsrddirres *rd,
struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
int error;
! int iseof;
struct iovec iov;
struct uio uio;
! vnode_t *vp;
! char *ndata = NULL;
struct sockaddr *ca;
! size_t nents;
! int ret;
vp = nfs_fhtovp(&rda->rda_fh, exi);
if (vp == NULL) {
- rd->rd_entries = NULL;
rd->rd_status = NFSERR_STALE;
return;
}
if (vp->v_type != VDIR) {
VN_RELE(vp);
- rd->rd_entries = NULL;
rd->rd_status = NFSERR_NOTDIR;
return;
}
(void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
!
! if (error) {
! rd->rd_entries = NULL;
goto bad;
- }
! if (rda->rda_count == 0) {
! rd->rd_entries = NULL;
! rd->rd_size = 0;
! rd->rd_eof = FALSE;
! goto bad;
! }
! rda->rda_count = MIN(rda->rda_count, NFS_MAXDATA);
/*
! * Allocate data for entries. This will be freed by rfs_rddirfree.
*/
! rd->rd_bufsize = (uint_t)rda->rda_count;
! rd->rd_entries = kmem_alloc(rd->rd_bufsize, KM_SLEEP);
/*
! * Set up io vector to read directory data
*/
! iov.iov_base = (caddr_t)rd->rd_entries;
! iov.iov_len = rda->rda_count;
uio.uio_iov = &iov;
uio.uio_iovcnt = 1;
uio.uio_segflg = UIO_SYSSPACE;
uio.uio_extflg = UIO_COPY_CACHED;
uio.uio_loffset = (offset_t)rda->rda_offset;
! uio.uio_resid = rda->rda_count;
! /*
! * read directory
! */
error = VOP_READDIR(vp, &uio, cr, &iseof, NULL, 0);
! /*
! * Clean up
! */
! if (!error) {
! /*
! * set size and eof
! */
! if (uio.uio_resid == rda->rda_count) {
! rd->rd_size = 0;
! rd->rd_eof = TRUE;
! } else {
! rd->rd_size = (uint32_t)(rda->rda_count -
! uio.uio_resid);
! rd->rd_eof = iseof ? TRUE : FALSE;
}
}
- ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
- nents = nfscmd_countents((char *)rd->rd_entries, rd->rd_size);
- ret = nfscmd_convdirplus(ca, exi, (char *)rd->rd_entries, nents,
- rda->rda_count, &ndata);
-
- if (ret != 0) {
- size_t dropbytes;
/*
! * We had to drop one or more entries in order to fit
! * during the character conversion. We need to patch
! * up the size and eof info.
*/
! if (rd->rd_eof)
! rd->rd_eof = FALSE;
! dropbytes = nfscmd_dropped_entrysize(
! (struct dirent64 *)rd->rd_entries, nents, ret);
! rd->rd_size -= dropbytes;
}
! if (ndata == NULL) {
! ndata = (char *)rd->rd_entries;
! } else if (ndata != (char *)rd->rd_entries) {
! kmem_free(rd->rd_entries, rd->rd_bufsize);
! rd->rd_entries = (void *)ndata;
! rd->rd_bufsize = rda->rda_count;
}
bad:
VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
#if 0 /* notyet */
/*
--- 2659,2862 ----
rfs_rmdir_getfh(struct nfsdiropargs *da)
{
return (da->da_fhandle);
}
/*
+ * nextdp(dp): pointer to the dirent64 record that follows "dp", found by
+ * advancing dp->d_reclen bytes. Any earlier definition of nextdp is
+ * dropped first so that this one is used.
+ */
+ #ifdef nextdp
+ #undef nextdp
+ #endif
+ #define nextdp(dp) ((struct dirent64 *)((char *)(dp) + (dp)->d_reclen))
+
/* ARGSUSED */
void
rfs_readdir(struct nfsrddirargs *rda, struct nfsrddirres *rd,
struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
int error;
! vnode_t *vp;
struct iovec iov;
struct uio uio;
! int iseof;
!
! uint32_t count = rda->rda_count;
! uint32_t size; /* size of the readdirres structure */
! int overflow = 0;
!
! size_t datasz;
! char *data = NULL;
! dirent64_t *dp;
!
struct sockaddr *ca;
! struct nfsentry **eptr;
! struct nfsentry *entry;
vp = nfs_fhtovp(&rda->rda_fh, exi);
if (vp == NULL) {
rd->rd_status = NFSERR_STALE;
return;
}
if (vp->v_type != VDIR) {
VN_RELE(vp);
rd->rd_status = NFSERR_NOTDIR;
return;
}
(void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
! if (error)
goto bad;
! /*
! * Don't allow arbitrary counts for allocation
! */
! if (count > NFS_MAXDATA)
! count = NFS_MAXDATA;
! /*
! * struct readdirres:
! * status: 1
! * entries (bool): 1
! * eof: 1
! */
! size = (1 + 1 + 1) * BYTES_PER_XDR_UNIT;
+ if (size > count) {
+ eptr = &rd->rd_entries;
+ iseof = 0;
+ size = 0;
+
+ goto done;
+ }
+
/*
! * This is a simplification. The dirent64_t size is not the same as the
! * size of the XDR representation of an entry, but the sizes are similar,
! * so we'll assume they are the same. This assumption should not cause any
! * harm. In the worst case we will need to issue VOP_READDIR() once more.
*/
! datasz = count;
/*
! * Make sure that there is room to read at least one entry
! * if any are available.
*/
! if (datasz < DIRENT64_RECLEN(MAXNAMELEN))
! datasz = DIRENT64_RECLEN(MAXNAMELEN);
!
! data = kmem_alloc(datasz, KM_NOSLEEP);
! if (data == NULL) {
! /* The allocation failed; downsize and wait for it this time */
! if (datasz > MAXBSIZE)
! datasz = MAXBSIZE;
! data = kmem_alloc(datasz, KM_SLEEP);
! }
!
uio.uio_iov = &iov;
uio.uio_iovcnt = 1;
uio.uio_segflg = UIO_SYSSPACE;
uio.uio_extflg = UIO_COPY_CACHED;
uio.uio_loffset = (offset_t)rda->rda_offset;
! uio.uio_resid = datasz;
! ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
! eptr = &rd->rd_entries;
! entry = NULL;
!
! getmoredents:
! iov.iov_base = data;
! iov.iov_len = datasz;
!
error = VOP_READDIR(vp, &uio, cr, &iseof, NULL, 0);
+ if (error) {
+ iseof = 0;
+ goto done;
+ }
! if (iov.iov_len == datasz)
! goto done;
!
! for (dp = (dirent64_t *)data;
! (char *)dp - data < datasz - iov.iov_len && !overflow;
! dp = nextdp(dp)) {
! char *name;
! uint32_t esize;
! uint32_t cookie;
!
! overflow = (uint64_t)dp->d_off > UINT32_MAX;
! if (overflow) {
! cookie = 0;
! iseof = 1;
! } else
! cookie = (uint32_t)dp->d_off;
!
! if (dp->d_ino == 0 || (uint64_t)dp->d_ino > UINT32_MAX) {
! if (entry != NULL)
! entry->cookie = cookie;
! continue;
}
+
+ name = nfscmd_convname(ca, exi, dp->d_name,
+ NFSCMD_CONV_OUTBOUND, NFS_MAXPATHLEN + 1);
+ if (name == NULL) {
+ if (entry != NULL)
+ entry->cookie = cookie;
+ continue;
}
/*
! * struct entry:
! * fileid: 1
! * name (length): 1
! * name (data): length (rounded up)
! * cookie: 1
! * nextentry (bool): 1
*/
! esize = (1 + 1 + 1 + 1) * BYTES_PER_XDR_UNIT +
! RNDUP(strlen(name));
!
! /* If the new entry does not fit, discard it */
! if (esize > count - size) {
! if (name != dp->d_name)
! kmem_free(name, NFS_MAXPATHLEN + 1);
! iseof = 0;
! goto done;
}
!
! entry = kmem_alloc(sizeof (struct nfsentry), KM_SLEEP);
!
! entry->fileid = (uint32_t)dp->d_ino;
! entry->name = strdup(name);
! if (name != dp->d_name)
! kmem_free(name, NFS_MAXPATHLEN + 1);
! entry->cookie = cookie;
!
! size += esize;
!
! /* Add the entry to the linked list */
! *eptr = entry;
! eptr = &entry->nextentry;
}
+ if (!iseof && size < count) {
+ uio.uio_resid = MIN(datasz, MAXBSIZE);
+ goto getmoredents;
+ }
+
+ done:
+ *eptr = NULL;
+
+ if (iseof || rd->rd_entries != NULL || !error) {
+ error = 0;
+ rd->rd_eof = iseof ? TRUE : FALSE;
+
+ /* This is for nfslog only */
+ rd->rd_offset = rda->rda_offset;
+ rd->rd_size = size;
+ }
+
bad:
VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
#if 0 /* notyet */
/*
*** 2645,2665 ****
VN_RELE(vp);
rd->rd_status = puterrno(error);
}
void *
rfs_readdir_getfh(struct nfsrddirargs *rda)
{
return (&rda->rda_fh);
}
void
rfs_rddirfree(struct nfsrddirres *rd)
{
! if (rd->rd_entries != NULL)
! kmem_free(rd->rd_entries, rd->rd_bufsize);
}
/* ARGSUSED */
void
rfs_statfs(fhandle_t *fh, struct nfsstatfs *fs, struct exportinfo *exi,
--- 2872,2901 ----
VN_RELE(vp);
rd->rd_status = puterrno(error);
+ if (data != NULL)
+ kmem_free(data, datasz);
}
/*
 * Return the address of the file handle (rda_fh) embedded in the READDIR
 * arguments.
 */
void *
rfs_readdir_getfh(struct nfsrddirargs *rda)
{
return (&rda->rda_fh);
}
/*
! * Free the linked list of nfsentry records hanging off rd->rd_entries,
! * together with each entry's name string.
! *
! * Nothing is touched unless rd_status is NFS_OK: the early error returns
! * in rfs_readdir() leave rd_entries unset.
! */
void
rfs_rddirfree(struct nfsrddirres *rd)
{
! if (rd->rd_status == NFS_OK) {
! struct nfsentry *entry, *nentry;
!
! for (entry = rd->rd_entries; entry != NULL; entry = nentry) {
! nentry = entry->nextentry; /* fetch the link before entry is freed */
! strfree(entry->name); /* name was created with strdup() */
! kmem_free(entry, sizeof (struct nfsentry));
! }
! }
}
/* ARGSUSED */
void
rfs_statfs(fhandle_t *fh, struct nfsstatfs *fs, struct exportinfo *exi,
*** 2761,2771 ****
vap->va_mtime.tv_nsec = (uint32_t)(sa->sa_mtime.tv_usec * 1000);
}
return (0);
}
! static enum nfsftype vt_to_nf[] = {
0, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, 0, 0, 0, NFSOC, 0
};
/*
* check the following fields for overflow: nodeid, size, and time.
--- 2997,3007 ----
vap->va_mtime.tv_nsec = (uint32_t)(sa->sa_mtime.tv_usec * 1000);
}
return (0);
}
! static const enum nfsftype vt_to_nf[] = {
0, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, 0, 0, 0, NFSOC, 0
};
/*
* check the following fields for overflow: nodeid, size, and time.
*** 2980,2999 ****
}
void
rfs_srvrinit(void)
{
- mutex_init(&rfs_async_write_lock, NULL, MUTEX_DEFAULT, NULL);
nfs2_srv_caller_id = fs_new_caller_id();
}
void
rfs_srvrfini(void)
{
- mutex_destroy(&rfs_async_write_lock);
}
static int
rdma_setup_read_data2(struct nfsreadargs *ra, struct nfsrdresult *rr)
{
struct clist *wcl;
int wlist_len;
--- 3216,3259 ----
}
/*
+ * NFSv2 server setup: obtain the caller id stored in nfs2_srv_caller_id and
+ * register rfs_zone_key with rfs_zone_init()/rfs_zone_fini() as its
+ * create and destroy callbacks (no shutdown callback is supplied).
+ */
void
rfs_srvrinit(void)
{
nfs2_srv_caller_id = fs_new_caller_id();
+ zone_key_create(&rfs_zone_key, rfs_zone_init, NULL, rfs_zone_fini);
}
/*
 * NFSv2 server teardown. Currently does nothing.
 *
 * NOTE(review): rfs_srvrinit() creates rfs_zone_key, but nothing here
 * deletes it. Confirm whether a matching zone_key_delete() is needed.
 */
void
rfs_srvrfini(void)
{
}
/*
+ * Constructor registered for rfs_zone_key. Allocates a zero-filled
+ * nfs_srv_t, initializes its mutex and turns write clustering on.
+ * Returns the new nfs_srv_t; "zoneid" is not used.
+ */
+ /* ARGSUSED */
+ static void *
+ rfs_zone_init(zoneid_t zoneid)
+ {
+ nfs_srv_t *ns;
+
+ ns = kmem_zalloc(sizeof (*ns), KM_SLEEP); /* async_write_head starts out NULL */
+
+ mutex_init(&ns->async_write_lock, NULL, MUTEX_DEFAULT, NULL);
+ ns->write_async = 1;
+
+ return (ns);
+ }
+
/*
+ * Destructor registered for rfs_zone_key. "data" is the nfs_srv_t that
+ * rfs_zone_init() returned; its mutex is destroyed and the structure is
+ * freed. "zoneid" is not used.
+ *
+ * NOTE(review): assumes no write cluster is still linked on
+ * async_write_head when this runs - confirm.
+ */
+ /* ARGSUSED */
+ static void
+ rfs_zone_fini(zoneid_t zoneid, void *data)
+ {
+ nfs_srv_t *ns;
+
+ ns = (nfs_srv_t *)data;
+ mutex_destroy(&ns->async_write_lock);
+ kmem_free(ns, sizeof (*ns));
+ }
+
static int
rdma_setup_read_data2(struct nfsreadargs *ra, struct nfsrdresult *rr)
{
struct clist *wcl;
int wlist_len;