Print this page
NEX-17125 NFS: nbmand lock entered but not exited on error path
Reviewed by: Yuri Pankov <yuri.pankov@nexenta.com>
Reviewed by: Rick McNeal <rick.mcneal@nexenta.com>
Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com>
Reviewed by: Evan Layton <evan.layton@nexenta.com>
Reviewed by: Gordon Ross <gordon.ross@nexenta.com>
NEX-15279 support NFS server in zone
NEX-15520 online NFS shares cause zoneadm halt to hang in nfs_export_zone_fini
Portions contributed by: Dan Kruchinin <dan.kruchinin@nexenta.com>
Portions contributed by: Stepan Zastupov <stepan.zastupov@gmail.com>
Reviewed by: Joyce McIntosh <joyce.mcintosh@nexenta.com>
Reviewed by: Rob Gittins <rob.gittins@nexenta.com>
Reviewed by: Gordon Ross <gordon.ross@nexenta.com>
NEX-9275 Got "bad mutex" panic when run IO to nfs share from clients
Reviewed by: Yuri Pankov <yuri.pankov@nexenta.com>
Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com>
Reviewed by: Rob Gittins <rob.gittins@nexenta.com>
NEX-3524 CLONE - Port NEX-3505 "wrong authentication" messages with root=@0.0.0.0/0 set, result in loss of client access
Reviewed by: Marcel Telka <marcel.telka@nexenta.com>
NEX-3533 CLONE - Port NEX-3019 NFSv3 writes underneath mounted filesystem to directory
Reviewed by: Dan Fields <dan.fields@nexenta.com>
NEX-3095 Issues related to NFS nohide
Reviewed by: Dan Fields <dan.fields@nexenta.com>
NEX-1128 NFS server: Generic uid and gid remapping for AUTH_SYS
Reviewed by: Jan Kryl <jan.kryl@nexenta.com>
OS-20 share_nfs(1m) charset handling is unreliable
OS-22 Page fault at nfscmd_dropped_entrysize+0x1e()
OS-23 NFSv2/3/4: READDIR responses are inconsistent when charset conversion fails
OS-24 rfs3_readdir(): Issues related to nfscmd_convdirent()
Reviewed by: Jan Kryl <jan.kryl@nexenta.com>
Reviewed by: Gordon Ross <gordon.ross@nexenta.com>
re #13613 rb4516 Tunables needs volatile keyword
closes #12112 rb3823 - nfs-nohide: lookup("..") for submount should be correct
re #3541 rb11254 - nfs nohide - "nfssrv: need ability to go to submounts for v3 and v2 protocols"

*** 16,36 **** * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright 2014 Nexenta Systems, Inc. All rights reserved. - * Copyright (c) 2016 by Delphix. All rights reserved. */ /* * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T. * All rights reserved. */ #include <sys/param.h> #include <sys/types.h> #include <sys/systm.h> #include <sys/cred.h> #include <sys/buf.h> --- 16,40 ---- * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ + /* * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved. */ /* * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T. * All rights reserved. */ + /* + * Copyright 2018 Nexenta Systems, Inc. + * Copyright (c) 2016 by Delphix. All rights reserved. + */ + #include <sys/param.h> #include <sys/types.h> #include <sys/systm.h> #include <sys/cred.h> #include <sys/buf.h>
*** 67,86 **** --- 71,108 ---- #include <vm/seg_map.h> #include <vm/seg_kmem.h> #include <sys/strsubr.h> + struct rfs_async_write_list; + /* + * Zone globals of NFSv2 server + */ + typedef struct nfs_srv { + kmutex_t async_write_lock; + struct rfs_async_write_list *async_write_head; + + /* + * enables write clustering if == 1 + */ + int write_async; + } nfs_srv_t; + + /* * These are the interface routines for the server side of the * Network File System. See the NFS version 2 protocol specification * for a description of this interface. */ static int sattr_to_vattr(struct nfssattr *, struct vattr *); static void acl_perm(struct vnode *, struct exportinfo *, struct vattr *, cred_t *); + static void *rfs_zone_init(zoneid_t zoneid); + static void rfs_zone_fini(zoneid_t zoneid, void *data); + /* * Some "over the wire" UNIX file types. These are encoded * into the mode. This needs to be fixed in the next rev. */ #define IFMT 0170000 /* type of file */
*** 87,96 **** --- 109,119 ---- #define IFCHR 0020000 /* character special */ #define IFBLK 0060000 /* block special */ #define IFSOCK 0140000 /* socket */ u_longlong_t nfs2_srv_caller_id; + static zone_key_t rfs_zone_key; /* * Get file attributes. * Returns the current attributes of the file with the given fhandle. */
*** 327,337 **** --- 350,434 ---- rfs_setattr_getfh(struct nfssaargs *args) { return (&args->saa_fh); } + /* Change and release @exip and @vpp only on success */ + int + rfs_cross_mnt(vnode_t **vpp, struct exportinfo **exip) + { + struct exportinfo *exi; + vnode_t *vp = *vpp; + fid_t fid; + int error; + + VN_HOLD(vp); + + if ((error = traverse(&vp)) != 0) { + VN_RELE(vp); + return (error); + } + + bzero(&fid, sizeof (fid)); + fid.fid_len = MAXFIDSZ; + error = VOP_FID(vp, &fid, NULL); + if (error) { + VN_RELE(vp); + return (error); + } + + exi = checkexport(&vp->v_vfsp->vfs_fsid, &fid); + if (exi == NULL || + (exi->exi_export.ex_flags & EX_NOHIDE) == 0) { + /* + * This is not an error; the subdir is just not exported + * or "nohide" is not set + */ + if (exi != NULL) + exi_rele(&exi); + VN_RELE(vp); + } else { + /* go to submount */ + exi_rele(exip); + *exip = exi; + + VN_RELE(*vpp); + *vpp = vp; + } + + return (0); + } + /* + * Given mounted "dvp" and "exi", go up to the upper mountpoint, + * correcting dvp/exi accordingly. + * Returns 0 on success. + */ + int + rfs_climb_crossmnt(vnode_t **dvpp, struct exportinfo **exip, cred_t *cr) + { + struct exportinfo *exi; + vnode_t *dvp = *dvpp; + + ASSERT(dvp->v_flag & VROOT); + + VN_HOLD(dvp); + dvp = untraverse(dvp); + exi = nfs_vptoexi(NULL, dvp, cr, NULL, NULL, FALSE); + if (exi == NULL) { + VN_RELE(dvp); + return (-1); + } + + exi_rele(exip); + *exip = exi; + VN_RELE(*dvpp); + *dvpp = dvp; + + return (0); + } + /* * Directory lookup. * Returns an fhandle and file attributes for file name in a directory. */ /* ARGSUSED */ void
*** 369,407 **** /* * Allow lookups from the root - the default * location of the public filehandle. */ if (exi != NULL && (exi->exi_export.ex_flags & EX_PUBLIC)) { ! dvp = rootdir; VN_HOLD(dvp); } else { dvp = nfs_fhtovp(fhp, exi); if (dvp == NULL) { dr->dr_status = NFSERR_STALE; return; } } /* * Not allow lookup beyond root. * If the filehandle matches a filehandle of the exi, * then the ".." refers beyond the root of an exported filesystem. */ if (strcmp(da->da_name, "..") == 0 && EQFID(&exi->exi_fid, (fid_t *)&fhp->fh_len)) { ! VN_RELE(dvp); ! dr->dr_status = NFSERR_NOENT; ! return; } ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf; name = nfscmd_convname(ca, exi, da->da_name, NFSCMD_CONV_INBOUND, MAXPATHLEN); if (name == NULL) { ! dr->dr_status = NFSERR_ACCES; ! return; } /* * If the public filehandle is used then allow * a multi-component lookup, i.e. evaluate --- 466,516 ---- /* * Allow lookups from the root - the default * location of the public filehandle. */ if (exi != NULL && (exi->exi_export.ex_flags & EX_PUBLIC)) { ! dvp = ZONE_ROOTVP(); VN_HOLD(dvp); } else { dvp = nfs_fhtovp(fhp, exi); if (dvp == NULL) { dr->dr_status = NFSERR_STALE; return; } } + exi_hold(exi); + /* * Not allow lookup beyond root. * If the filehandle matches a filehandle of the exi, * then the ".." refers beyond the root of an exported filesystem. */ if (strcmp(da->da_name, "..") == 0 && EQFID(&exi->exi_fid, (fid_t *)&fhp->fh_len)) { ! if ((exi->exi_export.ex_flags & EX_NOHIDE) && ! (dvp->v_flag & VROOT)) { ! /* ! * special case for ".." and 'nohide'exported root ! */ ! if (rfs_climb_crossmnt(&dvp, &exi, cr) != 0) { ! error = NFSERR_ACCES; ! goto out; } + } else { + error = NFSERR_NOENT; + goto out; + } + } ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf; name = nfscmd_convname(ca, exi, da->da_name, NFSCMD_CONV_INBOUND, MAXPATHLEN); if (name == NULL) { ! error = NFSERR_ACCES; ! 
goto out; } /* * If the public filehandle is used then allow * a multi-component lookup, i.e. evaluate
*** 411,420 **** --- 520,532 ---- * This may result in a vnode in another filesystem * which is OK as long as the filesystem is exported. */ if (PUBLIC_FH2(fhp)) { publicfh_flag = TRUE; + + exi_rele(&exi); + error = rfs_publicfh_mclookup(name, dvp, cr, &vp, &exi, &sec); } else { /* * Do a normal single component lookup.
*** 424,433 **** --- 536,550 ---- } if (name != da->da_name) kmem_free(name, MAXPATHLEN); + if (error == 0 && vn_ismntpt(vp)) { + error = rfs_cross_mnt(&vp, &exi); + if (error) + VN_RELE(vp); + } if (!error) { va.va_mask = AT_ALL; /* we want everything */ error = rfs4_delegated_getattr(vp, &va, 0, cr);
*** 450,469 **** } } VN_RELE(vp); } VN_RELE(dvp); ! /* ! * If publicfh_flag is true then we have called rfs_publicfh_mclookup ! * and have obtained a new exportinfo in exi which needs to be ! * released. Note the the original exportinfo pointed to by exi ! * will be released by the caller, comon_dispatch. ! */ ! if (publicfh_flag && exi != NULL) ! exi_rele(exi); /* * If it's public fh, no 0x81, and client's flavor is * invalid, set WebNFS status to WNFSERR_CLNT_FLAVOR now. * Then set RPC status to AUTH_TOOWEAK in common_dispatch. --- 567,581 ---- } } VN_RELE(vp); } + out: VN_RELE(dvp); ! if (exi != NULL) ! exi_rele(&exi); /* * If it's public fh, no 0x81, and client's flavor is * invalid, set WebNFS status to WNFSERR_CLNT_FLAVOR now. * Then set RPC status to AUTH_TOOWEAK in common_dispatch.
*** 683,692 **** --- 795,806 ---- error = VOP_RWLOCK(vp, V_WRITELOCK_FALSE, &ct); /* check if a monitor detected a delegation conflict */ if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) { + if (in_crit) + nbl_end_crit(vp); VN_RELE(vp); /* mark as wouldblock so response is dropped */ curthread->t_flag |= T_WOULDBLOCK; rr->rr_data = NULL;
*** 1008,1021 **** error = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct); /* check if a monitor detected a delegation conflict */ if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) { ! VN_RELE(vp); ! /* mark as wouldblock so response is dropped */ ! curthread->t_flag |= T_WOULDBLOCK; ! return; } if (wa->wa_data || wa->wa_rlist) { /* Do the RDMA thing if necessary */ if (wa->wa_rlist) { --- 1122,1132 ---- error = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct); /* check if a monitor detected a delegation conflict */ if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) { ! goto out; } if (wa->wa_data || wa->wa_rlist) { /* Do the RDMA thing if necessary */ if (wa->wa_rlist) {
*** 1051,1060 **** --- 1162,1172 ---- savecred = curthread->t_cred; curthread->t_cred = cr; error = VOP_WRITE(vp, &uio, FSYNC, cr, &ct); curthread->t_cred = savecred; } else { + iovcnt = 0; for (m = wa->wa_mblk; m != NULL; m = m->b_cont) iovcnt++; if (iovcnt <= MAX_IOVECS) { #ifdef DEBUG
*** 1149,1159 **** struct rfs_async_write_list *next; }; static struct rfs_async_write_list *rfs_async_write_head = NULL; static kmutex_t rfs_async_write_lock; ! static int rfs_write_async = 1; /* enables write clustering if == 1 */ #define MAXCLIOVECS 42 #define RFSWRITE_INITVAL (enum nfsstat) -1 #ifdef DEBUG --- 1261,1271 ---- struct rfs_async_write_list *next; }; static struct rfs_async_write_list *rfs_async_write_head = NULL; static kmutex_t rfs_async_write_lock; ! volatile int rfs_write_async = 1; /* enables write clustering if == 1 */ #define MAXCLIOVECS 42 #define RFSWRITE_INITVAL (enum nfsstat) -1 #ifdef DEBUG
*** 1194,1205 **** struct rfs_async_write_list nlpsp; ushort_t t_flag; cred_t *savecred; int in_crit = 0; caller_context_t ct; ! if (!rfs_write_async) { rfs_write_sync(wa, ns, exi, req, cr, ro); return; } /* --- 1306,1319 ---- struct rfs_async_write_list nlpsp; ushort_t t_flag; cred_t *savecred; int in_crit = 0; caller_context_t ct; + nfs_srv_t *nsrv; ! nsrv = zone_getspecific(rfs_zone_key, curzone); ! if (!nsrv->write_async) { rfs_write_sync(wa, ns, exi, req, cr, ro); return; } /*
*** 1220,1231 **** /* * Look to see if there is already a cluster started * for this file. */ ! mutex_enter(&rfs_async_write_lock); ! for (lp = rfs_async_write_head; lp != NULL; lp = lp->next) { if (bcmp(&wa->wa_fhandle, lp->fhp, sizeof (fhandle_t)) == 0) break; } --- 1334,1345 ---- /* * Look to see if there is already a cluster started * for this file. */ ! mutex_enter(&nsrv->async_write_lock); ! for (lp = nsrv->async_write_head; lp != NULL; lp = lp->next) { if (bcmp(&wa->wa_fhandle, lp->fhp, sizeof (fhandle_t)) == 0) break; }
*** 1247,1258 **** if (trp == NULL) lp->list = nrp; else trp->list = nrp; while (nrp->ns->ns_status == RFSWRITE_INITVAL) ! cv_wait(&lp->cv, &rfs_async_write_lock); ! mutex_exit(&rfs_async_write_lock); return; } /* --- 1361,1372 ---- if (trp == NULL) lp->list = nrp; else trp->list = nrp; while (nrp->ns->ns_status == RFSWRITE_INITVAL) ! cv_wait(&lp->cv, &nsrv->async_write_lock); ! mutex_exit(&nsrv->async_write_lock); return; } /*
*** 1265,1295 **** nlp->fhp = &wa->wa_fhandle; cv_init(&nlp->cv, NULL, CV_DEFAULT, NULL); nlp->list = nrp; nlp->next = NULL; ! if (rfs_async_write_head == NULL) { ! rfs_async_write_head = nlp; } else { ! lp = rfs_async_write_head; while (lp->next != NULL) lp = lp->next; lp->next = nlp; } ! mutex_exit(&rfs_async_write_lock); /* * Convert the file handle common to all of the requests * in this cluster to a vnode. */ vp = nfs_fhtovp(&wa->wa_fhandle, exi); if (vp == NULL) { ! mutex_enter(&rfs_async_write_lock); ! if (rfs_async_write_head == nlp) ! rfs_async_write_head = nlp->next; else { ! lp = rfs_async_write_head; while (lp->next != nlp) lp = lp->next; lp->next = nlp->next; } t_flag = curthread->t_flag & T_WOULDBLOCK; --- 1379,1409 ---- nlp->fhp = &wa->wa_fhandle; cv_init(&nlp->cv, NULL, CV_DEFAULT, NULL); nlp->list = nrp; nlp->next = NULL; ! if (nsrv->async_write_head == NULL) { ! nsrv->async_write_head = nlp; } else { ! lp = nsrv->async_write_head; while (lp->next != NULL) lp = lp->next; lp->next = nlp; } ! mutex_exit(&nsrv->async_write_lock); /* * Convert the file handle common to all of the requests * in this cluster to a vnode. */ vp = nfs_fhtovp(&wa->wa_fhandle, exi); if (vp == NULL) { ! mutex_enter(&nsrv->async_write_lock); ! if (nsrv->async_write_head == nlp) ! nsrv->async_write_head = nlp->next; else { ! lp = nsrv->async_write_head; while (lp->next != nlp) lp = lp->next; lp->next = nlp->next; } t_flag = curthread->t_flag & T_WOULDBLOCK;
*** 1296,1306 **** for (rp = nlp->list; rp != NULL; rp = rp->list) { rp->ns->ns_status = NFSERR_STALE; rp->thread->t_flag |= t_flag; } cv_broadcast(&nlp->cv); ! mutex_exit(&rfs_async_write_lock); return; } /* --- 1410,1420 ---- for (rp = nlp->list; rp != NULL; rp = rp->list) { rp->ns->ns_status = NFSERR_STALE; rp->thread->t_flag |= t_flag; } cv_broadcast(&nlp->cv); ! mutex_exit(&nsrv->async_write_lock); return; } /*
*** 1307,1321 **** * Can only write regular files. Attempts to write any * other file types fail with EISDIR. */ if (vp->v_type != VREG) { VN_RELE(vp); ! mutex_enter(&rfs_async_write_lock); ! if (rfs_async_write_head == nlp) ! rfs_async_write_head = nlp->next; else { ! lp = rfs_async_write_head; while (lp->next != nlp) lp = lp->next; lp->next = nlp->next; } t_flag = curthread->t_flag & T_WOULDBLOCK; --- 1421,1435 ---- * Can only write regular files. Attempts to write any * other file types fail with EISDIR. */ if (vp->v_type != VREG) { VN_RELE(vp); ! mutex_enter(&nsrv->async_write_lock); ! if (nsrv->async_write_head == nlp) ! nsrv->async_write_head = nlp->next; else { ! lp = nsrv->async_write_head; while (lp->next != nlp) lp = lp->next; lp->next = nlp->next; } t_flag = curthread->t_flag & T_WOULDBLOCK;
*** 1322,1332 **** for (rp = nlp->list; rp != NULL; rp = rp->list) { rp->ns->ns_status = NFSERR_ISDIR; rp->thread->t_flag |= t_flag; } cv_broadcast(&nlp->cv); ! mutex_exit(&rfs_async_write_lock); return; } /* --- 1436,1446 ---- for (rp = nlp->list; rp != NULL; rp = rp->list) { rp->ns->ns_status = NFSERR_ISDIR; rp->thread->t_flag |= t_flag; } cv_broadcast(&nlp->cv); ! mutex_exit(&nsrv->async_write_lock); return; } /*
*** 1354,1368 **** if (in_crit) nbl_end_crit(vp); VN_RELE(vp); /* mark as wouldblock so response is dropped */ curthread->t_flag |= T_WOULDBLOCK; ! mutex_enter(&rfs_async_write_lock); ! if (rfs_async_write_head == nlp) ! rfs_async_write_head = nlp->next; else { ! lp = rfs_async_write_head; while (lp->next != nlp) lp = lp->next; lp->next = nlp->next; } for (rp = nlp->list; rp != NULL; rp = rp->list) { --- 1468,1482 ---- if (in_crit) nbl_end_crit(vp); VN_RELE(vp); /* mark as wouldblock so response is dropped */ curthread->t_flag |= T_WOULDBLOCK; ! mutex_enter(&nsrv->async_write_lock); ! if (nsrv->async_write_head == nlp) ! nsrv->async_write_head = nlp->next; else { ! lp = nsrv->async_write_head; while (lp->next != nlp) lp = lp->next; lp->next = nlp->next; } for (rp = nlp->list; rp != NULL; rp = rp->list) {
*** 1370,1380 **** rp->ns->ns_status = puterrno(error); rp->thread->t_flag |= T_WOULDBLOCK; } } cv_broadcast(&nlp->cv); ! mutex_exit(&rfs_async_write_lock); return; } /* --- 1484,1494 ---- rp->ns->ns_status = puterrno(error); rp->thread->t_flag |= T_WOULDBLOCK; } } cv_broadcast(&nlp->cv); ! mutex_exit(&nsrv->async_write_lock); return; } /*
*** 1392,1411 **** * a new cluster and be blocked in VOP_RWLOCK while * the first request is being processed. This delay * will allow more requests to be clustered in this * second cluster. */ ! mutex_enter(&rfs_async_write_lock); ! if (rfs_async_write_head == nlp) ! rfs_async_write_head = nlp->next; else { ! lp = rfs_async_write_head; while (lp->next != nlp) lp = lp->next; lp->next = nlp->next; } ! mutex_exit(&rfs_async_write_lock); /* * Step through the list of requests in this cluster. * We need to check permissions to make sure that all * of the requests have sufficient permission to write --- 1506,1525 ---- * a new cluster and be blocked in VOP_RWLOCK while * the first request is being processed. This delay * will allow more requests to be clustered in this * second cluster. */ ! mutex_enter(&nsrv->async_write_lock); ! if (nsrv->async_write_head == nlp) ! nsrv->async_write_head = nlp->next; else { ! lp = nsrv->async_write_head; while (lp->next != nlp) lp = lp->next; lp->next = nlp->next; } ! mutex_exit(&nsrv->async_write_lock); /* * Step through the list of requests in this cluster. * We need to check permissions to make sure that all * of the requests have sufficient permission to write
*** 1646,1664 **** if (in_crit) nbl_end_crit(vp); VN_RELE(vp); t_flag = curthread->t_flag & T_WOULDBLOCK; ! mutex_enter(&rfs_async_write_lock); for (rp = nlp->list; rp != NULL; rp = rp->list) { if (rp->ns->ns_status == RFSWRITE_INITVAL) { rp->ns->ns_status = puterrno(error); rp->thread->t_flag |= t_flag; } } cv_broadcast(&nlp->cv); ! mutex_exit(&rfs_async_write_lock); } void * rfs_write_getfh(struct nfswriteargs *wa) --- 1760,1778 ---- if (in_crit) nbl_end_crit(vp); VN_RELE(vp); t_flag = curthread->t_flag & T_WOULDBLOCK; ! mutex_enter(&nsrv->async_write_lock); for (rp = nlp->list; rp != NULL; rp = rp->list) { if (rp->ns->ns_status == RFSWRITE_INITVAL) { rp->ns->ns_status = puterrno(error); rp->thread->t_flag |= t_flag; } } cv_broadcast(&nlp->cv); ! mutex_exit(&nsrv->async_write_lock); } void * rfs_write_getfh(struct nfswriteargs *wa)
*** 1716,1725 **** --- 1830,1845 ---- VN_RELE(dvp); dr->dr_status = NFSERR_INVAL; return; } + if (protect_zfs_mntpt(dvp) != 0) { + VN_RELE(dvp); + dr->dr_status = NFSERR_ACCES; + return; + } + /* * This is a completely gross hack to make mknod * work over the wire until we can wack the protocol */ if ((va.va_mode & IFMT) == IFCHR) {
*** 2055,2065 **** if (to_exi == NULL) { VN_RELE(fromvp); *status = NFSERR_ACCES; return; } ! exi_rele(to_exi); if (to_exi != exi) { VN_RELE(fromvp); *status = NFSERR_XDEV; return; --- 2175,2185 ---- if (to_exi == NULL) { VN_RELE(fromvp); *status = NFSERR_ACCES; return; } ! exi_rele(&to_exi); if (to_exi != exi) { VN_RELE(fromvp); *status = NFSERR_XDEV; return;
*** 2095,2104 **** --- 2215,2231 ---- VN_RELE(fromvp); *status = NFSERR_ROFS; return; } + if (protect_zfs_mntpt(tovp) != 0) { + VN_RELE(tovp); + VN_RELE(fromvp); + *status = NFSERR_ACCES; + return; + } + /* * Check for a conflict with a non-blocking mandatory share reservation. */ error = VOP_LOOKUP(fromvp, args->rna_from.da_name, &srcvp, NULL, 0, NULL, cr, NULL, NULL, NULL);
*** 2119,2129 **** return; } /* Check for delegation on the file being renamed over, if it exists */ ! if (rfs4_deleg_policy != SRV_NEVER_DELEGATE && VOP_LOOKUP(tovp, args->rna_to.da_name, &targvp, NULL, 0, NULL, cr, NULL, NULL, NULL) == 0) { if (rfs4_check_delegated(FWRITE, targvp, TRUE)) { VN_RELE(tovp); --- 2246,2256 ---- return; } /* Check for delegation on the file being renamed over, if it exists */ ! if (nfs4_get_deleg_policy() != SRV_NEVER_DELEGATE && VOP_LOOKUP(tovp, args->rna_to.da_name, &targvp, NULL, 0, NULL, cr, NULL, NULL, NULL) == 0) { if (rfs4_check_delegated(FWRITE, targvp, TRUE)) { VN_RELE(tovp);
*** 2201,2211 **** if (to_exi == NULL) { VN_RELE(fromvp); *status = NFSERR_ACCES; return; } ! exi_rele(to_exi); if (to_exi != exi) { VN_RELE(fromvp); *status = NFSERR_XDEV; return; --- 2328,2338 ---- if (to_exi == NULL) { VN_RELE(fromvp); *status = NFSERR_ACCES; return; } ! exi_rele(&to_exi); if (to_exi != exi) { VN_RELE(fromvp); *status = NFSERR_XDEV; return;
*** 2239,2248 **** --- 2366,2382 ---- VN_RELE(fromvp); *status = NFSERR_ROFS; return; } + if (protect_zfs_mntpt(tovp) != 0) { + VN_RELE(tovp); + VN_RELE(fromvp); + *status = NFSERR_ACCES; + return; + } + error = VOP_LINK(tovp, fromvp, args->la_to.da_name, cr, NULL, 0); /* * Force modified data and metadata out to stable storage. */
*** 2261,2271 **** return (args->la_from); } /* * Symbolically link to a file. ! * Create a file (to) with the given attributes which is a symbolic link * to the given path name (to). */ void rfs_symlink(struct nfsslargs *args, enum nfsstat *status, struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro) --- 2395,2405 ---- return (args->la_from); } /* * Symbolically link to a file. ! * Create a file (from) with the given attributes which is a symbolic link * to the given path name (to). */ void rfs_symlink(struct nfsslargs *args, enum nfsstat *status, struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
*** 2309,2318 **** --- 2443,2458 ---- VN_RELE(vp); *status = NFSERR_INVAL; return; } + if (protect_zfs_mntpt(vp) != 0) { + VN_RELE(vp); + *status = NFSERR_ACCES; + return; + } + ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf; name = nfscmd_convname(ca, exi, args->sla_tnm, NFSCMD_CONV_INBOUND, MAXPATHLEN); if (name == NULL) {
*** 2401,2410 **** --- 2541,2556 ---- VN_RELE(vp); dr->dr_status = NFSERR_INVAL; return; } + if (protect_zfs_mntpt(vp) != 0) { + VN_RELE(vp); + dr->dr_status = NFSERR_ACCES; + return; + } + va.va_type = VDIR; va.va_mask |= AT_TYPE; error = VOP_MKDIR(vp, name, &va, &dvp, cr, NULL, 0, NULL);
*** 2486,2496 **** * Of course, NFS servers have no idea what their * clients' current directories are. We fake it by * supplying a vnode known to exist and illegal to * remove. */ ! error = VOP_RMDIR(vp, da->da_name, rootdir, cr, NULL, 0); /* * Force modified data and metadata out to stable storage. */ (void) VOP_FSYNC(vp, 0, cr, NULL); --- 2632,2642 ---- * Of course, NFS servers have no idea what their * clients' current directories are. We fake it by * supplying a vnode known to exist and illegal to * remove. */ ! error = VOP_RMDIR(vp, da->da_name, ZONE_ROOTVP(), cr, NULL, 0); /* * Force modified data and metadata out to stable storage. */ (void) VOP_FSYNC(vp, 0, cr, NULL);
*** 2513,2635 **** rfs_rmdir_getfh(struct nfsdiropargs *da) { return (da->da_fhandle); } /* ARGSUSED */ void rfs_readdir(struct nfsrddirargs *rda, struct nfsrddirres *rd, struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro) { int error; ! int iseof; struct iovec iov; struct uio uio; ! vnode_t *vp; ! char *ndata = NULL; struct sockaddr *ca; ! size_t nents; ! int ret; vp = nfs_fhtovp(&rda->rda_fh, exi); if (vp == NULL) { - rd->rd_entries = NULL; rd->rd_status = NFSERR_STALE; return; } if (vp->v_type != VDIR) { VN_RELE(vp); - rd->rd_entries = NULL; rd->rd_status = NFSERR_NOTDIR; return; } (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL); error = VOP_ACCESS(vp, VREAD, 0, cr, NULL); ! ! if (error) { ! rd->rd_entries = NULL; goto bad; - } ! if (rda->rda_count == 0) { ! rd->rd_entries = NULL; ! rd->rd_size = 0; ! rd->rd_eof = FALSE; ! goto bad; ! } ! rda->rda_count = MIN(rda->rda_count, NFS_MAXDATA); /* ! * Allocate data for entries. This will be freed by rfs_rddirfree. */ ! rd->rd_bufsize = (uint_t)rda->rda_count; ! rd->rd_entries = kmem_alloc(rd->rd_bufsize, KM_SLEEP); /* ! * Set up io vector to read directory data */ ! iov.iov_base = (caddr_t)rd->rd_entries; ! iov.iov_len = rda->rda_count; uio.uio_iov = &iov; uio.uio_iovcnt = 1; uio.uio_segflg = UIO_SYSSPACE; uio.uio_extflg = UIO_COPY_CACHED; uio.uio_loffset = (offset_t)rda->rda_offset; ! uio.uio_resid = rda->rda_count; ! /* ! * read directory ! */ error = VOP_READDIR(vp, &uio, cr, &iseof, NULL, 0); ! /* ! * Clean up ! */ ! if (!error) { ! /* ! * set size and eof ! */ ! if (uio.uio_resid == rda->rda_count) { ! rd->rd_size = 0; ! rd->rd_eof = TRUE; ! } else { ! rd->rd_size = (uint32_t)(rda->rda_count - ! uio.uio_resid); ! rd->rd_eof = iseof ? 
TRUE : FALSE; } } - ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf; - nents = nfscmd_countents((char *)rd->rd_entries, rd->rd_size); - ret = nfscmd_convdirplus(ca, exi, (char *)rd->rd_entries, nents, - rda->rda_count, &ndata); - - if (ret != 0) { - size_t dropbytes; /* ! * We had to drop one or more entries in order to fit ! * during the character conversion. We need to patch ! * up the size and eof info. */ ! if (rd->rd_eof) ! rd->rd_eof = FALSE; ! dropbytes = nfscmd_dropped_entrysize( ! (struct dirent64 *)rd->rd_entries, nents, ret); ! rd->rd_size -= dropbytes; } ! if (ndata == NULL) { ! ndata = (char *)rd->rd_entries; ! } else if (ndata != (char *)rd->rd_entries) { ! kmem_free(rd->rd_entries, rd->rd_bufsize); ! rd->rd_entries = (void *)ndata; ! rd->rd_bufsize = rda->rda_count; } bad: VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL); #if 0 /* notyet */ /* --- 2659,2862 ---- rfs_rmdir_getfh(struct nfsdiropargs *da) { return (da->da_fhandle); } + #ifdef nextdp + #undef nextdp + #endif + #define nextdp(dp) ((struct dirent64 *)((char *)(dp) + (dp)->d_reclen)) + /* ARGSUSED */ void rfs_readdir(struct nfsrddirargs *rda, struct nfsrddirres *rd, struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro) { int error; ! vnode_t *vp; struct iovec iov; struct uio uio; ! int iseof; ! ! uint32_t count = rda->rda_count; ! uint32_t size; /* size of the readdirres structure */ ! int overflow = 0; ! ! size_t datasz; ! char *data = NULL; ! dirent64_t *dp; ! struct sockaddr *ca; ! struct nfsentry **eptr; ! struct nfsentry *entry; vp = nfs_fhtovp(&rda->rda_fh, exi); if (vp == NULL) { rd->rd_status = NFSERR_STALE; return; } if (vp->v_type != VDIR) { VN_RELE(vp); rd->rd_status = NFSERR_NOTDIR; return; } (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL); error = VOP_ACCESS(vp, VREAD, 0, cr, NULL); ! if (error) goto bad; ! /* ! * Don't allow arbitrary counts for allocation ! */ ! if (count > NFS_MAXDATA) ! count = NFS_MAXDATA; ! /* ! * struct readdirres: ! * status: 1 ! 
* entries (bool): 1 ! * eof: 1 ! */ ! size = (1 + 1 + 1) * BYTES_PER_XDR_UNIT; + if (size > count) { + eptr = &rd->rd_entries; + iseof = 0; + size = 0; + + goto done; + } + /* ! * This is simplification. The dirent64_t size is not the same as the ! * size of XDR representation of entry, but the sizes are similar so ! * we'll assume they are same. This assumption should not cause any ! * harm. In worst case we will need to issue VOP_READDIR() once more. */ ! datasz = count; /* ! * Make sure that there is room to read at least one entry ! * if any are available. */ ! if (datasz < DIRENT64_RECLEN(MAXNAMELEN)) ! datasz = DIRENT64_RECLEN(MAXNAMELEN); ! ! data = kmem_alloc(datasz, KM_NOSLEEP); ! if (data == NULL) { ! /* The allocation failed; downsize and wait for it this time */ ! if (datasz > MAXBSIZE) ! datasz = MAXBSIZE; ! data = kmem_alloc(datasz, KM_SLEEP); ! } ! uio.uio_iov = &iov; uio.uio_iovcnt = 1; uio.uio_segflg = UIO_SYSSPACE; uio.uio_extflg = UIO_COPY_CACHED; uio.uio_loffset = (offset_t)rda->rda_offset; ! uio.uio_resid = datasz; ! ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf; ! eptr = &rd->rd_entries; ! entry = NULL; ! ! getmoredents: ! iov.iov_base = data; ! iov.iov_len = datasz; ! error = VOP_READDIR(vp, &uio, cr, &iseof, NULL, 0); + if (error) { + iseof = 0; + goto done; + } ! if (iov.iov_len == datasz) ! goto done; ! ! for (dp = (dirent64_t *)data; ! (char *)dp - data < datasz - iov.iov_len && !overflow; ! dp = nextdp(dp)) { ! char *name; ! uint32_t esize; ! uint32_t cookie; ! ! overflow = (uint64_t)dp->d_off > UINT32_MAX; ! if (overflow) { ! cookie = 0; ! iseof = 1; ! } else ! cookie = (uint32_t)dp->d_off; ! ! if (dp->d_ino == 0 || (uint64_t)dp->d_ino > UINT32_MAX) { ! if (entry != NULL) ! entry->cookie = cookie; ! continue; } + + name = nfscmd_convname(ca, exi, dp->d_name, + NFSCMD_CONV_OUTBOUND, NFS_MAXPATHLEN + 1); + if (name == NULL) { + if (entry != NULL) + entry->cookie = cookie; + continue; } /* ! * struct entry: ! * fileid: 1 ! 
* name (length): 1 ! * name (data): length (rounded up) ! * cookie: 1 ! * nextentry (bool): 1 */ ! esize = (1 + 1 + 1 + 1) * BYTES_PER_XDR_UNIT + ! RNDUP(strlen(name)); ! ! /* If the new entry does not fit, discard it */ ! if (esize > count - size) { ! if (name != dp->d_name) ! kmem_free(name, NFS_MAXPATHLEN + 1); ! iseof = 0; ! goto done; } ! ! entry = kmem_alloc(sizeof (struct nfsentry), KM_SLEEP); ! ! entry->fileid = (uint32_t)dp->d_ino; ! entry->name = strdup(name); ! if (name != dp->d_name) ! kmem_free(name, NFS_MAXPATHLEN + 1); ! entry->cookie = cookie; ! ! size += esize; ! ! /* Add the entry to the linked list */ ! *eptr = entry; ! eptr = &entry->nextentry; } + if (!iseof && size < count) { + uio.uio_resid = MIN(datasz, MAXBSIZE); + goto getmoredents; + } + + done: + *eptr = NULL; + + if (iseof || rd->rd_entries != NULL || !error) { + error = 0; + rd->rd_eof = iseof ? TRUE : FALSE; + + /* This is for nfslog only */ + rd->rd_offset = rda->rda_offset; + rd->rd_size = size; + } + bad: VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL); #if 0 /* notyet */ /*
*** 2645,2665 **** VN_RELE(vp); rd->rd_status = puterrno(error); } void * rfs_readdir_getfh(struct nfsrddirargs *rda) { return (&rda->rda_fh); } void rfs_rddirfree(struct nfsrddirres *rd) { ! if (rd->rd_entries != NULL) ! kmem_free(rd->rd_entries, rd->rd_bufsize); } /* ARGSUSED */ void rfs_statfs(fhandle_t *fh, struct nfsstatfs *fs, struct exportinfo *exi, --- 2872,2901 ---- VN_RELE(vp); rd->rd_status = puterrno(error); + if (data != NULL) + kmem_free(data, datasz); } void * rfs_readdir_getfh(struct nfsrddirargs *rda) { return (&rda->rda_fh); } void rfs_rddirfree(struct nfsrddirres *rd) { ! if (rd->rd_status == NFS_OK) { ! struct nfsentry *entry, *nentry; ! ! for (entry = rd->rd_entries; entry != NULL; entry = nentry) { ! nentry = entry->nextentry; ! strfree(entry->name); ! kmem_free(entry, sizeof (struct nfsentry)); ! } ! } } /* ARGSUSED */ void rfs_statfs(fhandle_t *fh, struct nfsstatfs *fs, struct exportinfo *exi,
*** 2761,2771 **** vap->va_mtime.tv_nsec = (uint32_t)(sa->sa_mtime.tv_usec * 1000); } return (0); } ! static enum nfsftype vt_to_nf[] = { 0, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, 0, 0, 0, NFSOC, 0 }; /* * check the following fields for overflow: nodeid, size, and time. --- 2997,3007 ---- vap->va_mtime.tv_nsec = (uint32_t)(sa->sa_mtime.tv_usec * 1000); } return (0); } ! static const enum nfsftype vt_to_nf[] = { 0, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, 0, 0, 0, NFSOC, 0 }; /* * check the following fields for overflow: nodeid, size, and time.
*** 2980,2999 **** } void rfs_srvrinit(void) { - mutex_init(&rfs_async_write_lock, NULL, MUTEX_DEFAULT, NULL); nfs2_srv_caller_id = fs_new_caller_id(); } void rfs_srvrfini(void) { - mutex_destroy(&rfs_async_write_lock); } static int rdma_setup_read_data2(struct nfsreadargs *ra, struct nfsrdresult *rr) { struct clist *wcl; int wlist_len; --- 3216,3259 ---- } void rfs_srvrinit(void) { nfs2_srv_caller_id = fs_new_caller_id(); + zone_key_create(&rfs_zone_key, rfs_zone_init, NULL, rfs_zone_fini); } void rfs_srvrfini(void) { } + /* ARGSUSED */ + static void * + rfs_zone_init(zoneid_t zoneid) + { + nfs_srv_t *ns; + + ns = kmem_zalloc(sizeof (*ns), KM_SLEEP); + + mutex_init(&ns->async_write_lock, NULL, MUTEX_DEFAULT, NULL); + ns->write_async = 1; + + return (ns); + } + + /* ARGSUSED */ + static void + rfs_zone_fini(zoneid_t zoneid, void *data) + { + nfs_srv_t *ns; + + ns = (nfs_srv_t *)data; + mutex_destroy(&ns->async_write_lock); + kmem_free(ns, sizeof (*ns)); + } + static int rdma_setup_read_data2(struct nfsreadargs *ra, struct nfsrdresult *rr) { struct clist *wcl; int wlist_len;