1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 
  26 /*
  27  * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
  28  * Copyright (c) 2016, 2017 by Delphix. All rights reserved.
  29  */
  30 
  31 #include <sys/param.h>
  32 #include <sys/types.h>
  33 #include <sys/systm.h>
  34 #include <sys/cred.h>
  35 #include <sys/proc.h>
  36 #include <sys/user.h>
  37 #include <sys/time.h>
  38 #include <sys/buf.h>
  39 #include <sys/vfs.h>
  40 #include <sys/vnode.h>
  41 #include <sys/socket.h>
  42 #include <sys/uio.h>
  43 #include <sys/tiuser.h>
  44 #include <sys/swap.h>
  45 #include <sys/errno.h>
  46 #include <sys/debug.h>
  47 #include <sys/kmem.h>
 
 
 114  * two different threads may race to remove the rnode from the
 115  * freelist.  This race can be resolved by holding the mutex for the
 116  * freelist.  Please note that the mutex for the freelist does not
 117  * need to be held if the rnode is not on the freelist.  It can not be
 118  * placed on the freelist due to the requirement that the thread
 119  * putting the rnode on the freelist must hold the exclusive lock
 120  * to the hash queue and the thread doing the lookup in the hash
 121  * queue is holding either a shared or exclusive lock to the hash
 122  * queue.
 123  *
 124  * The lock ordering is:
 125  *
 126  *      hash bucket lock -> vnode lock
 127  *      hash bucket lock -> freelist lock
 128  */
 129 static rhashq_t *rtable;
 130 
 131 static kmutex_t rpfreelist_lock;
 132 static rnode_t *rpfreelist = NULL;
 133 static long rnew = 0;
 134 long nrnode = 0;
 135 
 136 static int rtablesize;
 137 static int rtablemask;
 138 
 139 static int hashlen = 4;
 140 
 141 static struct kmem_cache *rnode_cache;
 142 
 143 /*
 144  * Mutex to protect the following variables:
 145  *      nfs_major
 146  *      nfs_minor
 147  */
 148 kmutex_t nfs_minor_lock;
 149 int nfs_major;
 150 int nfs_minor;
 151 
 152 /* Do we allow preepoch (negative) time values over the wire? */
 153 bool_t nfs_allow_preepoch_time = FALSE; /* default: do not allow preepoch */
 154 
 155 /*
 156  * Access cache
 157  */
 158 static acache_hash_t *acache;
 159 static long nacache;    /* used strictly to size the number of hash queues */
 160 
 161 static int acachesize;
 162 static int acachemask;
 163 static struct kmem_cache *acache_cache;
 164 
 165 /*
 166  * Client side utilities
 167  */
 168 
 169 /*
 170  * client side statistics
 171  */
 172 static const struct clstat clstat_tmpl = {
 173         { "calls",      KSTAT_DATA_UINT64 },
 174         { "badcalls",   KSTAT_DATA_UINT64 },
 175         { "clgets",     KSTAT_DATA_UINT64 },
 176         { "cltoomany",  KSTAT_DATA_UINT64 },
 177 #ifdef DEBUG
 178         { "clalloc",    KSTAT_DATA_UINT64 },
 179         { "noresponse", KSTAT_DATA_UINT64 },
 
 211         { "r_path",     KSTAT_DATA_UINT64 },
 212 };
 213 #endif  /* DEBUG */
 214 
 215 /*
 216  * We keep a global list of per-zone client data, so we can clean up all zones
 217  * if we get low on memory.
 218  */
 219 static list_t nfs_clnt_list;
 220 static kmutex_t nfs_clnt_list_lock;
 221 static zone_key_t nfsclnt_zone_key;
 222 
 223 static struct kmem_cache *chtab_cache;
 224 
 225 /*
 226  * Some servers do not properly update the attributes of the
 227  * directory when changes are made.  To allow interoperability
 228  * with these broken servers, the nfs_disable_rddir_cache
 229  * parameter must be set in /etc/system
 230  */
 231 int nfs_disable_rddir_cache = 0;
 232 
 233 int             clget(clinfo_t *, servinfo_t *, cred_t *, CLIENT **,
 234                     struct chtab **);
 235 void            clfree(CLIENT *, struct chtab *);
 236 static int      acl_clget(mntinfo_t *, servinfo_t *, cred_t *, CLIENT **,
 237                     struct chtab **, struct nfs_clnt *);
 238 static int      nfs_clget(mntinfo_t *, servinfo_t *, cred_t *, CLIENT **,
 239                     struct chtab **, struct nfs_clnt *);
 240 static void     clreclaim(void *);
 241 static int      nfs_feedback(int, int, mntinfo_t *);
 242 static int      rfscall(mntinfo_t *, rpcproc_t, xdrproc_t, caddr_t, xdrproc_t,
 243                     caddr_t, cred_t *, int *, enum clnt_stat *, int,
 244                     failinfo_t *);
 245 static int      aclcall(mntinfo_t *, rpcproc_t, xdrproc_t, caddr_t, xdrproc_t,
 246                     caddr_t, cred_t *, int *, int, failinfo_t *);
 247 static void     rinactive(rnode_t *, cred_t *);
 248 static int      rtablehash(nfs_fhandle *);
 249 static vnode_t  *make_rnode(nfs_fhandle *, rhashq_t *, struct vfs *,
 250                     struct vnodeops *,
 251                     int (*)(vnode_t *, page_t *, u_offset_t *, size_t *, int,
 
 847                         rfs2call_hits++;
 848 #endif
 849                         rpcerror = rfscall(mi, which, xdrargs, argsp, xdrres,
 850                             resp, cr, douprintf, NULL, flags, fi);
 851                         crfree(cr);
 852 #ifdef DEBUG
 853                         if (*statusp == NFSERR_ACCES)
 854                                 rfs2call_misses++;
 855 #endif
 856                 }
 857         } else if (rpc_status == RPC_PROCUNAVAIL) {
 858                 *statusp = NFSERR_OPNOTSUPP;
 859                 rpcerror = 0;
 860         }
 861 
 862         return (rpcerror);
 863 }
 864 
 865 #define NFS3_JUKEBOX_DELAY      10 * hz
 866 
 867 static clock_t nfs3_jukebox_delay = 0;
 868 
 869 #ifdef DEBUG
 870 static int rfs3call_hits = 0;
 871 static int rfs3call_misses = 0;
 872 #endif
 873 
 874 int
 875 rfs3call(mntinfo_t *mi, rpcproc_t which, xdrproc_t xdrargs, caddr_t argsp,
 876     xdrproc_t xdrres, caddr_t resp, cred_t *cr, int *douprintf,
 877     nfsstat3 *statusp, int flags, failinfo_t *fi)
 878 {
 879         int rpcerror;
 880         int user_informed;
 881 
 882         user_informed = 0;
 883         do {
 884                 rpcerror = rfscall(mi, which, xdrargs, argsp, xdrres, resp,
 885                     cr, douprintf, NULL, flags, fi);
 886                 if (!rpcerror) {
 887                         cred_t *crr;
 
5202                 kmem_free(dbuf, dlen);
5203                 return (error);
5204         }
5205 
5206         dp = (dirent64_t *)dbuf;
5207 
5208         while ((intptr_t)dp < (intptr_t)dbuf + dbuflen) {
5209                 if (strcmp(dp->d_name, ".") == 0 ||
5210                     strcmp(dp->d_name, "..") == 0 || strcmp(dp->d_name,
5211                     VIEW_READWRITE) == 0 || strcmp(dp->d_name,
5212                     VIEW_READONLY) == 0) {
5213                         dp = (dirent64_t *)((intptr_t)dp + dp->d_reclen);
5214                         continue;
5215                 }
5216 
5217                 *valp = 1;
5218                 break;
5219         }
5220         kmem_free(dbuf, dlen);
5221         return (0);
5222 }
 | 
   1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
  24  * Use is subject to license terms.
  25  */
  26 
  27 /*
  28  * Copyright 2015 Nexenta Systems, Inc.  All rights reserved.
  29  * Copyright (c) 2016, 2017 by Delphix. All rights reserved.
  30  */
  31 
  32 #include <sys/param.h>
  33 #include <sys/types.h>
  34 #include <sys/systm.h>
  35 #include <sys/cred.h>
  36 #include <sys/proc.h>
  37 #include <sys/user.h>
  38 #include <sys/time.h>
  39 #include <sys/buf.h>
  40 #include <sys/vfs.h>
  41 #include <sys/vnode.h>
  42 #include <sys/socket.h>
  43 #include <sys/uio.h>
  44 #include <sys/tiuser.h>
  45 #include <sys/swap.h>
  46 #include <sys/errno.h>
  47 #include <sys/debug.h>
  48 #include <sys/kmem.h>
 
 
 115  * two different threads may race to remove the rnode from the
 116  * freelist.  This race can be resolved by holding the mutex for the
 117  * freelist.  Please note that the mutex for the freelist does not
 118  * need to be held if the rnode is not on the freelist.  It can not be
 119  * placed on the freelist due to the requirement that the thread
 120  * putting the rnode on the freelist must hold the exclusive lock
 121  * to the hash queue and the thread doing the lookup in the hash
 122  * queue is holding either a shared or exclusive lock to the hash
 123  * queue.
 124  *
 125  * The lock ordering is:
 126  *
 127  *      hash bucket lock -> vnode lock
 128  *      hash bucket lock -> freelist lock
 129  */
 130 static rhashq_t *rtable;
 131 
 132 static kmutex_t rpfreelist_lock;
 133 static rnode_t *rpfreelist = NULL;
 134 static long rnew = 0;
 135 volatile long nrnode = 0;
 136 
 137 static int rtablesize;
 138 static int rtablemask;
 139 
 140 static int hashlen = 4;
 141 
 142 static struct kmem_cache *rnode_cache;
 143 
 144 /*
 145  * Mutex to protect the following variables:
 146  *      nfs_major
 147  *      nfs_minor
 148  */
 149 kmutex_t nfs_minor_lock;
 150 int nfs_major;
 151 int nfs_minor;
 152 
 153 /*
 154  * Do we allow preepoch (negative) time values over the wire?
 155  * default: do not allow preepoch
 156  */
 157 volatile bool_t nfs_allow_preepoch_time = FALSE;
 158 
 159 /*
 160  * Access cache
 161  */
 162 static acache_hash_t *acache;
 163 volatile long nacache;  /* used strictly to size the number of hash queues */
 164 
 165 static int acachesize;
 166 static int acachemask;
 167 static struct kmem_cache *acache_cache;
 168 
 169 /*
 170  * Client side utilities
 171  */
 172 
 173 /*
 174  * client side statistics
 175  */
 176 static const struct clstat clstat_tmpl = {
 177         { "calls",      KSTAT_DATA_UINT64 },
 178         { "badcalls",   KSTAT_DATA_UINT64 },
 179         { "clgets",     KSTAT_DATA_UINT64 },
 180         { "cltoomany",  KSTAT_DATA_UINT64 },
 181 #ifdef DEBUG
 182         { "clalloc",    KSTAT_DATA_UINT64 },
 183         { "noresponse", KSTAT_DATA_UINT64 },
 
 215         { "r_path",     KSTAT_DATA_UINT64 },
 216 };
 217 #endif  /* DEBUG */
 218 
 219 /*
 220  * We keep a global list of per-zone client data, so we can clean up all zones
 221  * if we get low on memory.
 222  */
 223 static list_t nfs_clnt_list;
 224 static kmutex_t nfs_clnt_list_lock;
 225 static zone_key_t nfsclnt_zone_key;
 226 
 227 static struct kmem_cache *chtab_cache;
 228 
 229 /*
 230  * Some servers do not properly update the attributes of the
 231  * directory when changes are made.  To allow interoperability
 232  * with these broken servers, the nfs_disable_rddir_cache
 233  * parameter must be set in /etc/system
 234  */
 235 volatile int nfs_disable_rddir_cache = 0;
 236 
 237 int             clget(clinfo_t *, servinfo_t *, cred_t *, CLIENT **,
 238                     struct chtab **);
 239 void            clfree(CLIENT *, struct chtab *);
 240 static int      acl_clget(mntinfo_t *, servinfo_t *, cred_t *, CLIENT **,
 241                     struct chtab **, struct nfs_clnt *);
 242 static int      nfs_clget(mntinfo_t *, servinfo_t *, cred_t *, CLIENT **,
 243                     struct chtab **, struct nfs_clnt *);
 244 static void     clreclaim(void *);
 245 static int      nfs_feedback(int, int, mntinfo_t *);
 246 static int      rfscall(mntinfo_t *, rpcproc_t, xdrproc_t, caddr_t, xdrproc_t,
 247                     caddr_t, cred_t *, int *, enum clnt_stat *, int,
 248                     failinfo_t *);
 249 static int      aclcall(mntinfo_t *, rpcproc_t, xdrproc_t, caddr_t, xdrproc_t,
 250                     caddr_t, cred_t *, int *, int, failinfo_t *);
 251 static void     rinactive(rnode_t *, cred_t *);
 252 static int      rtablehash(nfs_fhandle *);
 253 static vnode_t  *make_rnode(nfs_fhandle *, rhashq_t *, struct vfs *,
 254                     struct vnodeops *,
 255                     int (*)(vnode_t *, page_t *, u_offset_t *, size_t *, int,
 
 851                         rfs2call_hits++;
 852 #endif
 853                         rpcerror = rfscall(mi, which, xdrargs, argsp, xdrres,
 854                             resp, cr, douprintf, NULL, flags, fi);
 855                         crfree(cr);
 856 #ifdef DEBUG
 857                         if (*statusp == NFSERR_ACCES)
 858                                 rfs2call_misses++;
 859 #endif
 860                 }
 861         } else if (rpc_status == RPC_PROCUNAVAIL) {
 862                 *statusp = NFSERR_OPNOTSUPP;
 863                 rpcerror = 0;
 864         }
 865 
 866         return (rpcerror);
 867 }
 868 
 869 #define NFS3_JUKEBOX_DELAY      10 * hz
 870 
 871 volatile clock_t nfs3_jukebox_delay = 0;
 872 
 873 #ifdef DEBUG
 874 static int rfs3call_hits = 0;
 875 static int rfs3call_misses = 0;
 876 #endif
 877 
 878 int
 879 rfs3call(mntinfo_t *mi, rpcproc_t which, xdrproc_t xdrargs, caddr_t argsp,
 880     xdrproc_t xdrres, caddr_t resp, cred_t *cr, int *douprintf,
 881     nfsstat3 *statusp, int flags, failinfo_t *fi)
 882 {
 883         int rpcerror;
 884         int user_informed;
 885 
 886         user_informed = 0;
 887         do {
 888                 rpcerror = rfscall(mi, which, xdrargs, argsp, xdrres, resp,
 889                     cr, douprintf, NULL, flags, fi);
 890                 if (!rpcerror) {
 891                         cred_t *crr;
 
5206                 kmem_free(dbuf, dlen);
5207                 return (error);
5208         }
5209 
5210         dp = (dirent64_t *)dbuf;
5211 
5212         while ((intptr_t)dp < (intptr_t)dbuf + dbuflen) {
5213                 if (strcmp(dp->d_name, ".") == 0 ||
5214                     strcmp(dp->d_name, "..") == 0 || strcmp(dp->d_name,
5215                     VIEW_READWRITE) == 0 || strcmp(dp->d_name,
5216                     VIEW_READONLY) == 0) {
5217                         dp = (dirent64_t *)((intptr_t)dp + dp->d_reclen);
5218                         continue;
5219                 }
5220 
5221                 *valp = 1;
5222                 break;
5223         }
5224         kmem_free(dbuf, dlen);
5225         return (0);
5226 }
5227 
5228 /*
5229  * Return non-zero in a case the vp is an empty directory used as a ZFS mount
5230  * point.  The NFSv2 and NFSv3 servers should not allow to write to such
5231  * directories.
5232  */
5233 int
5234 protect_zfs_mntpt(vnode_t *vp)
5235 {
5236         int error;
5237         vfs_t *vfsp;
5238         struct uio uio;
5239         struct iovec iov;
5240         int eof;
5241         size_t len = 8 * 1024;
5242         char *buf;
5243 
5244         if (vp->v_type != VDIR || vn_ismntpt(vp) == 0)
5245                 return (0);
5246 
5247         error = vn_vfsrlock_wait(vp);
5248         if (error != 0)
5249                 return (error);
5250 
5251         /*
5252          * We protect ZFS mount points only
5253          */
5254         if ((vfsp = vn_mountedvfs(vp)) == NULL ||
5255             strncmp(vfssw[vfsp->vfs_fstype].vsw_name, "zfs", 3) != 0) {
5256                 vn_vfsunlock(vp);
5257                 return (0);
5258         }
5259 
5260         vn_vfsunlock(vp);
5261 
5262         buf = kmem_alloc(len, KM_SLEEP);
5263 
5264         uio.uio_iov = &iov;
5265         uio.uio_iovcnt = 1;
5266         uio.uio_segflg = UIO_SYSSPACE;
5267         uio.uio_fmode = 0;
5268         uio.uio_extflg = UIO_COPY_CACHED;
5269         uio.uio_loffset = 0;
5270         uio.uio_llimit = MAXOFFSET_T;
5271 
5272         eof = 0;
5273 
5274         do {
5275                 size_t rlen;
5276                 dirent64_t *dp;
5277 
5278                 uio.uio_resid = len;
5279                 iov.iov_base = buf;
5280                 iov.iov_len = len;
5281 
5282                 (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
5283                 error = VOP_READDIR(vp, &uio, kcred, &eof, NULL, 0);
5284                 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
5285 
5286                 if (error != 0)
5287                         break;
5288 
5289                 error = EBUSY;
5290 
5291                 rlen = len - uio.uio_resid;
5292                 if (rlen == 0)
5293                         break;
5294 
5295                 for (dp = (dirent64_t *)buf;
5296                     (intptr_t)dp < (intptr_t)buf + rlen;
5297                     dp = (dirent64_t *)((intptr_t)dp + dp->d_reclen)) {
5298                         if (strcmp(dp->d_name, ".") != 0 &&
5299                             strcmp(dp->d_name, "..") != 0) {
5300                                 error = 0;
5301                                 break;
5302                         }
5303                 }
5304         } while (eof == 0 && error != 0);
5305 
5306         kmem_free(buf, len);
5307 
5308         return (error);
5309 }
 |