1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 /*
27 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
28 * Copyright (c) 2016, 2017 by Delphix. All rights reserved.
29 */
30
31 #include <sys/param.h>
32 #include <sys/types.h>
33 #include <sys/systm.h>
34 #include <sys/cred.h>
35 #include <sys/proc.h>
36 #include <sys/user.h>
37 #include <sys/time.h>
38 #include <sys/buf.h>
39 #include <sys/vfs.h>
40 #include <sys/vnode.h>
41 #include <sys/socket.h>
42 #include <sys/uio.h>
43 #include <sys/tiuser.h>
44 #include <sys/swap.h>
45 #include <sys/errno.h>
46 #include <sys/debug.h>
47 #include <sys/kmem.h>
114 * two different threads may race to remove the rnode from the
115 * freelist. This race can be resolved by holding the mutex for the
116 * freelist. Please note that the mutex for the freelist does not
117 * need to be held if the rnode is not on the freelist. It can not be
118 * placed on the freelist due to the requirement that the thread
119 * putting the rnode on the freelist must hold the exclusive lock
120 * to the hash queue and the thread doing the lookup in the hash
121 * queue is holding either a shared or exclusive lock to the hash
122 * queue.
123 *
124 * The lock ordering is:
125 *
126 * hash bucket lock -> vnode lock
127 * hash bucket lock -> freelist lock
128 */
129 static rhashq_t *rtable;
130
131 static kmutex_t rpfreelist_lock;
132 static rnode_t *rpfreelist = NULL;
133 static long rnew = 0;
134 long nrnode = 0;
135
136 static int rtablesize;
137 static int rtablemask;
138
139 static int hashlen = 4;
140
141 static struct kmem_cache *rnode_cache;
142
143 /*
144 * Mutex to protect the following variables:
145 * nfs_major
146 * nfs_minor
147 */
148 kmutex_t nfs_minor_lock;
149 int nfs_major;
150 int nfs_minor;
151
152 /* Do we allow preepoch (negative) time values otw? */
153 bool_t nfs_allow_preepoch_time = FALSE; /* default: do not allow preepoch */
154
155 /*
156 * Access cache
157 */
158 static acache_hash_t *acache;
159 static long nacache; /* used strictly to size the number of hash queues */
160
161 static int acachesize;
162 static int acachemask;
163 static struct kmem_cache *acache_cache;
164
165 /*
166 * Client side utilities
167 */
168
169 /*
170 * client side statistics
171 */
172 static const struct clstat clstat_tmpl = {
173 { "calls", KSTAT_DATA_UINT64 },
174 { "badcalls", KSTAT_DATA_UINT64 },
175 { "clgets", KSTAT_DATA_UINT64 },
176 { "cltoomany", KSTAT_DATA_UINT64 },
177 #ifdef DEBUG
178 { "clalloc", KSTAT_DATA_UINT64 },
179 { "noresponse", KSTAT_DATA_UINT64 },
211 { "r_path", KSTAT_DATA_UINT64 },
212 };
213 #endif /* DEBUG */
214
215 /*
216 * We keep a global list of per-zone client data, so we can clean up all zones
217 * if we get low on memory.
218 */
219 static list_t nfs_clnt_list;
220 static kmutex_t nfs_clnt_list_lock;
221 static zone_key_t nfsclnt_zone_key;
222
223 static struct kmem_cache *chtab_cache;
224
225 /*
226 * Some servers do not properly update the attributes of the
227 * directory when changes are made. To allow interoperability
228 * with these broken servers, the nfs_disable_rddir_cache
229 * parameter must be set in /etc/system
230 */
231 int nfs_disable_rddir_cache = 0;
232
233 int clget(clinfo_t *, servinfo_t *, cred_t *, CLIENT **,
234 struct chtab **);
235 void clfree(CLIENT *, struct chtab *);
236 static int acl_clget(mntinfo_t *, servinfo_t *, cred_t *, CLIENT **,
237 struct chtab **, struct nfs_clnt *);
238 static int nfs_clget(mntinfo_t *, servinfo_t *, cred_t *, CLIENT **,
239 struct chtab **, struct nfs_clnt *);
240 static void clreclaim(void *);
241 static int nfs_feedback(int, int, mntinfo_t *);
242 static int rfscall(mntinfo_t *, rpcproc_t, xdrproc_t, caddr_t, xdrproc_t,
243 caddr_t, cred_t *, int *, enum clnt_stat *, int,
244 failinfo_t *);
245 static int aclcall(mntinfo_t *, rpcproc_t, xdrproc_t, caddr_t, xdrproc_t,
246 caddr_t, cred_t *, int *, int, failinfo_t *);
247 static void rinactive(rnode_t *, cred_t *);
248 static int rtablehash(nfs_fhandle *);
249 static vnode_t *make_rnode(nfs_fhandle *, rhashq_t *, struct vfs *,
250 struct vnodeops *,
251 int (*)(vnode_t *, page_t *, u_offset_t *, size_t *, int,
847 rfs2call_hits++;
848 #endif
849 rpcerror = rfscall(mi, which, xdrargs, argsp, xdrres,
850 resp, cr, douprintf, NULL, flags, fi);
851 crfree(cr);
852 #ifdef DEBUG
853 if (*statusp == NFSERR_ACCES)
854 rfs2call_misses++;
855 #endif
856 }
857 } else if (rpc_status == RPC_PROCUNAVAIL) {
858 *statusp = NFSERR_OPNOTSUPP;
859 rpcerror = 0;
860 }
861
862 return (rpcerror);
863 }
864
/*
 * Default NFSv3 jukebox delay: ten times hz.  The expansion is parenthesized
 * so that it stays a single operand wherever the macro is used; without the
 * parentheses an expression such as (x / NFS3_JUKEBOX_DELAY) would be parsed
 * as ((x / 10) * hz).
 */
#define	NFS3_JUKEBOX_DELAY	(10 * hz)

/*
 * Jukebox delay actually in effect; starts out as 0.
 * NOTE(review): presumably filled in from NFS3_JUKEBOX_DELAY during
 * subsystem initialization -- confirm against the init code.
 */
static clock_t nfs3_jukebox_delay = 0;
868
869 #ifdef DEBUG
870 static int rfs3call_hits = 0;
871 static int rfs3call_misses = 0;
872 #endif
873
874 int
875 rfs3call(mntinfo_t *mi, rpcproc_t which, xdrproc_t xdrargs, caddr_t argsp,
876 xdrproc_t xdrres, caddr_t resp, cred_t *cr, int *douprintf,
877 nfsstat3 *statusp, int flags, failinfo_t *fi)
878 {
879 int rpcerror;
880 int user_informed;
881
882 user_informed = 0;
883 do {
884 rpcerror = rfscall(mi, which, xdrargs, argsp, xdrres, resp,
885 cr, douprintf, NULL, flags, fi);
886 if (!rpcerror) {
887 cred_t *crr;
5202 kmem_free(dbuf, dlen);
5203 return (error);
5204 }
5205
5206 dp = (dirent64_t *)dbuf;
5207
5208 while ((intptr_t)dp < (intptr_t)dbuf + dbuflen) {
5209 if (strcmp(dp->d_name, ".") == 0 ||
5210 strcmp(dp->d_name, "..") == 0 || strcmp(dp->d_name,
5211 VIEW_READWRITE) == 0 || strcmp(dp->d_name,
5212 VIEW_READONLY) == 0) {
5213 dp = (dirent64_t *)((intptr_t)dp + dp->d_reclen);
5214 continue;
5215 }
5216
5217 *valp = 1;
5218 break;
5219 }
5220 kmem_free(dbuf, dlen);
5221 return (0);
5222 }
|
1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27 /*
28 * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
29 * Copyright (c) 2016, 2017 by Delphix. All rights reserved.
30 */
31
32 #include <sys/param.h>
33 #include <sys/types.h>
34 #include <sys/systm.h>
35 #include <sys/cred.h>
36 #include <sys/proc.h>
37 #include <sys/user.h>
38 #include <sys/time.h>
39 #include <sys/buf.h>
40 #include <sys/vfs.h>
41 #include <sys/vnode.h>
42 #include <sys/socket.h>
43 #include <sys/uio.h>
44 #include <sys/tiuser.h>
45 #include <sys/swap.h>
46 #include <sys/errno.h>
47 #include <sys/debug.h>
48 #include <sys/kmem.h>
115 * two different threads may race to remove the rnode from the
116 * freelist. This race can be resolved by holding the mutex for the
117 * freelist. Please note that the mutex for the freelist does not
118 * need to be held if the rnode is not on the freelist. It can not be
119 * placed on the freelist due to the requirement that the thread
120 * putting the rnode on the freelist must hold the exclusive lock
121 * to the hash queue and the thread doing the lookup in the hash
122 * queue is holding either a shared or exclusive lock to the hash
123 * queue.
124 *
125 * The lock ordering is:
126 *
127 * hash bucket lock -> vnode lock
128 * hash bucket lock -> freelist lock
129 */
130 static rhashq_t *rtable;
131
132 static kmutex_t rpfreelist_lock;
133 static rnode_t *rpfreelist = NULL;
134 static long rnew = 0;
135 volatile long nrnode = 0;
136
137 static int rtablesize;
138 static int rtablemask;
139
140 static int hashlen = 4;
141
142 static struct kmem_cache *rnode_cache;
143
144 /*
145 * Mutex to protect the following variables:
146 * nfs_major
147 * nfs_minor
148 */
149 kmutex_t nfs_minor_lock;
150 int nfs_major;
151 int nfs_minor;
152
153 /*
154 * Do we allow preepoch (negative) time values otw?
155 * default: do not allow preepoch
156 */
157 volatile bool_t nfs_allow_preepoch_time = FALSE;
158
159 /*
160 * Access cache
161 */
162 static acache_hash_t *acache;
163 volatile long nacache; /* used strictly to size the number of hash queues */
164
165 static int acachesize;
166 static int acachemask;
167 static struct kmem_cache *acache_cache;
168
169 /*
170 * Client side utilities
171 */
172
173 /*
174 * client side statistics
175 */
176 static const struct clstat clstat_tmpl = {
177 { "calls", KSTAT_DATA_UINT64 },
178 { "badcalls", KSTAT_DATA_UINT64 },
179 { "clgets", KSTAT_DATA_UINT64 },
180 { "cltoomany", KSTAT_DATA_UINT64 },
181 #ifdef DEBUG
182 { "clalloc", KSTAT_DATA_UINT64 },
183 { "noresponse", KSTAT_DATA_UINT64 },
215 { "r_path", KSTAT_DATA_UINT64 },
216 };
217 #endif /* DEBUG */
218
219 /*
220 * We keep a global list of per-zone client data, so we can clean up all zones
221 * if we get low on memory.
222 */
223 static list_t nfs_clnt_list;
224 static kmutex_t nfs_clnt_list_lock;
225 static zone_key_t nfsclnt_zone_key;
226
227 static struct kmem_cache *chtab_cache;
228
229 /*
230 * Some servers do not properly update the attributes of the
231 * directory when changes are made. To allow interoperability
232 * with these broken servers, the nfs_disable_rddir_cache
233 * parameter must be set in /etc/system
234 */
235 volatile int nfs_disable_rddir_cache = 0;
236
237 int clget(clinfo_t *, servinfo_t *, cred_t *, CLIENT **,
238 struct chtab **);
239 void clfree(CLIENT *, struct chtab *);
240 static int acl_clget(mntinfo_t *, servinfo_t *, cred_t *, CLIENT **,
241 struct chtab **, struct nfs_clnt *);
242 static int nfs_clget(mntinfo_t *, servinfo_t *, cred_t *, CLIENT **,
243 struct chtab **, struct nfs_clnt *);
244 static void clreclaim(void *);
245 static int nfs_feedback(int, int, mntinfo_t *);
246 static int rfscall(mntinfo_t *, rpcproc_t, xdrproc_t, caddr_t, xdrproc_t,
247 caddr_t, cred_t *, int *, enum clnt_stat *, int,
248 failinfo_t *);
249 static int aclcall(mntinfo_t *, rpcproc_t, xdrproc_t, caddr_t, xdrproc_t,
250 caddr_t, cred_t *, int *, int, failinfo_t *);
251 static void rinactive(rnode_t *, cred_t *);
252 static int rtablehash(nfs_fhandle *);
253 static vnode_t *make_rnode(nfs_fhandle *, rhashq_t *, struct vfs *,
254 struct vnodeops *,
255 int (*)(vnode_t *, page_t *, u_offset_t *, size_t *, int,
851 rfs2call_hits++;
852 #endif
853 rpcerror = rfscall(mi, which, xdrargs, argsp, xdrres,
854 resp, cr, douprintf, NULL, flags, fi);
855 crfree(cr);
856 #ifdef DEBUG
857 if (*statusp == NFSERR_ACCES)
858 rfs2call_misses++;
859 #endif
860 }
861 } else if (rpc_status == RPC_PROCUNAVAIL) {
862 *statusp = NFSERR_OPNOTSUPP;
863 rpcerror = 0;
864 }
865
866 return (rpcerror);
867 }
868
/*
 * Default NFSv3 jukebox delay: ten times hz.  The expansion is parenthesized
 * so that it stays a single operand wherever the macro is used; without the
 * parentheses an expression such as (x / NFS3_JUKEBOX_DELAY) would be parsed
 * as ((x / 10) * hz).
 */
#define	NFS3_JUKEBOX_DELAY	(10 * hz)

/*
 * Jukebox delay actually in effect; starts out as 0.
 * NOTE(review): presumably filled in from NFS3_JUKEBOX_DELAY during
 * subsystem initialization -- confirm against the init code.
 */
volatile clock_t nfs3_jukebox_delay = 0;
872
873 #ifdef DEBUG
874 static int rfs3call_hits = 0;
875 static int rfs3call_misses = 0;
876 #endif
877
878 int
879 rfs3call(mntinfo_t *mi, rpcproc_t which, xdrproc_t xdrargs, caddr_t argsp,
880 xdrproc_t xdrres, caddr_t resp, cred_t *cr, int *douprintf,
881 nfsstat3 *statusp, int flags, failinfo_t *fi)
882 {
883 int rpcerror;
884 int user_informed;
885
886 user_informed = 0;
887 do {
888 rpcerror = rfscall(mi, which, xdrargs, argsp, xdrres, resp,
889 cr, douprintf, NULL, flags, fi);
890 if (!rpcerror) {
891 cred_t *crr;
5206 kmem_free(dbuf, dlen);
5207 return (error);
5208 }
5209
5210 dp = (dirent64_t *)dbuf;
5211
5212 while ((intptr_t)dp < (intptr_t)dbuf + dbuflen) {
5213 if (strcmp(dp->d_name, ".") == 0 ||
5214 strcmp(dp->d_name, "..") == 0 || strcmp(dp->d_name,
5215 VIEW_READWRITE) == 0 || strcmp(dp->d_name,
5216 VIEW_READONLY) == 0) {
5217 dp = (dirent64_t *)((intptr_t)dp + dp->d_reclen);
5218 continue;
5219 }
5220
5221 *valp = 1;
5222 break;
5223 }
5224 kmem_free(dbuf, dlen);
5225 return (0);
5226 }
5227
5228 /*
5229 * Return non-zero in a case the vp is an empty directory used as a ZFS mount
5230 * point. The NFSv2 and NFSv3 servers should not allow to write to such
5231 * directories.
5232 */
5233 int
5234 protect_zfs_mntpt(vnode_t *vp)
5235 {
5236 int error;
5237 vfs_t *vfsp;
5238 struct uio uio;
5239 struct iovec iov;
5240 int eof;
5241 size_t len = 8 * 1024;
5242 char *buf;
5243
5244 if (vp->v_type != VDIR || vn_ismntpt(vp) == 0)
5245 return (0);
5246
5247 error = vn_vfsrlock_wait(vp);
5248 if (error != 0)
5249 return (error);
5250
5251 /*
5252 * We protect ZFS mount points only
5253 */
5254 if ((vfsp = vn_mountedvfs(vp)) == NULL ||
5255 strncmp(vfssw[vfsp->vfs_fstype].vsw_name, "zfs", 3) != 0) {
5256 vn_vfsunlock(vp);
5257 return (0);
5258 }
5259
5260 vn_vfsunlock(vp);
5261
5262 buf = kmem_alloc(len, KM_SLEEP);
5263
5264 uio.uio_iov = &iov;
5265 uio.uio_iovcnt = 1;
5266 uio.uio_segflg = UIO_SYSSPACE;
5267 uio.uio_fmode = 0;
5268 uio.uio_extflg = UIO_COPY_CACHED;
5269 uio.uio_loffset = 0;
5270 uio.uio_llimit = MAXOFFSET_T;
5271
5272 eof = 0;
5273
5274 do {
5275 size_t rlen;
5276 dirent64_t *dp;
5277
5278 uio.uio_resid = len;
5279 iov.iov_base = buf;
5280 iov.iov_len = len;
5281
5282 (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
5283 error = VOP_READDIR(vp, &uio, kcred, &eof, NULL, 0);
5284 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
5285
5286 if (error != 0)
5287 break;
5288
5289 error = EBUSY;
5290
5291 rlen = len - uio.uio_resid;
5292 if (rlen == 0)
5293 break;
5294
5295 for (dp = (dirent64_t *)buf;
5296 (intptr_t)dp < (intptr_t)buf + rlen;
5297 dp = (dirent64_t *)((intptr_t)dp + dp->d_reclen)) {
5298 if (strcmp(dp->d_name, ".") != 0 &&
5299 strcmp(dp->d_name, "..") != 0) {
5300 error = 0;
5301 break;
5302 }
5303 }
5304 } while (eof == 0 && error != 0);
5305
5306 kmem_free(buf, len);
5307
5308 return (error);
5309 }
|