Print this page
NEX-17125 NFS: nbmand lock entered but not exited on error path
Reviewed by: Yuri Pankov <yuri.pankov@nexenta.com>
Reviewed by: Rick McNeal <rick.mcneal@nexenta.com>
Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com>
Reviewed by: Evan Layton <evan.layton@nexenta.com>
Reviewed by: Gordon Ross <gordon.ross@nexenta.com>
NEX-15279 support NFS server in zone
NEX-15520 online NFS shares cause zoneadm halt to hang in nfs_export_zone_fini
Portions contributed by: Dan Kruchinin dan.kruchinin@nexenta.com
Portions contributed by: Stepan Zastupov stepan.zastupov@gmail.com
Reviewed by: Joyce McIntosh <joyce.mcintosh@nexenta.com>
Reviewed by: Rob Gittins <rob.gittins@nexenta.com>
Reviewed by: Gordon Ross <gordon.ross@nexenta.com>
NEX-9275 Got "bad mutex" panic when run IO to nfs share from clients
Reviewed by: Yuri Pankov <yuri.pankov@nexenta.com>
Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com>
Reviewed by: Rob Gittins <rob.gittins@nexenta.com>
NEX-3524 CLONE - Port NEX-3505 "wrong authentication" messages with root=@0.0.0.0/0 set, result in loss of client access
Reviewed by: Marcel Telka <marcel.telka@nexenta.com>
NEX-3533 CLONE - Port NEX-3019 NFSv3 writes underneath mounted filesystem to directory
Reviewed by: Dan Fields <dan.fields@nexenta.com>
NEX-3095 Issues related to NFS nohide
Reviewed by: Dan Fields <dan.fields@nexenta.com>
NEX-1128 NFS server: Generic uid and gid remapping for AUTH_SYS
Reviewed by: Jan Kryl <jan.kryl@nexenta.com>
OS-20 share_nfs(1m) charset handling is unreliable
OS-22 Page fault at nfscmd_dropped_entrysize+0x1e()
OS-23 NFSv2/3/4: READDIR responses are inconsistent when charset conversion fails
OS-24 rfs3_readdir(): Issues related to nfscmd_convdirent()
Reviewed by: Jan Kryl <jan.kryl@nexenta.com>
Reviewed by: Gordon Ross <gordon.ross@nexenta.com>
re #13613 rb4516 Tunables needs volatile keyword
closes #12112 rb3823 - nfs-nohide: lookup("..") for submount should be correct
re #3541 rb11254 - nfs nohide - "nfssrv: need ability to go to submounts for v3 and v2 protocols"
| Split |
Close |
| Expand all |
| Collapse all |
--- old/usr/src/uts/common/fs/nfs/nfs_srv.c
+++ new/usr/src/uts/common/fs/nfs/nfs_srv.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
|
↓ open down ↓ |
10 lines elided |
↑ open up ↑ |
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 +
21 22 /*
22 23 * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved.
23 - * Copyright 2014 Nexenta Systems, Inc. All rights reserved.
24 - * Copyright (c) 2016 by Delphix. All rights reserved.
25 24 */
26 25
27 26 /*
28 27 * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T.
29 28 * All rights reserved.
30 29 */
31 30
31 +/*
32 + * Copyright 2018 Nexenta Systems, Inc.
33 + * Copyright (c) 2016 by Delphix. All rights reserved.
34 + */
35 +
32 36 #include <sys/param.h>
33 37 #include <sys/types.h>
34 38 #include <sys/systm.h>
35 39 #include <sys/cred.h>
36 40 #include <sys/buf.h>
37 41 #include <sys/vfs.h>
38 42 #include <sys/vnode.h>
39 43 #include <sys/uio.h>
40 44 #include <sys/stat.h>
41 45 #include <sys/errno.h>
42 46 #include <sys/sysmacros.h>
43 47 #include <sys/statvfs.h>
44 48 #include <sys/kmem.h>
45 49 #include <sys/kstat.h>
46 50 #include <sys/dirent.h>
47 51 #include <sys/cmn_err.h>
48 52 #include <sys/debug.h>
49 53 #include <sys/vtrace.h>
50 54 #include <sys/mode.h>
51 55 #include <sys/acl.h>
52 56 #include <sys/nbmlock.h>
53 57 #include <sys/policy.h>
54 58 #include <sys/sdt.h>
55 59
56 60 #include <rpc/types.h>
57 61 #include <rpc/auth.h>
58 62 #include <rpc/svc.h>
59 63
60 64 #include <nfs/nfs.h>
61 65 #include <nfs/export.h>
|
↓ open down ↓ |
20 lines elided |
↑ open up ↑ |
62 66 #include <nfs/nfs_cmd.h>
63 67
64 68 #include <vm/hat.h>
65 69 #include <vm/as.h>
66 70 #include <vm/seg.h>
67 71 #include <vm/seg_map.h>
68 72 #include <vm/seg_kmem.h>
69 73
70 74 #include <sys/strsubr.h>
71 75
/*
 * Forward declaration; the structure itself is defined with the
 * write-clustering code elsewhere in this file (not visible here).
 */
struct rfs_async_write_list;

/*
 * Zone globals of NFSv2 server.
 *
 * NOTE(review): presumably one instance is allocated per zone via
 * rfs_zone_init()/rfs_zone_fini() and looked up through rfs_zone_key
 * (both declared below) -- confirm against the zone hookup code.
 */
typedef struct nfs_srv {
	/* Protects async_write_head (write-clustering state). */
	kmutex_t async_write_lock;
	struct rfs_async_write_list *async_write_head;

	/*
	 * enables write clustering if == 1
	 */
	int write_async;
} nfs_srv_t;
90 +
91 +/*
73 92 * These are the interface routines for the server side of the
74 93 * Network File System. See the NFS version 2 protocol specification
75 94 * for a description of this interface.
76 95 */
77 96
78 97 static int sattr_to_vattr(struct nfssattr *, struct vattr *);
79 98 static void acl_perm(struct vnode *, struct exportinfo *, struct vattr *,
80 99 cred_t *);
100 +static void *rfs_zone_init(zoneid_t zoneid);
101 +static void rfs_zone_fini(zoneid_t zoneid, void *data);
81 102
103 +
82 104 /*
83 105 * Some "over the wire" UNIX file types. These are encoded
84 106 * into the mode. This needs to be fixed in the next rev.
85 107 */
86 108 #define IFMT 0170000 /* type of file */
87 109 #define IFCHR 0020000 /* character special */
88 110 #define IFBLK 0060000 /* block special */
89 111 #define IFSOCK 0140000 /* socket */
90 112
91 113 u_longlong_t nfs2_srv_caller_id;
114 +static zone_key_t rfs_zone_key;
92 115
/*
 * Get file attributes (NFSv2 GETATTR).
 * Returns the current attributes of the file with the given fhandle
 * in ns->ns_attr; ns->ns_status carries NFS_OK or the mapped error.
 */
/* ARGSUSED */
void
rfs_getattr(fhandle_t *fhp, struct nfsattrstat *ns, struct exportinfo *exi,
    struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error;
	vnode_t *vp;
	struct vattr va;

	/* Translate the file handle into a held vnode. */
	vp = nfs_fhtovp(fhp, exi);
	if (vp == NULL) {
		ns->ns_status = NFSERR_STALE;
		return;
	}

	/*
	 * Do the getattr.
	 */
	va.va_mask = AT_ALL;	/* we want all the attributes */

	error = rfs4_delegated_getattr(vp, &va, 0, cr);

	/* check for overflows */
	if (!error) {
		/* Lie about the object type for a referral */
		if (vn_is_nfs_reparse(vp, cr))
			va.va_type = VLNK;

		acl_perm(vp, exi, &va, cr);
		error = vattr_to_nattr(&va, &ns->ns_attr);
	}

	/* Drop the hold taken by nfs_fhtovp(). */
	VN_RELE(vp);

	ns->ns_status = puterrno(error);
}
133 156 void *
134 157 rfs_getattr_getfh(fhandle_t *fhp)
135 158 {
136 159 return (fhp);
137 160 }
138 161
/*
 * Set file attributes (NFSv2 SETATTR).
 * Sets the attributes of the file with the given fhandle.  Returns
 * the new attributes in ns->ns_attr on success.
 */
/* ARGSUSED */
void
rfs_setattr(struct nfssaargs *args, struct nfsattrstat *ns,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error;
	int flag;		/* ATTR_UTIME or 0, passed to VOP_SETATTR */
	int in_crit = 0;	/* nonzero while inside nbmand crit region */
	vnode_t *vp;
	struct vattr va;	/* attributes requested by the client */
	struct vattr bva;	/* attributes before the change */
	struct flock64 bf;
	caller_context_t ct;


	vp = nfs_fhtovp(&args->saa_fh, exi);
	if (vp == NULL) {
		ns->ns_status = NFSERR_STALE;
		return;
	}

	if (rdonly(ro, vp)) {
		VN_RELE(vp);
		ns->ns_status = NFSERR_ROFS;
		return;
	}

	/* Decode the over-the-wire sattr into a vattr. */
	error = sattr_to_vattr(&args->saa_sa, &va);
	if (error) {
		VN_RELE(vp);
		ns->ns_status = puterrno(error);
		return;
	}

	/*
	 * If the client is requesting a change to the mtime,
	 * but the nanosecond field is set to 1 billion, then
	 * this is a flag to the server that it should set the
	 * atime and mtime fields to the server's current time.
	 * The 1 billion number actually came from the client
	 * as 1 million, but the units in the over the wire
	 * request are microseconds instead of nanoseconds.
	 *
	 * This is an overload of the protocol and should be
	 * documented in the NFS Version 2 protocol specification.
	 */
	if (va.va_mask & AT_MTIME) {
		if (va.va_mtime.tv_nsec == 1000000000) {
			gethrestime(&va.va_mtime);
			va.va_atime = va.va_mtime;
			va.va_mask |= AT_ATIME;
			flag = 0;
		} else
			flag = ATTR_UTIME;
	} else
		flag = 0;

	/*
	 * If the filesystem is exported with nosuid, then mask off
	 * the setuid and setgid bits.
	 */
	if ((va.va_mask & AT_MODE) && vp->v_type == VREG &&
	    (exi->exi_export.ex_flags & EX_NOSUID))
		va.va_mode &= ~(VSUID | VSGID);

	ct.cc_sysid = 0;
	ct.cc_pid = 0;
	ct.cc_caller_id = nfs2_srv_caller_id;
	ct.cc_flags = CC_DONTBLOCK;

	/*
	 * We need to specially handle size changes because it is
	 * possible for the client to create a file with modes
	 * which indicate read-only, but with the file opened for
	 * writing. If the client then tries to set the size of
	 * the file, then the normal access checking done in
	 * VOP_SETATTR would prevent the client from doing so,
	 * although it should be legal for it to do so. To get
	 * around this, we do the access checking for ourselves
	 * and then use VOP_SPACE which doesn't do the access
	 * checking which VOP_SETATTR does. VOP_SPACE can only
	 * operate on VREG files, let VOP_SETATTR handle the other
	 * extremely rare cases.
	 * Also the client should not be allowed to change the
	 * size of the file if there is a conflicting non-blocking
	 * mandatory lock in the region of change.
	 */
	if (vp->v_type == VREG && va.va_mask & AT_SIZE) {
		/* Enter the nbmand critical region before lock checks. */
		if (nbl_need_check(vp)) {
			nbl_start_crit(vp, RW_READER);
			in_crit = 1;
		}

		bva.va_mask = AT_UID | AT_SIZE;

		error = VOP_GETATTR(vp, &bva, 0, cr, &ct);

		if (error) {
			/* Must exit the critical region on this path too. */
			if (in_crit)
				nbl_end_crit(vp);
			VN_RELE(vp);
			ns->ns_status = puterrno(error);
			return;
		}

		if (in_crit) {
			u_offset_t offset;
			ssize_t length;

			/*
			 * The region affected by the resize is
			 * [min(old,new), max(old,new)).
			 */
			if (va.va_size < bva.va_size) {
				offset = va.va_size;
				length = bva.va_size - va.va_size;
			} else {
				offset = bva.va_size;
				length = va.va_size - bva.va_size;
			}
			if (nbl_conflict(vp, NBL_WRITE, offset, length, 0,
			    NULL)) {
				error = EACCES;
			}
		}

		if (crgetuid(cr) == bva.va_uid && !error &&
		    va.va_size != bva.va_size) {
			/* Owner resize: use VOP_SPACE, drop AT_SIZE. */
			va.va_mask &= ~AT_SIZE;
			bf.l_type = F_WRLCK;
			bf.l_whence = 0;
			bf.l_start = (off64_t)va.va_size;
			bf.l_len = 0;
			bf.l_sysid = 0;
			bf.l_pid = 0;

			error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
			    (offset_t)va.va_size, cr, &ct);
		}
		if (in_crit)
			nbl_end_crit(vp);
	} else
		error = 0;

	/*
	 * Do the setattr.
	 */
	if (!error && va.va_mask) {
		error = VOP_SETATTR(vp, &va, flag, cr, &ct);
	}

	/*
	 * check if the monitor on either vop_space or vop_setattr detected
	 * a delegation conflict and if so, mark the thread flag as
	 * wouldblock so that the response is dropped and the client will
	 * try again.
	 */
	if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
		VN_RELE(vp);
		curthread->t_flag |= T_WOULDBLOCK;
		return;
	}

	if (!error) {
		va.va_mask = AT_ALL;	/* get everything */

		error = rfs4_delegated_getattr(vp, &va, 0, cr);

		/* check for overflows */
		if (!error) {
			acl_perm(vp, exi, &va, cr);
			error = vattr_to_nattr(&va, &ns->ns_attr);
		}
	}

	ct.cc_flags = 0;

	/*
	 * Force modified metadata out to stable storage.
	 */
	(void) VOP_FSYNC(vp, FNODSYNC, cr, &ct);

	VN_RELE(vp);

	ns->ns_status = puterrno(error);
}
326 349 void *
327 350 rfs_setattr_getfh(struct nfssaargs *args)
328 351 {
329 352 return (&args->saa_fh);
330 353 }
331 354
/*
 * Cross a covered mount point during lookup.
 *
 * *vpp and *exip are changed and their old references released only on
 * a successful switch to an exported "nohide" submount; in that case
 * they are replaced by the submount's root vnode and exportinfo.  A
 * zero return with unchanged arguments means the submount simply is not
 * exported (or lacks "nohide"); a nonzero return is a hard error from
 * traverse()/VOP_FID(), again with the arguments unchanged.
 */
int
rfs_cross_mnt(vnode_t **vpp, struct exportinfo **exip)
{
	struct exportinfo *exi;
	vnode_t *vp = *vpp;
	fid_t fid;
	int error;

	/*
	 * Take our own hold; traverse() is expected to swap vp for the
	 * held root of the mounted filesystem.  NOTE(review): confirm
	 * traverse()'s hold semantics on its error path.
	 */
	VN_HOLD(vp);

	if ((error = traverse(&vp)) != 0) {
		VN_RELE(vp);
		return (error);
	}

	/* Build a fid for the submount root so we can find its export. */
	bzero(&fid, sizeof (fid));
	fid.fid_len = MAXFIDSZ;
	error = VOP_FID(vp, &fid, NULL);
	if (error) {
		VN_RELE(vp);
		return (error);
	}

	exi = checkexport(&vp->v_vfsp->vfs_fsid, &fid);
	if (exi == NULL ||
	    (exi->exi_export.ex_flags & EX_NOHIDE) == 0) {
		/*
		 * It is not an error, just the subdir is not exported
		 * or "nohide" is not set
		 */
		if (exi != NULL)
			exi_rele(&exi);
		VN_RELE(vp);
	} else {
		/* go to submount: swap in the new exportinfo and vnode */
		exi_rele(exip);
		*exip = exi;

		VN_RELE(*vpp);
		*vpp = vp;
	}

	return (0);
}
400 +
/*
 * Given mounted "dvp" and "exi", go to the upper (covered) mountpoint
 * with dvp/exi correction.
 * Returns 0 on success; on success the old *dvpp/*exip references are
 * released and replaced.  On failure both are left untouched.
 */
int
rfs_climb_crossmnt(vnode_t **dvpp, struct exportinfo **exip, cred_t *cr)
{
	struct exportinfo *exi;
	vnode_t *dvp = *dvpp;

	/* Caller guarantees dvp is the root of a mounted filesystem. */
	ASSERT(dvp->v_flag & VROOT);

	VN_HOLD(dvp);
	/*
	 * NOTE(review): untraverse() presumably consumes our hold and
	 * returns a held covered vnode -- confirm.
	 */
	dvp = untraverse(dvp);
	exi = nfs_vptoexi(NULL, dvp, cr, NULL, NULL, FALSE);
	if (exi == NULL) {
		/* The covered directory is not exported. */
		VN_RELE(dvp);
		return (-1);
	}

	/* Swap in the new exportinfo/vnode, dropping the old references. */
	exi_rele(exip);
	*exip = exi;
	VN_RELE(*dvpp);
	*dvpp = dvp;

	return (0);
}
/*
 * Directory lookup (NFSv2 LOOKUP).
 * Returns an fhandle and file attributes for file name in a directory.
 *
 * Reference discipline: an extra hold is taken on exi with exi_hold()
 * and every exit path releases the (possibly replaced) exi via the
 * "out:" label; rfs_climb_crossmnt()/rfs_cross_mnt()/
 * rfs_publicfh_mclookup() may each swap exi (and dvp/vp) for a
 * different export's reference.
 */
/* ARGSUSED */
void
rfs_lookup(struct nfsdiropargs *da, struct nfsdiropres *dr,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error;
	vnode_t *dvp;
	vnode_t *vp;
	struct vattr va;
	fhandle_t *fhp = da->da_fhandle;
	struct sec_ol sec = {0, 0};
	bool_t publicfh_flag = FALSE, auth_weak = FALSE;
	char *name;
	struct sockaddr *ca;

	/*
	 * Trusted Extension doesn't support NFSv2. MOUNT
	 * will reject v2 clients. Need to prevent v2 client
	 * access via WebNFS here.
	 */
	if (is_system_labeled() && req->rq_vers == 2) {
		dr->dr_status = NFSERR_ACCES;
		return;
	}

	/*
	 * Disallow NULL paths
	 */
	if (da->da_name == NULL || *da->da_name == '\0') {
		dr->dr_status = NFSERR_ACCES;
		return;
	}

	/*
	 * Allow lookups from the root - the default
	 * location of the public filehandle.
	 */
	if (exi != NULL && (exi->exi_export.ex_flags & EX_PUBLIC)) {
		dvp = ZONE_ROOTVP();
		VN_HOLD(dvp);
	} else {
		dvp = nfs_fhtovp(fhp, exi);
		if (dvp == NULL) {
			dr->dr_status = NFSERR_STALE;
			return;
		}
	}

	/* Our own reference on exi; released at "out:" (or swapped below). */
	exi_hold(exi);

	/*
	 * Not allow lookup beyond root.
	 * If the filehandle matches a filehandle of the exi,
	 * then the ".." refers beyond the root of an exported filesystem.
	 */
	if (strcmp(da->da_name, "..") == 0 &&
	    EQFID(&exi->exi_fid, (fid_t *)&fhp->fh_len)) {
		if ((exi->exi_export.ex_flags & EX_NOHIDE) &&
		    (dvp->v_flag & VROOT)) {
			/*
			 * special case for ".." and 'nohide' exported root
			 */
			if (rfs_climb_crossmnt(&dvp, &exi, cr) != 0) {
				/*
				 * NOTE(review): an nfsstat is fed to
				 * puterrno() below; works because
				 * NFSERR_ACCES == EACCES numerically.
				 */
				error = NFSERR_ACCES;
				goto out;
			}
		} else {
			error = NFSERR_NOENT;
			goto out;
		}
	}

	/* Convert the name from the client's character set if needed. */
	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
	name = nfscmd_convname(ca, exi, da->da_name, NFSCMD_CONV_INBOUND,
	    MAXPATHLEN);

	if (name == NULL) {
		error = NFSERR_ACCES;
		goto out;
	}

	/*
	 * If the public filehandle is used then allow
	 * a multi-component lookup, i.e. evaluate
	 * a pathname and follow symbolic links if
	 * necessary.
	 *
	 * This may result in a vnode in another filesystem
	 * which is OK as long as the filesystem is exported.
	 */
	if (PUBLIC_FH2(fhp)) {
		publicfh_flag = TRUE;

		/* mclookup supplies its own exi reference on success */
		exi_rele(&exi);

		error = rfs_publicfh_mclookup(name, dvp, cr, &vp, &exi,
		    &sec);
	} else {
		/*
		 * Do a normal single component lookup.
		 */
		error = VOP_LOOKUP(dvp, name, &vp, NULL, 0, NULL, cr,
		    NULL, NULL, NULL);
	}

	if (name != da->da_name)
		kmem_free(name, MAXPATHLEN);

	/* Follow an exported "nohide" submount, if any. */
	if (error == 0 && vn_ismntpt(vp)) {
		error = rfs_cross_mnt(&vp, &exi);
		if (error)
			VN_RELE(vp);
	}

	if (!error) {
		va.va_mask = AT_ALL;	/* we want everything */

		error = rfs4_delegated_getattr(vp, &va, 0, cr);

		/* check for overflows */
		if (!error) {
			acl_perm(vp, exi, &va, cr);
			error = vattr_to_nattr(&va, &dr->dr_attr);
			if (!error) {
				if (sec.sec_flags & SEC_QUERY)
					error = makefh_ol(&dr->dr_fhandle, exi,
					    sec.sec_index);
				else {
					error = makefh(&dr->dr_fhandle, vp,
					    exi);
					if (!error && publicfh_flag &&
					    !chk_clnt_sec(exi, req))
						auth_weak = TRUE;
				}
			}
		}
		VN_RELE(vp);
	}

out:
	VN_RELE(dvp);

	/* exi may be NULL if rfs_publicfh_mclookup() failed. */
	if (exi != NULL)
		exi_rele(&exi);

	/*
	 * If it's public fh, no 0x81, and client's flavor is
	 * invalid, set WebNFS status to WNFSERR_CLNT_FLAVOR now.
	 * Then set RPC status to AUTH_TOOWEAK in common_dispatch.
	 */
	if (auth_weak)
		dr->dr_status = (enum nfsstat)WNFSERR_CLNT_FLAVOR;
	else
		dr->dr_status = puterrno(error);
}
476 588 void *
477 589 rfs_lookup_getfh(struct nfsdiropargs *da)
478 590 {
479 591 return (da->da_fhandle);
480 592 }
481 593
/*
 * Read symbolic link (NFSv2 READLINK).
 * Returns the string in the symbolic link at the given fhandle in
 * rl->rl_data (kmem-allocated; freed later by rfs_rlfree()).
 */
/* ARGSUSED */
void
rfs_readlink(fhandle_t *fhp, struct nfsrdlnres *rl, struct exportinfo *exi,
    struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error;
	struct iovec iov;
	struct uio uio;
	vnode_t *vp;
	struct vattr va;
	struct sockaddr *ca;
	char *name = NULL;
	int is_referral = 0;

	vp = nfs_fhtovp(fhp, exi);
	if (vp == NULL) {
		rl->rl_data = NULL;
		rl->rl_status = NFSERR_STALE;
		return;
	}

	va.va_mask = AT_MODE;

	error = VOP_GETATTR(vp, &va, 0, cr, NULL);

	if (error) {
		VN_RELE(vp);
		rl->rl_data = NULL;
		rl->rl_status = puterrno(error);
		return;
	}

	/* Refuse objects with mandatory locking enabled. */
	if (MANDLOCK(vp, va.va_mode)) {
		VN_RELE(vp);
		rl->rl_data = NULL;
		rl->rl_status = NFSERR_ACCES;
		return;
	}

	/* We lied about the object type for a referral */
	if (vn_is_nfs_reparse(vp, cr))
		is_referral = 1;

	/*
	 * XNFS and RFC1094 require us to return ENXIO if argument
	 * is not a link. BUGID 1138002.
	 */
	if (vp->v_type != VLNK && !is_referral) {
		VN_RELE(vp);
		rl->rl_data = NULL;
		rl->rl_status = NFSERR_NXIO;
		return;
	}

	/*
	 * Allocate data for pathname.  This will be freed by rfs_rlfree.
	 */
	rl->rl_data = kmem_alloc(NFS_MAXPATHLEN, KM_SLEEP);

	if (is_referral) {
		char *s;
		size_t strsz;

		/* Get an artificial symlink based on a referral */
		s = build_symlink(vp, cr, &strsz);
		global_svstat_ptr[2][NFS_REFERLINKS].value.ui64++;
		DTRACE_PROBE2(nfs2serv__func__referral__reflink,
		    vnode_t *, vp, char *, s);
		if (s == NULL)
			error = EINVAL;
		else {
			error = 0;
			(void) strlcpy(rl->rl_data, s, NFS_MAXPATHLEN);
			rl->rl_count = (uint32_t)MIN(strsz, NFS_MAXPATHLEN);
			kmem_free(s, strsz);
		}

	} else {

		/*
		 * Set up io vector to read sym link data
		 */
		iov.iov_base = rl->rl_data;
		iov.iov_len = NFS_MAXPATHLEN;
		uio.uio_iov = &iov;
		uio.uio_iovcnt = 1;
		uio.uio_segflg = UIO_SYSSPACE;
		uio.uio_extflg = UIO_COPY_CACHED;
		uio.uio_loffset = (offset_t)0;
		uio.uio_resid = NFS_MAXPATHLEN;

		/*
		 * Do the readlink.
		 */
		error = VOP_READLINK(vp, &uio, cr, NULL);

		rl->rl_count = (uint32_t)(NFS_MAXPATHLEN - uio.uio_resid);

		if (!error)
			rl->rl_data[rl->rl_count] = '\0';

	}


	VN_RELE(vp);

	/* Convert the link text to the client's character set if needed. */
	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
	name = nfscmd_convname(ca, exi, rl->rl_data,
	    NFSCMD_CONV_OUTBOUND, MAXPATHLEN);

	if (name != NULL && name != rl->rl_data) {
		kmem_free(rl->rl_data, NFS_MAXPATHLEN);
		rl->rl_data = name;
	}

	/*
	 * XNFS and RFC1094 require us to return ENXIO if argument
	 * is not a link. UFS returns EINVAL if this is the case,
	 * so we do the mapping here. BUGID 1138002.
	 */
	if (error == EINVAL)
		rl->rl_status = NFSERR_NXIO;
	else
		rl->rl_status = puterrno(error);

}
612 724 void *
613 725 rfs_readlink_getfh(fhandle_t *fhp)
614 726 {
615 727 return (fhp);
616 728 }
617 729 /*
618 730 * Free data allocated by rfs_readlink
619 731 */
620 732 void
621 733 rfs_rlfree(struct nfsrdlnres *rl)
622 734 {
623 735 if (rl->rl_data != NULL)
624 736 kmem_free(rl->rl_data, NFS_MAXPATHLEN);
625 737 }
626 738
627 739 static int rdma_setup_read_data2(struct nfsreadargs *, struct nfsrdresult *);
628 740
/*
 * Read data (NFSv2 READ).
 * Returns some data read from the file at the given fhandle.
 *
 * Lock/reference discipline: the nbmand critical region (in_crit),
 * the VOP_RWLOCK reader lock, and the vnode hold must each be
 * released on every exit path, in that order.
 */
/* ARGSUSED */
void
rfs_read(struct nfsreadargs *ra, struct nfsrdresult *rr,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
	vnode_t *vp;
	int error;
	struct vattr va;
	struct iovec iov;
	struct uio uio;
	mblk_t *mp;		/* reply data; NULL for the RDMA path */
	int alloc_err = 0;
	int in_crit = 0;	/* nonzero while inside nbmand crit region */
	caller_context_t ct;

	vp = nfs_fhtovp(&ra->ra_fhandle, exi);
	if (vp == NULL) {
		rr->rr_data = NULL;
		rr->rr_status = NFSERR_STALE;
		return;
	}

	if (vp->v_type != VREG) {
		VN_RELE(vp);
		rr->rr_data = NULL;
		rr->rr_status = NFSERR_ISDIR;
		return;
	}

	ct.cc_sysid = 0;
	ct.cc_pid = 0;
	ct.cc_caller_id = nfs2_srv_caller_id;
	ct.cc_flags = CC_DONTBLOCK;

	/*
	 * Enter the critical region before calling VOP_RWLOCK
	 * to avoid a deadlock with write requests.
	 */
	if (nbl_need_check(vp)) {
		nbl_start_crit(vp, RW_READER);
		if (nbl_conflict(vp, NBL_READ, ra->ra_offset, ra->ra_count,
		    0, NULL)) {
			nbl_end_crit(vp);
			VN_RELE(vp);
			rr->rr_data = NULL;
			rr->rr_status = NFSERR_ACCES;
			return;
		}
		in_crit = 1;
	}

	error = VOP_RWLOCK(vp, V_WRITELOCK_FALSE, &ct);

	/* check if a monitor detected a delegation conflict */
	if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
		/* exit the critical region entered above (NEX-17125) */
		if (in_crit)
			nbl_end_crit(vp);
		VN_RELE(vp);
		/* mark as wouldblock so response is dropped */
		curthread->t_flag |= T_WOULDBLOCK;

		rr->rr_data = NULL;
		return;
	}

	va.va_mask = AT_ALL;

	error = VOP_GETATTR(vp, &va, 0, cr, &ct);

	if (error) {
		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
		if (in_crit)
			nbl_end_crit(vp);

		VN_RELE(vp);
		rr->rr_data = NULL;
		rr->rr_status = puterrno(error);

		return;
	}

	/*
	 * This is a kludge to allow reading of files created
	 * with no read permission.  The owner of the file
	 * is always allowed to read it.
	 */
	if (crgetuid(cr) != va.va_uid) {
		error = VOP_ACCESS(vp, VREAD, 0, cr, &ct);

		if (error) {
			/*
			 * Exec is the same as read over the net because
			 * of demand loading.
			 */
			error = VOP_ACCESS(vp, VEXEC, 0, cr, &ct);
		}
		if (error) {
			VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
			if (in_crit)
				nbl_end_crit(vp);
			VN_RELE(vp);
			rr->rr_data = NULL;
			rr->rr_status = puterrno(error);

			return;
		}
	}

	if (MANDLOCK(vp, va.va_mode)) {
		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
		if (in_crit)
			nbl_end_crit(vp);

		VN_RELE(vp);
		rr->rr_data = NULL;
		rr->rr_status = NFSERR_ACCES;

		return;
	}

	rr->rr_ok.rrok_wlist_len = 0;
	rr->rr_ok.rrok_wlist = NULL;

	/* Read starting at or past EOF: success with zero bytes. */
	if ((u_offset_t)ra->ra_offset >= va.va_size) {
		rr->rr_count = 0;
		rr->rr_data = NULL;
		/*
		 * In this case, status is NFS_OK, but there is no data
		 * to encode. So set rr_mp to NULL.
		 */
		rr->rr_mp = NULL;
		rr->rr_ok.rrok_wlist = ra->ra_wlist;
		if (rr->rr_ok.rrok_wlist)
			clist_zero_len(rr->rr_ok.rrok_wlist);
		goto done;
	}

	if (ra->ra_wlist) {
		/* RDMA path: read directly into the client's write chunk. */
		mp = NULL;
		rr->rr_mp = NULL;
		(void) rdma_get_wchunk(req, &iov, ra->ra_wlist);
		if (ra->ra_count > iov.iov_len) {
			rr->rr_data = NULL;
			rr->rr_status = NFSERR_INVAL;
			/*
			 * NOTE(review): the "done:" epilogue below
			 * recomputes rr_status, which can overwrite
			 * this NFSERR_INVAL -- confirm intent.
			 */
			goto done;
		}
	} else {
		/*
		 * mp will contain the data to be sent out in the read reply.
		 * This will be freed after the reply has been sent out (by the
		 * driver).
		 * Let's roundup the data to a BYTES_PER_XDR_UNIT multiple, so
		 * that the call to xdrmblk_putmblk() never fails.
		 */
		mp = allocb_wait(RNDUP(ra->ra_count), BPRI_MED, STR_NOSIG,
		    &alloc_err);
		ASSERT(mp != NULL);
		ASSERT(alloc_err == 0);

		rr->rr_mp = mp;

		/*
		 * Set up io vector
		 */
		iov.iov_base = (caddr_t)mp->b_datap->db_base;
		iov.iov_len = ra->ra_count;
	}

	uio.uio_iov = &iov;
	uio.uio_iovcnt = 1;
	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_extflg = UIO_COPY_CACHED;
	uio.uio_loffset = (offset_t)ra->ra_offset;
	uio.uio_resid = ra->ra_count;

	error = VOP_READ(vp, &uio, 0, cr, &ct);

	if (error) {
		if (mp)
			freeb(mp);

		/*
		 * check if a monitor detected a delegation conflict and
		 * mark as wouldblock so response is dropped
		 */
		if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
			curthread->t_flag |= T_WOULDBLOCK;
		else
			rr->rr_status = puterrno(error);

		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
		if (in_crit)
			nbl_end_crit(vp);

		VN_RELE(vp);
		rr->rr_data = NULL;

		return;
	}

	/*
	 * Get attributes again so we can send the latest access
	 * time to the client side for its cache.
	 */
	va.va_mask = AT_ALL;

	error = VOP_GETATTR(vp, &va, 0, cr, &ct);

	if (error) {
		if (mp)
			freeb(mp);

		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
		if (in_crit)
			nbl_end_crit(vp);

		VN_RELE(vp);
		rr->rr_data = NULL;
		rr->rr_status = puterrno(error);

		return;
	}

	/* Bytes actually read = requested minus residual. */
	rr->rr_count = (uint32_t)(ra->ra_count - uio.uio_resid);

	if (mp) {
		rr->rr_data = (char *)mp->b_datap->db_base;
	} else {
		if (ra->ra_wlist) {
			rr->rr_data = (caddr_t)iov.iov_base;
			if (!rdma_setup_read_data2(ra, rr)) {
				rr->rr_data = NULL;
				rr->rr_status = puterrno(NFSERR_INVAL);
			}
		}
	}
done:
	VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
	if (in_crit)
		nbl_end_crit(vp);

	acl_perm(vp, exi, &va, cr);

	/* check for overflows */
	error = vattr_to_nattr(&va, &rr->rr_attr);

	VN_RELE(vp);

	rr->rr_status = puterrno(error);
}
881 995
882 996 /*
883 997 * Free data allocated by rfs_read
884 998 */
885 999 void
886 1000 rfs_rdfree(struct nfsrdresult *rr)
887 1001 {
888 1002 mblk_t *mp;
889 1003
890 1004 if (rr->rr_status == NFS_OK) {
891 1005 mp = rr->rr_mp;
892 1006 if (mp != NULL)
893 1007 freeb(mp);
894 1008 }
895 1009 }
896 1010
897 1011 void *
898 1012 rfs_read_getfh(struct nfsreadargs *ra)
899 1013 {
900 1014 return (&ra->ra_fhandle);
901 1015 }
902 1016
/* Max iovecs handled on the stack before falling back to kmem_alloc() */
#define	MAX_IOVECS	12

#ifdef DEBUG
/* counters: how often the stack iovec array sufficed vs. needed kmem */
static int rfs_write_sync_hits = 0;
static int rfs_write_sync_misses = 0;
#endif

/*
 * Write data to file.
 * Returns attributes of a file after writing some data to it.
 *
 * Any changes made here, especially in error handling might have
 * to also be done in rfs_write (which clusters write requests).
 *
 * Locking protocol: if the file may have non-blocking mandatory
 * locks, enter the nbl critical region (nbl_start_crit) BEFORE
 * taking VOP_RWLOCK to avoid a lock-order deadlock with ufs.  All
 * error paths taken after the critical region is entered must go
 * through "out:" so nbl_end_crit() is always run (NEX-17125).
 */
/* ARGSUSED */
void
rfs_write_sync(struct nfswriteargs *wa, struct nfsattrstat *ns,
	struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error;
	vnode_t *vp;
	rlim64_t rlimit;
	struct vattr va;
	struct uio uio;
	struct iovec iov[MAX_IOVECS];
	mblk_t *m;
	struct iovec *iovp;
	int iovcnt;
	cred_t *savecred;
	int in_crit = 0;		/* 1 while inside the nbl critical region */
	caller_context_t ct;

	vp = nfs_fhtovp(&wa->wa_fhandle, exi);
	if (vp == NULL) {
		ns->ns_status = NFSERR_STALE;
		return;
	}

	if (rdonly(ro, vp)) {
		VN_RELE(vp);
		ns->ns_status = NFSERR_ROFS;
		return;
	}

	if (vp->v_type != VREG) {
		VN_RELE(vp);
		ns->ns_status = NFSERR_ISDIR;
		return;
	}

	/* CC_DONTBLOCK asks the delegation monitor not to block this thread */
	ct.cc_sysid = 0;
	ct.cc_pid = 0;
	ct.cc_caller_id = nfs2_srv_caller_id;
	ct.cc_flags = CC_DONTBLOCK;

	va.va_mask = AT_UID|AT_MODE;

	error = VOP_GETATTR(vp, &va, 0, cr, &ct);

	if (error) {
		VN_RELE(vp);
		ns->ns_status = puterrno(error);

		return;
	}

	if (crgetuid(cr) != va.va_uid) {
		/*
		 * This is a kludge to allow writes of files created
		 * with read only permission.  The owner of the file
		 * is always allowed to write it.
		 */
		error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct);

		if (error) {
			VN_RELE(vp);
			ns->ns_status = puterrno(error);
			return;
		}
	}

	/*
	 * Can't access a mandatory lock file.  This might cause
	 * the NFS service thread to block forever waiting for a
	 * lock to be released that will never be released.
	 */
	if (MANDLOCK(vp, va.va_mode)) {
		VN_RELE(vp);
		ns->ns_status = NFSERR_ACCES;
		return;
	}

	/*
	 * We have to enter the critical region before calling VOP_RWLOCK
	 * to avoid a deadlock with ufs.
	 */
	if (nbl_need_check(vp)) {
		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
		if (nbl_conflict(vp, NBL_WRITE, wa->wa_offset,
		    wa->wa_count, 0, NULL)) {
			error = EACCES;
			goto out;
		}
	}

	error = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct);

	/* check if a monitor detected a delegation conflict */
	if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
		/*
		 * Go through "out:" (rather than returning directly) so
		 * the nbl critical region entered above is exited; the
		 * EAGAIN/CC_WOULDBLOCK case is re-detected there and the
		 * response is dropped via T_WOULDBLOCK.
		 */
		goto out;
	}

	if (wa->wa_data || wa->wa_rlist) {
		/* Do the RDMA thing if necessary */
		if (wa->wa_rlist) {
			iov[0].iov_base = (char *)((wa->wa_rlist)->u.c_daddr3);
			iov[0].iov_len = wa->wa_count;
		} else {
			iov[0].iov_base = wa->wa_data;
			iov[0].iov_len = wa->wa_count;
		}
		uio.uio_iov = iov;
		uio.uio_iovcnt = 1;
		uio.uio_segflg = UIO_SYSSPACE;
		uio.uio_extflg = UIO_COPY_DEFAULT;
		uio.uio_loffset = (offset_t)wa->wa_offset;
		uio.uio_resid = wa->wa_count;
		/*
		 * The limit is checked on the client. We
		 * should allow any size writes here.
		 */
		uio.uio_llimit = curproc->p_fsz_ctl;
		rlimit = uio.uio_llimit - wa->wa_offset;
		if (rlimit < (rlim64_t)uio.uio_resid)
			uio.uio_resid = (uint_t)rlimit;

		/*
		 * for now we assume no append mode
		 */
		/*
		 * We're changing creds because VM may fault and we need
		 * the cred of the current thread to be used if quota
		 * checking is enabled.
		 */
		savecred = curthread->t_cred;
		curthread->t_cred = cr;
		error = VOP_WRITE(vp, &uio, FSYNC, cr, &ct);
		curthread->t_cred = savecred;
	} else {

		/* Data arrived as an mblk chain: build one iovec per mblk. */
		iovcnt = 0;
		for (m = wa->wa_mblk; m != NULL; m = m->b_cont)
			iovcnt++;
		if (iovcnt <= MAX_IOVECS) {
#ifdef DEBUG
			rfs_write_sync_hits++;
#endif
			iovp = iov;
		} else {
#ifdef DEBUG
			rfs_write_sync_misses++;
#endif
			iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
		}
		mblk_to_iov(wa->wa_mblk, iovcnt, iovp);
		uio.uio_iov = iovp;
		uio.uio_iovcnt = iovcnt;
		uio.uio_segflg = UIO_SYSSPACE;
		uio.uio_extflg = UIO_COPY_DEFAULT;
		uio.uio_loffset = (offset_t)wa->wa_offset;
		uio.uio_resid = wa->wa_count;
		/*
		 * The limit is checked on the client. We
		 * should allow any size writes here.
		 */
		uio.uio_llimit = curproc->p_fsz_ctl;
		rlimit = uio.uio_llimit - wa->wa_offset;
		if (rlimit < (rlim64_t)uio.uio_resid)
			uio.uio_resid = (uint_t)rlimit;

		/*
		 * For now we assume no append mode.
		 */
		/*
		 * We're changing creds because VM may fault and we need
		 * the cred of the current thread to be used if quota
		 * checking is enabled.
		 */
		savecred = curthread->t_cred;
		curthread->t_cred = cr;
		error = VOP_WRITE(vp, &uio, FSYNC, cr, &ct);
		curthread->t_cred = savecred;

		if (iovp != iov)
			kmem_free(iovp, sizeof (*iovp) * iovcnt);
	}

	VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct);

	if (!error) {
		/*
		 * Get attributes again so we send the latest mod
		 * time to the client side for its cache.
		 */
		va.va_mask = AT_ALL;	/* now we want everything */

		error = VOP_GETATTR(vp, &va, 0, cr, &ct);

		/* check for overflows */
		if (!error) {
			acl_perm(vp, exi, &va, cr);
			error = vattr_to_nattr(&va, &ns->ns_attr);
		}
	}

out:
	if (in_crit)
		nbl_end_crit(vp);
	VN_RELE(vp);

	/* check if a monitor detected a delegation conflict */
	if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
		/* mark as wouldblock so response is dropped */
		curthread->t_flag |= T_WOULDBLOCK;
	else
		ns->ns_status = puterrno(error);

}
1134 1246
/*
 * One queued NFSv2 WRITE request; a node in a per-file write cluster.
 * Each NFS service thread parks one of these (stack-allocated) on the
 * cluster list and sleeps until the cluster leader fills in its status.
 */
struct rfs_async_write {
	struct nfswriteargs *wa;	/* the client's WRITE arguments */
	struct nfsattrstat *ns;		/* reply to fill in for this request */
	struct svc_req *req;		/* RPC request handle */
	cred_t *cr;			/* credentials of this request */
	bool_t ro;			/* export was mounted read-only */
	kthread_t *thread;		/* service thread waiting on this entry */
	struct rfs_async_write *list;	/* next request, sorted by wa_offset */
};
|
↓ open down ↓ |
78 lines elided |
↑ open up ↑ |
1144 1256
/*
 * A cluster of pending WRITE requests to a single file, keyed by file
 * handle; clusters themselves are linked on the server's active list.
 */
struct rfs_async_write_list {
	fhandle_t *fhp;			/* file handle shared by the cluster */
	kcondvar_t cv;			/* broadcast when the cluster completes */
	struct rfs_async_write *list;	/* member requests, sorted by offset */
	struct rfs_async_write_list *next;	/* next active cluster */
};
1151 1263
/*
 * NOTE(review): these file-scope head/lock globals appear superseded by
 * the per-zone nfs_srv_t fields (async_write_head/async_write_lock) used
 * below -- confirm whether they are still referenced elsewhere in the file.
 */
static struct rfs_async_write_list *rfs_async_write_head = NULL;
static kmutex_t rfs_async_write_lock;
volatile int rfs_write_async = 1;	/* enables write clustering if == 1 */

/* Max iovecs handled on the stack before falling back to kmem_alloc() */
#define	MAXCLIOVECS	42
/* Sentinel "not yet answered" status; 0 would read as NFS_OK */
#define	RFSWRITE_INITVAL	(enum nfsstat) -1

#ifdef DEBUG
/* counters: how often the stack iovec array sufficed vs. needed kmem */
static int rfs_write_hits = 0;
static int rfs_write_misses = 0;
#endif

/*
 * Write data to file.
 * Returns attributes of a file after writing some data to it.
 *
 * Clustered variant: concurrent WRITEs to the same file handle are
 * gathered into a per-file "cluster" (protected by the zone's
 * async_write_lock); one leader thread performs the combined I/O and
 * wakes the other waiters with their individual statuses filled in.
 */
void
rfs_write(struct nfswriteargs *wa, struct nfsattrstat *ns,
	struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error;
	vnode_t *vp;
	rlim64_t rlimit;
	struct vattr va;
	struct uio uio;
	struct rfs_async_write_list *lp;
	struct rfs_async_write_list *nlp;
	struct rfs_async_write *rp;
	struct rfs_async_write *nrp;
	struct rfs_async_write *trp;
	struct rfs_async_write *lrp;
	int data_written;
	int iovcnt;
	mblk_t *m;
	struct iovec *iovp;
	struct iovec *niovp;
	struct iovec iov[MAXCLIOVECS];
	int count;
	int rcount;
	uint_t off;
	uint_t len;
	struct rfs_async_write nrpsp;	/* our (stack) cluster entry */
	struct rfs_async_write_list nlpsp;	/* our (stack) cluster head */
	ushort_t t_flag;
	cred_t *savecred;
	int in_crit = 0;		/* 1 while inside the nbl critical region */
	caller_context_t ct;
	nfs_srv_t *nsrv;		/* this zone's NFS server state */

	/* Clustering disabled for this zone: fall back to the sync path. */
	nsrv = zone_getspecific(rfs_zone_key, curzone);
	if (!nsrv->write_async) {
		rfs_write_sync(wa, ns, exi, req, cr, ro);
		return;
	}

	/*
	 * Initialize status to RFSWRITE_INITVAL instead of 0, since value of 0
	 * is considered an OK.
	 */
	ns->ns_status = RFSWRITE_INITVAL;

	nrp = &nrpsp;
	nrp->wa = wa;
	nrp->ns = ns;
	nrp->req = req;
	nrp->cr = cr;
	nrp->ro = ro;
	nrp->thread = curthread;

	/* Waiters are stack-parked; the thread must not be swapped out. */
	ASSERT(curthread->t_schedflag & TS_DONT_SWAP);

	/*
	 * Look to see if there is already a cluster started
	 * for this file.
	 */
	mutex_enter(&nsrv->async_write_lock);
	for (lp = nsrv->async_write_head; lp != NULL; lp = lp->next) {
		if (bcmp(&wa->wa_fhandle, lp->fhp,
		    sizeof (fhandle_t)) == 0)
			break;
	}

	/*
	 * If lp is non-NULL, then there is already a cluster
	 * started.  We need to place ourselves in the cluster
	 * list in the right place as determined by starting
	 * offset.  Conflicts with non-blocking mandatory locked
	 * regions will be checked when the cluster is processed.
	 */
	if (lp != NULL) {
		rp = lp->list;
		trp = NULL;
		while (rp != NULL && rp->wa->wa_offset < wa->wa_offset) {
			trp = rp;
			rp = rp->list;
		}
		nrp->list = rp;
		if (trp == NULL)
			lp->list = nrp;
		else
			trp->list = nrp;
		/* Sleep until the cluster leader fills in our status. */
		while (nrp->ns->ns_status == RFSWRITE_INITVAL)
			cv_wait(&lp->cv, &nsrv->async_write_lock);
		mutex_exit(&nsrv->async_write_lock);

		return;
	}

	/*
	 * No cluster started yet, start one and add ourselves
	 * to the list of clusters.
	 */
	nrp->list = NULL;

	nlp = &nlpsp;
	nlp->fhp = &wa->wa_fhandle;
	cv_init(&nlp->cv, NULL, CV_DEFAULT, NULL);
	nlp->list = nrp;
	nlp->next = NULL;

	if (nsrv->async_write_head == NULL) {
		nsrv->async_write_head = nlp;
	} else {
		lp = nsrv->async_write_head;
		while (lp->next != NULL)
			lp = lp->next;
		lp->next = nlp;
	}
	mutex_exit(&nsrv->async_write_lock);

	/*
	 * Convert the file handle common to all of the requests
	 * in this cluster to a vnode.
	 */
	vp = nfs_fhtovp(&wa->wa_fhandle, exi);
	if (vp == NULL) {
		/* Unlink the cluster and fail every member with STALE. */
		mutex_enter(&nsrv->async_write_lock);
		if (nsrv->async_write_head == nlp)
			nsrv->async_write_head = nlp->next;
		else {
			lp = nsrv->async_write_head;
			while (lp->next != nlp)
				lp = lp->next;
			lp->next = nlp->next;
		}
		t_flag = curthread->t_flag & T_WOULDBLOCK;
		for (rp = nlp->list; rp != NULL; rp = rp->list) {
			rp->ns->ns_status = NFSERR_STALE;
			rp->thread->t_flag |= t_flag;
		}
		cv_broadcast(&nlp->cv);
		mutex_exit(&nsrv->async_write_lock);

		return;
	}

	/*
	 * Can only write regular files.  Attempts to write any
	 * other file types fail with EISDIR.
	 */
	if (vp->v_type != VREG) {
		VN_RELE(vp);
		/* Unlink the cluster and fail every member with ISDIR. */
		mutex_enter(&nsrv->async_write_lock);
		if (nsrv->async_write_head == nlp)
			nsrv->async_write_head = nlp->next;
		else {
			lp = nsrv->async_write_head;
			while (lp->next != nlp)
				lp = lp->next;
			lp->next = nlp->next;
		}
		t_flag = curthread->t_flag & T_WOULDBLOCK;
		for (rp = nlp->list; rp != NULL; rp = rp->list) {
			rp->ns->ns_status = NFSERR_ISDIR;
			rp->thread->t_flag |= t_flag;
		}
		cv_broadcast(&nlp->cv);
		mutex_exit(&nsrv->async_write_lock);

		return;
	}

	/*
	 * Enter the critical region before calling VOP_RWLOCK, to avoid a
	 * deadlock with ufs.
	 */
	if (nbl_need_check(vp)) {
		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
	}

	/* CC_DONTBLOCK asks the delegation monitor not to block this thread */
	ct.cc_sysid = 0;
	ct.cc_pid = 0;
	ct.cc_caller_id = nfs2_srv_caller_id;
	ct.cc_flags = CC_DONTBLOCK;

	/*
	 * Lock the file for writing.  This operation provides
	 * the delay which allows clusters to grow.
	 */
	error = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct);

	/* check if a monitor detected a delegation conflict */
	if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
		if (in_crit)
			nbl_end_crit(vp);
		VN_RELE(vp);
		/* mark as wouldblock so response is dropped */
		curthread->t_flag |= T_WOULDBLOCK;
		mutex_enter(&nsrv->async_write_lock);
		if (nsrv->async_write_head == nlp)
			nsrv->async_write_head = nlp->next;
		else {
			lp = nsrv->async_write_head;
			while (lp->next != nlp)
				lp = lp->next;
			lp->next = nlp->next;
		}
		for (rp = nlp->list; rp != NULL; rp = rp->list) {
			if (rp->ns->ns_status == RFSWRITE_INITVAL) {
				rp->ns->ns_status = puterrno(error);
				rp->thread->t_flag |= T_WOULDBLOCK;
			}
		}
		cv_broadcast(&nlp->cv);
		mutex_exit(&nsrv->async_write_lock);

		return;
	}

	/*
	 * Disconnect this cluster from the list of clusters.
	 * The cluster that is being dealt with must be fixed
	 * in size after this point, so there is no reason
	 * to leave it on the list so that new requests can
	 * find it.
	 *
	 * The algorithm is that the first write request will
	 * create a cluster, convert the file handle to a
	 * vnode pointer, and then lock the file for writing.
	 * This request is not likely to be clustered with
	 * any others.  However, the next request will create
	 * a new cluster and be blocked in VOP_RWLOCK while
	 * the first request is being processed.  This delay
	 * will allow more requests to be clustered in this
	 * second cluster.
	 */
	mutex_enter(&nsrv->async_write_lock);
	if (nsrv->async_write_head == nlp)
		nsrv->async_write_head = nlp->next;
	else {
		lp = nsrv->async_write_head;
		while (lp->next != nlp)
			lp = lp->next;
		lp->next = nlp->next;
	}
	mutex_exit(&nsrv->async_write_lock);

	/*
	 * Step through the list of requests in this cluster.
	 * We need to check permissions to make sure that all
	 * of the requests have sufficient permission to write
	 * the file.  A cluster can be composed of requests
	 * from different clients and different users on each
	 * client.
	 *
	 * As a side effect, we also calculate the size of the
	 * byte range that this cluster encompasses.
	 */
	rp = nlp->list;
	off = rp->wa->wa_offset;
	len = (uint_t)0;
	do {
		if (rdonly(rp->ro, vp)) {
			rp->ns->ns_status = NFSERR_ROFS;
			t_flag = curthread->t_flag & T_WOULDBLOCK;
			rp->thread->t_flag |= t_flag;
			continue;
		}

		va.va_mask = AT_UID|AT_MODE;

		error = VOP_GETATTR(vp, &va, 0, rp->cr, &ct);

		if (!error) {
			if (crgetuid(rp->cr) != va.va_uid) {
				/*
				 * This is a kludge to allow writes of files
				 * created with read only permission.  The
				 * owner of the file is always allowed to
				 * write it.
				 */
				error = VOP_ACCESS(vp, VWRITE, 0, rp->cr, &ct);
			}
			if (!error && MANDLOCK(vp, va.va_mode))
				error = EACCES;
		}

		/*
		 * Check for a conflict with a nbmand-locked region.
		 */
		if (in_crit && nbl_conflict(vp, NBL_WRITE, rp->wa->wa_offset,
		    rp->wa->wa_count, 0, NULL)) {
			error = EACCES;
		}

		if (error) {
			rp->ns->ns_status = puterrno(error);
			t_flag = curthread->t_flag & T_WOULDBLOCK;
			rp->thread->t_flag |= t_flag;
			continue;
		}
		if (len < rp->wa->wa_offset + rp->wa->wa_count - off)
			len = rp->wa->wa_offset + rp->wa->wa_count - off;
	} while ((rp = rp->list) != NULL);

	/*
	 * Step through the cluster attempting to gather as many
	 * requests which are contiguous as possible.  These
	 * contiguous requests are handled via one call to VOP_WRITE
	 * instead of different calls to VOP_WRITE.  We also keep
	 * track of the fact that any data was written.
	 */
	rp = nlp->list;
	data_written = 0;
	do {
		/*
		 * Skip any requests which are already marked as having an
		 * error.
		 */
		if (rp->ns->ns_status != RFSWRITE_INITVAL) {
			rp = rp->list;
			continue;
		}

		/*
		 * Count the number of iovec's which are required
		 * to handle this set of requests.  One iovec is
		 * needed for each data buffer, whether addressed
		 * by wa_data or by the b_rptr pointers in the
		 * mblk chains.
		 */
		iovcnt = 0;
		lrp = rp;
		for (;;) {
			if (lrp->wa->wa_data || lrp->wa->wa_rlist)
				iovcnt++;
			else {
				m = lrp->wa->wa_mblk;
				while (m != NULL) {
					iovcnt++;
					m = m->b_cont;
				}
			}
			if (lrp->list == NULL ||
			    lrp->list->ns->ns_status != RFSWRITE_INITVAL ||
			    lrp->wa->wa_offset + lrp->wa->wa_count !=
			    lrp->list->wa->wa_offset) {
				lrp = lrp->list;
				break;
			}
			lrp = lrp->list;
		}

		if (iovcnt <= MAXCLIOVECS) {
#ifdef DEBUG
			rfs_write_hits++;
#endif
			niovp = iov;
		} else {
#ifdef DEBUG
			rfs_write_misses++;
#endif
			niovp = kmem_alloc(sizeof (*niovp) * iovcnt, KM_SLEEP);
		}
		/*
		 * Put together the scatter/gather iovecs.
		 */
		iovp = niovp;
		trp = rp;
		count = 0;
		do {
			if (trp->wa->wa_data || trp->wa->wa_rlist) {
				if (trp->wa->wa_rlist) {
					iovp->iov_base =
					    (char *)((trp->wa->wa_rlist)->
					    u.c_daddr3);
					iovp->iov_len = trp->wa->wa_count;
				} else {
					iovp->iov_base = trp->wa->wa_data;
					iovp->iov_len = trp->wa->wa_count;
				}
				iovp++;
			} else {
				m = trp->wa->wa_mblk;
				rcount = trp->wa->wa_count;
				while (m != NULL) {
					iovp->iov_base = (caddr_t)m->b_rptr;
					iovp->iov_len = (m->b_wptr - m->b_rptr);
					rcount -= iovp->iov_len;
					if (rcount < 0)
						iovp->iov_len += rcount;
					iovp++;
					if (rcount <= 0)
						break;
					m = m->b_cont;
				}
			}
			count += trp->wa->wa_count;
			trp = trp->list;
		} while (trp != lrp);

		uio.uio_iov = niovp;
		uio.uio_iovcnt = iovcnt;
		uio.uio_segflg = UIO_SYSSPACE;
		uio.uio_extflg = UIO_COPY_DEFAULT;
		uio.uio_loffset = (offset_t)rp->wa->wa_offset;
		uio.uio_resid = count;
		/*
		 * The limit is checked on the client. We
		 * should allow any size writes here.
		 */
		uio.uio_llimit = curproc->p_fsz_ctl;
		rlimit = uio.uio_llimit - rp->wa->wa_offset;
		if (rlimit < (rlim64_t)uio.uio_resid)
			uio.uio_resid = (uint_t)rlimit;

		/*
		 * For now we assume no append mode.
		 */

		/*
		 * We're changing creds because VM may fault
		 * and we need the cred of the current
		 * thread to be used if quota * checking is
		 * enabled.
		 */
		savecred = curthread->t_cred;
		curthread->t_cred = cr;
		error = VOP_WRITE(vp, &uio, 0, rp->cr, &ct);
		curthread->t_cred = savecred;

		/* check if a monitor detected a delegation conflict */
		if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
			/* mark as wouldblock so response is dropped */
			curthread->t_flag |= T_WOULDBLOCK;

		if (niovp != iov)
			kmem_free(niovp, sizeof (*niovp) * iovcnt);

		if (!error) {
			data_written = 1;
			/*
			 * Get attributes again so we send the latest mod
			 * time to the client side for its cache.
			 */
			va.va_mask = AT_ALL;	/* now we want everything */

			error = VOP_GETATTR(vp, &va, 0, rp->cr, &ct);

			if (!error)
				acl_perm(vp, exi, &va, rp->cr);
		}

		/*
		 * Fill in the status responses for each request
		 * which was just handled.  Also, copy the latest
		 * attributes in to the attribute responses if
		 * appropriate.
		 */
		t_flag = curthread->t_flag & T_WOULDBLOCK;
		do {
			rp->thread->t_flag |= t_flag;
			/* check for overflows */
			if (!error) {
				error = vattr_to_nattr(&va, &rp->ns->ns_attr);
			}
			rp->ns->ns_status = puterrno(error);
			rp = rp->list;
		} while (rp != lrp);
	} while (rp != NULL);

	/*
	 * If any data was written at all, then we need to flush
	 * the data and metadata to stable storage.
	 */
	if (data_written) {
		error = VOP_PUTPAGE(vp, (u_offset_t)off, len, 0, cr, &ct);

		if (!error) {
			error = VOP_FSYNC(vp, FNODSYNC, cr, &ct);
		}
	}

	VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct);

	if (in_crit)
		nbl_end_crit(vp);
	VN_RELE(vp);

	/* Wake every waiter whose status was not already set above. */
	t_flag = curthread->t_flag & T_WOULDBLOCK;
	mutex_enter(&nsrv->async_write_lock);
	for (rp = nlp->list; rp != NULL; rp = rp->list) {
		if (rp->ns->ns_status == RFSWRITE_INITVAL) {
			rp->ns->ns_status = puterrno(error);
			rp->thread->t_flag |= t_flag;
		}
	}
	cv_broadcast(&nlp->cv);
	mutex_exit(&nsrv->async_write_lock);

}
1662 1776
1663 1777 void *
1664 1778 rfs_write_getfh(struct nfswriteargs *wa)
1665 1779 {
1666 1780 return (&wa->wa_fhandle);
1667 1781 }
1668 1782
1669 1783 /*
1670 1784 * Create a file.
1671 1785 * Creates a file with given attributes and returns those attributes
1672 1786 * and an fhandle for the new file.
1673 1787 */
1674 1788 void
1675 1789 rfs_create(struct nfscreatargs *args, struct nfsdiropres *dr,
1676 1790 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
1677 1791 {
1678 1792 int error;
1679 1793 int lookuperr;
1680 1794 int in_crit = 0;
1681 1795 struct vattr va;
1682 1796 vnode_t *vp;
1683 1797 vnode_t *realvp;
1684 1798 vnode_t *dvp;
1685 1799 char *name = args->ca_da.da_name;
1686 1800 vnode_t *tvp = NULL;
1687 1801 int mode;
1688 1802 int lookup_ok;
1689 1803 bool_t trunc;
1690 1804 struct sockaddr *ca;
1691 1805
1692 1806 /*
1693 1807 * Disallow NULL paths
1694 1808 */
1695 1809 if (name == NULL || *name == '\0') {
1696 1810 dr->dr_status = NFSERR_ACCES;
1697 1811 return;
1698 1812 }
1699 1813
1700 1814 dvp = nfs_fhtovp(args->ca_da.da_fhandle, exi);
1701 1815 if (dvp == NULL) {
1702 1816 dr->dr_status = NFSERR_STALE;
1703 1817 return;
1704 1818 }
1705 1819
1706 1820 error = sattr_to_vattr(args->ca_sa, &va);
1707 1821 if (error) {
1708 1822 dr->dr_status = puterrno(error);
1709 1823 return;
1710 1824 }
|
↓ open down ↓ |
41 lines elided |
↑ open up ↑ |
1711 1825
1712 1826 /*
1713 1827 * Must specify the mode.
1714 1828 */
1715 1829 if (!(va.va_mask & AT_MODE)) {
1716 1830 VN_RELE(dvp);
1717 1831 dr->dr_status = NFSERR_INVAL;
1718 1832 return;
1719 1833 }
1720 1834
1835 + if (protect_zfs_mntpt(dvp) != 0) {
1836 + VN_RELE(dvp);
1837 + dr->dr_status = NFSERR_ACCES;
1838 + return;
1839 + }
1840 +
1721 1841 /*
1722 1842 * This is a completely gross hack to make mknod
1723 1843 * work over the wire until we can wack the protocol
1724 1844 */
1725 1845 if ((va.va_mode & IFMT) == IFCHR) {
1726 1846 if (args->ca_sa->sa_size == (uint_t)NFS_FIFO_DEV)
1727 1847 va.va_type = VFIFO; /* xtra kludge for named pipe */
1728 1848 else {
1729 1849 va.va_type = VCHR;
1730 1850 /*
1731 1851 * uncompress the received dev_t
1732 1852 * if the top half is zero indicating a request
1733 1853 * from an `older style' OS.
1734 1854 */
1735 1855 if ((va.va_size & 0xffff0000) == 0)
1736 1856 va.va_rdev = nfsv2_expdev(va.va_size);
1737 1857 else
1738 1858 va.va_rdev = (dev_t)va.va_size;
1739 1859 }
1740 1860 va.va_mask &= ~AT_SIZE;
1741 1861 } else if ((va.va_mode & IFMT) == IFBLK) {
1742 1862 va.va_type = VBLK;
1743 1863 /*
1744 1864 * uncompress the received dev_t
1745 1865 * if the top half is zero indicating a request
1746 1866 * from an `older style' OS.
1747 1867 */
1748 1868 if ((va.va_size & 0xffff0000) == 0)
1749 1869 va.va_rdev = nfsv2_expdev(va.va_size);
1750 1870 else
1751 1871 va.va_rdev = (dev_t)va.va_size;
1752 1872 va.va_mask &= ~AT_SIZE;
1753 1873 } else if ((va.va_mode & IFMT) == IFSOCK) {
1754 1874 va.va_type = VSOCK;
1755 1875 } else {
1756 1876 va.va_type = VREG;
1757 1877 }
1758 1878 va.va_mode &= ~IFMT;
1759 1879 va.va_mask |= AT_TYPE;
1760 1880
1761 1881 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1762 1882 name = nfscmd_convname(ca, exi, name, NFSCMD_CONV_INBOUND,
1763 1883 MAXPATHLEN);
1764 1884 if (name == NULL) {
1765 1885 dr->dr_status = puterrno(EINVAL);
1766 1886 return;
1767 1887 }
1768 1888
1769 1889 /*
1770 1890 * Why was the choice made to use VWRITE as the mode to the
1771 1891 * call to VOP_CREATE ? This results in a bug. When a client
1772 1892 * opens a file that already exists and is RDONLY, the second
1773 1893 * open fails with an EACESS because of the mode.
1774 1894 * bug ID 1054648.
1775 1895 */
1776 1896 lookup_ok = 0;
1777 1897 mode = VWRITE;
1778 1898 if (!(va.va_mask & AT_SIZE) || va.va_type != VREG) {
1779 1899 error = VOP_LOOKUP(dvp, name, &tvp, NULL, 0, NULL, cr,
1780 1900 NULL, NULL, NULL);
1781 1901 if (!error) {
1782 1902 struct vattr at;
1783 1903
1784 1904 lookup_ok = 1;
1785 1905 at.va_mask = AT_MODE;
1786 1906 error = VOP_GETATTR(tvp, &at, 0, cr, NULL);
1787 1907 if (!error)
1788 1908 mode = (at.va_mode & S_IWUSR) ? VWRITE : VREAD;
1789 1909 VN_RELE(tvp);
1790 1910 tvp = NULL;
1791 1911 }
1792 1912 }
1793 1913
1794 1914 if (!lookup_ok) {
1795 1915 if (rdonly(ro, dvp)) {
1796 1916 error = EROFS;
1797 1917 } else if (va.va_type != VREG && va.va_type != VFIFO &&
1798 1918 va.va_type != VSOCK && secpolicy_sys_devices(cr) != 0) {
1799 1919 error = EPERM;
1800 1920 } else {
1801 1921 error = 0;
1802 1922 }
1803 1923 }
1804 1924
1805 1925 /*
1806 1926 * If file size is being modified on an already existing file
1807 1927 * make sure that there are no conflicting non-blocking mandatory
1808 1928 * locks in the region being manipulated. Return EACCES if there
1809 1929 * are conflicting locks.
1810 1930 */
1811 1931 if (!error && (va.va_type == VREG) && (va.va_mask & AT_SIZE)) {
1812 1932 lookuperr = VOP_LOOKUP(dvp, name, &tvp, NULL, 0, NULL, cr,
1813 1933 NULL, NULL, NULL);
1814 1934
1815 1935 if (!lookuperr &&
1816 1936 rfs4_check_delegated(FWRITE, tvp, va.va_size == 0)) {
1817 1937 VN_RELE(tvp);
1818 1938 curthread->t_flag |= T_WOULDBLOCK;
1819 1939 goto out;
1820 1940 }
1821 1941
1822 1942 if (!lookuperr && nbl_need_check(tvp)) {
1823 1943 /*
1824 1944 * The file exists. Now check if it has any
1825 1945 * conflicting non-blocking mandatory locks
1826 1946 * in the region being changed.
1827 1947 */
1828 1948 struct vattr bva;
1829 1949 u_offset_t offset;
1830 1950 ssize_t length;
1831 1951
1832 1952 nbl_start_crit(tvp, RW_READER);
1833 1953 in_crit = 1;
1834 1954
1835 1955 bva.va_mask = AT_SIZE;
1836 1956 error = VOP_GETATTR(tvp, &bva, 0, cr, NULL);
1837 1957 if (!error) {
1838 1958 if (va.va_size < bva.va_size) {
1839 1959 offset = va.va_size;
1840 1960 length = bva.va_size - va.va_size;
1841 1961 } else {
1842 1962 offset = bva.va_size;
1843 1963 length = va.va_size - bva.va_size;
1844 1964 }
1845 1965 if (length) {
1846 1966 if (nbl_conflict(tvp, NBL_WRITE,
1847 1967 offset, length, 0, NULL)) {
1848 1968 error = EACCES;
1849 1969 }
1850 1970 }
1851 1971 }
1852 1972 if (error) {
1853 1973 nbl_end_crit(tvp);
1854 1974 VN_RELE(tvp);
1855 1975 in_crit = 0;
1856 1976 }
1857 1977 } else if (tvp != NULL) {
1858 1978 VN_RELE(tvp);
1859 1979 }
1860 1980 }
1861 1981
1862 1982 if (!error) {
1863 1983 /*
1864 1984 * If filesystem is shared with nosuid the remove any
1865 1985 * setuid/setgid bits on create.
1866 1986 */
1867 1987 if (va.va_type == VREG &&
1868 1988 exi->exi_export.ex_flags & EX_NOSUID)
1869 1989 va.va_mode &= ~(VSUID | VSGID);
1870 1990
1871 1991 error = VOP_CREATE(dvp, name, &va, NONEXCL, mode, &vp, cr, 0,
1872 1992 NULL, NULL);
1873 1993
1874 1994 if (!error) {
1875 1995
1876 1996 if ((va.va_mask & AT_SIZE) && (va.va_size == 0))
1877 1997 trunc = TRUE;
1878 1998 else
1879 1999 trunc = FALSE;
1880 2000
1881 2001 if (rfs4_check_delegated(FWRITE, vp, trunc)) {
1882 2002 VN_RELE(vp);
1883 2003 curthread->t_flag |= T_WOULDBLOCK;
1884 2004 goto out;
1885 2005 }
1886 2006 va.va_mask = AT_ALL;
1887 2007
1888 2008 error = VOP_GETATTR(vp, &va, 0, cr, NULL);
1889 2009
1890 2010 /* check for overflows */
1891 2011 if (!error) {
1892 2012 acl_perm(vp, exi, &va, cr);
1893 2013 error = vattr_to_nattr(&va, &dr->dr_attr);
1894 2014 if (!error) {
1895 2015 error = makefh(&dr->dr_fhandle, vp,
1896 2016 exi);
1897 2017 }
1898 2018 }
1899 2019 /*
1900 2020 * Force modified metadata out to stable storage.
1901 2021 *
1902 2022 * if a underlying vp exists, pass it to VOP_FSYNC
1903 2023 */
1904 2024 if (VOP_REALVP(vp, &realvp, NULL) == 0)
1905 2025 (void) VOP_FSYNC(realvp, FNODSYNC, cr, NULL);
1906 2026 else
1907 2027 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
1908 2028 VN_RELE(vp);
1909 2029 }
1910 2030
1911 2031 if (in_crit) {
1912 2032 nbl_end_crit(tvp);
1913 2033 VN_RELE(tvp);
1914 2034 }
1915 2035 }
1916 2036
1917 2037 /*
1918 2038 * Force modified data and metadata out to stable storage.
1919 2039 */
1920 2040 (void) VOP_FSYNC(dvp, 0, cr, NULL);
1921 2041
1922 2042 out:
1923 2043
1924 2044 VN_RELE(dvp);
1925 2045
1926 2046 dr->dr_status = puterrno(error);
1927 2047
1928 2048 if (name != args->ca_da.da_name)
1929 2049 kmem_free(name, MAXPATHLEN);
1930 2050 }
1931 2051 void *
1932 2052 rfs_create_getfh(struct nfscreatargs *args)
1933 2053 {
1934 2054 return (args->ca_da.da_fhandle);
1935 2055 }
1936 2056
/*
 * Remove a file.
 * Remove named file from parent directory.
 */
/* ARGSUSED */
void
rfs_remove(struct nfsdiropargs *da, enum nfsstat *status,
	struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error = 0;
	vnode_t *vp;		/* parent directory (held via nfs_fhtovp) */
	vnode_t *targvp;	/* file being removed (held via VOP_LOOKUP) */
	int in_crit = 0;	/* nonzero while inside nbmand critical region */

	/*
	 * Disallow NULL paths
	 */
	if (da->da_name == NULL || *da->da_name == '\0') {
		*status = NFSERR_ACCES;
		return;
	}

	vp = nfs_fhtovp(da->da_fhandle, exi);
	if (vp == NULL) {
		*status = NFSERR_STALE;
		return;
	}

	if (rdonly(ro, vp)) {
		VN_RELE(vp);
		*status = NFSERR_ROFS;
		return;
	}

	/*
	 * Check for a conflict with a non-blocking mandatory share reservation.
	 * The lookup also gives us a hold on the target so we can inspect it.
	 */
	error = VOP_LOOKUP(vp, da->da_name, &targvp, NULL, 0,
	    NULL, cr, NULL, NULL, NULL);
	if (error != 0) {
		VN_RELE(vp);
		*status = puterrno(error);
		return;
	}

	/*
	 * If the file is delegated to an v4 client, then initiate
	 * recall and drop this request (by setting T_WOULDBLOCK).
	 * The client will eventually re-transmit the request and
	 * (hopefully), by then, the v4 client will have returned
	 * the delegation.
	 */

	if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
		VN_RELE(vp);
		VN_RELE(targvp);
		curthread->t_flag |= T_WOULDBLOCK;
		return;
	}

	/*
	 * Enter the nbmand critical region before removing so a conflicting
	 * mandatory lock/share reservation can veto the operation.  The
	 * region is exited on every path through the "out" label below.
	 */
	if (nbl_need_check(targvp)) {
		nbl_start_crit(targvp, RW_READER);
		in_crit = 1;
		if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
			error = EACCES;
			goto out;
		}
	}

	error = VOP_REMOVE(vp, da->da_name, cr, NULL, 0);

	/*
	 * Force modified data and metadata out to stable storage.
	 */
	(void) VOP_FSYNC(vp, 0, cr, NULL);

out:
	/* Leave the critical region before dropping the last holds. */
	if (in_crit)
		nbl_end_crit(targvp);
	VN_RELE(targvp);
	VN_RELE(vp);

	*status = puterrno(error);

}
2022 2142
2023 2143 void *
2024 2144 rfs_remove_getfh(struct nfsdiropargs *da)
2025 2145 {
2026 2146 return (da->da_fhandle);
2027 2147 }
2028 2148
/*
 * rename a file
 * Give a file (from) a new name (to).
 */
/* ARGSUSED */
void
rfs_rename(struct nfsrnmargs *args, enum nfsstat *status,
	struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error = 0;
	vnode_t *fromvp;	/* source directory */
	vnode_t *tovp;		/* target directory */
	struct exportinfo *to_exi;
	fhandle_t *fh;
	vnode_t *srcvp;		/* object being renamed */
	vnode_t *targvp;	/* pre-existing object at the target name */
	int in_crit = 0;	/* nonzero while inside nbmand critical region */

	fromvp = nfs_fhtovp(args->rna_from.da_fhandle, exi);
	if (fromvp == NULL) {
		*status = NFSERR_STALE;
		return;
	}

	/*
	 * The target directory's handle must resolve to the same export;
	 * NFSv2 rename may not cross exported filesystems (NFSERR_XDEV).
	 */
	fh = args->rna_to.da_fhandle;
	to_exi = checkexport(&fh->fh_fsid, (fid_t *)&fh->fh_xlen);
	if (to_exi == NULL) {
		VN_RELE(fromvp);
		*status = NFSERR_ACCES;
		return;
	}
	/*
	 * NOTE(review): to_exi is compared against exi below AFTER being
	 * released here.  That is only safe if exi_rele() does not clear
	 * the caller's pointer -- confirm against the current exi_rele()
	 * implementation.
	 */
	exi_rele(&to_exi);

	if (to_exi != exi) {
		VN_RELE(fromvp);
		*status = NFSERR_XDEV;
		return;
	}

	tovp = nfs_fhtovp(args->rna_to.da_fhandle, exi);
	if (tovp == NULL) {
		VN_RELE(fromvp);
		*status = NFSERR_STALE;
		return;
	}

	if (fromvp->v_type != VDIR || tovp->v_type != VDIR) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_NOTDIR;
		return;
	}

	/*
	 * Disallow NULL paths
	 */
	if (args->rna_from.da_name == NULL || *args->rna_from.da_name == '\0' ||
	    args->rna_to.da_name == NULL || *args->rna_to.da_name == '\0') {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_ACCES;
		return;
	}

	if (rdonly(ro, tovp)) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_ROFS;
		return;
	}

	/*
	 * Presumably refuses modifications beneath a ZFS mountpoint --
	 * verify against protect_zfs_mntpt()'s definition.
	 */
	if (protect_zfs_mntpt(tovp) != 0) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_ACCES;
		return;
	}

	/*
	 * Check for a conflict with a non-blocking mandatory share reservation.
	 */
	error = VOP_LOOKUP(fromvp, args->rna_from.da_name, &srcvp, NULL, 0,
	    NULL, cr, NULL, NULL, NULL);
	if (error != 0) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = puterrno(error);
		return;
	}

	/* Check for delegations on the source file */

	if (rfs4_check_delegated(FWRITE, srcvp, FALSE)) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		VN_RELE(srcvp);
		curthread->t_flag |= T_WOULDBLOCK;
		return;
	}

	/* Check for delegation on the file being renamed over, if it exists */

	if (nfs4_get_deleg_policy() != SRV_NEVER_DELEGATE &&
	    VOP_LOOKUP(tovp, args->rna_to.da_name, &targvp, NULL, 0, NULL, cr,
	    NULL, NULL, NULL) == 0) {

		if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
			VN_RELE(tovp);
			VN_RELE(fromvp);
			VN_RELE(srcvp);
			VN_RELE(targvp);
			curthread->t_flag |= T_WOULDBLOCK;
			return;
		}
		VN_RELE(targvp);
	}


	/*
	 * Enter the nbmand critical region on the source; a conflicting
	 * mandatory lock/share reservation vetoes the rename with EACCES.
	 */
	if (nbl_need_check(srcvp)) {
		nbl_start_crit(srcvp, RW_READER);
		in_crit = 1;
		if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL)) {
			error = EACCES;
			goto out;
		}
	}

	error = VOP_RENAME(fromvp, args->rna_from.da_name,
	    tovp, args->rna_to.da_name, cr, NULL, 0);

	/* Keep the cached v_path of the renamed vnode up to date. */
	if (error == 0)
		vn_renamepath(tovp, srcvp, args->rna_to.da_name,
		    strlen(args->rna_to.da_name));

	/*
	 * Force modified data and metadata out to stable storage.
	 */
	(void) VOP_FSYNC(tovp, 0, cr, NULL);
	(void) VOP_FSYNC(fromvp, 0, cr, NULL);

out:
	if (in_crit)
		nbl_end_crit(srcvp);
	VN_RELE(srcvp);
	VN_RELE(tovp);
	VN_RELE(fromvp);

	*status = puterrno(error);

}
2172 2299 void *
2173 2300 rfs_rename_getfh(struct nfsrnmargs *args)
2174 2301 {
2175 2302 return (args->rna_from.da_fhandle);
2176 2303 }
2177 2304
/*
 * Link to a file.
 * Create a file (to) which is a hard link to the given file (from).
 */
/* ARGSUSED */
void
rfs_link(struct nfslinkargs *args, enum nfsstat *status,
	struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error;
	vnode_t *fromvp;	/* existing file being linked to */
	vnode_t *tovp;		/* directory receiving the new link */
	struct exportinfo *to_exi;
	fhandle_t *fh;

	fromvp = nfs_fhtovp(args->la_from, exi);
	if (fromvp == NULL) {
		*status = NFSERR_STALE;
		return;
	}

	/*
	 * The target directory's handle must resolve to the same export;
	 * hard links may not cross exported filesystems (NFSERR_XDEV).
	 */
	fh = args->la_to.da_fhandle;
	to_exi = checkexport(&fh->fh_fsid, (fid_t *)&fh->fh_xlen);
	if (to_exi == NULL) {
		VN_RELE(fromvp);
		*status = NFSERR_ACCES;
		return;
	}
	/*
	 * NOTE(review): as in rfs_rename(), to_exi is compared below after
	 * being released here -- safe only if exi_rele() does not clear the
	 * caller's pointer; confirm.
	 */
	exi_rele(&to_exi);

	if (to_exi != exi) {
		VN_RELE(fromvp);
		*status = NFSERR_XDEV;
		return;
	}

	tovp = nfs_fhtovp(args->la_to.da_fhandle, exi);
	if (tovp == NULL) {
		VN_RELE(fromvp);
		*status = NFSERR_STALE;
		return;
	}

	if (tovp->v_type != VDIR) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_NOTDIR;
		return;
	}
	/*
	 * Disallow NULL paths
	 */
	if (args->la_to.da_name == NULL || *args->la_to.da_name == '\0') {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_ACCES;
		return;
	}

	if (rdonly(ro, tovp)) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_ROFS;
		return;
	}

	/*
	 * Presumably refuses modifications beneath a ZFS mountpoint --
	 * verify against protect_zfs_mntpt()'s definition.
	 */
	if (protect_zfs_mntpt(tovp) != 0) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_ACCES;
		return;
	}

	error = VOP_LINK(tovp, fromvp, args->la_to.da_name, cr, NULL, 0);

	/*
	 * Force modified data and metadata out to stable storage.
	 */
	(void) VOP_FSYNC(tovp, 0, cr, NULL);
	(void) VOP_FSYNC(fromvp, FNODSYNC, cr, NULL);

	VN_RELE(tovp);
	VN_RELE(fromvp);

	*status = puterrno(error);

}
2258 2392 void *
2259 2393 rfs_link_getfh(struct nfslinkargs *args)
2260 2394 {
2261 2395 return (args->la_from);
2262 2396 }
2263 2397
2264 2398 /*
2265 2399 * Symbolicly link to a file.
2266 - * Create a file (to) with the given attributes which is a symbolic link
2400 + * Create a file (from) with the given attributes which is a symbolic link
2267 2401 * to the given path name (to).
2268 2402 */
2269 2403 void
2270 2404 rfs_symlink(struct nfsslargs *args, enum nfsstat *status,
2271 2405 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
2272 2406 {
2273 2407 int error;
2274 2408 struct vattr va;
2275 2409 vnode_t *vp;
2276 2410 vnode_t *svp;
2277 2411 int lerror;
2278 2412 struct sockaddr *ca;
2279 2413 char *name = NULL;
2280 2414
2281 2415 /*
2282 2416 * Disallow NULL paths
2283 2417 */
2284 2418 if (args->sla_from.da_name == NULL || *args->sla_from.da_name == '\0') {
2285 2419 *status = NFSERR_ACCES;
2286 2420 return;
2287 2421 }
2288 2422
2289 2423 vp = nfs_fhtovp(args->sla_from.da_fhandle, exi);
2290 2424 if (vp == NULL) {
2291 2425 *status = NFSERR_STALE;
2292 2426 return;
2293 2427 }
2294 2428
2295 2429 if (rdonly(ro, vp)) {
2296 2430 VN_RELE(vp);
2297 2431 *status = NFSERR_ROFS;
2298 2432 return;
2299 2433 }
2300 2434
2301 2435 error = sattr_to_vattr(args->sla_sa, &va);
2302 2436 if (error) {
2303 2437 VN_RELE(vp);
|
↓ open down ↓ |
27 lines elided |
↑ open up ↑ |
2304 2438 *status = puterrno(error);
2305 2439 return;
2306 2440 }
2307 2441
2308 2442 if (!(va.va_mask & AT_MODE)) {
2309 2443 VN_RELE(vp);
2310 2444 *status = NFSERR_INVAL;
2311 2445 return;
2312 2446 }
2313 2447
2448 + if (protect_zfs_mntpt(vp) != 0) {
2449 + VN_RELE(vp);
2450 + *status = NFSERR_ACCES;
2451 + return;
2452 + }
2453 +
2314 2454 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2315 2455 name = nfscmd_convname(ca, exi, args->sla_tnm,
2316 2456 NFSCMD_CONV_INBOUND, MAXPATHLEN);
2317 2457
2318 2458 if (name == NULL) {
2319 2459 *status = NFSERR_ACCES;
2320 2460 return;
2321 2461 }
2322 2462
2323 2463 va.va_type = VLNK;
2324 2464 va.va_mask |= AT_TYPE;
2325 2465
2326 2466 error = VOP_SYMLINK(vp, args->sla_from.da_name, &va, name, cr, NULL, 0);
2327 2467
2328 2468 /*
2329 2469 * Force new data and metadata out to stable storage.
2330 2470 */
2331 2471 lerror = VOP_LOOKUP(vp, args->sla_from.da_name, &svp, NULL, 0,
2332 2472 NULL, cr, NULL, NULL, NULL);
2333 2473
2334 2474 if (!lerror) {
2335 2475 (void) VOP_FSYNC(svp, 0, cr, NULL);
2336 2476 VN_RELE(svp);
2337 2477 }
2338 2478
2339 2479 /*
2340 2480 * Force modified data and metadata out to stable storage.
2341 2481 */
2342 2482 (void) VOP_FSYNC(vp, 0, cr, NULL);
2343 2483
2344 2484 VN_RELE(vp);
2345 2485
2346 2486 *status = puterrno(error);
2347 2487 if (name != args->sla_tnm)
2348 2488 kmem_free(name, MAXPATHLEN);
2349 2489
2350 2490 }
2351 2491 void *
2352 2492 rfs_symlink_getfh(struct nfsslargs *args)
2353 2493 {
2354 2494 return (args->sla_from.da_fhandle);
2355 2495 }
2356 2496
/*
 * Make a directory.
 * Create a directory with the given name, parent directory, and attributes.
 * Returns a file handle and attributes for the new directory.
 */
/* ARGSUSED */
void
rfs_mkdir(struct nfscreatargs *args, struct nfsdiropres *dr,
	struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error;
	struct vattr va;
	vnode_t *dvp = NULL;	/* the newly created directory */
	vnode_t *vp;		/* parent directory */
	char *name = args->ca_da.da_name;

	/*
	 * Disallow NULL paths
	 */
	if (name == NULL || *name == '\0') {
		dr->dr_status = NFSERR_ACCES;
		return;
	}

	vp = nfs_fhtovp(args->ca_da.da_fhandle, exi);
	if (vp == NULL) {
		dr->dr_status = NFSERR_STALE;
		return;
	}

	if (rdonly(ro, vp)) {
		VN_RELE(vp);
		dr->dr_status = NFSERR_ROFS;
		return;
	}

	error = sattr_to_vattr(args->ca_sa, &va);
	if (error) {
		VN_RELE(vp);
		dr->dr_status = puterrno(error);
		return;
	}

	/* The client must supply a mode for the new directory. */
	if (!(va.va_mask & AT_MODE)) {
		VN_RELE(vp);
		dr->dr_status = NFSERR_INVAL;
		return;
	}

	/*
	 * Presumably refuses modifications beneath a ZFS mountpoint --
	 * verify against protect_zfs_mntpt()'s definition.
	 */
	if (protect_zfs_mntpt(vp) != 0) {
		VN_RELE(vp);
		dr->dr_status = NFSERR_ACCES;
		return;
	}

	va.va_type = VDIR;
	va.va_mask |= AT_TYPE;

	error = VOP_MKDIR(vp, name, &va, &dvp, cr, NULL, 0, NULL);

	if (!error) {
		/*
		 * Attribtutes of the newly created directory should
		 * be returned to the client.
		 */
		va.va_mask = AT_ALL; /* We want everything */
		error = VOP_GETATTR(dvp, &va, 0, cr, NULL);

		/* check for overflows */
		if (!error) {
			acl_perm(vp, exi, &va, cr);
			error = vattr_to_nattr(&va, &dr->dr_attr);
			if (!error) {
				error = makefh(&dr->dr_fhandle, dvp, exi);
			}
		}
		/*
		 * Force new data and metadata out to stable storage.
		 */
		(void) VOP_FSYNC(dvp, 0, cr, NULL);
		VN_RELE(dvp);
	}

	/*
	 * Force modified data and metadata out to stable storage.
	 */
	(void) VOP_FSYNC(vp, 0, cr, NULL);

	VN_RELE(vp);

	dr->dr_status = puterrno(error);

}
2444 2590 void *
2445 2591 rfs_mkdir_getfh(struct nfscreatargs *args)
2446 2592 {
2447 2593 return (args->ca_da.da_fhandle);
2448 2594 }
2449 2595
/*
 * Remove a directory.
 * Remove the given directory name from the given parent directory.
 */
/* ARGSUSED */
void
rfs_rmdir(struct nfsdiropargs *da, enum nfsstat *status,
	struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error;
	vnode_t *vp;	/* parent directory */

	/*
	 * Disallow NULL paths
	 */
	if (da->da_name == NULL || *da->da_name == '\0') {
		*status = NFSERR_ACCES;
		return;
	}

	vp = nfs_fhtovp(da->da_fhandle, exi);
	if (vp == NULL) {
		*status = NFSERR_STALE;
		return;
	}

	if (rdonly(ro, vp)) {
		VN_RELE(vp);
		*status = NFSERR_ROFS;
		return;
	}

	/*
	 * VOP_RMDIR takes a third argument (the current
	 * directory of the process). That's because someone
	 * wants to return EINVAL if one tries to remove ".".
	 * Of course, NFS servers have no idea what their
	 * clients' current directories are. We fake it by
	 * supplying a vnode known to exist and illegal to
	 * remove.  The zone's root is used (rather than the
	 * global rootdir) so this works for in-zone NFS service.
	 */
	error = VOP_RMDIR(vp, da->da_name, ZONE_ROOTVP(), cr, NULL, 0);

	/*
	 * Force modified data and metadata out to stable storage.
	 */
	(void) VOP_FSYNC(vp, 0, cr, NULL);

	VN_RELE(vp);

	/*
	 * System V defines rmdir to return EEXIST, not ENOTEMPTY,
	 * if the directory is not empty. A System V NFS server
	 * needs to map NFSERR_EXIST to NFSERR_NOTEMPTY to transmit
	 * over the wire.
	 */
	if (error == EEXIST)
		*status = NFSERR_NOTEMPTY;
	else
		*status = puterrno(error);

}
2512 2658 void *
2513 2659 rfs_rmdir_getfh(struct nfsdiropargs *da)
2514 2660 {
2515 2661 return (da->da_fhandle);
2516 2662 }
2517 2663
/*
 * Advance to the next dirent64_t record in a VOP_READDIR() buffer,
 * using the record's self-describing d_reclen.  Undefine any prior
 * definition of the macro first to avoid a redefinition clash.
 */
#ifdef nextdp
#undef nextdp
#endif
#define nextdp(dp) ((struct dirent64 *)((char *)(dp) + (dp)->d_reclen))
/*
 * Read entries from a directory (NFSv2 READDIR).
 *
 * Reads raw dirent64 records via VOP_READDIR() into a scratch buffer,
 * charset-converts each name for the client, and builds a linked list
 * of struct nfsentry results sized so the XDR-encoded reply stays
 * within the client's rda_count.  The list is freed by rfs_rddirfree().
 */
/* ARGSUSED */
void
rfs_readdir(struct nfsrddirargs *rda, struct nfsrddirres *rd,
	struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error;
	vnode_t *vp;
	struct iovec iov;
	struct uio uio;
	int iseof;

	uint32_t count = rda->rda_count;
	uint32_t size;	/* size of the readdirres structure */
	int overflow = 0;	/* set when a cookie won't fit in 32 bits */

	size_t datasz;		/* size of the raw dirent scratch buffer */
	char *data = NULL;
	dirent64_t *dp;

	struct sockaddr *ca;
	struct nfsentry **eptr;	/* tail link of the result list */
	struct nfsentry *entry;	/* most recently appended entry */

	vp = nfs_fhtovp(&rda->rda_fh, exi);
	if (vp == NULL) {
		rd->rd_status = NFSERR_STALE;
		return;
	}

	if (vp->v_type != VDIR) {
		VN_RELE(vp);
		rd->rd_status = NFSERR_NOTDIR;
		return;
	}

	(void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);

	error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
	if (error)
		goto bad;

	/*
	 * Don't allow arbitrary counts for allocation
	 */
	if (count > NFS_MAXDATA)
		count = NFS_MAXDATA;

	/*
	 * struct readdirres:
	 * status: 1
	 * entries (bool): 1
	 * eof: 1
	 */
	size = (1 + 1 + 1) * BYTES_PER_XDR_UNIT;

	/* Not even the fixed reply header fits: return an empty result. */
	if (size > count) {
		eptr = &rd->rd_entries;
		iseof = 0;
		size = 0;

		goto done;
	}

	/*
	 * This is simplification. The dirent64_t size is not the same as the
	 * size of XDR representation of entry, but the sizes are similar so
	 * we'll assume they are same. This assumption should not cause any
	 * harm. In worst case we will need to issue VOP_READDIR() once more.
	 */
	datasz = count;

	/*
	 * Make sure that there is room to read at least one entry
	 * if any are available.
	 */
	if (datasz < DIRENT64_RECLEN(MAXNAMELEN))
		datasz = DIRENT64_RECLEN(MAXNAMELEN);

	data = kmem_alloc(datasz, KM_NOSLEEP);
	if (data == NULL) {
		/* The allocation failed; downsize and wait for it this time */
		if (datasz > MAXBSIZE)
			datasz = MAXBSIZE;
		data = kmem_alloc(datasz, KM_SLEEP);
	}

	uio.uio_iov = &iov;
	uio.uio_iovcnt = 1;
	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_extflg = UIO_COPY_CACHED;
	uio.uio_loffset = (offset_t)rda->rda_offset;
	uio.uio_resid = datasz;

	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
	eptr = &rd->rd_entries;
	entry = NULL;

getmoredents:
	iov.iov_base = data;
	iov.iov_len = datasz;

	error = VOP_READDIR(vp, &uio, cr, &iseof, NULL, 0);
	if (error) {
		iseof = 0;
		goto done;
	}

	/* Nothing was read: the directory is exhausted at this offset. */
	if (iov.iov_len == datasz)
		goto done;

	/* Walk the raw records; bytes read = datasz - iov.iov_len. */
	for (dp = (dirent64_t *)data;
	    (char *)dp - data < datasz - iov.iov_len && !overflow;
	    dp = nextdp(dp)) {
		char *name;
		uint32_t esize;
		uint32_t cookie;

		/*
		 * NFSv2 cookies are 32-bit; if d_off overflows, stop the
		 * scan and report EOF so the client does not loop forever.
		 */
		overflow = (uint64_t)dp->d_off > UINT32_MAX;
		if (overflow) {
			cookie = 0;
			iseof = 1;
		} else
			cookie = (uint32_t)dp->d_off;

		/*
		 * Skip holes and inode numbers not representable in 32
		 * bits; patch the previous entry's cookie so the client
		 * resumes past the skipped record.
		 */
		if (dp->d_ino == 0 || (uint64_t)dp->d_ino > UINT32_MAX) {
			if (entry != NULL)
				entry->cookie = cookie;
			continue;
		}

		/* Names that fail charset conversion are skipped likewise. */
		name = nfscmd_convname(ca, exi, dp->d_name,
		    NFSCMD_CONV_OUTBOUND, NFS_MAXPATHLEN + 1);
		if (name == NULL) {
			if (entry != NULL)
				entry->cookie = cookie;
			continue;
		}

		/*
		 * struct entry:
		 * fileid: 1
		 * name (length): 1
		 * name (data): length (rounded up)
		 * cookie: 1
		 * nextentry (bool): 1
		 */
		esize = (1 + 1 + 1 + 1) * BYTES_PER_XDR_UNIT +
		    RNDUP(strlen(name));

		/* If the new entry does not fit, discard it */
		if (esize > count - size) {
			if (name != dp->d_name)
				kmem_free(name, NFS_MAXPATHLEN + 1);
			iseof = 0;
			goto done;
		}

		entry = kmem_alloc(sizeof (struct nfsentry), KM_SLEEP);

		entry->fileid = (uint32_t)dp->d_ino;
		entry->name = strdup(name);
		if (name != dp->d_name)
			kmem_free(name, NFS_MAXPATHLEN + 1);
		entry->cookie = cookie;

		size += esize;

		/* Add the entry to the linked list */
		*eptr = entry;
		eptr = &entry->nextentry;
	}

	/*
	 * There is still room in the reply and the directory is not
	 * exhausted: read another buffer-full.
	 *
	 * NOTE(review): uio_resid is reset to MIN(datasz, MAXBSIZE) while
	 * iov_len is reset to datasz at getmoredents; if datasz > MAXBSIZE
	 * these disagree -- confirm this is intentional.
	 */
	if (!iseof && size < count) {
		uio.uio_resid = MIN(datasz, MAXBSIZE);
		goto getmoredents;
	}

done:
	*eptr = NULL;

	/*
	 * Report success (with whatever was gathered) if we reached EOF,
	 * collected at least one entry, or saw no error at all; a partial
	 * result is preferred over dropping entries on a late error.
	 */
	if (iseof || rd->rd_entries != NULL || !error) {
		error = 0;
		rd->rd_eof = iseof ? TRUE : FALSE;

		/* This is for nfslog only */
		rd->rd_offset = rda->rda_offset;
		rd->rd_size = size;
	}

bad:
	VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);

#if 0 /* notyet */
	/*
	 * Don't do this.  It causes local disk writes when just
	 * reading the file and the overhead is deemed larger
	 * than the benefit.
	 */
	/*
	 * Force modified metadata out to stable storage.
	 */
	(void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
#endif

	VN_RELE(vp);

	rd->rd_status = puterrno(error);

	if (data != NULL)
		kmem_free(data, datasz);
}
2651 2880 void *
2652 2881 rfs_readdir_getfh(struct nfsrddirargs *rda)
2653 2882 {
2654 2883 return (&rda->rda_fh);
2655 2884 }
2656 2885 void
2657 2886 rfs_rddirfree(struct nfsrddirres *rd)
2658 2887 {
2659 - if (rd->rd_entries != NULL)
2660 - kmem_free(rd->rd_entries, rd->rd_bufsize);
2888 + if (rd->rd_status == NFS_OK) {
2889 + struct nfsentry *entry, *nentry;
2890 +
2891 + for (entry = rd->rd_entries; entry != NULL; entry = nentry) {
2892 + nentry = entry->nextentry;
2893 + strfree(entry->name);
2894 + kmem_free(entry, sizeof (struct nfsentry));
2895 + }
2896 + }
2661 2897 }
2662 2898
2663 2899 /* ARGSUSED */
2664 2900 void
2665 2901 rfs_statfs(fhandle_t *fh, struct nfsstatfs *fs, struct exportinfo *exi,
2666 2902 struct svc_req *req, cred_t *cr, bool_t ro)
2667 2903 {
2668 2904 int error;
2669 2905 struct statvfs64 sb;
2670 2906 vnode_t *vp;
2671 2907
2672 2908 vp = nfs_fhtovp(fh, exi);
2673 2909 if (vp == NULL) {
2674 2910 fs->fs_status = NFSERR_STALE;
2675 2911 return;
2676 2912 }
2677 2913
2678 2914 error = VFS_STATVFS(vp->v_vfsp, &sb);
2679 2915
2680 2916 if (!error) {
2681 2917 fs->fs_tsize = nfstsize();
2682 2918 fs->fs_bsize = sb.f_frsize;
2683 2919 fs->fs_blocks = sb.f_blocks;
2684 2920 fs->fs_bfree = sb.f_bfree;
2685 2921 fs->fs_bavail = sb.f_bavail;
2686 2922 }
2687 2923
2688 2924 VN_RELE(vp);
2689 2925
2690 2926 fs->fs_status = puterrno(error);
2691 2927
2692 2928 }
2693 2929 void *
2694 2930 rfs_statfs_getfh(fhandle_t *fh)
2695 2931 {
2696 2932 return (fh);
2697 2933 }
2698 2934
/*
 * Convert on-the-wire NFSv2 settable attributes (struct nfssattr) into
 * a vattr, setting va_mask bits only for fields the client actually
 * supplied (all-ones values mean "not set" in the v2 protocol).
 * Returns 0 or EOVERFLOW if a 32-bit time cannot be represented.
 */
static int
sattr_to_vattr(struct nfssattr *sa, struct vattr *vap)
{
	vap->va_mask = 0;

	/*
	 * There was a sign extension bug in some VFS based systems
	 * which stored the mode as a short.  When it would get
	 * assigned to a u_long, no sign extension would occur.
	 * It needed to, but this wasn't noticed because sa_mode
	 * would then get assigned back to the short, thus ignoring
	 * the upper 16 bits of sa_mode.
	 *
	 * To make this implementation work for both broken
	 * clients and good clients, we check for both versions
	 * of the mode.
	 */
	if (sa->sa_mode != (uint32_t)((ushort_t)-1) &&
	    sa->sa_mode != (uint32_t)-1) {
		vap->va_mask |= AT_MODE;
		vap->va_mode = sa->sa_mode;
	}
	if (sa->sa_uid != (uint32_t)-1) {
		vap->va_mask |= AT_UID;
		vap->va_uid = sa->sa_uid;
	}
	if (sa->sa_gid != (uint32_t)-1) {
		vap->va_mask |= AT_GID;
		vap->va_gid = sa->sa_gid;
	}
	if (sa->sa_size != (uint32_t)-1) {
		vap->va_mask |= AT_SIZE;
		vap->va_size = sa->sa_size;
	}
	/* Both tv_sec and tv_usec must be set for the time to count. */
	if (sa->sa_atime.tv_sec != (int32_t)-1 &&
	    sa->sa_atime.tv_usec != (int32_t)-1) {
#ifndef _LP64
		/* return error if time overflow */
		if (!NFS2_TIME_OK(sa->sa_atime.tv_sec))
			return (EOVERFLOW);
#endif
		vap->va_mask |= AT_ATIME;
		/*
		 * nfs protocol defines times as unsigned so don't extend sign,
		 * unless sysadmin set nfs_allow_preepoch_time.
		 */
		NFS_TIME_T_CONVERT(vap->va_atime.tv_sec, sa->sa_atime.tv_sec);
		vap->va_atime.tv_nsec = (uint32_t)(sa->sa_atime.tv_usec * 1000);
	}
	if (sa->sa_mtime.tv_sec != (int32_t)-1 &&
	    sa->sa_mtime.tv_usec != (int32_t)-1) {
#ifndef _LP64
		/* return error if time overflow */
		if (!NFS2_TIME_OK(sa->sa_mtime.tv_sec))
			return (EOVERFLOW);
#endif
		vap->va_mask |= AT_MTIME;
		/*
		 * nfs protocol defines times as unsigned so don't extend sign,
		 * unless sysadmin set nfs_allow_preepoch_time.
		 */
		NFS_TIME_T_CONVERT(vap->va_mtime.tv_sec, sa->sa_mtime.tv_sec);
		vap->va_mtime.tv_nsec = (uint32_t)(sa->sa_mtime.tv_usec * 1000);
	}
	return (0);
}
2765 3001
/*
 * Map vnode types (indexed by vtype_t) to NFSv2 file types.  Types
 * with no NFSv2 representation map to 0 (NFNON).
 */
static const enum nfsftype vt_to_nf[] = {
	0, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, 0, 0, 0, NFSOC, 0
};
2769 3005
2770 3006 /*
2771 3007 * check the following fields for overflow: nodeid, size, and time.
2772 3008 * There could be a problem when converting 64-bit LP64 fields
2773 3009 * into 32-bit ones. Return an error if there is an overflow.
2774 3010 */
2775 3011 int
2776 3012 vattr_to_nattr(struct vattr *vap, struct nfsfattr *na)
2777 3013 {
2778 3014 ASSERT(vap->va_type >= VNON && vap->va_type <= VBAD);
2779 3015 na->na_type = vt_to_nf[vap->va_type];
2780 3016
2781 3017 if (vap->va_mode == (unsigned short) -1)
2782 3018 na->na_mode = (uint32_t)-1;
2783 3019 else
2784 3020 na->na_mode = VTTOIF(vap->va_type) | vap->va_mode;
2785 3021
2786 3022 if (vap->va_uid == (unsigned short)(-1))
2787 3023 na->na_uid = (uint32_t)(-1);
2788 3024 else if (vap->va_uid == UID_NOBODY)
2789 3025 na->na_uid = (uint32_t)NFS_UID_NOBODY;
2790 3026 else
2791 3027 na->na_uid = vap->va_uid;
2792 3028
2793 3029 if (vap->va_gid == (unsigned short)(-1))
2794 3030 na->na_gid = (uint32_t)-1;
2795 3031 else if (vap->va_gid == GID_NOBODY)
2796 3032 na->na_gid = (uint32_t)NFS_GID_NOBODY;
2797 3033 else
2798 3034 na->na_gid = vap->va_gid;
2799 3035
2800 3036 /*
2801 3037 * Do we need to check fsid for overflow? It is 64-bit in the
2802 3038 * vattr, but are bigger than 32 bit values supported?
2803 3039 */
2804 3040 na->na_fsid = vap->va_fsid;
2805 3041
2806 3042 na->na_nodeid = vap->va_nodeid;
2807 3043
2808 3044 /*
2809 3045 * Check to make sure that the nodeid is representable over the
2810 3046 * wire without losing bits.
2811 3047 */
2812 3048 if (vap->va_nodeid != (u_longlong_t)na->na_nodeid)
2813 3049 return (EFBIG);
2814 3050 na->na_nlink = vap->va_nlink;
2815 3051
2816 3052 /*
2817 3053 * Check for big files here, instead of at the caller. See
2818 3054 * comments in cstat for large special file explanation.
2819 3055 */
2820 3056 if (vap->va_size > (u_longlong_t)MAXOFF32_T) {
2821 3057 if ((vap->va_type == VREG) || (vap->va_type == VDIR))
2822 3058 return (EFBIG);
2823 3059 if ((vap->va_type == VBLK) || (vap->va_type == VCHR)) {
2824 3060 /* UNKNOWN_SIZE | OVERFLOW */
2825 3061 na->na_size = MAXOFF32_T;
2826 3062 } else
2827 3063 na->na_size = vap->va_size;
2828 3064 } else
2829 3065 na->na_size = vap->va_size;
2830 3066
2831 3067 /*
2832 3068 * If the vnode times overflow the 32-bit times that NFS2
2833 3069 * uses on the wire then return an error.
2834 3070 */
2835 3071 if (!NFS_VAP_TIME_OK(vap)) {
2836 3072 return (EOVERFLOW);
2837 3073 }
2838 3074 na->na_atime.tv_sec = vap->va_atime.tv_sec;
2839 3075 na->na_atime.tv_usec = vap->va_atime.tv_nsec / 1000;
2840 3076
2841 3077 na->na_mtime.tv_sec = vap->va_mtime.tv_sec;
2842 3078 na->na_mtime.tv_usec = vap->va_mtime.tv_nsec / 1000;
2843 3079
2844 3080 na->na_ctime.tv_sec = vap->va_ctime.tv_sec;
2845 3081 na->na_ctime.tv_usec = vap->va_ctime.tv_nsec / 1000;
2846 3082
2847 3083 /*
2848 3084 * If the dev_t will fit into 16 bits then compress
2849 3085 * it, otherwise leave it alone. See comments in
2850 3086 * nfs_client.c.
2851 3087 */
2852 3088 if (getminor(vap->va_rdev) <= SO4_MAXMIN &&
2853 3089 getmajor(vap->va_rdev) <= SO4_MAXMAJ)
2854 3090 na->na_rdev = nfsv2_cmpdev(vap->va_rdev);
2855 3091 else
2856 3092 (void) cmpldev(&na->na_rdev, vap->va_rdev);
2857 3093
2858 3094 na->na_blocks = vap->va_nblocks;
2859 3095 na->na_blocksize = vap->va_blksize;
2860 3096
2861 3097 /*
2862 3098 * This bit of ugliness is a *TEMPORARY* hack to preserve the
2863 3099 * over-the-wire protocols for named-pipe vnodes. It remaps the
2864 3100 * VFIFO type to the special over-the-wire type. (see note in nfs.h)
2865 3101 *
2866 3102 * BUYER BEWARE:
2867 3103 * If you are porting the NFS to a non-Sun server, you probably
2868 3104 * don't want to include the following block of code. The
2869 3105 * over-the-wire special file types will be changing with the
2870 3106 * NFS Protocol Revision.
2871 3107 */
2872 3108 if (vap->va_type == VFIFO)
2873 3109 NA_SETFIFO(na);
2874 3110 return (0);
2875 3111 }
2876 3112
2877 3113 /*
2878 3114 * acl v2 support: returns approximate permission.
2879 3115 * default: returns minimal permission (more restrictive)
2880 3116 * aclok: returns maximal permission (less restrictive)
2881 3117 * This routine changes the permissions that are alaredy in *va.
2882 3118 * If a file has minimal ACL, i.e. aclcnt == MIN_ACL_ENTRIES,
2883 3119 * CLASS_OBJ is always the same as GROUP_OBJ entry.
2884 3120 */
2885 3121 static void
2886 3122 acl_perm(struct vnode *vp, struct exportinfo *exi, struct vattr *va, cred_t *cr)
2887 3123 {
2888 3124 vsecattr_t vsa;
2889 3125 int aclcnt;
2890 3126 aclent_t *aclentp;
2891 3127 mode_t mask_perm;
2892 3128 mode_t grp_perm;
2893 3129 mode_t other_perm;
2894 3130 mode_t other_orig;
2895 3131 int error;
2896 3132
2897 3133 /* dont care default acl */
2898 3134 vsa.vsa_mask = (VSA_ACL | VSA_ACLCNT);
2899 3135 error = VOP_GETSECATTR(vp, &vsa, 0, cr, NULL);
2900 3136
2901 3137 if (!error) {
2902 3138 aclcnt = vsa.vsa_aclcnt;
2903 3139 if (aclcnt > MIN_ACL_ENTRIES) {
2904 3140 /* non-trivial ACL */
2905 3141 aclentp = vsa.vsa_aclentp;
2906 3142 if (exi->exi_export.ex_flags & EX_ACLOK) {
2907 3143 /* maximal permissions */
2908 3144 grp_perm = 0;
2909 3145 other_perm = 0;
2910 3146 for (; aclcnt > 0; aclcnt--, aclentp++) {
2911 3147 switch (aclentp->a_type) {
2912 3148 case USER_OBJ:
2913 3149 break;
2914 3150 case USER:
2915 3151 grp_perm |=
2916 3152 aclentp->a_perm << 3;
2917 3153 other_perm |= aclentp->a_perm;
2918 3154 break;
2919 3155 case GROUP_OBJ:
2920 3156 grp_perm |=
2921 3157 aclentp->a_perm << 3;
2922 3158 break;
2923 3159 case GROUP:
2924 3160 other_perm |= aclentp->a_perm;
2925 3161 break;
2926 3162 case OTHER_OBJ:
2927 3163 other_orig = aclentp->a_perm;
2928 3164 break;
2929 3165 case CLASS_OBJ:
2930 3166 mask_perm = aclentp->a_perm;
2931 3167 break;
2932 3168 default:
2933 3169 break;
2934 3170 }
2935 3171 }
2936 3172 grp_perm &= mask_perm << 3;
2937 3173 other_perm &= mask_perm;
2938 3174 other_perm |= other_orig;
2939 3175
2940 3176 } else {
2941 3177 /* minimal permissions */
2942 3178 grp_perm = 070;
2943 3179 other_perm = 07;
2944 3180 for (; aclcnt > 0; aclcnt--, aclentp++) {
2945 3181 switch (aclentp->a_type) {
2946 3182 case USER_OBJ:
2947 3183 break;
2948 3184 case USER:
2949 3185 case CLASS_OBJ:
2950 3186 grp_perm &=
2951 3187 aclentp->a_perm << 3;
2952 3188 other_perm &=
2953 3189 aclentp->a_perm;
2954 3190 break;
2955 3191 case GROUP_OBJ:
2956 3192 grp_perm &=
2957 3193 aclentp->a_perm << 3;
2958 3194 break;
2959 3195 case GROUP:
2960 3196 other_perm &=
2961 3197 aclentp->a_perm;
2962 3198 break;
2963 3199 case OTHER_OBJ:
2964 3200 other_perm &=
2965 3201 aclentp->a_perm;
2966 3202 break;
2967 3203 default:
2968 3204 break;
2969 3205 }
2970 3206 }
2971 3207 }
2972 3208 /* copy to va */
2973 3209 va->va_mode &= ~077;
2974 3210 va->va_mode |= grp_perm | other_perm;
|
↓ open down ↓ |
198 lines elided |
↑ open up ↑ |
2975 3211 }
2976 3212 if (vsa.vsa_aclcnt)
2977 3213 kmem_free(vsa.vsa_aclentp,
2978 3214 vsa.vsa_aclcnt * sizeof (aclent_t));
2979 3215 }
2980 3216 }
2981 3217
2982 3218 void
2983 3219 rfs_srvrinit(void)
2984 3220 {
2985 - mutex_init(&rfs_async_write_lock, NULL, MUTEX_DEFAULT, NULL);
2986 3221 nfs2_srv_caller_id = fs_new_caller_id();
3222 + zone_key_create(&rfs_zone_key, rfs_zone_init, NULL, rfs_zone_fini);
2987 3223 }
2988 3224
/*
 * Teardown counterpart of rfs_srvrinit().  Per-zone state is released
 * through the zone key's rfs_zone_fini callback, so there is nothing
 * left to do here.
 */
void
rfs_srvrfini(void)
{
}
2994 3229
3230 +/* ARGSUSED */
3231 +static void *
3232 +rfs_zone_init(zoneid_t zoneid)
3233 +{
3234 + nfs_srv_t *ns;
3235 +
3236 + ns = kmem_zalloc(sizeof (*ns), KM_SLEEP);
3237 +
3238 + mutex_init(&ns->async_write_lock, NULL, MUTEX_DEFAULT, NULL);
3239 + ns->write_async = 1;
3240 +
3241 + return (ns);
3242 +}
3243 +
3244 +/* ARGSUSED */
3245 +static void
3246 +rfs_zone_fini(zoneid_t zoneid, void *data)
3247 +{
3248 + nfs_srv_t *ns;
3249 +
3250 + ns = (nfs_srv_t *)data;
3251 + mutex_destroy(&ns->async_write_lock);
3252 + kmem_free(ns, sizeof (*ns));
3253 +}
3254 +
2995 3255 static int
2996 3256 rdma_setup_read_data2(struct nfsreadargs *ra, struct nfsrdresult *rr)
2997 3257 {
2998 3258 struct clist *wcl;
2999 3259 int wlist_len;
3000 3260 uint32_t count = rr->rr_count;
3001 3261
3002 3262 wcl = ra->ra_wlist;
3003 3263
3004 3264 if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE) {
3005 3265 return (FALSE);
3006 3266 }
3007 3267
3008 3268 wcl = ra->ra_wlist;
3009 3269 rr->rr_ok.rrok_wlist_len = wlist_len;
3010 3270 rr->rr_ok.rrok_wlist = wcl;
3011 3271
3012 3272 return (TRUE);
3013 3273 }
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX