Print this page
2988 nfssrv: need ability to go to submounts for v3 and v2 protocols
Portions contributed by: Marcel Telka <marcel.telka@nexenta.com>
Portions contributed by: Jean McCormack <jean.mccormack@nexenta.com>
Reviewed by: Yuri Pankov <yuri.pankov@nexenta.com>
Reviewed by: Alek Pinchuk <alek.pinchuk@nexenta.com>
Reviewed by: Dan Fields <dan.fields@nexenta.com>
Reviewed by: Dan McDonald <danmcd@joyent.com>
Change-Id: I6fdf110cc17e789353c4442b83a46cb80643456e
| Split |
Close |
| Expand all |
| Collapse all |
--- old/usr/src/uts/common/fs/nfs/nfs_srv.c
+++ new/usr/src/uts/common/fs/nfs/nfs_srv.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
|
↓ open down ↓ |
10 lines elided |
↑ open up ↑ |
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 +
21 22 /*
22 23 * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved.
23 - * Copyright 2014 Nexenta Systems, Inc. All rights reserved.
24 + * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
24 25 * Copyright (c) 2016 by Delphix. All rights reserved.
25 26 */
26 27
27 28 /*
28 29 * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T.
29 30 * All rights reserved.
30 31 */
31 32
32 33 #include <sys/param.h>
33 34 #include <sys/types.h>
34 35 #include <sys/systm.h>
35 36 #include <sys/cred.h>
36 37 #include <sys/buf.h>
37 38 #include <sys/vfs.h>
38 39 #include <sys/vnode.h>
39 40 #include <sys/uio.h>
40 41 #include <sys/stat.h>
41 42 #include <sys/errno.h>
42 43 #include <sys/sysmacros.h>
43 44 #include <sys/statvfs.h>
44 45 #include <sys/kmem.h>
45 46 #include <sys/kstat.h>
46 47 #include <sys/dirent.h>
47 48 #include <sys/cmn_err.h>
48 49 #include <sys/debug.h>
49 50 #include <sys/vtrace.h>
50 51 #include <sys/mode.h>
51 52 #include <sys/acl.h>
52 53 #include <sys/nbmlock.h>
53 54 #include <sys/policy.h>
54 55 #include <sys/sdt.h>
55 56
56 57 #include <rpc/types.h>
57 58 #include <rpc/auth.h>
58 59 #include <rpc/svc.h>
59 60
60 61 #include <nfs/nfs.h>
61 62 #include <nfs/export.h>
62 63 #include <nfs/nfs_cmd.h>
63 64
64 65 #include <vm/hat.h>
65 66 #include <vm/as.h>
66 67 #include <vm/seg.h>
67 68 #include <vm/seg_map.h>
68 69 #include <vm/seg_kmem.h>
69 70
70 71 #include <sys/strsubr.h>
71 72
72 73 /*
73 74 * These are the interface routines for the server side of the
74 75 * Network File System. See the NFS version 2 protocol specification
75 76 * for a description of this interface.
76 77 */
77 78
78 79 static int sattr_to_vattr(struct nfssattr *, struct vattr *);
79 80 static void acl_perm(struct vnode *, struct exportinfo *, struct vattr *,
80 81 cred_t *);
81 82
82 83 /*
83 84 * Some "over the wire" UNIX file types. These are encoded
84 85 * into the mode. This needs to be fixed in the next rev.
85 86 */
86 87 #define IFMT 0170000 /* type of file */
87 88 #define IFCHR 0020000 /* character special */
88 89 #define IFBLK 0060000 /* block special */
89 90 #define IFSOCK 0140000 /* socket */
90 91
91 92 u_longlong_t nfs2_srv_caller_id;
92 93
93 94 /*
94 95 * Get file attributes.
95 96 * Returns the current attributes of the file with the given fhandle.
96 97 */
97 98 /* ARGSUSED */
98 99 void
99 100 rfs_getattr(fhandle_t *fhp, struct nfsattrstat *ns, struct exportinfo *exi,
100 101 struct svc_req *req, cred_t *cr, bool_t ro)
101 102 {
102 103 int error;
103 104 vnode_t *vp;
104 105 struct vattr va;
105 106
106 107 vp = nfs_fhtovp(fhp, exi);
107 108 if (vp == NULL) {
108 109 ns->ns_status = NFSERR_STALE;
109 110 return;
110 111 }
111 112
112 113 /*
113 114 * Do the getattr.
114 115 */
115 116 va.va_mask = AT_ALL; /* we want all the attributes */
116 117
117 118 error = rfs4_delegated_getattr(vp, &va, 0, cr);
118 119
119 120 /* check for overflows */
120 121 if (!error) {
121 122 /* Lie about the object type for a referral */
122 123 if (vn_is_nfs_reparse(vp, cr))
123 124 va.va_type = VLNK;
124 125
125 126 acl_perm(vp, exi, &va, cr);
126 127 error = vattr_to_nattr(&va, &ns->ns_attr);
127 128 }
128 129
129 130 VN_RELE(vp);
130 131
131 132 ns->ns_status = puterrno(error);
132 133 }
/*
 * Return the filehandle argument of a GETATTR request; used by the
 * dispatcher to locate the export before calling rfs_getattr().
 */
void *
rfs_getattr_getfh(fhandle_t *fhp)
{
	return (fhp);
}
138 139
/*
 * Set file attributes.
 * Sets the attributes of the file with the given fhandle.  Returns
 * the new attributes.
 *
 * Size changes are handled specially (via VOP_SPACE) so that an owner
 * can truncate a file opened for write even when the mode forbids it,
 * and are checked against conflicting non-blocking mandatory locks.
 */
/* ARGSUSED */
void
rfs_setattr(struct nfssaargs *args, struct nfsattrstat *ns,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error;
	int flag;		/* flags passed to VOP_SETATTR (0 or ATTR_UTIME) */
	int in_crit = 0;	/* nonzero while inside the nbmand critical region */
	vnode_t *vp;
	struct vattr va;	/* attributes requested by the client */
	struct vattr bva;	/* attributes of the file before the change */
	struct flock64 bf;	/* describes the region freed by VOP_SPACE */
	caller_context_t ct;


	vp = nfs_fhtovp(&args->saa_fh, exi);
	if (vp == NULL) {
		ns->ns_status = NFSERR_STALE;
		return;
	}

	if (rdonly(ro, vp)) {
		VN_RELE(vp);
		ns->ns_status = NFSERR_ROFS;
		return;
	}

	error = sattr_to_vattr(&args->saa_sa, &va);
	if (error) {
		VN_RELE(vp);
		ns->ns_status = puterrno(error);
		return;
	}

	/*
	 * If the client is requesting a change to the mtime,
	 * but the nanosecond field is set to 1 billion, then
	 * this is a flag to the server that it should set the
	 * atime and mtime fields to the server's current time.
	 * The 1 billion number actually came from the client
	 * as 1 million, but the units in the over the wire
	 * request are microseconds instead of nanoseconds.
	 *
	 * This is an overload of the protocol and should be
	 * documented in the NFS Version 2 protocol specification.
	 */
	if (va.va_mask & AT_MTIME) {
		if (va.va_mtime.tv_nsec == 1000000000) {
			gethrestime(&va.va_mtime);
			va.va_atime = va.va_mtime;
			va.va_mask |= AT_ATIME;
			flag = 0;
		} else
			flag = ATTR_UTIME;
	} else
		flag = 0;

	/*
	 * If the filesystem is exported with nosuid, then mask off
	 * the setuid and setgid bits.
	 */
	if ((va.va_mask & AT_MODE) && vp->v_type == VREG &&
	    (exi->exi_export.ex_flags & EX_NOSUID))
		va.va_mode &= ~(VSUID | VSGID);

	ct.cc_sysid = 0;
	ct.cc_pid = 0;
	ct.cc_caller_id = nfs2_srv_caller_id;
	ct.cc_flags = CC_DONTBLOCK;

	/*
	 * We need to specially handle size changes because it is
	 * possible for the client to create a file with modes
	 * which indicate read-only, but with the file opened for
	 * writing. If the client then tries to set the size of
	 * the file, then the normal access checking done in
	 * VOP_SETATTR would prevent the client from doing so,
	 * although it should be legal for it to do so. To get
	 * around this, we do the access checking for ourselves
	 * and then use VOP_SPACE which doesn't do the access
	 * checking which VOP_SETATTR does. VOP_SPACE can only
	 * operate on VREG files, let VOP_SETATTR handle the other
	 * extremely rare cases.
	 * Also the client should not be allowed to change the
	 * size of the file if there is a conflicting non-blocking
	 * mandatory lock in the region of change.
	 */
	if (vp->v_type == VREG && va.va_mask & AT_SIZE) {
		if (nbl_need_check(vp)) {
			nbl_start_crit(vp, RW_READER);
			in_crit = 1;
		}

		bva.va_mask = AT_UID | AT_SIZE;

		error = VOP_GETATTR(vp, &bva, 0, cr, &ct);

		if (error) {
			if (in_crit)
				nbl_end_crit(vp);
			VN_RELE(vp);
			ns->ns_status = puterrno(error);
			return;
		}

		if (in_crit) {
			u_offset_t offset;
			ssize_t length;

			/*
			 * The affected region is the bytes between the
			 * old and the new size, whichever way it moves.
			 */
			if (va.va_size < bva.va_size) {
				offset = va.va_size;
				length = bva.va_size - va.va_size;
			} else {
				offset = bva.va_size;
				length = va.va_size - bva.va_size;
			}
			if (nbl_conflict(vp, NBL_WRITE, offset, length, 0,
			    NULL)) {
				error = EACCES;
			}
		}

		/*
		 * Only the owner may bypass VOP_SETATTR's access check;
		 * on success AT_SIZE is cleared so the later VOP_SETATTR
		 * does not redo (and possibly reject) the size change.
		 */
		if (crgetuid(cr) == bva.va_uid && !error &&
		    va.va_size != bva.va_size) {
			va.va_mask &= ~AT_SIZE;
			bf.l_type = F_WRLCK;
			bf.l_whence = 0;
			bf.l_start = (off64_t)va.va_size;
			bf.l_len = 0;
			bf.l_sysid = 0;
			bf.l_pid = 0;

			error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
			    (offset_t)va.va_size, cr, &ct);
		}
		if (in_crit)
			nbl_end_crit(vp);
	} else
		error = 0;

	/*
	 * Do the setattr.
	 */
	if (!error && va.va_mask) {
		error = VOP_SETATTR(vp, &va, flag, cr, &ct);
	}

	/*
	 * check if the monitor on either vop_space or vop_setattr detected
	 * a delegation conflict and if so, mark the thread flag as
	 * wouldblock so that the response is dropped and the client will
	 * try again.
	 */
	if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
		VN_RELE(vp);
		curthread->t_flag |= T_WOULDBLOCK;
		return;
	}

	if (!error) {
		va.va_mask = AT_ALL;	/* get everything */

		error = rfs4_delegated_getattr(vp, &va, 0, cr);

		/* check for overflows */
		if (!error) {
			acl_perm(vp, exi, &va, cr);
			error = vattr_to_nattr(&va, &ns->ns_attr);
		}
	}

	ct.cc_flags = 0;

	/*
	 * Force modified metadata out to stable storage.
	 */
	(void) VOP_FSYNC(vp, FNODSYNC, cr, &ct);

	VN_RELE(vp);

	ns->ns_status = puterrno(error);
}
/*
 * Return the filehandle embedded in the SETATTR arguments; used by the
 * dispatcher to locate the export before calling rfs_setattr().
 */
void *
rfs_setattr_getfh(struct nfssaargs *args)
{
	return (&args->saa_fh);
}
331 332
/*
 * Cross a mount point encountered during a lookup.
 *
 * Returns 0 unless traverse() or VOP_FID() fails.  On a 0 return,
 * *vpp and *exip are changed (and the old references released) only
 * when the covered filesystem is itself exported with "nohide";
 * otherwise both are left untouched, which is not an error.
 * On a non-zero return nothing is changed or released.
 */
int
rfs_cross_mnt(vnode_t **vpp, struct exportinfo **exip)
{
	struct exportinfo *exi;
	vnode_t *vp = *vpp;
	fid_t fid;
	int error;

	/* Take our own hold; traverse() consumes and replaces it. */
	VN_HOLD(vp);

	if ((error = traverse(&vp)) != 0) {
		VN_RELE(vp);
		return (error);
	}

	bzero(&fid, sizeof (fid));
	fid.fid_len = MAXFIDSZ;
	error = VOP_FID(vp, &fid, NULL);
	if (error) {
		VN_RELE(vp);
		return (error);
	}

	/* checkexport() returns a held exportinfo or NULL */
	exi = checkexport(&vp->v_vfsp->vfs_fsid, &fid);
	if (exi == NULL ||
	    (exi->exi_export.ex_flags & EX_NOHIDE) == 0) {
		/*
		 * Not an error: the subdirectory is simply not exported,
		 * or it is exported without "nohide" set.
		 */
		if (exi != NULL)
			exi_rele(exi);
		VN_RELE(vp);
	} else {
		/* go to submount */
		exi_rele(*exip);
		*exip = exi;

		VN_RELE(*vpp);
		*vpp = vp;
	}

	return (0);
}
378 +
/*
 * Given a directory vnode "dvp" that is the root of a mounted
 * filesystem, climb to the covering mount point, correcting *dvpp
 * and *exip in place (the old references are released on success).
 * Returns 0 on success, -1 if no export covers the upper filesystem.
 */
int
rfs_climb_crossmnt(vnode_t **dvpp, struct exportinfo **exip, cred_t *cr)
{
	struct exportinfo *exi;
	vnode_t *dvp = *dvpp;

	/* caller guarantees dvp is a filesystem root */
	ASSERT(dvp->v_flag & VROOT);

	VN_HOLD(dvp);
	dvp = untraverse(dvp);
	/* nfs_vptoexi() returns a held exportinfo or NULL */
	exi = nfs_vptoexi(NULL, dvp, cr, NULL, NULL, FALSE);
	if (exi == NULL) {
		VN_RELE(dvp);
		return (-1);
	}

	exi_rele(*exip);
	*exip = exi;
	VN_RELE(*dvpp);
	*dvpp = dvp;

	return (0);
}
407 +/*
333 408 * Directory lookup.
334 409 * Returns an fhandle and file attributes for file name in a directory.
335 410 */
336 411 /* ARGSUSED */
337 412 void
338 413 rfs_lookup(struct nfsdiropargs *da, struct nfsdiropres *dr,
339 414 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
340 415 {
341 416 int error;
342 417 vnode_t *dvp;
343 418 vnode_t *vp;
344 419 struct vattr va;
345 420 fhandle_t *fhp = da->da_fhandle;
346 421 struct sec_ol sec = {0, 0};
347 422 bool_t publicfh_flag = FALSE, auth_weak = FALSE;
348 423 char *name;
349 424 struct sockaddr *ca;
350 425
351 426 /*
352 427 * Trusted Extension doesn't support NFSv2. MOUNT
353 428 * will reject v2 clients. Need to prevent v2 client
354 429 * access via WebNFS here.
355 430 */
356 431 if (is_system_labeled() && req->rq_vers == 2) {
357 432 dr->dr_status = NFSERR_ACCES;
358 433 return;
359 434 }
360 435
361 436 /*
362 437 * Disallow NULL paths
363 438 */
364 439 if (da->da_name == NULL || *da->da_name == '\0') {
365 440 dr->dr_status = NFSERR_ACCES;
366 441 return;
367 442 }
368 443
369 444 /*
370 445 * Allow lookups from the root - the default
371 446 * location of the public filehandle.
372 447 */
373 448 if (exi != NULL && (exi->exi_export.ex_flags & EX_PUBLIC)) {
|
↓ open down ↓ |
31 lines elided |
↑ open up ↑ |
374 449 dvp = rootdir;
375 450 VN_HOLD(dvp);
376 451 } else {
377 452 dvp = nfs_fhtovp(fhp, exi);
378 453 if (dvp == NULL) {
379 454 dr->dr_status = NFSERR_STALE;
380 455 return;
381 456 }
382 457 }
383 458
459 + exi_hold(exi);
460 +
384 461 /*
385 462 * Not allow lookup beyond root.
386 463 * If the filehandle matches a filehandle of the exi,
387 464 * then the ".." refers beyond the root of an exported filesystem.
388 465 */
389 466 if (strcmp(da->da_name, "..") == 0 &&
390 467 EQFID(&exi->exi_fid, (fid_t *)&fhp->fh_len)) {
391 - VN_RELE(dvp);
392 - dr->dr_status = NFSERR_NOENT;
393 - return;
468 + if ((exi->exi_export.ex_flags & EX_NOHIDE) &&
469 + (dvp->v_flag & VROOT)) {
470 + /*
471 + * special case for ".." and 'nohide'exported root
472 + */
473 + if (rfs_climb_crossmnt(&dvp, &exi, cr) != 0) {
474 + error = NFSERR_ACCES;
475 + goto out;
476 + }
477 + } else {
478 + error = NFSERR_NOENT;
479 + goto out;
480 + }
394 481 }
395 482
396 483 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
397 484 name = nfscmd_convname(ca, exi, da->da_name, NFSCMD_CONV_INBOUND,
398 485 MAXPATHLEN);
399 486
400 487 if (name == NULL) {
401 - dr->dr_status = NFSERR_ACCES;
402 - return;
488 + error = NFSERR_ACCES;
489 + goto out;
403 490 }
404 491
405 492 /*
406 493 * If the public filehandle is used then allow
407 494 * a multi-component lookup, i.e. evaluate
408 495 * a pathname and follow symbolic links if
409 496 * necessary.
410 497 *
411 498 * This may result in a vnode in another filesystem
412 499 * which is OK as long as the filesystem is exported.
413 500 */
414 501 if (PUBLIC_FH2(fhp)) {
415 502 publicfh_flag = TRUE;
503 +
504 + exi_rele(exi);
505 +
416 506 error = rfs_publicfh_mclookup(name, dvp, cr, &vp, &exi,
417 507 &sec);
418 508 } else {
419 509 /*
420 510 * Do a normal single component lookup.
421 511 */
422 512 error = VOP_LOOKUP(dvp, name, &vp, NULL, 0, NULL, cr,
423 513 NULL, NULL, NULL);
424 514 }
425 515
426 516 if (name != da->da_name)
427 517 kmem_free(name, MAXPATHLEN);
428 518
519 + if (error == 0 && vn_ismntpt(vp)) {
520 + error = rfs_cross_mnt(&vp, &exi);
521 + if (error)
522 + VN_RELE(vp);
523 + }
429 524
430 525 if (!error) {
431 526 va.va_mask = AT_ALL; /* we want everything */
432 527
433 528 error = rfs4_delegated_getattr(vp, &va, 0, cr);
434 529
435 530 /* check for overflows */
436 531 if (!error) {
437 532 acl_perm(vp, exi, &va, cr);
438 533 error = vattr_to_nattr(&va, &dr->dr_attr);
439 534 if (!error) {
440 535 if (sec.sec_flags & SEC_QUERY)
441 536 error = makefh_ol(&dr->dr_fhandle, exi,
442 537 sec.sec_index);
443 538 else {
444 539 error = makefh(&dr->dr_fhandle, vp,
|
↓ open down ↓ |
6 lines elided |
↑ open up ↑ |
445 540 exi);
446 541 if (!error && publicfh_flag &&
447 542 !chk_clnt_sec(exi, req))
448 543 auth_weak = TRUE;
449 544 }
450 545 }
451 546 }
452 547 VN_RELE(vp);
453 548 }
454 549
550 +out:
455 551 VN_RELE(dvp);
456 552
457 - /*
458 - * If publicfh_flag is true then we have called rfs_publicfh_mclookup
459 - * and have obtained a new exportinfo in exi which needs to be
460 - * released. Note the the original exportinfo pointed to by exi
461 - * will be released by the caller, comon_dispatch.
462 - */
463 - if (publicfh_flag && exi != NULL)
553 + if (exi != NULL)
464 554 exi_rele(exi);
465 555
466 556 /*
467 557 * If it's public fh, no 0x81, and client's flavor is
468 558 * invalid, set WebNFS status to WNFSERR_CLNT_FLAVOR now.
469 559 * Then set RPC status to AUTH_TOOWEAK in common_dispatch.
470 560 */
471 561 if (auth_weak)
472 562 dr->dr_status = (enum nfsstat)WNFSERR_CLNT_FLAVOR;
473 563 else
474 564 dr->dr_status = puterrno(error);
475 565 }
/*
 * Return the directory filehandle from the LOOKUP arguments; used by
 * the dispatcher to locate the export before calling rfs_lookup().
 */
void *
rfs_lookup_getfh(struct nfsdiropargs *da)
{
	return (da->da_fhandle);
}
481 571
/*
 * Read symbolic link.
 * Returns the string in the symbolic link at the given fhandle.
 *
 * For NFS referral (reparse) objects an artificial symlink target is
 * synthesized instead of calling VOP_READLINK().  The rl_data buffer
 * (NFS_MAXPATHLEN bytes) is freed later by rfs_rlfree().
 */
/* ARGSUSED */
void
rfs_readlink(fhandle_t *fhp, struct nfsrdlnres *rl, struct exportinfo *exi,
    struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error;
	struct iovec iov;
	struct uio uio;
	vnode_t *vp;
	struct vattr va;
	struct sockaddr *ca;
	char *name = NULL;
	int is_referral = 0;

	vp = nfs_fhtovp(fhp, exi);
	if (vp == NULL) {
		rl->rl_data = NULL;
		rl->rl_status = NFSERR_STALE;
		return;
	}

	/* only the mode is needed, for the MANDLOCK check below */
	va.va_mask = AT_MODE;

	error = VOP_GETATTR(vp, &va, 0, cr, NULL);

	if (error) {
		VN_RELE(vp);
		rl->rl_data = NULL;
		rl->rl_status = puterrno(error);
		return;
	}

	if (MANDLOCK(vp, va.va_mode)) {
		VN_RELE(vp);
		rl->rl_data = NULL;
		rl->rl_status = NFSERR_ACCES;
		return;
	}

	/* We lied about the object type for a referral */
	if (vn_is_nfs_reparse(vp, cr))
		is_referral = 1;

	/*
	 * XNFS and RFC1094 require us to return ENXIO if argument
	 * is not a link. BUGID 1138002.
	 */
	if (vp->v_type != VLNK && !is_referral) {
		VN_RELE(vp);
		rl->rl_data = NULL;
		rl->rl_status = NFSERR_NXIO;
		return;
	}

	/*
	 * Allocate data for pathname.  This will be freed by rfs_rlfree.
	 */
	rl->rl_data = kmem_alloc(NFS_MAXPATHLEN, KM_SLEEP);

	if (is_referral) {
		char *s;
		size_t strsz;

		/* Get an artificial symlink based on a referral */
		s = build_symlink(vp, cr, &strsz);
		global_svstat_ptr[2][NFS_REFERLINKS].value.ui64++;
		DTRACE_PROBE2(nfs2serv__func__referral__reflink,
		    vnode_t *, vp, char *, s);
		if (s == NULL)
			error = EINVAL;
		else {
			error = 0;
			(void) strlcpy(rl->rl_data, s, NFS_MAXPATHLEN);
			rl->rl_count = (uint32_t)MIN(strsz, NFS_MAXPATHLEN);
			kmem_free(s, strsz);
		}

	} else {

		/*
		 * Set up io vector to read sym link data
		 */
		iov.iov_base = rl->rl_data;
		iov.iov_len = NFS_MAXPATHLEN;
		uio.uio_iov = &iov;
		uio.uio_iovcnt = 1;
		uio.uio_segflg = UIO_SYSSPACE;
		uio.uio_extflg = UIO_COPY_CACHED;
		uio.uio_loffset = (offset_t)0;
		uio.uio_resid = NFS_MAXPATHLEN;

		/*
		 * Do the readlink.
		 */
		error = VOP_READLINK(vp, &uio, cr, NULL);

		rl->rl_count = (uint32_t)(NFS_MAXPATHLEN - uio.uio_resid);

		if (!error)
			rl->rl_data[rl->rl_count] = '\0';

	}


	VN_RELE(vp);

	/*
	 * Convert the link target to the client's character set;
	 * on conversion a new buffer replaces rl_data.
	 */
	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
	name = nfscmd_convname(ca, exi, rl->rl_data,
	    NFSCMD_CONV_OUTBOUND, MAXPATHLEN);

	if (name != NULL && name != rl->rl_data) {
		kmem_free(rl->rl_data, NFS_MAXPATHLEN);
		rl->rl_data = name;
	}

	/*
	 * XNFS and RFC1094 require us to return ENXIO if argument
	 * is not a link. UFS returns EINVAL if this is the case,
	 * so we do the mapping here. BUGID 1138002.
	 */
	if (error == EINVAL)
		rl->rl_status = NFSERR_NXIO;
	else
		rl->rl_status = puterrno(error);

}
/*
 * Return the filehandle argument of a READLINK request; used by the
 * dispatcher to locate the export before calling rfs_readlink().
 */
void *
rfs_readlink_getfh(fhandle_t *fhp)
{
	return (fhp);
}
617 707 /*
618 708 * Free data allocated by rfs_readlink
619 709 */
620 710 void
621 711 rfs_rlfree(struct nfsrdlnres *rl)
622 712 {
623 713 if (rl->rl_data != NULL)
624 714 kmem_free(rl->rl_data, NFS_MAXPATHLEN);
625 715 }
626 716
627 717 static int rdma_setup_read_data2(struct nfsreadargs *, struct nfsrdresult *);
628 718
/*
 * Read data.
 * Returns some data read from the file at the given fhandle.
 *
 * Data is returned either in a freshly allocated mblk (rr_mp, freed by
 * rfs_rdfree() after the reply is sent) or, for RDMA clients, directly
 * into the client-provided write chunk list (ra_wlist).
 */
/* ARGSUSED */
void
rfs_read(struct nfsreadargs *ra, struct nfsrdresult *rr,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
	vnode_t *vp;
	int error;
	struct vattr va;
	struct iovec iov;
	struct uio uio;
	mblk_t *mp;
	int alloc_err = 0;
	int in_crit = 0;	/* nonzero while inside the nbmand critical region */
	caller_context_t ct;

	vp = nfs_fhtovp(&ra->ra_fhandle, exi);
	if (vp == NULL) {
		rr->rr_data = NULL;
		rr->rr_status = NFSERR_STALE;
		return;
	}

	if (vp->v_type != VREG) {
		VN_RELE(vp);
		rr->rr_data = NULL;
		rr->rr_status = NFSERR_ISDIR;
		return;
	}

	ct.cc_sysid = 0;
	ct.cc_pid = 0;
	ct.cc_caller_id = nfs2_srv_caller_id;
	ct.cc_flags = CC_DONTBLOCK;

	/*
	 * Enter the critical region before calling VOP_RWLOCK
	 * to avoid a deadlock with write requests.
	 */
	if (nbl_need_check(vp)) {
		nbl_start_crit(vp, RW_READER);
		if (nbl_conflict(vp, NBL_READ, ra->ra_offset, ra->ra_count,
		    0, NULL)) {
			nbl_end_crit(vp);
			VN_RELE(vp);
			rr->rr_data = NULL;
			rr->rr_status = NFSERR_ACCES;
			return;
		}
		in_crit = 1;
	}

	error = VOP_RWLOCK(vp, V_WRITELOCK_FALSE, &ct);

	/* check if a monitor detected a delegation conflict */
	if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
		VN_RELE(vp);
		/* mark as wouldblock so response is dropped */
		curthread->t_flag |= T_WOULDBLOCK;

		rr->rr_data = NULL;
		return;
	}

	va.va_mask = AT_ALL;

	error = VOP_GETATTR(vp, &va, 0, cr, &ct);

	if (error) {
		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
		if (in_crit)
			nbl_end_crit(vp);

		VN_RELE(vp);
		rr->rr_data = NULL;
		rr->rr_status = puterrno(error);

		return;
	}

	/*
	 * This is a kludge to allow reading of files created
	 * with no read permission. The owner of the file
	 * is always allowed to read it.
	 */
	if (crgetuid(cr) != va.va_uid) {
		error = VOP_ACCESS(vp, VREAD, 0, cr, &ct);

		if (error) {
			/*
			 * Exec is the same as read over the net because
			 * of demand loading.
			 */
			error = VOP_ACCESS(vp, VEXEC, 0, cr, &ct);
		}
		if (error) {
			VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
			if (in_crit)
				nbl_end_crit(vp);
			VN_RELE(vp);
			rr->rr_data = NULL;
			rr->rr_status = puterrno(error);

			return;
		}
	}

	if (MANDLOCK(vp, va.va_mode)) {
		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
		if (in_crit)
			nbl_end_crit(vp);

		VN_RELE(vp);
		rr->rr_data = NULL;
		rr->rr_status = NFSERR_ACCES;

		return;
	}

	rr->rr_ok.rrok_wlist_len = 0;
	rr->rr_ok.rrok_wlist = NULL;

	/* reads at or past EOF succeed with zero bytes of data */
	if ((u_offset_t)ra->ra_offset >= va.va_size) {
		rr->rr_count = 0;
		rr->rr_data = NULL;
		/*
		 * In this case, status is NFS_OK, but there is no data
		 * to encode. So set rr_mp to NULL.
		 */
		rr->rr_mp = NULL;
		rr->rr_ok.rrok_wlist = ra->ra_wlist;
		if (rr->rr_ok.rrok_wlist)
			clist_zero_len(rr->rr_ok.rrok_wlist);
		goto done;
	}

	if (ra->ra_wlist) {
		/* RDMA: read straight into the client's write chunk */
		mp = NULL;
		rr->rr_mp = NULL;
		(void) rdma_get_wchunk(req, &iov, ra->ra_wlist);
		if (ra->ra_count > iov.iov_len) {
			rr->rr_data = NULL;
			rr->rr_status = NFSERR_INVAL;
			/*
			 * NOTE(review): the "done" path below overwrites
			 * rr_status with puterrno(error) where error is 0
			 * here — confirm the INVAL status is intended to
			 * survive.
			 */
			goto done;
		}
	} else {
		/*
		 * mp will contain the data to be sent out in the read reply.
		 * This will be freed after the reply has been sent out (by the
		 * driver).
		 * Let's roundup the data to a BYTES_PER_XDR_UNIT multiple, so
		 * that the call to xdrmblk_putmblk() never fails.
		 */
		mp = allocb_wait(RNDUP(ra->ra_count), BPRI_MED, STR_NOSIG,
		    &alloc_err);
		/* allocb_wait() with STR_NOSIG sleeps rather than fail */
		ASSERT(mp != NULL);
		ASSERT(alloc_err == 0);

		rr->rr_mp = mp;

		/*
		 * Set up io vector
		 */
		iov.iov_base = (caddr_t)mp->b_datap->db_base;
		iov.iov_len = ra->ra_count;
	}

	uio.uio_iov = &iov;
	uio.uio_iovcnt = 1;
	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_extflg = UIO_COPY_CACHED;
	uio.uio_loffset = (offset_t)ra->ra_offset;
	uio.uio_resid = ra->ra_count;

	error = VOP_READ(vp, &uio, 0, cr, &ct);

	if (error) {
		if (mp)
			freeb(mp);

		/*
		 * check if a monitor detected a delegation conflict and
		 * mark as wouldblock so response is dropped
		 */
		if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
			curthread->t_flag |= T_WOULDBLOCK;
		else
			rr->rr_status = puterrno(error);

		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
		if (in_crit)
			nbl_end_crit(vp);

		VN_RELE(vp);
		rr->rr_data = NULL;

		return;
	}

	/*
	 * Get attributes again so we can send the latest access
	 * time to the client side for its cache.
	 */
	va.va_mask = AT_ALL;

	error = VOP_GETATTR(vp, &va, 0, cr, &ct);

	if (error) {
		if (mp)
			freeb(mp);

		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
		if (in_crit)
			nbl_end_crit(vp);

		VN_RELE(vp);
		rr->rr_data = NULL;
		rr->rr_status = puterrno(error);

		return;
	}

	/* number of bytes actually read */
	rr->rr_count = (uint32_t)(ra->ra_count - uio.uio_resid);

	if (mp) {
		rr->rr_data = (char *)mp->b_datap->db_base;
	} else {
		if (ra->ra_wlist) {
			rr->rr_data = (caddr_t)iov.iov_base;
			/*
			 * NOTE(review): puterrno() is normally given an
			 * errno, not an nfsstat; also rr_status is
			 * overwritten again after "done" — verify.
			 */
			if (!rdma_setup_read_data2(ra, rr)) {
				rr->rr_data = NULL;
				rr->rr_status = puterrno(NFSERR_INVAL);
			}
		}
	}
done:
	VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
	if (in_crit)
		nbl_end_crit(vp);

	acl_perm(vp, exi, &va, cr);

	/* check for overflows */
	error = vattr_to_nattr(&va, &rr->rr_attr);

	VN_RELE(vp);

	rr->rr_status = puterrno(error);
}
881 971
882 972 /*
883 973 * Free data allocated by rfs_read
884 974 */
885 975 void
886 976 rfs_rdfree(struct nfsrdresult *rr)
887 977 {
888 978 mblk_t *mp;
889 979
890 980 if (rr->rr_status == NFS_OK) {
891 981 mp = rr->rr_mp;
892 982 if (mp != NULL)
893 983 freeb(mp);
894 984 }
895 985 }
896 986
/*
 * Return the filehandle from the READ arguments; used by the
 * dispatcher to locate the export before calling rfs_read().
 */
void *
rfs_read_getfh(struct nfsreadargs *ra)
{
	return (&ra->ra_fhandle);
}
902 992
/*
 * Size of the on-stack iovec array used by rfs_write_sync(); mblk
 * chains longer than this fall back to a kmem_alloc'd array.
 */
#define MAX_IOVECS 12

#ifdef DEBUG
/* how often the on-stack iovec array sufficed vs. required allocation */
static int rfs_write_sync_hits = 0;
static int rfs_write_sync_misses = 0;
#endif
909 999
910 1000 /*
911 1001 * Write data to file.
912 1002 * Returns attributes of a file after writing some data to it.
913 1003 *
914 1004 * Any changes made here, especially in error handling might have
915 1005 * to also be done in rfs_write (which clusters write requests).
916 1006 */
917 1007 /* ARGSUSED */
918 1008 void
919 1009 rfs_write_sync(struct nfswriteargs *wa, struct nfsattrstat *ns,
920 1010 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
921 1011 {
922 1012 int error;
923 1013 vnode_t *vp;
924 1014 rlim64_t rlimit;
925 1015 struct vattr va;
926 1016 struct uio uio;
927 1017 struct iovec iov[MAX_IOVECS];
928 1018 mblk_t *m;
929 1019 struct iovec *iovp;
930 1020 int iovcnt;
931 1021 cred_t *savecred;
932 1022 int in_crit = 0;
933 1023 caller_context_t ct;
934 1024
935 1025 vp = nfs_fhtovp(&wa->wa_fhandle, exi);
936 1026 if (vp == NULL) {
937 1027 ns->ns_status = NFSERR_STALE;
938 1028 return;
939 1029 }
940 1030
941 1031 if (rdonly(ro, vp)) {
942 1032 VN_RELE(vp);
943 1033 ns->ns_status = NFSERR_ROFS;
944 1034 return;
945 1035 }
946 1036
947 1037 if (vp->v_type != VREG) {
948 1038 VN_RELE(vp);
949 1039 ns->ns_status = NFSERR_ISDIR;
950 1040 return;
951 1041 }
952 1042
953 1043 ct.cc_sysid = 0;
954 1044 ct.cc_pid = 0;
955 1045 ct.cc_caller_id = nfs2_srv_caller_id;
956 1046 ct.cc_flags = CC_DONTBLOCK;
957 1047
958 1048 va.va_mask = AT_UID|AT_MODE;
959 1049
960 1050 error = VOP_GETATTR(vp, &va, 0, cr, &ct);
961 1051
962 1052 if (error) {
963 1053 VN_RELE(vp);
964 1054 ns->ns_status = puterrno(error);
965 1055
966 1056 return;
967 1057 }
968 1058
969 1059 if (crgetuid(cr) != va.va_uid) {
970 1060 /*
971 1061 * This is a kludge to allow writes of files created
972 1062 * with read only permission. The owner of the file
973 1063 * is always allowed to write it.
974 1064 */
975 1065 error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct);
976 1066
977 1067 if (error) {
978 1068 VN_RELE(vp);
979 1069 ns->ns_status = puterrno(error);
980 1070 return;
981 1071 }
982 1072 }
983 1073
984 1074 /*
985 1075 * Can't access a mandatory lock file. This might cause
986 1076 * the NFS service thread to block forever waiting for a
987 1077 * lock to be released that will never be released.
988 1078 */
989 1079 if (MANDLOCK(vp, va.va_mode)) {
990 1080 VN_RELE(vp);
991 1081 ns->ns_status = NFSERR_ACCES;
992 1082 return;
993 1083 }
994 1084
995 1085 /*
996 1086 * We have to enter the critical region before calling VOP_RWLOCK
997 1087 * to avoid a deadlock with ufs.
998 1088 */
999 1089 if (nbl_need_check(vp)) {
1000 1090 nbl_start_crit(vp, RW_READER);
1001 1091 in_crit = 1;
1002 1092 if (nbl_conflict(vp, NBL_WRITE, wa->wa_offset,
1003 1093 wa->wa_count, 0, NULL)) {
1004 1094 error = EACCES;
1005 1095 goto out;
1006 1096 }
1007 1097 }
1008 1098
1009 1099 error = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct);
1010 1100
1011 1101 /* check if a monitor detected a delegation conflict */
1012 1102 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1013 1103 VN_RELE(vp);
1014 1104 /* mark as wouldblock so response is dropped */
1015 1105 curthread->t_flag |= T_WOULDBLOCK;
1016 1106 return;
1017 1107 }
1018 1108
1019 1109 if (wa->wa_data || wa->wa_rlist) {
1020 1110 /* Do the RDMA thing if necessary */
1021 1111 if (wa->wa_rlist) {
1022 1112 iov[0].iov_base = (char *)((wa->wa_rlist)->u.c_daddr3);
1023 1113 iov[0].iov_len = wa->wa_count;
1024 1114 } else {
1025 1115 iov[0].iov_base = wa->wa_data;
1026 1116 iov[0].iov_len = wa->wa_count;
1027 1117 }
1028 1118 uio.uio_iov = iov;
1029 1119 uio.uio_iovcnt = 1;
1030 1120 uio.uio_segflg = UIO_SYSSPACE;
1031 1121 uio.uio_extflg = UIO_COPY_DEFAULT;
1032 1122 uio.uio_loffset = (offset_t)wa->wa_offset;
1033 1123 uio.uio_resid = wa->wa_count;
1034 1124 /*
1035 1125 * The limit is checked on the client. We
1036 1126 * should allow any size writes here.
1037 1127 */
1038 1128 uio.uio_llimit = curproc->p_fsz_ctl;
1039 1129 rlimit = uio.uio_llimit - wa->wa_offset;
1040 1130 if (rlimit < (rlim64_t)uio.uio_resid)
1041 1131 uio.uio_resid = (uint_t)rlimit;
1042 1132
1043 1133 /*
1044 1134 * for now we assume no append mode
1045 1135 */
1046 1136 /*
1047 1137 * We're changing creds because VM may fault and we need
1048 1138 * the cred of the current thread to be used if quota
1049 1139 * checking is enabled.
1050 1140 */
1051 1141 savecred = curthread->t_cred;
1052 1142 curthread->t_cred = cr;
1053 1143 error = VOP_WRITE(vp, &uio, FSYNC, cr, &ct);
1054 1144 curthread->t_cred = savecred;
1055 1145 } else {
1056 1146 iovcnt = 0;
1057 1147 for (m = wa->wa_mblk; m != NULL; m = m->b_cont)
1058 1148 iovcnt++;
1059 1149 if (iovcnt <= MAX_IOVECS) {
1060 1150 #ifdef DEBUG
1061 1151 rfs_write_sync_hits++;
1062 1152 #endif
1063 1153 iovp = iov;
1064 1154 } else {
1065 1155 #ifdef DEBUG
1066 1156 rfs_write_sync_misses++;
1067 1157 #endif
1068 1158 iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
1069 1159 }
1070 1160 mblk_to_iov(wa->wa_mblk, iovcnt, iovp);
1071 1161 uio.uio_iov = iovp;
1072 1162 uio.uio_iovcnt = iovcnt;
1073 1163 uio.uio_segflg = UIO_SYSSPACE;
1074 1164 uio.uio_extflg = UIO_COPY_DEFAULT;
1075 1165 uio.uio_loffset = (offset_t)wa->wa_offset;
1076 1166 uio.uio_resid = wa->wa_count;
1077 1167 /*
1078 1168 * The limit is checked on the client. We
1079 1169 * should allow any size writes here.
1080 1170 */
1081 1171 uio.uio_llimit = curproc->p_fsz_ctl;
1082 1172 rlimit = uio.uio_llimit - wa->wa_offset;
1083 1173 if (rlimit < (rlim64_t)uio.uio_resid)
1084 1174 uio.uio_resid = (uint_t)rlimit;
1085 1175
1086 1176 /*
1087 1177 * For now we assume no append mode.
1088 1178 */
1089 1179 /*
1090 1180 * We're changing creds because VM may fault and we need
1091 1181 * the cred of the current thread to be used if quota
1092 1182 * checking is enabled.
1093 1183 */
1094 1184 savecred = curthread->t_cred;
1095 1185 curthread->t_cred = cr;
1096 1186 error = VOP_WRITE(vp, &uio, FSYNC, cr, &ct);
1097 1187 curthread->t_cred = savecred;
1098 1188
1099 1189 if (iovp != iov)
1100 1190 kmem_free(iovp, sizeof (*iovp) * iovcnt);
1101 1191 }
1102 1192
1103 1193 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct);
1104 1194
1105 1195 if (!error) {
1106 1196 /*
1107 1197 * Get attributes again so we send the latest mod
1108 1198 * time to the client side for its cache.
1109 1199 */
1110 1200 va.va_mask = AT_ALL; /* now we want everything */
1111 1201
1112 1202 error = VOP_GETATTR(vp, &va, 0, cr, &ct);
1113 1203
1114 1204 /* check for overflows */
1115 1205 if (!error) {
1116 1206 acl_perm(vp, exi, &va, cr);
1117 1207 error = vattr_to_nattr(&va, &ns->ns_attr);
1118 1208 }
1119 1209 }
1120 1210
1121 1211 out:
1122 1212 if (in_crit)
1123 1213 nbl_end_crit(vp);
1124 1214 VN_RELE(vp);
1125 1215
1126 1216 /* check if a monitor detected a delegation conflict */
1127 1217 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
1128 1218 /* mark as wouldblock so response is dropped */
1129 1219 curthread->t_flag |= T_WOULDBLOCK;
1130 1220 else
1131 1221 ns->ns_status = puterrno(error);
1132 1222
1133 1223 }
1134 1224
/*
 * One queued NFSv2 WRITE request awaiting clustered processing by
 * rfs_write().  Each RPC service thread places one of these (stack
 * allocated) on the per-file cluster list and sleeps until the thread
 * that owns the cluster fills in ns->ns_status and broadcasts.
 */
struct rfs_async_write {
	struct nfswriteargs *wa;	/* decoded WRITE arguments */
	struct nfsattrstat *ns;		/* response; ns_status doubles as "done" flag */
	struct svc_req *req;		/* originating RPC request */
	cred_t *cr;			/* caller's credentials */
	bool_t ro;			/* export is read-only for this caller */
	kthread_t *thread;		/* service thread to mark T_WOULDBLOCK on */
	struct rfs_async_write *list;	/* next request in this cluster, by offset */
};
1144 1234
/*
 * A write cluster: all pending WRITE requests against a single file
 * (identified by file handle).  Lives on rfs_async_write_head, protected
 * by rfs_async_write_lock; waiters sleep on cv until their entry is done.
 */
struct rfs_async_write_list {
	fhandle_t *fhp;				/* file handle shared by the cluster */
	kcondvar_t cv;				/* broadcast when the cluster completes */
	struct rfs_async_write *list;		/* requests, sorted by starting offset */
	struct rfs_async_write_list *next;	/* next cluster (different file) */
};
1151 1241
/* Head of the list of active write clusters; protected by the lock below. */
static struct rfs_async_write_list *rfs_async_write_head = NULL;
static kmutex_t rfs_async_write_lock;
static int rfs_write_async = 1;	/* enables write clustering if == 1 */

/* Max iovecs gathered per VOP_WRITE before falling back to kmem_alloc. */
#define	MAXCLIOVECS	42
/* Sentinel "not yet processed" status; must not collide with NFS_OK (0). */
#define	RFSWRITE_INITVAL	(enum nfsstat) -1

#ifdef DEBUG
static int rfs_write_hits = 0;		/* clusters served from the stack iovec array */
static int rfs_write_misses = 0;	/* clusters that needed an allocated iovec array */
#endif
1163 1253
1164 1254 /*
1165 1255 * Write data to file.
1166 1256 * Returns attributes of a file after writing some data to it.
1167 1257 */
1168 1258 void
1169 1259 rfs_write(struct nfswriteargs *wa, struct nfsattrstat *ns,
1170 1260 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
1171 1261 {
1172 1262 int error;
1173 1263 vnode_t *vp;
1174 1264 rlim64_t rlimit;
1175 1265 struct vattr va;
1176 1266 struct uio uio;
1177 1267 struct rfs_async_write_list *lp;
1178 1268 struct rfs_async_write_list *nlp;
1179 1269 struct rfs_async_write *rp;
1180 1270 struct rfs_async_write *nrp;
1181 1271 struct rfs_async_write *trp;
1182 1272 struct rfs_async_write *lrp;
1183 1273 int data_written;
1184 1274 int iovcnt;
1185 1275 mblk_t *m;
1186 1276 struct iovec *iovp;
1187 1277 struct iovec *niovp;
1188 1278 struct iovec iov[MAXCLIOVECS];
1189 1279 int count;
1190 1280 int rcount;
1191 1281 uint_t off;
1192 1282 uint_t len;
1193 1283 struct rfs_async_write nrpsp;
1194 1284 struct rfs_async_write_list nlpsp;
1195 1285 ushort_t t_flag;
1196 1286 cred_t *savecred;
1197 1287 int in_crit = 0;
1198 1288 caller_context_t ct;
1199 1289
1200 1290 if (!rfs_write_async) {
1201 1291 rfs_write_sync(wa, ns, exi, req, cr, ro);
1202 1292 return;
1203 1293 }
1204 1294
1205 1295 /*
1206 1296 * Initialize status to RFSWRITE_INITVAL instead of 0, since value of 0
1207 1297 * is considered an OK.
1208 1298 */
1209 1299 ns->ns_status = RFSWRITE_INITVAL;
1210 1300
1211 1301 nrp = &nrpsp;
1212 1302 nrp->wa = wa;
1213 1303 nrp->ns = ns;
1214 1304 nrp->req = req;
1215 1305 nrp->cr = cr;
1216 1306 nrp->ro = ro;
1217 1307 nrp->thread = curthread;
1218 1308
1219 1309 ASSERT(curthread->t_schedflag & TS_DONT_SWAP);
1220 1310
1221 1311 /*
1222 1312 * Look to see if there is already a cluster started
1223 1313 * for this file.
1224 1314 */
1225 1315 mutex_enter(&rfs_async_write_lock);
1226 1316 for (lp = rfs_async_write_head; lp != NULL; lp = lp->next) {
1227 1317 if (bcmp(&wa->wa_fhandle, lp->fhp,
1228 1318 sizeof (fhandle_t)) == 0)
1229 1319 break;
1230 1320 }
1231 1321
1232 1322 /*
1233 1323 * If lp is non-NULL, then there is already a cluster
1234 1324 * started. We need to place ourselves in the cluster
1235 1325 * list in the right place as determined by starting
1236 1326 * offset. Conflicts with non-blocking mandatory locked
1237 1327 * regions will be checked when the cluster is processed.
1238 1328 */
1239 1329 if (lp != NULL) {
1240 1330 rp = lp->list;
1241 1331 trp = NULL;
1242 1332 while (rp != NULL && rp->wa->wa_offset < wa->wa_offset) {
1243 1333 trp = rp;
1244 1334 rp = rp->list;
1245 1335 }
1246 1336 nrp->list = rp;
1247 1337 if (trp == NULL)
1248 1338 lp->list = nrp;
1249 1339 else
1250 1340 trp->list = nrp;
1251 1341 while (nrp->ns->ns_status == RFSWRITE_INITVAL)
1252 1342 cv_wait(&lp->cv, &rfs_async_write_lock);
1253 1343 mutex_exit(&rfs_async_write_lock);
1254 1344
1255 1345 return;
1256 1346 }
1257 1347
1258 1348 /*
1259 1349 * No cluster started yet, start one and add ourselves
1260 1350 * to the list of clusters.
1261 1351 */
1262 1352 nrp->list = NULL;
1263 1353
1264 1354 nlp = &nlpsp;
1265 1355 nlp->fhp = &wa->wa_fhandle;
1266 1356 cv_init(&nlp->cv, NULL, CV_DEFAULT, NULL);
1267 1357 nlp->list = nrp;
1268 1358 nlp->next = NULL;
1269 1359
1270 1360 if (rfs_async_write_head == NULL) {
1271 1361 rfs_async_write_head = nlp;
1272 1362 } else {
1273 1363 lp = rfs_async_write_head;
1274 1364 while (lp->next != NULL)
1275 1365 lp = lp->next;
1276 1366 lp->next = nlp;
1277 1367 }
1278 1368 mutex_exit(&rfs_async_write_lock);
1279 1369
1280 1370 /*
1281 1371 * Convert the file handle common to all of the requests
1282 1372 * in this cluster to a vnode.
1283 1373 */
1284 1374 vp = nfs_fhtovp(&wa->wa_fhandle, exi);
1285 1375 if (vp == NULL) {
1286 1376 mutex_enter(&rfs_async_write_lock);
1287 1377 if (rfs_async_write_head == nlp)
1288 1378 rfs_async_write_head = nlp->next;
1289 1379 else {
1290 1380 lp = rfs_async_write_head;
1291 1381 while (lp->next != nlp)
1292 1382 lp = lp->next;
1293 1383 lp->next = nlp->next;
1294 1384 }
1295 1385 t_flag = curthread->t_flag & T_WOULDBLOCK;
1296 1386 for (rp = nlp->list; rp != NULL; rp = rp->list) {
1297 1387 rp->ns->ns_status = NFSERR_STALE;
1298 1388 rp->thread->t_flag |= t_flag;
1299 1389 }
1300 1390 cv_broadcast(&nlp->cv);
1301 1391 mutex_exit(&rfs_async_write_lock);
1302 1392
1303 1393 return;
1304 1394 }
1305 1395
1306 1396 /*
1307 1397 * Can only write regular files. Attempts to write any
1308 1398 * other file types fail with EISDIR.
1309 1399 */
1310 1400 if (vp->v_type != VREG) {
1311 1401 VN_RELE(vp);
1312 1402 mutex_enter(&rfs_async_write_lock);
1313 1403 if (rfs_async_write_head == nlp)
1314 1404 rfs_async_write_head = nlp->next;
1315 1405 else {
1316 1406 lp = rfs_async_write_head;
1317 1407 while (lp->next != nlp)
1318 1408 lp = lp->next;
1319 1409 lp->next = nlp->next;
1320 1410 }
1321 1411 t_flag = curthread->t_flag & T_WOULDBLOCK;
1322 1412 for (rp = nlp->list; rp != NULL; rp = rp->list) {
1323 1413 rp->ns->ns_status = NFSERR_ISDIR;
1324 1414 rp->thread->t_flag |= t_flag;
1325 1415 }
1326 1416 cv_broadcast(&nlp->cv);
1327 1417 mutex_exit(&rfs_async_write_lock);
1328 1418
1329 1419 return;
1330 1420 }
1331 1421
1332 1422 /*
1333 1423 * Enter the critical region before calling VOP_RWLOCK, to avoid a
1334 1424 * deadlock with ufs.
1335 1425 */
1336 1426 if (nbl_need_check(vp)) {
1337 1427 nbl_start_crit(vp, RW_READER);
1338 1428 in_crit = 1;
1339 1429 }
1340 1430
1341 1431 ct.cc_sysid = 0;
1342 1432 ct.cc_pid = 0;
1343 1433 ct.cc_caller_id = nfs2_srv_caller_id;
1344 1434 ct.cc_flags = CC_DONTBLOCK;
1345 1435
1346 1436 /*
1347 1437 * Lock the file for writing. This operation provides
1348 1438 * the delay which allows clusters to grow.
1349 1439 */
1350 1440 error = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct);
1351 1441
1352 1442 /* check if a monitor detected a delegation conflict */
1353 1443 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1354 1444 if (in_crit)
1355 1445 nbl_end_crit(vp);
1356 1446 VN_RELE(vp);
1357 1447 /* mark as wouldblock so response is dropped */
1358 1448 curthread->t_flag |= T_WOULDBLOCK;
1359 1449 mutex_enter(&rfs_async_write_lock);
1360 1450 if (rfs_async_write_head == nlp)
1361 1451 rfs_async_write_head = nlp->next;
1362 1452 else {
1363 1453 lp = rfs_async_write_head;
1364 1454 while (lp->next != nlp)
1365 1455 lp = lp->next;
1366 1456 lp->next = nlp->next;
1367 1457 }
1368 1458 for (rp = nlp->list; rp != NULL; rp = rp->list) {
1369 1459 if (rp->ns->ns_status == RFSWRITE_INITVAL) {
1370 1460 rp->ns->ns_status = puterrno(error);
1371 1461 rp->thread->t_flag |= T_WOULDBLOCK;
1372 1462 }
1373 1463 }
1374 1464 cv_broadcast(&nlp->cv);
1375 1465 mutex_exit(&rfs_async_write_lock);
1376 1466
1377 1467 return;
1378 1468 }
1379 1469
1380 1470 /*
1381 1471 * Disconnect this cluster from the list of clusters.
1382 1472 * The cluster that is being dealt with must be fixed
1383 1473 * in size after this point, so there is no reason
1384 1474 * to leave it on the list so that new requests can
1385 1475 * find it.
1386 1476 *
1387 1477 * The algorithm is that the first write request will
1388 1478 * create a cluster, convert the file handle to a
1389 1479 * vnode pointer, and then lock the file for writing.
1390 1480 * This request is not likely to be clustered with
1391 1481 * any others. However, the next request will create
1392 1482 * a new cluster and be blocked in VOP_RWLOCK while
1393 1483 * the first request is being processed. This delay
1394 1484 * will allow more requests to be clustered in this
1395 1485 * second cluster.
1396 1486 */
1397 1487 mutex_enter(&rfs_async_write_lock);
1398 1488 if (rfs_async_write_head == nlp)
1399 1489 rfs_async_write_head = nlp->next;
1400 1490 else {
1401 1491 lp = rfs_async_write_head;
1402 1492 while (lp->next != nlp)
1403 1493 lp = lp->next;
1404 1494 lp->next = nlp->next;
1405 1495 }
1406 1496 mutex_exit(&rfs_async_write_lock);
1407 1497
1408 1498 /*
1409 1499 * Step through the list of requests in this cluster.
1410 1500 * We need to check permissions to make sure that all
1411 1501 * of the requests have sufficient permission to write
1412 1502 * the file. A cluster can be composed of requests
1413 1503 * from different clients and different users on each
1414 1504 * client.
1415 1505 *
1416 1506 * As a side effect, we also calculate the size of the
1417 1507 * byte range that this cluster encompasses.
1418 1508 */
1419 1509 rp = nlp->list;
1420 1510 off = rp->wa->wa_offset;
1421 1511 len = (uint_t)0;
1422 1512 do {
1423 1513 if (rdonly(rp->ro, vp)) {
1424 1514 rp->ns->ns_status = NFSERR_ROFS;
1425 1515 t_flag = curthread->t_flag & T_WOULDBLOCK;
1426 1516 rp->thread->t_flag |= t_flag;
1427 1517 continue;
1428 1518 }
1429 1519
1430 1520 va.va_mask = AT_UID|AT_MODE;
1431 1521
1432 1522 error = VOP_GETATTR(vp, &va, 0, rp->cr, &ct);
1433 1523
1434 1524 if (!error) {
1435 1525 if (crgetuid(rp->cr) != va.va_uid) {
1436 1526 /*
1437 1527 * This is a kludge to allow writes of files
1438 1528 * created with read only permission. The
1439 1529 * owner of the file is always allowed to
1440 1530 * write it.
1441 1531 */
1442 1532 error = VOP_ACCESS(vp, VWRITE, 0, rp->cr, &ct);
1443 1533 }
1444 1534 if (!error && MANDLOCK(vp, va.va_mode))
1445 1535 error = EACCES;
1446 1536 }
1447 1537
1448 1538 /*
1449 1539 * Check for a conflict with a nbmand-locked region.
1450 1540 */
1451 1541 if (in_crit && nbl_conflict(vp, NBL_WRITE, rp->wa->wa_offset,
1452 1542 rp->wa->wa_count, 0, NULL)) {
1453 1543 error = EACCES;
1454 1544 }
1455 1545
1456 1546 if (error) {
1457 1547 rp->ns->ns_status = puterrno(error);
1458 1548 t_flag = curthread->t_flag & T_WOULDBLOCK;
1459 1549 rp->thread->t_flag |= t_flag;
1460 1550 continue;
1461 1551 }
1462 1552 if (len < rp->wa->wa_offset + rp->wa->wa_count - off)
1463 1553 len = rp->wa->wa_offset + rp->wa->wa_count - off;
1464 1554 } while ((rp = rp->list) != NULL);
1465 1555
1466 1556 /*
1467 1557 * Step through the cluster attempting to gather as many
1468 1558 * requests which are contiguous as possible. These
1469 1559 * contiguous requests are handled via one call to VOP_WRITE
1470 1560 * instead of different calls to VOP_WRITE. We also keep
1471 1561 * track of the fact that any data was written.
1472 1562 */
1473 1563 rp = nlp->list;
1474 1564 data_written = 0;
1475 1565 do {
1476 1566 /*
1477 1567 * Skip any requests which are already marked as having an
1478 1568 * error.
1479 1569 */
1480 1570 if (rp->ns->ns_status != RFSWRITE_INITVAL) {
1481 1571 rp = rp->list;
1482 1572 continue;
1483 1573 }
1484 1574
1485 1575 /*
1486 1576 * Count the number of iovec's which are required
1487 1577 * to handle this set of requests. One iovec is
1488 1578 * needed for each data buffer, whether addressed
1489 1579 * by wa_data or by the b_rptr pointers in the
1490 1580 * mblk chains.
1491 1581 */
1492 1582 iovcnt = 0;
1493 1583 lrp = rp;
1494 1584 for (;;) {
1495 1585 if (lrp->wa->wa_data || lrp->wa->wa_rlist)
1496 1586 iovcnt++;
1497 1587 else {
1498 1588 m = lrp->wa->wa_mblk;
1499 1589 while (m != NULL) {
1500 1590 iovcnt++;
1501 1591 m = m->b_cont;
1502 1592 }
1503 1593 }
1504 1594 if (lrp->list == NULL ||
1505 1595 lrp->list->ns->ns_status != RFSWRITE_INITVAL ||
1506 1596 lrp->wa->wa_offset + lrp->wa->wa_count !=
1507 1597 lrp->list->wa->wa_offset) {
1508 1598 lrp = lrp->list;
1509 1599 break;
1510 1600 }
1511 1601 lrp = lrp->list;
1512 1602 }
1513 1603
1514 1604 if (iovcnt <= MAXCLIOVECS) {
1515 1605 #ifdef DEBUG
1516 1606 rfs_write_hits++;
1517 1607 #endif
1518 1608 niovp = iov;
1519 1609 } else {
1520 1610 #ifdef DEBUG
1521 1611 rfs_write_misses++;
1522 1612 #endif
1523 1613 niovp = kmem_alloc(sizeof (*niovp) * iovcnt, KM_SLEEP);
1524 1614 }
1525 1615 /*
1526 1616 * Put together the scatter/gather iovecs.
1527 1617 */
1528 1618 iovp = niovp;
1529 1619 trp = rp;
1530 1620 count = 0;
1531 1621 do {
1532 1622 if (trp->wa->wa_data || trp->wa->wa_rlist) {
1533 1623 if (trp->wa->wa_rlist) {
1534 1624 iovp->iov_base =
1535 1625 (char *)((trp->wa->wa_rlist)->
1536 1626 u.c_daddr3);
1537 1627 iovp->iov_len = trp->wa->wa_count;
1538 1628 } else {
1539 1629 iovp->iov_base = trp->wa->wa_data;
1540 1630 iovp->iov_len = trp->wa->wa_count;
1541 1631 }
1542 1632 iovp++;
1543 1633 } else {
1544 1634 m = trp->wa->wa_mblk;
1545 1635 rcount = trp->wa->wa_count;
1546 1636 while (m != NULL) {
1547 1637 iovp->iov_base = (caddr_t)m->b_rptr;
1548 1638 iovp->iov_len = (m->b_wptr - m->b_rptr);
1549 1639 rcount -= iovp->iov_len;
1550 1640 if (rcount < 0)
1551 1641 iovp->iov_len += rcount;
1552 1642 iovp++;
1553 1643 if (rcount <= 0)
1554 1644 break;
1555 1645 m = m->b_cont;
1556 1646 }
1557 1647 }
1558 1648 count += trp->wa->wa_count;
1559 1649 trp = trp->list;
1560 1650 } while (trp != lrp);
1561 1651
1562 1652 uio.uio_iov = niovp;
1563 1653 uio.uio_iovcnt = iovcnt;
1564 1654 uio.uio_segflg = UIO_SYSSPACE;
1565 1655 uio.uio_extflg = UIO_COPY_DEFAULT;
1566 1656 uio.uio_loffset = (offset_t)rp->wa->wa_offset;
1567 1657 uio.uio_resid = count;
1568 1658 /*
1569 1659 * The limit is checked on the client. We
1570 1660 * should allow any size writes here.
1571 1661 */
1572 1662 uio.uio_llimit = curproc->p_fsz_ctl;
1573 1663 rlimit = uio.uio_llimit - rp->wa->wa_offset;
1574 1664 if (rlimit < (rlim64_t)uio.uio_resid)
1575 1665 uio.uio_resid = (uint_t)rlimit;
1576 1666
1577 1667 /*
1578 1668 * For now we assume no append mode.
1579 1669 */
1580 1670
1581 1671 /*
1582 1672 * We're changing creds because VM may fault
1583 1673 * and we need the cred of the current
1584 1674 * thread to be used if quota * checking is
1585 1675 * enabled.
1586 1676 */
1587 1677 savecred = curthread->t_cred;
1588 1678 curthread->t_cred = cr;
1589 1679 error = VOP_WRITE(vp, &uio, 0, rp->cr, &ct);
1590 1680 curthread->t_cred = savecred;
1591 1681
1592 1682 /* check if a monitor detected a delegation conflict */
1593 1683 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
1594 1684 /* mark as wouldblock so response is dropped */
1595 1685 curthread->t_flag |= T_WOULDBLOCK;
1596 1686
1597 1687 if (niovp != iov)
1598 1688 kmem_free(niovp, sizeof (*niovp) * iovcnt);
1599 1689
1600 1690 if (!error) {
1601 1691 data_written = 1;
1602 1692 /*
1603 1693 * Get attributes again so we send the latest mod
1604 1694 * time to the client side for its cache.
1605 1695 */
1606 1696 va.va_mask = AT_ALL; /* now we want everything */
1607 1697
1608 1698 error = VOP_GETATTR(vp, &va, 0, rp->cr, &ct);
1609 1699
1610 1700 if (!error)
1611 1701 acl_perm(vp, exi, &va, rp->cr);
1612 1702 }
1613 1703
1614 1704 /*
1615 1705 * Fill in the status responses for each request
1616 1706 * which was just handled. Also, copy the latest
1617 1707 * attributes in to the attribute responses if
1618 1708 * appropriate.
1619 1709 */
1620 1710 t_flag = curthread->t_flag & T_WOULDBLOCK;
1621 1711 do {
1622 1712 rp->thread->t_flag |= t_flag;
1623 1713 /* check for overflows */
1624 1714 if (!error) {
1625 1715 error = vattr_to_nattr(&va, &rp->ns->ns_attr);
1626 1716 }
1627 1717 rp->ns->ns_status = puterrno(error);
1628 1718 rp = rp->list;
1629 1719 } while (rp != lrp);
1630 1720 } while (rp != NULL);
1631 1721
1632 1722 /*
1633 1723 * If any data was written at all, then we need to flush
1634 1724 * the data and metadata to stable storage.
1635 1725 */
1636 1726 if (data_written) {
1637 1727 error = VOP_PUTPAGE(vp, (u_offset_t)off, len, 0, cr, &ct);
1638 1728
1639 1729 if (!error) {
1640 1730 error = VOP_FSYNC(vp, FNODSYNC, cr, &ct);
1641 1731 }
1642 1732 }
1643 1733
1644 1734 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct);
1645 1735
1646 1736 if (in_crit)
1647 1737 nbl_end_crit(vp);
1648 1738 VN_RELE(vp);
1649 1739
1650 1740 t_flag = curthread->t_flag & T_WOULDBLOCK;
1651 1741 mutex_enter(&rfs_async_write_lock);
1652 1742 for (rp = nlp->list; rp != NULL; rp = rp->list) {
1653 1743 if (rp->ns->ns_status == RFSWRITE_INITVAL) {
1654 1744 rp->ns->ns_status = puterrno(error);
1655 1745 rp->thread->t_flag |= t_flag;
1656 1746 }
1657 1747 }
1658 1748 cv_broadcast(&nlp->cv);
1659 1749 mutex_exit(&rfs_async_write_lock);
1660 1750
1661 1751 }
1662 1752
1663 1753 void *
1664 1754 rfs_write_getfh(struct nfswriteargs *wa)
1665 1755 {
1666 1756 return (&wa->wa_fhandle);
1667 1757 }
1668 1758
1669 1759 /*
1670 1760 * Create a file.
1671 1761 * Creates a file with given attributes and returns those attributes
1672 1762 * and an fhandle for the new file.
1673 1763 */
1674 1764 void
1675 1765 rfs_create(struct nfscreatargs *args, struct nfsdiropres *dr,
1676 1766 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
1677 1767 {
1678 1768 int error;
1679 1769 int lookuperr;
1680 1770 int in_crit = 0;
1681 1771 struct vattr va;
1682 1772 vnode_t *vp;
1683 1773 vnode_t *realvp;
1684 1774 vnode_t *dvp;
1685 1775 char *name = args->ca_da.da_name;
1686 1776 vnode_t *tvp = NULL;
1687 1777 int mode;
1688 1778 int lookup_ok;
1689 1779 bool_t trunc;
1690 1780 struct sockaddr *ca;
1691 1781
1692 1782 /*
1693 1783 * Disallow NULL paths
1694 1784 */
1695 1785 if (name == NULL || *name == '\0') {
1696 1786 dr->dr_status = NFSERR_ACCES;
1697 1787 return;
1698 1788 }
1699 1789
1700 1790 dvp = nfs_fhtovp(args->ca_da.da_fhandle, exi);
1701 1791 if (dvp == NULL) {
1702 1792 dr->dr_status = NFSERR_STALE;
1703 1793 return;
1704 1794 }
1705 1795
1706 1796 error = sattr_to_vattr(args->ca_sa, &va);
1707 1797 if (error) {
1708 1798 dr->dr_status = puterrno(error);
1709 1799 return;
1710 1800 }
1711 1801
1712 1802 /*
1713 1803 * Must specify the mode.
1714 1804 */
1715 1805 if (!(va.va_mask & AT_MODE)) {
1716 1806 VN_RELE(dvp);
1717 1807 dr->dr_status = NFSERR_INVAL;
1718 1808 return;
1719 1809 }
1720 1810
1721 1811 /*
1722 1812 * This is a completely gross hack to make mknod
1723 1813 * work over the wire until we can wack the protocol
1724 1814 */
1725 1815 if ((va.va_mode & IFMT) == IFCHR) {
1726 1816 if (args->ca_sa->sa_size == (uint_t)NFS_FIFO_DEV)
1727 1817 va.va_type = VFIFO; /* xtra kludge for named pipe */
1728 1818 else {
1729 1819 va.va_type = VCHR;
1730 1820 /*
1731 1821 * uncompress the received dev_t
1732 1822 * if the top half is zero indicating a request
1733 1823 * from an `older style' OS.
1734 1824 */
1735 1825 if ((va.va_size & 0xffff0000) == 0)
1736 1826 va.va_rdev = nfsv2_expdev(va.va_size);
1737 1827 else
1738 1828 va.va_rdev = (dev_t)va.va_size;
1739 1829 }
1740 1830 va.va_mask &= ~AT_SIZE;
1741 1831 } else if ((va.va_mode & IFMT) == IFBLK) {
1742 1832 va.va_type = VBLK;
1743 1833 /*
1744 1834 * uncompress the received dev_t
1745 1835 * if the top half is zero indicating a request
1746 1836 * from an `older style' OS.
1747 1837 */
1748 1838 if ((va.va_size & 0xffff0000) == 0)
1749 1839 va.va_rdev = nfsv2_expdev(va.va_size);
1750 1840 else
1751 1841 va.va_rdev = (dev_t)va.va_size;
1752 1842 va.va_mask &= ~AT_SIZE;
1753 1843 } else if ((va.va_mode & IFMT) == IFSOCK) {
1754 1844 va.va_type = VSOCK;
1755 1845 } else {
1756 1846 va.va_type = VREG;
1757 1847 }
1758 1848 va.va_mode &= ~IFMT;
1759 1849 va.va_mask |= AT_TYPE;
1760 1850
1761 1851 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1762 1852 name = nfscmd_convname(ca, exi, name, NFSCMD_CONV_INBOUND,
1763 1853 MAXPATHLEN);
1764 1854 if (name == NULL) {
1765 1855 dr->dr_status = puterrno(EINVAL);
1766 1856 return;
1767 1857 }
1768 1858
1769 1859 /*
1770 1860 * Why was the choice made to use VWRITE as the mode to the
1771 1861 * call to VOP_CREATE ? This results in a bug. When a client
1772 1862 * opens a file that already exists and is RDONLY, the second
1773 1863 * open fails with an EACESS because of the mode.
1774 1864 * bug ID 1054648.
1775 1865 */
1776 1866 lookup_ok = 0;
1777 1867 mode = VWRITE;
1778 1868 if (!(va.va_mask & AT_SIZE) || va.va_type != VREG) {
1779 1869 error = VOP_LOOKUP(dvp, name, &tvp, NULL, 0, NULL, cr,
1780 1870 NULL, NULL, NULL);
1781 1871 if (!error) {
1782 1872 struct vattr at;
1783 1873
1784 1874 lookup_ok = 1;
1785 1875 at.va_mask = AT_MODE;
1786 1876 error = VOP_GETATTR(tvp, &at, 0, cr, NULL);
1787 1877 if (!error)
1788 1878 mode = (at.va_mode & S_IWUSR) ? VWRITE : VREAD;
1789 1879 VN_RELE(tvp);
1790 1880 tvp = NULL;
1791 1881 }
1792 1882 }
1793 1883
1794 1884 if (!lookup_ok) {
1795 1885 if (rdonly(ro, dvp)) {
1796 1886 error = EROFS;
1797 1887 } else if (va.va_type != VREG && va.va_type != VFIFO &&
1798 1888 va.va_type != VSOCK && secpolicy_sys_devices(cr) != 0) {
1799 1889 error = EPERM;
1800 1890 } else {
1801 1891 error = 0;
1802 1892 }
1803 1893 }
1804 1894
1805 1895 /*
1806 1896 * If file size is being modified on an already existing file
1807 1897 * make sure that there are no conflicting non-blocking mandatory
1808 1898 * locks in the region being manipulated. Return EACCES if there
1809 1899 * are conflicting locks.
1810 1900 */
1811 1901 if (!error && (va.va_type == VREG) && (va.va_mask & AT_SIZE)) {
1812 1902 lookuperr = VOP_LOOKUP(dvp, name, &tvp, NULL, 0, NULL, cr,
1813 1903 NULL, NULL, NULL);
1814 1904
1815 1905 if (!lookuperr &&
1816 1906 rfs4_check_delegated(FWRITE, tvp, va.va_size == 0)) {
1817 1907 VN_RELE(tvp);
1818 1908 curthread->t_flag |= T_WOULDBLOCK;
1819 1909 goto out;
1820 1910 }
1821 1911
1822 1912 if (!lookuperr && nbl_need_check(tvp)) {
1823 1913 /*
1824 1914 * The file exists. Now check if it has any
1825 1915 * conflicting non-blocking mandatory locks
1826 1916 * in the region being changed.
1827 1917 */
1828 1918 struct vattr bva;
1829 1919 u_offset_t offset;
1830 1920 ssize_t length;
1831 1921
1832 1922 nbl_start_crit(tvp, RW_READER);
1833 1923 in_crit = 1;
1834 1924
1835 1925 bva.va_mask = AT_SIZE;
1836 1926 error = VOP_GETATTR(tvp, &bva, 0, cr, NULL);
1837 1927 if (!error) {
1838 1928 if (va.va_size < bva.va_size) {
1839 1929 offset = va.va_size;
1840 1930 length = bva.va_size - va.va_size;
1841 1931 } else {
1842 1932 offset = bva.va_size;
1843 1933 length = va.va_size - bva.va_size;
1844 1934 }
1845 1935 if (length) {
1846 1936 if (nbl_conflict(tvp, NBL_WRITE,
1847 1937 offset, length, 0, NULL)) {
1848 1938 error = EACCES;
1849 1939 }
1850 1940 }
1851 1941 }
1852 1942 if (error) {
1853 1943 nbl_end_crit(tvp);
1854 1944 VN_RELE(tvp);
1855 1945 in_crit = 0;
1856 1946 }
1857 1947 } else if (tvp != NULL) {
1858 1948 VN_RELE(tvp);
1859 1949 }
1860 1950 }
1861 1951
1862 1952 if (!error) {
1863 1953 /*
1864 1954 * If filesystem is shared with nosuid the remove any
1865 1955 * setuid/setgid bits on create.
1866 1956 */
1867 1957 if (va.va_type == VREG &&
1868 1958 exi->exi_export.ex_flags & EX_NOSUID)
1869 1959 va.va_mode &= ~(VSUID | VSGID);
1870 1960
1871 1961 error = VOP_CREATE(dvp, name, &va, NONEXCL, mode, &vp, cr, 0,
1872 1962 NULL, NULL);
1873 1963
1874 1964 if (!error) {
1875 1965
1876 1966 if ((va.va_mask & AT_SIZE) && (va.va_size == 0))
1877 1967 trunc = TRUE;
1878 1968 else
1879 1969 trunc = FALSE;
1880 1970
1881 1971 if (rfs4_check_delegated(FWRITE, vp, trunc)) {
1882 1972 VN_RELE(vp);
1883 1973 curthread->t_flag |= T_WOULDBLOCK;
1884 1974 goto out;
1885 1975 }
1886 1976 va.va_mask = AT_ALL;
1887 1977
1888 1978 error = VOP_GETATTR(vp, &va, 0, cr, NULL);
1889 1979
1890 1980 /* check for overflows */
1891 1981 if (!error) {
1892 1982 acl_perm(vp, exi, &va, cr);
1893 1983 error = vattr_to_nattr(&va, &dr->dr_attr);
1894 1984 if (!error) {
1895 1985 error = makefh(&dr->dr_fhandle, vp,
1896 1986 exi);
1897 1987 }
1898 1988 }
1899 1989 /*
1900 1990 * Force modified metadata out to stable storage.
1901 1991 *
1902 1992 * if a underlying vp exists, pass it to VOP_FSYNC
1903 1993 */
1904 1994 if (VOP_REALVP(vp, &realvp, NULL) == 0)
1905 1995 (void) VOP_FSYNC(realvp, FNODSYNC, cr, NULL);
1906 1996 else
1907 1997 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
1908 1998 VN_RELE(vp);
1909 1999 }
1910 2000
1911 2001 if (in_crit) {
1912 2002 nbl_end_crit(tvp);
1913 2003 VN_RELE(tvp);
1914 2004 }
1915 2005 }
1916 2006
1917 2007 /*
1918 2008 * Force modified data and metadata out to stable storage.
1919 2009 */
1920 2010 (void) VOP_FSYNC(dvp, 0, cr, NULL);
1921 2011
1922 2012 out:
1923 2013
1924 2014 VN_RELE(dvp);
1925 2015
1926 2016 dr->dr_status = puterrno(error);
1927 2017
1928 2018 if (name != args->ca_da.da_name)
1929 2019 kmem_free(name, MAXPATHLEN);
1930 2020 }
1931 2021 void *
1932 2022 rfs_create_getfh(struct nfscreatargs *args)
1933 2023 {
1934 2024 return (args->ca_da.da_fhandle);
1935 2025 }
1936 2026
1937 2027 /*
1938 2028 * Remove a file.
1939 2029 * Remove named file from parent directory.
1940 2030 */
1941 2031 /* ARGSUSED */
1942 2032 void
1943 2033 rfs_remove(struct nfsdiropargs *da, enum nfsstat *status,
1944 2034 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
1945 2035 {
1946 2036 int error = 0;
1947 2037 vnode_t *vp;
1948 2038 vnode_t *targvp;
1949 2039 int in_crit = 0;
1950 2040
1951 2041 /*
1952 2042 * Disallow NULL paths
1953 2043 */
1954 2044 if (da->da_name == NULL || *da->da_name == '\0') {
1955 2045 *status = NFSERR_ACCES;
1956 2046 return;
1957 2047 }
1958 2048
1959 2049 vp = nfs_fhtovp(da->da_fhandle, exi);
1960 2050 if (vp == NULL) {
1961 2051 *status = NFSERR_STALE;
1962 2052 return;
1963 2053 }
1964 2054
1965 2055 if (rdonly(ro, vp)) {
1966 2056 VN_RELE(vp);
1967 2057 *status = NFSERR_ROFS;
1968 2058 return;
1969 2059 }
1970 2060
1971 2061 /*
1972 2062 * Check for a conflict with a non-blocking mandatory share reservation.
1973 2063 */
1974 2064 error = VOP_LOOKUP(vp, da->da_name, &targvp, NULL, 0,
1975 2065 NULL, cr, NULL, NULL, NULL);
1976 2066 if (error != 0) {
1977 2067 VN_RELE(vp);
1978 2068 *status = puterrno(error);
1979 2069 return;
1980 2070 }
1981 2071
1982 2072 /*
1983 2073 * If the file is delegated to an v4 client, then initiate
1984 2074 * recall and drop this request (by setting T_WOULDBLOCK).
1985 2075 * The client will eventually re-transmit the request and
1986 2076 * (hopefully), by then, the v4 client will have returned
1987 2077 * the delegation.
1988 2078 */
1989 2079
1990 2080 if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
1991 2081 VN_RELE(vp);
1992 2082 VN_RELE(targvp);
1993 2083 curthread->t_flag |= T_WOULDBLOCK;
1994 2084 return;
1995 2085 }
1996 2086
1997 2087 if (nbl_need_check(targvp)) {
1998 2088 nbl_start_crit(targvp, RW_READER);
1999 2089 in_crit = 1;
2000 2090 if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
2001 2091 error = EACCES;
2002 2092 goto out;
2003 2093 }
2004 2094 }
2005 2095
2006 2096 error = VOP_REMOVE(vp, da->da_name, cr, NULL, 0);
2007 2097
2008 2098 /*
2009 2099 * Force modified data and metadata out to stable storage.
2010 2100 */
2011 2101 (void) VOP_FSYNC(vp, 0, cr, NULL);
2012 2102
2013 2103 out:
2014 2104 if (in_crit)
2015 2105 nbl_end_crit(targvp);
2016 2106 VN_RELE(targvp);
2017 2107 VN_RELE(vp);
2018 2108
2019 2109 *status = puterrno(error);
2020 2110
2021 2111 }
2022 2112
2023 2113 void *
2024 2114 rfs_remove_getfh(struct nfsdiropargs *da)
2025 2115 {
2026 2116 return (da->da_fhandle);
2027 2117 }
2028 2118
/*
 * rename a file
 * Give a file (from) a new name (to).
 *
 * Both directory file handles must resolve within the same export;
 * cross-export renames are rejected with NFSERR_XDEV.  Interacts with
 * NFSv4 delegations: if either the source file or an existing target is
 * delegated, recall is initiated and the request is dropped by setting
 * T_WOULDBLOCK so the client retransmits later.
 */
/* ARGSUSED */
void
rfs_rename(struct nfsrnmargs *args, enum nfsstat *status,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error = 0;
	vnode_t *fromvp;	/* source directory */
	vnode_t *tovp;		/* target directory */
	struct exportinfo *to_exi;
	fhandle_t *fh;
	vnode_t *srcvp;		/* the file being renamed */
	vnode_t *targvp;	/* existing target entry, if any */
	int in_crit = 0;	/* set once srcvp enters an NBL critical region */

	/* Resolve the source directory from its file handle. */
	fromvp = nfs_fhtovp(args->rna_from.da_fhandle, exi);
	if (fromvp == NULL) {
		*status = NFSERR_STALE;
		return;
	}

	/* The target handle must refer to an exported file system ... */
	fh = args->rna_to.da_fhandle;
	to_exi = checkexport(&fh->fh_fsid, (fid_t *)&fh->fh_xlen);
	if (to_exi == NULL) {
		VN_RELE(fromvp);
		*status = NFSERR_ACCES;
		return;
	}
	exi_rele(to_exi);

	/*
	 * ... and to the same export as the source.  The hold returned by
	 * checkexport() was dropped above because only the pointer
	 * comparison is needed here.
	 */
	if (to_exi != exi) {
		VN_RELE(fromvp);
		*status = NFSERR_XDEV;
		return;
	}

	tovp = nfs_fhtovp(args->rna_to.da_fhandle, exi);
	if (tovp == NULL) {
		VN_RELE(fromvp);
		*status = NFSERR_STALE;
		return;
	}

	/* Both handles must name directories. */
	if (fromvp->v_type != VDIR || tovp->v_type != VDIR) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_NOTDIR;
		return;
	}

	/*
	 * Disallow NULL paths
	 */
	if (args->rna_from.da_name == NULL || *args->rna_from.da_name == '\0' ||
	    args->rna_to.da_name == NULL || *args->rna_to.da_name == '\0') {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_ACCES;
		return;
	}

	if (rdonly(ro, tovp)) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_ROFS;
		return;
	}

	/*
	 * Check for a conflict with a non-blocking mandatory share reservation.
	 */
	error = VOP_LOOKUP(fromvp, args->rna_from.da_name, &srcvp, NULL, 0,
	    NULL, cr, NULL, NULL, NULL);
	if (error != 0) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = puterrno(error);
		return;
	}

	/* Check for delegations on the source file */

	if (rfs4_check_delegated(FWRITE, srcvp, FALSE)) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		VN_RELE(srcvp);
		/* Drop the request; the client will retransmit. */
		curthread->t_flag |= T_WOULDBLOCK;
		return;
	}

	/* Check for delegation on the file being renamed over, if it exists */

	if (rfs4_deleg_policy != SRV_NEVER_DELEGATE &&
	    VOP_LOOKUP(tovp, args->rna_to.da_name, &targvp, NULL, 0, NULL, cr,
	    NULL, NULL, NULL) == 0) {

		if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
			VN_RELE(tovp);
			VN_RELE(fromvp);
			VN_RELE(srcvp);
			VN_RELE(targvp);
			curthread->t_flag |= T_WOULDBLOCK;
			return;
		}
		VN_RELE(targvp);
	}


	if (nbl_need_check(srcvp)) {
		nbl_start_crit(srcvp, RW_READER);
		in_crit = 1;
		if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL)) {
			error = EACCES;
			goto out;
		}
	}

	error = VOP_RENAME(fromvp, args->rna_from.da_name,
	    tovp, args->rna_to.da_name, cr, NULL, 0);

	/* On success, update the vnode's cached pathname to the new name. */
	if (error == 0)
		vn_renamepath(tovp, srcvp, args->rna_to.da_name,
		    strlen(args->rna_to.da_name));

	/*
	 * Force modified data and metadata out to stable storage.
	 */
	(void) VOP_FSYNC(tovp, 0, cr, NULL);
	(void) VOP_FSYNC(fromvp, 0, cr, NULL);

out:
	if (in_crit)
		nbl_end_crit(srcvp);
	VN_RELE(srcvp);
	VN_RELE(tovp);
	VN_RELE(fromvp);

	*status = puterrno(error);

}
2172 2262 void *
2173 2263 rfs_rename_getfh(struct nfsrnmargs *args)
2174 2264 {
2175 2265 return (args->rna_from.da_fhandle);
2176 2266 }
2177 2267
/*
 * Link to a file.
 * Create a file (to) which is a hard link to the given file (from).
 *
 * Both file handles must resolve within the same export; hard links may
 * not cross export boundaries (NFSERR_XDEV).
 */
/* ARGSUSED */
void
rfs_link(struct nfslinkargs *args, enum nfsstat *status,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error;
	vnode_t *fromvp;	/* existing (source) file */
	vnode_t *tovp;		/* directory receiving the new link */
	struct exportinfo *to_exi;
	fhandle_t *fh;

	fromvp = nfs_fhtovp(args->la_from, exi);
	if (fromvp == NULL) {
		*status = NFSERR_STALE;
		return;
	}

	/* The target directory handle must refer to an exported fs ... */
	fh = args->la_to.da_fhandle;
	to_exi = checkexport(&fh->fh_fsid, (fid_t *)&fh->fh_xlen);
	if (to_exi == NULL) {
		VN_RELE(fromvp);
		*status = NFSERR_ACCES;
		return;
	}
	exi_rele(to_exi);

	/*
	 * ... and to the same export as the source.  The checkexport()
	 * hold was dropped above; only the pointer comparison is needed.
	 */
	if (to_exi != exi) {
		VN_RELE(fromvp);
		*status = NFSERR_XDEV;
		return;
	}

	tovp = nfs_fhtovp(args->la_to.da_fhandle, exi);
	if (tovp == NULL) {
		VN_RELE(fromvp);
		*status = NFSERR_STALE;
		return;
	}

	/* The parent of the new link must be a directory. */
	if (tovp->v_type != VDIR) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_NOTDIR;
		return;
	}
	/*
	 * Disallow NULL paths
	 */
	if (args->la_to.da_name == NULL || *args->la_to.da_name == '\0') {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_ACCES;
		return;
	}

	if (rdonly(ro, tovp)) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_ROFS;
		return;
	}

	error = VOP_LINK(tovp, fromvp, args->la_to.da_name, cr, NULL, 0);

	/*
	 * Force modified data and metadata out to stable storage.
	 * NOTE(review): the source is synced with FNODSYNC, presumably
	 * because only its attributes (link count) changed -- confirm
	 * against VOP_FSYNC(9E).
	 */
	(void) VOP_FSYNC(tovp, 0, cr, NULL);
	(void) VOP_FSYNC(fromvp, FNODSYNC, cr, NULL);

	VN_RELE(tovp);
	VN_RELE(fromvp);

	*status = puterrno(error);

}
2258 2348 void *
2259 2349 rfs_link_getfh(struct nfslinkargs *args)
2260 2350 {
2261 2351 return (args->la_from);
2262 2352 }
2263 2353
2264 2354 /*
2265 2355 * Symbolicly link to a file.
2266 2356 * Create a file (to) with the given attributes which is a symbolic link
2267 2357 * to the given path name (to).
2268 2358 */
2269 2359 void
2270 2360 rfs_symlink(struct nfsslargs *args, enum nfsstat *status,
2271 2361 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
2272 2362 {
2273 2363 int error;
2274 2364 struct vattr va;
2275 2365 vnode_t *vp;
2276 2366 vnode_t *svp;
2277 2367 int lerror;
2278 2368 struct sockaddr *ca;
2279 2369 char *name = NULL;
2280 2370
2281 2371 /*
2282 2372 * Disallow NULL paths
2283 2373 */
2284 2374 if (args->sla_from.da_name == NULL || *args->sla_from.da_name == '\0') {
2285 2375 *status = NFSERR_ACCES;
2286 2376 return;
2287 2377 }
2288 2378
2289 2379 vp = nfs_fhtovp(args->sla_from.da_fhandle, exi);
2290 2380 if (vp == NULL) {
2291 2381 *status = NFSERR_STALE;
2292 2382 return;
2293 2383 }
2294 2384
2295 2385 if (rdonly(ro, vp)) {
2296 2386 VN_RELE(vp);
2297 2387 *status = NFSERR_ROFS;
2298 2388 return;
2299 2389 }
2300 2390
2301 2391 error = sattr_to_vattr(args->sla_sa, &va);
2302 2392 if (error) {
2303 2393 VN_RELE(vp);
2304 2394 *status = puterrno(error);
2305 2395 return;
2306 2396 }
2307 2397
2308 2398 if (!(va.va_mask & AT_MODE)) {
2309 2399 VN_RELE(vp);
2310 2400 *status = NFSERR_INVAL;
2311 2401 return;
2312 2402 }
2313 2403
2314 2404 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2315 2405 name = nfscmd_convname(ca, exi, args->sla_tnm,
2316 2406 NFSCMD_CONV_INBOUND, MAXPATHLEN);
2317 2407
2318 2408 if (name == NULL) {
2319 2409 *status = NFSERR_ACCES;
2320 2410 return;
2321 2411 }
2322 2412
2323 2413 va.va_type = VLNK;
2324 2414 va.va_mask |= AT_TYPE;
2325 2415
2326 2416 error = VOP_SYMLINK(vp, args->sla_from.da_name, &va, name, cr, NULL, 0);
2327 2417
2328 2418 /*
2329 2419 * Force new data and metadata out to stable storage.
2330 2420 */
2331 2421 lerror = VOP_LOOKUP(vp, args->sla_from.da_name, &svp, NULL, 0,
2332 2422 NULL, cr, NULL, NULL, NULL);
2333 2423
2334 2424 if (!lerror) {
2335 2425 (void) VOP_FSYNC(svp, 0, cr, NULL);
2336 2426 VN_RELE(svp);
2337 2427 }
2338 2428
2339 2429 /*
2340 2430 * Force modified data and metadata out to stable storage.
2341 2431 */
2342 2432 (void) VOP_FSYNC(vp, 0, cr, NULL);
2343 2433
2344 2434 VN_RELE(vp);
2345 2435
2346 2436 *status = puterrno(error);
2347 2437 if (name != args->sla_tnm)
2348 2438 kmem_free(name, MAXPATHLEN);
2349 2439
2350 2440 }
2351 2441 void *
2352 2442 rfs_symlink_getfh(struct nfsslargs *args)
2353 2443 {
2354 2444 return (args->sla_from.da_fhandle);
2355 2445 }
2356 2446
/*
 * Make a directory.
 * Create a directory with the given name, parent directory, and attributes.
 * Returns a file handle and attributes for the new directory.
 */
/* ARGSUSED */
void
rfs_mkdir(struct nfscreatargs *args, struct nfsdiropres *dr,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error;
	struct vattr va;
	vnode_t *dvp = NULL;	/* the newly created directory */
	vnode_t *vp;		/* the parent directory */
	char *name = args->ca_da.da_name;

	/*
	 * Disallow NULL paths
	 */
	if (name == NULL || *name == '\0') {
		dr->dr_status = NFSERR_ACCES;
		return;
	}

	vp = nfs_fhtovp(args->ca_da.da_fhandle, exi);
	if (vp == NULL) {
		dr->dr_status = NFSERR_STALE;
		return;
	}

	if (rdonly(ro, vp)) {
		VN_RELE(vp);
		dr->dr_status = NFSERR_ROFS;
		return;
	}

	error = sattr_to_vattr(args->ca_sa, &va);
	if (error) {
		VN_RELE(vp);
		dr->dr_status = puterrno(error);
		return;
	}

	/* The client must supply an initial mode. */
	if (!(va.va_mask & AT_MODE)) {
		VN_RELE(vp);
		dr->dr_status = NFSERR_INVAL;
		return;
	}

	va.va_type = VDIR;
	va.va_mask |= AT_TYPE;

	error = VOP_MKDIR(vp, name, &va, &dvp, cr, NULL, 0, NULL);

	if (!error) {
		/*
		 * Attributes of the newly created directory should
		 * be returned to the client.
		 */
		va.va_mask = AT_ALL;	/* We want everything */
		error = VOP_GETATTR(dvp, &va, 0, cr, NULL);

		/* check for overflows */
		if (!error) {
			/* Adjust reported permissions for non-trivial ACLs. */
			acl_perm(vp, exi, &va, cr);
			error = vattr_to_nattr(&va, &dr->dr_attr);
			if (!error) {
				error = makefh(&dr->dr_fhandle, dvp, exi);
			}
		}
		/*
		 * Force new data and metadata out to stable storage.
		 */
		(void) VOP_FSYNC(dvp, 0, cr, NULL);
		VN_RELE(dvp);
	}

	/*
	 * Force modified data and metadata out to stable storage.
	 */
	(void) VOP_FSYNC(vp, 0, cr, NULL);

	VN_RELE(vp);

	dr->dr_status = puterrno(error);

}
2444 2534 void *
2445 2535 rfs_mkdir_getfh(struct nfscreatargs *args)
2446 2536 {
2447 2537 return (args->ca_da.da_fhandle);
2448 2538 }
2449 2539
/*
 * Remove a directory.
 * Remove the given directory name from the given parent directory.
 */
/* ARGSUSED */
void
rfs_rmdir(struct nfsdiropargs *da, enum nfsstat *status,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error;
	vnode_t *vp;	/* the parent directory */

	/*
	 * Disallow NULL paths
	 */
	if (da->da_name == NULL || *da->da_name == '\0') {
		*status = NFSERR_ACCES;
		return;
	}

	vp = nfs_fhtovp(da->da_fhandle, exi);
	if (vp == NULL) {
		*status = NFSERR_STALE;
		return;
	}

	if (rdonly(ro, vp)) {
		VN_RELE(vp);
		*status = NFSERR_ROFS;
		return;
	}

	/*
	 * VOP_RMDIR takes a third argument (the current
	 * directory of the process).  That's because someone
	 * wants to return EINVAL if one tries to remove ".".
	 * Of course, NFS servers have no idea what their
	 * clients' current directories are.  We fake it by
	 * supplying a vnode known to exist and illegal to
	 * remove.
	 */
	error = VOP_RMDIR(vp, da->da_name, rootdir, cr, NULL, 0);

	/*
	 * Force modified data and metadata out to stable storage.
	 */
	(void) VOP_FSYNC(vp, 0, cr, NULL);

	VN_RELE(vp);

	/*
	 * System V defines rmdir to return EEXIST, not ENOTEMPTY,
	 * if the directory is not empty.  A System V NFS server
	 * needs to map NFSERR_EXIST to NFSERR_NOTEMPTY to transmit
	 * over the wire.
	 */
	if (error == EEXIST)
		*status = NFSERR_NOTEMPTY;
	else
		*status = puterrno(error);

}
2512 2602 void *
2513 2603 rfs_rmdir_getfh(struct nfsdiropargs *da)
2514 2604 {
2515 2605 return (da->da_fhandle);
2516 2606 }
2517 2607
/*
 * Read directory entries.
 * Reads up to rda_count bytes of dirent64 records starting at rda_offset
 * and converts entry names for the client's character set; the result
 * buffer is freed later by rfs_rddirfree().
 */
/* ARGSUSED */
void
rfs_readdir(struct nfsrddirargs *rda, struct nfsrddirres *rd,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error;
	int iseof;		/* set by VOP_READDIR at end of directory */
	struct iovec iov;
	struct uio uio;
	vnode_t *vp;
	char *ndata = NULL;	/* character-converted entry buffer, if any */
	struct sockaddr *ca;
	size_t nents;
	int ret;

	vp = nfs_fhtovp(&rda->rda_fh, exi);
	if (vp == NULL) {
		rd->rd_entries = NULL;
		rd->rd_status = NFSERR_STALE;
		return;
	}

	if (vp->v_type != VDIR) {
		VN_RELE(vp);
		rd->rd_entries = NULL;
		rd->rd_status = NFSERR_NOTDIR;
		return;
	}

	/* Hold the directory read-locked across the access check and read. */
	(void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);

	error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);

	if (error) {
		rd->rd_entries = NULL;
		goto bad;
	}

	/* A zero-byte request returns no entries and is not EOF. */
	if (rda->rda_count == 0) {
		rd->rd_entries = NULL;
		rd->rd_size = 0;
		rd->rd_eof = FALSE;
		goto bad;
	}

	/* Clamp the transfer to the v2 protocol maximum. */
	rda->rda_count = MIN(rda->rda_count, NFS_MAXDATA);

	/*
	 * Allocate data for entries.  This will be freed by rfs_rddirfree.
	 */
	rd->rd_bufsize = (uint_t)rda->rda_count;
	rd->rd_entries = kmem_alloc(rd->rd_bufsize, KM_SLEEP);

	/*
	 * Set up io vector to read directory data
	 */
	iov.iov_base = (caddr_t)rd->rd_entries;
	iov.iov_len = rda->rda_count;
	uio.uio_iov = &iov;
	uio.uio_iovcnt = 1;
	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_extflg = UIO_COPY_CACHED;
	uio.uio_loffset = (offset_t)rda->rda_offset;
	uio.uio_resid = rda->rda_count;

	/*
	 * read directory
	 */
	error = VOP_READDIR(vp, &uio, cr, &iseof, NULL, 0);

	/*
	 * Clean up
	 */
	if (!error) {
		/*
		 * set size and eof
		 */
		if (uio.uio_resid == rda->rda_count) {
			/* Nothing was read: the offset is at/past EOF. */
			rd->rd_size = 0;
			rd->rd_eof = TRUE;
		} else {
			rd->rd_size = (uint32_t)(rda->rda_count -
			    uio.uio_resid);
			rd->rd_eof = iseof ? TRUE : FALSE;
		}
	}

	/*
	 * NOTE(review): the conversion below runs even when VOP_READDIR
	 * failed, in which case rd->rd_size has not been assigned by this
	 * function -- confirm the dispatcher zeroes the result struct.
	 */
	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
	nents = nfscmd_countents((char *)rd->rd_entries, rd->rd_size);
	ret = nfscmd_convdirplus(ca, exi, (char *)rd->rd_entries, nents,
	    rda->rda_count, &ndata);

	if (ret != 0) {
		size_t dropbytes;
		/*
		 * We had to drop one or more entries in order to fit
		 * during the character conversion.  We need to patch
		 * up the size and eof info.
		 */
		if (rd->rd_eof)
			rd->rd_eof = FALSE;
		dropbytes = nfscmd_dropped_entrysize(
		    (struct dirent64 *)rd->rd_entries, nents, ret);
		rd->rd_size -= dropbytes;
	}
	if (ndata == NULL) {
		/* No conversion was needed; keep the original buffer. */
		ndata = (char *)rd->rd_entries;
	} else if (ndata != (char *)rd->rd_entries) {
		/* Conversion allocated a new buffer; swap it in. */
		kmem_free(rd->rd_entries, rd->rd_bufsize);
		rd->rd_entries = (void *)ndata;
		rd->rd_bufsize = rda->rda_count;
	}

bad:
	VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);

#if 0 /* notyet */
	/*
	 * Don't do this.  It causes local disk writes when just
	 * reading the file and the overhead is deemed larger
	 * than the benefit.
	 */
	/*
	 * Force modified metadata out to stable storage.
	 */
	(void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
#endif

	VN_RELE(vp);

	rd->rd_status = puterrno(error);

}
2651 2741 void *
2652 2742 rfs_readdir_getfh(struct nfsrddirargs *rda)
2653 2743 {
2654 2744 return (&rda->rda_fh);
2655 2745 }
2656 2746 void
2657 2747 rfs_rddirfree(struct nfsrddirres *rd)
2658 2748 {
2659 2749 if (rd->rd_entries != NULL)
2660 2750 kmem_free(rd->rd_entries, rd->rd_bufsize);
2661 2751 }
2662 2752
2663 2753 /* ARGSUSED */
2664 2754 void
2665 2755 rfs_statfs(fhandle_t *fh, struct nfsstatfs *fs, struct exportinfo *exi,
2666 2756 struct svc_req *req, cred_t *cr, bool_t ro)
2667 2757 {
2668 2758 int error;
2669 2759 struct statvfs64 sb;
2670 2760 vnode_t *vp;
2671 2761
2672 2762 vp = nfs_fhtovp(fh, exi);
2673 2763 if (vp == NULL) {
2674 2764 fs->fs_status = NFSERR_STALE;
2675 2765 return;
2676 2766 }
2677 2767
2678 2768 error = VFS_STATVFS(vp->v_vfsp, &sb);
2679 2769
2680 2770 if (!error) {
2681 2771 fs->fs_tsize = nfstsize();
2682 2772 fs->fs_bsize = sb.f_frsize;
2683 2773 fs->fs_blocks = sb.f_blocks;
2684 2774 fs->fs_bfree = sb.f_bfree;
2685 2775 fs->fs_bavail = sb.f_bavail;
2686 2776 }
2687 2777
2688 2778 VN_RELE(vp);
2689 2779
2690 2780 fs->fs_status = puterrno(error);
2691 2781
2692 2782 }
/*
 * STATFS operates directly on the request's file handle; hand it to the
 * dispatcher to locate the export.
 */
void *
rfs_statfs_getfh(fhandle_t *fh)
{
	return (fh);
}
2698 2788
/*
 * Convert an NFSv2 over-the-wire sattr into a vattr.  A va_mask bit is
 * set only for fields the client actually supplied; in the v2 protocol
 * an all-ones value means "don't set this field".
 *
 * Returns 0 on success, or EOVERFLOW on 32-bit kernels when a supplied
 * time value cannot be represented in time_t.
 */
static int
sattr_to_vattr(struct nfssattr *sa, struct vattr *vap)
{
	vap->va_mask = 0;

	/*
	 * There was a sign extension bug in some VFS based systems
	 * which stored the mode as a short.  When it would get
	 * assigned to a u_long, no sign extension would occur.
	 * It needed to, but this wasn't noticed because sa_mode
	 * would then get assigned back to the short, thus ignoring
	 * the upper 16 bits of sa_mode.
	 *
	 * To make this implementation work for both broken
	 * clients and good clients, we check for both versions
	 * of the mode.
	 */
	if (sa->sa_mode != (uint32_t)((ushort_t)-1) &&
	    sa->sa_mode != (uint32_t)-1) {
		vap->va_mask |= AT_MODE;
		vap->va_mode = sa->sa_mode;
	}
	if (sa->sa_uid != (uint32_t)-1) {
		vap->va_mask |= AT_UID;
		vap->va_uid = sa->sa_uid;
	}
	if (sa->sa_gid != (uint32_t)-1) {
		vap->va_mask |= AT_GID;
		vap->va_gid = sa->sa_gid;
	}
	if (sa->sa_size != (uint32_t)-1) {
		vap->va_mask |= AT_SIZE;
		vap->va_size = sa->sa_size;
	}
	if (sa->sa_atime.tv_sec != (int32_t)-1 &&
	    sa->sa_atime.tv_usec != (int32_t)-1) {
#ifndef _LP64
		/* return error if time overflow */
		if (!NFS2_TIME_OK(sa->sa_atime.tv_sec))
			return (EOVERFLOW);
#endif
		vap->va_mask |= AT_ATIME;
		/*
		 * nfs protocol defines times as unsigned so don't extend sign,
		 * unless sysadmin set nfs_allow_preepoch_time.
		 */
		NFS_TIME_T_CONVERT(vap->va_atime.tv_sec, sa->sa_atime.tv_sec);
		vap->va_atime.tv_nsec = (uint32_t)(sa->sa_atime.tv_usec * 1000);
	}
	if (sa->sa_mtime.tv_sec != (int32_t)-1 &&
	    sa->sa_mtime.tv_usec != (int32_t)-1) {
#ifndef _LP64
		/* return error if time overflow */
		if (!NFS2_TIME_OK(sa->sa_mtime.tv_sec))
			return (EOVERFLOW);
#endif
		vap->va_mask |= AT_MTIME;
		/*
		 * nfs protocol defines times as unsigned so don't extend sign,
		 * unless sysadmin set nfs_allow_preepoch_time.
		 */
		NFS_TIME_T_CONVERT(vap->va_mtime.tv_sec, sa->sa_mtime.tv_sec);
		vap->va_mtime.tv_nsec = (uint32_t)(sa->sa_mtime.tv_usec * 1000);
	}
	return (0);
}
2765 2855
/*
 * Map vtype_t (used as the index; see the ASSERT in vattr_to_nattr) to
 * the NFSv2 over-the-wire file type.  Types with no v2 representation
 * map to 0; VFIFO is remapped separately via NA_SETFIFO().
 */
static enum nfsftype vt_to_nf[] = {
	0, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, 0, 0, 0, NFSOC, 0
};
2769 2859
/*
 * check the following fields for overflow: nodeid, size, and time.
 * There could be a problem when converting 64-bit LP64 fields
 * into 32-bit ones.  Return an error if there is an overflow.
 *
 * Converts a vattr into the NFSv2 over-the-wire fattr.  Returns 0 on
 * success, EFBIG if the nodeid or a regular-file/directory size does
 * not fit in 32 bits, or EOVERFLOW if a timestamp does not fit.
 */
int
vattr_to_nattr(struct vattr *vap, struct nfsfattr *na)
{
	ASSERT(vap->va_type >= VNON && vap->va_type <= VBAD);
	na->na_type = vt_to_nf[vap->va_type];

	if (vap->va_mode == (unsigned short) -1)
		na->na_mode = (uint32_t)-1;
	else
		na->na_mode = VTTOIF(vap->va_type) | vap->va_mode;

	/* Remap unmappable ids to the wire "nobody" values. */
	if (vap->va_uid == (unsigned short)(-1))
		na->na_uid = (uint32_t)(-1);
	else if (vap->va_uid == UID_NOBODY)
		na->na_uid = (uint32_t)NFS_UID_NOBODY;
	else
		na->na_uid = vap->va_uid;

	if (vap->va_gid == (unsigned short)(-1))
		na->na_gid = (uint32_t)-1;
	else if (vap->va_gid == GID_NOBODY)
		na->na_gid = (uint32_t)NFS_GID_NOBODY;
	else
		na->na_gid = vap->va_gid;

	/*
	 * Do we need to check fsid for overflow?  It is 64-bit in the
	 * vattr, but are bigger than 32 bit values supported?
	 */
	na->na_fsid = vap->va_fsid;

	na->na_nodeid = vap->va_nodeid;

	/*
	 * Check to make sure that the nodeid is representable over the
	 * wire without losing bits.
	 */
	if (vap->va_nodeid != (u_longlong_t)na->na_nodeid)
		return (EFBIG);
	na->na_nlink = vap->va_nlink;

	/*
	 * Check for big files here, instead of at the caller.  See
	 * comments in cstat for large special file explanation.
	 */
	if (vap->va_size > (u_longlong_t)MAXOFF32_T) {
		if ((vap->va_type == VREG) || (vap->va_type == VDIR))
			return (EFBIG);
		if ((vap->va_type == VBLK) || (vap->va_type == VCHR)) {
			/* UNKNOWN_SIZE | OVERFLOW */
			na->na_size = MAXOFF32_T;
		} else
			na->na_size = vap->va_size;
	} else
		na->na_size = vap->va_size;

	/*
	 * If the vnode times overflow the 32-bit times that NFS2
	 * uses on the wire then return an error.
	 */
	if (!NFS_VAP_TIME_OK(vap)) {
		return (EOVERFLOW);
	}
	/* v2 wire times are seconds + microseconds. */
	na->na_atime.tv_sec = vap->va_atime.tv_sec;
	na->na_atime.tv_usec = vap->va_atime.tv_nsec / 1000;

	na->na_mtime.tv_sec = vap->va_mtime.tv_sec;
	na->na_mtime.tv_usec = vap->va_mtime.tv_nsec / 1000;

	na->na_ctime.tv_sec = vap->va_ctime.tv_sec;
	na->na_ctime.tv_usec = vap->va_ctime.tv_nsec / 1000;

	/*
	 * If the dev_t will fit into 16 bits then compress
	 * it, otherwise leave it alone.  See comments in
	 * nfs_client.c.
	 */
	if (getminor(vap->va_rdev) <= SO4_MAXMIN &&
	    getmajor(vap->va_rdev) <= SO4_MAXMAJ)
		na->na_rdev = nfsv2_cmpdev(vap->va_rdev);
	else
		(void) cmpldev(&na->na_rdev, vap->va_rdev);

	na->na_blocks = vap->va_nblocks;
	na->na_blocksize = vap->va_blksize;

	/*
	 * This bit of ugliness is a *TEMPORARY* hack to preserve the
	 * over-the-wire protocols for named-pipe vnodes.  It remaps the
	 * VFIFO type to the special over-the-wire type. (see note in nfs.h)
	 *
	 * BUYER BEWARE:
	 *  If you are porting the NFS to a non-Sun server, you probably
	 *  don't want to include the following block of code.  The
	 *  over-the-wire special file types will be changing with the
	 *  NFS Protocol Revision.
	 */
	if (vap->va_type == VFIFO)
		NA_SETFIFO(na);
	return (0);
}
2876 2966
/*
 * acl v2 support: returns approximate permission.
 *	default: returns minimal permission (more restrictive)
 *	aclok: returns maximal permission (less restrictive)
 * This routine changes the permissions that are already in *va.
 * If a file has minimal ACL, i.e. aclcnt == MIN_ACL_ENTRIES,
 * CLASS_OBJ is always the same as GROUP_OBJ entry.
 *
 * Only the group and other bits (077) of va_mode are rewritten; the
 * owner bits are left untouched.  Failures from VOP_GETSECATTR() are
 * silently ignored and leave *va unchanged (best effort).
 */
static void
acl_perm(struct vnode *vp, struct exportinfo *exi, struct vattr *va, cred_t *cr)
{
	vsecattr_t vsa;
	int aclcnt;
	aclent_t *aclentp;
	mode_t mask_perm;
	mode_t grp_perm;
	mode_t other_perm;
	mode_t other_orig;
	int error;

	/* dont care default acl */
	vsa.vsa_mask = (VSA_ACL | VSA_ACLCNT);
	error = VOP_GETSECATTR(vp, &vsa, 0, cr, NULL);

	if (!error) {
		aclcnt = vsa.vsa_aclcnt;
		if (aclcnt > MIN_ACL_ENTRIES) {
			/* non-trivial ACL */
			aclentp = vsa.vsa_aclentp;
			if (exi->exi_export.ex_flags & EX_ACLOK) {
				/*
				 * maximal permissions
				 *
				 * NOTE(review): mask_perm and other_orig are
				 * assigned only when CLASS_OBJ and OTHER_OBJ
				 * entries occur in the list; a conforming
				 * non-trivial ACL always contains both --
				 * confirm.
				 */
				grp_perm = 0;
				other_perm = 0;
				for (; aclcnt > 0; aclcnt--, aclentp++) {
					switch (aclentp->a_type) {
					case USER_OBJ:
						break;
					case USER:
						grp_perm |=
						    aclentp->a_perm << 3;
						other_perm |= aclentp->a_perm;
						break;
					case GROUP_OBJ:
						grp_perm |=
						    aclentp->a_perm << 3;
						break;
					case GROUP:
						other_perm |= aclentp->a_perm;
						break;
					case OTHER_OBJ:
						other_orig = aclentp->a_perm;
						break;
					case CLASS_OBJ:
						mask_perm = aclentp->a_perm;
						break;
					default:
						break;
					}
				}
				grp_perm &= mask_perm << 3;
				other_perm &= mask_perm;
				other_perm |= other_orig;

			} else {
				/* minimal permissions */
				grp_perm = 070;
				other_perm = 07;
				for (; aclcnt > 0; aclcnt--, aclentp++) {
					switch (aclentp->a_type) {
					case USER_OBJ:
						break;
					case USER:
					case CLASS_OBJ:
						grp_perm &=
						    aclentp->a_perm << 3;
						other_perm &=
						    aclentp->a_perm;
						break;
					case GROUP_OBJ:
						grp_perm &=
						    aclentp->a_perm << 3;
						break;
					case GROUP:
						other_perm &=
						    aclentp->a_perm;
						break;
					case OTHER_OBJ:
						other_perm &=
						    aclentp->a_perm;
						break;
					default:
						break;
					}
				}
			}
			/* copy to va */
			va->va_mode &= ~077;
			va->va_mode |= grp_perm | other_perm;
		}
		if (vsa.vsa_aclcnt)
			kmem_free(vsa.vsa_aclentp,
			    vsa.vsa_aclcnt * sizeof (aclent_t));
	}
}
2981 3071
2982 3072 void
2983 3073 rfs_srvrinit(void)
2984 3074 {
2985 3075 mutex_init(&rfs_async_write_lock, NULL, MUTEX_DEFAULT, NULL);
2986 3076 nfs2_srv_caller_id = fs_new_caller_id();
2987 3077 }
2988 3078
/*
 * Tear down the NFSv2 server state created by rfs_srvrinit().
 */
void
rfs_srvrfini(void)
{
	mutex_destroy(&rfs_async_write_lock);
}
2994 3084
2995 3085 static int
2996 3086 rdma_setup_read_data2(struct nfsreadargs *ra, struct nfsrdresult *rr)
2997 3087 {
2998 3088 struct clist *wcl;
2999 3089 int wlist_len;
3000 3090 uint32_t count = rr->rr_count;
3001 3091
3002 3092 wcl = ra->ra_wlist;
3003 3093
3004 3094 if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE) {
3005 3095 return (FALSE);
3006 3096 }
3007 3097
3008 3098 wcl = ra->ra_wlist;
3009 3099 rr->rr_ok.rrok_wlist_len = wlist_len;
3010 3100 rr->rr_ok.rrok_wlist = wcl;
3011 3101
3012 3102 return (TRUE);
3013 3103 }
|
↓ open down ↓ |
2540 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX