Print this page
Remove the assumption that the zone's root vnode is marked VROOT
| Split |
Close |
| Expand all |
| Collapse all |
--- old/usr/src/uts/common/fs/nfs/nfs_srv.c
+++ new/usr/src/uts/common/fs/nfs/nfs_srv.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved.
24 24 * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
25 25 * Copyright (c) 2016 by Delphix. All rights reserved.
26 26 */
27 27
28 28 /*
29 29 * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T.
30 30 * All rights reserved.
31 31 */
32 32
33 33 /*
34 34 * Copyright 2018 Nexenta Systems, Inc.
35 35 * Copyright (c) 2016 by Delphix. All rights reserved.
36 36 */
37 37
38 38 #include <sys/param.h>
39 39 #include <sys/types.h>
40 40 #include <sys/systm.h>
41 41 #include <sys/cred.h>
42 42 #include <sys/buf.h>
43 43 #include <sys/vfs.h>
44 44 #include <sys/vnode.h>
45 45 #include <sys/uio.h>
46 46 #include <sys/stat.h>
47 47 #include <sys/errno.h>
48 48 #include <sys/sysmacros.h>
49 49 #include <sys/statvfs.h>
50 50 #include <sys/kmem.h>
51 51 #include <sys/kstat.h>
52 52 #include <sys/dirent.h>
53 53 #include <sys/cmn_err.h>
54 54 #include <sys/debug.h>
55 55 #include <sys/vtrace.h>
56 56 #include <sys/mode.h>
57 57 #include <sys/acl.h>
58 58 #include <sys/nbmlock.h>
59 59 #include <sys/policy.h>
60 60 #include <sys/sdt.h>
61 61
62 62 #include <rpc/types.h>
63 63 #include <rpc/auth.h>
64 64 #include <rpc/svc.h>
65 65
66 66 #include <nfs/nfs.h>
67 67 #include <nfs/export.h>
68 68 #include <nfs/nfs_cmd.h>
69 69
70 70 #include <vm/hat.h>
71 71 #include <vm/as.h>
72 72 #include <vm/seg.h>
73 73 #include <vm/seg_map.h>
74 74 #include <vm/seg_kmem.h>
75 75
76 76 #include <sys/strsubr.h>
77 77
78 78 struct rfs_async_write_list;
79 79
80 80 /*
81 81 * Zone globals of NFSv2 server
82 82 */
typedef struct nfs_srv {
	/* presumably serializes access to async_write_head — confirm at use sites */
	kmutex_t async_write_lock;
	/* head of the list of pending clustered (async) write requests */
	struct rfs_async_write_list *async_write_head;

	/*
	 * enables write clustering if == 1
	 */
	int write_async;
} nfs_srv_t;
92 92
93 93 /*
94 94 * These are the interface routines for the server side of the
95 95 * Network File System. See the NFS version 2 protocol specification
96 96 * for a description of this interface.
97 97 */
98 98
99 99 static int sattr_to_vattr(struct nfssattr *, struct vattr *);
100 100 static void acl_perm(struct vnode *, struct exportinfo *, struct vattr *,
101 101 cred_t *);
102 102 static void *rfs_zone_init(zoneid_t zoneid);
103 103 static void rfs_zone_fini(zoneid_t zoneid, void *data);
104 104
105 105
106 106 /*
107 107 * Some "over the wire" UNIX file types. These are encoded
108 108 * into the mode. This needs to be fixed in the next rev.
109 109 */
110 110 #define IFMT 0170000 /* type of file */
111 111 #define IFCHR 0020000 /* character special */
112 112 #define IFBLK 0060000 /* block special */
113 113 #define IFSOCK 0140000 /* socket */
114 114
115 115 u_longlong_t nfs2_srv_caller_id;
116 116 static zone_key_t rfs_zone_key;
117 117
/*
 * Get file attributes.
 * Returns the current attributes of the file with the given fhandle.
 *
 * The result (status and, on success, attributes) is written to @ns.
 * The incoming file handle is translated to a held vnode which is
 * released before returning.
 */
/* ARGSUSED */
void
rfs_getattr(fhandle_t *fhp, struct nfsattrstat *ns, struct exportinfo *exi,
    struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error;
	vnode_t *vp;
	struct vattr va;

	/* Translate the file handle into a held vnode. */
	vp = nfs_fhtovp(fhp, exi);
	if (vp == NULL) {
		ns->ns_status = NFSERR_STALE;
		return;
	}

	/*
	 * Do the getattr.
	 */
	va.va_mask = AT_ALL;	/* we want all the attributes */

	error = rfs4_delegated_getattr(vp, &va, 0, cr);

	/* check for overflows */
	if (!error) {
		/* Lie about the object type for a referral */
		if (vn_is_nfs_reparse(vp, cr))
			va.va_type = VLNK;

		acl_perm(vp, exi, &va, cr);
		error = vattr_to_nattr(&va, &ns->ns_attr);
	}

	VN_RELE(vp);

	ns->ns_status = puterrno(error);
}
158 158 void *
159 159 rfs_getattr_getfh(fhandle_t *fhp)
160 160 {
161 161 return (fhp);
162 162 }
163 163
/*
 * Set file attributes.
 * Sets the attributes of the file with the given fhandle.  Returns
 * the new attributes.
 *
 * Status and (on success) the post-operation attributes are written
 * to @ns.  A delegation conflict detected by a monitor causes the
 * reply to be dropped (T_WOULDBLOCK) so the client retries.
 */
/* ARGSUSED */
void
rfs_setattr(struct nfssaargs *args, struct nfsattrstat *ns,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error;
	int flag;
	int in_crit = 0;	/* nonzero while inside the nbmand critical region */
	vnode_t *vp;
	struct vattr va;	/* attributes requested by the client */
	struct vattr bva;	/* attributes of the file before the change */
	struct flock64 bf;
	caller_context_t ct;


	vp = nfs_fhtovp(&args->saa_fh, exi);
	if (vp == NULL) {
		ns->ns_status = NFSERR_STALE;
		return;
	}

	if (rdonly(ro, vp)) {
		VN_RELE(vp);
		ns->ns_status = NFSERR_ROFS;
		return;
	}

	error = sattr_to_vattr(&args->saa_sa, &va);
	if (error) {
		VN_RELE(vp);
		ns->ns_status = puterrno(error);
		return;
	}

	/*
	 * If the client is requesting a change to the mtime,
	 * but the nanosecond field is set to 1 billion, then
	 * this is a flag to the server that it should set the
	 * atime and mtime fields to the server's current time.
	 * The 1 billion number actually came from the client
	 * as 1 million, but the units in the over the wire
	 * request are microseconds instead of nanoseconds.
	 *
	 * This is an overload of the protocol and should be
	 * documented in the NFS Version 2 protocol specification.
	 */
	if (va.va_mask & AT_MTIME) {
		if (va.va_mtime.tv_nsec == 1000000000) {
			gethrestime(&va.va_mtime);
			va.va_atime = va.va_mtime;
			va.va_mask |= AT_ATIME;
			flag = 0;
		} else
			flag = ATTR_UTIME;
	} else
		flag = 0;

	/*
	 * If the filesystem is exported with nosuid, then mask off
	 * the setuid and setgid bits.
	 */
	if ((va.va_mask & AT_MODE) && vp->v_type == VREG &&
	    (exi->exi_export.ex_flags & EX_NOSUID))
		va.va_mode &= ~(VSUID | VSGID);

	ct.cc_sysid = 0;
	ct.cc_pid = 0;
	ct.cc_caller_id = nfs2_srv_caller_id;
	ct.cc_flags = CC_DONTBLOCK;

	/*
	 * We need to specially handle size changes because it is
	 * possible for the client to create a file with modes
	 * which indicate read-only, but with the file opened for
	 * writing.  If the client then tries to set the size of
	 * the file, then the normal access checking done in
	 * VOP_SETATTR would prevent the client from doing so,
	 * although it should be legal for it to do so.  To get
	 * around this, we do the access checking for ourselves
	 * and then use VOP_SPACE which doesn't do the access
	 * checking which VOP_SETATTR does.  VOP_SPACE can only
	 * operate on VREG files, let VOP_SETATTR handle the other
	 * extremely rare cases.
	 * Also the client should not be allowed to change the
	 * size of the file if there is a conflicting non-blocking
	 * mandatory lock in the region of change.
	 */
	if (vp->v_type == VREG && va.va_mask & AT_SIZE) {
		if (nbl_need_check(vp)) {
			nbl_start_crit(vp, RW_READER);
			in_crit = 1;
		}

		bva.va_mask = AT_UID | AT_SIZE;

		error = VOP_GETATTR(vp, &bva, 0, cr, &ct);

		if (error) {
			if (in_crit)
				nbl_end_crit(vp);
			VN_RELE(vp);
			ns->ns_status = puterrno(error);
			return;
		}

		if (in_crit) {
			u_offset_t offset;
			ssize_t length;

			/* The affected range is [min(old,new), |old-new|). */
			if (va.va_size < bva.va_size) {
				offset = va.va_size;
				length = bva.va_size - va.va_size;
			} else {
				offset = bva.va_size;
				length = va.va_size - bva.va_size;
			}
			if (nbl_conflict(vp, NBL_WRITE, offset, length, 0,
			    NULL)) {
				error = EACCES;
			}
		}

		/*
		 * Owner bypasses mode checks: truncate via VOP_SPACE
		 * and strip AT_SIZE so VOP_SETATTR below skips it.
		 */
		if (crgetuid(cr) == bva.va_uid && !error &&
		    va.va_size != bva.va_size) {
			va.va_mask &= ~AT_SIZE;
			bf.l_type = F_WRLCK;
			bf.l_whence = 0;
			bf.l_start = (off64_t)va.va_size;
			bf.l_len = 0;
			bf.l_sysid = 0;
			bf.l_pid = 0;

			error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
			    (offset_t)va.va_size, cr, &ct);
		}
		if (in_crit)
			nbl_end_crit(vp);
	} else
		error = 0;

	/*
	 * Do the setattr.
	 */
	if (!error && va.va_mask) {
		error = VOP_SETATTR(vp, &va, flag, cr, &ct);
	}

	/*
	 * check if the monitor on either vop_space or vop_setattr detected
	 * a delegation conflict and if so, mark the thread flag as
	 * wouldblock so that the response is dropped and the client will
	 * try again.
	 */
	if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
		VN_RELE(vp);
		curthread->t_flag |= T_WOULDBLOCK;
		return;
	}

	if (!error) {
		va.va_mask = AT_ALL;	/* get everything */

		error = rfs4_delegated_getattr(vp, &va, 0, cr);

		/* check for overflows */
		if (!error) {
			acl_perm(vp, exi, &va, cr);
			error = vattr_to_nattr(&va, &ns->ns_attr);
		}
	}

	ct.cc_flags = 0;

	/*
	 * Force modified metadata out to stable storage.
	 */
	(void) VOP_FSYNC(vp, FNODSYNC, cr, &ct);

	VN_RELE(vp);

	ns->ns_status = puterrno(error);
}
351 351 void *
352 352 rfs_setattr_getfh(struct nfssaargs *args)
353 353 {
354 354 return (&args->saa_fh);
355 355 }
356 356
/*
 * Change and release @exip and @vpp only in success.
 *
 * Cross from a mountpoint vnode into the filesystem mounted on it.
 * If the covering filesystem is exported with "nohide", *vpp/*exip
 * are swapped for the submount's root vnode and export (old ones
 * released); otherwise both are left untouched and 0 is still
 * returned — not crossing is not an error.
 */
int
rfs_cross_mnt(vnode_t **vpp, struct exportinfo **exip)
{
	struct exportinfo *exi;
	vnode_t *vp = *vpp;
	fid_t fid;
	int error;

	/* Extra hold so traverse() can consume/replace the reference. */
	VN_HOLD(vp);

	if ((error = traverse(&vp)) != 0) {
		VN_RELE(vp);
		return (error);
	}

	bzero(&fid, sizeof (fid));
	fid.fid_len = MAXFIDSZ;
	error = VOP_FID(vp, &fid, NULL);
	if (error) {
		VN_RELE(vp);
		return (error);
	}

	exi = checkexport(&vp->v_vfsp->vfs_fsid, &fid);
	if (exi == NULL ||
	    (exi->exi_export.ex_flags & EX_NOHIDE) == 0) {
		/*
		 * It is not error, just subdir is not exported
		 * or "nohide" is not set
		 */
		if (exi != NULL)
			exi_rele(exi);
		VN_RELE(vp);
	} else {
		/* go to submount */
		exi_rele(*exip);
		*exip = exi;

		VN_RELE(*vpp);
		*vpp = vp;
	}

	return (0);
}
402 402
/*
 * Given mounted "dvp" and "exi", go upper mountpoint
 * with dvp/exi correction
 * Return 0 in success
 *
 * On success *dvpp/*exip are replaced by the covered (underlying)
 * vnode and its export, with the old references released.  On
 * failure (-1) both are left untouched.
 */
int
rfs_climb_crossmnt(vnode_t **dvpp, struct exportinfo **exip, cred_t *cr)
{
	struct exportinfo *exi;
	vnode_t *dvp = *dvpp;

	/* Caller must hand us a filesystem root (or the zone's root). */
	ASSERT((dvp->v_flag & VROOT) || VN_IS_CURZONEROOT(dvp));

	VN_HOLD(dvp);
	/* presumably returns a held ref on the covered vnode — confirm */
	dvp = untraverse(dvp);
	exi = nfs_vptoexi(NULL, dvp, cr, NULL, NULL, FALSE);
	if (exi == NULL) {
		VN_RELE(dvp);
		return (-1);
	}

	exi_rele(*exip);
	*exip = exi;
	VN_RELE(*dvpp);
	*dvpp = dvp;

	return (0);
}
/*
 * Directory lookup.
 * Returns an fhandle and file attributes for file name in a directory.
 *
 * Handles three lookup flavors: normal single-component VOP_LOOKUP,
 * multi-component public-filehandle lookup, and the special ".."
 * escape above a "nohide" exported root.  Status is written to @dr.
 */
/* ARGSUSED */
void
rfs_lookup(struct nfsdiropargs *da, struct nfsdiropres *dr,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error;
	vnode_t *dvp;
	vnode_t *vp;
	struct vattr va;
	fhandle_t *fhp = da->da_fhandle;
	struct sec_ol sec = {0, 0};
	bool_t publicfh_flag = FALSE, auth_weak = FALSE;
	char *name;
	struct sockaddr *ca;

	/*
	 * Trusted Extension doesn't support NFSv2. MOUNT
	 * will reject v2 clients. Need to prevent v2 client
	 * access via WebNFS here.
	 */
	if (is_system_labeled() && req->rq_vers == 2) {
		dr->dr_status = NFSERR_ACCES;
		return;
	}

	/*
	 * Disallow NULL paths
	 */
	if (da->da_name == NULL || *da->da_name == '\0') {
		dr->dr_status = NFSERR_ACCES;
		return;
	}

	/*
	 * Allow lookups from the root - the default
	 * location of the public filehandle.
	 */
	if (exi != NULL && (exi->exi_export.ex_flags & EX_PUBLIC)) {
		dvp = ZONE_ROOTVP();
		VN_HOLD(dvp);
	} else {
		dvp = nfs_fhtovp(fhp, exi);
		if (dvp == NULL) {
			dr->dr_status = NFSERR_STALE;
			return;
		}
	}

	/* Extra hold on exi; matched by exi_rele() at out:. */
	exi_hold(exi);

	/*
	 * Not allow lookup beyond root.
	 * If the filehandle matches a filehandle of the exi,
	 * then the ".." refers beyond the root of an exported filesystem.
	 */
	if (strcmp(da->da_name, "..") == 0 &&
	    EQFID(&exi->exi_fid, (fid_t *)&fhp->fh_len)) {
		if ((exi->exi_export.ex_flags & EX_NOHIDE) &&
		    ((dvp->v_flag & VROOT) || VN_IS_CURZONEROOT(dvp))) {
			/*
			 * special case for ".." and 'nohide'exported root
			 */
			if (rfs_climb_crossmnt(&dvp, &exi, cr) != 0) {
				error = NFSERR_ACCES;
				goto out;
			}
		} else {
			error = NFSERR_NOENT;
			goto out;
		}
	}

	/* Convert the client's name into the server's character set. */
	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
	name = nfscmd_convname(ca, exi, da->da_name, NFSCMD_CONV_INBOUND,
	    MAXPATHLEN);

	if (name == NULL) {
		error = NFSERR_ACCES;
		goto out;
	}

	/*
	 * If the public filehandle is used then allow
	 * a multi-component lookup, i.e. evaluate
	 * a pathname and follow symbolic links if
	 * necessary.
	 *
	 * This may result in a vnode in another filesystem
	 * which is OK as long as the filesystem is exported.
	 */
	if (PUBLIC_FH2(fhp)) {
		publicfh_flag = TRUE;

		exi_rele(exi);

		/*
		 * NOTE(review): if rfs_publicfh_mclookup() fails, it looks
		 * like exi keeps its just-released value and is released
		 * again at out: — verify the callee resets exi on failure.
		 */
		error = rfs_publicfh_mclookup(name, dvp, cr, &vp, &exi,
		    &sec);
	} else {
		/*
		 * Do a normal single component lookup.
		 */
		error = VOP_LOOKUP(dvp, name, &vp, NULL, 0, NULL, cr,
		    NULL, NULL, NULL);
	}

	/* nfscmd_convname() allocates only when conversion was needed. */
	if (name != da->da_name)
		kmem_free(name, MAXPATHLEN);

	if (error == 0 && vn_ismntpt(vp)) {
		error = rfs_cross_mnt(&vp, &exi);
		if (error)
			VN_RELE(vp);
	}

	if (!error) {
		va.va_mask = AT_ALL;	/* we want everything */

		error = rfs4_delegated_getattr(vp, &va, 0, cr);

		/* check for overflows */
		if (!error) {
			acl_perm(vp, exi, &va, cr);
			error = vattr_to_nattr(&va, &dr->dr_attr);
			if (!error) {
				if (sec.sec_flags & SEC_QUERY)
					error = makefh_ol(&dr->dr_fhandle, exi,
					    sec.sec_index);
				else {
					error = makefh(&dr->dr_fhandle, vp,
					    exi);
					if (!error && publicfh_flag &&
					    !chk_clnt_sec(exi, req))
						auth_weak = TRUE;
				}
			}
		}
		VN_RELE(vp);
	}

out:
	VN_RELE(dvp);

	if (exi != NULL)
		exi_rele(exi);

	/*
	 * If it's public fh, no 0x81, and client's flavor is
	 * invalid, set WebNFS status to WNFSERR_CLNT_FLAVOR now.
	 * Then set RPC status to AUTH_TOOWEAK in common_dispatch.
	 */
	if (auth_weak)
		dr->dr_status = (enum nfsstat)WNFSERR_CLNT_FLAVOR;
	else
		dr->dr_status = puterrno(error);
}
590 590 void *
591 591 rfs_lookup_getfh(struct nfsdiropargs *da)
592 592 {
593 593 return (da->da_fhandle);
594 594 }
595 595
/*
 * Read symbolic link.
 * Returns the string in the symbolic link at the given fhandle.
 *
 * On success rl->rl_data holds an NFS_MAXPATHLEN buffer (freed later
 * by rfs_rlfree) with rl->rl_count valid bytes; on any error rl_data
 * is NULL.  NFS referrals are presented as artificial symlinks.
 */
/* ARGSUSED */
void
rfs_readlink(fhandle_t *fhp, struct nfsrdlnres *rl, struct exportinfo *exi,
    struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error;
	struct iovec iov;
	struct uio uio;
	vnode_t *vp;
	struct vattr va;
	struct sockaddr *ca;
	char *name = NULL;
	int is_referral = 0;

	vp = nfs_fhtovp(fhp, exi);
	if (vp == NULL) {
		rl->rl_data = NULL;
		rl->rl_status = NFSERR_STALE;
		return;
	}

	va.va_mask = AT_MODE;

	error = VOP_GETATTR(vp, &va, 0, cr, NULL);

	if (error) {
		VN_RELE(vp);
		rl->rl_data = NULL;
		rl->rl_status = puterrno(error);
		return;
	}

	/* Refuse mandatory-locked objects to avoid blocking forever. */
	if (MANDLOCK(vp, va.va_mode)) {
		VN_RELE(vp);
		rl->rl_data = NULL;
		rl->rl_status = NFSERR_ACCES;
		return;
	}

	/* We lied about the object type for a referral */
	if (vn_is_nfs_reparse(vp, cr))
		is_referral = 1;

	/*
	 * XNFS and RFC1094 require us to return ENXIO if argument
	 * is not a link. BUGID 1138002.
	 */
	if (vp->v_type != VLNK && !is_referral) {
		VN_RELE(vp);
		rl->rl_data = NULL;
		rl->rl_status = NFSERR_NXIO;
		return;
	}

	/*
	 * Allocate data for pathname.  This will be freed by rfs_rlfree.
	 */
	rl->rl_data = kmem_alloc(NFS_MAXPATHLEN, KM_SLEEP);

	if (is_referral) {
		char *s;
		size_t strsz;

		/* Get an artificial symlink based on a referral */
		s = build_symlink(vp, cr, &strsz);
		global_svstat_ptr[2][NFS_REFERLINKS].value.ui64++;
		DTRACE_PROBE2(nfs2serv__func__referral__reflink,
		    vnode_t *, vp, char *, s);
		if (s == NULL)
			error = EINVAL;
		else {
			error = 0;
			(void) strlcpy(rl->rl_data, s, NFS_MAXPATHLEN);
			rl->rl_count = (uint32_t)MIN(strsz, NFS_MAXPATHLEN);
			kmem_free(s, strsz);
		}

	} else {

		/*
		 * Set up io vector to read sym link data
		 */
		iov.iov_base = rl->rl_data;
		iov.iov_len = NFS_MAXPATHLEN;
		uio.uio_iov = &iov;
		uio.uio_iovcnt = 1;
		uio.uio_segflg = UIO_SYSSPACE;
		uio.uio_extflg = UIO_COPY_CACHED;
		uio.uio_loffset = (offset_t)0;
		uio.uio_resid = NFS_MAXPATHLEN;

		/*
		 * Do the readlink.
		 */
		error = VOP_READLINK(vp, &uio, cr, NULL);

		rl->rl_count = (uint32_t)(NFS_MAXPATHLEN - uio.uio_resid);

		/*
		 * NOTE(review): if the link consumed the full buffer
		 * (uio_resid == 0) this NUL lands at index NFS_MAXPATHLEN,
		 * one past the allocation — confirm links are bounded below
		 * NFS_MAXPATHLEN.
		 */
		if (!error)
			rl->rl_data[rl->rl_count] = '\0';

	}


	VN_RELE(vp);

	/* Convert the link text into the client's character set. */
	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
	name = nfscmd_convname(ca, exi, rl->rl_data,
	    NFSCMD_CONV_OUTBOUND, MAXPATHLEN);

	if (name != NULL && name != rl->rl_data) {
		kmem_free(rl->rl_data, NFS_MAXPATHLEN);
		rl->rl_data = name;
	}

	/*
	 * XNFS and RFC1094 require us to return ENXIO if argument
	 * is not a link. UFS returns EINVAL if this is the case,
	 * so we do the mapping here. BUGID 1138002.
	 */
	if (error == EINVAL)
		rl->rl_status = NFSERR_NXIO;
	else
		rl->rl_status = puterrno(error);

}
726 726 void *
727 727 rfs_readlink_getfh(fhandle_t *fhp)
728 728 {
729 729 return (fhp);
730 730 }
731 731 /*
732 732 * Free data allocated by rfs_readlink
733 733 */
734 734 void
735 735 rfs_rlfree(struct nfsrdlnres *rl)
736 736 {
737 737 if (rl->rl_data != NULL)
738 738 kmem_free(rl->rl_data, NFS_MAXPATHLEN);
739 739 }
740 740
741 741 static int rdma_setup_read_data2(struct nfsreadargs *, struct nfsrdresult *);
742 742
/*
 * Read data.
 * Returns some data read from the file at the given fhandle.
 *
 * Two reply paths exist: an RDMA write chunk supplied by the client
 * (ra->ra_wlist) or a locally allocated mblk (rr->rr_mp) that the
 * transport frees after the reply is sent (see rfs_rdfree).  A
 * delegation conflict causes the reply to be dropped (T_WOULDBLOCK).
 */
/* ARGSUSED */
void
rfs_read(struct nfsreadargs *ra, struct nfsrdresult *rr,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
	vnode_t *vp;
	int error;
	struct vattr va;
	struct iovec iov;
	struct uio uio;
	mblk_t *mp;
	int alloc_err = 0;
	int in_crit = 0;	/* nonzero while inside the nbmand critical region */
	caller_context_t ct;

	vp = nfs_fhtovp(&ra->ra_fhandle, exi);
	if (vp == NULL) {
		rr->rr_data = NULL;
		rr->rr_status = NFSERR_STALE;
		return;
	}

	if (vp->v_type != VREG) {
		VN_RELE(vp);
		rr->rr_data = NULL;
		rr->rr_status = NFSERR_ISDIR;
		return;
	}

	ct.cc_sysid = 0;
	ct.cc_pid = 0;
	ct.cc_caller_id = nfs2_srv_caller_id;
	ct.cc_flags = CC_DONTBLOCK;

	/*
	 * Enter the critical region before calling VOP_RWLOCK
	 * to avoid a deadlock with write requests.
	 */
	if (nbl_need_check(vp)) {
		nbl_start_crit(vp, RW_READER);
		if (nbl_conflict(vp, NBL_READ, ra->ra_offset, ra->ra_count,
		    0, NULL)) {
			nbl_end_crit(vp);
			VN_RELE(vp);
			rr->rr_data = NULL;
			rr->rr_status = NFSERR_ACCES;
			return;
		}
		in_crit = 1;
	}

	error = VOP_RWLOCK(vp, V_WRITELOCK_FALSE, &ct);

	/* check if a monitor detected a delegation conflict */
	if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
		if (in_crit)
			nbl_end_crit(vp);
		VN_RELE(vp);
		/* mark as wouldblock so response is dropped */
		curthread->t_flag |= T_WOULDBLOCK;

		rr->rr_data = NULL;
		return;
	}

	va.va_mask = AT_ALL;

	error = VOP_GETATTR(vp, &va, 0, cr, &ct);

	if (error) {
		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
		if (in_crit)
			nbl_end_crit(vp);

		VN_RELE(vp);
		rr->rr_data = NULL;
		rr->rr_status = puterrno(error);

		return;
	}

	/*
	 * This is a kludge to allow reading of files created
	 * with no read permission.  The owner of the file
	 * is always allowed to read it.
	 */
	if (crgetuid(cr) != va.va_uid) {
		error = VOP_ACCESS(vp, VREAD, 0, cr, &ct);

		if (error) {
			/*
			 * Exec is the same as read over the net because
			 * of demand loading.
			 */
			error = VOP_ACCESS(vp, VEXEC, 0, cr, &ct);
		}
		if (error) {
			VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
			if (in_crit)
				nbl_end_crit(vp);
			VN_RELE(vp);
			rr->rr_data = NULL;
			rr->rr_status = puterrno(error);

			return;
		}
	}

	if (MANDLOCK(vp, va.va_mode)) {
		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
		if (in_crit)
			nbl_end_crit(vp);

		VN_RELE(vp);
		rr->rr_data = NULL;
		rr->rr_status = NFSERR_ACCES;

		return;
	}

	rr->rr_ok.rrok_wlist_len = 0;
	rr->rr_ok.rrok_wlist = NULL;

	/* Read starting at or past EOF: success with zero bytes. */
	if ((u_offset_t)ra->ra_offset >= va.va_size) {
		rr->rr_count = 0;
		rr->rr_data = NULL;
		/*
		 * In this case, status is NFS_OK, but there is no data
		 * to encode. So set rr_mp to NULL.
		 */
		rr->rr_mp = NULL;
		rr->rr_ok.rrok_wlist = ra->ra_wlist;
		if (rr->rr_ok.rrok_wlist)
			clist_zero_len(rr->rr_ok.rrok_wlist);
		goto done;
	}

	if (ra->ra_wlist) {
		/* RDMA path: read directly into the client's write chunk. */
		mp = NULL;
		rr->rr_mp = NULL;
		(void) rdma_get_wchunk(req, &iov, ra->ra_wlist);
		if (ra->ra_count > iov.iov_len) {
			rr->rr_data = NULL;
			rr->rr_status = NFSERR_INVAL;
			goto done;
		}
	} else {
		/*
		 * mp will contain the data to be sent out in the read reply.
		 * This will be freed after the reply has been sent out (by the
		 * driver).
		 * Let's roundup the data to a BYTES_PER_XDR_UNIT multiple, so
		 * that the call to xdrmblk_putmblk() never fails.
		 */
		mp = allocb_wait(RNDUP(ra->ra_count), BPRI_MED, STR_NOSIG,
		    &alloc_err);
		ASSERT(mp != NULL);
		ASSERT(alloc_err == 0);

		rr->rr_mp = mp;

		/*
		 * Set up io vector
		 */
		iov.iov_base = (caddr_t)mp->b_datap->db_base;
		iov.iov_len = ra->ra_count;
	}

	uio.uio_iov = &iov;
	uio.uio_iovcnt = 1;
	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_extflg = UIO_COPY_CACHED;
	uio.uio_loffset = (offset_t)ra->ra_offset;
	uio.uio_resid = ra->ra_count;

	error = VOP_READ(vp, &uio, 0, cr, &ct);

	if (error) {
		if (mp)
			freeb(mp);

		/*
		 * check if a monitor detected a delegation conflict and
		 * mark as wouldblock so response is dropped
		 */
		if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
			curthread->t_flag |= T_WOULDBLOCK;
		else
			rr->rr_status = puterrno(error);

		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
		if (in_crit)
			nbl_end_crit(vp);

		VN_RELE(vp);
		rr->rr_data = NULL;

		return;
	}

	/*
	 * Get attributes again so we can send the latest access
	 * time to the client side for its cache.
	 */
	va.va_mask = AT_ALL;

	error = VOP_GETATTR(vp, &va, 0, cr, &ct);

	if (error) {
		if (mp)
			freeb(mp);

		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
		if (in_crit)
			nbl_end_crit(vp);

		VN_RELE(vp);
		rr->rr_data = NULL;
		rr->rr_status = puterrno(error);

		return;
	}

	/* Bytes actually read = requested minus what the VOP left over. */
	rr->rr_count = (uint32_t)(ra->ra_count - uio.uio_resid);

	if (mp) {
		rr->rr_data = (char *)mp->b_datap->db_base;
	} else {
		if (ra->ra_wlist) {
			rr->rr_data = (caddr_t)iov.iov_base;
			if (!rdma_setup_read_data2(ra, rr)) {
				rr->rr_data = NULL;
				rr->rr_status = puterrno(NFSERR_INVAL);
			}
		}
	}
done:
	VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
	if (in_crit)
		nbl_end_crit(vp);

	acl_perm(vp, exi, &va, cr);

	/* check for overflows */
	error = vattr_to_nattr(&va, &rr->rr_attr);

	VN_RELE(vp);

	rr->rr_status = puterrno(error);
}
997 997
998 998 /*
999 999 * Free data allocated by rfs_read
1000 1000 */
1001 1001 void
1002 1002 rfs_rdfree(struct nfsrdresult *rr)
1003 1003 {
1004 1004 mblk_t *mp;
1005 1005
1006 1006 if (rr->rr_status == NFS_OK) {
1007 1007 mp = rr->rr_mp;
1008 1008 if (mp != NULL)
1009 1009 freeb(mp);
1010 1010 }
1011 1011 }
1012 1012
1013 1013 void *
1014 1014 rfs_read_getfh(struct nfsreadargs *ra)
1015 1015 {
1016 1016 return (&ra->ra_fhandle);
1017 1017 }
1018 1018
1019 1019 #define MAX_IOVECS 12
1020 1020
1021 1021 #ifdef DEBUG
1022 1022 static int rfs_write_sync_hits = 0;
1023 1023 static int rfs_write_sync_misses = 0;
1024 1024 #endif
1025 1025
1026 1026 /*
1027 1027 * Write data to file.
1028 1028 * Returns attributes of a file after writing some data to it.
1029 1029 *
1030 1030 * Any changes made here, especially in error handling might have
1031 1031 * to also be done in rfs_write (which clusters write requests).
1032 1032 */
1033 1033 /* ARGSUSED */
1034 1034 void
1035 1035 rfs_write_sync(struct nfswriteargs *wa, struct nfsattrstat *ns,
1036 1036 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
1037 1037 {
1038 1038 int error;
1039 1039 vnode_t *vp;
1040 1040 rlim64_t rlimit;
1041 1041 struct vattr va;
1042 1042 struct uio uio;
1043 1043 struct iovec iov[MAX_IOVECS];
1044 1044 mblk_t *m;
1045 1045 struct iovec *iovp;
1046 1046 int iovcnt;
1047 1047 cred_t *savecred;
1048 1048 int in_crit = 0;
1049 1049 caller_context_t ct;
1050 1050
1051 1051 vp = nfs_fhtovp(&wa->wa_fhandle, exi);
1052 1052 if (vp == NULL) {
1053 1053 ns->ns_status = NFSERR_STALE;
1054 1054 return;
1055 1055 }
1056 1056
1057 1057 if (rdonly(ro, vp)) {
1058 1058 VN_RELE(vp);
1059 1059 ns->ns_status = NFSERR_ROFS;
1060 1060 return;
1061 1061 }
1062 1062
1063 1063 if (vp->v_type != VREG) {
1064 1064 VN_RELE(vp);
1065 1065 ns->ns_status = NFSERR_ISDIR;
1066 1066 return;
1067 1067 }
1068 1068
1069 1069 ct.cc_sysid = 0;
1070 1070 ct.cc_pid = 0;
1071 1071 ct.cc_caller_id = nfs2_srv_caller_id;
1072 1072 ct.cc_flags = CC_DONTBLOCK;
1073 1073
1074 1074 va.va_mask = AT_UID|AT_MODE;
1075 1075
1076 1076 error = VOP_GETATTR(vp, &va, 0, cr, &ct);
1077 1077
1078 1078 if (error) {
1079 1079 VN_RELE(vp);
1080 1080 ns->ns_status = puterrno(error);
1081 1081
1082 1082 return;
1083 1083 }
1084 1084
1085 1085 if (crgetuid(cr) != va.va_uid) {
1086 1086 /*
1087 1087 * This is a kludge to allow writes of files created
1088 1088 * with read only permission. The owner of the file
1089 1089 * is always allowed to write it.
1090 1090 */
1091 1091 error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct);
1092 1092
1093 1093 if (error) {
1094 1094 VN_RELE(vp);
1095 1095 ns->ns_status = puterrno(error);
1096 1096 return;
1097 1097 }
1098 1098 }
1099 1099
1100 1100 /*
1101 1101 * Can't access a mandatory lock file. This might cause
1102 1102 * the NFS service thread to block forever waiting for a
1103 1103 * lock to be released that will never be released.
1104 1104 */
1105 1105 if (MANDLOCK(vp, va.va_mode)) {
1106 1106 VN_RELE(vp);
1107 1107 ns->ns_status = NFSERR_ACCES;
1108 1108 return;
1109 1109 }
1110 1110
1111 1111 /*
1112 1112 * We have to enter the critical region before calling VOP_RWLOCK
1113 1113 * to avoid a deadlock with ufs.
1114 1114 */
1115 1115 if (nbl_need_check(vp)) {
1116 1116 nbl_start_crit(vp, RW_READER);
1117 1117 in_crit = 1;
1118 1118 if (nbl_conflict(vp, NBL_WRITE, wa->wa_offset,
1119 1119 wa->wa_count, 0, NULL)) {
1120 1120 error = EACCES;
1121 1121 goto out;
1122 1122 }
1123 1123 }
1124 1124
1125 1125 error = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct);
1126 1126
1127 1127 /* check if a monitor detected a delegation conflict */
1128 1128 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1129 1129 goto out;
1130 1130 }
1131 1131
1132 1132 if (wa->wa_data || wa->wa_rlist) {
1133 1133 /* Do the RDMA thing if necessary */
1134 1134 if (wa->wa_rlist) {
1135 1135 iov[0].iov_base = (char *)((wa->wa_rlist)->u.c_daddr3);
1136 1136 iov[0].iov_len = wa->wa_count;
1137 1137 } else {
1138 1138 iov[0].iov_base = wa->wa_data;
1139 1139 iov[0].iov_len = wa->wa_count;
1140 1140 }
1141 1141 uio.uio_iov = iov;
1142 1142 uio.uio_iovcnt = 1;
1143 1143 uio.uio_segflg = UIO_SYSSPACE;
1144 1144 uio.uio_extflg = UIO_COPY_DEFAULT;
1145 1145 uio.uio_loffset = (offset_t)wa->wa_offset;
1146 1146 uio.uio_resid = wa->wa_count;
1147 1147 /*
1148 1148 * The limit is checked on the client. We
1149 1149 * should allow any size writes here.
1150 1150 */
1151 1151 uio.uio_llimit = curproc->p_fsz_ctl;
1152 1152 rlimit = uio.uio_llimit - wa->wa_offset;
1153 1153 if (rlimit < (rlim64_t)uio.uio_resid)
1154 1154 uio.uio_resid = (uint_t)rlimit;
1155 1155
1156 1156 /*
1157 1157 * for now we assume no append mode
1158 1158 */
1159 1159 /*
1160 1160 * We're changing creds because VM may fault and we need
1161 1161 * the cred of the current thread to be used if quota
1162 1162 * checking is enabled.
1163 1163 */
1164 1164 savecred = curthread->t_cred;
1165 1165 curthread->t_cred = cr;
1166 1166 error = VOP_WRITE(vp, &uio, FSYNC, cr, &ct);
1167 1167 curthread->t_cred = savecred;
1168 1168 } else {
1169 1169
1170 1170 iovcnt = 0;
1171 1171 for (m = wa->wa_mblk; m != NULL; m = m->b_cont)
1172 1172 iovcnt++;
1173 1173 if (iovcnt <= MAX_IOVECS) {
1174 1174 #ifdef DEBUG
1175 1175 rfs_write_sync_hits++;
1176 1176 #endif
1177 1177 iovp = iov;
1178 1178 } else {
1179 1179 #ifdef DEBUG
1180 1180 rfs_write_sync_misses++;
1181 1181 #endif
1182 1182 iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
1183 1183 }
1184 1184 mblk_to_iov(wa->wa_mblk, iovcnt, iovp);
1185 1185 uio.uio_iov = iovp;
1186 1186 uio.uio_iovcnt = iovcnt;
1187 1187 uio.uio_segflg = UIO_SYSSPACE;
1188 1188 uio.uio_extflg = UIO_COPY_DEFAULT;
1189 1189 uio.uio_loffset = (offset_t)wa->wa_offset;
1190 1190 uio.uio_resid = wa->wa_count;
1191 1191 /*
1192 1192 * The limit is checked on the client. We
1193 1193 * should allow any size writes here.
1194 1194 */
1195 1195 uio.uio_llimit = curproc->p_fsz_ctl;
1196 1196 rlimit = uio.uio_llimit - wa->wa_offset;
1197 1197 if (rlimit < (rlim64_t)uio.uio_resid)
1198 1198 uio.uio_resid = (uint_t)rlimit;
1199 1199
1200 1200 /*
1201 1201 * For now we assume no append mode.
1202 1202 */
1203 1203 /*
1204 1204 * We're changing creds because VM may fault and we need
1205 1205 * the cred of the current thread to be used if quota
1206 1206 * checking is enabled.
1207 1207 */
1208 1208 savecred = curthread->t_cred;
1209 1209 curthread->t_cred = cr;
1210 1210 error = VOP_WRITE(vp, &uio, FSYNC, cr, &ct);
1211 1211 curthread->t_cred = savecred;
1212 1212
1213 1213 if (iovp != iov)
1214 1214 kmem_free(iovp, sizeof (*iovp) * iovcnt);
1215 1215 }
1216 1216
1217 1217 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct);
1218 1218
1219 1219 if (!error) {
1220 1220 /*
1221 1221 * Get attributes again so we send the latest mod
1222 1222 * time to the client side for its cache.
1223 1223 */
1224 1224 va.va_mask = AT_ALL; /* now we want everything */
1225 1225
1226 1226 error = VOP_GETATTR(vp, &va, 0, cr, &ct);
1227 1227
1228 1228 /* check for overflows */
1229 1229 if (!error) {
1230 1230 acl_perm(vp, exi, &va, cr);
1231 1231 error = vattr_to_nattr(&va, &ns->ns_attr);
1232 1232 }
1233 1233 }
1234 1234
1235 1235 out:
1236 1236 if (in_crit)
1237 1237 nbl_end_crit(vp);
1238 1238 VN_RELE(vp);
1239 1239
1240 1240 /* check if a monitor detected a delegation conflict */
1241 1241 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
1242 1242 /* mark as wouldblock so response is dropped */
1243 1243 curthread->t_flag |= T_WOULDBLOCK;
1244 1244 else
1245 1245 ns->ns_status = puterrno(error);
1246 1246
1247 1247 }
1248 1248
/*
 * One queued NFSv2 WRITE request, linked into a per-file "cluster"
 * by rfs_write().  Instances live on the stack of the service thread
 * that submitted the request (see nrpsp in rfs_write()), so the thread
 * must not return until its request has been completed.
 */
struct rfs_async_write {
	struct nfswriteargs *wa;	/* decoded WRITE arguments */
	struct nfsattrstat *ns;		/* response to fill in for the client */
	struct svc_req *req;		/* RPC request handle */
	cred_t *cr;			/* credentials of the requester */
	bool_t ro;			/* read-only flag passed to rdonly() */
	kthread_t *thread;		/* service thread waiting on this entry */
	struct rfs_async_write *list;	/* next request, ordered by wa_offset */
};
1258 1258
/*
 * One write cluster: the set of pending WRITE requests for a single
 * file handle.  Clusters are kept on the zone's async_write_head list
 * (under async_write_lock) while open for new arrivals; rfs_write()
 * broadcasts cv once the whole cluster has been processed.
 */
struct rfs_async_write_list {
	fhandle_t *fhp;			/* file handle shared by the cluster */
	kcondvar_t cv;			/* signalled when requests complete */
	struct rfs_async_write *list;	/* requests, sorted by start offset */
	struct rfs_async_write_list *next;	/* next cluster on the list */
};
1265 1265
/*
 * NOTE(review): these three file-scope variables predate the per-zone
 * server state.  rfs_write() now uses the nfs_srv_t fields
 * async_write_head, async_write_lock and write_async obtained via
 * zone_getspecific(), so the globals below appear unused in this code
 * path -- verify there are no remaining users elsewhere in the file
 * before removing them.
 */
static struct rfs_async_write_list *rfs_async_write_head = NULL;
static kmutex_t rfs_async_write_lock;
static int rfs_write_async = 1;	/* enables write clustering if == 1 */

/* Max iovecs handled by the on-stack array in rfs_write(); larger */
/* clusters fall back to kmem_alloc (counted by rfs_write_misses). */
#define	MAXCLIOVECS	42
/* "status not filled in yet" marker; 0 would read as NFS_OK. */
#define	RFSWRITE_INITVAL	(enum nfsstat) -1

#ifdef DEBUG
/* Clusters whose iovecs fit the stack array vs. needed kmem_alloc. */
static int rfs_write_hits = 0;
static int rfs_write_misses = 0;
#endif
1277 1277
/*
 * Write data to file.
 * Returns attributes of a file after writing some data to it.
 *
 * This is the clustered (asynchronous) NFSv2 WRITE path.  Concurrent
 * requests against the same file handle are gathered into a "cluster"
 * so that contiguous requests can be issued through a single VOP_WRITE.
 * If clustering is disabled for this zone (nsrv->write_async == 0) the
 * request is handed to rfs_write_sync() instead.
 *
 * The thread that starts a cluster becomes its processor: it translates
 * the file handle, takes the write rwlock (the lock wait is what gives
 * other requests time to join the cluster), performs the writes, and
 * fills in every member's response before broadcasting the cluster's cv.
 * Threads that join an existing cluster simply wait on that cv until
 * their ns_status has been set.
 */
void
rfs_write(struct nfswriteargs *wa, struct nfsattrstat *ns,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error;
	vnode_t *vp;
	rlim64_t rlimit;
	struct vattr va;
	struct uio uio;
	struct rfs_async_write_list *lp;
	struct rfs_async_write_list *nlp;
	struct rfs_async_write *rp;
	struct rfs_async_write *nrp;
	struct rfs_async_write *trp;
	struct rfs_async_write *lrp;
	int data_written;
	int iovcnt;
	mblk_t *m;
	struct iovec *iovp;
	struct iovec *niovp;
	struct iovec iov[MAXCLIOVECS];
	int count;
	int rcount;
	uint_t off;
	uint_t len;
	struct rfs_async_write nrpsp;		/* our request, on our stack */
	struct rfs_async_write_list nlpsp;	/* our cluster, on our stack */
	ushort_t t_flag;
	cred_t *savecred;
	int in_crit = 0;
	caller_context_t ct;
	nfs_srv_t *nsrv;

	/* Per-zone NFS server state; clustering is a per-zone switch. */
	nsrv = zone_getspecific(rfs_zone_key, curzone);
	if (!nsrv->write_async) {
		rfs_write_sync(wa, ns, exi, req, cr, ro);
		return;
	}

	/*
	 * Initialize status to RFSWRITE_INITVAL instead of 0, since value of 0
	 * is considered an OK.
	 */
	ns->ns_status = RFSWRITE_INITVAL;

	nrp = &nrpsp;
	nrp->wa = wa;
	nrp->ns = ns;
	nrp->req = req;
	nrp->cr = cr;
	nrp->ro = ro;
	nrp->thread = curthread;

	/*
	 * Other threads will link to our stack-allocated request and set
	 * flags on our thread, so we must stay resident while queued.
	 */
	ASSERT(curthread->t_schedflag & TS_DONT_SWAP);

	/*
	 * Look to see if there is already a cluster started
	 * for this file.
	 */
	mutex_enter(&nsrv->async_write_lock);
	for (lp = nsrv->async_write_head; lp != NULL; lp = lp->next) {
		if (bcmp(&wa->wa_fhandle, lp->fhp,
		    sizeof (fhandle_t)) == 0)
			break;
	}

	/*
	 * If lp is non-NULL, then there is already a cluster
	 * started.  We need to place ourselves in the cluster
	 * list in the right place as determined by starting
	 * offset.  Conflicts with non-blocking mandatory locked
	 * regions will be checked when the cluster is processed.
	 */
	if (lp != NULL) {
		rp = lp->list;
		trp = NULL;
		/* Insert in ascending wa_offset order. */
		while (rp != NULL && rp->wa->wa_offset < wa->wa_offset) {
			trp = rp;
			rp = rp->list;
		}
		nrp->list = rp;
		if (trp == NULL)
			lp->list = nrp;
		else
			trp->list = nrp;
		/* Wait until the cluster's processor fills in our status. */
		while (nrp->ns->ns_status == RFSWRITE_INITVAL)
			cv_wait(&lp->cv, &nsrv->async_write_lock);
		mutex_exit(&nsrv->async_write_lock);

		return;
	}

	/*
	 * No cluster started yet, start one and add ourselves
	 * to the list of clusters.
	 */
	nrp->list = NULL;

	nlp = &nlpsp;
	nlp->fhp = &wa->wa_fhandle;
	cv_init(&nlp->cv, NULL, CV_DEFAULT, NULL);
	nlp->list = nrp;
	nlp->next = NULL;

	if (nsrv->async_write_head == NULL) {
		nsrv->async_write_head = nlp;
	} else {
		lp = nsrv->async_write_head;
		while (lp->next != NULL)
			lp = lp->next;
		lp->next = nlp;
	}
	mutex_exit(&nsrv->async_write_lock);

	/*
	 * Convert the file handle common to all of the requests
	 * in this cluster to a vnode.
	 */
	vp = nfs_fhtovp(&wa->wa_fhandle, exi);
	if (vp == NULL) {
		/* Unlink the cluster and fail every queued request. */
		mutex_enter(&nsrv->async_write_lock);
		if (nsrv->async_write_head == nlp)
			nsrv->async_write_head = nlp->next;
		else {
			lp = nsrv->async_write_head;
			while (lp->next != nlp)
				lp = lp->next;
			lp->next = nlp->next;
		}
		t_flag = curthread->t_flag & T_WOULDBLOCK;
		for (rp = nlp->list; rp != NULL; rp = rp->list) {
			rp->ns->ns_status = NFSERR_STALE;
			rp->thread->t_flag |= t_flag;
		}
		cv_broadcast(&nlp->cv);
		mutex_exit(&nsrv->async_write_lock);

		return;
	}

	/*
	 * Can only write regular files.  Attempts to write any
	 * other file types fail with EISDIR.
	 */
	if (vp->v_type != VREG) {
		VN_RELE(vp);
		/* Unlink the cluster and fail every queued request. */
		mutex_enter(&nsrv->async_write_lock);
		if (nsrv->async_write_head == nlp)
			nsrv->async_write_head = nlp->next;
		else {
			lp = nsrv->async_write_head;
			while (lp->next != nlp)
				lp = lp->next;
			lp->next = nlp->next;
		}
		t_flag = curthread->t_flag & T_WOULDBLOCK;
		for (rp = nlp->list; rp != NULL; rp = rp->list) {
			rp->ns->ns_status = NFSERR_ISDIR;
			rp->thread->t_flag |= t_flag;
		}
		cv_broadcast(&nlp->cv);
		mutex_exit(&nsrv->async_write_lock);

		return;
	}

	/*
	 * Enter the critical region before calling VOP_RWLOCK, to avoid a
	 * deadlock with ufs.
	 */
	if (nbl_need_check(vp)) {
		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
	}

	ct.cc_sysid = 0;
	ct.cc_pid = 0;
	ct.cc_caller_id = nfs2_srv_caller_id;
	ct.cc_flags = CC_DONTBLOCK;

	/*
	 * Lock the file for writing.  This operation provides
	 * the delay which allows clusters to grow.
	 */
	error = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct);

	/* check if a monitor detected a delegation conflict */
	if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
		if (in_crit)
			nbl_end_crit(vp);
		VN_RELE(vp);
		/* mark as wouldblock so response is dropped */
		curthread->t_flag |= T_WOULDBLOCK;
		/* Unlink the cluster and fail any still-pending requests. */
		mutex_enter(&nsrv->async_write_lock);
		if (nsrv->async_write_head == nlp)
			nsrv->async_write_head = nlp->next;
		else {
			lp = nsrv->async_write_head;
			while (lp->next != nlp)
				lp = lp->next;
			lp->next = nlp->next;
		}
		for (rp = nlp->list; rp != NULL; rp = rp->list) {
			if (rp->ns->ns_status == RFSWRITE_INITVAL) {
				rp->ns->ns_status = puterrno(error);
				rp->thread->t_flag |= T_WOULDBLOCK;
			}
		}
		cv_broadcast(&nlp->cv);
		mutex_exit(&nsrv->async_write_lock);

		return;
	}

	/*
	 * Disconnect this cluster from the list of clusters.
	 * The cluster that is being dealt with must be fixed
	 * in size after this point, so there is no reason
	 * to leave it on the list so that new requests can
	 * find it.
	 *
	 * The algorithm is that the first write request will
	 * create a cluster, convert the file handle to a
	 * vnode pointer, and then lock the file for writing.
	 * This request is not likely to be clustered with
	 * any others.  However, the next request will create
	 * a new cluster and be blocked in VOP_RWLOCK while
	 * the first request is being processed.  This delay
	 * will allow more requests to be clustered in this
	 * second cluster.
	 */
	mutex_enter(&nsrv->async_write_lock);
	if (nsrv->async_write_head == nlp)
		nsrv->async_write_head = nlp->next;
	else {
		lp = nsrv->async_write_head;
		while (lp->next != nlp)
			lp = lp->next;
		lp->next = nlp->next;
	}
	mutex_exit(&nsrv->async_write_lock);

	/*
	 * Step through the list of requests in this cluster.
	 * We need to check permissions to make sure that all
	 * of the requests have sufficient permission to write
	 * the file.  A cluster can be composed of requests
	 * from different clients and different users on each
	 * client.
	 *
	 * As a side effect, we also calculate the size of the
	 * byte range that this cluster encompasses.
	 */
	rp = nlp->list;
	off = rp->wa->wa_offset;
	len = (uint_t)0;
	do {
		if (rdonly(rp->ro, vp)) {
			rp->ns->ns_status = NFSERR_ROFS;
			t_flag = curthread->t_flag & T_WOULDBLOCK;
			rp->thread->t_flag |= t_flag;
			continue;
		}

		va.va_mask = AT_UID|AT_MODE;

		error = VOP_GETATTR(vp, &va, 0, rp->cr, &ct);

		if (!error) {
			if (crgetuid(rp->cr) != va.va_uid) {
				/*
				 * This is a kludge to allow writes of files
				 * created with read only permission.  The
				 * owner of the file is always allowed to
				 * write it.
				 */
				error = VOP_ACCESS(vp, VWRITE, 0, rp->cr, &ct);
			}
			/* Never write a mandatory-locked file; see above. */
			if (!error && MANDLOCK(vp, va.va_mode))
				error = EACCES;
		}

		/*
		 * Check for a conflict with a nbmand-locked region.
		 */
		if (in_crit && nbl_conflict(vp, NBL_WRITE, rp->wa->wa_offset,
		    rp->wa->wa_count, 0, NULL)) {
			error = EACCES;
		}

		if (error) {
			rp->ns->ns_status = puterrno(error);
			t_flag = curthread->t_flag & T_WOULDBLOCK;
			rp->thread->t_flag |= t_flag;
			continue;
		}
		if (len < rp->wa->wa_offset + rp->wa->wa_count - off)
			len = rp->wa->wa_offset + rp->wa->wa_count - off;
	} while ((rp = rp->list) != NULL);

	/*
	 * Step through the cluster attempting to gather as many
	 * requests which are contiguous as possible.  These
	 * contiguous requests are handled via one call to VOP_WRITE
	 * instead of different calls to VOP_WRITE.  We also keep
	 * track of the fact that any data was written.
	 */
	rp = nlp->list;
	data_written = 0;
	do {
		/*
		 * Skip any requests which are already marked as having an
		 * error.
		 */
		if (rp->ns->ns_status != RFSWRITE_INITVAL) {
			rp = rp->list;
			continue;
		}

		/*
		 * Count the number of iovec's which are required
		 * to handle this set of requests.  One iovec is
		 * needed for each data buffer, whether addressed
		 * by wa_data or by the b_rptr pointers in the
		 * mblk chains.
		 */
		iovcnt = 0;
		lrp = rp;
		for (;;) {
			if (lrp->wa->wa_data || lrp->wa->wa_rlist)
				iovcnt++;
			else {
				m = lrp->wa->wa_mblk;
				while (m != NULL) {
					iovcnt++;
					m = m->b_cont;
				}
			}
			/*
			 * Stop extending the run when the next request is
			 * missing, already failed, or not byte-contiguous
			 * with this one.
			 */
			if (lrp->list == NULL ||
			    lrp->list->ns->ns_status != RFSWRITE_INITVAL ||
			    lrp->wa->wa_offset + lrp->wa->wa_count !=
			    lrp->list->wa->wa_offset) {
				lrp = lrp->list;
				break;
			}
			lrp = lrp->list;
		}

		if (iovcnt <= MAXCLIOVECS) {
#ifdef DEBUG
			rfs_write_hits++;
#endif
			niovp = iov;
		} else {
#ifdef DEBUG
			rfs_write_misses++;
#endif
			niovp = kmem_alloc(sizeof (*niovp) * iovcnt, KM_SLEEP);
		}
		/*
		 * Put together the scatter/gather iovecs.
		 */
		iovp = niovp;
		trp = rp;
		count = 0;
		do {
			if (trp->wa->wa_data || trp->wa->wa_rlist) {
				if (trp->wa->wa_rlist) {
					iovp->iov_base =
					    (char *)((trp->wa->wa_rlist)->
					    u.c_daddr3);
					iovp->iov_len = trp->wa->wa_count;
				} else {
					iovp->iov_base = trp->wa->wa_data;
					iovp->iov_len = trp->wa->wa_count;
				}
				iovp++;
			} else {
				m = trp->wa->wa_mblk;
				rcount = trp->wa->wa_count;
				/*
				 * Walk the mblk chain, clamping the final
				 * fragment so no more than wa_count bytes
				 * are contributed.
				 */
				while (m != NULL) {
					iovp->iov_base = (caddr_t)m->b_rptr;
					iovp->iov_len = (m->b_wptr - m->b_rptr);
					rcount -= iovp->iov_len;
					if (rcount < 0)
						iovp->iov_len += rcount;
					iovp++;
					if (rcount <= 0)
						break;
					m = m->b_cont;
				}
			}
			count += trp->wa->wa_count;
			trp = trp->list;
		} while (trp != lrp);

		uio.uio_iov = niovp;
		uio.uio_iovcnt = iovcnt;
		uio.uio_segflg = UIO_SYSSPACE;
		uio.uio_extflg = UIO_COPY_DEFAULT;
		uio.uio_loffset = (offset_t)rp->wa->wa_offset;
		uio.uio_resid = count;
		/*
		 * The limit is checked on the client.  We
		 * should allow any size writes here.
		 */
		uio.uio_llimit = curproc->p_fsz_ctl;
		rlimit = uio.uio_llimit - rp->wa->wa_offset;
		if (rlimit < (rlim64_t)uio.uio_resid)
			uio.uio_resid = (uint_t)rlimit;

		/*
		 * For now we assume no append mode.
		 */

		/*
		 * We're changing creds because VM may fault
		 * and we need the cred of the current
		 * thread to be used if quota checking is
		 * enabled.
		 */
		savecred = curthread->t_cred;
		curthread->t_cred = cr;
		/*
		 * No FSYNC here (unlike rfs_write_sync); stability is
		 * provided by the single VOP_PUTPAGE/VOP_FSYNC pass over
		 * the whole cluster range below.
		 */
		error = VOP_WRITE(vp, &uio, 0, rp->cr, &ct);
		curthread->t_cred = savecred;

		/* check if a monitor detected a delegation conflict */
		if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
			/* mark as wouldblock so response is dropped */
			curthread->t_flag |= T_WOULDBLOCK;

		if (niovp != iov)
			kmem_free(niovp, sizeof (*niovp) * iovcnt);

		if (!error) {
			data_written = 1;
			/*
			 * Get attributes again so we send the latest mod
			 * time to the client side for its cache.
			 */
			va.va_mask = AT_ALL;	/* now we want everything */

			error = VOP_GETATTR(vp, &va, 0, rp->cr, &ct);

			if (!error)
				acl_perm(vp, exi, &va, rp->cr);
		}

		/*
		 * Fill in the status responses for each request
		 * which was just handled.  Also, copy the latest
		 * attributes in to the attribute responses if
		 * appropriate.
		 */
		t_flag = curthread->t_flag & T_WOULDBLOCK;
		do {
			rp->thread->t_flag |= t_flag;
			/* check for overflows */
			if (!error) {
				error = vattr_to_nattr(&va, &rp->ns->ns_attr);
			}
			rp->ns->ns_status = puterrno(error);
			rp = rp->list;
		} while (rp != lrp);
	} while (rp != NULL);

	/*
	 * If any data was written at all, then we need to flush
	 * the data and metadata to stable storage.
	 */
	if (data_written) {
		error = VOP_PUTPAGE(vp, (u_offset_t)off, len, 0, cr, &ct);

		if (!error) {
			error = VOP_FSYNC(vp, FNODSYNC, cr, &ct);
		}
	}

	VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct);

	if (in_crit)
		nbl_end_crit(vp);
	VN_RELE(vp);

	/*
	 * Wake everyone still waiting on this cluster, propagating any
	 * final flush error to requests that have no status yet.
	 */
	t_flag = curthread->t_flag & T_WOULDBLOCK;
	mutex_enter(&nsrv->async_write_lock);
	for (rp = nlp->list; rp != NULL; rp = rp->list) {
		if (rp->ns->ns_status == RFSWRITE_INITVAL) {
			rp->ns->ns_status = puterrno(error);
			rp->thread->t_flag |= t_flag;
		}
	}
	cv_broadcast(&nlp->cv);
	mutex_exit(&nsrv->async_write_lock);

}
1778 1778
1779 1779 void *
1780 1780 rfs_write_getfh(struct nfswriteargs *wa)
1781 1781 {
1782 1782 return (&wa->wa_fhandle);
1783 1783 }
1784 1784
1785 1785 /*
1786 1786 * Create a file.
1787 1787 * Creates a file with given attributes and returns those attributes
1788 1788 * and an fhandle for the new file.
1789 1789 */
1790 1790 void
1791 1791 rfs_create(struct nfscreatargs *args, struct nfsdiropres *dr,
1792 1792 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
1793 1793 {
1794 1794 int error;
1795 1795 int lookuperr;
1796 1796 int in_crit = 0;
1797 1797 struct vattr va;
1798 1798 vnode_t *vp;
1799 1799 vnode_t *realvp;
1800 1800 vnode_t *dvp;
1801 1801 char *name = args->ca_da.da_name;
1802 1802 vnode_t *tvp = NULL;
1803 1803 int mode;
1804 1804 int lookup_ok;
1805 1805 bool_t trunc;
1806 1806 struct sockaddr *ca;
1807 1807
1808 1808 /*
1809 1809 * Disallow NULL paths
1810 1810 */
1811 1811 if (name == NULL || *name == '\0') {
1812 1812 dr->dr_status = NFSERR_ACCES;
1813 1813 return;
1814 1814 }
1815 1815
1816 1816 dvp = nfs_fhtovp(args->ca_da.da_fhandle, exi);
1817 1817 if (dvp == NULL) {
1818 1818 dr->dr_status = NFSERR_STALE;
1819 1819 return;
1820 1820 }
1821 1821
1822 1822 error = sattr_to_vattr(args->ca_sa, &va);
1823 1823 if (error) {
1824 1824 dr->dr_status = puterrno(error);
1825 1825 return;
1826 1826 }
1827 1827
1828 1828 /*
1829 1829 * Must specify the mode.
1830 1830 */
1831 1831 if (!(va.va_mask & AT_MODE)) {
1832 1832 VN_RELE(dvp);
1833 1833 dr->dr_status = NFSERR_INVAL;
1834 1834 return;
1835 1835 }
1836 1836
1837 1837 /*
1838 1838 * This is a completely gross hack to make mknod
1839 1839 * work over the wire until we can wack the protocol
1840 1840 */
1841 1841 if ((va.va_mode & IFMT) == IFCHR) {
1842 1842 if (args->ca_sa->sa_size == (uint_t)NFS_FIFO_DEV)
1843 1843 va.va_type = VFIFO; /* xtra kludge for named pipe */
1844 1844 else {
1845 1845 va.va_type = VCHR;
1846 1846 /*
1847 1847 * uncompress the received dev_t
1848 1848 * if the top half is zero indicating a request
1849 1849 * from an `older style' OS.
1850 1850 */
1851 1851 if ((va.va_size & 0xffff0000) == 0)
1852 1852 va.va_rdev = nfsv2_expdev(va.va_size);
1853 1853 else
1854 1854 va.va_rdev = (dev_t)va.va_size;
1855 1855 }
1856 1856 va.va_mask &= ~AT_SIZE;
1857 1857 } else if ((va.va_mode & IFMT) == IFBLK) {
1858 1858 va.va_type = VBLK;
1859 1859 /*
1860 1860 * uncompress the received dev_t
1861 1861 * if the top half is zero indicating a request
1862 1862 * from an `older style' OS.
1863 1863 */
1864 1864 if ((va.va_size & 0xffff0000) == 0)
1865 1865 va.va_rdev = nfsv2_expdev(va.va_size);
1866 1866 else
1867 1867 va.va_rdev = (dev_t)va.va_size;
1868 1868 va.va_mask &= ~AT_SIZE;
1869 1869 } else if ((va.va_mode & IFMT) == IFSOCK) {
1870 1870 va.va_type = VSOCK;
1871 1871 } else {
1872 1872 va.va_type = VREG;
1873 1873 }
1874 1874 va.va_mode &= ~IFMT;
1875 1875 va.va_mask |= AT_TYPE;
1876 1876
1877 1877 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1878 1878 name = nfscmd_convname(ca, exi, name, NFSCMD_CONV_INBOUND,
1879 1879 MAXPATHLEN);
1880 1880 if (name == NULL) {
1881 1881 dr->dr_status = puterrno(EINVAL);
1882 1882 return;
1883 1883 }
1884 1884
1885 1885 /*
1886 1886 * Why was the choice made to use VWRITE as the mode to the
1887 1887 * call to VOP_CREATE ? This results in a bug. When a client
1888 1888 * opens a file that already exists and is RDONLY, the second
1889 1889 * open fails with an EACESS because of the mode.
1890 1890 * bug ID 1054648.
1891 1891 */
1892 1892 lookup_ok = 0;
1893 1893 mode = VWRITE;
1894 1894 if (!(va.va_mask & AT_SIZE) || va.va_type != VREG) {
1895 1895 error = VOP_LOOKUP(dvp, name, &tvp, NULL, 0, NULL, cr,
1896 1896 NULL, NULL, NULL);
1897 1897 if (!error) {
1898 1898 struct vattr at;
1899 1899
1900 1900 lookup_ok = 1;
1901 1901 at.va_mask = AT_MODE;
1902 1902 error = VOP_GETATTR(tvp, &at, 0, cr, NULL);
1903 1903 if (!error)
1904 1904 mode = (at.va_mode & S_IWUSR) ? VWRITE : VREAD;
1905 1905 VN_RELE(tvp);
1906 1906 tvp = NULL;
1907 1907 }
1908 1908 }
1909 1909
1910 1910 if (!lookup_ok) {
1911 1911 if (rdonly(ro, dvp)) {
1912 1912 error = EROFS;
1913 1913 } else if (va.va_type != VREG && va.va_type != VFIFO &&
1914 1914 va.va_type != VSOCK && secpolicy_sys_devices(cr) != 0) {
1915 1915 error = EPERM;
1916 1916 } else {
1917 1917 error = 0;
1918 1918 }
1919 1919 }
1920 1920
1921 1921 /*
1922 1922 * If file size is being modified on an already existing file
1923 1923 * make sure that there are no conflicting non-blocking mandatory
1924 1924 * locks in the region being manipulated. Return EACCES if there
1925 1925 * are conflicting locks.
1926 1926 */
1927 1927 if (!error && (va.va_type == VREG) && (va.va_mask & AT_SIZE)) {
1928 1928 lookuperr = VOP_LOOKUP(dvp, name, &tvp, NULL, 0, NULL, cr,
1929 1929 NULL, NULL, NULL);
1930 1930
1931 1931 if (!lookuperr &&
1932 1932 rfs4_check_delegated(FWRITE, tvp, va.va_size == 0)) {
1933 1933 VN_RELE(tvp);
1934 1934 curthread->t_flag |= T_WOULDBLOCK;
1935 1935 goto out;
1936 1936 }
1937 1937
1938 1938 if (!lookuperr && nbl_need_check(tvp)) {
1939 1939 /*
1940 1940 * The file exists. Now check if it has any
1941 1941 * conflicting non-blocking mandatory locks
1942 1942 * in the region being changed.
1943 1943 */
1944 1944 struct vattr bva;
1945 1945 u_offset_t offset;
1946 1946 ssize_t length;
1947 1947
1948 1948 nbl_start_crit(tvp, RW_READER);
1949 1949 in_crit = 1;
1950 1950
1951 1951 bva.va_mask = AT_SIZE;
1952 1952 error = VOP_GETATTR(tvp, &bva, 0, cr, NULL);
1953 1953 if (!error) {
1954 1954 if (va.va_size < bva.va_size) {
1955 1955 offset = va.va_size;
1956 1956 length = bva.va_size - va.va_size;
1957 1957 } else {
1958 1958 offset = bva.va_size;
1959 1959 length = va.va_size - bva.va_size;
1960 1960 }
1961 1961 if (length) {
1962 1962 if (nbl_conflict(tvp, NBL_WRITE,
1963 1963 offset, length, 0, NULL)) {
1964 1964 error = EACCES;
1965 1965 }
1966 1966 }
1967 1967 }
1968 1968 if (error) {
1969 1969 nbl_end_crit(tvp);
1970 1970 VN_RELE(tvp);
1971 1971 in_crit = 0;
1972 1972 }
1973 1973 } else if (tvp != NULL) {
1974 1974 VN_RELE(tvp);
1975 1975 }
1976 1976 }
1977 1977
1978 1978 if (!error) {
1979 1979 /*
1980 1980 * If filesystem is shared with nosuid the remove any
1981 1981 * setuid/setgid bits on create.
1982 1982 */
1983 1983 if (va.va_type == VREG &&
1984 1984 exi->exi_export.ex_flags & EX_NOSUID)
1985 1985 va.va_mode &= ~(VSUID | VSGID);
1986 1986
1987 1987 error = VOP_CREATE(dvp, name, &va, NONEXCL, mode, &vp, cr, 0,
1988 1988 NULL, NULL);
1989 1989
1990 1990 if (!error) {
1991 1991
1992 1992 if ((va.va_mask & AT_SIZE) && (va.va_size == 0))
1993 1993 trunc = TRUE;
1994 1994 else
1995 1995 trunc = FALSE;
1996 1996
1997 1997 if (rfs4_check_delegated(FWRITE, vp, trunc)) {
1998 1998 VN_RELE(vp);
1999 1999 curthread->t_flag |= T_WOULDBLOCK;
2000 2000 goto out;
2001 2001 }
2002 2002 va.va_mask = AT_ALL;
2003 2003
2004 2004 error = VOP_GETATTR(vp, &va, 0, cr, NULL);
2005 2005
2006 2006 /* check for overflows */
2007 2007 if (!error) {
2008 2008 acl_perm(vp, exi, &va, cr);
2009 2009 error = vattr_to_nattr(&va, &dr->dr_attr);
2010 2010 if (!error) {
2011 2011 error = makefh(&dr->dr_fhandle, vp,
2012 2012 exi);
2013 2013 }
2014 2014 }
2015 2015 /*
2016 2016 * Force modified metadata out to stable storage.
2017 2017 *
2018 2018 * if a underlying vp exists, pass it to VOP_FSYNC
2019 2019 */
2020 2020 if (VOP_REALVP(vp, &realvp, NULL) == 0)
2021 2021 (void) VOP_FSYNC(realvp, FNODSYNC, cr, NULL);
2022 2022 else
2023 2023 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
2024 2024 VN_RELE(vp);
2025 2025 }
2026 2026
2027 2027 if (in_crit) {
2028 2028 nbl_end_crit(tvp);
2029 2029 VN_RELE(tvp);
2030 2030 }
2031 2031 }
2032 2032
2033 2033 /*
2034 2034 * Force modified data and metadata out to stable storage.
2035 2035 */
2036 2036 (void) VOP_FSYNC(dvp, 0, cr, NULL);
2037 2037
2038 2038 out:
2039 2039
2040 2040 VN_RELE(dvp);
2041 2041
2042 2042 dr->dr_status = puterrno(error);
2043 2043
2044 2044 if (name != args->ca_da.da_name)
2045 2045 kmem_free(name, MAXPATHLEN);
2046 2046 }
2047 2047 void *
2048 2048 rfs_create_getfh(struct nfscreatargs *args)
2049 2049 {
2050 2050 return (args->ca_da.da_fhandle);
2051 2051 }
2052 2052
/*
 * Remove a file.
 * Remove named file from parent directory.
 *
 * Before performing the remove, the target is looked up so we can
 * (1) recall any NFSv4 write delegation on it (dropping this request
 * with T_WOULDBLOCK so the client retries) and (2) check for conflicts
 * with non-blocking mandatory share reservations.
 */
/* ARGSUSED */
void
rfs_remove(struct nfsdiropargs *da, enum nfsstat *status,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error = 0;
	vnode_t *vp;		/* directory containing the entry */
	vnode_t *targvp;	/* the entry being removed */
	int in_crit = 0;	/* inside nbl critical region on targvp */

	/*
	 * Disallow NULL paths
	 */
	if (da->da_name == NULL || *da->da_name == '\0') {
		*status = NFSERR_ACCES;
		return;
	}

	vp = nfs_fhtovp(da->da_fhandle, exi);
	if (vp == NULL) {
		*status = NFSERR_STALE;
		return;
	}

	if (rdonly(ro, vp)) {
		VN_RELE(vp);
		*status = NFSERR_ROFS;
		return;
	}

	/*
	 * Check for a conflict with a non-blocking mandatory share reservation.
	 */
	error = VOP_LOOKUP(vp, da->da_name, &targvp, NULL, 0,
	    NULL, cr, NULL, NULL, NULL);
	if (error != 0) {
		VN_RELE(vp);
		*status = puterrno(error);
		return;
	}

	/*
	 * If the file is delegated to an v4 client, then initiate
	 * recall and drop this request (by setting T_WOULDBLOCK).
	 * The client will eventually re-transmit the request and
	 * (hopefully), by then, the v4 client will have returned
	 * the delegation.
	 */

	if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
		VN_RELE(vp);
		VN_RELE(targvp);
		curthread->t_flag |= T_WOULDBLOCK;
		return;
	}

	if (nbl_need_check(targvp)) {
		nbl_start_crit(targvp, RW_READER);
		in_crit = 1;
		if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
			error = EACCES;
			goto out;
		}
	}

	error = VOP_REMOVE(vp, da->da_name, cr, NULL, 0);

	/*
	 * Force modified data and metadata out to stable storage.
	 */
	(void) VOP_FSYNC(vp, 0, cr, NULL);

out:
	if (in_crit)
		nbl_end_crit(targvp);
	VN_RELE(targvp);
	VN_RELE(vp);

	*status = puterrno(error);

}
2138 2138
2139 2139 void *
2140 2140 rfs_remove_getfh(struct nfsdiropargs *da)
2141 2141 {
2142 2142 return (da->da_fhandle);
2143 2143 }
2144 2144
/*
 * rename a file
 * Give a file (from) a new name (to).
 *
 * NFSv2 RENAME.  Both directory handles must resolve within the export
 * this request arrived on; cross-export renames fail with NFSERR_XDEV.
 */
/* ARGSUSED */
void
rfs_rename(struct nfsrnmargs *args, enum nfsstat *status,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error = 0;
	vnode_t *fromvp;	/* source parent directory */
	vnode_t *tovp;		/* target parent directory */
	struct exportinfo *to_exi;
	fhandle_t *fh;
	vnode_t *srcvp;		/* the object being renamed */
	vnode_t *targvp;	/* existing object being renamed over, if any */
	int in_crit = 0;	/* nonzero iff srcvp is inside an NBL critical region */

	fromvp = nfs_fhtovp(args->rna_from.da_fhandle, exi);
	if (fromvp == NULL) {
		*status = NFSERR_STALE;
		return;
	}

	/*
	 * The target handle must belong to some export, and to the same
	 * export this request was received for.  Only the pointer value
	 * of to_exi is compared after its hold is dropped.
	 */
	fh = args->rna_to.da_fhandle;
	to_exi = checkexport(&fh->fh_fsid, (fid_t *)&fh->fh_xlen);
	if (to_exi == NULL) {
		VN_RELE(fromvp);
		*status = NFSERR_ACCES;
		return;
	}
	exi_rele(to_exi);

	if (to_exi != exi) {
		VN_RELE(fromvp);
		*status = NFSERR_XDEV;
		return;
	}

	tovp = nfs_fhtovp(args->rna_to.da_fhandle, exi);
	if (tovp == NULL) {
		VN_RELE(fromvp);
		*status = NFSERR_STALE;
		return;
	}

	/* Both handles name the parent directories, so both must be VDIR. */
	if (fromvp->v_type != VDIR || tovp->v_type != VDIR) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_NOTDIR;
		return;
	}

	/*
	 * Disallow NULL paths
	 */
	if (args->rna_from.da_name == NULL || *args->rna_from.da_name == '\0' ||
	    args->rna_to.da_name == NULL || *args->rna_to.da_name == '\0') {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_ACCES;
		return;
	}

	if (rdonly(ro, tovp)) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_ROFS;
		return;
	}

	/*
	 * Check for a conflict with a non-blocking mandatory share reservation.
	 */
	error = VOP_LOOKUP(fromvp, args->rna_from.da_name, &srcvp, NULL, 0,
	    NULL, cr, NULL, NULL, NULL);
	if (error != 0) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = puterrno(error);
		return;
	}

	/* Check for delegations on the source file */

	if (rfs4_check_delegated(FWRITE, srcvp, FALSE)) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		VN_RELE(srcvp);
		/* Drop the request; client retries once the delegation returns. */
		curthread->t_flag |= T_WOULDBLOCK;
		return;
	}

	/* Check for delegation on the file being renamed over, if it exists */

	if (nfs4_get_deleg_policy() != SRV_NEVER_DELEGATE &&
	    VOP_LOOKUP(tovp, args->rna_to.da_name, &targvp, NULL, 0, NULL, cr,
	    NULL, NULL, NULL) == 0) {

		if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
			VN_RELE(tovp);
			VN_RELE(fromvp);
			VN_RELE(srcvp);
			VN_RELE(targvp);
			curthread->t_flag |= T_WOULDBLOCK;
			return;
		}
		VN_RELE(targvp);
	}


	if (nbl_need_check(srcvp)) {
		nbl_start_crit(srcvp, RW_READER);
		in_crit = 1;
		if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL)) {
			error = EACCES;
			goto out;
		}
	}

	error = VOP_RENAME(fromvp, args->rna_from.da_name,
	    tovp, args->rna_to.da_name, cr, NULL, 0);

	/* On success, record the object's new name (vn_renamepath). */
	if (error == 0)
		vn_renamepath(tovp, srcvp, args->rna_to.da_name,
		    strlen(args->rna_to.da_name));

	/*
	 * Force modified data and metadata out to stable storage.
	 */
	(void) VOP_FSYNC(tovp, 0, cr, NULL);
	(void) VOP_FSYNC(fromvp, 0, cr, NULL);

out:
	if (in_crit)
		nbl_end_crit(srcvp);
	VN_RELE(srcvp);
	VN_RELE(tovp);
	VN_RELE(fromvp);

	*status = puterrno(error);

}
2288 2288 void *
2289 2289 rfs_rename_getfh(struct nfsrnmargs *args)
2290 2290 {
2291 2291 return (args->rna_from.da_fhandle);
2292 2292 }
2293 2293
/*
 * Link to a file.
 * Create a file (to) which is a hard link to the given file (from).
 */
/* ARGSUSED */
void
rfs_link(struct nfslinkargs *args, enum nfsstat *status,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error;
	vnode_t *fromvp;	/* the existing file gaining a new name */
	vnode_t *tovp;		/* directory in which the link is created */
	struct exportinfo *to_exi;
	fhandle_t *fh;

	fromvp = nfs_fhtovp(args->la_from, exi);
	if (fromvp == NULL) {
		*status = NFSERR_STALE;
		return;
	}

	/*
	 * The target directory handle must belong to the same export as
	 * this request; only the pointer value of to_exi is compared
	 * after its hold from checkexport() is released.
	 */
	fh = args->la_to.da_fhandle;
	to_exi = checkexport(&fh->fh_fsid, (fid_t *)&fh->fh_xlen);
	if (to_exi == NULL) {
		VN_RELE(fromvp);
		*status = NFSERR_ACCES;
		return;
	}
	exi_rele(to_exi);

	if (to_exi != exi) {
		VN_RELE(fromvp);
		*status = NFSERR_XDEV;
		return;
	}

	tovp = nfs_fhtovp(args->la_to.da_fhandle, exi);
	if (tovp == NULL) {
		VN_RELE(fromvp);
		*status = NFSERR_STALE;
		return;
	}

	if (tovp->v_type != VDIR) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_NOTDIR;
		return;
	}
	/*
	 * Disallow NULL paths
	 */
	if (args->la_to.da_name == NULL || *args->la_to.da_name == '\0') {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_ACCES;
		return;
	}

	if (rdonly(ro, tovp)) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_ROFS;
		return;
	}

	error = VOP_LINK(tovp, fromvp, args->la_to.da_name, cr, NULL, 0);

	/*
	 * Force modified data and metadata out to stable storage.
	 */
	(void) VOP_FSYNC(tovp, 0, cr, NULL);
	(void) VOP_FSYNC(fromvp, FNODSYNC, cr, NULL);

	VN_RELE(tovp);
	VN_RELE(fromvp);

	*status = puterrno(error);

}
2374 2374 void *
2375 2375 rfs_link_getfh(struct nfslinkargs *args)
2376 2376 {
2377 2377 return (args->la_from);
2378 2378 }
2379 2379
2380 2380 /*
2381 2381 * Symbolicly link to a file.
2382 2382 * Create a file (to) with the given attributes which is a symbolic link
2383 2383 * to the given path name (to).
2384 2384 */
2385 2385 void
2386 2386 rfs_symlink(struct nfsslargs *args, enum nfsstat *status,
2387 2387 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
2388 2388 {
2389 2389 int error;
2390 2390 struct vattr va;
2391 2391 vnode_t *vp;
2392 2392 vnode_t *svp;
2393 2393 int lerror;
2394 2394 struct sockaddr *ca;
2395 2395 char *name = NULL;
2396 2396
2397 2397 /*
2398 2398 * Disallow NULL paths
2399 2399 */
2400 2400 if (args->sla_from.da_name == NULL || *args->sla_from.da_name == '\0') {
2401 2401 *status = NFSERR_ACCES;
2402 2402 return;
2403 2403 }
2404 2404
2405 2405 vp = nfs_fhtovp(args->sla_from.da_fhandle, exi);
2406 2406 if (vp == NULL) {
2407 2407 *status = NFSERR_STALE;
2408 2408 return;
2409 2409 }
2410 2410
2411 2411 if (rdonly(ro, vp)) {
2412 2412 VN_RELE(vp);
2413 2413 *status = NFSERR_ROFS;
2414 2414 return;
2415 2415 }
2416 2416
2417 2417 error = sattr_to_vattr(args->sla_sa, &va);
2418 2418 if (error) {
2419 2419 VN_RELE(vp);
2420 2420 *status = puterrno(error);
2421 2421 return;
2422 2422 }
2423 2423
2424 2424 if (!(va.va_mask & AT_MODE)) {
2425 2425 VN_RELE(vp);
2426 2426 *status = NFSERR_INVAL;
2427 2427 return;
2428 2428 }
2429 2429
2430 2430 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2431 2431 name = nfscmd_convname(ca, exi, args->sla_tnm,
2432 2432 NFSCMD_CONV_INBOUND, MAXPATHLEN);
2433 2433
2434 2434 if (name == NULL) {
2435 2435 *status = NFSERR_ACCES;
2436 2436 return;
2437 2437 }
2438 2438
2439 2439 va.va_type = VLNK;
2440 2440 va.va_mask |= AT_TYPE;
2441 2441
2442 2442 error = VOP_SYMLINK(vp, args->sla_from.da_name, &va, name, cr, NULL, 0);
2443 2443
2444 2444 /*
2445 2445 * Force new data and metadata out to stable storage.
2446 2446 */
2447 2447 lerror = VOP_LOOKUP(vp, args->sla_from.da_name, &svp, NULL, 0,
2448 2448 NULL, cr, NULL, NULL, NULL);
2449 2449
2450 2450 if (!lerror) {
2451 2451 (void) VOP_FSYNC(svp, 0, cr, NULL);
2452 2452 VN_RELE(svp);
2453 2453 }
2454 2454
2455 2455 /*
2456 2456 * Force modified data and metadata out to stable storage.
2457 2457 */
2458 2458 (void) VOP_FSYNC(vp, 0, cr, NULL);
2459 2459
2460 2460 VN_RELE(vp);
2461 2461
2462 2462 *status = puterrno(error);
2463 2463 if (name != args->sla_tnm)
2464 2464 kmem_free(name, MAXPATHLEN);
2465 2465
2466 2466 }
2467 2467 void *
2468 2468 rfs_symlink_getfh(struct nfsslargs *args)
2469 2469 {
2470 2470 return (args->sla_from.da_fhandle);
2471 2471 }
2472 2472
/*
 * Make a directory.
 * Create a directory with the given name, parent directory, and attributes.
 * Returns a file handle and attributes for the new directory.
 */
/* ARGSUSED */
void
rfs_mkdir(struct nfscreatargs *args, struct nfsdiropres *dr,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error;
	struct vattr va;
	vnode_t *dvp = NULL;	/* the newly created directory */
	vnode_t *vp;		/* the parent directory */
	char *name = args->ca_da.da_name;

	/*
	 * Disallow NULL paths
	 */
	if (name == NULL || *name == '\0') {
		dr->dr_status = NFSERR_ACCES;
		return;
	}

	vp = nfs_fhtovp(args->ca_da.da_fhandle, exi);
	if (vp == NULL) {
		dr->dr_status = NFSERR_STALE;
		return;
	}

	if (rdonly(ro, vp)) {
		VN_RELE(vp);
		dr->dr_status = NFSERR_ROFS;
		return;
	}

	error = sattr_to_vattr(args->ca_sa, &va);
	if (error) {
		VN_RELE(vp);
		dr->dr_status = puterrno(error);
		return;
	}

	/* The client must supply an explicit mode for the new directory. */
	if (!(va.va_mask & AT_MODE)) {
		VN_RELE(vp);
		dr->dr_status = NFSERR_INVAL;
		return;
	}

	va.va_type = VDIR;
	va.va_mask |= AT_TYPE;

	error = VOP_MKDIR(vp, name, &va, &dvp, cr, NULL, 0, NULL);

	if (!error) {
		/*
		 * Attributes of the newly created directory should
		 * be returned to the client.
		 */
		va.va_mask = AT_ALL; /* We want everything */
		error = VOP_GETATTR(dvp, &va, 0, cr, NULL);

		/* check for overflows */
		if (!error) {
			acl_perm(vp, exi, &va, cr);
			error = vattr_to_nattr(&va, &dr->dr_attr);
			if (!error) {
				error = makefh(&dr->dr_fhandle, dvp, exi);
			}
		}
		/*
		 * Force new data and metadata out to stable storage.
		 */
		(void) VOP_FSYNC(dvp, 0, cr, NULL);
		VN_RELE(dvp);
	}

	/*
	 * Force modified data and metadata out to stable storage.
	 */
	(void) VOP_FSYNC(vp, 0, cr, NULL);

	VN_RELE(vp);

	dr->dr_status = puterrno(error);

}
2560 2560 void *
2561 2561 rfs_mkdir_getfh(struct nfscreatargs *args)
2562 2562 {
2563 2563 return (args->ca_da.da_fhandle);
2564 2564 }
2565 2565
2566 2566 /*
2567 2567 * Remove a directory.
2568 2568 * Remove the given directory name from the given parent directory.
2569 2569 */
2570 2570 /* ARGSUSED */
2571 2571 void
2572 2572 rfs_rmdir(struct nfsdiropargs *da, enum nfsstat *status,
2573 2573 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
2574 2574 {
2575 2575 int error;
2576 2576 vnode_t *vp;
2577 2577
2578 2578 /*
2579 2579 * Disallow NULL paths
2580 2580 */
2581 2581 if (da->da_name == NULL || *da->da_name == '\0') {
2582 2582 *status = NFSERR_ACCES;
2583 2583 return;
2584 2584 }
2585 2585
2586 2586 vp = nfs_fhtovp(da->da_fhandle, exi);
2587 2587 if (vp == NULL) {
2588 2588 *status = NFSERR_STALE;
2589 2589 return;
2590 2590 }
2591 2591
2592 2592 if (rdonly(ro, vp)) {
2593 2593 VN_RELE(vp);
2594 2594 *status = NFSERR_ROFS;
2595 2595 return;
2596 2596 }
2597 2597
2598 2598 /*
2599 2599 * VOP_RMDIR takes a third argument (the current
2600 2600 * directory of the process). That's because someone
2601 2601 * wants to return EINVAL if one tries to remove ".".
2602 2602 * Of course, NFS servers have no idea what their
2603 2603 * clients' current directories are. We fake it by
2604 2604 * supplying a vnode known to exist and illegal to
2605 2605 * remove.
2606 2606 */
2607 2607 error = VOP_RMDIR(vp, da->da_name, ZONE_ROOTVP(), cr, NULL, 0);
2608 2608
2609 2609 /*
2610 2610 * Force modified data and metadata out to stable storage.
2611 2611 */
2612 2612 (void) VOP_FSYNC(vp, 0, cr, NULL);
2613 2613
2614 2614 VN_RELE(vp);
2615 2615
2616 2616 /*
2617 2617 * System V defines rmdir to return EEXIST, not ENOTEMPTY,
2618 2618 * if the directory is not empty. A System V NFS server
2619 2619 * needs to map NFSERR_EXIST to NFSERR_NOTEMPTY to transmit
2620 2620 * over the wire.
2621 2621 */
2622 2622 if (error == EEXIST)
2623 2623 *status = NFSERR_NOTEMPTY;
2624 2624 else
2625 2625 *status = puterrno(error);
2626 2626
2627 2627 }
2628 2628 void *
2629 2629 rfs_rmdir_getfh(struct nfsdiropargs *da)
2630 2630 {
2631 2631 return (da->da_fhandle);
2632 2632 }
2633 2633
/*
 * Read entries from a directory.  The entry buffer allocated here is
 * attached to the result and later released by rfs_rddirfree().
 */
/* ARGSUSED */
void
rfs_readdir(struct nfsrddirargs *rda, struct nfsrddirres *rd,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error;
	int iseof;		/* set by VOP_READDIR at end of directory */
	struct iovec iov;
	struct uio uio;
	vnode_t *vp;
	char *ndata = NULL;	/* charset-converted entries, if any */
	struct sockaddr *ca;
	size_t nents;
	int ret;

	vp = nfs_fhtovp(&rda->rda_fh, exi);
	if (vp == NULL) {
		rd->rd_entries = NULL;
		rd->rd_status = NFSERR_STALE;
		return;
	}

	if (vp->v_type != VDIR) {
		VN_RELE(vp);
		rd->rd_entries = NULL;
		rd->rd_status = NFSERR_NOTDIR;
		return;
	}

	/* Reader lock held across both the access check and the read. */
	(void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);

	error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);

	if (error) {
		rd->rd_entries = NULL;
		goto bad;
	}

	/* A zero-byte request returns an empty, non-EOF result. */
	if (rda->rda_count == 0) {
		rd->rd_entries = NULL;
		rd->rd_size = 0;
		rd->rd_eof = FALSE;
		goto bad;
	}

	/* Clamp the client's request to the protocol's transfer limit. */
	rda->rda_count = MIN(rda->rda_count, NFS_MAXDATA);

	/*
	 * Allocate data for entries.  This will be freed by rfs_rddirfree.
	 */
	rd->rd_bufsize = (uint_t)rda->rda_count;
	rd->rd_entries = kmem_alloc(rd->rd_bufsize, KM_SLEEP);

	/*
	 * Set up io vector to read directory data
	 */
	iov.iov_base = (caddr_t)rd->rd_entries;
	iov.iov_len = rda->rda_count;
	uio.uio_iov = &iov;
	uio.uio_iovcnt = 1;
	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_extflg = UIO_COPY_CACHED;
	uio.uio_loffset = (offset_t)rda->rda_offset;
	uio.uio_resid = rda->rda_count;

	/*
	 * read directory
	 */
	error = VOP_READDIR(vp, &uio, cr, &iseof, NULL, 0);

	/*
	 * Clean up
	 */
	if (!error) {
		/*
		 * set size and eof: a read that consumed nothing means
		 * there were no more entries, so report EOF.
		 */
		if (uio.uio_resid == rda->rda_count) {
			rd->rd_size = 0;
			rd->rd_eof = TRUE;
		} else {
			rd->rd_size = (uint32_t)(rda->rda_count -
			    uio.uio_resid);
			rd->rd_eof = iseof ? TRUE : FALSE;
		}
	}

	/*
	 * Character-set conversion of the entry names for this client.
	 * NOTE(review): this conversion runs even when VOP_READDIR()
	 * failed, in which case rd_size was not set by this call --
	 * presumably the result structure arrives zeroed; verify at the
	 * dispatcher before relying on that.
	 */
	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
	nents = nfscmd_countents((char *)rd->rd_entries, rd->rd_size);
	ret = nfscmd_convdirplus(ca, exi, (char *)rd->rd_entries, nents,
	    rda->rda_count, &ndata);

	if (ret != 0) {
		size_t dropbytes;
		/*
		 * We had to drop one or more entries in order to fit
		 * during the character conversion.  We need to patch
		 * up the size and eof info.
		 */
		if (rd->rd_eof)
			rd->rd_eof = FALSE;
		dropbytes = nfscmd_dropped_entrysize(
		    (struct dirent64 *)rd->rd_entries, nents, ret);
		rd->rd_size -= dropbytes;
	}
	/* Swap the converted buffer in for the original, if one was made. */
	if (ndata == NULL) {
		ndata = (char *)rd->rd_entries;
	} else if (ndata != (char *)rd->rd_entries) {
		kmem_free(rd->rd_entries, rd->rd_bufsize);
		rd->rd_entries = (void *)ndata;
		rd->rd_bufsize = rda->rda_count;
	}

bad:
	VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);

#if 0 /* notyet */
	/*
	 * Don't do this.  It causes local disk writes when just
	 * reading the file and the overhead is deemed larger
	 * than the benefit.
	 */
	/*
	 * Force modified metadata out to stable storage.
	 */
	(void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
#endif

	VN_RELE(vp);

	rd->rd_status = puterrno(error);

}
2767 2767 void *
2768 2768 rfs_readdir_getfh(struct nfsrddirargs *rda)
2769 2769 {
2770 2770 return (&rda->rda_fh);
2771 2771 }
2772 2772 void
2773 2773 rfs_rddirfree(struct nfsrddirres *rd)
2774 2774 {
2775 2775 if (rd->rd_entries != NULL)
2776 2776 kmem_free(rd->rd_entries, rd->rd_bufsize);
2777 2777 }
2778 2778
2779 2779 /* ARGSUSED */
2780 2780 void
2781 2781 rfs_statfs(fhandle_t *fh, struct nfsstatfs *fs, struct exportinfo *exi,
2782 2782 struct svc_req *req, cred_t *cr, bool_t ro)
2783 2783 {
2784 2784 int error;
2785 2785 struct statvfs64 sb;
2786 2786 vnode_t *vp;
2787 2787
2788 2788 vp = nfs_fhtovp(fh, exi);
2789 2789 if (vp == NULL) {
2790 2790 fs->fs_status = NFSERR_STALE;
2791 2791 return;
2792 2792 }
2793 2793
2794 2794 error = VFS_STATVFS(vp->v_vfsp, &sb);
2795 2795
2796 2796 if (!error) {
2797 2797 fs->fs_tsize = nfstsize();
2798 2798 fs->fs_bsize = sb.f_frsize;
2799 2799 fs->fs_blocks = sb.f_blocks;
2800 2800 fs->fs_bfree = sb.f_bfree;
2801 2801 fs->fs_bavail = sb.f_bavail;
2802 2802 }
2803 2803
2804 2804 VN_RELE(vp);
2805 2805
2806 2806 fs->fs_status = puterrno(error);
2807 2807
2808 2808 }
/*
 * STATFS's argument is itself the file handle; hand it back so the
 * dispatcher can resolve the export.
 */
void *
rfs_statfs_getfh(fhandle_t *fh)
{
	return (fh);
}
2814 2814
2815 2815 static int
2816 2816 sattr_to_vattr(struct nfssattr *sa, struct vattr *vap)
2817 2817 {
2818 2818 vap->va_mask = 0;
2819 2819
2820 2820 /*
2821 2821 * There was a sign extension bug in some VFS based systems
2822 2822 * which stored the mode as a short. When it would get
2823 2823 * assigned to a u_long, no sign extension would occur.
2824 2824 * It needed to, but this wasn't noticed because sa_mode
2825 2825 * would then get assigned back to the short, thus ignoring
2826 2826 * the upper 16 bits of sa_mode.
2827 2827 *
2828 2828 * To make this implementation work for both broken
2829 2829 * clients and good clients, we check for both versions
2830 2830 * of the mode.
2831 2831 */
2832 2832 if (sa->sa_mode != (uint32_t)((ushort_t)-1) &&
2833 2833 sa->sa_mode != (uint32_t)-1) {
2834 2834 vap->va_mask |= AT_MODE;
2835 2835 vap->va_mode = sa->sa_mode;
2836 2836 }
2837 2837 if (sa->sa_uid != (uint32_t)-1) {
2838 2838 vap->va_mask |= AT_UID;
2839 2839 vap->va_uid = sa->sa_uid;
2840 2840 }
2841 2841 if (sa->sa_gid != (uint32_t)-1) {
2842 2842 vap->va_mask |= AT_GID;
2843 2843 vap->va_gid = sa->sa_gid;
2844 2844 }
2845 2845 if (sa->sa_size != (uint32_t)-1) {
2846 2846 vap->va_mask |= AT_SIZE;
2847 2847 vap->va_size = sa->sa_size;
2848 2848 }
2849 2849 if (sa->sa_atime.tv_sec != (int32_t)-1 &&
2850 2850 sa->sa_atime.tv_usec != (int32_t)-1) {
2851 2851 #ifndef _LP64
2852 2852 /* return error if time overflow */
2853 2853 if (!NFS2_TIME_OK(sa->sa_atime.tv_sec))
2854 2854 return (EOVERFLOW);
2855 2855 #endif
2856 2856 vap->va_mask |= AT_ATIME;
2857 2857 /*
2858 2858 * nfs protocol defines times as unsigned so don't extend sign,
2859 2859 * unless sysadmin set nfs_allow_preepoch_time.
2860 2860 */
2861 2861 NFS_TIME_T_CONVERT(vap->va_atime.tv_sec, sa->sa_atime.tv_sec);
2862 2862 vap->va_atime.tv_nsec = (uint32_t)(sa->sa_atime.tv_usec * 1000);
2863 2863 }
2864 2864 if (sa->sa_mtime.tv_sec != (int32_t)-1 &&
2865 2865 sa->sa_mtime.tv_usec != (int32_t)-1) {
2866 2866 #ifndef _LP64
2867 2867 /* return error if time overflow */
2868 2868 if (!NFS2_TIME_OK(sa->sa_mtime.tv_sec))
2869 2869 return (EOVERFLOW);
2870 2870 #endif
2871 2871 vap->va_mask |= AT_MTIME;
2872 2872 /*
2873 2873 * nfs protocol defines times as unsigned so don't extend sign,
2874 2874 * unless sysadmin set nfs_allow_preepoch_time.
2875 2875 */
2876 2876 NFS_TIME_T_CONVERT(vap->va_mtime.tv_sec, sa->sa_mtime.tv_sec);
2877 2877 vap->va_mtime.tv_nsec = (uint32_t)(sa->sa_mtime.tv_usec * 1000);
2878 2878 }
2879 2879 return (0);
2880 2880 }
2881 2881
/*
 * Map vnode types to NFSv2 on-the-wire file types.  Zero entries are
 * types with no NFSv2 representation; VFIFO is special-cased at the
 * end of vattr_to_nattr() via NA_SETFIFO.
 */
static const enum nfsftype vt_to_nf[] = {
	0, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, 0, 0, 0, NFSOC, 0
};

/*
 * check the following fields for overflow: nodeid, size, and time.
 * There could be a problem when converting 64-bit LP64 fields
 * into 32-bit ones.  Return an error if there is an overflow.
 */
int
vattr_to_nattr(struct vattr *vap, struct nfsfattr *na)
{
	ASSERT(vap->va_type >= VNON && vap->va_type <= VBAD);
	na->na_type = vt_to_nf[vap->va_type];

	/* (unsigned short)-1 is the "unknown" sentinel for mode/uid/gid. */
	if (vap->va_mode == (unsigned short) -1)
		na->na_mode = (uint32_t)-1;
	else
		na->na_mode = VTTOIF(vap->va_type) | vap->va_mode;

	if (vap->va_uid == (unsigned short)(-1))
		na->na_uid = (uint32_t)(-1);
	else if (vap->va_uid == UID_NOBODY)
		na->na_uid = (uint32_t)NFS_UID_NOBODY;
	else
		na->na_uid = vap->va_uid;

	if (vap->va_gid == (unsigned short)(-1))
		na->na_gid = (uint32_t)-1;
	else if (vap->va_gid == GID_NOBODY)
		na->na_gid = (uint32_t)NFS_GID_NOBODY;
	else
		na->na_gid = vap->va_gid;

	/*
	 * Do we need to check fsid for overflow?  It is 64-bit in the
	 * vattr, but are bigger than 32 bit values supported?
	 */
	na->na_fsid = vap->va_fsid;

	na->na_nodeid = vap->va_nodeid;

	/*
	 * Check to make sure that the nodeid is representable over the
	 * wire without losing bits.
	 */
	if (vap->va_nodeid != (u_longlong_t)na->na_nodeid)
		return (EFBIG);
	na->na_nlink = vap->va_nlink;

	/*
	 * Check for big files here, instead of at the caller.  See
	 * comments in cstat for large special file explanation.
	 */
	if (vap->va_size > (u_longlong_t)MAXOFF32_T) {
		if ((vap->va_type == VREG) || (vap->va_type == VDIR))
			return (EFBIG);
		if ((vap->va_type == VBLK) || (vap->va_type == VCHR)) {
			/* UNKNOWN_SIZE | OVERFLOW */
			na->na_size = MAXOFF32_T;
		} else
			na->na_size = vap->va_size;
	} else
		na->na_size = vap->va_size;

	/*
	 * If the vnode times overflow the 32-bit times that NFS2
	 * uses on the wire then return an error.
	 */
	if (!NFS_VAP_TIME_OK(vap)) {
		return (EOVERFLOW);
	}
	na->na_atime.tv_sec = vap->va_atime.tv_sec;
	na->na_atime.tv_usec = vap->va_atime.tv_nsec / 1000;

	na->na_mtime.tv_sec = vap->va_mtime.tv_sec;
	na->na_mtime.tv_usec = vap->va_mtime.tv_nsec / 1000;

	na->na_ctime.tv_sec = vap->va_ctime.tv_sec;
	na->na_ctime.tv_usec = vap->va_ctime.tv_nsec / 1000;

	/*
	 * If the dev_t will fit into 16 bits then compress
	 * it, otherwise leave it alone.  See comments in
	 * nfs_client.c.
	 */
	if (getminor(vap->va_rdev) <= SO4_MAXMIN &&
	    getmajor(vap->va_rdev) <= SO4_MAXMAJ)
		na->na_rdev = nfsv2_cmpdev(vap->va_rdev);
	else
		(void) cmpldev(&na->na_rdev, vap->va_rdev);

	na->na_blocks = vap->va_nblocks;
	na->na_blocksize = vap->va_blksize;

	/*
	 * This bit of ugliness is a *TEMPORARY* hack to preserve the
	 * over-the-wire protocols for named-pipe vnodes.  It remaps the
	 * VFIFO type to the special over-the-wire type.  (see note in nfs.h)
	 *
	 * BUYER BEWARE:
	 *  If you are porting the NFS to a non-Sun server, you probably
	 *  don't want to include the following block of code.  The
	 *  over-the-wire special file types will be changing with the
	 *  NFS Protocol Revision.
	 */
	if (vap->va_type == VFIFO)
		NA_SETFIFO(na);
	return (0);
}
2992 2992
/*
 * acl v2 support: returns approximate permission.
 *	default: returns minimal permission (more restrictive)
 *	aclok: returns maximal permission (less restrictive)
 * This routine changes the permissions that are already in *va.
 * If a file has minimal ACL, i.e. aclcnt == MIN_ACL_ENTRIES,
 * CLASS_OBJ is always the same as GROUP_OBJ entry.
 */
static void
acl_perm(struct vnode *vp, struct exportinfo *exi, struct vattr *va, cred_t *cr)
{
	vsecattr_t vsa;
	int aclcnt;
	aclent_t *aclentp;
	mode_t mask_perm;	/* CLASS_OBJ (mask) bits */
	mode_t grp_perm;	/* synthesized group bits */
	mode_t other_perm;	/* synthesized other bits */
	mode_t other_orig;	/* OTHER_OBJ bits as found in the ACL */
	int error;

	/* dont care default acl */
	vsa.vsa_mask = (VSA_ACL | VSA_ACLCNT);
	error = VOP_GETSECATTR(vp, &vsa, 0, cr, NULL);

	if (!error) {
		aclcnt = vsa.vsa_aclcnt;
		if (aclcnt > MIN_ACL_ENTRIES) {
			/*
			 * non-trivial ACL
			 *
			 * NOTE(review): mask_perm and other_orig are only
			 * assigned when CLASS_OBJ/OTHER_OBJ entries appear
			 * in the list -- presumably every valid aclent ACL
			 * contains both; confirm, since they are read
			 * below regardless.
			 */
			aclentp = vsa.vsa_aclentp;
			if (exi->exi_export.ex_flags & EX_ACLOK) {
				/* maximal permissions */
				grp_perm = 0;
				other_perm = 0;
				for (; aclcnt > 0; aclcnt--, aclentp++) {
					switch (aclentp->a_type) {
					case USER_OBJ:
						break;
					case USER:
						grp_perm |=
						    aclentp->a_perm << 3;
						other_perm |= aclentp->a_perm;
						break;
					case GROUP_OBJ:
						grp_perm |=
						    aclentp->a_perm << 3;
						break;
					case GROUP:
						other_perm |= aclentp->a_perm;
						break;
					case OTHER_OBJ:
						other_orig = aclentp->a_perm;
						break;
					case CLASS_OBJ:
						mask_perm = aclentp->a_perm;
						break;
					default:
						break;
					}
				}
				/* Apply the mask, then fold OTHER_OBJ back in. */
				grp_perm &= mask_perm << 3;
				other_perm &= mask_perm;
				other_perm |= other_orig;

			} else {
				/* minimal permissions */
				grp_perm = 070;
				other_perm = 07;
				for (; aclcnt > 0; aclcnt--, aclentp++) {
					switch (aclentp->a_type) {
					case USER_OBJ:
						break;
					case USER:
					case CLASS_OBJ:
						grp_perm &=
						    aclentp->a_perm << 3;
						other_perm &=
						    aclentp->a_perm;
						break;
					case GROUP_OBJ:
						grp_perm &=
						    aclentp->a_perm << 3;
						break;
					case GROUP:
						other_perm &=
						    aclentp->a_perm;
						break;
					case OTHER_OBJ:
						other_perm &=
						    aclentp->a_perm;
						break;
					default:
						break;
					}
				}
			}
			/* copy to va */
			va->va_mode &= ~077;
			va->va_mode |= grp_perm | other_perm;
		}
		if (vsa.vsa_aclcnt)
			kmem_free(vsa.vsa_aclentp,
			    vsa.vsa_aclcnt * sizeof (aclent_t));
	}
}
3097 3097
/*
 * One-time NFSv2 server initialization: allocate the caller id used by
 * this module and register the per-zone state callbacks.
 */
void
rfs_srvrinit(void)
{
	nfs2_srv_caller_id = fs_new_caller_id();
	/* Per-zone nfs_srv_t state is managed by the ZSD callbacks below. */
	zone_key_create(&rfs_zone_key, rfs_zone_init, NULL, rfs_zone_fini);
}
3104 3104
/*
 * Module teardown counterpart of rfs_srvrinit().  There is no global
 * state to release here; per-zone state is freed by rfs_zone_fini().
 */
void
rfs_srvrfini(void)
{
}
3109 3109
3110 3110 /* ARGSUSED */
3111 3111 static void *
3112 3112 rfs_zone_init(zoneid_t zoneid)
3113 3113 {
3114 3114 nfs_srv_t *ns;
3115 3115
3116 3116 ns = kmem_zalloc(sizeof (*ns), KM_SLEEP);
3117 3117
3118 3118 mutex_init(&ns->async_write_lock, NULL, MUTEX_DEFAULT, NULL);
3119 3119 ns->write_async = 1;
3120 3120
3121 3121 return (ns);
3122 3122 }
3123 3123
3124 3124 /* ARGSUSED */
3125 3125 static void
3126 3126 rfs_zone_fini(zoneid_t zoneid, void *data)
3127 3127 {
3128 3128 nfs_srv_t *ns;
3129 3129
3130 3130 ns = (nfs_srv_t *)data;
3131 3131 mutex_destroy(&ns->async_write_lock);
3132 3132 kmem_free(ns, sizeof (*ns));
3133 3133 }
3134 3134
3135 3135 static int
3136 3136 rdma_setup_read_data2(struct nfsreadargs *ra, struct nfsrdresult *rr)
3137 3137 {
3138 3138 struct clist *wcl;
3139 3139 int wlist_len;
3140 3140 uint32_t count = rr->rr_count;
3141 3141
3142 3142 wcl = ra->ra_wlist;
3143 3143
3144 3144 if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE) {
3145 3145 return (FALSE);
3146 3146 }
3147 3147
3148 3148 wcl = ra->ra_wlist;
3149 3149 rr->rr_ok.rrok_wlist_len = wlist_len;
3150 3150 rr->rr_ok.rrok_wlist = wcl;
3151 3151
3152 3152 return (TRUE);
3153 3153 }
|
↓ open down ↓ |
2650 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX