Print this page
nfssrv: nfsstat reports zeroed data in zone
| Split |
Close |
| Expand all |
| Collapse all |
--- old/usr/src/uts/common/fs/nfs/nfs_srv.c
+++ new/usr/src/uts/common/fs/nfs/nfs_srv.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved.
24 24 * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
25 25 * Copyright (c) 2016 by Delphix. All rights reserved.
26 26 */
27 27
28 28 /*
29 29 * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T.
30 30 * All rights reserved.
31 31 */
32 32
33 33 /*
34 34 * Copyright 2018 Nexenta Systems, Inc.
35 35 * Copyright (c) 2016 by Delphix. All rights reserved.
36 36 */
37 37
38 38 #include <sys/param.h>
39 39 #include <sys/types.h>
40 40 #include <sys/systm.h>
41 41 #include <sys/cred.h>
42 42 #include <sys/buf.h>
43 43 #include <sys/vfs.h>
44 44 #include <sys/vnode.h>
45 45 #include <sys/uio.h>
46 46 #include <sys/stat.h>
47 47 #include <sys/errno.h>
48 48 #include <sys/sysmacros.h>
49 49 #include <sys/statvfs.h>
50 50 #include <sys/kmem.h>
51 51 #include <sys/kstat.h>
52 52 #include <sys/dirent.h>
53 53 #include <sys/cmn_err.h>
54 54 #include <sys/debug.h>
55 55 #include <sys/vtrace.h>
56 56 #include <sys/mode.h>
57 57 #include <sys/acl.h>
58 58 #include <sys/nbmlock.h>
59 59 #include <sys/policy.h>
60 60 #include <sys/sdt.h>
61 61
62 62 #include <rpc/types.h>
63 63 #include <rpc/auth.h>
64 64 #include <rpc/svc.h>
65 65
66 66 #include <nfs/nfs.h>
67 67 #include <nfs/export.h>
68 68 #include <nfs/nfs_cmd.h>
69 69
70 70 #include <vm/hat.h>
71 71 #include <vm/as.h>
72 72 #include <vm/seg.h>
73 73 #include <vm/seg_map.h>
74 74 #include <vm/seg_kmem.h>
75 75
76 76 #include <sys/strsubr.h>
77 77
78 78 struct rfs_async_write_list;
79 79
80 80 /*
81 81 * Zone globals of NFSv2 server
 *
 * nfs_get_srv() in this file returns the instance that belongs to the
 * current zone (it is reached through the zone's nfs_globals_t).
82 82 */
83 83 typedef struct nfs_srv {
/* presumably protects async_write_head - TODO confirm against users */
84 84 kmutex_t async_write_lock;
/* head pointer; struct rfs_async_write_list is only forward-declared above */
85 85 struct rfs_async_write_list *async_write_head;
86 86
87 87 /*
88 88 * enables write clustering if == 1
89 89 */
90 90 int write_async;
91 91 } nfs_srv_t;
92 92
93 93 /*
94 94 * These are the interface routines for the server side of the
95 95 * Network File System. See the NFS version 2 protocol specification
96 96 * for a description of this interface.
97 97 */
98 98
/*
 * Forward declarations of file-local helpers used by the routines below:
 * sattr_to_vattr() is called by rfs_setattr() to convert the wire
 * attributes and returns an error code; acl_perm() is called on a
 * struct vattr before it is converted for the reply.
 */
99 99 static int sattr_to_vattr(struct nfssattr *, struct vattr *);
100 100 static void acl_perm(struct vnode *, struct exportinfo *, struct vattr *,
101 101 cred_t *);
102 102
103 103
104 104 /*
105 105 * Some "over the wire" UNIX file types. These are encoded
106 106 * into the mode. This needs to be fixed in the next rev.
 * The values are octal bit patterns; IFMT is the mask covering the
 * file-type bits.
107 107 */
108 108 #define IFMT 0170000 /* type of file */
109 109 #define IFCHR 0020000 /* character special */
110 110 #define IFBLK 0060000 /* block special */
111 111 #define IFSOCK 0140000 /* socket */
112 112
/*
 * Caller id that the NFSv2 request handlers in this file store into
 * caller_context_t.cc_caller_id before issuing VOP calls. It is not
 * assigned anywhere in the visible part of the file.
 */
113 113 u_longlong_t nfs2_srv_caller_id;
114 114
/*
 * Return the NFSv2 server state (nfs_srv_t) of the current zone.
 * The zone's nfs_globals_t is looked up with zone_getspecific() using
 * nfssrv_zone_key and curzone, and its nfs_srv member is returned.
 * NOTE(review): ng is dereferenced without a NULL check; presumably the
 * zone-specific data always exists when this is called - confirm.
 */
115 115 static nfs_srv_t *
116 116 nfs_get_srv(void)
117 117 {
118 118 nfs_globals_t *ng = zone_getspecific(nfssrv_zone_key, curzone);
119 119 nfs_srv_t *srv = ng->nfs_srv;
120 120 ASSERT(srv != NULL);
121 121 return (srv);
122 122 }
123 123
124 124 /*
125 125 * Get file attributes.
126 126 * Returns the current attributes of the file with the given fhandle.
 *
 * The file handle is mapped to a vnode with nfs_fhtovp(); if that fails
 * the reply status is NFSERR_STALE. Otherwise all attributes (AT_ALL) are
 * fetched with rfs4_delegated_getattr(), adjusted by acl_perm() and
 * converted into ns->ns_attr by vattr_to_nattr(). The resulting error is
 * translated with puterrno() and stored in ns->ns_status. The vnode is
 * released with VN_RELE() before returning.
 *
 * req and ro are not used by this routine.
127 127 */
128 128 /* ARGSUSED */
129 129 void
130 130 rfs_getattr(fhandle_t *fhp, struct nfsattrstat *ns, struct exportinfo *exi,
131 131 struct svc_req *req, cred_t *cr, bool_t ro)
132 132 {
133 133 int error;
134 134 vnode_t *vp;
135 135 struct vattr va;
136 136
137 137 vp = nfs_fhtovp(fhp, exi);
138 138 if (vp == NULL) {
139 139 ns->ns_status = NFSERR_STALE;
140 140 return;
141 141 }
142 142
143 143 /*
144 144 * Do the getattr.
145 145 */
146 146 va.va_mask = AT_ALL; /* we want all the attributes */
147 147
148 148 error = rfs4_delegated_getattr(vp, &va, 0, cr);
149 149
150 150 /* check for overflows */
151 151 if (!error) {
152 152 /* Lie about the object type for a referral */
153 153 if (vn_is_nfs_reparse(vp, cr))
154 154 va.va_type = VLNK;
155 155
156 156 acl_perm(vp, exi, &va, cr);
157 157 error = vattr_to_nattr(&va, &ns->ns_attr);
158 158 }
159 159
160 160 VN_RELE(vp);
161 161
162 162 ns->ns_status = puterrno(error);
163 163 }
/*
 * Return the file handle of a GETATTR request. The argument of the
 * request is the file handle itself, so it is returned unchanged.
 */
164 164 void *
165 165 rfs_getattr_getfh(fhandle_t *fhp)
166 166 {
167 167 return (fhp);
168 168 }
169 169
170 170 /*
171 171 * Set file attributes.
172 172 * Sets the attributes of the file with the given fhandle. Returns
173 173 * the new attributes.
 *
 * Flow: map the handle to a vnode (NFSERR_STALE on failure), refuse
 * read-only access (NFSERR_ROFS), convert the wire attributes with
 * sattr_to_vattr(), handle a size change on a regular file separately
 * (see the comment in the body), apply the remaining attributes with
 * VOP_SETATTR(), then re-read all attributes for the reply. VOP_FSYNC()
 * is called on the vnode whether or not an error occurred, and its
 * return value is ignored.
 *
 * If a delegation conflict is reported (EAGAIN together with
 * CC_WOULDBLOCK in ct.cc_flags), the thread is flagged T_WOULDBLOCK and
 * the routine returns without setting ns->ns_status.
 *
 * req is not used by this routine.
174 174 */
175 175 /* ARGSUSED */
176 176 void
177 177 rfs_setattr(struct nfssaargs *args, struct nfsattrstat *ns,
178 178 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
179 179 {
180 180 int error;
181 181 int flag;
182 182 int in_crit = 0;
183 183 vnode_t *vp;
184 184 struct vattr va;
185 185 struct vattr bva;
186 186 struct flock64 bf;
187 187 caller_context_t ct;
188 188
189 189
190 190 vp = nfs_fhtovp(&args->saa_fh, exi);
191 191 if (vp == NULL) {
192 192 ns->ns_status = NFSERR_STALE;
193 193 return;
194 194 }
195 195
196 196 if (rdonly(ro, vp)) {
197 197 VN_RELE(vp);
198 198 ns->ns_status = NFSERR_ROFS;
199 199 return;
200 200 }
201 201
202 202 error = sattr_to_vattr(&args->saa_sa, &va);
203 203 if (error) {
204 204 VN_RELE(vp);
205 205 ns->ns_status = puterrno(error);
206 206 return;
207 207 }
208 208
209 209 /*
210 210 * If the client is requesting a change to the mtime,
211 211 * but the nanosecond field is set to 1 billion, then
212 212 * this is a flag to the server that it should set the
213 213 * atime and mtime fields to the server's current time.
214 214 * The 1 billion number actually came from the client
215 215 * as 1 million, but the units in the over the wire
216 216 * request are microseconds instead of nanoseconds.
217 217 *
218 218 * This is an overload of the protocol and should be
219 219 * documented in the NFS Version 2 protocol specification.
 *
 * flag is passed to VOP_SETATTR() below: ATTR_UTIME when the
 * client supplied an explicit mtime, 0 otherwise.
220 220 */
221 221 if (va.va_mask & AT_MTIME) {
222 222 if (va.va_mtime.tv_nsec == 1000000000) {
223 223 gethrestime(&va.va_mtime);
224 224 va.va_atime = va.va_mtime;
225 225 va.va_mask |= AT_ATIME;
226 226 flag = 0;
227 227 } else
228 228 flag = ATTR_UTIME;
229 229 } else
230 230 flag = 0;
231 231
232 232 /*
233 233 * If the filesystem is exported with nosuid, then mask off
234 234 * the setuid and setgid bits.
235 235 */
236 236 if ((va.va_mask & AT_MODE) && vp->v_type == VREG &&
237 237 (exi->exi_export.ex_flags & EX_NOSUID))
238 238 va.va_mode &= ~(VSUID | VSGID);
239 239
/* Caller context handed to every VOP call below; CC_DONTBLOCK is set. */
240 240 ct.cc_sysid = 0;
241 241 ct.cc_pid = 0;
242 242 ct.cc_caller_id = nfs2_srv_caller_id;
243 243 ct.cc_flags = CC_DONTBLOCK;
244 244
245 245 /*
246 246 * We need to specially handle size changes because it is
247 247 * possible for the client to create a file with modes
248 248 * which indicate read-only, but with the file opened for
249 249 * writing. If the client then tries to set the size of
250 250 * the file, then the normal access checking done in
251 251 * VOP_SETATTR would prevent the client from doing so,
252 252 * although it should be legal for it to do so. To get
253 253 * around this, we do the access checking for ourselves
254 254 * and then use VOP_SPACE which doesn't do the access
255 255 * checking which VOP_SETATTR does. VOP_SPACE can only
256 256 * operate on VREG files, let VOP_SETATTR handle the other
257 257 * extremely rare cases.
258 258 * Also the client should not be allowed to change the
259 259 * size of the file if there is a conflicting non-blocking
260 260 * mandatory lock in the region of change.
261 261 */
262 262 if (vp->v_type == VREG && va.va_mask & AT_SIZE) {
263 263 if (nbl_need_check(vp)) {
264 264 nbl_start_crit(vp, RW_READER);
265 265 in_crit = 1;
266 266 }
267 267
268 268 bva.va_mask = AT_UID | AT_SIZE;
269 269
270 270 error = VOP_GETATTR(vp, &bva, 0, cr, &ct);
271 271
272 272 if (error) {
273 273 if (in_crit)
274 274 nbl_end_crit(vp);
275 275 VN_RELE(vp);
276 276 ns->ns_status = puterrno(error);
277 277 return;
278 278 }
279 279
/*
 * The region checked for a conflicting lock is the byte range between
 * the current size (bva.va_size) and the requested size (va.va_size).
 */
280 280 if (in_crit) {
281 281 u_offset_t offset;
282 282 ssize_t length;
283 283
284 284 if (va.va_size < bva.va_size) {
285 285 offset = va.va_size;
286 286 length = bva.va_size - va.va_size;
287 287 } else {
288 288 offset = bva.va_size;
289 289 length = va.va_size - bva.va_size;
290 290 }
291 291 if (nbl_conflict(vp, NBL_WRITE, offset, length, 0,
292 292 NULL)) {
293 293 error = EACCES;
294 294 }
295 295 }
296 296
/*
 * Only when the caller owns the file and the size really changes is the
 * size set through VOP_SPACE(); AT_SIZE is then cleared from va_mask so
 * that VOP_SETATTR() below does not see it. In every other case AT_SIZE
 * stays in va_mask and is left to VOP_SETATTR().
 */
297 297 if (crgetuid(cr) == bva.va_uid && !error &&
298 298 va.va_size != bva.va_size) {
299 299 va.va_mask &= ~AT_SIZE;
300 300 bf.l_type = F_WRLCK;
301 301 bf.l_whence = 0;
302 302 bf.l_start = (off64_t)va.va_size;
303 303 bf.l_len = 0;
304 304 bf.l_sysid = 0;
305 305 bf.l_pid = 0;
306 306
307 307 error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
308 308 (offset_t)va.va_size, cr, &ct);
309 309 }
310 310 if (in_crit)
311 311 nbl_end_crit(vp);
312 312 } else
313 313 error = 0;
314 314
315 315 /*
316 316 * Do the setattr.
 * Skipped when an earlier step failed or when no attribute bits
 * remain in va_mask.
317 317 */
318 318 if (!error && va.va_mask) {
319 319 error = VOP_SETATTR(vp, &va, flag, cr, &ct);
320 320 }
321 321
322 322 /*
323 323 * check if the monitor on either vop_space or vop_setattr detected
324 324 * a delegation conflict and if so, mark the thread flag as
325 325 * wouldblock so that the response is dropped and the client will
326 326 * try again.
327 327 */
328 328 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
329 329 VN_RELE(vp);
330 330 curthread->t_flag |= T_WOULDBLOCK;
331 331 return;
332 332 }
333 333
334 334 if (!error) {
335 335 va.va_mask = AT_ALL; /* get everything */
336 336
337 337 error = rfs4_delegated_getattr(vp, &va, 0, cr);
338 338
339 339 /* check for overflows */
340 340 if (!error) {
341 341 acl_perm(vp, exi, &va, cr);
342 342 error = vattr_to_nattr(&va, &ns->ns_attr);
343 343 }
344 344 }
345 345
/* Clear CC_DONTBLOCK (and any other flag) before the fsync below. */
346 346 ct.cc_flags = 0;
347 347
348 348 /*
349 349 * Force modified metadata out to stable storage.
350 350 */
351 351 (void) VOP_FSYNC(vp, FNODSYNC, cr, &ct);
352 352
353 353 VN_RELE(vp);
354 354
355 355 ns->ns_status = puterrno(error);
356 356 }
/* Return a pointer to the file handle (saa_fh) inside the SETATTR arguments. */
357 357 void *
358 358 rfs_setattr_getfh(struct nfssaargs *args)
359 359 {
360 360 return (&args->saa_fh);
361 361 }
362 362
363 363 /*
 * Try to cross from *vpp into the filesystem mounted on it.
 *
 * The vnode is traversed with traverse(), its fid is obtained with
 * VOP_FID() and the matching export is looked up with checkexport().
 * Only if an export is found and it has EX_NOHIDE set are *vpp and *exip
 * replaced by the traversed vnode and its export; in that case the
 * previous *exip and *vpp are released. In every other case, including
 * errors, *vpp and *exip are left unchanged and the temporary references
 * taken here are dropped.
 *
 * Returns 0 on success (whether or not the mount point was crossed), or
 * the error returned by traverse() or VOP_FID().
 */
364 364 int
365 365 rfs_cross_mnt(vnode_t **vpp, struct exportinfo **exip)
366 366 {
367 367 struct exportinfo *exi;
368 368 vnode_t *vp = *vpp;
369 369 fid_t fid;
370 370 int error;
371 371
/* Extra hold so the caller's reference on *vpp is untouched on failure. */
372 372 VN_HOLD(vp);
373 373
374 374 if ((error = traverse(&vp)) != 0) {
375 375 VN_RELE(vp);
376 376 return (error);
377 377 }
378 378
379 379 bzero(&fid, sizeof (fid));
380 380 fid.fid_len = MAXFIDSZ;
381 381 error = VOP_FID(vp, &fid, NULL);
382 382 if (error) {
383 383 VN_RELE(vp);
384 384 return (error);
385 385 }
386 386
387 387 exi = checkexport(&vp->v_vfsp->vfs_fsid, &fid);
388 388 if (exi == NULL ||
389 389 (exi->exi_export.ex_flags & EX_NOHIDE) == 0) {
390 390 /*
391 391 * This is not an error: the submount is either not
392 392 * exported or "nohide" is not set for it.
393 393 */
394 394 if (exi != NULL)
395 395 exi_rele(exi);
396 396 VN_RELE(vp);
397 397 } else {
398 398 /* go to submount */
399 399 exi_rele(*exip);
400 400 *exip = exi;
401 401
402 402 VN_RELE(*vpp);
403 403 *vpp = vp;
404 404 }
405 405
406 406 return (0);
407 407 }
408 408
409 409 /*
410 410 * Given mounted "dvp" and "exi", go upper mountpoint
411 411 * with dvp/exi correction
412 412 * Return 0 in success
 *
 * *dvpp is expected to be the root of a filesystem (VROOT) or the zone
 * root vnode taken from the export (this is asserted). untraverse() is
 * used to step to the covered vnode, and nfs_vptoexi() to find the export
 * for it. On success the old *exip and *dvpp are released and replaced by
 * the new export and vnode. If no export is found, -1 is returned and
 * *dvpp / *exip are left unchanged.
413 413 */
414 414 int
415 415 rfs_climb_crossmnt(vnode_t **dvpp, struct exportinfo **exip, cred_t *cr)
416 416 {
417 417 struct exportinfo *exi;
418 418 vnode_t *dvp = *dvpp;
419 419 vnode_t *zone_rootvp;
420 420
421 421 zone_rootvp = (*exip)->exi_ne->exi_root->exi_vp;
422 422 ASSERT((dvp->v_flag & VROOT) || VN_CMP(zone_rootvp, dvp));
423 423
424 424 VN_HOLD(dvp);
425 425 dvp = untraverse(dvp, zone_rootvp);
426 426 exi = nfs_vptoexi(NULL, dvp, cr, NULL, NULL, FALSE);
427 427 if (exi == NULL) {
428 428 VN_RELE(dvp);
429 429 return (-1);
430 430 }
431 431
432 432 ASSERT3U(exi->exi_zoneid, ==, (*exip)->exi_zoneid);
433 433 exi_rele(*exip);
434 434 *exip = exi;
435 435 VN_RELE(*dvpp);
436 436 *dvpp = dvp;
437 437
438 438 return (0);
439 439 }
440 440 /*
441 441 * Directory lookup.
442 442 * Returns an fhandle and file attributes for file name in a directory.
 *
 * The export passed in is held once the directory vnode is known and
 * may be replaced on the way (by rfs_climb_crossmnt(), by
 * rfs_publicfh_mclookup() for the public file handle, or by
 * rfs_cross_mnt() when the result is a mount point). Whatever export is
 * current at the "out" label is released there, together with dvp.
 *
 * The reply status is puterrno(error), except that WNFSERR_CLNT_FLAVOR
 * is returned when a public-filehandle lookup succeeded but
 * chk_clnt_sec() rejected the client's security flavor.
 *
 * NOTE(review): "error" is assigned NFSERR_ACCES / NFSERR_NOENT on some
 * paths and is later passed through puterrno(); presumably those status
 * codes equal the corresponding errno values - confirm.
 * NOTE(review): exi is tested against NULL in the EX_PUBLIC check but is
 * used unconditionally afterwards; presumably callers never pass NULL -
 * confirm.
 *
 * ro is not used by this routine.
443 443 */
444 444 /* ARGSUSED */
445 445 void
446 446 rfs_lookup(struct nfsdiropargs *da, struct nfsdiropres *dr,
447 447 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
448 448 {
449 449 int error;
450 450 vnode_t *dvp;
451 451 vnode_t *vp;
452 452 struct vattr va;
453 453 fhandle_t *fhp = da->da_fhandle;
454 454 struct sec_ol sec = {0, 0};
455 455 bool_t publicfh_flag = FALSE, auth_weak = FALSE;
456 456 char *name;
457 457 struct sockaddr *ca;
458 458
459 459 /*
460 460 * Trusted Extension doesn't support NFSv2. MOUNT
461 461 * will reject v2 clients. Need to prevent v2 client
462 462 * access via WebNFS here.
463 463 */
464 464 if (is_system_labeled() && req->rq_vers == 2) {
465 465 dr->dr_status = NFSERR_ACCES;
466 466 return;
467 467 }
468 468
469 469 /*
470 470 * Disallow NULL paths
471 471 */
472 472 if (da->da_name == NULL || *da->da_name == '\0') {
473 473 dr->dr_status = NFSERR_ACCES;
474 474 return;
475 475 }
476 476
477 477 /*
478 478 * Allow lookups from the root - the default
479 479 * location of the public filehandle.
480 480 */
481 481 if (exi != NULL && (exi->exi_export.ex_flags & EX_PUBLIC)) {
482 482 dvp = ZONE_ROOTVP();
483 483 VN_HOLD(dvp);
484 484 } else {
485 485 dvp = nfs_fhtovp(fhp, exi);
486 486 if (dvp == NULL) {
487 487 dr->dr_status = NFSERR_STALE;
488 488 return;
489 489 }
490 490 }
491 491
492 492 exi_hold(exi);
493 493 ASSERT3U(exi->exi_zoneid, ==, curzone->zone_id);
494 494
495 495 /*
496 496 * Do not allow lookups beyond the root.
497 497 * If the filehandle matches a filehandle of the exi,
498 498 * then the ".." refers beyond the root of an exported filesystem.
499 499 */
500 500 if (strcmp(da->da_name, "..") == 0 &&
501 501 EQFID(&exi->exi_fid, (fid_t *)&fhp->fh_len)) {
502 502 if ((exi->exi_export.ex_flags & EX_NOHIDE) &&
503 503 ((dvp->v_flag & VROOT) || VN_IS_CURZONEROOT(dvp))) {
504 504 /*
505 505 * special case for ".." and 'nohide' exported root
506 506 */
507 507 if (rfs_climb_crossmnt(&dvp, &exi, cr) != 0) {
508 508 error = NFSERR_ACCES;
509 509 goto out;
510 510 }
511 511 } else {
512 512 error = NFSERR_NOENT;
513 513 goto out;
514 514 }
515 515 }
516 516
/*
 * Convert the client-supplied name. The result is either da->da_name
 * itself or a separate buffer, which is freed with size MAXPATHLEN after
 * the lookup below.
 */
517 517 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
518 518 name = nfscmd_convname(ca, exi, da->da_name, NFSCMD_CONV_INBOUND,
519 519 MAXPATHLEN);
520 520
521 521 if (name == NULL) {
522 522 error = NFSERR_ACCES;
523 523 goto out;
524 524 }
525 525
526 526 /*
527 527 * If the public filehandle is used then allow
528 528 * a multi-component lookup, i.e. evaluate
529 529 * a pathname and follow symbolic links if
530 530 * necessary.
531 531 *
532 532 * This may result in a vnode in another filesystem
533 533 * which is OK as long as the filesystem is exported.
534 534 */
535 535 if (PUBLIC_FH2(fhp)) {
536 536 publicfh_flag = TRUE;
537 537
/* Drop our hold; rfs_publicfh_mclookup() is handed &exi to fill in. */
538 538 exi_rele(exi);
539 539 exi = NULL;
540 540
541 541 error = rfs_publicfh_mclookup(name, dvp, cr, &vp, &exi,
542 542 &sec);
543 543 } else {
544 544 /*
545 545 * Do a normal single component lookup.
546 546 */
547 547 error = VOP_LOOKUP(dvp, name, &vp, NULL, 0, NULL, cr,
548 548 NULL, NULL, NULL);
549 549 }
550 550
551 551 if (name != da->da_name)
552 552 kmem_free(name, MAXPATHLEN);
553 553
/*
 * rfs_cross_mnt() leaves vp untouched on failure, so the reference from
 * the lookup above has to be released here in that case.
 */
554 554 if (error == 0 && vn_ismntpt(vp)) {
555 555 error = rfs_cross_mnt(&vp, &exi);
556 556 if (error)
557 557 VN_RELE(vp);
558 558 }
559 559
560 560 if (!error) {
561 561 va.va_mask = AT_ALL; /* we want everything */
562 562
563 563 error = rfs4_delegated_getattr(vp, &va, 0, cr);
564 564
565 565 /* check for overflows */
566 566 if (!error) {
567 567 acl_perm(vp, exi, &va, cr);
568 568 error = vattr_to_nattr(&va, &dr->dr_attr);
569 569 if (!error) {
570 570 if (sec.sec_flags & SEC_QUERY)
571 571 error = makefh_ol(&dr->dr_fhandle, exi,
572 572 sec.sec_index);
573 573 else {
574 574 error = makefh(&dr->dr_fhandle, vp,
575 575 exi);
576 576 if (!error && publicfh_flag &&
577 577 !chk_clnt_sec(exi, req))
578 578 auth_weak = TRUE;
579 579 }
580 580 }
581 581 }
582 582 VN_RELE(vp);
583 583 }
584 584
585 585 out:
586 586 VN_RELE(dvp);
587 587
588 588 if (exi != NULL)
589 589 exi_rele(exi);
590 590
591 591 /*
592 592 * If it's public fh, no 0x81, and client's flavor is
593 593 * invalid, set WebNFS status to WNFSERR_CLNT_FLAVOR now.
594 594 * Then set RPC status to AUTH_TOOWEAK in common_dispatch.
595 595 */
596 596 if (auth_weak)
597 597 dr->dr_status = (enum nfsstat)WNFSERR_CLNT_FLAVOR;
598 598 else
599 599 dr->dr_status = puterrno(error);
600 600 }
/* Return the directory file handle (da_fhandle) of a LOOKUP request. */
601 601 void *
602 602 rfs_lookup_getfh(struct nfsdiropargs *da)
603 603 {
604 604 return (da->da_fhandle);
605 605 }
606 606
607 607 /*
608 608 * Read symbolic link.
609 609 * Returns the string in the symbolic link at the given fhandle.
 *
 * rl->rl_data is set to NULL on every early-error return. Otherwise it
 * points to a buffer allocated here, which rfs_rlfree() releases.
 * For a referral (vn_is_nfs_reparse() is true) the link text is produced
 * by build_symlink() and the NFS_REFERLINKS counter is incremented;
 * otherwise the text is read with VOP_READLINK().
 * The text is finally passed through nfscmd_convname() for outbound
 * conversion, which may substitute a different buffer.
 *
 * NOTE(review): the conversion buffer is sized MAXPATHLEN in the
 * nfscmd_convname() call but rl_data is freed with NFS_MAXPATHLEN (here
 * and in rfs_rlfree()); this assumes both constants are equal - confirm.
 * NOTE(review): nfscmd_convname() is called on rl->rl_data even when
 * "error" is set, in which case the buffer contents may not have been
 * written - confirm that this is harmless.
 *
 * ro is not used by this routine.
610 610 */
611 611 /* ARGSUSED */
612 612 void
613 613 rfs_readlink(fhandle_t *fhp, struct nfsrdlnres *rl, struct exportinfo *exi,
614 614 struct svc_req *req, cred_t *cr, bool_t ro)
615 615 {
616 616 int error;
617 617 struct iovec iov;
618 618 struct uio uio;
619 619 vnode_t *vp;
620 620 struct vattr va;
621 621 struct sockaddr *ca;
622 622 char *name = NULL;
623 623 int is_referral = 0;
624 624
625 625 vp = nfs_fhtovp(fhp, exi);
626 626 if (vp == NULL) {
627 627 rl->rl_data = NULL;
628 628 rl->rl_status = NFSERR_STALE;
629 629 return;
630 630 }
631 631
632 632 va.va_mask = AT_MODE;
633 633
634 634 error = VOP_GETATTR(vp, &va, 0, cr, NULL);
635 635
636 636 if (error) {
637 637 VN_RELE(vp);
638 638 rl->rl_data = NULL;
639 639 rl->rl_status = puterrno(error);
640 640 return;
641 641 }
642 642
643 643 if (MANDLOCK(vp, va.va_mode)) {
644 644 VN_RELE(vp);
645 645 rl->rl_data = NULL;
646 646 rl->rl_status = NFSERR_ACCES;
647 647 return;
648 648 }
649 649
650 650 /* We lied about the object type for a referral */
651 651 if (vn_is_nfs_reparse(vp, cr))
652 652 is_referral = 1;
653 653
654 654 /*
655 655 * XNFS and RFC1094 require us to return ENXIO if argument
656 656 * is not a link. BUGID 1138002.
657 657 */
658 658 if (vp->v_type != VLNK && !is_referral) {
659 659 VN_RELE(vp);
660 660 rl->rl_data = NULL;
661 661 rl->rl_status = NFSERR_NXIO;
662 662 return;
|
↓ open down ↓ |
662 lines elided |
↑ open up ↑ |
663 663 }
664 664
665 665 /*
666 666 * Allocate data for pathname. This will be freed by rfs_rlfree.
667 667 */
668 668 rl->rl_data = kmem_alloc(NFS_MAXPATHLEN, KM_SLEEP);
669 669
670 670 if (is_referral) {
671 671 char *s;
672 672 size_t strsz;
/*
 * New in this change: the counter array is taken from the export's
 * per-zone globals (svstat[NFS_VERSION]) instead of the global
 * global_svstat_ptr[2] table used by the removed line below.
 */
673 + kstat_named_t *stat =
674 + exi->exi_ne->ne_globals->svstat[NFS_VERSION];
673 675
674 676 /* Get an artificial symlink based on a referral */
675 677 s = build_symlink(vp, cr, &strsz);
676 - global_svstat_ptr[2][NFS_REFERLINKS].value.ui64++;
678 + stat[NFS_REFERLINKS].value.ui64++;
677 679 DTRACE_PROBE2(nfs2serv__func__referral__reflink,
678 680 vnode_t *, vp, char *, s);
679 681 if (s == NULL)
680 682 error = EINVAL;
681 683 else {
682 684 error = 0;
683 685 (void) strlcpy(rl->rl_data, s, NFS_MAXPATHLEN);
684 686 rl->rl_count = (uint32_t)MIN(strsz, NFS_MAXPATHLEN);
685 687 kmem_free(s, strsz);
686 688 }
687 689
688 690 } else {
689 691
690 692 /*
691 693 * Set up io vector to read sym link data
692 694 */
693 695 iov.iov_base = rl->rl_data;
694 696 iov.iov_len = NFS_MAXPATHLEN;
695 697 uio.uio_iov = &iov;
696 698 uio.uio_iovcnt = 1;
697 699 uio.uio_segflg = UIO_SYSSPACE;
698 700 uio.uio_extflg = UIO_COPY_CACHED;
699 701 uio.uio_loffset = (offset_t)0;
700 702 uio.uio_resid = NFS_MAXPATHLEN;
701 703
702 704 /*
703 705 * Do the readlink.
704 706 */
705 707 error = VOP_READLINK(vp, &uio, cr, NULL);
706 708
707 709 rl->rl_count = (uint32_t)(NFS_MAXPATHLEN - uio.uio_resid);
708 710
/*
 * NOTE(review): if VOP_READLINK() fills the whole buffer (uio_resid == 0),
 * rl_count equals NFS_MAXPATHLEN and the terminator below is stored one
 * byte past the NFS_MAXPATHLEN-byte allocation - confirm whether a link
 * of that length can be returned.
 */
709 711 if (!error)
710 712 rl->rl_data[rl->rl_count] = '\0';
711 713
712 714 }
713 715
714 716
715 717 VN_RELE(vp);
716 718
717 719 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
718 720 name = nfscmd_convname(ca, exi, rl->rl_data,
719 721 NFSCMD_CONV_OUTBOUND, MAXPATHLEN);
720 722
/* A different buffer was returned: free ours and hand out the new one. */
721 723 if (name != NULL && name != rl->rl_data) {
722 724 kmem_free(rl->rl_data, NFS_MAXPATHLEN);
723 725 rl->rl_data = name;
724 726 }
725 727
726 728 /*
727 729 * XNFS and RFC1094 require us to return ENXIO if argument
728 730 * is not a link. UFS returns EINVAL if this is the case,
729 731 * so we do the mapping here. BUGID 1138002.
730 732 */
731 733 if (error == EINVAL)
732 734 rl->rl_status = NFSERR_NXIO;
733 735 else
734 736 rl->rl_status = puterrno(error);
735 737
736 738 }
/*
 * Return the file handle of a READLINK request. The argument of the
 * request is the file handle itself, so it is returned unchanged.
 */
737 739 void *
738 740 rfs_readlink_getfh(fhandle_t *fhp)
739 741 {
740 742 return (fhp);
741 743 }
742 744 /*
743 745 * Free data allocated by rfs_readlink
 *
 * rl->rl_data is NULL when rfs_readlink() returned early with an error,
 * in which case nothing is freed. The buffer is always freed with size
 * NFS_MAXPATHLEN.
744 746 */
745 747 void
746 748 rfs_rlfree(struct nfsrdlnres *rl)
747 749 {
748 750 if (rl->rl_data != NULL)
749 751 kmem_free(rl->rl_data, NFS_MAXPATHLEN);
750 752 }
751 753
/*
 * File-local helper used by rfs_read() for requests that carry an RDMA
 * write list; rfs_read() treats a zero return value as failure.
 */
752 754 static int rdma_setup_read_data2(struct nfsreadargs *, struct nfsrdresult *);
753 755
754 756 /*
755 757 * Read data.
756 758 * Returns some data read from the file at the given fhandle.
 *
 * Only regular files can be read; any other vnode type yields
 * NFSERR_ISDIR. The data is returned either in an mblk allocated here
 * with allocb_wait() (rr->rr_mp / rr->rr_data) or, when the request
 * carries an RDMA write list (ra->ra_wlist), in the chunk obtained from
 * rdma_get_wchunk().
 *
 * Early error paths set rr->rr_data to NULL and return after dropping,
 * as applicable, the rwlock, the nbl critical region and the vnode.
 * When a delegation conflict is reported (EAGAIN with CC_WOULDBLOCK) the
 * thread is flagged T_WOULDBLOCK and rr->rr_status is not set.
 *
 * NOTE(review): the rr_status values stored just before "goto done" and
 * after a failed rdma_setup_read_data2() are overwritten by the final
 * rr->rr_status = puterrno(error) below the "done" label - confirm
 * whether that is intended.
 *
 * ro is not used by this routine.
757 759 */
758 760 /* ARGSUSED */
759 761 void
760 762 rfs_read(struct nfsreadargs *ra, struct nfsrdresult *rr,
761 763 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
762 764 {
763 765 vnode_t *vp;
764 766 int error;
765 767 struct vattr va;
766 768 struct iovec iov;
767 769 struct uio uio;
768 770 mblk_t *mp;
769 771 int alloc_err = 0;
770 772 int in_crit = 0;
771 773 caller_context_t ct;
772 774
773 775 vp = nfs_fhtovp(&ra->ra_fhandle, exi);
774 776 if (vp == NULL) {
775 777 rr->rr_data = NULL;
776 778 rr->rr_status = NFSERR_STALE;
777 779 return;
778 780 }
779 781
780 782 if (vp->v_type != VREG) {
781 783 VN_RELE(vp);
782 784 rr->rr_data = NULL;
783 785 rr->rr_status = NFSERR_ISDIR;
784 786 return;
785 787 }
786 788
787 789 ct.cc_sysid = 0;
788 790 ct.cc_pid = 0;
789 791 ct.cc_caller_id = nfs2_srv_caller_id;
790 792 ct.cc_flags = CC_DONTBLOCK;
791 793
792 794 /*
793 795 * Enter the critical region before calling VOP_RWLOCK
794 796 * to avoid a deadlock with write requests.
795 797 */
796 798 if (nbl_need_check(vp)) {
797 799 nbl_start_crit(vp, RW_READER);
798 800 if (nbl_conflict(vp, NBL_READ, ra->ra_offset, ra->ra_count,
799 801 0, NULL)) {
800 802 nbl_end_crit(vp);
801 803 VN_RELE(vp);
802 804 rr->rr_data = NULL;
803 805 rr->rr_status = NFSERR_ACCES;
804 806 return;
805 807 }
806 808 in_crit = 1;
807 809 }
808 810
/*
 * Only the delegation-conflict result of VOP_RWLOCK() is examined below;
 * any other value is replaced by the VOP_GETATTR() result that follows.
 */
809 811 error = VOP_RWLOCK(vp, V_WRITELOCK_FALSE, &ct);
810 812
811 813 /* check if a monitor detected a delegation conflict */
812 814 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
813 815 if (in_crit)
814 816 nbl_end_crit(vp);
815 817 VN_RELE(vp);
816 818 /* mark as wouldblock so response is dropped */
817 819 curthread->t_flag |= T_WOULDBLOCK;
818 820
819 821 rr->rr_data = NULL;
820 822 return;
821 823 }
822 824
823 825 va.va_mask = AT_ALL;
824 826
825 827 error = VOP_GETATTR(vp, &va, 0, cr, &ct);
826 828
827 829 if (error) {
828 830 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
829 831 if (in_crit)
830 832 nbl_end_crit(vp);
831 833
832 834 VN_RELE(vp);
833 835 rr->rr_data = NULL;
834 836 rr->rr_status = puterrno(error);
835 837
836 838 return;
837 839 }
838 840
839 841 /*
840 842 * This is a kludge to allow reading of files created
841 843 * with no read permission. The owner of the file
842 844 * is always allowed to read it.
843 845 */
844 846 if (crgetuid(cr) != va.va_uid) {
845 847 error = VOP_ACCESS(vp, VREAD, 0, cr, &ct);
846 848
847 849 if (error) {
848 850 /*
849 851 * Exec is the same as read over the net because
850 852 * of demand loading.
851 853 */
852 854 error = VOP_ACCESS(vp, VEXEC, 0, cr, &ct);
853 855 }
854 856 if (error) {
855 857 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
856 858 if (in_crit)
857 859 nbl_end_crit(vp);
858 860 VN_RELE(vp);
859 861 rr->rr_data = NULL;
860 862 rr->rr_status = puterrno(error);
861 863
862 864 return;
863 865 }
864 866 }
865 867
866 868 if (MANDLOCK(vp, va.va_mode)) {
867 869 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
868 870 if (in_crit)
869 871 nbl_end_crit(vp);
870 872
871 873 VN_RELE(vp);
872 874 rr->rr_data = NULL;
873 875 rr->rr_status = NFSERR_ACCES;
874 876
875 877 return;
876 878 }
877 879
878 880 rr->rr_ok.rrok_wlist_len = 0;
879 881 rr->rr_ok.rrok_wlist = NULL;
880 882
/* Read at or beyond end of file: reply with zero bytes and attributes. */
881 883 if ((u_offset_t)ra->ra_offset >= va.va_size) {
882 884 rr->rr_count = 0;
883 885 rr->rr_data = NULL;
884 886 /*
885 887 * In this case, status is NFS_OK, but there is no data
886 888 * to encode. So set rr_mp to NULL.
887 889 */
888 890 rr->rr_mp = NULL;
889 891 rr->rr_ok.rrok_wlist = ra->ra_wlist;
890 892 if (rr->rr_ok.rrok_wlist)
891 893 clist_zero_len(rr->rr_ok.rrok_wlist);
892 894 goto done;
893 895 }
894 896
895 897 if (ra->ra_wlist) {
896 898 mp = NULL;
897 899 rr->rr_mp = NULL;
898 900 (void) rdma_get_wchunk(req, &iov, ra->ra_wlist);
/*
 * NOTE(review): the NFSERR_INVAL stored here is overwritten after the
 * "done" label by rr->rr_status = puterrno(error).
 */
899 901 if (ra->ra_count > iov.iov_len) {
900 902 rr->rr_data = NULL;
901 903 rr->rr_status = NFSERR_INVAL;
902 904 goto done;
903 905 }
904 906 } else {
905 907 /*
906 908 * mp will contain the data to be sent out in the read reply.
907 909 * This will be freed after the reply has been sent out (by the
908 910 * driver).
909 911 * Let's roundup the data to a BYTES_PER_XDR_UNIT multiple, so
910 912 * that the call to xdrmblk_putmblk() never fails.
911 913 */
912 914 mp = allocb_wait(RNDUP(ra->ra_count), BPRI_MED, STR_NOSIG,
913 915 &alloc_err);
914 916 ASSERT(mp != NULL);
915 917 ASSERT(alloc_err == 0);
916 918
917 919 rr->rr_mp = mp;
918 920
919 921 /*
920 922 * Set up io vector
921 923 */
922 924 iov.iov_base = (caddr_t)mp->b_datap->db_base;
923 925 iov.iov_len = ra->ra_count;
924 926 }
925 927
926 928 uio.uio_iov = &iov;
927 929 uio.uio_iovcnt = 1;
928 930 uio.uio_segflg = UIO_SYSSPACE;
929 931 uio.uio_extflg = UIO_COPY_CACHED;
930 932 uio.uio_loffset = (offset_t)ra->ra_offset;
931 933 uio.uio_resid = ra->ra_count;
932 934
933 935 error = VOP_READ(vp, &uio, 0, cr, &ct);
934 936
935 937 if (error) {
936 938 if (mp)
937 939 freeb(mp);
938 940
939 941 /*
940 942 * check if a monitor detected a delegation conflict and
941 943 * mark as wouldblock so response is dropped
942 944 */
943 945 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
944 946 curthread->t_flag |= T_WOULDBLOCK;
945 947 else
946 948 rr->rr_status = puterrno(error);
947 949
948 950 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
949 951 if (in_crit)
950 952 nbl_end_crit(vp);
951 953
952 954 VN_RELE(vp);
953 955 rr->rr_data = NULL;
954 956
955 957 return;
956 958 }
957 959
958 960 /*
959 961 * Get attributes again so we can send the latest access
960 962 * time to the client side for its cache.
961 963 */
962 964 va.va_mask = AT_ALL;
963 965
964 966 error = VOP_GETATTR(vp, &va, 0, cr, &ct);
965 967
966 968 if (error) {
967 969 if (mp)
968 970 freeb(mp);
969 971
970 972 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
971 973 if (in_crit)
972 974 nbl_end_crit(vp);
973 975
974 976 VN_RELE(vp);
975 977 rr->rr_data = NULL;
976 978 rr->rr_status = puterrno(error);
977 979
978 980 return;
979 981 }
980 982
/* Number of bytes actually read. */
981 983 rr->rr_count = (uint32_t)(ra->ra_count - uio.uio_resid);
982 984
983 985 if (mp) {
984 986 rr->rr_data = (char *)mp->b_datap->db_base;
985 987 } else {
986 988 if (ra->ra_wlist) {
987 989 rr->rr_data = (caddr_t)iov.iov_base;
/*
 * NOTE(review): the status stored on failure here is also overwritten
 * after the "done" label.
 */
988 990 if (!rdma_setup_read_data2(ra, rr)) {
989 991 rr->rr_data = NULL;
990 992 rr->rr_status = puterrno(NFSERR_INVAL);
991 993 }
992 994 }
993 995 }
994 996 done:
995 997 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
996 998 if (in_crit)
997 999 nbl_end_crit(vp);
998 1000
999 1001 acl_perm(vp, exi, &va, cr);
1000 1002
1001 1003 /* check for overflows */
1002 1004 error = vattr_to_nattr(&va, &rr->rr_attr);
1003 1005
1004 1006 VN_RELE(vp);
1005 1007
1006 1008 rr->rr_status = puterrno(error);
1007 1009 }
1008 1010
1009 1011 /*
1010 1012 * Free data allocated by rfs_read
 *
 * The mblk in rr->rr_mp is freed only when the reply status is NFS_OK
 * and the pointer is non-NULL. On its own error paths rfs_read() frees
 * the mblk itself before returning.
1011 1013 */
1012 1014 void
1013 1015 rfs_rdfree(struct nfsrdresult *rr)
1014 1016 {
1015 1017 mblk_t *mp;
1016 1018
1017 1019 if (rr->rr_status == NFS_OK) {
1018 1020 mp = rr->rr_mp;
1019 1021 if (mp != NULL)
1020 1022 freeb(mp);
1021 1023 }
1022 1024 }
1023 1025
/* Return a pointer to the file handle (ra_fhandle) inside the READ arguments. */
1024 1026 void *
1025 1027 rfs_read_getfh(struct nfsreadargs *ra)
1026 1028 {
1027 1029 return (&ra->ra_fhandle);
1028 1030 }
1029 1031
/*
 * Size of the on-stack iovec array in rfs_write_sync(); also the largest
 * mblk chain length that rfs_write_sync() counts as a "hit" below.
 */
1030 1032 #define MAX_IOVECS 12
1031 1033
/*
 * DEBUG-only counters. rfs_write_sync_hits is incremented in
 * rfs_write_sync() when the mblk chain has at most MAX_IOVECS elements;
 * rfs_write_sync_misses is presumably the complementary count - confirm.
 */
1032 1034 #ifdef DEBUG
1033 1035 static int rfs_write_sync_hits = 0;
1034 1036 static int rfs_write_sync_misses = 0;
1035 1037 #endif
1036 1038
1037 1039 /*
1038 1040 * Write data to file.
1039 1041 * Returns attributes of a file after writing some data to it.
1040 1042 *
1041 1043 * Any changes made here, especially in error handling might have
1042 1044 * to also be done in rfs_write (which clusters write requests).
1043 1045 */
1044 1046 /* ARGSUSED */
1045 1047 void
1046 1048 rfs_write_sync(struct nfswriteargs *wa, struct nfsattrstat *ns,
1047 1049 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
1048 1050 {
1049 1051 int error;
1050 1052 vnode_t *vp;
1051 1053 rlim64_t rlimit;
1052 1054 struct vattr va;
1053 1055 struct uio uio;
1054 1056 struct iovec iov[MAX_IOVECS];
1055 1057 mblk_t *m;
1056 1058 struct iovec *iovp;
1057 1059 int iovcnt;
1058 1060 cred_t *savecred;
1059 1061 int in_crit = 0;
1060 1062 caller_context_t ct;
1061 1063
1062 1064 vp = nfs_fhtovp(&wa->wa_fhandle, exi);
1063 1065 if (vp == NULL) {
1064 1066 ns->ns_status = NFSERR_STALE;
1065 1067 return;
1066 1068 }
1067 1069
1068 1070 if (rdonly(ro, vp)) {
1069 1071 VN_RELE(vp);
1070 1072 ns->ns_status = NFSERR_ROFS;
1071 1073 return;
1072 1074 }
1073 1075
1074 1076 if (vp->v_type != VREG) {
1075 1077 VN_RELE(vp);
1076 1078 ns->ns_status = NFSERR_ISDIR;
1077 1079 return;
1078 1080 }
1079 1081
1080 1082 ct.cc_sysid = 0;
1081 1083 ct.cc_pid = 0;
1082 1084 ct.cc_caller_id = nfs2_srv_caller_id;
1083 1085 ct.cc_flags = CC_DONTBLOCK;
1084 1086
1085 1087 va.va_mask = AT_UID|AT_MODE;
1086 1088
1087 1089 error = VOP_GETATTR(vp, &va, 0, cr, &ct);
1088 1090
1089 1091 if (error) {
1090 1092 VN_RELE(vp);
1091 1093 ns->ns_status = puterrno(error);
1092 1094
1093 1095 return;
1094 1096 }
1095 1097
1096 1098 if (crgetuid(cr) != va.va_uid) {
1097 1099 /*
1098 1100 * This is a kludge to allow writes of files created
1099 1101 * with read only permission. The owner of the file
1100 1102 * is always allowed to write it.
1101 1103 */
1102 1104 error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct);
1103 1105
1104 1106 if (error) {
1105 1107 VN_RELE(vp);
1106 1108 ns->ns_status = puterrno(error);
1107 1109 return;
1108 1110 }
1109 1111 }
1110 1112
1111 1113 /*
1112 1114 * Can't access a mandatory lock file. This might cause
1113 1115 * the NFS service thread to block forever waiting for a
1114 1116 * lock to be released that will never be released.
1115 1117 */
1116 1118 if (MANDLOCK(vp, va.va_mode)) {
1117 1119 VN_RELE(vp);
1118 1120 ns->ns_status = NFSERR_ACCES;
1119 1121 return;
1120 1122 }
1121 1123
1122 1124 /*
1123 1125 * We have to enter the critical region before calling VOP_RWLOCK
1124 1126 * to avoid a deadlock with ufs.
1125 1127 */
1126 1128 if (nbl_need_check(vp)) {
1127 1129 nbl_start_crit(vp, RW_READER);
1128 1130 in_crit = 1;
1129 1131 if (nbl_conflict(vp, NBL_WRITE, wa->wa_offset,
1130 1132 wa->wa_count, 0, NULL)) {
1131 1133 error = EACCES;
1132 1134 goto out;
1133 1135 }
1134 1136 }
1135 1137
1136 1138 error = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct);
1137 1139
1138 1140 /* check if a monitor detected a delegation conflict */
1139 1141 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1140 1142 goto out;
1141 1143 }
1142 1144
1143 1145 if (wa->wa_data || wa->wa_rlist) {
1144 1146 /* Do the RDMA thing if necessary */
1145 1147 if (wa->wa_rlist) {
1146 1148 iov[0].iov_base = (char *)((wa->wa_rlist)->u.c_daddr3);
1147 1149 iov[0].iov_len = wa->wa_count;
1148 1150 } else {
1149 1151 iov[0].iov_base = wa->wa_data;
1150 1152 iov[0].iov_len = wa->wa_count;
1151 1153 }
1152 1154 uio.uio_iov = iov;
1153 1155 uio.uio_iovcnt = 1;
1154 1156 uio.uio_segflg = UIO_SYSSPACE;
1155 1157 uio.uio_extflg = UIO_COPY_DEFAULT;
1156 1158 uio.uio_loffset = (offset_t)wa->wa_offset;
1157 1159 uio.uio_resid = wa->wa_count;
1158 1160 /*
1159 1161 * The limit is checked on the client. We
1160 1162 * should allow any size writes here.
1161 1163 */
1162 1164 uio.uio_llimit = curproc->p_fsz_ctl;
1163 1165 rlimit = uio.uio_llimit - wa->wa_offset;
1164 1166 if (rlimit < (rlim64_t)uio.uio_resid)
1165 1167 uio.uio_resid = (uint_t)rlimit;
1166 1168
1167 1169 /*
1168 1170 * for now we assume no append mode
1169 1171 */
1170 1172 /*
1171 1173 * We're changing creds because VM may fault and we need
1172 1174 * the cred of the current thread to be used if quota
1173 1175 * checking is enabled.
1174 1176 */
1175 1177 savecred = curthread->t_cred;
1176 1178 curthread->t_cred = cr;
1177 1179 error = VOP_WRITE(vp, &uio, FSYNC, cr, &ct);
1178 1180 curthread->t_cred = savecred;
1179 1181 } else {
1180 1182
1181 1183 iovcnt = 0;
1182 1184 for (m = wa->wa_mblk; m != NULL; m = m->b_cont)
1183 1185 iovcnt++;
1184 1186 if (iovcnt <= MAX_IOVECS) {
1185 1187 #ifdef DEBUG
1186 1188 rfs_write_sync_hits++;
1187 1189 #endif
1188 1190 iovp = iov;
1189 1191 } else {
1190 1192 #ifdef DEBUG
1191 1193 rfs_write_sync_misses++;
1192 1194 #endif
1193 1195 iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
1194 1196 }
1195 1197 mblk_to_iov(wa->wa_mblk, iovcnt, iovp);
1196 1198 uio.uio_iov = iovp;
1197 1199 uio.uio_iovcnt = iovcnt;
1198 1200 uio.uio_segflg = UIO_SYSSPACE;
1199 1201 uio.uio_extflg = UIO_COPY_DEFAULT;
1200 1202 uio.uio_loffset = (offset_t)wa->wa_offset;
1201 1203 uio.uio_resid = wa->wa_count;
1202 1204 /*
1203 1205 * The limit is checked on the client. We
1204 1206 * should allow any size writes here.
1205 1207 */
1206 1208 uio.uio_llimit = curproc->p_fsz_ctl;
1207 1209 rlimit = uio.uio_llimit - wa->wa_offset;
1208 1210 if (rlimit < (rlim64_t)uio.uio_resid)
1209 1211 uio.uio_resid = (uint_t)rlimit;
1210 1212
1211 1213 /*
1212 1214 * For now we assume no append mode.
1213 1215 */
1214 1216 /*
1215 1217 * We're changing creds because VM may fault and we need
1216 1218 * the cred of the current thread to be used if quota
1217 1219 * checking is enabled.
1218 1220 */
1219 1221 savecred = curthread->t_cred;
1220 1222 curthread->t_cred = cr;
1221 1223 error = VOP_WRITE(vp, &uio, FSYNC, cr, &ct);
1222 1224 curthread->t_cred = savecred;
1223 1225
1224 1226 if (iovp != iov)
1225 1227 kmem_free(iovp, sizeof (*iovp) * iovcnt);
1226 1228 }
1227 1229
1228 1230 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct);
1229 1231
1230 1232 if (!error) {
1231 1233 /*
1232 1234 * Get attributes again so we send the latest mod
1233 1235 * time to the client side for its cache.
1234 1236 */
1235 1237 va.va_mask = AT_ALL; /* now we want everything */
1236 1238
1237 1239 error = VOP_GETATTR(vp, &va, 0, cr, &ct);
1238 1240
1239 1241 /* check for overflows */
1240 1242 if (!error) {
1241 1243 acl_perm(vp, exi, &va, cr);
1242 1244 error = vattr_to_nattr(&va, &ns->ns_attr);
1243 1245 }
1244 1246 }
1245 1247
1246 1248 out:
1247 1249 if (in_crit)
1248 1250 nbl_end_crit(vp);
1249 1251 VN_RELE(vp);
1250 1252
1251 1253 /* check if a monitor detected a delegation conflict */
1252 1254 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
1253 1255 /* mark as wouldblock so response is dropped */
1254 1256 curthread->t_flag |= T_WOULDBLOCK;
1255 1257 else
1256 1258 ns->ns_status = puterrno(error);
1257 1259
1258 1260 }
1259 1261
1260 1262 struct rfs_async_write {
1261 1263 struct nfswriteargs *wa;
1262 1264 struct nfsattrstat *ns;
1263 1265 struct svc_req *req;
1264 1266 cred_t *cr;
1265 1267 bool_t ro;
1266 1268 kthread_t *thread;
1267 1269 struct rfs_async_write *list;
1268 1270 };
1269 1271
1270 1272 struct rfs_async_write_list {
1271 1273 fhandle_t *fhp;
1272 1274 kcondvar_t cv;
1273 1275 struct rfs_async_write *list;
1274 1276 struct rfs_async_write_list *next;
1275 1277 };
1276 1278
1277 1279 static struct rfs_async_write_list *rfs_async_write_head = NULL;
1278 1280 static kmutex_t rfs_async_write_lock;
1279 1281 static int rfs_write_async = 1; /* enables write clustering if == 1 */
1280 1282
1281 1283 #define MAXCLIOVECS 42
1282 1284 #define RFSWRITE_INITVAL (enum nfsstat) -1
1283 1285
1284 1286 #ifdef DEBUG
1285 1287 static int rfs_write_hits = 0;
1286 1288 static int rfs_write_misses = 0;
1287 1289 #endif
1288 1290
1289 1291 /*
1290 1292 * Write data to file.
1291 1293 * Returns attributes of a file after writing some data to it.
1292 1294 */
1293 1295 void
1294 1296 rfs_write(struct nfswriteargs *wa, struct nfsattrstat *ns,
1295 1297 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
1296 1298 {
1297 1299 int error;
1298 1300 vnode_t *vp;
1299 1301 rlim64_t rlimit;
1300 1302 struct vattr va;
1301 1303 struct uio uio;
1302 1304 struct rfs_async_write_list *lp;
1303 1305 struct rfs_async_write_list *nlp;
1304 1306 struct rfs_async_write *rp;
1305 1307 struct rfs_async_write *nrp;
1306 1308 struct rfs_async_write *trp;
1307 1309 struct rfs_async_write *lrp;
1308 1310 int data_written;
1309 1311 int iovcnt;
1310 1312 mblk_t *m;
1311 1313 struct iovec *iovp;
1312 1314 struct iovec *niovp;
1313 1315 struct iovec iov[MAXCLIOVECS];
1314 1316 int count;
1315 1317 int rcount;
1316 1318 uint_t off;
1317 1319 uint_t len;
1318 1320 struct rfs_async_write nrpsp;
1319 1321 struct rfs_async_write_list nlpsp;
1320 1322 ushort_t t_flag;
1321 1323 cred_t *savecred;
1322 1324 int in_crit = 0;
1323 1325 caller_context_t ct;
1324 1326 nfs_srv_t *nsrv;
1325 1327
1326 1328 ASSERT(exi == NULL || exi->exi_zoneid == curzone->zone_id);
1327 1329 nsrv = nfs_get_srv();
1328 1330 if (!nsrv->write_async) {
1329 1331 rfs_write_sync(wa, ns, exi, req, cr, ro);
1330 1332 return;
1331 1333 }
1332 1334
1333 1335 /*
1334 1336 * Initialize status to RFSWRITE_INITVAL instead of 0, since value of 0
1335 1337 * is considered an OK.
1336 1338 */
1337 1339 ns->ns_status = RFSWRITE_INITVAL;
1338 1340
1339 1341 nrp = &nrpsp;
1340 1342 nrp->wa = wa;
1341 1343 nrp->ns = ns;
1342 1344 nrp->req = req;
1343 1345 nrp->cr = cr;
1344 1346 nrp->ro = ro;
1345 1347 nrp->thread = curthread;
1346 1348
1347 1349 ASSERT(curthread->t_schedflag & TS_DONT_SWAP);
1348 1350
1349 1351 /*
1350 1352 * Look to see if there is already a cluster started
1351 1353 * for this file.
1352 1354 */
1353 1355 mutex_enter(&nsrv->async_write_lock);
1354 1356 for (lp = nsrv->async_write_head; lp != NULL; lp = lp->next) {
1355 1357 if (bcmp(&wa->wa_fhandle, lp->fhp,
1356 1358 sizeof (fhandle_t)) == 0)
1357 1359 break;
1358 1360 }
1359 1361
1360 1362 /*
1361 1363 * If lp is non-NULL, then there is already a cluster
1362 1364 * started. We need to place ourselves in the cluster
1363 1365 * list in the right place as determined by starting
1364 1366 * offset. Conflicts with non-blocking mandatory locked
1365 1367 * regions will be checked when the cluster is processed.
1366 1368 */
1367 1369 if (lp != NULL) {
1368 1370 rp = lp->list;
1369 1371 trp = NULL;
1370 1372 while (rp != NULL && rp->wa->wa_offset < wa->wa_offset) {
1371 1373 trp = rp;
1372 1374 rp = rp->list;
1373 1375 }
1374 1376 nrp->list = rp;
1375 1377 if (trp == NULL)
1376 1378 lp->list = nrp;
1377 1379 else
1378 1380 trp->list = nrp;
1379 1381 while (nrp->ns->ns_status == RFSWRITE_INITVAL)
1380 1382 cv_wait(&lp->cv, &nsrv->async_write_lock);
1381 1383 mutex_exit(&nsrv->async_write_lock);
1382 1384
1383 1385 return;
1384 1386 }
1385 1387
1386 1388 /*
1387 1389 * No cluster started yet, start one and add ourselves
1388 1390 * to the list of clusters.
1389 1391 */
1390 1392 nrp->list = NULL;
1391 1393
1392 1394 nlp = &nlpsp;
1393 1395 nlp->fhp = &wa->wa_fhandle;
1394 1396 cv_init(&nlp->cv, NULL, CV_DEFAULT, NULL);
1395 1397 nlp->list = nrp;
1396 1398 nlp->next = NULL;
1397 1399
1398 1400 if (nsrv->async_write_head == NULL) {
1399 1401 nsrv->async_write_head = nlp;
1400 1402 } else {
1401 1403 lp = nsrv->async_write_head;
1402 1404 while (lp->next != NULL)
1403 1405 lp = lp->next;
1404 1406 lp->next = nlp;
1405 1407 }
1406 1408 mutex_exit(&nsrv->async_write_lock);
1407 1409
1408 1410 /*
1409 1411 * Convert the file handle common to all of the requests
1410 1412 * in this cluster to a vnode.
1411 1413 */
1412 1414 vp = nfs_fhtovp(&wa->wa_fhandle, exi);
1413 1415 if (vp == NULL) {
1414 1416 mutex_enter(&nsrv->async_write_lock);
1415 1417 if (nsrv->async_write_head == nlp)
1416 1418 nsrv->async_write_head = nlp->next;
1417 1419 else {
1418 1420 lp = nsrv->async_write_head;
1419 1421 while (lp->next != nlp)
1420 1422 lp = lp->next;
1421 1423 lp->next = nlp->next;
1422 1424 }
1423 1425 t_flag = curthread->t_flag & T_WOULDBLOCK;
1424 1426 for (rp = nlp->list; rp != NULL; rp = rp->list) {
1425 1427 rp->ns->ns_status = NFSERR_STALE;
1426 1428 rp->thread->t_flag |= t_flag;
1427 1429 }
1428 1430 cv_broadcast(&nlp->cv);
1429 1431 mutex_exit(&nsrv->async_write_lock);
1430 1432
1431 1433 return;
1432 1434 }
1433 1435
1434 1436 /*
1435 1437 * Can only write regular files. Attempts to write any
1436 1438 * other file types fail with EISDIR.
1437 1439 */
1438 1440 if (vp->v_type != VREG) {
1439 1441 VN_RELE(vp);
1440 1442 mutex_enter(&nsrv->async_write_lock);
1441 1443 if (nsrv->async_write_head == nlp)
1442 1444 nsrv->async_write_head = nlp->next;
1443 1445 else {
1444 1446 lp = nsrv->async_write_head;
1445 1447 while (lp->next != nlp)
1446 1448 lp = lp->next;
1447 1449 lp->next = nlp->next;
1448 1450 }
1449 1451 t_flag = curthread->t_flag & T_WOULDBLOCK;
1450 1452 for (rp = nlp->list; rp != NULL; rp = rp->list) {
1451 1453 rp->ns->ns_status = NFSERR_ISDIR;
1452 1454 rp->thread->t_flag |= t_flag;
1453 1455 }
1454 1456 cv_broadcast(&nlp->cv);
1455 1457 mutex_exit(&nsrv->async_write_lock);
1456 1458
1457 1459 return;
1458 1460 }
1459 1461
1460 1462 /*
1461 1463 * Enter the critical region before calling VOP_RWLOCK, to avoid a
1462 1464 * deadlock with ufs.
1463 1465 */
1464 1466 if (nbl_need_check(vp)) {
1465 1467 nbl_start_crit(vp, RW_READER);
1466 1468 in_crit = 1;
1467 1469 }
1468 1470
1469 1471 ct.cc_sysid = 0;
1470 1472 ct.cc_pid = 0;
1471 1473 ct.cc_caller_id = nfs2_srv_caller_id;
1472 1474 ct.cc_flags = CC_DONTBLOCK;
1473 1475
1474 1476 /*
1475 1477 * Lock the file for writing. This operation provides
1476 1478 * the delay which allows clusters to grow.
1477 1479 */
1478 1480 error = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct);
1479 1481
1480 1482 /* check if a monitor detected a delegation conflict */
1481 1483 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1482 1484 if (in_crit)
1483 1485 nbl_end_crit(vp);
1484 1486 VN_RELE(vp);
1485 1487 /* mark as wouldblock so response is dropped */
1486 1488 curthread->t_flag |= T_WOULDBLOCK;
1487 1489 mutex_enter(&nsrv->async_write_lock);
1488 1490 if (nsrv->async_write_head == nlp)
1489 1491 nsrv->async_write_head = nlp->next;
1490 1492 else {
1491 1493 lp = nsrv->async_write_head;
1492 1494 while (lp->next != nlp)
1493 1495 lp = lp->next;
1494 1496 lp->next = nlp->next;
1495 1497 }
1496 1498 for (rp = nlp->list; rp != NULL; rp = rp->list) {
1497 1499 if (rp->ns->ns_status == RFSWRITE_INITVAL) {
1498 1500 rp->ns->ns_status = puterrno(error);
1499 1501 rp->thread->t_flag |= T_WOULDBLOCK;
1500 1502 }
1501 1503 }
1502 1504 cv_broadcast(&nlp->cv);
1503 1505 mutex_exit(&nsrv->async_write_lock);
1504 1506
1505 1507 return;
1506 1508 }
1507 1509
1508 1510 /*
1509 1511 * Disconnect this cluster from the list of clusters.
1510 1512 * The cluster that is being dealt with must be fixed
1511 1513 * in size after this point, so there is no reason
1512 1514 * to leave it on the list so that new requests can
1513 1515 * find it.
1514 1516 *
1515 1517 * The algorithm is that the first write request will
1516 1518 * create a cluster, convert the file handle to a
1517 1519 * vnode pointer, and then lock the file for writing.
1518 1520 * This request is not likely to be clustered with
1519 1521 * any others. However, the next request will create
1520 1522 * a new cluster and be blocked in VOP_RWLOCK while
1521 1523 * the first request is being processed. This delay
1522 1524 * will allow more requests to be clustered in this
1523 1525 * second cluster.
1524 1526 */
1525 1527 mutex_enter(&nsrv->async_write_lock);
1526 1528 if (nsrv->async_write_head == nlp)
1527 1529 nsrv->async_write_head = nlp->next;
1528 1530 else {
1529 1531 lp = nsrv->async_write_head;
1530 1532 while (lp->next != nlp)
1531 1533 lp = lp->next;
1532 1534 lp->next = nlp->next;
1533 1535 }
1534 1536 mutex_exit(&nsrv->async_write_lock);
1535 1537
1536 1538 /*
1537 1539 * Step through the list of requests in this cluster.
1538 1540 * We need to check permissions to make sure that all
1539 1541 * of the requests have sufficient permission to write
1540 1542 * the file. A cluster can be composed of requests
1541 1543 * from different clients and different users on each
1542 1544 * client.
1543 1545 *
1544 1546 * As a side effect, we also calculate the size of the
1545 1547 * byte range that this cluster encompasses.
1546 1548 */
1547 1549 rp = nlp->list;
1548 1550 off = rp->wa->wa_offset;
1549 1551 len = (uint_t)0;
1550 1552 do {
1551 1553 if (rdonly(rp->ro, vp)) {
1552 1554 rp->ns->ns_status = NFSERR_ROFS;
1553 1555 t_flag = curthread->t_flag & T_WOULDBLOCK;
1554 1556 rp->thread->t_flag |= t_flag;
1555 1557 continue;
1556 1558 }
1557 1559
1558 1560 va.va_mask = AT_UID|AT_MODE;
1559 1561
1560 1562 error = VOP_GETATTR(vp, &va, 0, rp->cr, &ct);
1561 1563
1562 1564 if (!error) {
1563 1565 if (crgetuid(rp->cr) != va.va_uid) {
1564 1566 /*
1565 1567 * This is a kludge to allow writes of files
1566 1568 * created with read only permission. The
1567 1569 * owner of the file is always allowed to
1568 1570 * write it.
1569 1571 */
1570 1572 error = VOP_ACCESS(vp, VWRITE, 0, rp->cr, &ct);
1571 1573 }
1572 1574 if (!error && MANDLOCK(vp, va.va_mode))
1573 1575 error = EACCES;
1574 1576 }
1575 1577
1576 1578 /*
1577 1579 * Check for a conflict with a nbmand-locked region.
1578 1580 */
1579 1581 if (in_crit && nbl_conflict(vp, NBL_WRITE, rp->wa->wa_offset,
1580 1582 rp->wa->wa_count, 0, NULL)) {
1581 1583 error = EACCES;
1582 1584 }
1583 1585
1584 1586 if (error) {
1585 1587 rp->ns->ns_status = puterrno(error);
1586 1588 t_flag = curthread->t_flag & T_WOULDBLOCK;
1587 1589 rp->thread->t_flag |= t_flag;
1588 1590 continue;
1589 1591 }
1590 1592 if (len < rp->wa->wa_offset + rp->wa->wa_count - off)
1591 1593 len = rp->wa->wa_offset + rp->wa->wa_count - off;
1592 1594 } while ((rp = rp->list) != NULL);
1593 1595
1594 1596 /*
1595 1597 * Step through the cluster attempting to gather as many
1596 1598 * requests which are contiguous as possible. These
1597 1599 * contiguous requests are handled via one call to VOP_WRITE
1598 1600 * instead of different calls to VOP_WRITE. We also keep
1599 1601 * track of the fact that any data was written.
1600 1602 */
1601 1603 rp = nlp->list;
1602 1604 data_written = 0;
1603 1605 do {
1604 1606 /*
1605 1607 * Skip any requests which are already marked as having an
1606 1608 * error.
1607 1609 */
1608 1610 if (rp->ns->ns_status != RFSWRITE_INITVAL) {
1609 1611 rp = rp->list;
1610 1612 continue;
1611 1613 }
1612 1614
1613 1615 /*
1614 1616 * Count the number of iovec's which are required
1615 1617 * to handle this set of requests. One iovec is
1616 1618 * needed for each data buffer, whether addressed
1617 1619 * by wa_data or by the b_rptr pointers in the
1618 1620 * mblk chains.
1619 1621 */
1620 1622 iovcnt = 0;
1621 1623 lrp = rp;
1622 1624 for (;;) {
1623 1625 if (lrp->wa->wa_data || lrp->wa->wa_rlist)
1624 1626 iovcnt++;
1625 1627 else {
1626 1628 m = lrp->wa->wa_mblk;
1627 1629 while (m != NULL) {
1628 1630 iovcnt++;
1629 1631 m = m->b_cont;
1630 1632 }
1631 1633 }
1632 1634 if (lrp->list == NULL ||
1633 1635 lrp->list->ns->ns_status != RFSWRITE_INITVAL ||
1634 1636 lrp->wa->wa_offset + lrp->wa->wa_count !=
1635 1637 lrp->list->wa->wa_offset) {
1636 1638 lrp = lrp->list;
1637 1639 break;
1638 1640 }
1639 1641 lrp = lrp->list;
1640 1642 }
1641 1643
1642 1644 if (iovcnt <= MAXCLIOVECS) {
1643 1645 #ifdef DEBUG
1644 1646 rfs_write_hits++;
1645 1647 #endif
1646 1648 niovp = iov;
1647 1649 } else {
1648 1650 #ifdef DEBUG
1649 1651 rfs_write_misses++;
1650 1652 #endif
1651 1653 niovp = kmem_alloc(sizeof (*niovp) * iovcnt, KM_SLEEP);
1652 1654 }
1653 1655 /*
1654 1656 * Put together the scatter/gather iovecs.
1655 1657 */
1656 1658 iovp = niovp;
1657 1659 trp = rp;
1658 1660 count = 0;
1659 1661 do {
1660 1662 if (trp->wa->wa_data || trp->wa->wa_rlist) {
1661 1663 if (trp->wa->wa_rlist) {
1662 1664 iovp->iov_base =
1663 1665 (char *)((trp->wa->wa_rlist)->
1664 1666 u.c_daddr3);
1665 1667 iovp->iov_len = trp->wa->wa_count;
1666 1668 } else {
1667 1669 iovp->iov_base = trp->wa->wa_data;
1668 1670 iovp->iov_len = trp->wa->wa_count;
1669 1671 }
1670 1672 iovp++;
1671 1673 } else {
1672 1674 m = trp->wa->wa_mblk;
1673 1675 rcount = trp->wa->wa_count;
1674 1676 while (m != NULL) {
1675 1677 iovp->iov_base = (caddr_t)m->b_rptr;
1676 1678 iovp->iov_len = (m->b_wptr - m->b_rptr);
1677 1679 rcount -= iovp->iov_len;
1678 1680 if (rcount < 0)
1679 1681 iovp->iov_len += rcount;
1680 1682 iovp++;
1681 1683 if (rcount <= 0)
1682 1684 break;
1683 1685 m = m->b_cont;
1684 1686 }
1685 1687 }
1686 1688 count += trp->wa->wa_count;
1687 1689 trp = trp->list;
1688 1690 } while (trp != lrp);
1689 1691
1690 1692 uio.uio_iov = niovp;
1691 1693 uio.uio_iovcnt = iovcnt;
1692 1694 uio.uio_segflg = UIO_SYSSPACE;
1693 1695 uio.uio_extflg = UIO_COPY_DEFAULT;
1694 1696 uio.uio_loffset = (offset_t)rp->wa->wa_offset;
1695 1697 uio.uio_resid = count;
1696 1698 /*
1697 1699 * The limit is checked on the client. We
1698 1700 * should allow any size writes here.
1699 1701 */
1700 1702 uio.uio_llimit = curproc->p_fsz_ctl;
1701 1703 rlimit = uio.uio_llimit - rp->wa->wa_offset;
1702 1704 if (rlimit < (rlim64_t)uio.uio_resid)
1703 1705 uio.uio_resid = (uint_t)rlimit;
1704 1706
1705 1707 /*
1706 1708 * For now we assume no append mode.
1707 1709 */
1708 1710
1709 1711 /*
1710 1712 * We're changing creds because VM may fault
1711 1713 * and we need the cred of the current
1712 1714 * thread to be used if quota * checking is
1713 1715 * enabled.
1714 1716 */
1715 1717 savecred = curthread->t_cred;
1716 1718 curthread->t_cred = cr;
1717 1719 error = VOP_WRITE(vp, &uio, 0, rp->cr, &ct);
1718 1720 curthread->t_cred = savecred;
1719 1721
1720 1722 /* check if a monitor detected a delegation conflict */
1721 1723 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
1722 1724 /* mark as wouldblock so response is dropped */
1723 1725 curthread->t_flag |= T_WOULDBLOCK;
1724 1726
1725 1727 if (niovp != iov)
1726 1728 kmem_free(niovp, sizeof (*niovp) * iovcnt);
1727 1729
1728 1730 if (!error) {
1729 1731 data_written = 1;
1730 1732 /*
1731 1733 * Get attributes again so we send the latest mod
1732 1734 * time to the client side for its cache.
1733 1735 */
1734 1736 va.va_mask = AT_ALL; /* now we want everything */
1735 1737
1736 1738 error = VOP_GETATTR(vp, &va, 0, rp->cr, &ct);
1737 1739
1738 1740 if (!error)
1739 1741 acl_perm(vp, exi, &va, rp->cr);
1740 1742 }
1741 1743
1742 1744 /*
1743 1745 * Fill in the status responses for each request
1744 1746 * which was just handled. Also, copy the latest
1745 1747 * attributes in to the attribute responses if
1746 1748 * appropriate.
1747 1749 */
1748 1750 t_flag = curthread->t_flag & T_WOULDBLOCK;
1749 1751 do {
1750 1752 rp->thread->t_flag |= t_flag;
1751 1753 /* check for overflows */
1752 1754 if (!error) {
1753 1755 error = vattr_to_nattr(&va, &rp->ns->ns_attr);
1754 1756 }
1755 1757 rp->ns->ns_status = puterrno(error);
1756 1758 rp = rp->list;
1757 1759 } while (rp != lrp);
1758 1760 } while (rp != NULL);
1759 1761
1760 1762 /*
1761 1763 * If any data was written at all, then we need to flush
1762 1764 * the data and metadata to stable storage.
1763 1765 */
1764 1766 if (data_written) {
1765 1767 error = VOP_PUTPAGE(vp, (u_offset_t)off, len, 0, cr, &ct);
1766 1768
1767 1769 if (!error) {
1768 1770 error = VOP_FSYNC(vp, FNODSYNC, cr, &ct);
1769 1771 }
1770 1772 }
1771 1773
1772 1774 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct);
1773 1775
1774 1776 if (in_crit)
1775 1777 nbl_end_crit(vp);
1776 1778 VN_RELE(vp);
1777 1779
1778 1780 t_flag = curthread->t_flag & T_WOULDBLOCK;
1779 1781 mutex_enter(&nsrv->async_write_lock);
1780 1782 for (rp = nlp->list; rp != NULL; rp = rp->list) {
1781 1783 if (rp->ns->ns_status == RFSWRITE_INITVAL) {
1782 1784 rp->ns->ns_status = puterrno(error);
1783 1785 rp->thread->t_flag |= t_flag;
1784 1786 }
1785 1787 }
1786 1788 cv_broadcast(&nlp->cv);
1787 1789 mutex_exit(&nsrv->async_write_lock);
1788 1790
1789 1791 }
1790 1792
1791 1793 void *
1792 1794 rfs_write_getfh(struct nfswriteargs *wa)
1793 1795 {
1794 1796 return (&wa->wa_fhandle);
1795 1797 }
1796 1798
1797 1799 /*
1798 1800 * Create a file.
1799 1801 * Creates a file with given attributes and returns those attributes
1800 1802 * and an fhandle for the new file.
1801 1803 */
1802 1804 void
1803 1805 rfs_create(struct nfscreatargs *args, struct nfsdiropres *dr,
1804 1806 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
1805 1807 {
1806 1808 int error;
1807 1809 int lookuperr;
1808 1810 int in_crit = 0;
1809 1811 struct vattr va;
1810 1812 vnode_t *vp;
1811 1813 vnode_t *realvp;
1812 1814 vnode_t *dvp;
1813 1815 char *name = args->ca_da.da_name;
1814 1816 vnode_t *tvp = NULL;
1815 1817 int mode;
1816 1818 int lookup_ok;
1817 1819 bool_t trunc;
1818 1820 struct sockaddr *ca;
1819 1821
1820 1822 /*
1821 1823 * Disallow NULL paths
1822 1824 */
1823 1825 if (name == NULL || *name == '\0') {
1824 1826 dr->dr_status = NFSERR_ACCES;
1825 1827 return;
1826 1828 }
1827 1829
1828 1830 dvp = nfs_fhtovp(args->ca_da.da_fhandle, exi);
1829 1831 if (dvp == NULL) {
1830 1832 dr->dr_status = NFSERR_STALE;
1831 1833 return;
1832 1834 }
1833 1835
1834 1836 error = sattr_to_vattr(args->ca_sa, &va);
1835 1837 if (error) {
1836 1838 dr->dr_status = puterrno(error);
1837 1839 return;
1838 1840 }
1839 1841
1840 1842 /*
1841 1843 * Must specify the mode.
1842 1844 */
1843 1845 if (!(va.va_mask & AT_MODE)) {
1844 1846 VN_RELE(dvp);
1845 1847 dr->dr_status = NFSERR_INVAL;
1846 1848 return;
1847 1849 }
1848 1850
1849 1851 /*
1850 1852 * This is a completely gross hack to make mknod
1851 1853 * work over the wire until we can wack the protocol
1852 1854 */
1853 1855 if ((va.va_mode & IFMT) == IFCHR) {
1854 1856 if (args->ca_sa->sa_size == (uint_t)NFS_FIFO_DEV)
1855 1857 va.va_type = VFIFO; /* xtra kludge for named pipe */
1856 1858 else {
1857 1859 va.va_type = VCHR;
1858 1860 /*
1859 1861 * uncompress the received dev_t
1860 1862 * if the top half is zero indicating a request
1861 1863 * from an `older style' OS.
1862 1864 */
1863 1865 if ((va.va_size & 0xffff0000) == 0)
1864 1866 va.va_rdev = nfsv2_expdev(va.va_size);
1865 1867 else
1866 1868 va.va_rdev = (dev_t)va.va_size;
1867 1869 }
1868 1870 va.va_mask &= ~AT_SIZE;
1869 1871 } else if ((va.va_mode & IFMT) == IFBLK) {
1870 1872 va.va_type = VBLK;
1871 1873 /*
1872 1874 * uncompress the received dev_t
1873 1875 * if the top half is zero indicating a request
1874 1876 * from an `older style' OS.
1875 1877 */
1876 1878 if ((va.va_size & 0xffff0000) == 0)
1877 1879 va.va_rdev = nfsv2_expdev(va.va_size);
1878 1880 else
1879 1881 va.va_rdev = (dev_t)va.va_size;
1880 1882 va.va_mask &= ~AT_SIZE;
1881 1883 } else if ((va.va_mode & IFMT) == IFSOCK) {
1882 1884 va.va_type = VSOCK;
1883 1885 } else {
1884 1886 va.va_type = VREG;
1885 1887 }
1886 1888 va.va_mode &= ~IFMT;
1887 1889 va.va_mask |= AT_TYPE;
1888 1890
1889 1891 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1890 1892 name = nfscmd_convname(ca, exi, name, NFSCMD_CONV_INBOUND,
1891 1893 MAXPATHLEN);
1892 1894 if (name == NULL) {
1893 1895 dr->dr_status = puterrno(EINVAL);
1894 1896 return;
1895 1897 }
1896 1898
1897 1899 /*
1898 1900 * Why was the choice made to use VWRITE as the mode to the
1899 1901 * call to VOP_CREATE ? This results in a bug. When a client
1900 1902 * opens a file that already exists and is RDONLY, the second
1901 1903 * open fails with an EACESS because of the mode.
1902 1904 * bug ID 1054648.
1903 1905 */
1904 1906 lookup_ok = 0;
1905 1907 mode = VWRITE;
1906 1908 if (!(va.va_mask & AT_SIZE) || va.va_type != VREG) {
1907 1909 error = VOP_LOOKUP(dvp, name, &tvp, NULL, 0, NULL, cr,
1908 1910 NULL, NULL, NULL);
1909 1911 if (!error) {
1910 1912 struct vattr at;
1911 1913
1912 1914 lookup_ok = 1;
1913 1915 at.va_mask = AT_MODE;
1914 1916 error = VOP_GETATTR(tvp, &at, 0, cr, NULL);
1915 1917 if (!error)
1916 1918 mode = (at.va_mode & S_IWUSR) ? VWRITE : VREAD;
1917 1919 VN_RELE(tvp);
1918 1920 tvp = NULL;
1919 1921 }
1920 1922 }
1921 1923
1922 1924 if (!lookup_ok) {
1923 1925 if (rdonly(ro, dvp)) {
1924 1926 error = EROFS;
1925 1927 } else if (va.va_type != VREG && va.va_type != VFIFO &&
1926 1928 va.va_type != VSOCK && secpolicy_sys_devices(cr) != 0) {
1927 1929 error = EPERM;
1928 1930 } else {
1929 1931 error = 0;
1930 1932 }
1931 1933 }
1932 1934
1933 1935 /*
1934 1936 * If file size is being modified on an already existing file
1935 1937 * make sure that there are no conflicting non-blocking mandatory
1936 1938 * locks in the region being manipulated. Return EACCES if there
1937 1939 * are conflicting locks.
1938 1940 */
1939 1941 if (!error && (va.va_type == VREG) && (va.va_mask & AT_SIZE)) {
1940 1942 lookuperr = VOP_LOOKUP(dvp, name, &tvp, NULL, 0, NULL, cr,
1941 1943 NULL, NULL, NULL);
1942 1944
1943 1945 if (!lookuperr &&
1944 1946 rfs4_check_delegated(FWRITE, tvp, va.va_size == 0)) {
1945 1947 VN_RELE(tvp);
1946 1948 curthread->t_flag |= T_WOULDBLOCK;
1947 1949 goto out;
1948 1950 }
1949 1951
1950 1952 if (!lookuperr && nbl_need_check(tvp)) {
1951 1953 /*
1952 1954 * The file exists. Now check if it has any
1953 1955 * conflicting non-blocking mandatory locks
1954 1956 * in the region being changed.
1955 1957 */
1956 1958 struct vattr bva;
1957 1959 u_offset_t offset;
1958 1960 ssize_t length;
1959 1961
1960 1962 nbl_start_crit(tvp, RW_READER);
1961 1963 in_crit = 1;
1962 1964
1963 1965 bva.va_mask = AT_SIZE;
1964 1966 error = VOP_GETATTR(tvp, &bva, 0, cr, NULL);
1965 1967 if (!error) {
1966 1968 if (va.va_size < bva.va_size) {
1967 1969 offset = va.va_size;
1968 1970 length = bva.va_size - va.va_size;
1969 1971 } else {
1970 1972 offset = bva.va_size;
1971 1973 length = va.va_size - bva.va_size;
1972 1974 }
1973 1975 if (length) {
1974 1976 if (nbl_conflict(tvp, NBL_WRITE,
1975 1977 offset, length, 0, NULL)) {
1976 1978 error = EACCES;
1977 1979 }
1978 1980 }
1979 1981 }
1980 1982 if (error) {
1981 1983 nbl_end_crit(tvp);
1982 1984 VN_RELE(tvp);
1983 1985 in_crit = 0;
1984 1986 }
1985 1987 } else if (tvp != NULL) {
1986 1988 VN_RELE(tvp);
1987 1989 }
1988 1990 }
1989 1991
1990 1992 if (!error) {
1991 1993 /*
1992 1994 * If filesystem is shared with nosuid the remove any
1993 1995 * setuid/setgid bits on create.
1994 1996 */
1995 1997 if (va.va_type == VREG &&
1996 1998 exi->exi_export.ex_flags & EX_NOSUID)
1997 1999 va.va_mode &= ~(VSUID | VSGID);
1998 2000
1999 2001 error = VOP_CREATE(dvp, name, &va, NONEXCL, mode, &vp, cr, 0,
2000 2002 NULL, NULL);
2001 2003
2002 2004 if (!error) {
2003 2005
2004 2006 if ((va.va_mask & AT_SIZE) && (va.va_size == 0))
2005 2007 trunc = TRUE;
2006 2008 else
2007 2009 trunc = FALSE;
2008 2010
2009 2011 if (rfs4_check_delegated(FWRITE, vp, trunc)) {
2010 2012 VN_RELE(vp);
2011 2013 curthread->t_flag |= T_WOULDBLOCK;
2012 2014 goto out;
2013 2015 }
2014 2016 va.va_mask = AT_ALL;
2015 2017
2016 2018 error = VOP_GETATTR(vp, &va, 0, cr, NULL);
2017 2019
2018 2020 /* check for overflows */
2019 2021 if (!error) {
2020 2022 acl_perm(vp, exi, &va, cr);
2021 2023 error = vattr_to_nattr(&va, &dr->dr_attr);
2022 2024 if (!error) {
2023 2025 error = makefh(&dr->dr_fhandle, vp,
2024 2026 exi);
2025 2027 }
2026 2028 }
2027 2029 /*
2028 2030 * Force modified metadata out to stable storage.
2029 2031 *
2030 2032 * if a underlying vp exists, pass it to VOP_FSYNC
2031 2033 */
2032 2034 if (VOP_REALVP(vp, &realvp, NULL) == 0)
2033 2035 (void) VOP_FSYNC(realvp, FNODSYNC, cr, NULL);
2034 2036 else
2035 2037 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
2036 2038 VN_RELE(vp);
2037 2039 }
2038 2040
2039 2041 if (in_crit) {
2040 2042 nbl_end_crit(tvp);
2041 2043 VN_RELE(tvp);
2042 2044 }
2043 2045 }
2044 2046
2045 2047 /*
2046 2048 * Force modified data and metadata out to stable storage.
2047 2049 */
2048 2050 (void) VOP_FSYNC(dvp, 0, cr, NULL);
2049 2051
2050 2052 out:
2051 2053
2052 2054 VN_RELE(dvp);
2053 2055
2054 2056 dr->dr_status = puterrno(error);
2055 2057
2056 2058 if (name != args->ca_da.da_name)
2057 2059 kmem_free(name, MAXPATHLEN);
2058 2060 }
2059 2061 void *
2060 2062 rfs_create_getfh(struct nfscreatargs *args)
2061 2063 {
2062 2064 return (args->ca_da.da_fhandle);
2063 2065 }
2064 2066
2065 2067 /*
2066 2068 * Remove a file.
2067 2069 * Remove named file from parent directory.
2068 2070 */
2069 2071 /* ARGSUSED */
2070 2072 void
2071 2073 rfs_remove(struct nfsdiropargs *da, enum nfsstat *status,
2072 2074 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
2073 2075 {
2074 2076 int error = 0;
2075 2077 vnode_t *vp;
2076 2078 vnode_t *targvp;
2077 2079 int in_crit = 0;
2078 2080
2079 2081 /*
2080 2082 * Disallow NULL paths
2081 2083 */
2082 2084 if (da->da_name == NULL || *da->da_name == '\0') {
2083 2085 *status = NFSERR_ACCES;
2084 2086 return;
2085 2087 }
2086 2088
2087 2089 vp = nfs_fhtovp(da->da_fhandle, exi);
2088 2090 if (vp == NULL) {
2089 2091 *status = NFSERR_STALE;
2090 2092 return;
2091 2093 }
2092 2094
2093 2095 if (rdonly(ro, vp)) {
2094 2096 VN_RELE(vp);
2095 2097 *status = NFSERR_ROFS;
2096 2098 return;
2097 2099 }
2098 2100
2099 2101 /*
2100 2102 * Check for a conflict with a non-blocking mandatory share reservation.
2101 2103 */
2102 2104 error = VOP_LOOKUP(vp, da->da_name, &targvp, NULL, 0,
2103 2105 NULL, cr, NULL, NULL, NULL);
2104 2106 if (error != 0) {
2105 2107 VN_RELE(vp);
2106 2108 *status = puterrno(error);
2107 2109 return;
2108 2110 }
2109 2111
2110 2112 /*
2111 2113 * If the file is delegated to an v4 client, then initiate
2112 2114 * recall and drop this request (by setting T_WOULDBLOCK).
2113 2115 * The client will eventually re-transmit the request and
2114 2116 * (hopefully), by then, the v4 client will have returned
2115 2117 * the delegation.
2116 2118 */
2117 2119
2118 2120 if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
2119 2121 VN_RELE(vp);
2120 2122 VN_RELE(targvp);
2121 2123 curthread->t_flag |= T_WOULDBLOCK;
2122 2124 return;
2123 2125 }
2124 2126
2125 2127 if (nbl_need_check(targvp)) {
2126 2128 nbl_start_crit(targvp, RW_READER);
2127 2129 in_crit = 1;
2128 2130 if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
2129 2131 error = EACCES;
2130 2132 goto out;
2131 2133 }
2132 2134 }
2133 2135
2134 2136 error = VOP_REMOVE(vp, da->da_name, cr, NULL, 0);
2135 2137
2136 2138 /*
2137 2139 * Force modified data and metadata out to stable storage.
2138 2140 */
2139 2141 (void) VOP_FSYNC(vp, 0, cr, NULL);
2140 2142
2141 2143 out:
2142 2144 if (in_crit)
2143 2145 nbl_end_crit(targvp);
2144 2146 VN_RELE(targvp);
2145 2147 VN_RELE(vp);
2146 2148
2147 2149 *status = puterrno(error);
2148 2150
2149 2151 }
2150 2152
2151 2153 void *
2152 2154 rfs_remove_getfh(struct nfsdiropargs *da)
2153 2155 {
2154 2156 return (da->da_fhandle);
2155 2157 }
2156 2158
2157 2159 /*
2158 2160 * rename a file
2159 2161 * Give a file (from) a new name (to).
2160 2162 */
2161 2163 /* ARGSUSED */
2162 2164 void
2163 2165 rfs_rename(struct nfsrnmargs *args, enum nfsstat *status,
2164 2166 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
2165 2167 {
2166 2168 int error = 0;
2167 2169 vnode_t *fromvp;
2168 2170 vnode_t *tovp;
2169 2171 struct exportinfo *to_exi;
2170 2172 fhandle_t *fh;
2171 2173 vnode_t *srcvp;
2172 2174 vnode_t *targvp;
2173 2175 int in_crit = 0;
2174 2176
2175 2177 fromvp = nfs_fhtovp(args->rna_from.da_fhandle, exi);
2176 2178 if (fromvp == NULL) {
2177 2179 *status = NFSERR_STALE;
2178 2180 return;
2179 2181 }
2180 2182
2181 2183 fh = args->rna_to.da_fhandle;
2182 2184 to_exi = checkexport(&fh->fh_fsid, (fid_t *)&fh->fh_xlen);
2183 2185 if (to_exi == NULL) {
2184 2186 VN_RELE(fromvp);
2185 2187 *status = NFSERR_ACCES;
2186 2188 return;
2187 2189 }
2188 2190 exi_rele(to_exi);
2189 2191
2190 2192 if (to_exi != exi) {
2191 2193 VN_RELE(fromvp);
2192 2194 *status = NFSERR_XDEV;
2193 2195 return;
2194 2196 }
2195 2197
2196 2198 tovp = nfs_fhtovp(args->rna_to.da_fhandle, exi);
2197 2199 if (tovp == NULL) {
2198 2200 VN_RELE(fromvp);
2199 2201 *status = NFSERR_STALE;
2200 2202 return;
2201 2203 }
2202 2204
2203 2205 if (fromvp->v_type != VDIR || tovp->v_type != VDIR) {
2204 2206 VN_RELE(tovp);
2205 2207 VN_RELE(fromvp);
2206 2208 *status = NFSERR_NOTDIR;
2207 2209 return;
2208 2210 }
2209 2211
2210 2212 /*
2211 2213 * Disallow NULL paths
2212 2214 */
2213 2215 if (args->rna_from.da_name == NULL || *args->rna_from.da_name == '\0' ||
2214 2216 args->rna_to.da_name == NULL || *args->rna_to.da_name == '\0') {
2215 2217 VN_RELE(tovp);
2216 2218 VN_RELE(fromvp);
2217 2219 *status = NFSERR_ACCES;
2218 2220 return;
2219 2221 }
2220 2222
2221 2223 if (rdonly(ro, tovp)) {
2222 2224 VN_RELE(tovp);
2223 2225 VN_RELE(fromvp);
2224 2226 *status = NFSERR_ROFS;
2225 2227 return;
2226 2228 }
2227 2229
2228 2230 /*
2229 2231 * Check for a conflict with a non-blocking mandatory share reservation.
2230 2232 */
2231 2233 error = VOP_LOOKUP(fromvp, args->rna_from.da_name, &srcvp, NULL, 0,
2232 2234 NULL, cr, NULL, NULL, NULL);
2233 2235 if (error != 0) {
2234 2236 VN_RELE(tovp);
2235 2237 VN_RELE(fromvp);
2236 2238 *status = puterrno(error);
2237 2239 return;
2238 2240 }
2239 2241
2240 2242 /* Check for delegations on the source file */
2241 2243
2242 2244 if (rfs4_check_delegated(FWRITE, srcvp, FALSE)) {
2243 2245 VN_RELE(tovp);
2244 2246 VN_RELE(fromvp);
2245 2247 VN_RELE(srcvp);
2246 2248 curthread->t_flag |= T_WOULDBLOCK;
2247 2249 return;
2248 2250 }
2249 2251
2250 2252 /* Check for delegation on the file being renamed over, if it exists */
2251 2253
2252 2254 if (nfs4_get_deleg_policy() != SRV_NEVER_DELEGATE &&
2253 2255 VOP_LOOKUP(tovp, args->rna_to.da_name, &targvp, NULL, 0, NULL, cr,
2254 2256 NULL, NULL, NULL) == 0) {
2255 2257
2256 2258 if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
2257 2259 VN_RELE(tovp);
2258 2260 VN_RELE(fromvp);
2259 2261 VN_RELE(srcvp);
2260 2262 VN_RELE(targvp);
2261 2263 curthread->t_flag |= T_WOULDBLOCK;
2262 2264 return;
2263 2265 }
2264 2266 VN_RELE(targvp);
2265 2267 }
2266 2268
2267 2269
2268 2270 if (nbl_need_check(srcvp)) {
2269 2271 nbl_start_crit(srcvp, RW_READER);
2270 2272 in_crit = 1;
2271 2273 if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL)) {
2272 2274 error = EACCES;
2273 2275 goto out;
2274 2276 }
2275 2277 }
2276 2278
2277 2279 error = VOP_RENAME(fromvp, args->rna_from.da_name,
2278 2280 tovp, args->rna_to.da_name, cr, NULL, 0);
2279 2281
2280 2282 if (error == 0)
2281 2283 vn_renamepath(tovp, srcvp, args->rna_to.da_name,
2282 2284 strlen(args->rna_to.da_name));
2283 2285
2284 2286 /*
2285 2287 * Force modified data and metadata out to stable storage.
2286 2288 */
2287 2289 (void) VOP_FSYNC(tovp, 0, cr, NULL);
2288 2290 (void) VOP_FSYNC(fromvp, 0, cr, NULL);
2289 2291
2290 2292 out:
2291 2293 if (in_crit)
2292 2294 nbl_end_crit(srcvp);
2293 2295 VN_RELE(srcvp);
2294 2296 VN_RELE(tovp);
2295 2297 VN_RELE(fromvp);
2296 2298
2297 2299 *status = puterrno(error);
2298 2300
2299 2301 }
2300 2302 void *
2301 2303 rfs_rename_getfh(struct nfsrnmargs *args)
2302 2304 {
2303 2305 return (args->rna_from.da_fhandle);
2304 2306 }
2305 2307
2306 2308 /*
2307 2309 * Link to a file.
2308 2310 * Create a file (to) which is a hard link to the given file (from).
2309 2311 */
2310 2312 /* ARGSUSED */
2311 2313 void
2312 2314 rfs_link(struct nfslinkargs *args, enum nfsstat *status,
2313 2315 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
2314 2316 {
2315 2317 int error;
2316 2318 vnode_t *fromvp;
2317 2319 vnode_t *tovp;
2318 2320 struct exportinfo *to_exi;
2319 2321 fhandle_t *fh;
2320 2322
2321 2323 fromvp = nfs_fhtovp(args->la_from, exi);
2322 2324 if (fromvp == NULL) {
2323 2325 *status = NFSERR_STALE;
2324 2326 return;
2325 2327 }
2326 2328
2327 2329 fh = args->la_to.da_fhandle;
2328 2330 to_exi = checkexport(&fh->fh_fsid, (fid_t *)&fh->fh_xlen);
2329 2331 if (to_exi == NULL) {
2330 2332 VN_RELE(fromvp);
2331 2333 *status = NFSERR_ACCES;
2332 2334 return;
2333 2335 }
2334 2336 exi_rele(to_exi);
2335 2337
2336 2338 if (to_exi != exi) {
2337 2339 VN_RELE(fromvp);
2338 2340 *status = NFSERR_XDEV;
2339 2341 return;
2340 2342 }
2341 2343
2342 2344 tovp = nfs_fhtovp(args->la_to.da_fhandle, exi);
2343 2345 if (tovp == NULL) {
2344 2346 VN_RELE(fromvp);
2345 2347 *status = NFSERR_STALE;
2346 2348 return;
2347 2349 }
2348 2350
2349 2351 if (tovp->v_type != VDIR) {
2350 2352 VN_RELE(tovp);
2351 2353 VN_RELE(fromvp);
2352 2354 *status = NFSERR_NOTDIR;
2353 2355 return;
2354 2356 }
2355 2357 /*
2356 2358 * Disallow NULL paths
2357 2359 */
2358 2360 if (args->la_to.da_name == NULL || *args->la_to.da_name == '\0') {
2359 2361 VN_RELE(tovp);
2360 2362 VN_RELE(fromvp);
2361 2363 *status = NFSERR_ACCES;
2362 2364 return;
2363 2365 }
2364 2366
2365 2367 if (rdonly(ro, tovp)) {
2366 2368 VN_RELE(tovp);
2367 2369 VN_RELE(fromvp);
2368 2370 *status = NFSERR_ROFS;
2369 2371 return;
2370 2372 }
2371 2373
2372 2374 error = VOP_LINK(tovp, fromvp, args->la_to.da_name, cr, NULL, 0);
2373 2375
2374 2376 /*
2375 2377 * Force modified data and metadata out to stable storage.
2376 2378 */
2377 2379 (void) VOP_FSYNC(tovp, 0, cr, NULL);
2378 2380 (void) VOP_FSYNC(fromvp, FNODSYNC, cr, NULL);
2379 2381
2380 2382 VN_RELE(tovp);
2381 2383 VN_RELE(fromvp);
2382 2384
2383 2385 *status = puterrno(error);
2384 2386
2385 2387 }
2386 2388 void *
2387 2389 rfs_link_getfh(struct nfslinkargs *args)
2388 2390 {
2389 2391 return (args->la_from);
2390 2392 }
2391 2393
2392 2394 /*
2393 2395 * Symbolicly link to a file.
2394 2396 * Create a file (to) with the given attributes which is a symbolic link
2395 2397 * to the given path name (to).
2396 2398 */
2397 2399 void
2398 2400 rfs_symlink(struct nfsslargs *args, enum nfsstat *status,
2399 2401 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
2400 2402 {
2401 2403 int error;
2402 2404 struct vattr va;
2403 2405 vnode_t *vp;
2404 2406 vnode_t *svp;
2405 2407 int lerror;
2406 2408 struct sockaddr *ca;
2407 2409 char *name = NULL;
2408 2410
2409 2411 /*
2410 2412 * Disallow NULL paths
2411 2413 */
2412 2414 if (args->sla_from.da_name == NULL || *args->sla_from.da_name == '\0') {
2413 2415 *status = NFSERR_ACCES;
2414 2416 return;
2415 2417 }
2416 2418
2417 2419 vp = nfs_fhtovp(args->sla_from.da_fhandle, exi);
2418 2420 if (vp == NULL) {
2419 2421 *status = NFSERR_STALE;
2420 2422 return;
2421 2423 }
2422 2424
2423 2425 if (rdonly(ro, vp)) {
2424 2426 VN_RELE(vp);
2425 2427 *status = NFSERR_ROFS;
2426 2428 return;
2427 2429 }
2428 2430
2429 2431 error = sattr_to_vattr(args->sla_sa, &va);
2430 2432 if (error) {
2431 2433 VN_RELE(vp);
2432 2434 *status = puterrno(error);
2433 2435 return;
2434 2436 }
2435 2437
2436 2438 if (!(va.va_mask & AT_MODE)) {
2437 2439 VN_RELE(vp);
2438 2440 *status = NFSERR_INVAL;
2439 2441 return;
2440 2442 }
2441 2443
2442 2444 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2443 2445 name = nfscmd_convname(ca, exi, args->sla_tnm,
2444 2446 NFSCMD_CONV_INBOUND, MAXPATHLEN);
2445 2447
2446 2448 if (name == NULL) {
2447 2449 *status = NFSERR_ACCES;
2448 2450 return;
2449 2451 }
2450 2452
2451 2453 va.va_type = VLNK;
2452 2454 va.va_mask |= AT_TYPE;
2453 2455
2454 2456 error = VOP_SYMLINK(vp, args->sla_from.da_name, &va, name, cr, NULL, 0);
2455 2457
2456 2458 /*
2457 2459 * Force new data and metadata out to stable storage.
2458 2460 */
2459 2461 lerror = VOP_LOOKUP(vp, args->sla_from.da_name, &svp, NULL, 0,
2460 2462 NULL, cr, NULL, NULL, NULL);
2461 2463
2462 2464 if (!lerror) {
2463 2465 (void) VOP_FSYNC(svp, 0, cr, NULL);
2464 2466 VN_RELE(svp);
2465 2467 }
2466 2468
2467 2469 /*
2468 2470 * Force modified data and metadata out to stable storage.
2469 2471 */
2470 2472 (void) VOP_FSYNC(vp, 0, cr, NULL);
2471 2473
2472 2474 VN_RELE(vp);
2473 2475
2474 2476 *status = puterrno(error);
2475 2477 if (name != args->sla_tnm)
2476 2478 kmem_free(name, MAXPATHLEN);
2477 2479
2478 2480 }
2479 2481 void *
2480 2482 rfs_symlink_getfh(struct nfsslargs *args)
2481 2483 {
2482 2484 return (args->sla_from.da_fhandle);
2483 2485 }
2484 2486
2485 2487 /*
2486 2488 * Make a directory.
2487 2489 * Create a directory with the given name, parent directory, and attributes.
2488 2490 * Returns a file handle and attributes for the new directory.
2489 2491 */
2490 2492 /* ARGSUSED */
2491 2493 void
2492 2494 rfs_mkdir(struct nfscreatargs *args, struct nfsdiropres *dr,
2493 2495 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
2494 2496 {
2495 2497 int error;
2496 2498 struct vattr va;
2497 2499 vnode_t *dvp = NULL;
2498 2500 vnode_t *vp;
2499 2501 char *name = args->ca_da.da_name;
2500 2502
2501 2503 /*
2502 2504 * Disallow NULL paths
2503 2505 */
2504 2506 if (name == NULL || *name == '\0') {
2505 2507 dr->dr_status = NFSERR_ACCES;
2506 2508 return;
2507 2509 }
2508 2510
2509 2511 vp = nfs_fhtovp(args->ca_da.da_fhandle, exi);
2510 2512 if (vp == NULL) {
2511 2513 dr->dr_status = NFSERR_STALE;
2512 2514 return;
2513 2515 }
2514 2516
2515 2517 if (rdonly(ro, vp)) {
2516 2518 VN_RELE(vp);
2517 2519 dr->dr_status = NFSERR_ROFS;
2518 2520 return;
2519 2521 }
2520 2522
2521 2523 error = sattr_to_vattr(args->ca_sa, &va);
2522 2524 if (error) {
2523 2525 VN_RELE(vp);
2524 2526 dr->dr_status = puterrno(error);
2525 2527 return;
2526 2528 }
2527 2529
2528 2530 if (!(va.va_mask & AT_MODE)) {
2529 2531 VN_RELE(vp);
2530 2532 dr->dr_status = NFSERR_INVAL;
2531 2533 return;
2532 2534 }
2533 2535
2534 2536 va.va_type = VDIR;
2535 2537 va.va_mask |= AT_TYPE;
2536 2538
2537 2539 error = VOP_MKDIR(vp, name, &va, &dvp, cr, NULL, 0, NULL);
2538 2540
2539 2541 if (!error) {
2540 2542 /*
2541 2543 * Attribtutes of the newly created directory should
2542 2544 * be returned to the client.
2543 2545 */
2544 2546 va.va_mask = AT_ALL; /* We want everything */
2545 2547 error = VOP_GETATTR(dvp, &va, 0, cr, NULL);
2546 2548
2547 2549 /* check for overflows */
2548 2550 if (!error) {
2549 2551 acl_perm(vp, exi, &va, cr);
2550 2552 error = vattr_to_nattr(&va, &dr->dr_attr);
2551 2553 if (!error) {
2552 2554 error = makefh(&dr->dr_fhandle, dvp, exi);
2553 2555 }
2554 2556 }
2555 2557 /*
2556 2558 * Force new data and metadata out to stable storage.
2557 2559 */
2558 2560 (void) VOP_FSYNC(dvp, 0, cr, NULL);
2559 2561 VN_RELE(dvp);
2560 2562 }
2561 2563
2562 2564 /*
2563 2565 * Force modified data and metadata out to stable storage.
2564 2566 */
2565 2567 (void) VOP_FSYNC(vp, 0, cr, NULL);
2566 2568
2567 2569 VN_RELE(vp);
2568 2570
2569 2571 dr->dr_status = puterrno(error);
2570 2572
2571 2573 }
2572 2574 void *
2573 2575 rfs_mkdir_getfh(struct nfscreatargs *args)
2574 2576 {
2575 2577 return (args->ca_da.da_fhandle);
2576 2578 }
2577 2579
2578 2580 /*
2579 2581 * Remove a directory.
2580 2582 * Remove the given directory name from the given parent directory.
2581 2583 */
2582 2584 /* ARGSUSED */
2583 2585 void
2584 2586 rfs_rmdir(struct nfsdiropargs *da, enum nfsstat *status,
2585 2587 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
2586 2588 {
2587 2589 int error;
2588 2590 vnode_t *vp;
2589 2591
2590 2592 /*
2591 2593 * Disallow NULL paths
2592 2594 */
2593 2595 if (da->da_name == NULL || *da->da_name == '\0') {
2594 2596 *status = NFSERR_ACCES;
2595 2597 return;
2596 2598 }
2597 2599
2598 2600 vp = nfs_fhtovp(da->da_fhandle, exi);
2599 2601 if (vp == NULL) {
2600 2602 *status = NFSERR_STALE;
2601 2603 return;
2602 2604 }
2603 2605
2604 2606 if (rdonly(ro, vp)) {
2605 2607 VN_RELE(vp);
2606 2608 *status = NFSERR_ROFS;
2607 2609 return;
2608 2610 }
2609 2611
2610 2612 /*
2611 2613 * VOP_RMDIR takes a third argument (the current
2612 2614 * directory of the process). That's because someone
2613 2615 * wants to return EINVAL if one tries to remove ".".
2614 2616 * Of course, NFS servers have no idea what their
2615 2617 * clients' current directories are. We fake it by
2616 2618 * supplying a vnode known to exist and illegal to
2617 2619 * remove.
2618 2620 */
2619 2621 error = VOP_RMDIR(vp, da->da_name, ZONE_ROOTVP(), cr, NULL, 0);
2620 2622
2621 2623 /*
2622 2624 * Force modified data and metadata out to stable storage.
2623 2625 */
2624 2626 (void) VOP_FSYNC(vp, 0, cr, NULL);
2625 2627
2626 2628 VN_RELE(vp);
2627 2629
2628 2630 /*
2629 2631 * System V defines rmdir to return EEXIST, not ENOTEMPTY,
2630 2632 * if the directory is not empty. A System V NFS server
2631 2633 * needs to map NFSERR_EXIST to NFSERR_NOTEMPTY to transmit
2632 2634 * over the wire.
2633 2635 */
2634 2636 if (error == EEXIST)
2635 2637 *status = NFSERR_NOTEMPTY;
2636 2638 else
2637 2639 *status = puterrno(error);
2638 2640
2639 2641 }
2640 2642 void *
2641 2643 rfs_rmdir_getfh(struct nfsdiropargs *da)
2642 2644 {
2643 2645 return (da->da_fhandle);
2644 2646 }
2645 2647
2646 2648 /* ARGSUSED */
2647 2649 void
2648 2650 rfs_readdir(struct nfsrddirargs *rda, struct nfsrddirres *rd,
2649 2651 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
2650 2652 {
2651 2653 int error;
2652 2654 int iseof;
2653 2655 struct iovec iov;
2654 2656 struct uio uio;
2655 2657 vnode_t *vp;
2656 2658 char *ndata = NULL;
2657 2659 struct sockaddr *ca;
2658 2660 size_t nents;
2659 2661 int ret;
2660 2662
2661 2663 vp = nfs_fhtovp(&rda->rda_fh, exi);
2662 2664 if (vp == NULL) {
2663 2665 rd->rd_entries = NULL;
2664 2666 rd->rd_status = NFSERR_STALE;
2665 2667 return;
2666 2668 }
2667 2669
2668 2670 if (vp->v_type != VDIR) {
2669 2671 VN_RELE(vp);
2670 2672 rd->rd_entries = NULL;
2671 2673 rd->rd_status = NFSERR_NOTDIR;
2672 2674 return;
2673 2675 }
2674 2676
2675 2677 (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
2676 2678
2677 2679 error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
2678 2680
2679 2681 if (error) {
2680 2682 rd->rd_entries = NULL;
2681 2683 goto bad;
2682 2684 }
2683 2685
2684 2686 if (rda->rda_count == 0) {
2685 2687 rd->rd_entries = NULL;
2686 2688 rd->rd_size = 0;
2687 2689 rd->rd_eof = FALSE;
2688 2690 goto bad;
2689 2691 }
2690 2692
2691 2693 rda->rda_count = MIN(rda->rda_count, NFS_MAXDATA);
2692 2694
2693 2695 /*
2694 2696 * Allocate data for entries. This will be freed by rfs_rddirfree.
2695 2697 */
2696 2698 rd->rd_bufsize = (uint_t)rda->rda_count;
2697 2699 rd->rd_entries = kmem_alloc(rd->rd_bufsize, KM_SLEEP);
2698 2700
2699 2701 /*
2700 2702 * Set up io vector to read directory data
2701 2703 */
2702 2704 iov.iov_base = (caddr_t)rd->rd_entries;
2703 2705 iov.iov_len = rda->rda_count;
2704 2706 uio.uio_iov = &iov;
2705 2707 uio.uio_iovcnt = 1;
2706 2708 uio.uio_segflg = UIO_SYSSPACE;
2707 2709 uio.uio_extflg = UIO_COPY_CACHED;
2708 2710 uio.uio_loffset = (offset_t)rda->rda_offset;
2709 2711 uio.uio_resid = rda->rda_count;
2710 2712
2711 2713 /*
2712 2714 * read directory
2713 2715 */
2714 2716 error = VOP_READDIR(vp, &uio, cr, &iseof, NULL, 0);
2715 2717
2716 2718 /*
2717 2719 * Clean up
2718 2720 */
2719 2721 if (!error) {
2720 2722 /*
2721 2723 * set size and eof
2722 2724 */
2723 2725 if (uio.uio_resid == rda->rda_count) {
2724 2726 rd->rd_size = 0;
2725 2727 rd->rd_eof = TRUE;
2726 2728 } else {
2727 2729 rd->rd_size = (uint32_t)(rda->rda_count -
2728 2730 uio.uio_resid);
2729 2731 rd->rd_eof = iseof ? TRUE : FALSE;
2730 2732 }
2731 2733 }
2732 2734
2733 2735 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2734 2736 nents = nfscmd_countents((char *)rd->rd_entries, rd->rd_size);
2735 2737 ret = nfscmd_convdirplus(ca, exi, (char *)rd->rd_entries, nents,
2736 2738 rda->rda_count, &ndata);
2737 2739
2738 2740 if (ret != 0) {
2739 2741 size_t dropbytes;
2740 2742 /*
2741 2743 * We had to drop one or more entries in order to fit
2742 2744 * during the character conversion. We need to patch
2743 2745 * up the size and eof info.
2744 2746 */
2745 2747 if (rd->rd_eof)
2746 2748 rd->rd_eof = FALSE;
2747 2749 dropbytes = nfscmd_dropped_entrysize(
2748 2750 (struct dirent64 *)rd->rd_entries, nents, ret);
2749 2751 rd->rd_size -= dropbytes;
2750 2752 }
2751 2753 if (ndata == NULL) {
2752 2754 ndata = (char *)rd->rd_entries;
2753 2755 } else if (ndata != (char *)rd->rd_entries) {
2754 2756 kmem_free(rd->rd_entries, rd->rd_bufsize);
2755 2757 rd->rd_entries = (void *)ndata;
2756 2758 rd->rd_bufsize = rda->rda_count;
2757 2759 }
2758 2760
2759 2761 bad:
2760 2762 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
2761 2763
2762 2764 #if 0 /* notyet */
2763 2765 /*
2764 2766 * Don't do this. It causes local disk writes when just
2765 2767 * reading the file and the overhead is deemed larger
2766 2768 * than the benefit.
2767 2769 */
2768 2770 /*
2769 2771 * Force modified metadata out to stable storage.
2770 2772 */
2771 2773 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
2772 2774 #endif
2773 2775
2774 2776 VN_RELE(vp);
2775 2777
2776 2778 rd->rd_status = puterrno(error);
2777 2779
2778 2780 }
2779 2781 void *
2780 2782 rfs_readdir_getfh(struct nfsrddirargs *rda)
2781 2783 {
2782 2784 return (&rda->rda_fh);
2783 2785 }
2784 2786 void
2785 2787 rfs_rddirfree(struct nfsrddirres *rd)
2786 2788 {
2787 2789 if (rd->rd_entries != NULL)
2788 2790 kmem_free(rd->rd_entries, rd->rd_bufsize);
2789 2791 }
2790 2792
2791 2793 /* ARGSUSED */
2792 2794 void
2793 2795 rfs_statfs(fhandle_t *fh, struct nfsstatfs *fs, struct exportinfo *exi,
2794 2796 struct svc_req *req, cred_t *cr, bool_t ro)
2795 2797 {
2796 2798 int error;
2797 2799 struct statvfs64 sb;
2798 2800 vnode_t *vp;
2799 2801
2800 2802 vp = nfs_fhtovp(fh, exi);
2801 2803 if (vp == NULL) {
2802 2804 fs->fs_status = NFSERR_STALE;
2803 2805 return;
2804 2806 }
2805 2807
2806 2808 error = VFS_STATVFS(vp->v_vfsp, &sb);
2807 2809
2808 2810 if (!error) {
2809 2811 fs->fs_tsize = nfstsize();
2810 2812 fs->fs_bsize = sb.f_frsize;
2811 2813 fs->fs_blocks = sb.f_blocks;
2812 2814 fs->fs_bfree = sb.f_bfree;
2813 2815 fs->fs_bavail = sb.f_bavail;
2814 2816 }
2815 2817
2816 2818 VN_RELE(vp);
2817 2819
2818 2820 fs->fs_status = puterrno(error);
2819 2821
2820 2822 }
2821 2823 void *
2822 2824 rfs_statfs_getfh(fhandle_t *fh)
2823 2825 {
2824 2826 return (fh);
2825 2827 }
2826 2828
2827 2829 static int
2828 2830 sattr_to_vattr(struct nfssattr *sa, struct vattr *vap)
2829 2831 {
2830 2832 vap->va_mask = 0;
2831 2833
2832 2834 /*
2833 2835 * There was a sign extension bug in some VFS based systems
2834 2836 * which stored the mode as a short. When it would get
2835 2837 * assigned to a u_long, no sign extension would occur.
2836 2838 * It needed to, but this wasn't noticed because sa_mode
2837 2839 * would then get assigned back to the short, thus ignoring
2838 2840 * the upper 16 bits of sa_mode.
2839 2841 *
2840 2842 * To make this implementation work for both broken
2841 2843 * clients and good clients, we check for both versions
2842 2844 * of the mode.
2843 2845 */
2844 2846 if (sa->sa_mode != (uint32_t)((ushort_t)-1) &&
2845 2847 sa->sa_mode != (uint32_t)-1) {
2846 2848 vap->va_mask |= AT_MODE;
2847 2849 vap->va_mode = sa->sa_mode;
2848 2850 }
2849 2851 if (sa->sa_uid != (uint32_t)-1) {
2850 2852 vap->va_mask |= AT_UID;
2851 2853 vap->va_uid = sa->sa_uid;
2852 2854 }
2853 2855 if (sa->sa_gid != (uint32_t)-1) {
2854 2856 vap->va_mask |= AT_GID;
2855 2857 vap->va_gid = sa->sa_gid;
2856 2858 }
2857 2859 if (sa->sa_size != (uint32_t)-1) {
2858 2860 vap->va_mask |= AT_SIZE;
2859 2861 vap->va_size = sa->sa_size;
2860 2862 }
2861 2863 if (sa->sa_atime.tv_sec != (int32_t)-1 &&
2862 2864 sa->sa_atime.tv_usec != (int32_t)-1) {
2863 2865 #ifndef _LP64
2864 2866 /* return error if time overflow */
2865 2867 if (!NFS2_TIME_OK(sa->sa_atime.tv_sec))
2866 2868 return (EOVERFLOW);
2867 2869 #endif
2868 2870 vap->va_mask |= AT_ATIME;
2869 2871 /*
2870 2872 * nfs protocol defines times as unsigned so don't extend sign,
2871 2873 * unless sysadmin set nfs_allow_preepoch_time.
2872 2874 */
2873 2875 NFS_TIME_T_CONVERT(vap->va_atime.tv_sec, sa->sa_atime.tv_sec);
2874 2876 vap->va_atime.tv_nsec = (uint32_t)(sa->sa_atime.tv_usec * 1000);
2875 2877 }
2876 2878 if (sa->sa_mtime.tv_sec != (int32_t)-1 &&
2877 2879 sa->sa_mtime.tv_usec != (int32_t)-1) {
2878 2880 #ifndef _LP64
2879 2881 /* return error if time overflow */
2880 2882 if (!NFS2_TIME_OK(sa->sa_mtime.tv_sec))
2881 2883 return (EOVERFLOW);
2882 2884 #endif
2883 2885 vap->va_mask |= AT_MTIME;
2884 2886 /*
2885 2887 * nfs protocol defines times as unsigned so don't extend sign,
2886 2888 * unless sysadmin set nfs_allow_preepoch_time.
2887 2889 */
2888 2890 NFS_TIME_T_CONVERT(vap->va_mtime.tv_sec, sa->sa_mtime.tv_sec);
2889 2891 vap->va_mtime.tv_nsec = (uint32_t)(sa->sa_mtime.tv_usec * 1000);
2890 2892 }
2891 2893 return (0);
2892 2894 }
2893 2895
2894 2896 static const enum nfsftype vt_to_nf[] = {
2895 2897 0, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, 0, 0, 0, NFSOC, 0
2896 2898 };
2897 2899
2898 2900 /*
2899 2901 * check the following fields for overflow: nodeid, size, and time.
2900 2902 * There could be a problem when converting 64-bit LP64 fields
2901 2903 * into 32-bit ones. Return an error if there is an overflow.
2902 2904 */
2903 2905 int
2904 2906 vattr_to_nattr(struct vattr *vap, struct nfsfattr *na)
2905 2907 {
2906 2908 ASSERT(vap->va_type >= VNON && vap->va_type <= VBAD);
2907 2909 na->na_type = vt_to_nf[vap->va_type];
2908 2910
2909 2911 if (vap->va_mode == (unsigned short) -1)
2910 2912 na->na_mode = (uint32_t)-1;
2911 2913 else
2912 2914 na->na_mode = VTTOIF(vap->va_type) | vap->va_mode;
2913 2915
2914 2916 if (vap->va_uid == (unsigned short)(-1))
2915 2917 na->na_uid = (uint32_t)(-1);
2916 2918 else if (vap->va_uid == UID_NOBODY)
2917 2919 na->na_uid = (uint32_t)NFS_UID_NOBODY;
2918 2920 else
2919 2921 na->na_uid = vap->va_uid;
2920 2922
2921 2923 if (vap->va_gid == (unsigned short)(-1))
2922 2924 na->na_gid = (uint32_t)-1;
2923 2925 else if (vap->va_gid == GID_NOBODY)
2924 2926 na->na_gid = (uint32_t)NFS_GID_NOBODY;
2925 2927 else
2926 2928 na->na_gid = vap->va_gid;
2927 2929
2928 2930 /*
2929 2931 * Do we need to check fsid for overflow? It is 64-bit in the
2930 2932 * vattr, but are bigger than 32 bit values supported?
2931 2933 */
2932 2934 na->na_fsid = vap->va_fsid;
2933 2935
2934 2936 na->na_nodeid = vap->va_nodeid;
2935 2937
2936 2938 /*
2937 2939 * Check to make sure that the nodeid is representable over the
2938 2940 * wire without losing bits.
2939 2941 */
2940 2942 if (vap->va_nodeid != (u_longlong_t)na->na_nodeid)
2941 2943 return (EFBIG);
2942 2944 na->na_nlink = vap->va_nlink;
2943 2945
2944 2946 /*
2945 2947 * Check for big files here, instead of at the caller. See
2946 2948 * comments in cstat for large special file explanation.
2947 2949 */
2948 2950 if (vap->va_size > (u_longlong_t)MAXOFF32_T) {
2949 2951 if ((vap->va_type == VREG) || (vap->va_type == VDIR))
2950 2952 return (EFBIG);
2951 2953 if ((vap->va_type == VBLK) || (vap->va_type == VCHR)) {
2952 2954 /* UNKNOWN_SIZE | OVERFLOW */
2953 2955 na->na_size = MAXOFF32_T;
2954 2956 } else
2955 2957 na->na_size = vap->va_size;
2956 2958 } else
2957 2959 na->na_size = vap->va_size;
2958 2960
2959 2961 /*
2960 2962 * If the vnode times overflow the 32-bit times that NFS2
2961 2963 * uses on the wire then return an error.
2962 2964 */
2963 2965 if (!NFS_VAP_TIME_OK(vap)) {
2964 2966 return (EOVERFLOW);
2965 2967 }
2966 2968 na->na_atime.tv_sec = vap->va_atime.tv_sec;
2967 2969 na->na_atime.tv_usec = vap->va_atime.tv_nsec / 1000;
2968 2970
2969 2971 na->na_mtime.tv_sec = vap->va_mtime.tv_sec;
2970 2972 na->na_mtime.tv_usec = vap->va_mtime.tv_nsec / 1000;
2971 2973
2972 2974 na->na_ctime.tv_sec = vap->va_ctime.tv_sec;
2973 2975 na->na_ctime.tv_usec = vap->va_ctime.tv_nsec / 1000;
2974 2976
2975 2977 /*
2976 2978 * If the dev_t will fit into 16 bits then compress
2977 2979 * it, otherwise leave it alone. See comments in
2978 2980 * nfs_client.c.
2979 2981 */
2980 2982 if (getminor(vap->va_rdev) <= SO4_MAXMIN &&
2981 2983 getmajor(vap->va_rdev) <= SO4_MAXMAJ)
2982 2984 na->na_rdev = nfsv2_cmpdev(vap->va_rdev);
2983 2985 else
2984 2986 (void) cmpldev(&na->na_rdev, vap->va_rdev);
2985 2987
2986 2988 na->na_blocks = vap->va_nblocks;
2987 2989 na->na_blocksize = vap->va_blksize;
2988 2990
2989 2991 /*
2990 2992 * This bit of ugliness is a *TEMPORARY* hack to preserve the
2991 2993 * over-the-wire protocols for named-pipe vnodes. It remaps the
2992 2994 * VFIFO type to the special over-the-wire type. (see note in nfs.h)
2993 2995 *
2994 2996 * BUYER BEWARE:
2995 2997 * If you are porting the NFS to a non-Sun server, you probably
2996 2998 * don't want to include the following block of code. The
2997 2999 * over-the-wire special file types will be changing with the
2998 3000 * NFS Protocol Revision.
2999 3001 */
3000 3002 if (vap->va_type == VFIFO)
3001 3003 NA_SETFIFO(na);
3002 3004 return (0);
3003 3005 }
3004 3006
3005 3007 /*
3006 3008 * acl v2 support: returns approximate permission.
3007 3009 * default: returns minimal permission (more restrictive)
3008 3010 * aclok: returns maximal permission (less restrictive)
3009 3011 * This routine changes the permissions that are alaredy in *va.
3010 3012 * If a file has minimal ACL, i.e. aclcnt == MIN_ACL_ENTRIES,
3011 3013 * CLASS_OBJ is always the same as GROUP_OBJ entry.
3012 3014 */
3013 3015 static void
3014 3016 acl_perm(struct vnode *vp, struct exportinfo *exi, struct vattr *va, cred_t *cr)
3015 3017 {
3016 3018 vsecattr_t vsa;
3017 3019 int aclcnt;
3018 3020 aclent_t *aclentp;
3019 3021 mode_t mask_perm;
3020 3022 mode_t grp_perm;
3021 3023 mode_t other_perm;
3022 3024 mode_t other_orig;
3023 3025 int error;
3024 3026
3025 3027 /* dont care default acl */
3026 3028 vsa.vsa_mask = (VSA_ACL | VSA_ACLCNT);
3027 3029 error = VOP_GETSECATTR(vp, &vsa, 0, cr, NULL);
3028 3030
3029 3031 if (!error) {
3030 3032 aclcnt = vsa.vsa_aclcnt;
3031 3033 if (aclcnt > MIN_ACL_ENTRIES) {
3032 3034 /* non-trivial ACL */
3033 3035 aclentp = vsa.vsa_aclentp;
3034 3036 if (exi->exi_export.ex_flags & EX_ACLOK) {
3035 3037 /* maximal permissions */
3036 3038 grp_perm = 0;
3037 3039 other_perm = 0;
3038 3040 for (; aclcnt > 0; aclcnt--, aclentp++) {
3039 3041 switch (aclentp->a_type) {
3040 3042 case USER_OBJ:
3041 3043 break;
3042 3044 case USER:
3043 3045 grp_perm |=
3044 3046 aclentp->a_perm << 3;
3045 3047 other_perm |= aclentp->a_perm;
3046 3048 break;
3047 3049 case GROUP_OBJ:
3048 3050 grp_perm |=
3049 3051 aclentp->a_perm << 3;
3050 3052 break;
3051 3053 case GROUP:
3052 3054 other_perm |= aclentp->a_perm;
3053 3055 break;
3054 3056 case OTHER_OBJ:
3055 3057 other_orig = aclentp->a_perm;
3056 3058 break;
3057 3059 case CLASS_OBJ:
3058 3060 mask_perm = aclentp->a_perm;
3059 3061 break;
3060 3062 default:
3061 3063 break;
3062 3064 }
3063 3065 }
3064 3066 grp_perm &= mask_perm << 3;
3065 3067 other_perm &= mask_perm;
3066 3068 other_perm |= other_orig;
3067 3069
3068 3070 } else {
3069 3071 /* minimal permissions */
3070 3072 grp_perm = 070;
3071 3073 other_perm = 07;
3072 3074 for (; aclcnt > 0; aclcnt--, aclentp++) {
3073 3075 switch (aclentp->a_type) {
3074 3076 case USER_OBJ:
3075 3077 break;
3076 3078 case USER:
3077 3079 case CLASS_OBJ:
3078 3080 grp_perm &=
3079 3081 aclentp->a_perm << 3;
3080 3082 other_perm &=
3081 3083 aclentp->a_perm;
3082 3084 break;
3083 3085 case GROUP_OBJ:
3084 3086 grp_perm &=
3085 3087 aclentp->a_perm << 3;
3086 3088 break;
3087 3089 case GROUP:
3088 3090 other_perm &=
3089 3091 aclentp->a_perm;
3090 3092 break;
3091 3093 case OTHER_OBJ:
3092 3094 other_perm &=
3093 3095 aclentp->a_perm;
3094 3096 break;
3095 3097 default:
3096 3098 break;
3097 3099 }
3098 3100 }
3099 3101 }
3100 3102 /* copy to va */
3101 3103 va->va_mode &= ~077;
3102 3104 va->va_mode |= grp_perm | other_perm;
3103 3105 }
3104 3106 if (vsa.vsa_aclcnt)
3105 3107 kmem_free(vsa.vsa_aclentp,
3106 3108 vsa.vsa_aclcnt * sizeof (aclent_t));
3107 3109 }
3108 3110 }
3109 3111
/*
 * NFSv2 server initialization: obtain a caller id from fs_new_caller_id()
 * and record it in nfs2_srv_caller_id.
 */
void
rfs_srvrinit(void)
{
	nfs2_srv_caller_id = fs_new_caller_id();
}
3115 3117
/*
 * Counterpart of rfs_srvrinit().  Currently has nothing to undo.
 */
void
rfs_srvrfini(void)
{
}
3120 3122
3121 3123 /* ARGSUSED */
3122 3124 void
3123 3125 rfs_srv_zone_init(nfs_globals_t *ng)
3124 3126 {
3125 3127 nfs_srv_t *ns;
3126 3128
3127 3129 ns = kmem_zalloc(sizeof (*ns), KM_SLEEP);
3128 3130
3129 3131 mutex_init(&ns->async_write_lock, NULL, MUTEX_DEFAULT, NULL);
3130 3132 ns->write_async = 1;
3131 3133
3132 3134 ng->nfs_srv = ns;
3133 3135 }
3134 3136
3135 3137 /* ARGSUSED */
3136 3138 void
3137 3139 rfs_srv_zone_fini(nfs_globals_t *ng)
3138 3140 {
3139 3141 nfs_srv_t *ns = ng->nfs_srv;
3140 3142
3141 3143 ng->nfs_srv = NULL;
3142 3144
3143 3145 mutex_destroy(&ns->async_write_lock);
3144 3146 kmem_free(ns, sizeof (*ns));
3145 3147 }
3146 3148
3147 3149 static int
3148 3150 rdma_setup_read_data2(struct nfsreadargs *ra, struct nfsrdresult *rr)
3149 3151 {
3150 3152 struct clist *wcl;
3151 3153 int wlist_len;
3152 3154 uint32_t count = rr->rr_count;
3153 3155
3154 3156 wcl = ra->ra_wlist;
3155 3157
3156 3158 if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE) {
3157 3159 return (FALSE);
3158 3160 }
3159 3161
3160 3162 wcl = ra->ra_wlist;
3161 3163 rr->rr_ok.rrok_wlist_len = wlist_len;
3162 3164 rr->rr_ok.rrok_wlist = wcl;
3163 3165
3164 3166 return (TRUE);
3165 3167 }
|
↓ open down ↓ |
2479 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX