Print this page
Caution with use after exi_rele()
| Split |
Close |
| Expand all |
| Collapse all |
--- old/usr/src/uts/common/fs/nfs/nfs_srv.c
+++ new/usr/src/uts/common/fs/nfs/nfs_srv.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved.
24 24 * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
25 25 * Copyright (c) 2016 by Delphix. All rights reserved.
26 26 */
27 27
28 28 /*
29 29 * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T.
30 30 * All rights reserved.
31 31 */
32 32
33 33 /*
34 34 * Copyright 2018 Nexenta Systems, Inc.
35 35 * Copyright (c) 2016 by Delphix. All rights reserved.
36 36 */
37 37
38 38 #include <sys/param.h>
39 39 #include <sys/types.h>
40 40 #include <sys/systm.h>
41 41 #include <sys/cred.h>
42 42 #include <sys/buf.h>
43 43 #include <sys/vfs.h>
44 44 #include <sys/vnode.h>
45 45 #include <sys/uio.h>
46 46 #include <sys/stat.h>
47 47 #include <sys/errno.h>
48 48 #include <sys/sysmacros.h>
49 49 #include <sys/statvfs.h>
50 50 #include <sys/kmem.h>
51 51 #include <sys/kstat.h>
52 52 #include <sys/dirent.h>
53 53 #include <sys/cmn_err.h>
54 54 #include <sys/debug.h>
55 55 #include <sys/vtrace.h>
56 56 #include <sys/mode.h>
57 57 #include <sys/acl.h>
58 58 #include <sys/nbmlock.h>
59 59 #include <sys/policy.h>
60 60 #include <sys/sdt.h>
61 61
62 62 #include <rpc/types.h>
63 63 #include <rpc/auth.h>
64 64 #include <rpc/svc.h>
65 65
66 66 #include <nfs/nfs.h>
67 67 #include <nfs/export.h>
68 68 #include <nfs/nfs_cmd.h>
69 69
70 70 #include <vm/hat.h>
71 71 #include <vm/as.h>
72 72 #include <vm/seg.h>
73 73 #include <vm/seg_map.h>
74 74 #include <vm/seg_kmem.h>
75 75
76 76 #include <sys/strsubr.h>
77 77
78 78 struct rfs_async_write_list;
79 79
/*
 * Zone globals of NFSv2 server.
 * One instance exists per zone running the NFS server; it is retrieved
 * through nfs_get_srv().
 */
typedef struct nfs_srv {
	/* presumably protects async_write_head — confirm at use sites */
	kmutex_t async_write_lock;
	/* pending clustered (async) write requests */
	struct rfs_async_write_list *async_write_head;

	/*
	 * enables write clustering if == 1
	 */
	int write_async;
} nfs_srv_t;
92 92
93 93 /*
94 94 * These are the interface routines for the server side of the
95 95 * Network File System. See the NFS version 2 protocol specification
96 96 * for a description of this interface.
97 97 */
98 98
99 99 static int sattr_to_vattr(struct nfssattr *, struct vattr *);
100 100 static void acl_perm(struct vnode *, struct exportinfo *, struct vattr *,
101 101 cred_t *);
102 102
103 103
/*
 * Some "over the wire" UNIX file types.  These are encoded
 * into the mode.  This needs to be fixed in the next rev.
 */
#define	IFMT	0170000		/* type of file */
#define	IFCHR	0020000		/* character special */
#define	IFBLK	0060000		/* block special */
#define	IFSOCK	0140000		/* socket */

/*
 * Unique caller id for this module; stored into caller_context_t
 * (ct.cc_caller_id) for the VOP calls issued by the NFSv2 server.
 */
u_longlong_t nfs2_srv_caller_id;
114 114
115 115 static nfs_srv_t *
116 116 nfs_get_srv(void)
117 117 {
118 118 nfs_globals_t *ng = zone_getspecific(nfssrv_zone_key, curzone);
119 119 nfs_srv_t *srv = ng->nfs_srv;
120 120 ASSERT(srv != NULL);
121 121 return (srv);
122 122 }
123 123
124 124 /*
125 125 * Get file attributes.
126 126 * Returns the current attributes of the file with the given fhandle.
127 127 */
128 128 /* ARGSUSED */
129 129 void
130 130 rfs_getattr(fhandle_t *fhp, struct nfsattrstat *ns, struct exportinfo *exi,
131 131 struct svc_req *req, cred_t *cr, bool_t ro)
132 132 {
133 133 int error;
134 134 vnode_t *vp;
135 135 struct vattr va;
136 136
137 137 vp = nfs_fhtovp(fhp, exi);
138 138 if (vp == NULL) {
139 139 ns->ns_status = NFSERR_STALE;
140 140 return;
141 141 }
142 142
143 143 /*
144 144 * Do the getattr.
145 145 */
146 146 va.va_mask = AT_ALL; /* we want all the attributes */
147 147
148 148 error = rfs4_delegated_getattr(vp, &va, 0, cr);
149 149
150 150 /* check for overflows */
151 151 if (!error) {
152 152 /* Lie about the object type for a referral */
153 153 if (vn_is_nfs_reparse(vp, cr))
154 154 va.va_type = VLNK;
155 155
156 156 acl_perm(vp, exi, &va, cr);
157 157 error = vattr_to_nattr(&va, &ns->ns_attr);
158 158 }
159 159
160 160 VN_RELE(vp);
161 161
162 162 ns->ns_status = puterrno(error);
163 163 }
/*
 * Return the file handle from the GETATTR arguments (used by the common
 * dispatch code to locate the export for this request).
 */
void *
rfs_getattr_getfh(fhandle_t *fhp)
{
	return (fhp);
}
169 169
/*
 * Set file attributes.
 * Sets the attributes of the file with the given fhandle.  Returns
 * the new attributes.
 */
/* ARGSUSED */
void
rfs_setattr(struct nfssaargs *args, struct nfsattrstat *ns,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error;
	int flag;
	int in_crit = 0;		/* inside an nbmand critical region */
	vnode_t *vp;
	struct vattr va;		/* attributes requested by the client */
	struct vattr bva;		/* current ("before") attributes */
	struct flock64 bf;
	caller_context_t ct;


	vp = nfs_fhtovp(&args->saa_fh, exi);
	if (vp == NULL) {
		ns->ns_status = NFSERR_STALE;
		return;
	}

	/* No attribute changes on a read-only export. */
	if (rdonly(ro, vp)) {
		VN_RELE(vp);
		ns->ns_status = NFSERR_ROFS;
		return;
	}

	error = sattr_to_vattr(&args->saa_sa, &va);
	if (error) {
		VN_RELE(vp);
		ns->ns_status = puterrno(error);
		return;
	}

	/*
	 * If the client is requesting a change to the mtime,
	 * but the nanosecond field is set to 1 billion, then
	 * this is a flag to the server that it should set the
	 * atime and mtime fields to the server's current time.
	 * The 1 billion number actually came from the client
	 * as 1 million, but the units in the over the wire
	 * request are microseconds instead of nanoseconds.
	 *
	 * This is an overload of the protocol and should be
	 * documented in the NFS Version 2 protocol specification.
	 */
	if (va.va_mask & AT_MTIME) {
		if (va.va_mtime.tv_nsec == 1000000000) {
			gethrestime(&va.va_mtime);
			va.va_atime = va.va_mtime;
			va.va_mask |= AT_ATIME;
			flag = 0;
		} else
			flag = ATTR_UTIME;
	} else
		flag = 0;

	/*
	 * If the filesystem is exported with nosuid, then mask off
	 * the setuid and setgid bits.
	 */
	if ((va.va_mask & AT_MODE) && vp->v_type == VREG &&
	    (exi->exi_export.ex_flags & EX_NOSUID))
		va.va_mode &= ~(VSUID | VSGID);

	ct.cc_sysid = 0;
	ct.cc_pid = 0;
	ct.cc_caller_id = nfs2_srv_caller_id;
	ct.cc_flags = CC_DONTBLOCK;

	/*
	 * We need to specially handle size changes because it is
	 * possible for the client to create a file with modes
	 * which indicate read-only, but with the file opened for
	 * writing.  If the client then tries to set the size of
	 * the file, then the normal access checking done in
	 * VOP_SETATTR would prevent the client from doing so,
	 * although it should be legal for it to do so.  To get
	 * around this, we do the access checking for ourselves
	 * and then use VOP_SPACE which doesn't do the access
	 * checking which VOP_SETATTR does.  VOP_SPACE can only
	 * operate on VREG files, let VOP_SETATTR handle the other
	 * extremely rare cases.
	 * Also the client should not be allowed to change the
	 * size of the file if there is a conflicting non-blocking
	 * mandatory lock in the region of change.
	 */
	if (vp->v_type == VREG && va.va_mask & AT_SIZE) {
		if (nbl_need_check(vp)) {
			nbl_start_crit(vp, RW_READER);
			in_crit = 1;
		}

		bva.va_mask = AT_UID | AT_SIZE;

		error = VOP_GETATTR(vp, &bva, 0, cr, &ct);

		if (error) {
			if (in_crit)
				nbl_end_crit(vp);
			VN_RELE(vp);
			ns->ns_status = puterrno(error);
			return;
		}

		if (in_crit) {
			u_offset_t offset;
			ssize_t length;

			/*
			 * Check the region between the old and new size for
			 * conflicting non-blocking mandatory locks.
			 */
			if (va.va_size < bva.va_size) {
				offset = va.va_size;
				length = bva.va_size - va.va_size;
			} else {
				offset = bva.va_size;
				length = va.va_size - bva.va_size;
			}
			if (nbl_conflict(vp, NBL_WRITE, offset, length, 0,
			    NULL)) {
				error = EACCES;
			}
		}

		/*
		 * Only the owner takes the VOP_SPACE shortcut (and then
		 * AT_SIZE is cleared so VOP_SETATTR below skips the size);
		 * everyone else falls through to VOP_SETATTR with AT_SIZE
		 * still set, which performs the normal access checks.
		 */
		if (crgetuid(cr) == bva.va_uid && !error &&
		    va.va_size != bva.va_size) {
			va.va_mask &= ~AT_SIZE;
			bf.l_type = F_WRLCK;
			bf.l_whence = 0;
			bf.l_start = (off64_t)va.va_size;
			bf.l_len = 0;
			bf.l_sysid = 0;
			bf.l_pid = 0;

			error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
			    (offset_t)va.va_size, cr, &ct);
		}
		if (in_crit)
			nbl_end_crit(vp);
	} else
		error = 0;

	/*
	 * Do the setattr.  (va_mask may be empty if the only requested
	 * change was the size and VOP_SPACE already handled it.)
	 */
	if (!error && va.va_mask) {
		error = VOP_SETATTR(vp, &va, flag, cr, &ct);
	}

	/*
	 * check if the monitor on either vop_space or vop_setattr detected
	 * a delegation conflict and if so, mark the thread flag as
	 * wouldblock so that the response is dropped and the client will
	 * try again.
	 */
	if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
		VN_RELE(vp);
		curthread->t_flag |= T_WOULDBLOCK;
		return;
	}

	if (!error) {
		/* Re-fetch the attributes so the reply reflects the result. */
		va.va_mask = AT_ALL;	/* get everything */

		error = rfs4_delegated_getattr(vp, &va, 0, cr);

		/* check for overflows */
		if (!error) {
			acl_perm(vp, exi, &va, cr);
			error = vattr_to_nattr(&va, &ns->ns_attr);
		}
	}

	ct.cc_flags = 0;

	/*
	 * Force modified metadata out to stable storage.
	 */
	(void) VOP_FSYNC(vp, FNODSYNC, cr, &ct);

	VN_RELE(vp);

	ns->ns_status = puterrno(error);
}
/*
 * Return the file handle from the SETATTR arguments (used by the common
 * dispatch code to locate the export for this request).
 */
void *
rfs_setattr_getfh(struct nfssaargs *args)
{
	return (&args->saa_fh);
}
362 362
/*
 * Cross a mount point encountered during a lookup.
 *
 * *vpp and *exip are replaced (and the old references released) only
 * when the covering filesystem is exported with "nohide".  If it is not
 * exported, or lacks "nohide", this still returns 0 but leaves the
 * caller's *vpp/*exip untouched.  A non-zero return (traverse/VOP_FID
 * failure) also leaves them untouched.
 */
int
rfs_cross_mnt(vnode_t **vpp, struct exportinfo **exip)
{
	struct exportinfo *exi;
	vnode_t *vp = *vpp;
	fid_t fid;
	int error;

	/* Extra hold: traverse() consumes/replaces its vnode argument. */
	VN_HOLD(vp);

	if ((error = traverse(&vp)) != 0) {
		VN_RELE(vp);
		return (error);
	}

	bzero(&fid, sizeof (fid));
	fid.fid_len = MAXFIDSZ;
	error = VOP_FID(vp, &fid, NULL);
	if (error) {
		VN_RELE(vp);
		return (error);
	}

	/* checkexport returns a held exportinfo (or NULL). */
	exi = checkexport(&vp->v_vfsp->vfs_fsid, &fid);
	if (exi == NULL ||
	    (exi->exi_export.ex_flags & EX_NOHIDE) == 0) {
		/*
		 * It is not an error: the subdir is simply not exported,
		 * or "nohide" is not set.  Drop our temporary references
		 * and keep the caller's vnode/export as they were.
		 */
		if (exi != NULL)
			exi_rele(exi);
		VN_RELE(vp);
	} else {
		/* go to submount: swap in the new export and vnode */
		exi_rele(*exip);
		*exip = exi;

		VN_RELE(*vpp);
		*vpp = vp;
	}

	return (0);
}
408 408
/*
 * Given mounted "dvp" and "exi", go to the upper (covering) mount point
 * with dvp/exi correction.
 * Returns 0 on success; on failure (-1) the caller's dvp/exi are left
 * unchanged.
 */
int
rfs_climb_crossmnt(vnode_t **dvpp, struct exportinfo **exip, cred_t *cr)
{
	struct exportinfo *exi;
	vnode_t *dvp = *dvpp;

	ASSERT3P((*exip)->exi_zone, ==, curzone);
	ASSERT((dvp->v_flag & VROOT) || VN_IS_CURZONEROOT(dvp));

	VN_HOLD(dvp);
	/* step up to the vnode covered by this mount */
	dvp = untraverse(dvp);
	exi = nfs_vptoexi(NULL, dvp, cr, NULL, NULL, FALSE);
	if (exi == NULL) {
		VN_RELE(dvp);
		return (-1);
	}

	ASSERT3P(exi->exi_zone, ==, curzone);
	/* swap in the new export/vnode, releasing the old references */
	exi_rele(*exip);
	*exip = exi;
	VN_RELE(*dvpp);
	*dvpp = dvp;

	return (0);
}
/*
 * Directory lookup.
 * Returns an fhandle and file attributes for file name in a directory.
 *
 * Reference counting: we take our own hold on 'exi' (exi_hold) because
 * crossing mount points may swap it for a different export; the 'out:'
 * path releases whichever export we end up holding, if any.
 */
/* ARGSUSED */
void
rfs_lookup(struct nfsdiropargs *da, struct nfsdiropres *dr,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error;
	vnode_t *dvp;
	vnode_t *vp;
	struct vattr va;
	fhandle_t *fhp = da->da_fhandle;
	struct sec_ol sec = {0, 0};
	bool_t publicfh_flag = FALSE, auth_weak = FALSE;
	char *name;
	struct sockaddr *ca;

	/*
	 * Trusted Extension doesn't support NFSv2. MOUNT
	 * will reject v2 clients. Need to prevent v2 client
	 * access via WebNFS here.
	 */
	if (is_system_labeled() && req->rq_vers == 2) {
		dr->dr_status = NFSERR_ACCES;
		return;
	}

	/*
	 * Disallow NULL paths
	 */
	if (da->da_name == NULL || *da->da_name == '\0') {
		dr->dr_status = NFSERR_ACCES;
		return;
	}

	/*
	 * Allow lookups from the root - the default
	 * location of the public filehandle.
	 */
	if (exi != NULL && (exi->exi_export.ex_flags & EX_PUBLIC)) {
		dvp = ZONE_ROOTVP();
		VN_HOLD(dvp);
	} else {
		dvp = nfs_fhtovp(fhp, exi);
		if (dvp == NULL) {
			dr->dr_status = NFSERR_STALE;
			return;
		}
	}

	/* Our own hold; 'out:' releases whatever export we hold by then. */
	exi_hold(exi);
	ASSERT3P(exi->exi_zone, ==, curzone);

	/*
	 * Do not allow lookup beyond root.
	 * If the filehandle matches a filehandle of the exi,
	 * then the ".." refers beyond the root of an exported filesystem.
	 */
	if (strcmp(da->da_name, "..") == 0 &&
	    EQFID(&exi->exi_fid, (fid_t *)&fhp->fh_len)) {
		if ((exi->exi_export.ex_flags & EX_NOHIDE) &&
		    ((dvp->v_flag & VROOT) || VN_IS_CURZONEROOT(dvp))) {
			/*
			 * special case for ".." and 'nohide' exported root
			 */
			if (rfs_climb_crossmnt(&dvp, &exi, cr) != 0) {
				/*
				 * NOTE(review): 'error' carries NFSERR_*
				 * values here which are fed to puterrno()
				 * at 'out:' — relies on those values
				 * matching the corresponding errnos;
				 * confirm.
				 */
				error = NFSERR_ACCES;
				goto out;
			}
		} else {
			error = NFSERR_NOENT;
			goto out;
		}
	}

	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
	name = nfscmd_convname(ca, exi, da->da_name, NFSCMD_CONV_INBOUND,
	    MAXPATHLEN);

	if (name == NULL) {
		error = NFSERR_ACCES;
		goto out;
	}

	/*
	 * If the public filehandle is used then allow
	 * a multi-component lookup, i.e. evaluate
	 * a pathname and follow symbolic links if
	 * necessary.
	 *
	 * This may result in a vnode in another filesystem
	 * which is OK as long as the filesystem is exported.
	 */
	if (PUBLIC_FH2(fhp)) {
		publicfh_flag = TRUE;

		/*
		 * Drop our hold and clear the now-stale pointer:
		 * rfs_publicfh_mclookup() supplies a new 'exi' only on
		 * success, and the 'out:' path must not call exi_rele()
		 * on the export we just released.
		 */
		exi_rele(exi);
		exi = NULL;

		error = rfs_publicfh_mclookup(name, dvp, cr, &vp, &exi,
		    &sec);
	} else {
		/*
		 * Do a normal single component lookup.
		 */
		error = VOP_LOOKUP(dvp, name, &vp, NULL, 0, NULL, cr,
		    NULL, NULL, NULL);
	}

	/* nfscmd_convname may have returned a fresh buffer */
	if (name != da->da_name)
		kmem_free(name, MAXPATHLEN);

	if (error == 0 && vn_ismntpt(vp)) {
		error = rfs_cross_mnt(&vp, &exi);
		if (error)
			VN_RELE(vp);
	}

	if (!error) {
		va.va_mask = AT_ALL;	/* we want everything */

		error = rfs4_delegated_getattr(vp, &va, 0, cr);

		/* check for overflows */
		if (!error) {
			acl_perm(vp, exi, &va, cr);
			error = vattr_to_nattr(&va, &dr->dr_attr);
			if (!error) {
				if (sec.sec_flags & SEC_QUERY)
					error = makefh_ol(&dr->dr_fhandle, exi,
					    sec.sec_index);
				else {
					error = makefh(&dr->dr_fhandle, vp,
					    exi);
					if (!error && publicfh_flag &&
					    !chk_clnt_sec(exi, req))
						auth_weak = TRUE;
				}
			}
		}
		VN_RELE(vp);
	}

out:
	VN_RELE(dvp);

	/* 'exi' may be NULL after a failed public-filehandle lookup */
	if (exi != NULL)
		exi_rele(exi);

	/*
	 * If it's public fh, no 0x81, and client's flavor is
	 * invalid, set WebNFS status to WNFSERR_CLNT_FLAVOR now.
	 * Then set RPC status to AUTH_TOOWEAK in common_dispatch.
	 */
	if (auth_weak)
		dr->dr_status = (enum nfsstat)WNFSERR_CLNT_FLAVOR;
	else
		dr->dr_status = puterrno(error);
}
/*
 * Return the directory file handle from the LOOKUP arguments (used by
 * the common dispatch code to locate the export for this request).
 */
void *
rfs_lookup_getfh(struct nfsdiropargs *da)
{
	return (da->da_fhandle);
}
604 605
/*
 * Read symbolic link.
 * Returns the string in the symbolic link at the given fhandle.
 */
/* ARGSUSED */
void
rfs_readlink(fhandle_t *fhp, struct nfsrdlnres *rl, struct exportinfo *exi,
    struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error;
	struct iovec iov;
	struct uio uio;
	vnode_t *vp;
	struct vattr va;
	struct sockaddr *ca;
	char *name = NULL;
	int is_referral = 0;

	vp = nfs_fhtovp(fhp, exi);
	if (vp == NULL) {
		rl->rl_data = NULL;
		rl->rl_status = NFSERR_STALE;
		return;
	}

	va.va_mask = AT_MODE;

	error = VOP_GETATTR(vp, &va, 0, cr, NULL);

	if (error) {
		VN_RELE(vp);
		rl->rl_data = NULL;
		rl->rl_status = puterrno(error);
		return;
	}

	/* Refuse mandatory-lock files to avoid blocking service threads. */
	if (MANDLOCK(vp, va.va_mode)) {
		VN_RELE(vp);
		rl->rl_data = NULL;
		rl->rl_status = NFSERR_ACCES;
		return;
	}

	/* We lied about the object type for a referral */
	if (vn_is_nfs_reparse(vp, cr))
		is_referral = 1;

	/*
	 * XNFS and RFC1094 require us to return ENXIO if argument
	 * is not a link. BUGID 1138002.
	 */
	if (vp->v_type != VLNK && !is_referral) {
		VN_RELE(vp);
		rl->rl_data = NULL;
		rl->rl_status = NFSERR_NXIO;
		return;
	}

	/*
	 * Allocate data for pathname. This will be freed by rfs_rlfree.
	 */
	rl->rl_data = kmem_alloc(NFS_MAXPATHLEN, KM_SLEEP);

	if (is_referral) {
		char *s;
		size_t strsz;

		/* Get an artificial symlink based on a referral */
		s = build_symlink(vp, cr, &strsz);
		global_svstat_ptr[2][NFS_REFERLINKS].value.ui64++;
		DTRACE_PROBE2(nfs2serv__func__referral__reflink,
		    vnode_t *, vp, char *, s);
		if (s == NULL)
			error = EINVAL;
		else {
			error = 0;
			(void) strlcpy(rl->rl_data, s, NFS_MAXPATHLEN);
			rl->rl_count = (uint32_t)MIN(strsz, NFS_MAXPATHLEN);
			kmem_free(s, strsz);
		}

	} else {

		/*
		 * Set up io vector to read sym link data
		 */
		iov.iov_base = rl->rl_data;
		iov.iov_len = NFS_MAXPATHLEN;
		uio.uio_iov = &iov;
		uio.uio_iovcnt = 1;
		uio.uio_segflg = UIO_SYSSPACE;
		uio.uio_extflg = UIO_COPY_CACHED;
		uio.uio_loffset = (offset_t)0;
		uio.uio_resid = NFS_MAXPATHLEN;

		/*
		 * Do the readlink.
		 */
		error = VOP_READLINK(vp, &uio, cr, NULL);

		rl->rl_count = (uint32_t)(NFS_MAXPATHLEN - uio.uio_resid);

		/*
		 * NOTE(review): if the link completely fills the buffer
		 * (rl_count == NFS_MAXPATHLEN) this writes one byte past
		 * the NFS_MAXPATHLEN allocation — confirm whether
		 * VOP_READLINK can return a full buffer here.
		 */
		if (!error)
			rl->rl_data[rl->rl_count] = '\0';

	}


	VN_RELE(vp);

	/* Translate the link text to the client's character set if needed. */
	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
	name = nfscmd_convname(ca, exi, rl->rl_data,
	    NFSCMD_CONV_OUTBOUND, MAXPATHLEN);

	if (name != NULL && name != rl->rl_data) {
		kmem_free(rl->rl_data, NFS_MAXPATHLEN);
		rl->rl_data = name;
	}

	/*
	 * XNFS and RFC1094 require us to return ENXIO if argument
	 * is not a link. UFS returns EINVAL if this is the case,
	 * so we do the mapping here. BUGID 1138002.
	 */
	if (error == EINVAL)
		rl->rl_status = NFSERR_NXIO;
	else
		rl->rl_status = puterrno(error);

}
/*
 * Return the file handle from the READLINK arguments (used by the common
 * dispatch code to locate the export for this request).
 */
void *
rfs_readlink_getfh(fhandle_t *fhp)
{
	return (fhp);
}
740 741 /*
741 742 * Free data allocated by rfs_readlink
742 743 */
743 744 void
744 745 rfs_rlfree(struct nfsrdlnres *rl)
745 746 {
746 747 if (rl->rl_data != NULL)
747 748 kmem_free(rl->rl_data, NFS_MAXPATHLEN);
748 749 }
749 750
750 751 static int rdma_setup_read_data2(struct nfsreadargs *, struct nfsrdresult *);
751 752
/*
 * Read data.
 * Returns some data read from the file at the given fhandle.
 *
 * Lock ordering: the nbmand critical region is entered before VOP_RWLOCK
 * (see comment below); every exit path must undo both in reverse order.
 */
/* ARGSUSED */
void
rfs_read(struct nfsreadargs *ra, struct nfsrdresult *rr,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
	vnode_t *vp;
	int error;
	struct vattr va;
	struct iovec iov;
	struct uio uio;
	mblk_t *mp;
	int alloc_err = 0;
	int in_crit = 0;
	caller_context_t ct;

	vp = nfs_fhtovp(&ra->ra_fhandle, exi);
	if (vp == NULL) {
		rr->rr_data = NULL;
		rr->rr_status = NFSERR_STALE;
		return;
	}

	if (vp->v_type != VREG) {
		VN_RELE(vp);
		rr->rr_data = NULL;
		rr->rr_status = NFSERR_ISDIR;
		return;
	}

	ct.cc_sysid = 0;
	ct.cc_pid = 0;
	ct.cc_caller_id = nfs2_srv_caller_id;
	ct.cc_flags = CC_DONTBLOCK;

	/*
	 * Enter the critical region before calling VOP_RWLOCK
	 * to avoid a deadlock with write requests.
	 */
	if (nbl_need_check(vp)) {
		nbl_start_crit(vp, RW_READER);
		if (nbl_conflict(vp, NBL_READ, ra->ra_offset, ra->ra_count,
		    0, NULL)) {
			nbl_end_crit(vp);
			VN_RELE(vp);
			rr->rr_data = NULL;
			rr->rr_status = NFSERR_ACCES;
			return;
		}
		in_crit = 1;
	}

	error = VOP_RWLOCK(vp, V_WRITELOCK_FALSE, &ct);

	/* check if a monitor detected a delegation conflict */
	if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
		if (in_crit)
			nbl_end_crit(vp);
		VN_RELE(vp);
		/* mark as wouldblock so response is dropped */
		curthread->t_flag |= T_WOULDBLOCK;

		rr->rr_data = NULL;
		return;
	}

	va.va_mask = AT_ALL;

	error = VOP_GETATTR(vp, &va, 0, cr, &ct);

	if (error) {
		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
		if (in_crit)
			nbl_end_crit(vp);

		VN_RELE(vp);
		rr->rr_data = NULL;
		rr->rr_status = puterrno(error);

		return;
	}

	/*
	 * This is a kludge to allow reading of files created
	 * with no read permission. The owner of the file
	 * is always allowed to read it.
	 */
	if (crgetuid(cr) != va.va_uid) {
		error = VOP_ACCESS(vp, VREAD, 0, cr, &ct);

		if (error) {
			/*
			 * Exec is the same as read over the net because
			 * of demand loading.
			 */
			error = VOP_ACCESS(vp, VEXEC, 0, cr, &ct);
		}
		if (error) {
			VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
			if (in_crit)
				nbl_end_crit(vp);
			VN_RELE(vp);
			rr->rr_data = NULL;
			rr->rr_status = puterrno(error);

			return;
		}
	}

	/* Refuse mandatory-lock files to avoid blocking service threads. */
	if (MANDLOCK(vp, va.va_mode)) {
		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
		if (in_crit)
			nbl_end_crit(vp);

		VN_RELE(vp);
		rr->rr_data = NULL;
		rr->rr_status = NFSERR_ACCES;

		return;
	}

	rr->rr_ok.rrok_wlist_len = 0;
	rr->rr_ok.rrok_wlist = NULL;

	/* Read starting at or past EOF: success with zero bytes. */
	if ((u_offset_t)ra->ra_offset >= va.va_size) {
		rr->rr_count = 0;
		rr->rr_data = NULL;
		/*
		 * In this case, status is NFS_OK, but there is no data
		 * to encode. So set rr_mp to NULL.
		 */
		rr->rr_mp = NULL;
		rr->rr_ok.rrok_wlist = ra->ra_wlist;
		if (rr->rr_ok.rrok_wlist)
			clist_zero_len(rr->rr_ok.rrok_wlist);
		goto done;
	}

	if (ra->ra_wlist) {
		/* RDMA: read directly into the client-provided write chunk. */
		mp = NULL;
		rr->rr_mp = NULL;
		(void) rdma_get_wchunk(req, &iov, ra->ra_wlist);
		if (ra->ra_count > iov.iov_len) {
			/*
			 * NOTE(review): the 'done' path below recomputes
			 * rr_status from vattr_to_nattr(), which appears
			 * to clobber this NFSERR_INVAL — confirm intent.
			 */
			rr->rr_data = NULL;
			rr->rr_status = NFSERR_INVAL;
			goto done;
		}
	} else {
		/*
		 * mp will contain the data to be sent out in the read reply.
		 * This will be freed after the reply has been sent out (by the
		 * driver).
		 * Let's roundup the data to a BYTES_PER_XDR_UNIT multiple, so
		 * that the call to xdrmblk_putmblk() never fails.
		 */
		mp = allocb_wait(RNDUP(ra->ra_count), BPRI_MED, STR_NOSIG,
		    &alloc_err);
		ASSERT(mp != NULL);
		ASSERT(alloc_err == 0);

		rr->rr_mp = mp;

		/*
		 * Set up io vector
		 */
		iov.iov_base = (caddr_t)mp->b_datap->db_base;
		iov.iov_len = ra->ra_count;
	}

	uio.uio_iov = &iov;
	uio.uio_iovcnt = 1;
	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_extflg = UIO_COPY_CACHED;
	uio.uio_loffset = (offset_t)ra->ra_offset;
	uio.uio_resid = ra->ra_count;

	error = VOP_READ(vp, &uio, 0, cr, &ct);

	if (error) {
		if (mp)
			freeb(mp);

		/*
		 * check if a monitor detected a delegation conflict and
		 * mark as wouldblock so response is dropped
		 */
		if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
			curthread->t_flag |= T_WOULDBLOCK;
		else
			rr->rr_status = puterrno(error);

		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
		if (in_crit)
			nbl_end_crit(vp);

		VN_RELE(vp);
		rr->rr_data = NULL;

		return;
	}

	/*
	 * Get attributes again so we can send the latest access
	 * time to the client side for its cache.
	 */
	va.va_mask = AT_ALL;

	error = VOP_GETATTR(vp, &va, 0, cr, &ct);

	if (error) {
		if (mp)
			freeb(mp);

		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
		if (in_crit)
			nbl_end_crit(vp);

		VN_RELE(vp);
		rr->rr_data = NULL;
		rr->rr_status = puterrno(error);

		return;
	}

	/* Number of bytes actually read. */
	rr->rr_count = (uint32_t)(ra->ra_count - uio.uio_resid);

	if (mp) {
		rr->rr_data = (char *)mp->b_datap->db_base;
	} else {
		if (ra->ra_wlist) {
			rr->rr_data = (caddr_t)iov.iov_base;
			if (!rdma_setup_read_data2(ra, rr)) {
				rr->rr_data = NULL;
				rr->rr_status = puterrno(NFSERR_INVAL);
			}
		}
	}
done:
	VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
	if (in_crit)
		nbl_end_crit(vp);

	acl_perm(vp, exi, &va, cr);

	/* check for overflows */
	error = vattr_to_nattr(&va, &rr->rr_attr);

	VN_RELE(vp);

	rr->rr_status = puterrno(error);
}
1006 1007
1007 1008 /*
1008 1009 * Free data allocated by rfs_read
1009 1010 */
1010 1011 void
1011 1012 rfs_rdfree(struct nfsrdresult *rr)
1012 1013 {
1013 1014 mblk_t *mp;
1014 1015
1015 1016 if (rr->rr_status == NFS_OK) {
1016 1017 mp = rr->rr_mp;
1017 1018 if (mp != NULL)
1018 1019 freeb(mp);
1019 1020 }
1020 1021 }
1021 1022
/*
 * Return the file handle from the READ arguments (used by the common
 * dispatch code to locate the export for this request).
 */
void *
rfs_read_getfh(struct nfsreadargs *ra)
{
	return (&ra->ra_fhandle);
}
1027 1028
/* Size of the on-stack iovec array used by rfs_write_sync. */
#define	MAX_IOVECS	12

#ifdef DEBUG
/* Counters: how often the on-stack iovec array sufficed vs. kmem_alloc. */
static int rfs_write_sync_hits = 0;
static int rfs_write_sync_misses = 0;
#endif
1034 1035
1035 1036 /*
1036 1037 * Write data to file.
1037 1038 * Returns attributes of a file after writing some data to it.
1038 1039 *
1039 1040 * Any changes made here, especially in error handling might have
1040 1041 * to also be done in rfs_write (which clusters write requests).
1041 1042 */
1042 1043 /* ARGSUSED */
1043 1044 void
1044 1045 rfs_write_sync(struct nfswriteargs *wa, struct nfsattrstat *ns,
1045 1046 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
1046 1047 {
1047 1048 int error;
1048 1049 vnode_t *vp;
1049 1050 rlim64_t rlimit;
1050 1051 struct vattr va;
1051 1052 struct uio uio;
1052 1053 struct iovec iov[MAX_IOVECS];
1053 1054 mblk_t *m;
1054 1055 struct iovec *iovp;
1055 1056 int iovcnt;
1056 1057 cred_t *savecred;
1057 1058 int in_crit = 0;
1058 1059 caller_context_t ct;
1059 1060
1060 1061 vp = nfs_fhtovp(&wa->wa_fhandle, exi);
1061 1062 if (vp == NULL) {
1062 1063 ns->ns_status = NFSERR_STALE;
1063 1064 return;
1064 1065 }
1065 1066
1066 1067 if (rdonly(ro, vp)) {
1067 1068 VN_RELE(vp);
1068 1069 ns->ns_status = NFSERR_ROFS;
1069 1070 return;
1070 1071 }
1071 1072
1072 1073 if (vp->v_type != VREG) {
1073 1074 VN_RELE(vp);
1074 1075 ns->ns_status = NFSERR_ISDIR;
1075 1076 return;
1076 1077 }
1077 1078
1078 1079 ct.cc_sysid = 0;
1079 1080 ct.cc_pid = 0;
1080 1081 ct.cc_caller_id = nfs2_srv_caller_id;
1081 1082 ct.cc_flags = CC_DONTBLOCK;
1082 1083
1083 1084 va.va_mask = AT_UID|AT_MODE;
1084 1085
1085 1086 error = VOP_GETATTR(vp, &va, 0, cr, &ct);
1086 1087
1087 1088 if (error) {
1088 1089 VN_RELE(vp);
1089 1090 ns->ns_status = puterrno(error);
1090 1091
1091 1092 return;
1092 1093 }
1093 1094
1094 1095 if (crgetuid(cr) != va.va_uid) {
1095 1096 /*
1096 1097 * This is a kludge to allow writes of files created
1097 1098 * with read only permission. The owner of the file
1098 1099 * is always allowed to write it.
1099 1100 */
1100 1101 error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct);
1101 1102
1102 1103 if (error) {
1103 1104 VN_RELE(vp);
1104 1105 ns->ns_status = puterrno(error);
1105 1106 return;
1106 1107 }
1107 1108 }
1108 1109
1109 1110 /*
1110 1111 * Can't access a mandatory lock file. This might cause
1111 1112 * the NFS service thread to block forever waiting for a
1112 1113 * lock to be released that will never be released.
1113 1114 */
1114 1115 if (MANDLOCK(vp, va.va_mode)) {
1115 1116 VN_RELE(vp);
1116 1117 ns->ns_status = NFSERR_ACCES;
1117 1118 return;
1118 1119 }
1119 1120
1120 1121 /*
1121 1122 * We have to enter the critical region before calling VOP_RWLOCK
1122 1123 * to avoid a deadlock with ufs.
1123 1124 */
1124 1125 if (nbl_need_check(vp)) {
1125 1126 nbl_start_crit(vp, RW_READER);
1126 1127 in_crit = 1;
1127 1128 if (nbl_conflict(vp, NBL_WRITE, wa->wa_offset,
1128 1129 wa->wa_count, 0, NULL)) {
1129 1130 error = EACCES;
1130 1131 goto out;
1131 1132 }
1132 1133 }
1133 1134
1134 1135 error = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct);
1135 1136
1136 1137 /* check if a monitor detected a delegation conflict */
1137 1138 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1138 1139 goto out;
1139 1140 }
1140 1141
1141 1142 if (wa->wa_data || wa->wa_rlist) {
1142 1143 /* Do the RDMA thing if necessary */
1143 1144 if (wa->wa_rlist) {
1144 1145 iov[0].iov_base = (char *)((wa->wa_rlist)->u.c_daddr3);
1145 1146 iov[0].iov_len = wa->wa_count;
1146 1147 } else {
1147 1148 iov[0].iov_base = wa->wa_data;
1148 1149 iov[0].iov_len = wa->wa_count;
1149 1150 }
1150 1151 uio.uio_iov = iov;
1151 1152 uio.uio_iovcnt = 1;
1152 1153 uio.uio_segflg = UIO_SYSSPACE;
1153 1154 uio.uio_extflg = UIO_COPY_DEFAULT;
1154 1155 uio.uio_loffset = (offset_t)wa->wa_offset;
1155 1156 uio.uio_resid = wa->wa_count;
1156 1157 /*
1157 1158 * The limit is checked on the client. We
1158 1159 * should allow any size writes here.
1159 1160 */
1160 1161 uio.uio_llimit = curproc->p_fsz_ctl;
1161 1162 rlimit = uio.uio_llimit - wa->wa_offset;
1162 1163 if (rlimit < (rlim64_t)uio.uio_resid)
1163 1164 uio.uio_resid = (uint_t)rlimit;
1164 1165
1165 1166 /*
1166 1167 * for now we assume no append mode
1167 1168 */
1168 1169 /*
1169 1170 * We're changing creds because VM may fault and we need
1170 1171 * the cred of the current thread to be used if quota
1171 1172 * checking is enabled.
1172 1173 */
1173 1174 savecred = curthread->t_cred;
1174 1175 curthread->t_cred = cr;
1175 1176 error = VOP_WRITE(vp, &uio, FSYNC, cr, &ct);
1176 1177 curthread->t_cred = savecred;
1177 1178 } else {
1178 1179
1179 1180 iovcnt = 0;
1180 1181 for (m = wa->wa_mblk; m != NULL; m = m->b_cont)
1181 1182 iovcnt++;
1182 1183 if (iovcnt <= MAX_IOVECS) {
1183 1184 #ifdef DEBUG
1184 1185 rfs_write_sync_hits++;
1185 1186 #endif
1186 1187 iovp = iov;
1187 1188 } else {
1188 1189 #ifdef DEBUG
1189 1190 rfs_write_sync_misses++;
1190 1191 #endif
1191 1192 iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
1192 1193 }
1193 1194 mblk_to_iov(wa->wa_mblk, iovcnt, iovp);
1194 1195 uio.uio_iov = iovp;
1195 1196 uio.uio_iovcnt = iovcnt;
1196 1197 uio.uio_segflg = UIO_SYSSPACE;
1197 1198 uio.uio_extflg = UIO_COPY_DEFAULT;
1198 1199 uio.uio_loffset = (offset_t)wa->wa_offset;
1199 1200 uio.uio_resid = wa->wa_count;
1200 1201 /*
1201 1202 * The limit is checked on the client. We
1202 1203 * should allow any size writes here.
1203 1204 */
1204 1205 uio.uio_llimit = curproc->p_fsz_ctl;
1205 1206 rlimit = uio.uio_llimit - wa->wa_offset;
1206 1207 if (rlimit < (rlim64_t)uio.uio_resid)
1207 1208 uio.uio_resid = (uint_t)rlimit;
1208 1209
1209 1210 /*
1210 1211 * For now we assume no append mode.
1211 1212 */
1212 1213 /*
1213 1214 * We're changing creds because VM may fault and we need
1214 1215 * the cred of the current thread to be used if quota
1215 1216 * checking is enabled.
1216 1217 */
1217 1218 savecred = curthread->t_cred;
1218 1219 curthread->t_cred = cr;
1219 1220 error = VOP_WRITE(vp, &uio, FSYNC, cr, &ct);
1220 1221 curthread->t_cred = savecred;
1221 1222
1222 1223 if (iovp != iov)
1223 1224 kmem_free(iovp, sizeof (*iovp) * iovcnt);
1224 1225 }
1225 1226
1226 1227 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct);
1227 1228
1228 1229 if (!error) {
1229 1230 /*
1230 1231 * Get attributes again so we send the latest mod
1231 1232 * time to the client side for its cache.
1232 1233 */
1233 1234 va.va_mask = AT_ALL; /* now we want everything */
1234 1235
1235 1236 error = VOP_GETATTR(vp, &va, 0, cr, &ct);
1236 1237
1237 1238 /* check for overflows */
1238 1239 if (!error) {
1239 1240 acl_perm(vp, exi, &va, cr);
1240 1241 error = vattr_to_nattr(&va, &ns->ns_attr);
1241 1242 }
1242 1243 }
1243 1244
1244 1245 out:
1245 1246 if (in_crit)
1246 1247 nbl_end_crit(vp);
1247 1248 VN_RELE(vp);
1248 1249
1249 1250 /* check if a monitor detected a delegation conflict */
1250 1251 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
1251 1252 /* mark as wouldblock so response is dropped */
1252 1253 curthread->t_flag |= T_WOULDBLOCK;
1253 1254 else
1254 1255 ns->ns_status = puterrno(error);
1255 1256
1256 1257 }
1257 1258
/*
 * One queued WRITE request within a write cluster.  Each NFS service
 * thread that joins a cluster contributes one of these (stack-allocated
 * in rfs_write()) and then sleeps until the cluster owner fills in its
 * ns_status.
 */
struct rfs_async_write {
	struct nfswriteargs *wa;	/* client's write arguments */
	struct nfsattrstat *ns;		/* response; ns_status signals done */
	struct svc_req *req;		/* RPC request for this write */
	cred_t *cr;			/* credentials of the requester */
	bool_t ro;			/* read-only flag for rdonly() check */
	kthread_t *thread;		/* service thread waiting on this entry */
	struct rfs_async_write *list;	/* next request, kept sorted by offset */
};
1267 1268
/*
 * A write cluster: the set of pending WRITE requests for a single file,
 * identified by file handle.  Clusters are linked off the server state
 * returned by nfs_get_srv() and protected by its async_write_lock.
 */
struct rfs_async_write_list {
	fhandle_t *fhp;			/* file handle common to the cluster */
	kcondvar_t cv;			/* broadcast when statuses are filled in */
	struct rfs_async_write *list;	/* queued requests, sorted by offset */
	struct rfs_async_write_list *next;	/* next cluster on the list */
};
1274 1275
/*
 * NOTE(review): these three file-scope variables look superseded by the
 * per-server state used in rfs_write() below (nsrv->async_write_head,
 * nsrv->async_write_lock, nsrv->write_async); confirm there are no
 * remaining references elsewhere before removing them.
 */
static struct rfs_async_write_list *rfs_async_write_head = NULL;
static kmutex_t rfs_async_write_lock;
static int rfs_write_async = 1;	/* enables write clustering if == 1 */

/* Max iovecs gathered into a single coalesced VOP_WRITE of a cluster. */
#define	MAXCLIOVECS	42
/* Sentinel meaning "not yet processed"; 0 would read as NFS_OK. */
#define	RFSWRITE_INITVAL	(enum nfsstat) -1

#ifdef DEBUG
static int rfs_write_hits = 0;		/* clusters fitting the stack iovec array */
static int rfs_write_misses = 0;	/* clusters needing a kmem_alloc'd array */
#endif
1286 1287
/*
 * Write data to file.
 * Returns attributes of a file after writing some data to it.
 *
 * Writes to the same file are clustered: the first thread to arrive
 * creates a cluster and blocks in VOP_RWLOCK(); threads arriving for
 * the same file handle while it waits queue their requests (sorted by
 * offset) on that cluster and sleep until the owner fills in their
 * ns_status and broadcasts the cluster's cv.
 */
void
rfs_write(struct nfswriteargs *wa, struct nfsattrstat *ns,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error;
	vnode_t *vp;
	rlim64_t rlimit;
	struct vattr va;
	struct uio uio;
	struct rfs_async_write_list *lp;
	struct rfs_async_write_list *nlp;
	struct rfs_async_write *rp;
	struct rfs_async_write *nrp;
	struct rfs_async_write *trp;
	struct rfs_async_write *lrp;
	int data_written;
	int iovcnt;
	mblk_t *m;
	struct iovec *iovp;
	struct iovec *niovp;
	struct iovec iov[MAXCLIOVECS];
	int count;
	int rcount;
	uint_t off;
	uint_t len;
	struct rfs_async_write nrpsp;
	struct rfs_async_write_list nlpsp;
	ushort_t t_flag;
	cred_t *savecred;
	int in_crit = 0;
	caller_context_t ct;
	nfs_srv_t *nsrv;

	ASSERT(exi == NULL || exi->exi_zoneid == curzone->zone_id);
	nsrv = nfs_get_srv();
	/* Clustering disabled: fall through to the simple synchronous path. */
	if (!nsrv->write_async) {
		rfs_write_sync(wa, ns, exi, req, cr, ro);
		return;
	}

	/*
	 * Initialize status to RFSWRITE_INITVAL instead of 0, since value of 0
	 * is considered an OK.
	 */
	ns->ns_status = RFSWRITE_INITVAL;

	nrp = &nrpsp;
	nrp->wa = wa;
	nrp->ns = ns;
	nrp->req = req;
	nrp->cr = cr;
	nrp->ro = ro;
	nrp->thread = curthread;

	ASSERT(curthread->t_schedflag & TS_DONT_SWAP);

	/*
	 * Look to see if there is already a cluster started
	 * for this file.
	 */
	mutex_enter(&nsrv->async_write_lock);
	for (lp = nsrv->async_write_head; lp != NULL; lp = lp->next) {
		if (bcmp(&wa->wa_fhandle, lp->fhp,
		    sizeof (fhandle_t)) == 0)
			break;
	}

	/*
	 * If lp is non-NULL, then there is already a cluster
	 * started.  We need to place ourselves in the cluster
	 * list in the right place as determined by starting
	 * offset.  Conflicts with non-blocking mandatory locked
	 * regions will be checked when the cluster is processed.
	 */
	if (lp != NULL) {
		rp = lp->list;
		trp = NULL;
		while (rp != NULL && rp->wa->wa_offset < wa->wa_offset) {
			trp = rp;
			rp = rp->list;
		}
		nrp->list = rp;
		if (trp == NULL)
			lp->list = nrp;
		else
			trp->list = nrp;
		/* Sleep until the cluster owner has filled in our status. */
		while (nrp->ns->ns_status == RFSWRITE_INITVAL)
			cv_wait(&lp->cv, &nsrv->async_write_lock);
		mutex_exit(&nsrv->async_write_lock);

		return;
	}

	/*
	 * No cluster started yet, start one and add ourselves
	 * to the list of clusters.
	 */
	nrp->list = NULL;

	nlp = &nlpsp;
	nlp->fhp = &wa->wa_fhandle;
	cv_init(&nlp->cv, NULL, CV_DEFAULT, NULL);
	nlp->list = nrp;
	nlp->next = NULL;

	if (nsrv->async_write_head == NULL) {
		nsrv->async_write_head = nlp;
	} else {
		lp = nsrv->async_write_head;
		while (lp->next != NULL)
			lp = lp->next;
		lp->next = nlp;
	}
	mutex_exit(&nsrv->async_write_lock);

	/*
	 * Convert the file handle common to all of the requests
	 * in this cluster to a vnode.
	 */
	vp = nfs_fhtovp(&wa->wa_fhandle, exi);
	if (vp == NULL) {
		/* Unlink our cluster and fail every queued request. */
		mutex_enter(&nsrv->async_write_lock);
		if (nsrv->async_write_head == nlp)
			nsrv->async_write_head = nlp->next;
		else {
			lp = nsrv->async_write_head;
			while (lp->next != nlp)
				lp = lp->next;
			lp->next = nlp->next;
		}
		t_flag = curthread->t_flag & T_WOULDBLOCK;
		for (rp = nlp->list; rp != NULL; rp = rp->list) {
			rp->ns->ns_status = NFSERR_STALE;
			rp->thread->t_flag |= t_flag;
		}
		cv_broadcast(&nlp->cv);
		mutex_exit(&nsrv->async_write_lock);

		return;
	}

	/*
	 * Can only write regular files.  Attempts to write any
	 * other file types fail with EISDIR.
	 */
	if (vp->v_type != VREG) {
		VN_RELE(vp);
		mutex_enter(&nsrv->async_write_lock);
		if (nsrv->async_write_head == nlp)
			nsrv->async_write_head = nlp->next;
		else {
			lp = nsrv->async_write_head;
			while (lp->next != nlp)
				lp = lp->next;
			lp->next = nlp->next;
		}
		t_flag = curthread->t_flag & T_WOULDBLOCK;
		for (rp = nlp->list; rp != NULL; rp = rp->list) {
			rp->ns->ns_status = NFSERR_ISDIR;
			rp->thread->t_flag |= t_flag;
		}
		cv_broadcast(&nlp->cv);
		mutex_exit(&nsrv->async_write_lock);

		return;
	}

	/*
	 * Enter the critical region before calling VOP_RWLOCK, to avoid a
	 * deadlock with ufs.
	 */
	if (nbl_need_check(vp)) {
		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
	}

	ct.cc_sysid = 0;
	ct.cc_pid = 0;
	ct.cc_caller_id = nfs2_srv_caller_id;
	ct.cc_flags = CC_DONTBLOCK;

	/*
	 * Lock the file for writing.  This operation provides
	 * the delay which allows clusters to grow.
	 */
	error = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct);

	/* check if a monitor detected a delegation conflict */
	if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
		if (in_crit)
			nbl_end_crit(vp);
		VN_RELE(vp);
		/* mark as wouldblock so response is dropped */
		curthread->t_flag |= T_WOULDBLOCK;
		mutex_enter(&nsrv->async_write_lock);
		if (nsrv->async_write_head == nlp)
			nsrv->async_write_head = nlp->next;
		else {
			lp = nsrv->async_write_head;
			while (lp->next != nlp)
				lp = lp->next;
			lp->next = nlp->next;
		}
		for (rp = nlp->list; rp != NULL; rp = rp->list) {
			if (rp->ns->ns_status == RFSWRITE_INITVAL) {
				rp->ns->ns_status = puterrno(error);
				rp->thread->t_flag |= T_WOULDBLOCK;
			}
		}
		cv_broadcast(&nlp->cv);
		mutex_exit(&nsrv->async_write_lock);

		return;
	}

	/*
	 * Disconnect this cluster from the list of clusters.
	 * The cluster that is being dealt with must be fixed
	 * in size after this point, so there is no reason
	 * to leave it on the list so that new requests can
	 * find it.
	 *
	 * The algorithm is that the first write request will
	 * create a cluster, convert the file handle to a
	 * vnode pointer, and then lock the file for writing.
	 * This request is not likely to be clustered with
	 * any others.  However, the next request will create
	 * a new cluster and be blocked in VOP_RWLOCK while
	 * the first request is being processed.  This delay
	 * will allow more requests to be clustered in this
	 * second cluster.
	 */
	mutex_enter(&nsrv->async_write_lock);
	if (nsrv->async_write_head == nlp)
		nsrv->async_write_head = nlp->next;
	else {
		lp = nsrv->async_write_head;
		while (lp->next != nlp)
			lp = lp->next;
		lp->next = nlp->next;
	}
	mutex_exit(&nsrv->async_write_lock);

	/*
	 * Step through the list of requests in this cluster.
	 * We need to check permissions to make sure that all
	 * of the requests have sufficient permission to write
	 * the file.  A cluster can be composed of requests
	 * from different clients and different users on each
	 * client.
	 *
	 * As a side effect, we also calculate the size of the
	 * byte range that this cluster encompasses.
	 */
	rp = nlp->list;
	off = rp->wa->wa_offset;
	len = (uint_t)0;
	do {
		if (rdonly(rp->ro, vp)) {
			rp->ns->ns_status = NFSERR_ROFS;
			t_flag = curthread->t_flag & T_WOULDBLOCK;
			rp->thread->t_flag |= t_flag;
			continue;
		}

		va.va_mask = AT_UID|AT_MODE;

		error = VOP_GETATTR(vp, &va, 0, rp->cr, &ct);

		if (!error) {
			if (crgetuid(rp->cr) != va.va_uid) {
				/*
				 * This is a kludge to allow writes of files
				 * created with read only permission.  The
				 * owner of the file is always allowed to
				 * write it.
				 */
				error = VOP_ACCESS(vp, VWRITE, 0, rp->cr, &ct);
			}
			if (!error && MANDLOCK(vp, va.va_mode))
				error = EACCES;
		}

		/*
		 * Check for a conflict with a nbmand-locked region.
		 */
		if (in_crit && nbl_conflict(vp, NBL_WRITE, rp->wa->wa_offset,
		    rp->wa->wa_count, 0, NULL)) {
			error = EACCES;
		}

		if (error) {
			rp->ns->ns_status = puterrno(error);
			t_flag = curthread->t_flag & T_WOULDBLOCK;
			rp->thread->t_flag |= t_flag;
			continue;
		}
		if (len < rp->wa->wa_offset + rp->wa->wa_count - off)
			len = rp->wa->wa_offset + rp->wa->wa_count - off;
	} while ((rp = rp->list) != NULL);

	/*
	 * Step through the cluster attempting to gather as many
	 * requests which are contiguous as possible.  These
	 * contiguous requests are handled via one call to VOP_WRITE
	 * instead of different calls to VOP_WRITE.  We also keep
	 * track of the fact that any data was written.
	 */
	rp = nlp->list;
	data_written = 0;
	do {
		/*
		 * Skip any requests which are already marked as having an
		 * error.
		 */
		if (rp->ns->ns_status != RFSWRITE_INITVAL) {
			rp = rp->list;
			continue;
		}

		/*
		 * Count the number of iovec's which are required
		 * to handle this set of requests.  One iovec is
		 * needed for each data buffer, whether addressed
		 * by wa_data or by the b_rptr pointers in the
		 * mblk chains.
		 */
		iovcnt = 0;
		lrp = rp;
		for (;;) {
			if (lrp->wa->wa_data || lrp->wa->wa_rlist)
				iovcnt++;
			else {
				m = lrp->wa->wa_mblk;
				while (m != NULL) {
					iovcnt++;
					m = m->b_cont;
				}
			}
			/*
			 * Stop gathering at the first request that is not
			 * pending, or that is not byte-contiguous with the
			 * one before it.
			 */
			if (lrp->list == NULL ||
			    lrp->list->ns->ns_status != RFSWRITE_INITVAL ||
			    lrp->wa->wa_offset + lrp->wa->wa_count !=
			    lrp->list->wa->wa_offset) {
				lrp = lrp->list;
				break;
			}
			lrp = lrp->list;
		}

		if (iovcnt <= MAXCLIOVECS) {
#ifdef DEBUG
			rfs_write_hits++;
#endif
			niovp = iov;
		} else {
#ifdef DEBUG
			rfs_write_misses++;
#endif
			niovp = kmem_alloc(sizeof (*niovp) * iovcnt, KM_SLEEP);
		}
		/*
		 * Put together the scatter/gather iovecs.
		 */
		iovp = niovp;
		trp = rp;
		count = 0;
		do {
			if (trp->wa->wa_data || trp->wa->wa_rlist) {
				if (trp->wa->wa_rlist) {
					iovp->iov_base =
					    (char *)((trp->wa->wa_rlist)->
					    u.c_daddr3);
					iovp->iov_len = trp->wa->wa_count;
				} else {
					iovp->iov_base = trp->wa->wa_data;
					iovp->iov_len = trp->wa->wa_count;
				}
				iovp++;
			} else {
				m = trp->wa->wa_mblk;
				rcount = trp->wa->wa_count;
				while (m != NULL) {
					iovp->iov_base = (caddr_t)m->b_rptr;
					iovp->iov_len = (m->b_wptr - m->b_rptr);
					rcount -= iovp->iov_len;
					if (rcount < 0)
						iovp->iov_len += rcount;
					iovp++;
					if (rcount <= 0)
						break;
					m = m->b_cont;
				}
			}
			count += trp->wa->wa_count;
			trp = trp->list;
		} while (trp != lrp);

		uio.uio_iov = niovp;
		uio.uio_iovcnt = iovcnt;
		uio.uio_segflg = UIO_SYSSPACE;
		uio.uio_extflg = UIO_COPY_DEFAULT;
		uio.uio_loffset = (offset_t)rp->wa->wa_offset;
		uio.uio_resid = count;
		/*
		 * The limit is checked on the client.  We
		 * should allow any size writes here.
		 */
		uio.uio_llimit = curproc->p_fsz_ctl;
		rlimit = uio.uio_llimit - rp->wa->wa_offset;
		if (rlimit < (rlim64_t)uio.uio_resid)
			uio.uio_resid = (uint_t)rlimit;

		/*
		 * For now we assume no append mode.
		 */

		/*
		 * We're changing creds because VM may fault
		 * and we need the cred of the current
		 * thread to be used if quota checking is
		 * enabled.
		 *
		 * NOTE(review): t_cred is set to this thread's cr while
		 * the write itself uses the queued request's rp->cr —
		 * confirm the mismatch is intended.
		 */
		savecred = curthread->t_cred;
		curthread->t_cred = cr;
		error = VOP_WRITE(vp, &uio, 0, rp->cr, &ct);
		curthread->t_cred = savecred;

		/* check if a monitor detected a delegation conflict */
		if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
			/* mark as wouldblock so response is dropped */
			curthread->t_flag |= T_WOULDBLOCK;

		if (niovp != iov)
			kmem_free(niovp, sizeof (*niovp) * iovcnt);

		if (!error) {
			data_written = 1;
			/*
			 * Get attributes again so we send the latest mod
			 * time to the client side for its cache.
			 */
			va.va_mask = AT_ALL;	/* now we want everything */

			error = VOP_GETATTR(vp, &va, 0, rp->cr, &ct);

			if (!error)
				acl_perm(vp, exi, &va, rp->cr);
		}

		/*
		 * Fill in the status responses for each request
		 * which was just handled.  Also, copy the latest
		 * attributes in to the attribute responses if
		 * appropriate.
		 */
		t_flag = curthread->t_flag & T_WOULDBLOCK;
		do {
			rp->thread->t_flag |= t_flag;
			/* check for overflows */
			if (!error) {
				error = vattr_to_nattr(&va, &rp->ns->ns_attr);
			}
			rp->ns->ns_status = puterrno(error);
			rp = rp->list;
		} while (rp != lrp);
	} while (rp != NULL);

	/*
	 * If any data was written at all, then we need to flush
	 * the data and metadata to stable storage.
	 */
	if (data_written) {
		error = VOP_PUTPAGE(vp, (u_offset_t)off, len, 0, cr, &ct);

		if (!error) {
			error = VOP_FSYNC(vp, FNODSYNC, cr, &ct);
		}
	}

	VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct);

	if (in_crit)
		nbl_end_crit(vp);
	VN_RELE(vp);

	/*
	 * Propagate the flush error (if any) to requests whose status
	 * was not already set above, then wake every waiting thread.
	 */
	t_flag = curthread->t_flag & T_WOULDBLOCK;
	mutex_enter(&nsrv->async_write_lock);
	for (rp = nlp->list; rp != NULL; rp = rp->list) {
		if (rp->ns->ns_status == RFSWRITE_INITVAL) {
			rp->ns->ns_status = puterrno(error);
			rp->thread->t_flag |= t_flag;
		}
	}
	cv_broadcast(&nlp->cv);
	mutex_exit(&nsrv->async_write_lock);

}
1788 1789
1789 1790 void *
1790 1791 rfs_write_getfh(struct nfswriteargs *wa)
1791 1792 {
1792 1793 return (&wa->wa_fhandle);
1793 1794 }
1794 1795
1795 1796 /*
1796 1797 * Create a file.
1797 1798 * Creates a file with given attributes and returns those attributes
1798 1799 * and an fhandle for the new file.
1799 1800 */
1800 1801 void
1801 1802 rfs_create(struct nfscreatargs *args, struct nfsdiropres *dr,
1802 1803 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
1803 1804 {
1804 1805 int error;
1805 1806 int lookuperr;
1806 1807 int in_crit = 0;
1807 1808 struct vattr va;
1808 1809 vnode_t *vp;
1809 1810 vnode_t *realvp;
1810 1811 vnode_t *dvp;
1811 1812 char *name = args->ca_da.da_name;
1812 1813 vnode_t *tvp = NULL;
1813 1814 int mode;
1814 1815 int lookup_ok;
1815 1816 bool_t trunc;
1816 1817 struct sockaddr *ca;
1817 1818
1818 1819 /*
1819 1820 * Disallow NULL paths
1820 1821 */
1821 1822 if (name == NULL || *name == '\0') {
1822 1823 dr->dr_status = NFSERR_ACCES;
1823 1824 return;
1824 1825 }
1825 1826
1826 1827 dvp = nfs_fhtovp(args->ca_da.da_fhandle, exi);
1827 1828 if (dvp == NULL) {
1828 1829 dr->dr_status = NFSERR_STALE;
1829 1830 return;
1830 1831 }
1831 1832
1832 1833 error = sattr_to_vattr(args->ca_sa, &va);
1833 1834 if (error) {
1834 1835 dr->dr_status = puterrno(error);
1835 1836 return;
1836 1837 }
1837 1838
1838 1839 /*
1839 1840 * Must specify the mode.
1840 1841 */
1841 1842 if (!(va.va_mask & AT_MODE)) {
1842 1843 VN_RELE(dvp);
1843 1844 dr->dr_status = NFSERR_INVAL;
1844 1845 return;
1845 1846 }
1846 1847
1847 1848 /*
1848 1849 * This is a completely gross hack to make mknod
1849 1850 * work over the wire until we can wack the protocol
1850 1851 */
1851 1852 if ((va.va_mode & IFMT) == IFCHR) {
1852 1853 if (args->ca_sa->sa_size == (uint_t)NFS_FIFO_DEV)
1853 1854 va.va_type = VFIFO; /* xtra kludge for named pipe */
1854 1855 else {
1855 1856 va.va_type = VCHR;
1856 1857 /*
1857 1858 * uncompress the received dev_t
1858 1859 * if the top half is zero indicating a request
1859 1860 * from an `older style' OS.
1860 1861 */
1861 1862 if ((va.va_size & 0xffff0000) == 0)
1862 1863 va.va_rdev = nfsv2_expdev(va.va_size);
1863 1864 else
1864 1865 va.va_rdev = (dev_t)va.va_size;
1865 1866 }
1866 1867 va.va_mask &= ~AT_SIZE;
1867 1868 } else if ((va.va_mode & IFMT) == IFBLK) {
1868 1869 va.va_type = VBLK;
1869 1870 /*
1870 1871 * uncompress the received dev_t
1871 1872 * if the top half is zero indicating a request
1872 1873 * from an `older style' OS.
1873 1874 */
1874 1875 if ((va.va_size & 0xffff0000) == 0)
1875 1876 va.va_rdev = nfsv2_expdev(va.va_size);
1876 1877 else
1877 1878 va.va_rdev = (dev_t)va.va_size;
1878 1879 va.va_mask &= ~AT_SIZE;
1879 1880 } else if ((va.va_mode & IFMT) == IFSOCK) {
1880 1881 va.va_type = VSOCK;
1881 1882 } else {
1882 1883 va.va_type = VREG;
1883 1884 }
1884 1885 va.va_mode &= ~IFMT;
1885 1886 va.va_mask |= AT_TYPE;
1886 1887
1887 1888 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1888 1889 name = nfscmd_convname(ca, exi, name, NFSCMD_CONV_INBOUND,
1889 1890 MAXPATHLEN);
1890 1891 if (name == NULL) {
1891 1892 dr->dr_status = puterrno(EINVAL);
1892 1893 return;
1893 1894 }
1894 1895
1895 1896 /*
1896 1897 * Why was the choice made to use VWRITE as the mode to the
1897 1898 * call to VOP_CREATE ? This results in a bug. When a client
1898 1899 * opens a file that already exists and is RDONLY, the second
1899 1900 * open fails with an EACESS because of the mode.
1900 1901 * bug ID 1054648.
1901 1902 */
1902 1903 lookup_ok = 0;
1903 1904 mode = VWRITE;
1904 1905 if (!(va.va_mask & AT_SIZE) || va.va_type != VREG) {
1905 1906 error = VOP_LOOKUP(dvp, name, &tvp, NULL, 0, NULL, cr,
1906 1907 NULL, NULL, NULL);
1907 1908 if (!error) {
1908 1909 struct vattr at;
1909 1910
1910 1911 lookup_ok = 1;
1911 1912 at.va_mask = AT_MODE;
1912 1913 error = VOP_GETATTR(tvp, &at, 0, cr, NULL);
1913 1914 if (!error)
1914 1915 mode = (at.va_mode & S_IWUSR) ? VWRITE : VREAD;
1915 1916 VN_RELE(tvp);
1916 1917 tvp = NULL;
1917 1918 }
1918 1919 }
1919 1920
1920 1921 if (!lookup_ok) {
1921 1922 if (rdonly(ro, dvp)) {
1922 1923 error = EROFS;
1923 1924 } else if (va.va_type != VREG && va.va_type != VFIFO &&
1924 1925 va.va_type != VSOCK && secpolicy_sys_devices(cr) != 0) {
1925 1926 error = EPERM;
1926 1927 } else {
1927 1928 error = 0;
1928 1929 }
1929 1930 }
1930 1931
1931 1932 /*
1932 1933 * If file size is being modified on an already existing file
1933 1934 * make sure that there are no conflicting non-blocking mandatory
1934 1935 * locks in the region being manipulated. Return EACCES if there
1935 1936 * are conflicting locks.
1936 1937 */
1937 1938 if (!error && (va.va_type == VREG) && (va.va_mask & AT_SIZE)) {
1938 1939 lookuperr = VOP_LOOKUP(dvp, name, &tvp, NULL, 0, NULL, cr,
1939 1940 NULL, NULL, NULL);
1940 1941
1941 1942 if (!lookuperr &&
1942 1943 rfs4_check_delegated(FWRITE, tvp, va.va_size == 0)) {
1943 1944 VN_RELE(tvp);
1944 1945 curthread->t_flag |= T_WOULDBLOCK;
1945 1946 goto out;
1946 1947 }
1947 1948
1948 1949 if (!lookuperr && nbl_need_check(tvp)) {
1949 1950 /*
1950 1951 * The file exists. Now check if it has any
1951 1952 * conflicting non-blocking mandatory locks
1952 1953 * in the region being changed.
1953 1954 */
1954 1955 struct vattr bva;
1955 1956 u_offset_t offset;
1956 1957 ssize_t length;
1957 1958
1958 1959 nbl_start_crit(tvp, RW_READER);
1959 1960 in_crit = 1;
1960 1961
1961 1962 bva.va_mask = AT_SIZE;
1962 1963 error = VOP_GETATTR(tvp, &bva, 0, cr, NULL);
1963 1964 if (!error) {
1964 1965 if (va.va_size < bva.va_size) {
1965 1966 offset = va.va_size;
1966 1967 length = bva.va_size - va.va_size;
1967 1968 } else {
1968 1969 offset = bva.va_size;
1969 1970 length = va.va_size - bva.va_size;
1970 1971 }
1971 1972 if (length) {
1972 1973 if (nbl_conflict(tvp, NBL_WRITE,
1973 1974 offset, length, 0, NULL)) {
1974 1975 error = EACCES;
1975 1976 }
1976 1977 }
1977 1978 }
1978 1979 if (error) {
1979 1980 nbl_end_crit(tvp);
1980 1981 VN_RELE(tvp);
1981 1982 in_crit = 0;
1982 1983 }
1983 1984 } else if (tvp != NULL) {
1984 1985 VN_RELE(tvp);
1985 1986 }
1986 1987 }
1987 1988
1988 1989 if (!error) {
1989 1990 /*
1990 1991 * If filesystem is shared with nosuid the remove any
1991 1992 * setuid/setgid bits on create.
1992 1993 */
1993 1994 if (va.va_type == VREG &&
1994 1995 exi->exi_export.ex_flags & EX_NOSUID)
1995 1996 va.va_mode &= ~(VSUID | VSGID);
1996 1997
1997 1998 error = VOP_CREATE(dvp, name, &va, NONEXCL, mode, &vp, cr, 0,
1998 1999 NULL, NULL);
1999 2000
2000 2001 if (!error) {
2001 2002
2002 2003 if ((va.va_mask & AT_SIZE) && (va.va_size == 0))
2003 2004 trunc = TRUE;
2004 2005 else
2005 2006 trunc = FALSE;
2006 2007
2007 2008 if (rfs4_check_delegated(FWRITE, vp, trunc)) {
2008 2009 VN_RELE(vp);
2009 2010 curthread->t_flag |= T_WOULDBLOCK;
2010 2011 goto out;
2011 2012 }
2012 2013 va.va_mask = AT_ALL;
2013 2014
2014 2015 error = VOP_GETATTR(vp, &va, 0, cr, NULL);
2015 2016
2016 2017 /* check for overflows */
2017 2018 if (!error) {
2018 2019 acl_perm(vp, exi, &va, cr);
2019 2020 error = vattr_to_nattr(&va, &dr->dr_attr);
2020 2021 if (!error) {
2021 2022 error = makefh(&dr->dr_fhandle, vp,
2022 2023 exi);
2023 2024 }
2024 2025 }
2025 2026 /*
2026 2027 * Force modified metadata out to stable storage.
2027 2028 *
2028 2029 * if a underlying vp exists, pass it to VOP_FSYNC
2029 2030 */
2030 2031 if (VOP_REALVP(vp, &realvp, NULL) == 0)
2031 2032 (void) VOP_FSYNC(realvp, FNODSYNC, cr, NULL);
2032 2033 else
2033 2034 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
2034 2035 VN_RELE(vp);
2035 2036 }
2036 2037
2037 2038 if (in_crit) {
2038 2039 nbl_end_crit(tvp);
2039 2040 VN_RELE(tvp);
2040 2041 }
2041 2042 }
2042 2043
2043 2044 /*
2044 2045 * Force modified data and metadata out to stable storage.
2045 2046 */
2046 2047 (void) VOP_FSYNC(dvp, 0, cr, NULL);
2047 2048
2048 2049 out:
2049 2050
2050 2051 VN_RELE(dvp);
2051 2052
2052 2053 dr->dr_status = puterrno(error);
2053 2054
2054 2055 if (name != args->ca_da.da_name)
2055 2056 kmem_free(name, MAXPATHLEN);
2056 2057 }
2057 2058 void *
2058 2059 rfs_create_getfh(struct nfscreatargs *args)
2059 2060 {
2060 2061 return (args->ca_da.da_fhandle);
2061 2062 }
2062 2063
2063 2064 /*
2064 2065 * Remove a file.
2065 2066 * Remove named file from parent directory.
2066 2067 */
2067 2068 /* ARGSUSED */
2068 2069 void
2069 2070 rfs_remove(struct nfsdiropargs *da, enum nfsstat *status,
2070 2071 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
2071 2072 {
2072 2073 int error = 0;
2073 2074 vnode_t *vp;
2074 2075 vnode_t *targvp;
2075 2076 int in_crit = 0;
2076 2077
2077 2078 /*
2078 2079 * Disallow NULL paths
2079 2080 */
2080 2081 if (da->da_name == NULL || *da->da_name == '\0') {
2081 2082 *status = NFSERR_ACCES;
2082 2083 return;
2083 2084 }
2084 2085
2085 2086 vp = nfs_fhtovp(da->da_fhandle, exi);
2086 2087 if (vp == NULL) {
2087 2088 *status = NFSERR_STALE;
2088 2089 return;
2089 2090 }
2090 2091
2091 2092 if (rdonly(ro, vp)) {
2092 2093 VN_RELE(vp);
2093 2094 *status = NFSERR_ROFS;
2094 2095 return;
2095 2096 }
2096 2097
2097 2098 /*
2098 2099 * Check for a conflict with a non-blocking mandatory share reservation.
2099 2100 */
2100 2101 error = VOP_LOOKUP(vp, da->da_name, &targvp, NULL, 0,
2101 2102 NULL, cr, NULL, NULL, NULL);
2102 2103 if (error != 0) {
2103 2104 VN_RELE(vp);
2104 2105 *status = puterrno(error);
2105 2106 return;
2106 2107 }
2107 2108
2108 2109 /*
2109 2110 * If the file is delegated to an v4 client, then initiate
2110 2111 * recall and drop this request (by setting T_WOULDBLOCK).
2111 2112 * The client will eventually re-transmit the request and
2112 2113 * (hopefully), by then, the v4 client will have returned
2113 2114 * the delegation.
2114 2115 */
2115 2116
2116 2117 if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
2117 2118 VN_RELE(vp);
2118 2119 VN_RELE(targvp);
2119 2120 curthread->t_flag |= T_WOULDBLOCK;
2120 2121 return;
2121 2122 }
2122 2123
2123 2124 if (nbl_need_check(targvp)) {
2124 2125 nbl_start_crit(targvp, RW_READER);
2125 2126 in_crit = 1;
2126 2127 if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
2127 2128 error = EACCES;
2128 2129 goto out;
2129 2130 }
2130 2131 }
2131 2132
2132 2133 error = VOP_REMOVE(vp, da->da_name, cr, NULL, 0);
2133 2134
2134 2135 /*
2135 2136 * Force modified data and metadata out to stable storage.
2136 2137 */
2137 2138 (void) VOP_FSYNC(vp, 0, cr, NULL);
2138 2139
2139 2140 out:
2140 2141 if (in_crit)
2141 2142 nbl_end_crit(targvp);
2142 2143 VN_RELE(targvp);
2143 2144 VN_RELE(vp);
2144 2145
2145 2146 *status = puterrno(error);
2146 2147
2147 2148 }
2148 2149
2149 2150 void *
2150 2151 rfs_remove_getfh(struct nfsdiropargs *da)
2151 2152 {
2152 2153 return (da->da_fhandle);
2153 2154 }
2154 2155
2155 2156 /*
2156 2157 * rename a file
2157 2158 * Give a file (from) a new name (to).
2158 2159 */
2159 2160 /* ARGSUSED */
2160 2161 void
2161 2162 rfs_rename(struct nfsrnmargs *args, enum nfsstat *status,
2162 2163 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
2163 2164 {
2164 2165 int error = 0;
2165 2166 vnode_t *fromvp;
2166 2167 vnode_t *tovp;
2167 2168 struct exportinfo *to_exi;
2168 2169 fhandle_t *fh;
2169 2170 vnode_t *srcvp;
2170 2171 vnode_t *targvp;
2171 2172 int in_crit = 0;
2172 2173
2173 2174 fromvp = nfs_fhtovp(args->rna_from.da_fhandle, exi);
2174 2175 if (fromvp == NULL) {
2175 2176 *status = NFSERR_STALE;
2176 2177 return;
2177 2178 }
2178 2179
2179 2180 fh = args->rna_to.da_fhandle;
2180 2181 to_exi = checkexport(&fh->fh_fsid, (fid_t *)&fh->fh_xlen);
2181 2182 if (to_exi == NULL) {
2182 2183 VN_RELE(fromvp);
2183 2184 *status = NFSERR_ACCES;
2184 2185 return;
2185 2186 }
2186 2187 exi_rele(to_exi);
2187 2188
2188 2189 if (to_exi != exi) {
2189 2190 VN_RELE(fromvp);
2190 2191 *status = NFSERR_XDEV;
2191 2192 return;
2192 2193 }
2193 2194
2194 2195 tovp = nfs_fhtovp(args->rna_to.da_fhandle, exi);
2195 2196 if (tovp == NULL) {
2196 2197 VN_RELE(fromvp);
2197 2198 *status = NFSERR_STALE;
2198 2199 return;
2199 2200 }
2200 2201
2201 2202 if (fromvp->v_type != VDIR || tovp->v_type != VDIR) {
2202 2203 VN_RELE(tovp);
2203 2204 VN_RELE(fromvp);
2204 2205 *status = NFSERR_NOTDIR;
2205 2206 return;
2206 2207 }
2207 2208
2208 2209 /*
2209 2210 * Disallow NULL paths
2210 2211 */
2211 2212 if (args->rna_from.da_name == NULL || *args->rna_from.da_name == '\0' ||
2212 2213 args->rna_to.da_name == NULL || *args->rna_to.da_name == '\0') {
2213 2214 VN_RELE(tovp);
2214 2215 VN_RELE(fromvp);
2215 2216 *status = NFSERR_ACCES;
2216 2217 return;
2217 2218 }
2218 2219
2219 2220 if (rdonly(ro, tovp)) {
2220 2221 VN_RELE(tovp);
2221 2222 VN_RELE(fromvp);
2222 2223 *status = NFSERR_ROFS;
2223 2224 return;
2224 2225 }
2225 2226
2226 2227 /*
2227 2228 * Check for a conflict with a non-blocking mandatory share reservation.
2228 2229 */
2229 2230 error = VOP_LOOKUP(fromvp, args->rna_from.da_name, &srcvp, NULL, 0,
2230 2231 NULL, cr, NULL, NULL, NULL);
2231 2232 if (error != 0) {
2232 2233 VN_RELE(tovp);
2233 2234 VN_RELE(fromvp);
2234 2235 *status = puterrno(error);
2235 2236 return;
2236 2237 }
2237 2238
2238 2239 /* Check for delegations on the source file */
2239 2240
2240 2241 if (rfs4_check_delegated(FWRITE, srcvp, FALSE)) {
2241 2242 VN_RELE(tovp);
2242 2243 VN_RELE(fromvp);
2243 2244 VN_RELE(srcvp);
2244 2245 curthread->t_flag |= T_WOULDBLOCK;
2245 2246 return;
2246 2247 }
2247 2248
2248 2249 /* Check for delegation on the file being renamed over, if it exists */
2249 2250
2250 2251 if (nfs4_get_deleg_policy() != SRV_NEVER_DELEGATE &&
2251 2252 VOP_LOOKUP(tovp, args->rna_to.da_name, &targvp, NULL, 0, NULL, cr,
2252 2253 NULL, NULL, NULL) == 0) {
2253 2254
2254 2255 if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
2255 2256 VN_RELE(tovp);
2256 2257 VN_RELE(fromvp);
2257 2258 VN_RELE(srcvp);
2258 2259 VN_RELE(targvp);
2259 2260 curthread->t_flag |= T_WOULDBLOCK;
2260 2261 return;
2261 2262 }
2262 2263 VN_RELE(targvp);
2263 2264 }
2264 2265
2265 2266
2266 2267 if (nbl_need_check(srcvp)) {
2267 2268 nbl_start_crit(srcvp, RW_READER);
2268 2269 in_crit = 1;
2269 2270 if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL)) {
2270 2271 error = EACCES;
2271 2272 goto out;
2272 2273 }
2273 2274 }
2274 2275
2275 2276 error = VOP_RENAME(fromvp, args->rna_from.da_name,
2276 2277 tovp, args->rna_to.da_name, cr, NULL, 0);
2277 2278
2278 2279 if (error == 0)
2279 2280 vn_renamepath(tovp, srcvp, args->rna_to.da_name,
2280 2281 strlen(args->rna_to.da_name));
2281 2282
2282 2283 /*
2283 2284 * Force modified data and metadata out to stable storage.
2284 2285 */
2285 2286 (void) VOP_FSYNC(tovp, 0, cr, NULL);
2286 2287 (void) VOP_FSYNC(fromvp, 0, cr, NULL);
2287 2288
2288 2289 out:
2289 2290 if (in_crit)
2290 2291 nbl_end_crit(srcvp);
2291 2292 VN_RELE(srcvp);
2292 2293 VN_RELE(tovp);
2293 2294 VN_RELE(fromvp);
2294 2295
2295 2296 *status = puterrno(error);
2296 2297
2297 2298 }
2298 2299 void *
2299 2300 rfs_rename_getfh(struct nfsrnmargs *args)
2300 2301 {
2301 2302 return (args->rna_from.da_fhandle);
2302 2303 }
2303 2304
2304 2305 /*
2305 2306 * Link to a file.
2306 2307 * Create a file (to) which is a hard link to the given file (from).
2307 2308 */
2308 2309 /* ARGSUSED */
2309 2310 void
2310 2311 rfs_link(struct nfslinkargs *args, enum nfsstat *status,
2311 2312 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
2312 2313 {
2313 2314 int error;
2314 2315 vnode_t *fromvp;
2315 2316 vnode_t *tovp;
2316 2317 struct exportinfo *to_exi;
2317 2318 fhandle_t *fh;
2318 2319
2319 2320 fromvp = nfs_fhtovp(args->la_from, exi);
2320 2321 if (fromvp == NULL) {
2321 2322 *status = NFSERR_STALE;
2322 2323 return;
2323 2324 }
2324 2325
2325 2326 fh = args->la_to.da_fhandle;
2326 2327 to_exi = checkexport(&fh->fh_fsid, (fid_t *)&fh->fh_xlen);
2327 2328 if (to_exi == NULL) {
2328 2329 VN_RELE(fromvp);
2329 2330 *status = NFSERR_ACCES;
2330 2331 return;
2331 2332 }
2332 2333 exi_rele(to_exi);
2333 2334
2334 2335 if (to_exi != exi) {
2335 2336 VN_RELE(fromvp);
2336 2337 *status = NFSERR_XDEV;
2337 2338 return;
2338 2339 }
2339 2340
2340 2341 tovp = nfs_fhtovp(args->la_to.da_fhandle, exi);
2341 2342 if (tovp == NULL) {
2342 2343 VN_RELE(fromvp);
2343 2344 *status = NFSERR_STALE;
2344 2345 return;
2345 2346 }
2346 2347
2347 2348 if (tovp->v_type != VDIR) {
2348 2349 VN_RELE(tovp);
2349 2350 VN_RELE(fromvp);
2350 2351 *status = NFSERR_NOTDIR;
2351 2352 return;
2352 2353 }
2353 2354 /*
2354 2355 * Disallow NULL paths
2355 2356 */
2356 2357 if (args->la_to.da_name == NULL || *args->la_to.da_name == '\0') {
2357 2358 VN_RELE(tovp);
2358 2359 VN_RELE(fromvp);
2359 2360 *status = NFSERR_ACCES;
2360 2361 return;
2361 2362 }
2362 2363
2363 2364 if (rdonly(ro, tovp)) {
2364 2365 VN_RELE(tovp);
2365 2366 VN_RELE(fromvp);
2366 2367 *status = NFSERR_ROFS;
2367 2368 return;
2368 2369 }
2369 2370
2370 2371 error = VOP_LINK(tovp, fromvp, args->la_to.da_name, cr, NULL, 0);
2371 2372
2372 2373 /*
2373 2374 * Force modified data and metadata out to stable storage.
2374 2375 */
2375 2376 (void) VOP_FSYNC(tovp, 0, cr, NULL);
2376 2377 (void) VOP_FSYNC(fromvp, FNODSYNC, cr, NULL);
2377 2378
2378 2379 VN_RELE(tovp);
2379 2380 VN_RELE(fromvp);
2380 2381
2381 2382 *status = puterrno(error);
2382 2383
2383 2384 }
2384 2385 void *
2385 2386 rfs_link_getfh(struct nfslinkargs *args)
2386 2387 {
2387 2388 return (args->la_from);
2388 2389 }
2389 2390
2390 2391 /*
2391 2392 * Symbolicly link to a file.
2392 2393 * Create a file (to) with the given attributes which is a symbolic link
2393 2394 * to the given path name (to).
2394 2395 */
2395 2396 void
2396 2397 rfs_symlink(struct nfsslargs *args, enum nfsstat *status,
2397 2398 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
2398 2399 {
2399 2400 int error;
2400 2401 struct vattr va;
2401 2402 vnode_t *vp;
2402 2403 vnode_t *svp;
2403 2404 int lerror;
2404 2405 struct sockaddr *ca;
2405 2406 char *name = NULL;
2406 2407
2407 2408 /*
2408 2409 * Disallow NULL paths
2409 2410 */
2410 2411 if (args->sla_from.da_name == NULL || *args->sla_from.da_name == '\0') {
2411 2412 *status = NFSERR_ACCES;
2412 2413 return;
2413 2414 }
2414 2415
2415 2416 vp = nfs_fhtovp(args->sla_from.da_fhandle, exi);
2416 2417 if (vp == NULL) {
2417 2418 *status = NFSERR_STALE;
2418 2419 return;
2419 2420 }
2420 2421
2421 2422 if (rdonly(ro, vp)) {
2422 2423 VN_RELE(vp);
2423 2424 *status = NFSERR_ROFS;
2424 2425 return;
2425 2426 }
2426 2427
2427 2428 error = sattr_to_vattr(args->sla_sa, &va);
2428 2429 if (error) {
2429 2430 VN_RELE(vp);
2430 2431 *status = puterrno(error);
2431 2432 return;
2432 2433 }
2433 2434
2434 2435 if (!(va.va_mask & AT_MODE)) {
2435 2436 VN_RELE(vp);
2436 2437 *status = NFSERR_INVAL;
2437 2438 return;
2438 2439 }
2439 2440
2440 2441 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2441 2442 name = nfscmd_convname(ca, exi, args->sla_tnm,
2442 2443 NFSCMD_CONV_INBOUND, MAXPATHLEN);
2443 2444
2444 2445 if (name == NULL) {
2445 2446 *status = NFSERR_ACCES;
2446 2447 return;
2447 2448 }
2448 2449
2449 2450 va.va_type = VLNK;
2450 2451 va.va_mask |= AT_TYPE;
2451 2452
2452 2453 error = VOP_SYMLINK(vp, args->sla_from.da_name, &va, name, cr, NULL, 0);
2453 2454
2454 2455 /*
2455 2456 * Force new data and metadata out to stable storage.
2456 2457 */
2457 2458 lerror = VOP_LOOKUP(vp, args->sla_from.da_name, &svp, NULL, 0,
2458 2459 NULL, cr, NULL, NULL, NULL);
2459 2460
2460 2461 if (!lerror) {
2461 2462 (void) VOP_FSYNC(svp, 0, cr, NULL);
2462 2463 VN_RELE(svp);
2463 2464 }
2464 2465
2465 2466 /*
2466 2467 * Force modified data and metadata out to stable storage.
2467 2468 */
2468 2469 (void) VOP_FSYNC(vp, 0, cr, NULL);
2469 2470
2470 2471 VN_RELE(vp);
2471 2472
2472 2473 *status = puterrno(error);
2473 2474 if (name != args->sla_tnm)
2474 2475 kmem_free(name, MAXPATHLEN);
2475 2476
2476 2477 }
2477 2478 void *
2478 2479 rfs_symlink_getfh(struct nfsslargs *args)
2479 2480 {
2480 2481 return (args->sla_from.da_fhandle);
2481 2482 }
2482 2483
/*
 * Make a directory.
 * Create a directory with the given name, parent directory, and attributes.
 * Returns a file handle and attributes for the new directory.
 */
/* ARGSUSED */
void
rfs_mkdir(struct nfscreatargs *args, struct nfsdiropres *dr,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error;
	struct vattr va;	/* attrs in from client, then new dir's attrs */
	vnode_t *dvp = NULL;	/* the newly created directory */
	vnode_t *vp;		/* the parent directory from the fhandle */
	char *name = args->ca_da.da_name;

	/*
	 * Disallow NULL paths
	 */
	if (name == NULL || *name == '\0') {
		dr->dr_status = NFSERR_ACCES;
		return;
	}

	vp = nfs_fhtovp(args->ca_da.da_fhandle, exi);
	if (vp == NULL) {
		dr->dr_status = NFSERR_STALE;
		return;
	}

	if (rdonly(ro, vp)) {
		VN_RELE(vp);
		dr->dr_status = NFSERR_ROFS;
		return;
	}

	error = sattr_to_vattr(args->ca_sa, &va);
	if (error) {
		VN_RELE(vp);
		dr->dr_status = puterrno(error);
		return;
	}

	/* The client must supply an initial mode for the directory. */
	if (!(va.va_mask & AT_MODE)) {
		VN_RELE(vp);
		dr->dr_status = NFSERR_INVAL;
		return;
	}

	va.va_type = VDIR;
	va.va_mask |= AT_TYPE;

	error = VOP_MKDIR(vp, name, &va, &dvp, cr, NULL, 0, NULL);

	if (!error) {
		/*
		 * Attributes of the newly created directory should
		 * be returned to the client.
		 */
		va.va_mask = AT_ALL; /* We want everything */
		error = VOP_GETATTR(dvp, &va, 0, cr, NULL);

		/* check for overflows */
		if (!error) {
			/*
			 * NOTE(review): acl_perm() is passed the parent
			 * vnode (vp) while the attributes were fetched
			 * from the new directory (dvp) -- confirm this
			 * is intentional.
			 */
			acl_perm(vp, exi, &va, cr);
			error = vattr_to_nattr(&va, &dr->dr_attr);
			if (!error) {
				error = makefh(&dr->dr_fhandle, dvp, exi);
			}
		}
		/*
		 * Force new data and metadata out to stable storage.
		 */
		(void) VOP_FSYNC(dvp, 0, cr, NULL);
		VN_RELE(dvp);
	}

	/*
	 * Force modified data and metadata out to stable storage.
	 */
	(void) VOP_FSYNC(vp, 0, cr, NULL);

	VN_RELE(vp);

	dr->dr_status = puterrno(error);
}
2570 2571 void *
2571 2572 rfs_mkdir_getfh(struct nfscreatargs *args)
2572 2573 {
2573 2574 return (args->ca_da.da_fhandle);
2574 2575 }
2575 2576
2576 2577 /*
2577 2578 * Remove a directory.
2578 2579 * Remove the given directory name from the given parent directory.
2579 2580 */
2580 2581 /* ARGSUSED */
2581 2582 void
2582 2583 rfs_rmdir(struct nfsdiropargs *da, enum nfsstat *status,
2583 2584 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
2584 2585 {
2585 2586 int error;
2586 2587 vnode_t *vp;
2587 2588
2588 2589 /*
2589 2590 * Disallow NULL paths
2590 2591 */
2591 2592 if (da->da_name == NULL || *da->da_name == '\0') {
2592 2593 *status = NFSERR_ACCES;
2593 2594 return;
2594 2595 }
2595 2596
2596 2597 vp = nfs_fhtovp(da->da_fhandle, exi);
2597 2598 if (vp == NULL) {
2598 2599 *status = NFSERR_STALE;
2599 2600 return;
2600 2601 }
2601 2602
2602 2603 if (rdonly(ro, vp)) {
2603 2604 VN_RELE(vp);
2604 2605 *status = NFSERR_ROFS;
2605 2606 return;
2606 2607 }
2607 2608
2608 2609 /*
2609 2610 * VOP_RMDIR takes a third argument (the current
2610 2611 * directory of the process). That's because someone
2611 2612 * wants to return EINVAL if one tries to remove ".".
2612 2613 * Of course, NFS servers have no idea what their
2613 2614 * clients' current directories are. We fake it by
2614 2615 * supplying a vnode known to exist and illegal to
2615 2616 * remove.
2616 2617 */
2617 2618 error = VOP_RMDIR(vp, da->da_name, ZONE_ROOTVP(), cr, NULL, 0);
2618 2619
2619 2620 /*
2620 2621 * Force modified data and metadata out to stable storage.
2621 2622 */
2622 2623 (void) VOP_FSYNC(vp, 0, cr, NULL);
2623 2624
2624 2625 VN_RELE(vp);
2625 2626
2626 2627 /*
2627 2628 * System V defines rmdir to return EEXIST, not ENOTEMPTY,
2628 2629 * if the directory is not empty. A System V NFS server
2629 2630 * needs to map NFSERR_EXIST to NFSERR_NOTEMPTY to transmit
2630 2631 * over the wire.
2631 2632 */
2632 2633 if (error == EEXIST)
2633 2634 *status = NFSERR_NOTEMPTY;
2634 2635 else
2635 2636 *status = puterrno(error);
2636 2637
2637 2638 }
2638 2639 void *
2639 2640 rfs_rmdir_getfh(struct nfsdiropargs *da)
2640 2641 {
2641 2642 return (da->da_fhandle);
2642 2643 }
2643 2644
/*
 * Read directory entries.
 * Returns up to rda_count bytes of dirent64 records starting at
 * rda_offset, with names converted for the client's character set
 * when the export requires it.
 */
/* ARGSUSED */
void
rfs_readdir(struct nfsrddirargs *rda, struct nfsrddirres *rd,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error;
	int iseof;		/* set by VOP_READDIR at end of directory */
	struct iovec iov;
	struct uio uio;
	vnode_t *vp;
	char *ndata = NULL;	/* charset-converted entry buffer, if any */
	struct sockaddr *ca;
	size_t nents;		/* number of entries read */
	int ret;		/* entries dropped by charset conversion */

	vp = nfs_fhtovp(&rda->rda_fh, exi);
	if (vp == NULL) {
		rd->rd_entries = NULL;
		rd->rd_status = NFSERR_STALE;
		return;
	}

	if (vp->v_type != VDIR) {
		VN_RELE(vp);
		rd->rd_entries = NULL;
		rd->rd_status = NFSERR_NOTDIR;
		return;
	}

	(void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);

	error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);

	if (error) {
		rd->rd_entries = NULL;
		goto bad;
	}

	/* A zero-byte request returns no entries and does not claim EOF. */
	if (rda->rda_count == 0) {
		rd->rd_entries = NULL;
		rd->rd_size = 0;
		rd->rd_eof = FALSE;
		goto bad;
	}

	rda->rda_count = MIN(rda->rda_count, NFS_MAXDATA);

	/*
	 * Allocate data for entries. This will be freed by rfs_rddirfree.
	 */
	rd->rd_bufsize = (uint_t)rda->rda_count;
	rd->rd_entries = kmem_alloc(rd->rd_bufsize, KM_SLEEP);

	/*
	 * Set up io vector to read directory data
	 */
	iov.iov_base = (caddr_t)rd->rd_entries;
	iov.iov_len = rda->rda_count;
	uio.uio_iov = &iov;
	uio.uio_iovcnt = 1;
	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_extflg = UIO_COPY_CACHED;
	uio.uio_loffset = (offset_t)rda->rda_offset;
	uio.uio_resid = rda->rda_count;

	/*
	 * read directory
	 */
	error = VOP_READDIR(vp, &uio, cr, &iseof, NULL, 0);

	/*
	 * Clean up
	 */
	if (!error) {
		/*
		 * set size and eof: nothing read at all means EOF,
		 * otherwise the byte count is what VOP_READDIR consumed.
		 */
		if (uio.uio_resid == rda->rda_count) {
			rd->rd_size = 0;
			rd->rd_eof = TRUE;
		} else {
			rd->rd_size = (uint32_t)(rda->rda_count -
			    uio.uio_resid);
			rd->rd_eof = iseof ? TRUE : FALSE;
		}
	}

	/*
	 * Convert entry names for the client's character set.
	 * nfscmd_convdirplus() may allocate a replacement buffer and may
	 * drop entries that no longer fit after conversion.
	 */
	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
	nents = nfscmd_countents((char *)rd->rd_entries, rd->rd_size);
	ret = nfscmd_convdirplus(ca, exi, (char *)rd->rd_entries, nents,
	    rda->rda_count, &ndata);

	if (ret != 0) {
		size_t dropbytes;
		/*
		 * We had to drop one or more entries in order to fit
		 * during the character conversion.  We need to patch
		 * up the size and eof info.
		 */
		if (rd->rd_eof)
			rd->rd_eof = FALSE;
		dropbytes = nfscmd_dropped_entrysize(
		    (struct dirent64 *)rd->rd_entries, nents, ret);
		rd->rd_size -= dropbytes;
	}
	if (ndata == NULL) {
		/* No conversion took place; serve the original buffer. */
		ndata = (char *)rd->rd_entries;
	} else if (ndata != (char *)rd->rd_entries) {
		/* A converted copy was allocated; switch to it. */
		kmem_free(rd->rd_entries, rd->rd_bufsize);
		rd->rd_entries = (void *)ndata;
		rd->rd_bufsize = rda->rda_count;
	}

bad:
	VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);

#if 0 /* notyet */
	/*
	 * Don't do this.  It causes local disk writes when just
	 * reading the file and the overhead is deemed larger
	 * than the benefit.
	 */
	/*
	 * Force modified metadata out to stable storage.
	 */
	(void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
#endif

	VN_RELE(vp);

	rd->rd_status = puterrno(error);
}
2777 2778 void *
2778 2779 rfs_readdir_getfh(struct nfsrddirargs *rda)
2779 2780 {
2780 2781 return (&rda->rda_fh);
2781 2782 }
2782 2783 void
2783 2784 rfs_rddirfree(struct nfsrddirres *rd)
2784 2785 {
2785 2786 if (rd->rd_entries != NULL)
2786 2787 kmem_free(rd->rd_entries, rd->rd_bufsize);
2787 2788 }
2788 2789
2789 2790 /* ARGSUSED */
2790 2791 void
2791 2792 rfs_statfs(fhandle_t *fh, struct nfsstatfs *fs, struct exportinfo *exi,
2792 2793 struct svc_req *req, cred_t *cr, bool_t ro)
2793 2794 {
2794 2795 int error;
2795 2796 struct statvfs64 sb;
2796 2797 vnode_t *vp;
2797 2798
2798 2799 vp = nfs_fhtovp(fh, exi);
2799 2800 if (vp == NULL) {
2800 2801 fs->fs_status = NFSERR_STALE;
2801 2802 return;
2802 2803 }
2803 2804
2804 2805 error = VFS_STATVFS(vp->v_vfsp, &sb);
2805 2806
2806 2807 if (!error) {
2807 2808 fs->fs_tsize = nfstsize();
2808 2809 fs->fs_bsize = sb.f_frsize;
2809 2810 fs->fs_blocks = sb.f_blocks;
2810 2811 fs->fs_bfree = sb.f_bfree;
2811 2812 fs->fs_bavail = sb.f_bavail;
2812 2813 }
2813 2814
2814 2815 VN_RELE(vp);
2815 2816
2816 2817 fs->fs_status = puterrno(error);
2817 2818
2818 2819 }
2819 2820 void *
2820 2821 rfs_statfs_getfh(fhandle_t *fh)
2821 2822 {
2822 2823 return (fh);
2823 2824 }
2824 2825
/*
 * Convert an NFSv2 wire sattr into a vattr.  A va_mask bit is set only
 * for fields the client actually supplied; an all-ones field on the
 * wire means "don't change".  Returns 0, or EOVERFLOW on a 32-bit
 * kernel when a supplied time cannot be represented.
 */
static int
sattr_to_vattr(struct nfssattr *sa, struct vattr *vap)
{
	vap->va_mask = 0;

	/*
	 * There was a sign extension bug in some VFS based systems
	 * which stored the mode as a short.  When it would get
	 * assigned to a u_long, no sign extension would occur.
	 * It needed to, but this wasn't noticed because sa_mode
	 * would then get assigned back to the short, thus ignoring
	 * the upper 16 bits of sa_mode.
	 *
	 * To make this implementation work for both broken
	 * clients and good clients, we check for both versions
	 * of the mode.
	 */
	if (sa->sa_mode != (uint32_t)((ushort_t)-1) &&
	    sa->sa_mode != (uint32_t)-1) {
		vap->va_mask |= AT_MODE;
		vap->va_mode = sa->sa_mode;
	}
	if (sa->sa_uid != (uint32_t)-1) {
		vap->va_mask |= AT_UID;
		vap->va_uid = sa->sa_uid;
	}
	if (sa->sa_gid != (uint32_t)-1) {
		vap->va_mask |= AT_GID;
		vap->va_gid = sa->sa_gid;
	}
	if (sa->sa_size != (uint32_t)-1) {
		vap->va_mask |= AT_SIZE;
		vap->va_size = sa->sa_size;
	}
	if (sa->sa_atime.tv_sec != (int32_t)-1 &&
	    sa->sa_atime.tv_usec != (int32_t)-1) {
#ifndef _LP64
		/* return error if time overflow */
		if (!NFS2_TIME_OK(sa->sa_atime.tv_sec))
			return (EOVERFLOW);
#endif
		vap->va_mask |= AT_ATIME;
		/*
		 * nfs protocol defines times as unsigned so don't extend sign,
		 * unless sysadmin set nfs_allow_preepoch_time.
		 */
		NFS_TIME_T_CONVERT(vap->va_atime.tv_sec, sa->sa_atime.tv_sec);
		/* wire times are microseconds; vattr wants nanoseconds */
		vap->va_atime.tv_nsec = (uint32_t)(sa->sa_atime.tv_usec * 1000);
	}
	if (sa->sa_mtime.tv_sec != (int32_t)-1 &&
	    sa->sa_mtime.tv_usec != (int32_t)-1) {
#ifndef _LP64
		/* return error if time overflow */
		if (!NFS2_TIME_OK(sa->sa_mtime.tv_sec))
			return (EOVERFLOW);
#endif
		vap->va_mask |= AT_MTIME;
		/*
		 * nfs protocol defines times as unsigned so don't extend sign,
		 * unless sysadmin set nfs_allow_preepoch_time.
		 */
		NFS_TIME_T_CONVERT(vap->va_mtime.tv_sec, sa->sa_mtime.tv_sec);
		/* wire times are microseconds; vattr wants nanoseconds */
		vap->va_mtime.tv_nsec = (uint32_t)(sa->sa_mtime.tv_usec * 1000);
	}
	return (0);
}
2891 2892
/*
 * Mapping from vnode type (vtype_t, used as index) to the NFSv2
 * over-the-wire file type.  Zero entries have no NFSv2 representation;
 * in particular VFIFO is remapped separately in vattr_to_nattr() via
 * NA_SETFIFO().
 */
static const enum nfsftype vt_to_nf[] = {
	0, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, 0, 0, 0, NFSOC, 0
};
2895 2896
/*
 * check the following fields for overflow: nodeid, size, and time.
 * There could be a problem when converting 64-bit LP64 fields
 * into 32-bit ones.  Return an error if there is an overflow.
 */
int
vattr_to_nattr(struct vattr *vap, struct nfsfattr *na)
{
	ASSERT(vap->va_type >= VNON && vap->va_type <= VBAD);
	na->na_type = vt_to_nf[vap->va_type];

	/* -1 means "unknown"; otherwise fold the file type into the mode */
	if (vap->va_mode == (unsigned short) -1)
		na->na_mode = (uint32_t)-1;
	else
		na->na_mode = VTTOIF(vap->va_type) | vap->va_mode;

	if (vap->va_uid == (unsigned short)(-1))
		na->na_uid = (uint32_t)(-1);
	else if (vap->va_uid == UID_NOBODY)
		na->na_uid = (uint32_t)NFS_UID_NOBODY;
	else
		na->na_uid = vap->va_uid;

	if (vap->va_gid == (unsigned short)(-1))
		na->na_gid = (uint32_t)-1;
	else if (vap->va_gid == GID_NOBODY)
		na->na_gid = (uint32_t)NFS_GID_NOBODY;
	else
		na->na_gid = vap->va_gid;

	/*
	 * Do we need to check fsid for overflow?  It is 64-bit in the
	 * vattr, but are bigger than 32 bit values supported?
	 */
	na->na_fsid = vap->va_fsid;

	na->na_nodeid = vap->va_nodeid;

	/*
	 * Check to make sure that the nodeid is representable over the
	 * wire without losing bits.
	 */
	if (vap->va_nodeid != (u_longlong_t)na->na_nodeid)
		return (EFBIG);
	na->na_nlink = vap->va_nlink;

	/*
	 * Check for big files here, instead of at the caller.  See
	 * comments in cstat for large special file explanation.
	 */
	if (vap->va_size > (u_longlong_t)MAXOFF32_T) {
		if ((vap->va_type == VREG) || (vap->va_type == VDIR))
			return (EFBIG);
		if ((vap->va_type == VBLK) || (vap->va_type == VCHR)) {
			/* UNKNOWN_SIZE | OVERFLOW */
			na->na_size = MAXOFF32_T;
		} else
			na->na_size = vap->va_size;
	} else
		na->na_size = vap->va_size;

	/*
	 * If the vnode times overflow the 32-bit times that NFS2
	 * uses on the wire then return an error.
	 */
	if (!NFS_VAP_TIME_OK(vap)) {
		return (EOVERFLOW);
	}
	/* vattr times are nanoseconds; the wire carries microseconds */
	na->na_atime.tv_sec = vap->va_atime.tv_sec;
	na->na_atime.tv_usec = vap->va_atime.tv_nsec / 1000;

	na->na_mtime.tv_sec = vap->va_mtime.tv_sec;
	na->na_mtime.tv_usec = vap->va_mtime.tv_nsec / 1000;

	na->na_ctime.tv_sec = vap->va_ctime.tv_sec;
	na->na_ctime.tv_usec = vap->va_ctime.tv_nsec / 1000;

	/*
	 * If the dev_t will fit into 16 bits then compress
	 * it, otherwise leave it alone.  See comments in
	 * nfs_client.c.
	 */
	if (getminor(vap->va_rdev) <= SO4_MAXMIN &&
	    getmajor(vap->va_rdev) <= SO4_MAXMAJ)
		na->na_rdev = nfsv2_cmpdev(vap->va_rdev);
	else
		(void) cmpldev(&na->na_rdev, vap->va_rdev);

	na->na_blocks = vap->va_nblocks;
	na->na_blocksize = vap->va_blksize;

	/*
	 * This bit of ugliness is a *TEMPORARY* hack to preserve the
	 * over-the-wire protocols for named-pipe vnodes.  It remaps the
	 * VFIFO type to the special over-the-wire type. (see note in nfs.h)
	 *
	 * BUYER BEWARE:
	 *  If you are porting the NFS to a non-Sun server, you probably
	 *  don't want to include the following block of code.  The
	 *  over-the-wire special file types will be changing with the
	 *  NFS Protocol Revision.
	 */
	if (vap->va_type == VFIFO)
		NA_SETFIFO(na);
	return (0);
}
3002 3003
3003 3004 /*
3004 3005 * acl v2 support: returns approximate permission.
3005 3006 * default: returns minimal permission (more restrictive)
3006 3007 * aclok: returns maximal permission (less restrictive)
3007 3008 * This routine changes the permissions that are alaredy in *va.
3008 3009 * If a file has minimal ACL, i.e. aclcnt == MIN_ACL_ENTRIES,
3009 3010 * CLASS_OBJ is always the same as GROUP_OBJ entry.
3010 3011 */
3011 3012 static void
3012 3013 acl_perm(struct vnode *vp, struct exportinfo *exi, struct vattr *va, cred_t *cr)
3013 3014 {
3014 3015 vsecattr_t vsa;
3015 3016 int aclcnt;
3016 3017 aclent_t *aclentp;
3017 3018 mode_t mask_perm;
3018 3019 mode_t grp_perm;
3019 3020 mode_t other_perm;
3020 3021 mode_t other_orig;
3021 3022 int error;
3022 3023
3023 3024 /* dont care default acl */
3024 3025 vsa.vsa_mask = (VSA_ACL | VSA_ACLCNT);
3025 3026 error = VOP_GETSECATTR(vp, &vsa, 0, cr, NULL);
3026 3027
3027 3028 if (!error) {
3028 3029 aclcnt = vsa.vsa_aclcnt;
3029 3030 if (aclcnt > MIN_ACL_ENTRIES) {
3030 3031 /* non-trivial ACL */
3031 3032 aclentp = vsa.vsa_aclentp;
3032 3033 if (exi->exi_export.ex_flags & EX_ACLOK) {
3033 3034 /* maximal permissions */
3034 3035 grp_perm = 0;
3035 3036 other_perm = 0;
3036 3037 for (; aclcnt > 0; aclcnt--, aclentp++) {
3037 3038 switch (aclentp->a_type) {
3038 3039 case USER_OBJ:
3039 3040 break;
3040 3041 case USER:
3041 3042 grp_perm |=
3042 3043 aclentp->a_perm << 3;
3043 3044 other_perm |= aclentp->a_perm;
3044 3045 break;
3045 3046 case GROUP_OBJ:
3046 3047 grp_perm |=
3047 3048 aclentp->a_perm << 3;
3048 3049 break;
3049 3050 case GROUP:
3050 3051 other_perm |= aclentp->a_perm;
3051 3052 break;
3052 3053 case OTHER_OBJ:
3053 3054 other_orig = aclentp->a_perm;
3054 3055 break;
3055 3056 case CLASS_OBJ:
3056 3057 mask_perm = aclentp->a_perm;
3057 3058 break;
3058 3059 default:
3059 3060 break;
3060 3061 }
3061 3062 }
3062 3063 grp_perm &= mask_perm << 3;
3063 3064 other_perm &= mask_perm;
3064 3065 other_perm |= other_orig;
3065 3066
3066 3067 } else {
3067 3068 /* minimal permissions */
3068 3069 grp_perm = 070;
3069 3070 other_perm = 07;
3070 3071 for (; aclcnt > 0; aclcnt--, aclentp++) {
3071 3072 switch (aclentp->a_type) {
3072 3073 case USER_OBJ:
3073 3074 break;
3074 3075 case USER:
3075 3076 case CLASS_OBJ:
3076 3077 grp_perm &=
3077 3078 aclentp->a_perm << 3;
3078 3079 other_perm &=
3079 3080 aclentp->a_perm;
3080 3081 break;
3081 3082 case GROUP_OBJ:
3082 3083 grp_perm &=
3083 3084 aclentp->a_perm << 3;
3084 3085 break;
3085 3086 case GROUP:
3086 3087 other_perm &=
3087 3088 aclentp->a_perm;
3088 3089 break;
3089 3090 case OTHER_OBJ:
3090 3091 other_perm &=
3091 3092 aclentp->a_perm;
3092 3093 break;
3093 3094 default:
3094 3095 break;
3095 3096 }
3096 3097 }
3097 3098 }
3098 3099 /* copy to va */
3099 3100 va->va_mode &= ~077;
3100 3101 va->va_mode |= grp_perm | other_perm;
3101 3102 }
3102 3103 if (vsa.vsa_aclcnt)
3103 3104 kmem_free(vsa.vsa_aclentp,
3104 3105 vsa.vsa_aclcnt * sizeof (aclent_t));
3105 3106 }
3106 3107 }
3107 3108
/*
 * One-time NFSv2 server initialization: obtain the unique caller id
 * this module uses when issuing VOP calls (see fs_new_caller_id()).
 */
void
rfs_srvrinit(void)
{
	nfs2_srv_caller_id = fs_new_caller_id();
}
3113 3114
/*
 * Teardown counterpart of rfs_srvrinit().  Currently nothing to undo.
 */
void
rfs_srvrfini(void)
{
}
3118 3119
3119 3120 /* ARGSUSED */
3120 3121 void
3121 3122 rfs_srv_zone_init(nfs_globals_t *ng)
3122 3123 {
3123 3124 nfs_srv_t *ns;
3124 3125
3125 3126 ns = kmem_zalloc(sizeof (*ns), KM_SLEEP);
3126 3127
3127 3128 mutex_init(&ns->async_write_lock, NULL, MUTEX_DEFAULT, NULL);
3128 3129 ns->write_async = 1;
3129 3130
3130 3131 ng->nfs_srv = ns;
3131 3132 }
3132 3133
3133 3134 /* ARGSUSED */
3134 3135 void
3135 3136 rfs_srv_zone_fini(nfs_globals_t *ng)
3136 3137 {
3137 3138 nfs_srv_t *ns = ng->nfs_srv;
3138 3139
3139 3140 ng->nfs_srv = NULL;
3140 3141
3141 3142 mutex_destroy(&ns->async_write_lock);
3142 3143 kmem_free(ns, sizeof (*ns));
3143 3144 }
3144 3145
3145 3146 static int
3146 3147 rdma_setup_read_data2(struct nfsreadargs *ra, struct nfsrdresult *rr)
3147 3148 {
3148 3149 struct clist *wcl;
3149 3150 int wlist_len;
3150 3151 uint32_t count = rr->rr_count;
3151 3152
3152 3153 wcl = ra->ra_wlist;
3153 3154
3154 3155 if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE) {
3155 3156 return (FALSE);
3156 3157 }
3157 3158
3158 3159 wcl = ra->ra_wlist;
3159 3160 rr->rr_ok.rrok_wlist_len = wlist_len;
3160 3161 rr->rr_ok.rrok_wlist = wcl;
3161 3162
3162 3163 return (TRUE);
3163 3164 }
|
↓ open down ↓ |
2616 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX