Print this page
Send nfs_export_t to untraverse()
| Split |
Close |
| Expand all |
| Collapse all |
--- old/usr/src/uts/common/fs/nfs/nfs_srv.c
+++ new/usr/src/uts/common/fs/nfs/nfs_srv.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved.
24 24 * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
25 25 * Copyright (c) 2016 by Delphix. All rights reserved.
26 26 */
27 27
28 28 /*
29 29 * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T.
30 30 * All rights reserved.
31 31 */
32 32
33 33 /*
34 34 * Copyright 2018 Nexenta Systems, Inc.
35 35 * Copyright (c) 2016 by Delphix. All rights reserved.
36 36 */
37 37
38 38 #include <sys/param.h>
39 39 #include <sys/types.h>
40 40 #include <sys/systm.h>
41 41 #include <sys/cred.h>
42 42 #include <sys/buf.h>
43 43 #include <sys/vfs.h>
44 44 #include <sys/vnode.h>
45 45 #include <sys/uio.h>
46 46 #include <sys/stat.h>
47 47 #include <sys/errno.h>
48 48 #include <sys/sysmacros.h>
49 49 #include <sys/statvfs.h>
50 50 #include <sys/kmem.h>
51 51 #include <sys/kstat.h>
52 52 #include <sys/dirent.h>
53 53 #include <sys/cmn_err.h>
54 54 #include <sys/debug.h>
55 55 #include <sys/vtrace.h>
56 56 #include <sys/mode.h>
57 57 #include <sys/acl.h>
58 58 #include <sys/nbmlock.h>
59 59 #include <sys/policy.h>
60 60 #include <sys/sdt.h>
61 61
62 62 #include <rpc/types.h>
63 63 #include <rpc/auth.h>
64 64 #include <rpc/svc.h>
65 65
66 66 #include <nfs/nfs.h>
67 67 #include <nfs/export.h>
68 68 #include <nfs/nfs_cmd.h>
69 69
70 70 #include <vm/hat.h>
71 71 #include <vm/as.h>
72 72 #include <vm/seg.h>
73 73 #include <vm/seg_map.h>
74 74 #include <vm/seg_kmem.h>
75 75
76 76 #include <sys/strsubr.h>
77 77
/*
 * Opaque list of pending clustered (async) write requests; full
 * definition appears later in this file.
 */
struct rfs_async_write_list;

/*
 * Zone globals of NFSv2 server
 *
 * One instance per zone, reached through the per-zone NFS globals
 * (see nfs_get_srv() below).
 */
typedef struct nfs_srv {
	/* protects async_write_head */
	kmutex_t async_write_lock;
	/* pending clustered write requests */
	struct rfs_async_write_list *async_write_head;

	/*
	 * enables write clustering if == 1
	 */
	int write_async;
} nfs_srv_t;
92 92
93 93 /*
94 94 * These are the interface routines for the server side of the
95 95 * Network File System. See the NFS version 2 protocol specification
96 96 * for a description of this interface.
97 97 */
98 98
99 99 static int sattr_to_vattr(struct nfssattr *, struct vattr *);
100 100 static void acl_perm(struct vnode *, struct exportinfo *, struct vattr *,
101 101 cred_t *);
102 102
103 103
/*
 * Some "over the wire" UNIX file types. These are encoded
 * into the mode. This needs to be fixed in the next rev.
 */
#define	IFMT	0170000		/* type of file */
#define	IFCHR	0020000		/* character special */
#define	IFBLK	0060000		/* block special */
#define	IFSOCK	0140000		/* socket */

/*
 * Unique caller id used to tag caller_context_t (cc_caller_id) for
 * all NFSv2 VOP calls in this file.
 */
u_longlong_t nfs2_srv_caller_id;
114 114
/*
 * Return the per-zone NFSv2 server state for the current zone.
 * The zone-specific NFS globals must already be initialized by the
 * time any NFSv2 service routine runs, hence the ASSERT.
 */
static nfs_srv_t *
nfs_get_srv(void)
{
	nfs_globals_t *ng = zone_getspecific(nfssrv_zone_key, curzone);
	nfs_srv_t *srv = ng->nfs_srv;
	ASSERT(srv != NULL);
	return (srv);
}
123 123
/*
 * Get file attributes.
 * Returns the current attributes of the file with the given fhandle.
 *
 * On any failure, ns->ns_status carries the mapped NFS error; the
 * vnode reference obtained from nfs_fhtovp() is always released.
 */
/* ARGSUSED */
void
rfs_getattr(fhandle_t *fhp, struct nfsattrstat *ns, struct exportinfo *exi,
    struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error;
	vnode_t *vp;
	struct vattr va;

	/* Translate the file handle into a held vnode. */
	vp = nfs_fhtovp(fhp, exi);
	if (vp == NULL) {
		ns->ns_status = NFSERR_STALE;
		return;
	}

	/*
	 * Do the getattr.
	 */
	va.va_mask = AT_ALL;	/* we want all the attributes */

	error = rfs4_delegated_getattr(vp, &va, 0, cr);

	/* check for overflows */
	if (!error) {
		/* Lie about the object type for a referral */
		if (vn_is_nfs_reparse(vp, cr))
			va.va_type = VLNK;

		/* acl_perm() adjusts va before conversion to wire form */
		acl_perm(vp, exi, &va, cr);
		error = vattr_to_nattr(&va, &ns->ns_attr);
	}

	VN_RELE(vp);

	ns->ns_status = puterrno(error);
}
164 164 void *
165 165 rfs_getattr_getfh(fhandle_t *fhp)
166 166 {
167 167 return (fhp);
168 168 }
169 169
/*
 * Set file attributes.
 * Sets the attributes of the file with the given fhandle.  Returns
 * the new attributes.
 *
 * Error paths release the vnode and set ns->ns_status; a detected
 * delegation conflict instead sets T_WOULDBLOCK so the reply is
 * dropped and the client retries.
 */
/* ARGSUSED */
void
rfs_setattr(struct nfssaargs *args, struct nfsattrstat *ns,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error;
	int flag;		/* flags for VOP_SETATTR (0 or ATTR_UTIME) */
	int in_crit = 0;	/* nonzero while inside the nbl critical region */
	vnode_t *vp;
	struct vattr va;	/* attributes requested by the client */
	struct vattr bva;	/* attributes of the file before the change */
	struct flock64 bf;
	caller_context_t ct;


	vp = nfs_fhtovp(&args->saa_fh, exi);
	if (vp == NULL) {
		ns->ns_status = NFSERR_STALE;
		return;
	}

	/* No attribute changes on a read-only export. */
	if (rdonly(ro, vp)) {
		VN_RELE(vp);
		ns->ns_status = NFSERR_ROFS;
		return;
	}

	error = sattr_to_vattr(&args->saa_sa, &va);
	if (error) {
		VN_RELE(vp);
		ns->ns_status = puterrno(error);
		return;
	}

	/*
	 * If the client is requesting a change to the mtime,
	 * but the nanosecond field is set to 1 billion, then
	 * this is a flag to the server that it should set the
	 * atime and mtime fields to the server's current time.
	 * The 1 billion number actually came from the client
	 * as 1 million, but the units in the over the wire
	 * request are microseconds instead of nanoseconds.
	 *
	 * This is an overload of the protocol and should be
	 * documented in the NFS Version 2 protocol specification.
	 */
	if (va.va_mask & AT_MTIME) {
		if (va.va_mtime.tv_nsec == 1000000000) {
			gethrestime(&va.va_mtime);
			va.va_atime = va.va_mtime;
			va.va_mask |= AT_ATIME;
			flag = 0;
		} else
			flag = ATTR_UTIME;
	} else
		flag = 0;

	/*
	 * If the filesystem is exported with nosuid, then mask off
	 * the setuid and setgid bits.
	 */
	if ((va.va_mask & AT_MODE) && vp->v_type == VREG &&
	    (exi->exi_export.ex_flags & EX_NOSUID))
		va.va_mode &= ~(VSUID | VSGID);

	ct.cc_sysid = 0;
	ct.cc_pid = 0;
	ct.cc_caller_id = nfs2_srv_caller_id;
	ct.cc_flags = CC_DONTBLOCK;

	/*
	 * We need to specially handle size changes because it is
	 * possible for the client to create a file with modes
	 * which indicate read-only, but with the file opened for
	 * writing. If the client then tries to set the size of
	 * the file, then the normal access checking done in
	 * VOP_SETATTR would prevent the client from doing so,
	 * although it should be legal for it to do so. To get
	 * around this, we do the access checking for ourselves
	 * and then use VOP_SPACE which doesn't do the access
	 * checking which VOP_SETATTR does. VOP_SPACE can only
	 * operate on VREG files, let VOP_SETATTR handle the other
	 * extremely rare cases.
	 * Also the client should not be allowed to change the
	 * size of the file if there is a conflicting non-blocking
	 * mandatory lock in the region of change.
	 */
	if (vp->v_type == VREG && va.va_mask & AT_SIZE) {
		if (nbl_need_check(vp)) {
			nbl_start_crit(vp, RW_READER);
			in_crit = 1;
		}

		bva.va_mask = AT_UID | AT_SIZE;

		error = VOP_GETATTR(vp, &bva, 0, cr, &ct);

		if (error) {
			if (in_crit)
				nbl_end_crit(vp);
			VN_RELE(vp);
			ns->ns_status = puterrno(error);
			return;
		}

		if (in_crit) {
			u_offset_t offset;
			ssize_t length;

			/*
			 * The region affected by the size change is
			 * [new size, old size) when shrinking and
			 * [old size, new size) when growing.
			 */
			if (va.va_size < bva.va_size) {
				offset = va.va_size;
				length = bva.va_size - va.va_size;
			} else {
				offset = bva.va_size;
				length = va.va_size - bva.va_size;
			}
			if (nbl_conflict(vp, NBL_WRITE, offset, length, 0,
			    NULL)) {
				error = EACCES;
			}
		}

		/*
		 * Owner gets to truncate regardless of mode bits; use
		 * VOP_SPACE(F_FREESP) instead of VOP_SETATTR for the
		 * size change (see block comment above).
		 */
		if (crgetuid(cr) == bva.va_uid && !error &&
		    va.va_size != bva.va_size) {
			va.va_mask &= ~AT_SIZE;
			bf.l_type = F_WRLCK;
			bf.l_whence = 0;
			bf.l_start = (off64_t)va.va_size;
			bf.l_len = 0;
			bf.l_sysid = 0;
			bf.l_pid = 0;

			error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
			    (offset_t)va.va_size, cr, &ct);
		}
		if (in_crit)
			nbl_end_crit(vp);
	} else
		error = 0;

	/*
	 * Do the setattr.
	 */
	if (!error && va.va_mask) {
		error = VOP_SETATTR(vp, &va, flag, cr, &ct);
	}

	/*
	 * check if the monitor on either vop_space or vop_setattr detected
	 * a delegation conflict and if so, mark the thread flag as
	 * wouldblock so that the response is dropped and the client will
	 * try again.
	 */
	if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
		VN_RELE(vp);
		curthread->t_flag |= T_WOULDBLOCK;
		return;
	}

	if (!error) {
		va.va_mask = AT_ALL;	/* get everything */

		error = rfs4_delegated_getattr(vp, &va, 0, cr);

		/* check for overflows */
		if (!error) {
			acl_perm(vp, exi, &va, cr);
			error = vattr_to_nattr(&va, &ns->ns_attr);
		}
	}

	ct.cc_flags = 0;

	/*
	 * Force modified metadata out to stable storage.
	 */
	(void) VOP_FSYNC(vp, FNODSYNC, cr, &ct);

	VN_RELE(vp);

	ns->ns_status = puterrno(error);
}
357 357 void *
358 358 rfs_setattr_getfh(struct nfssaargs *args)
359 359 {
360 360 return (&args->saa_fh);
361 361 }
362 362
/*
 * Change and release @exip and @vpp only in success
 *
 * Cross into a filesystem mounted on *vpp.  If the covered filesystem
 * is exported with EX_NOHIDE, swap *vpp/*exip for the submount's vnode
 * and exportinfo (releasing the old references).  If the submount is
 * not exported "nohide", this is not an error: the caller keeps its
 * original vnode/export.  Returns 0 on success-or-no-crossing, else
 * the errno from traverse()/VOP_FID().
 */
int
rfs_cross_mnt(vnode_t **vpp, struct exportinfo **exip)
{
	struct exportinfo *exi;
	vnode_t *vp = *vpp;
	fid_t fid;
	int error;

	/* Hold vp across traverse(); it swaps in the root of the mount. */
	VN_HOLD(vp);

	if ((error = traverse(&vp)) != 0) {
		VN_RELE(vp);
		return (error);
	}

	/* Identify the crossed-into filesystem by (fsid, fid). */
	bzero(&fid, sizeof (fid));
	fid.fid_len = MAXFIDSZ;
	error = VOP_FID(vp, &fid, NULL);
	if (error) {
		VN_RELE(vp);
		return (error);
	}

	exi = checkexport(&vp->v_vfsp->vfs_fsid, &fid);
	if (exi == NULL ||
	    (exi->exi_export.ex_flags & EX_NOHIDE) == 0) {
		/*
		 * It is not error, just subdir is not exported
		 * or "nohide" is not set
		 */
		if (exi != NULL)
			exi_rele(exi);
		VN_RELE(vp);
	} else {
		/* go to submount */
		exi_rele(*exip);
		*exip = exi;

		VN_RELE(*vpp);
		*vpp = vp;
	}

	return (0);
}
408 408
/*
 * Given mounted "dvp" and "exi", go upper mountpoint
 * with dvp/exi correction
 * Return 0 in success
 *
 * On success *dvpp/*exip are replaced by the covered vnode and its
 * exportinfo (old references released).  Returns -1 if the covered
 * vnode is not exported.  untraverse() takes the export's
 * nfs_export_t (exi_ne) so climbing stops at the correct root.
 */
int
rfs_climb_crossmnt(vnode_t **dvpp, struct exportinfo **exip, cred_t *cr)
{
	struct exportinfo *exi;
	vnode_t *dvp = *dvpp;

	ASSERT3U((*exip)->exi_zoneid, ==, curzone->zone_id);
	ASSERT((dvp->v_flag & VROOT) || VN_IS_CURZONEROOT(dvp));

	/* Hold dvp for untraverse(); it returns the covered vnode held. */
	VN_HOLD(dvp);
	dvp = untraverse((*exip)->exi_ne, dvp);
	exi = nfs_vptoexi(NULL, dvp, cr, NULL, NULL, FALSE);
	if (exi == NULL) {
		VN_RELE(dvp);
		return (-1);
	}

	ASSERT3U(exi->exi_zoneid, ==, curzone->zone_id);
	exi_rele(*exip);
	*exip = exi;
	VN_RELE(*dvpp);
	*dvpp = dvp;

	return (0);
}
/*
 * Directory lookup.
 * Returns an fhandle and file attributes for file name in a directory.
 *
 * Handles the WebNFS public filehandle (multi-component lookup),
 * ".." escapes at export roots (nohide climbing), and transparent
 * mountpoint crossing.  dr->dr_status carries the result.
 */
/* ARGSUSED */
void
rfs_lookup(struct nfsdiropargs *da, struct nfsdiropres *dr,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error;
	vnode_t *dvp;		/* directory being searched */
	vnode_t *vp;		/* result of the lookup */
	struct vattr va;
	fhandle_t *fhp = da->da_fhandle;
	struct sec_ol sec = {0, 0};
	bool_t publicfh_flag = FALSE, auth_weak = FALSE;
	char *name;
	struct sockaddr *ca;

	/*
	 * Trusted Extension doesn't support NFSv2. MOUNT
	 * will reject v2 clients. Need to prevent v2 client
	 * access via WebNFS here.
	 */
	if (is_system_labeled() && req->rq_vers == 2) {
		dr->dr_status = NFSERR_ACCES;
		return;
	}

	/*
	 * Disallow NULL paths
	 */
	if (da->da_name == NULL || *da->da_name == '\0') {
		dr->dr_status = NFSERR_ACCES;
		return;
	}

	/*
	 * Allow lookups from the root - the default
	 * location of the public filehandle.
	 */
	if (exi != NULL && (exi->exi_export.ex_flags & EX_PUBLIC)) {
		dvp = ZONE_ROOTVP();
		VN_HOLD(dvp);
	} else {
		dvp = nfs_fhtovp(fhp, exi);
		if (dvp == NULL) {
			dr->dr_status = NFSERR_STALE;
			return;
		}
	}

	/* Local reference; dropped at "out" (may be swapped by climbing). */
	exi_hold(exi);
	ASSERT3U(exi->exi_zoneid, ==, curzone->zone_id);

	/*
	 * Not allow lookup beyond root.
	 * If the filehandle matches a filehandle of the exi,
	 * then the ".." refers beyond the root of an exported filesystem.
	 */
	if (strcmp(da->da_name, "..") == 0 &&
	    EQFID(&exi->exi_fid, (fid_t *)&fhp->fh_len)) {
		if ((exi->exi_export.ex_flags & EX_NOHIDE) &&
		    ((dvp->v_flag & VROOT) || VN_IS_CURZONEROOT(dvp))) {
			/*
			 * special case for ".." and 'nohide'exported root
			 */
			if (rfs_climb_crossmnt(&dvp, &exi, cr) != 0) {
				/*
				 * NFSERR_* values used here also map
				 * cleanly through puterrno() below.
				 */
				error = NFSERR_ACCES;
				goto out;
			}
		} else {
			error = NFSERR_NOENT;
			goto out;
		}
	}

	/* Translate the name per the export's character-set mapping. */
	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
	name = nfscmd_convname(ca, exi, da->da_name, NFSCMD_CONV_INBOUND,
	    MAXPATHLEN);

	if (name == NULL) {
		error = NFSERR_ACCES;
		goto out;
	}

	/*
	 * If the public filehandle is used then allow
	 * a multi-component lookup, i.e. evaluate
	 * a pathname and follow symbolic links if
	 * necessary.
	 *
	 * This may result in a vnode in another filesystem
	 * which is OK as long as the filesystem is exported.
	 */
	if (PUBLIC_FH2(fhp)) {
		publicfh_flag = TRUE;

		/* mclookup supplies the export for whatever it resolves */
		exi_rele(exi);
		exi = NULL;

		error = rfs_publicfh_mclookup(name, dvp, cr, &vp, &exi,
		    &sec);
	} else {
		/*
		 * Do a normal single component lookup.
		 */
		error = VOP_LOOKUP(dvp, name, &vp, NULL, 0, NULL, cr,
		    NULL, NULL, NULL);
	}

	/* nfscmd_convname() allocates only when it translated the name. */
	if (name != da->da_name)
		kmem_free(name, MAXPATHLEN);

	if (error == 0 && vn_ismntpt(vp)) {
		error = rfs_cross_mnt(&vp, &exi);
		if (error)
			VN_RELE(vp);
	}

	if (!error) {
		va.va_mask = AT_ALL;	/* we want everything */

		error = rfs4_delegated_getattr(vp, &va, 0, cr);

		/* check for overflows */
		if (!error) {
			acl_perm(vp, exi, &va, cr);
			error = vattr_to_nattr(&va, &dr->dr_attr);
			if (!error) {
				if (sec.sec_flags & SEC_QUERY)
					error = makefh_ol(&dr->dr_fhandle, exi,
					    sec.sec_index);
				else {
					error = makefh(&dr->dr_fhandle, vp,
					    exi);
					if (!error && publicfh_flag &&
					    !chk_clnt_sec(exi, req))
						auth_weak = TRUE;
				}
			}
		}
		VN_RELE(vp);
	}

out:
	VN_RELE(dvp);

	if (exi != NULL)
		exi_rele(exi);

	/*
	 * If it's public fh, no 0x81, and client's flavor is
	 * invalid, set WebNFS status to WNFSERR_CLNT_FLAVOR now.
	 * Then set RPC status to AUTH_TOOWEAK in common_dispatch.
	 */
	if (auth_weak)
		dr->dr_status = (enum nfsstat)WNFSERR_CLNT_FLAVOR;
	else
		dr->dr_status = puterrno(error);
}
600 600 void *
601 601 rfs_lookup_getfh(struct nfsdiropargs *da)
602 602 {
603 603 return (da->da_fhandle);
604 604 }
605 605
/*
 * Read symbolic link.
 * Returns the string in the symbolic link at the given fhandle.
 *
 * rl->rl_data is kmem_alloc'd here (NFS_MAXPATHLEN) and later freed
 * by rfs_rlfree(); it is NULL on every error return before allocation.
 * NFS referrals (reparse points) are presented as artificial symlinks.
 */
/* ARGSUSED */
void
rfs_readlink(fhandle_t *fhp, struct nfsrdlnres *rl, struct exportinfo *exi,
    struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error;
	struct iovec iov;
	struct uio uio;
	vnode_t *vp;
	struct vattr va;
	struct sockaddr *ca;
	char *name = NULL;
	int is_referral = 0;

	vp = nfs_fhtovp(fhp, exi);
	if (vp == NULL) {
		rl->rl_data = NULL;
		rl->rl_status = NFSERR_STALE;
		return;
	}

	va.va_mask = AT_MODE;

	error = VOP_GETATTR(vp, &va, 0, cr, NULL);

	if (error) {
		VN_RELE(vp);
		rl->rl_data = NULL;
		rl->rl_status = puterrno(error);
		return;
	}

	/* Refuse objects under non-blocking mandatory locking. */
	if (MANDLOCK(vp, va.va_mode)) {
		VN_RELE(vp);
		rl->rl_data = NULL;
		rl->rl_status = NFSERR_ACCES;
		return;
	}

	/* We lied about the object type for a referral */
	if (vn_is_nfs_reparse(vp, cr))
		is_referral = 1;

	/*
	 * XNFS and RFC1094 require us to return ENXIO if argument
	 * is not a link. BUGID 1138002.
	 */
	if (vp->v_type != VLNK && !is_referral) {
		VN_RELE(vp);
		rl->rl_data = NULL;
		rl->rl_status = NFSERR_NXIO;
		return;
	}

	/*
	 * Allocate data for pathname. This will be freed by rfs_rlfree.
	 */
	rl->rl_data = kmem_alloc(NFS_MAXPATHLEN, KM_SLEEP);

	if (is_referral) {
		char *s;
		size_t strsz;

		/* Get an artificial symlink based on a referral */
		s = build_symlink(vp, cr, &strsz);
		global_svstat_ptr[2][NFS_REFERLINKS].value.ui64++;
		DTRACE_PROBE2(nfs2serv__func__referral__reflink,
		    vnode_t *, vp, char *, s);
		if (s == NULL)
			error = EINVAL;
		else {
			error = 0;
			(void) strlcpy(rl->rl_data, s, NFS_MAXPATHLEN);
			rl->rl_count = (uint32_t)MIN(strsz, NFS_MAXPATHLEN);
			kmem_free(s, strsz);
		}

	} else {

		/*
		 * Set up io vector to read sym link data
		 */
		iov.iov_base = rl->rl_data;
		iov.iov_len = NFS_MAXPATHLEN;
		uio.uio_iov = &iov;
		uio.uio_iovcnt = 1;
		uio.uio_segflg = UIO_SYSSPACE;
		uio.uio_extflg = UIO_COPY_CACHED;
		uio.uio_loffset = (offset_t)0;
		uio.uio_resid = NFS_MAXPATHLEN;

		/*
		 * Do the readlink.
		 */
		error = VOP_READLINK(vp, &uio, cr, NULL);

		rl->rl_count = (uint32_t)(NFS_MAXPATHLEN - uio.uio_resid);

		/*
		 * NOTE(review): if the link consumed the full buffer
		 * (rl_count == NFS_MAXPATHLEN) this writes one byte past
		 * the allocation — confirm an upper bound elsewhere
		 * guarantees rl_count < NFS_MAXPATHLEN.
		 */
		if (!error)
			rl->rl_data[rl->rl_count] = '\0';

	}


	VN_RELE(vp);

	/* Translate the link text per the export's outbound mapping. */
	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
	name = nfscmd_convname(ca, exi, rl->rl_data,
	    NFSCMD_CONV_OUTBOUND, MAXPATHLEN);

	if (name != NULL && name != rl->rl_data) {
		kmem_free(rl->rl_data, NFS_MAXPATHLEN);
		rl->rl_data = name;
	}

	/*
	 * XNFS and RFC1094 require us to return ENXIO if argument
	 * is not a link. UFS returns EINVAL if this is the case,
	 * so we do the mapping here. BUGID 1138002.
	 */
	if (error == EINVAL)
		rl->rl_status = NFSERR_NXIO;
	else
		rl->rl_status = puterrno(error);

}
736 736 void *
737 737 rfs_readlink_getfh(fhandle_t *fhp)
738 738 {
739 739 return (fhp);
740 740 }
/*
 * Free data allocated by rfs_readlink
 *
 * rl_data is NULL on the early error paths of rfs_readlink, in which
 * case there is nothing to free.
 */
void
rfs_rlfree(struct nfsrdlnres *rl)
{
	if (rl->rl_data != NULL)
		kmem_free(rl->rl_data, NFS_MAXPATHLEN);
}
750 750
751 751 static int rdma_setup_read_data2(struct nfsreadargs *, struct nfsrdresult *);
752 752
/*
 * Read data.
 * Returns some data read from the file at the given fhandle.
 *
 * Two reply paths: an RDMA write chunk (ra_wlist) supplied by the
 * transport, or an mblk (rr_mp) allocated here and freed after the
 * reply is sent (or by rfs_rdfree).  Delegation conflicts drop the
 * reply via T_WOULDBLOCK so the client retries.
 */
/* ARGSUSED */
void
rfs_read(struct nfsreadargs *ra, struct nfsrdresult *rr,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
	vnode_t *vp;
	int error;
	struct vattr va;
	struct iovec iov;
	struct uio uio;
	mblk_t *mp;
	int alloc_err = 0;
	int in_crit = 0;	/* nonzero while inside the nbl critical region */
	caller_context_t ct;

	vp = nfs_fhtovp(&ra->ra_fhandle, exi);
	if (vp == NULL) {
		rr->rr_data = NULL;
		rr->rr_status = NFSERR_STALE;
		return;
	}

	if (vp->v_type != VREG) {
		VN_RELE(vp);
		rr->rr_data = NULL;
		rr->rr_status = NFSERR_ISDIR;
		return;
	}

	ct.cc_sysid = 0;
	ct.cc_pid = 0;
	ct.cc_caller_id = nfs2_srv_caller_id;
	ct.cc_flags = CC_DONTBLOCK;

	/*
	 * Enter the critical region before calling VOP_RWLOCK
	 * to avoid a deadlock with write requests.
	 */
	if (nbl_need_check(vp)) {
		nbl_start_crit(vp, RW_READER);
		if (nbl_conflict(vp, NBL_READ, ra->ra_offset, ra->ra_count,
		    0, NULL)) {
			nbl_end_crit(vp);
			VN_RELE(vp);
			rr->rr_data = NULL;
			rr->rr_status = NFSERR_ACCES;
			return;
		}
		in_crit = 1;
	}

	error = VOP_RWLOCK(vp, V_WRITELOCK_FALSE, &ct);

	/* check if a monitor detected a delegation conflict */
	if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
		if (in_crit)
			nbl_end_crit(vp);
		VN_RELE(vp);
		/* mark as wouldblock so response is dropped */
		curthread->t_flag |= T_WOULDBLOCK;

		rr->rr_data = NULL;
		return;
	}

	va.va_mask = AT_ALL;

	error = VOP_GETATTR(vp, &va, 0, cr, &ct);

	if (error) {
		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
		if (in_crit)
			nbl_end_crit(vp);

		VN_RELE(vp);
		rr->rr_data = NULL;
		rr->rr_status = puterrno(error);

		return;
	}

	/*
	 * This is a kludge to allow reading of files created
	 * with no read permission. The owner of the file
	 * is always allowed to read it.
	 */
	if (crgetuid(cr) != va.va_uid) {
		error = VOP_ACCESS(vp, VREAD, 0, cr, &ct);

		if (error) {
			/*
			 * Exec is the same as read over the net because
			 * of demand loading.
			 */
			error = VOP_ACCESS(vp, VEXEC, 0, cr, &ct);
		}
		if (error) {
			VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
			if (in_crit)
				nbl_end_crit(vp);
			VN_RELE(vp);
			rr->rr_data = NULL;
			rr->rr_status = puterrno(error);

			return;
		}
	}

	/* Refuse files under non-blocking mandatory locking. */
	if (MANDLOCK(vp, va.va_mode)) {
		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
		if (in_crit)
			nbl_end_crit(vp);

		VN_RELE(vp);
		rr->rr_data = NULL;
		rr->rr_status = NFSERR_ACCES;

		return;
	}

	rr->rr_ok.rrok_wlist_len = 0;
	rr->rr_ok.rrok_wlist = NULL;

	/* Read starting at or past EOF returns success with no data. */
	if ((u_offset_t)ra->ra_offset >= va.va_size) {
		rr->rr_count = 0;
		rr->rr_data = NULL;
		/*
		 * In this case, status is NFS_OK, but there is no data
		 * to encode. So set rr_mp to NULL.
		 */
		rr->rr_mp = NULL;
		rr->rr_ok.rrok_wlist = ra->ra_wlist;
		if (rr->rr_ok.rrok_wlist)
			clist_zero_len(rr->rr_ok.rrok_wlist);
		goto done;
	}

	if (ra->ra_wlist) {
		/* RDMA path: read directly into the client's write chunk. */
		mp = NULL;
		rr->rr_mp = NULL;
		(void) rdma_get_wchunk(req, &iov, ra->ra_wlist);
		if (ra->ra_count > iov.iov_len) {
			rr->rr_data = NULL;
			rr->rr_status = NFSERR_INVAL;
			goto done;
		}
	} else {
		/*
		 * mp will contain the data to be sent out in the read reply.
		 * This will be freed after the reply has been sent out (by the
		 * driver).
		 * Let's roundup the data to a BYTES_PER_XDR_UNIT multiple, so
		 * that the call to xdrmblk_putmblk() never fails.
		 */
		mp = allocb_wait(RNDUP(ra->ra_count), BPRI_MED, STR_NOSIG,
		    &alloc_err);
		ASSERT(mp != NULL);
		ASSERT(alloc_err == 0);

		rr->rr_mp = mp;

		/*
		 * Set up io vector
		 */
		iov.iov_base = (caddr_t)mp->b_datap->db_base;
		iov.iov_len = ra->ra_count;
	}

	uio.uio_iov = &iov;
	uio.uio_iovcnt = 1;
	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_extflg = UIO_COPY_CACHED;
	uio.uio_loffset = (offset_t)ra->ra_offset;
	uio.uio_resid = ra->ra_count;

	error = VOP_READ(vp, &uio, 0, cr, &ct);

	if (error) {
		if (mp)
			freeb(mp);

		/*
		 * check if a monitor detected a delegation conflict and
		 * mark as wouldblock so response is dropped
		 */
		if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
			curthread->t_flag |= T_WOULDBLOCK;
		else
			rr->rr_status = puterrno(error);

		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
		if (in_crit)
			nbl_end_crit(vp);

		VN_RELE(vp);
		rr->rr_data = NULL;

		return;
	}

	/*
	 * Get attributes again so we can send the latest access
	 * time to the client side for its cache.
	 */
	va.va_mask = AT_ALL;

	error = VOP_GETATTR(vp, &va, 0, cr, &ct);

	if (error) {
		if (mp)
			freeb(mp);

		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
		if (in_crit)
			nbl_end_crit(vp);

		VN_RELE(vp);
		rr->rr_data = NULL;
		rr->rr_status = puterrno(error);

		return;
	}

	/* Bytes actually read = requested minus what was left over. */
	rr->rr_count = (uint32_t)(ra->ra_count - uio.uio_resid);

	if (mp) {
		rr->rr_data = (char *)mp->b_datap->db_base;
	} else {
		if (ra->ra_wlist) {
			rr->rr_data = (caddr_t)iov.iov_base;
			if (!rdma_setup_read_data2(ra, rr)) {
				rr->rr_data = NULL;
				rr->rr_status = puterrno(NFSERR_INVAL);
			}
		}
	}
done:
	VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
	if (in_crit)
		nbl_end_crit(vp);

	acl_perm(vp, exi, &va, cr);

	/* check for overflows */
	error = vattr_to_nattr(&va, &rr->rr_attr);

	VN_RELE(vp);

	rr->rr_status = puterrno(error);
}
1007 1007
/*
 * Free data allocated by rfs_read
 *
 * Only a successful read leaves an mblk behind in rr_mp (error paths
 * in rfs_read free it before returning); rr_mp is NULL for RDMA
 * replies and zero-length reads.
 */
void
rfs_rdfree(struct nfsrdresult *rr)
{
	mblk_t *mp;

	if (rr->rr_status == NFS_OK) {
		mp = rr->rr_mp;
		if (mp != NULL)
			freeb(mp);
	}
}
1022 1022
1023 1023 void *
1024 1024 rfs_read_getfh(struct nfsreadargs *ra)
1025 1025 {
1026 1026 return (&ra->ra_fhandle);
1027 1027 }
1028 1028
/* Size of the on-stack iovec array used by rfs_write_sync(). */
#define	MAX_IOVECS	12

#ifdef DEBUG
/* Counts of write mblk chains that did / did not fit in the stack iovecs. */
static int rfs_write_sync_hits = 0;
static int rfs_write_sync_misses = 0;
#endif
1035 1035
1036 1036 /*
1037 1037 * Write data to file.
1038 1038 * Returns attributes of a file after writing some data to it.
1039 1039 *
1040 1040 * Any changes made here, especially in error handling might have
1041 1041 * to also be done in rfs_write (which clusters write requests).
1042 1042 */
1043 1043 /* ARGSUSED */
1044 1044 void
1045 1045 rfs_write_sync(struct nfswriteargs *wa, struct nfsattrstat *ns,
1046 1046 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
1047 1047 {
1048 1048 int error;
1049 1049 vnode_t *vp;
1050 1050 rlim64_t rlimit;
1051 1051 struct vattr va;
1052 1052 struct uio uio;
1053 1053 struct iovec iov[MAX_IOVECS];
1054 1054 mblk_t *m;
1055 1055 struct iovec *iovp;
1056 1056 int iovcnt;
1057 1057 cred_t *savecred;
1058 1058 int in_crit = 0;
1059 1059 caller_context_t ct;
1060 1060
1061 1061 vp = nfs_fhtovp(&wa->wa_fhandle, exi);
1062 1062 if (vp == NULL) {
1063 1063 ns->ns_status = NFSERR_STALE;
1064 1064 return;
1065 1065 }
1066 1066
1067 1067 if (rdonly(ro, vp)) {
1068 1068 VN_RELE(vp);
1069 1069 ns->ns_status = NFSERR_ROFS;
1070 1070 return;
1071 1071 }
1072 1072
1073 1073 if (vp->v_type != VREG) {
1074 1074 VN_RELE(vp);
1075 1075 ns->ns_status = NFSERR_ISDIR;
1076 1076 return;
1077 1077 }
1078 1078
1079 1079 ct.cc_sysid = 0;
1080 1080 ct.cc_pid = 0;
1081 1081 ct.cc_caller_id = nfs2_srv_caller_id;
1082 1082 ct.cc_flags = CC_DONTBLOCK;
1083 1083
1084 1084 va.va_mask = AT_UID|AT_MODE;
1085 1085
1086 1086 error = VOP_GETATTR(vp, &va, 0, cr, &ct);
1087 1087
1088 1088 if (error) {
1089 1089 VN_RELE(vp);
1090 1090 ns->ns_status = puterrno(error);
1091 1091
1092 1092 return;
1093 1093 }
1094 1094
1095 1095 if (crgetuid(cr) != va.va_uid) {
1096 1096 /*
1097 1097 * This is a kludge to allow writes of files created
1098 1098 * with read only permission. The owner of the file
1099 1099 * is always allowed to write it.
1100 1100 */
1101 1101 error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct);
1102 1102
1103 1103 if (error) {
1104 1104 VN_RELE(vp);
1105 1105 ns->ns_status = puterrno(error);
1106 1106 return;
1107 1107 }
1108 1108 }
1109 1109
1110 1110 /*
1111 1111 * Can't access a mandatory lock file. This might cause
1112 1112 * the NFS service thread to block forever waiting for a
1113 1113 * lock to be released that will never be released.
1114 1114 */
1115 1115 if (MANDLOCK(vp, va.va_mode)) {
1116 1116 VN_RELE(vp);
1117 1117 ns->ns_status = NFSERR_ACCES;
1118 1118 return;
1119 1119 }
1120 1120
1121 1121 /*
1122 1122 * We have to enter the critical region before calling VOP_RWLOCK
1123 1123 * to avoid a deadlock with ufs.
1124 1124 */
1125 1125 if (nbl_need_check(vp)) {
1126 1126 nbl_start_crit(vp, RW_READER);
1127 1127 in_crit = 1;
1128 1128 if (nbl_conflict(vp, NBL_WRITE, wa->wa_offset,
1129 1129 wa->wa_count, 0, NULL)) {
1130 1130 error = EACCES;
1131 1131 goto out;
1132 1132 }
1133 1133 }
1134 1134
1135 1135 error = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct);
1136 1136
1137 1137 /* check if a monitor detected a delegation conflict */
1138 1138 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1139 1139 goto out;
1140 1140 }
1141 1141
1142 1142 if (wa->wa_data || wa->wa_rlist) {
1143 1143 /* Do the RDMA thing if necessary */
1144 1144 if (wa->wa_rlist) {
1145 1145 iov[0].iov_base = (char *)((wa->wa_rlist)->u.c_daddr3);
1146 1146 iov[0].iov_len = wa->wa_count;
1147 1147 } else {
1148 1148 iov[0].iov_base = wa->wa_data;
1149 1149 iov[0].iov_len = wa->wa_count;
1150 1150 }
1151 1151 uio.uio_iov = iov;
1152 1152 uio.uio_iovcnt = 1;
1153 1153 uio.uio_segflg = UIO_SYSSPACE;
1154 1154 uio.uio_extflg = UIO_COPY_DEFAULT;
1155 1155 uio.uio_loffset = (offset_t)wa->wa_offset;
1156 1156 uio.uio_resid = wa->wa_count;
1157 1157 /*
1158 1158 * The limit is checked on the client. We
1159 1159 * should allow any size writes here.
1160 1160 */
1161 1161 uio.uio_llimit = curproc->p_fsz_ctl;
1162 1162 rlimit = uio.uio_llimit - wa->wa_offset;
1163 1163 if (rlimit < (rlim64_t)uio.uio_resid)
1164 1164 uio.uio_resid = (uint_t)rlimit;
1165 1165
1166 1166 /*
1167 1167 * for now we assume no append mode
1168 1168 */
1169 1169 /*
1170 1170 * We're changing creds because VM may fault and we need
1171 1171 * the cred of the current thread to be used if quota
1172 1172 * checking is enabled.
1173 1173 */
1174 1174 savecred = curthread->t_cred;
1175 1175 curthread->t_cred = cr;
1176 1176 error = VOP_WRITE(vp, &uio, FSYNC, cr, &ct);
1177 1177 curthread->t_cred = savecred;
1178 1178 } else {
1179 1179
1180 1180 iovcnt = 0;
1181 1181 for (m = wa->wa_mblk; m != NULL; m = m->b_cont)
1182 1182 iovcnt++;
1183 1183 if (iovcnt <= MAX_IOVECS) {
1184 1184 #ifdef DEBUG
1185 1185 rfs_write_sync_hits++;
1186 1186 #endif
1187 1187 iovp = iov;
1188 1188 } else {
1189 1189 #ifdef DEBUG
1190 1190 rfs_write_sync_misses++;
1191 1191 #endif
1192 1192 iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
1193 1193 }
1194 1194 mblk_to_iov(wa->wa_mblk, iovcnt, iovp);
1195 1195 uio.uio_iov = iovp;
1196 1196 uio.uio_iovcnt = iovcnt;
1197 1197 uio.uio_segflg = UIO_SYSSPACE;
1198 1198 uio.uio_extflg = UIO_COPY_DEFAULT;
1199 1199 uio.uio_loffset = (offset_t)wa->wa_offset;
1200 1200 uio.uio_resid = wa->wa_count;
1201 1201 /*
1202 1202 * The limit is checked on the client. We
1203 1203 * should allow any size writes here.
1204 1204 */
1205 1205 uio.uio_llimit = curproc->p_fsz_ctl;
1206 1206 rlimit = uio.uio_llimit - wa->wa_offset;
1207 1207 if (rlimit < (rlim64_t)uio.uio_resid)
1208 1208 uio.uio_resid = (uint_t)rlimit;
1209 1209
1210 1210 /*
1211 1211 * For now we assume no append mode.
1212 1212 */
1213 1213 /*
1214 1214 * We're changing creds because VM may fault and we need
1215 1215 * the cred of the current thread to be used if quota
1216 1216 * checking is enabled.
1217 1217 */
1218 1218 savecred = curthread->t_cred;
1219 1219 curthread->t_cred = cr;
1220 1220 error = VOP_WRITE(vp, &uio, FSYNC, cr, &ct);
1221 1221 curthread->t_cred = savecred;
1222 1222
1223 1223 if (iovp != iov)
1224 1224 kmem_free(iovp, sizeof (*iovp) * iovcnt);
1225 1225 }
1226 1226
1227 1227 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct);
1228 1228
1229 1229 if (!error) {
1230 1230 /*
1231 1231 * Get attributes again so we send the latest mod
1232 1232 * time to the client side for its cache.
1233 1233 */
1234 1234 va.va_mask = AT_ALL; /* now we want everything */
1235 1235
1236 1236 error = VOP_GETATTR(vp, &va, 0, cr, &ct);
1237 1237
1238 1238 /* check for overflows */
1239 1239 if (!error) {
1240 1240 acl_perm(vp, exi, &va, cr);
1241 1241 error = vattr_to_nattr(&va, &ns->ns_attr);
1242 1242 }
1243 1243 }
1244 1244
1245 1245 out:
1246 1246 if (in_crit)
1247 1247 nbl_end_crit(vp);
1248 1248 VN_RELE(vp);
1249 1249
1250 1250 /* check if a monitor detected a delegation conflict */
1251 1251 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
1252 1252 /* mark as wouldblock so response is dropped */
1253 1253 curthread->t_flag |= T_WOULDBLOCK;
1254 1254 else
1255 1255 ns->ns_status = puterrno(error);
1256 1256
1257 1257 }
1258 1258
/*
 * One queued NFSv2 WRITE request awaiting clustered processing in
 * rfs_write().  The queued thread sleeps until ns->ns_status changes
 * from RFSWRITE_INITVAL, which the cluster owner sets when done.
 */
struct rfs_async_write {
	struct nfswriteargs *wa;	/* decoded WRITE arguments */
	struct nfsattrstat *ns;		/* reply; ns_status doubles as state */
	struct svc_req *req;		/* originating RPC request */
	cred_t *cr;			/* credentials of this requester */
	bool_t ro;			/* read-only export for this client */
	kthread_t *thread;		/* service thread parked on this entry */
	struct rfs_async_write *list;	/* next request, sorted by wa_offset */
};
1268 1268
/*
 * A write "cluster": all pending WRITE requests against one file
 * handle.  Created by the first thread to arrive; torn off the global
 * list once the owner takes the file's write lock.
 */
struct rfs_async_write_list {
	fhandle_t *fhp;			/* file handle shared by the cluster */
	kcondvar_t cv;			/* broadcast when requests are serviced */
	struct rfs_async_write *list;	/* queued requests, by offset */
	struct rfs_async_write_list *next;	/* next active cluster */
};
1275 1275
/*
 * NOTE(review): rfs_write() below now uses the per-zone clustering state
 * in nfs_srv_t (nsrv->async_write_head, nsrv->async_write_lock,
 * nsrv->write_async).  These file-scope globals look vestigial -- confirm
 * there are no remaining consumers before removing them.
 */
static struct rfs_async_write_list *rfs_async_write_head = NULL;
static kmutex_t rfs_async_write_lock;
static int rfs_write_async = 1;	/* enables write clustering if == 1 */

/* Max iovecs gathered on-stack per clustered VOP_WRITE; else kmem_alloc. */
#define	MAXCLIOVECS	42
/* Sentinel meaning "request not yet serviced" (0 would read as NFS_OK). */
#define	RFSWRITE_INITVAL	(enum nfsstat) -1

#ifdef DEBUG
static int	rfs_write_hits = 0;	/* clusters that fit in iov[] */
static int	rfs_write_misses = 0;	/* clusters needing kmem_alloc */
#endif
1287 1287
/*
 * Write data to file.
 * Returns attributes of a file after writing some data to it.
 *
 * NFSv2 writes are synchronous, so for throughput the server clusters
 * concurrent writes to the same file: the first thread to arrive
 * creates a cluster and takes the file's write lock; requests arriving
 * while it waits are queued on the cluster (sorted by offset) and
 * their threads sleep.  The cluster owner services every queued
 * request, coalescing contiguous ranges into single VOP_WRITE calls,
 * fills in each waiter's status, and broadcasts the cluster condvar.
 */
void
rfs_write(struct nfswriteargs *wa, struct nfsattrstat *ns,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error;
	vnode_t *vp;
	rlim64_t rlimit;
	struct vattr va;
	struct uio uio;
	struct rfs_async_write_list *lp;
	struct rfs_async_write_list *nlp;
	struct rfs_async_write *rp;
	struct rfs_async_write *nrp;
	struct rfs_async_write *trp;
	struct rfs_async_write *lrp;
	int data_written;
	int iovcnt;
	mblk_t *m;
	struct iovec *iovp;
	struct iovec *niovp;
	struct iovec iov[MAXCLIOVECS];
	int count;
	int rcount;
	uint_t off;
	uint_t len;
	struct rfs_async_write nrpsp;
	struct rfs_async_write_list nlpsp;
	ushort_t t_flag;
	cred_t *savecred;
	int in_crit = 0;
	caller_context_t ct;
	nfs_srv_t *nsrv;

	ASSERT(exi == NULL || exi->exi_zoneid == curzone->zone_id);
	nsrv = nfs_get_srv();
	/* Clustering disabled for this zone: fall back to the simple path. */
	if (!nsrv->write_async) {
		rfs_write_sync(wa, ns, exi, req, cr, ro);
		return;
	}

	/*
	 * Initialize status to RFSWRITE_INITVAL instead of 0, since value of 0
	 * is considered an OK.
	 */
	ns->ns_status = RFSWRITE_INITVAL;

	/* Build this request's cluster entry on our own stack. */
	nrp = &nrpsp;
	nrp->wa = wa;
	nrp->ns = ns;
	nrp->req = req;
	nrp->cr = cr;
	nrp->ro = ro;
	nrp->thread = curthread;

	/* Waiters park on a stack-allocated cv; they must not be swapped. */
	ASSERT(curthread->t_schedflag & TS_DONT_SWAP);

	/*
	 * Look to see if there is already a cluster started
	 * for this file.
	 */
	mutex_enter(&nsrv->async_write_lock);
	for (lp = nsrv->async_write_head; lp != NULL; lp = lp->next) {
		if (bcmp(&wa->wa_fhandle, lp->fhp,
		    sizeof (fhandle_t)) == 0)
			break;
	}

	/*
	 * If lp is non-NULL, then there is already a cluster
	 * started.  We need to place ourselves in the cluster
	 * list in the right place as determined by starting
	 * offset.  Conflicts with non-blocking mandatory locked
	 * regions will be checked when the cluster is processed.
	 */
	if (lp != NULL) {
		rp = lp->list;
		trp = NULL;
		while (rp != NULL && rp->wa->wa_offset < wa->wa_offset) {
			trp = rp;
			rp = rp->list;
		}
		nrp->list = rp;
		if (trp == NULL)
			lp->list = nrp;
		else
			trp->list = nrp;
		/* Sleep until the cluster owner has serviced this request. */
		while (nrp->ns->ns_status == RFSWRITE_INITVAL)
			cv_wait(&lp->cv, &nsrv->async_write_lock);
		mutex_exit(&nsrv->async_write_lock);

		return;
	}

	/*
	 * No cluster started yet, start one and add ourselves
	 * to the list of clusters.
	 */
	nrp->list = NULL;

	nlp = &nlpsp;
	nlp->fhp = &wa->wa_fhandle;
	cv_init(&nlp->cv, NULL, CV_DEFAULT, NULL);
	nlp->list = nrp;
	nlp->next = NULL;

	if (nsrv->async_write_head == NULL) {
		nsrv->async_write_head = nlp;
	} else {
		lp = nsrv->async_write_head;
		while (lp->next != NULL)
			lp = lp->next;
		lp->next = nlp;
	}
	mutex_exit(&nsrv->async_write_lock);

	/*
	 * Convert the file handle common to all of the requests
	 * in this cluster to a vnode.
	 */
	vp = nfs_fhtovp(&wa->wa_fhandle, exi);
	if (vp == NULL) {
		/* Unlink the cluster and fail every queued request. */
		mutex_enter(&nsrv->async_write_lock);
		if (nsrv->async_write_head == nlp)
			nsrv->async_write_head = nlp->next;
		else {
			lp = nsrv->async_write_head;
			while (lp->next != nlp)
				lp = lp->next;
			lp->next = nlp->next;
		}
		t_flag = curthread->t_flag & T_WOULDBLOCK;
		for (rp = nlp->list; rp != NULL; rp = rp->list) {
			rp->ns->ns_status = NFSERR_STALE;
			rp->thread->t_flag |= t_flag;
		}
		cv_broadcast(&nlp->cv);
		mutex_exit(&nsrv->async_write_lock);

		return;
	}

	/*
	 * Can only write regular files.  Attempts to write any
	 * other file types fail with EISDIR.
	 */
	if (vp->v_type != VREG) {
		VN_RELE(vp);
		/* Unlink the cluster and fail every queued request. */
		mutex_enter(&nsrv->async_write_lock);
		if (nsrv->async_write_head == nlp)
			nsrv->async_write_head = nlp->next;
		else {
			lp = nsrv->async_write_head;
			while (lp->next != nlp)
				lp = lp->next;
			lp->next = nlp->next;
		}
		t_flag = curthread->t_flag & T_WOULDBLOCK;
		for (rp = nlp->list; rp != NULL; rp = rp->list) {
			rp->ns->ns_status = NFSERR_ISDIR;
			rp->thread->t_flag |= t_flag;
		}
		cv_broadcast(&nlp->cv);
		mutex_exit(&nsrv->async_write_lock);

		return;
	}

	/*
	 * Enter the critical region before calling VOP_RWLOCK, to avoid a
	 * deadlock with ufs.
	 */
	if (nbl_need_check(vp)) {
		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
	}

	ct.cc_sysid = 0;
	ct.cc_pid = 0;
	ct.cc_caller_id = nfs2_srv_caller_id;
	ct.cc_flags = CC_DONTBLOCK;

	/*
	 * Lock the file for writing.  This operation provides
	 * the delay which allows clusters to grow.
	 */
	error = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct);

	/* check if a monitor detected a delegation conflict */
	if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
		if (in_crit)
			nbl_end_crit(vp);
		VN_RELE(vp);
		/* mark as wouldblock so response is dropped */
		curthread->t_flag |= T_WOULDBLOCK;
		/* Unlink the cluster; fail any still-pending requests. */
		mutex_enter(&nsrv->async_write_lock);
		if (nsrv->async_write_head == nlp)
			nsrv->async_write_head = nlp->next;
		else {
			lp = nsrv->async_write_head;
			while (lp->next != nlp)
				lp = lp->next;
			lp->next = nlp->next;
		}
		for (rp = nlp->list; rp != NULL; rp = rp->list) {
			if (rp->ns->ns_status == RFSWRITE_INITVAL) {
				rp->ns->ns_status = puterrno(error);
				rp->thread->t_flag |= T_WOULDBLOCK;
			}
		}
		cv_broadcast(&nlp->cv);
		mutex_exit(&nsrv->async_write_lock);

		return;
	}

	/*
	 * Disconnect this cluster from the list of clusters.
	 * The cluster that is being dealt with must be fixed
	 * in size after this point, so there is no reason
	 * to leave it on the list so that new requests can
	 * find it.
	 *
	 * The algorithm is that the first write request will
	 * create a cluster, convert the file handle to a
	 * vnode pointer, and then lock the file for writing.
	 * This request is not likely to be clustered with
	 * any others.  However, the next request will create
	 * a new cluster and be blocked in VOP_RWLOCK while
	 * the first request is being processed.  This delay
	 * will allow more requests to be clustered in this
	 * second cluster.
	 */
	mutex_enter(&nsrv->async_write_lock);
	if (nsrv->async_write_head == nlp)
		nsrv->async_write_head = nlp->next;
	else {
		lp = nsrv->async_write_head;
		while (lp->next != nlp)
			lp = lp->next;
		lp->next = nlp->next;
	}
	mutex_exit(&nsrv->async_write_lock);

	/*
	 * Step through the list of requests in this cluster.
	 * We need to check permissions to make sure that all
	 * of the requests have sufficient permission to write
	 * the file.  A cluster can be composed of requests
	 * from different clients and different users on each
	 * client.
	 *
	 * As a side effect, we also calculate the size of the
	 * byte range that this cluster encompasses.
	 */
	rp = nlp->list;
	off = rp->wa->wa_offset;
	len = (uint_t)0;
	do {
		if (rdonly(rp->ro, vp)) {
			rp->ns->ns_status = NFSERR_ROFS;
			t_flag = curthread->t_flag & T_WOULDBLOCK;
			rp->thread->t_flag |= t_flag;
			continue;
		}

		va.va_mask = AT_UID|AT_MODE;

		error = VOP_GETATTR(vp, &va, 0, rp->cr, &ct);

		if (!error) {
			if (crgetuid(rp->cr) != va.va_uid) {
				/*
				 * This is a kludge to allow writes of files
				 * created with read only permission.  The
				 * owner of the file is always allowed to
				 * write it.
				 */
				error = VOP_ACCESS(vp, VWRITE, 0, rp->cr, &ct);
			}
			/* Refuse mandatory-locked files: could block forever. */
			if (!error && MANDLOCK(vp, va.va_mode))
				error = EACCES;
		}

		/*
		 * Check for a conflict with a nbmand-locked region.
		 */
		if (in_crit && nbl_conflict(vp, NBL_WRITE, rp->wa->wa_offset,
		    rp->wa->wa_count, 0, NULL)) {
			error = EACCES;
		}

		if (error) {
			rp->ns->ns_status = puterrno(error);
			t_flag = curthread->t_flag & T_WOULDBLOCK;
			rp->thread->t_flag |= t_flag;
			continue;
		}
		/* Grow [off, off+len) to cover this request's range. */
		if (len < rp->wa->wa_offset + rp->wa->wa_count - off)
			len = rp->wa->wa_offset + rp->wa->wa_count - off;
	} while ((rp = rp->list) != NULL);

	/*
	 * Step through the cluster attempting to gather as many
	 * requests which are contiguous as possible.  These
	 * contiguous requests are handled via one call to VOP_WRITE
	 * instead of different calls to VOP_WRITE.  We also keep
	 * track of the fact that any data was written.
	 */
	rp = nlp->list;
	data_written = 0;
	do {
		/*
		 * Skip any requests which are already marked as having an
		 * error.
		 */
		if (rp->ns->ns_status != RFSWRITE_INITVAL) {
			rp = rp->list;
			continue;
		}

		/*
		 * Count the number of iovec's which are required
		 * to handle this set of requests.  One iovec is
		 * needed for each data buffer, whether addressed
		 * by wa_data or by the b_rptr pointers in the
		 * mblk chains.
		 */
		iovcnt = 0;
		lrp = rp;
		for (;;) {
			if (lrp->wa->wa_data || lrp->wa->wa_rlist)
				iovcnt++;
			else {
				m = lrp->wa->wa_mblk;
				while (m != NULL) {
					iovcnt++;
					m = m->b_cont;
				}
			}
			/*
			 * Stop extending the run at the first gap,
			 * errored request, or end of the cluster;
			 * lrp ends up one past the last coalesced
			 * request.
			 */
			if (lrp->list == NULL ||
			    lrp->list->ns->ns_status != RFSWRITE_INITVAL ||
			    lrp->wa->wa_offset + lrp->wa->wa_count !=
			    lrp->list->wa->wa_offset) {
				lrp = lrp->list;
				break;
			}
			lrp = lrp->list;
		}

		if (iovcnt <= MAXCLIOVECS) {
#ifdef DEBUG
			rfs_write_hits++;
#endif
			niovp = iov;
		} else {
#ifdef DEBUG
			rfs_write_misses++;
#endif
			niovp = kmem_alloc(sizeof (*niovp) * iovcnt, KM_SLEEP);
		}
		/*
		 * Put together the scatter/gather iovecs.
		 */
		iovp = niovp;
		trp = rp;
		count = 0;
		do {
			if (trp->wa->wa_data || trp->wa->wa_rlist) {
				/* Flat buffer (or RDMA chunk) case. */
				if (trp->wa->wa_rlist) {
					iovp->iov_base =
					    (char *)((trp->wa->wa_rlist)->
					    u.c_daddr3);
					iovp->iov_len = trp->wa->wa_count;
				} else {
					iovp->iov_base = trp->wa->wa_data;
					iovp->iov_len = trp->wa->wa_count;
				}
				iovp++;
			} else {
				/*
				 * mblk chain: one iovec per mblk, clamped
				 * so the total doesn't exceed wa_count.
				 */
				m = trp->wa->wa_mblk;
				rcount = trp->wa->wa_count;
				while (m != NULL) {
					iovp->iov_base = (caddr_t)m->b_rptr;
					iovp->iov_len = (m->b_wptr - m->b_rptr);
					rcount -= iovp->iov_len;
					if (rcount < 0)
						iovp->iov_len += rcount;
					iovp++;
					if (rcount <= 0)
						break;
					m = m->b_cont;
				}
			}
			count += trp->wa->wa_count;
			trp = trp->list;
		} while (trp != lrp);

		uio.uio_iov = niovp;
		uio.uio_iovcnt = iovcnt;
		uio.uio_segflg = UIO_SYSSPACE;
		uio.uio_extflg = UIO_COPY_DEFAULT;
		uio.uio_loffset = (offset_t)rp->wa->wa_offset;
		uio.uio_resid = count;
		/*
		 * The limit is checked on the client. We
		 * should allow any size writes here.
		 */
		uio.uio_llimit = curproc->p_fsz_ctl;
		rlimit = uio.uio_llimit - rp->wa->wa_offset;
		if (rlimit < (rlim64_t)uio.uio_resid)
			uio.uio_resid = (uint_t)rlimit;

		/*
		 * For now we assume no append mode.
		 */

		/*
		 * We're changing creds because VM may fault
		 * and we need the cred of the current
		 * thread to be used if quota * checking is
		 * enabled.
		 *
		 * NOTE(review): t_cred is temporarily set to the
		 * cluster owner's cr while the write itself is issued
		 * with the individual requester's rp->cr -- confirm
		 * this asymmetry is intentional.
		 */
		savecred = curthread->t_cred;
		curthread->t_cred = cr;
		error = VOP_WRITE(vp, &uio, 0, rp->cr, &ct);
		curthread->t_cred = savecred;

		/* check if a monitor detected a delegation conflict */
		if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
			/* mark as wouldblock so response is dropped */
			curthread->t_flag |= T_WOULDBLOCK;

		if (niovp != iov)
			kmem_free(niovp, sizeof (*niovp) * iovcnt);

		if (!error) {
			data_written = 1;
			/*
			 * Get attributes again so we send the latest mod
			 * time to the client side for its cache.
			 */
			va.va_mask = AT_ALL;	/* now we want everything */

			error = VOP_GETATTR(vp, &va, 0, rp->cr, &ct);

			if (!error)
				acl_perm(vp, exi, &va, rp->cr);
		}

		/*
		 * Fill in the status responses for each request
		 * which was just handled.  Also, copy the latest
		 * attributes in to the attribute responses if
		 * appropriate.
		 */
		t_flag = curthread->t_flag & T_WOULDBLOCK;
		do {
			rp->thread->t_flag |= t_flag;
			/* check for overflows */
			if (!error) {
				error = vattr_to_nattr(&va, &rp->ns->ns_attr);
			}
			rp->ns->ns_status = puterrno(error);
			rp = rp->list;
		} while (rp != lrp);
	} while (rp != NULL);

	/*
	 * If any data was written at all, then we need to flush
	 * the data and metadata to stable storage.
	 */
	if (data_written) {
		error = VOP_PUTPAGE(vp, (u_offset_t)off, len, 0, cr, &ct);

		if (!error) {
			error = VOP_FSYNC(vp, FNODSYNC, cr, &ct);
		}
	}

	VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct);

	if (in_crit)
		nbl_end_crit(vp);
	VN_RELE(vp);

	/* Wake everyone still waiting; propagate the flush error if any. */
	t_flag = curthread->t_flag & T_WOULDBLOCK;
	mutex_enter(&nsrv->async_write_lock);
	for (rp = nlp->list; rp != NULL; rp = rp->list) {
		if (rp->ns->ns_status == RFSWRITE_INITVAL) {
			rp->ns->ns_status = puterrno(error);
			rp->thread->t_flag |= t_flag;
		}
	}
	cv_broadcast(&nlp->cv);
	mutex_exit(&nsrv->async_write_lock);

}
1789 1789
1790 1790 void *
1791 1791 rfs_write_getfh(struct nfswriteargs *wa)
1792 1792 {
1793 1793 return (&wa->wa_fhandle);
1794 1794 }
1795 1795
1796 1796 /*
1797 1797 * Create a file.
1798 1798 * Creates a file with given attributes and returns those attributes
1799 1799 * and an fhandle for the new file.
1800 1800 */
1801 1801 void
1802 1802 rfs_create(struct nfscreatargs *args, struct nfsdiropres *dr,
1803 1803 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
1804 1804 {
1805 1805 int error;
1806 1806 int lookuperr;
1807 1807 int in_crit = 0;
1808 1808 struct vattr va;
1809 1809 vnode_t *vp;
1810 1810 vnode_t *realvp;
1811 1811 vnode_t *dvp;
1812 1812 char *name = args->ca_da.da_name;
1813 1813 vnode_t *tvp = NULL;
1814 1814 int mode;
1815 1815 int lookup_ok;
1816 1816 bool_t trunc;
1817 1817 struct sockaddr *ca;
1818 1818
1819 1819 /*
1820 1820 * Disallow NULL paths
1821 1821 */
1822 1822 if (name == NULL || *name == '\0') {
1823 1823 dr->dr_status = NFSERR_ACCES;
1824 1824 return;
1825 1825 }
1826 1826
1827 1827 dvp = nfs_fhtovp(args->ca_da.da_fhandle, exi);
1828 1828 if (dvp == NULL) {
1829 1829 dr->dr_status = NFSERR_STALE;
1830 1830 return;
1831 1831 }
1832 1832
1833 1833 error = sattr_to_vattr(args->ca_sa, &va);
1834 1834 if (error) {
1835 1835 dr->dr_status = puterrno(error);
1836 1836 return;
1837 1837 }
1838 1838
1839 1839 /*
1840 1840 * Must specify the mode.
1841 1841 */
1842 1842 if (!(va.va_mask & AT_MODE)) {
1843 1843 VN_RELE(dvp);
1844 1844 dr->dr_status = NFSERR_INVAL;
1845 1845 return;
1846 1846 }
1847 1847
1848 1848 /*
1849 1849 * This is a completely gross hack to make mknod
1850 1850 * work over the wire until we can wack the protocol
1851 1851 */
1852 1852 if ((va.va_mode & IFMT) == IFCHR) {
1853 1853 if (args->ca_sa->sa_size == (uint_t)NFS_FIFO_DEV)
1854 1854 va.va_type = VFIFO; /* xtra kludge for named pipe */
1855 1855 else {
1856 1856 va.va_type = VCHR;
1857 1857 /*
1858 1858 * uncompress the received dev_t
1859 1859 * if the top half is zero indicating a request
1860 1860 * from an `older style' OS.
1861 1861 */
1862 1862 if ((va.va_size & 0xffff0000) == 0)
1863 1863 va.va_rdev = nfsv2_expdev(va.va_size);
1864 1864 else
1865 1865 va.va_rdev = (dev_t)va.va_size;
1866 1866 }
1867 1867 va.va_mask &= ~AT_SIZE;
1868 1868 } else if ((va.va_mode & IFMT) == IFBLK) {
1869 1869 va.va_type = VBLK;
1870 1870 /*
1871 1871 * uncompress the received dev_t
1872 1872 * if the top half is zero indicating a request
1873 1873 * from an `older style' OS.
1874 1874 */
1875 1875 if ((va.va_size & 0xffff0000) == 0)
1876 1876 va.va_rdev = nfsv2_expdev(va.va_size);
1877 1877 else
1878 1878 va.va_rdev = (dev_t)va.va_size;
1879 1879 va.va_mask &= ~AT_SIZE;
1880 1880 } else if ((va.va_mode & IFMT) == IFSOCK) {
1881 1881 va.va_type = VSOCK;
1882 1882 } else {
1883 1883 va.va_type = VREG;
1884 1884 }
1885 1885 va.va_mode &= ~IFMT;
1886 1886 va.va_mask |= AT_TYPE;
1887 1887
1888 1888 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1889 1889 name = nfscmd_convname(ca, exi, name, NFSCMD_CONV_INBOUND,
1890 1890 MAXPATHLEN);
1891 1891 if (name == NULL) {
1892 1892 dr->dr_status = puterrno(EINVAL);
1893 1893 return;
1894 1894 }
1895 1895
1896 1896 /*
1897 1897 * Why was the choice made to use VWRITE as the mode to the
1898 1898 * call to VOP_CREATE ? This results in a bug. When a client
1899 1899 * opens a file that already exists and is RDONLY, the second
1900 1900 * open fails with an EACESS because of the mode.
1901 1901 * bug ID 1054648.
1902 1902 */
1903 1903 lookup_ok = 0;
1904 1904 mode = VWRITE;
1905 1905 if (!(va.va_mask & AT_SIZE) || va.va_type != VREG) {
1906 1906 error = VOP_LOOKUP(dvp, name, &tvp, NULL, 0, NULL, cr,
1907 1907 NULL, NULL, NULL);
1908 1908 if (!error) {
1909 1909 struct vattr at;
1910 1910
1911 1911 lookup_ok = 1;
1912 1912 at.va_mask = AT_MODE;
1913 1913 error = VOP_GETATTR(tvp, &at, 0, cr, NULL);
1914 1914 if (!error)
1915 1915 mode = (at.va_mode & S_IWUSR) ? VWRITE : VREAD;
1916 1916 VN_RELE(tvp);
1917 1917 tvp = NULL;
1918 1918 }
1919 1919 }
1920 1920
1921 1921 if (!lookup_ok) {
1922 1922 if (rdonly(ro, dvp)) {
1923 1923 error = EROFS;
1924 1924 } else if (va.va_type != VREG && va.va_type != VFIFO &&
1925 1925 va.va_type != VSOCK && secpolicy_sys_devices(cr) != 0) {
1926 1926 error = EPERM;
1927 1927 } else {
1928 1928 error = 0;
1929 1929 }
1930 1930 }
1931 1931
1932 1932 /*
1933 1933 * If file size is being modified on an already existing file
1934 1934 * make sure that there are no conflicting non-blocking mandatory
1935 1935 * locks in the region being manipulated. Return EACCES if there
1936 1936 * are conflicting locks.
1937 1937 */
1938 1938 if (!error && (va.va_type == VREG) && (va.va_mask & AT_SIZE)) {
1939 1939 lookuperr = VOP_LOOKUP(dvp, name, &tvp, NULL, 0, NULL, cr,
1940 1940 NULL, NULL, NULL);
1941 1941
1942 1942 if (!lookuperr &&
1943 1943 rfs4_check_delegated(FWRITE, tvp, va.va_size == 0)) {
1944 1944 VN_RELE(tvp);
1945 1945 curthread->t_flag |= T_WOULDBLOCK;
1946 1946 goto out;
1947 1947 }
1948 1948
1949 1949 if (!lookuperr && nbl_need_check(tvp)) {
1950 1950 /*
1951 1951 * The file exists. Now check if it has any
1952 1952 * conflicting non-blocking mandatory locks
1953 1953 * in the region being changed.
1954 1954 */
1955 1955 struct vattr bva;
1956 1956 u_offset_t offset;
1957 1957 ssize_t length;
1958 1958
1959 1959 nbl_start_crit(tvp, RW_READER);
1960 1960 in_crit = 1;
1961 1961
1962 1962 bva.va_mask = AT_SIZE;
1963 1963 error = VOP_GETATTR(tvp, &bva, 0, cr, NULL);
1964 1964 if (!error) {
1965 1965 if (va.va_size < bva.va_size) {
1966 1966 offset = va.va_size;
1967 1967 length = bva.va_size - va.va_size;
1968 1968 } else {
1969 1969 offset = bva.va_size;
1970 1970 length = va.va_size - bva.va_size;
1971 1971 }
1972 1972 if (length) {
1973 1973 if (nbl_conflict(tvp, NBL_WRITE,
1974 1974 offset, length, 0, NULL)) {
1975 1975 error = EACCES;
1976 1976 }
1977 1977 }
1978 1978 }
1979 1979 if (error) {
1980 1980 nbl_end_crit(tvp);
1981 1981 VN_RELE(tvp);
1982 1982 in_crit = 0;
1983 1983 }
1984 1984 } else if (tvp != NULL) {
1985 1985 VN_RELE(tvp);
1986 1986 }
1987 1987 }
1988 1988
1989 1989 if (!error) {
1990 1990 /*
1991 1991 * If filesystem is shared with nosuid the remove any
1992 1992 * setuid/setgid bits on create.
1993 1993 */
1994 1994 if (va.va_type == VREG &&
1995 1995 exi->exi_export.ex_flags & EX_NOSUID)
1996 1996 va.va_mode &= ~(VSUID | VSGID);
1997 1997
1998 1998 error = VOP_CREATE(dvp, name, &va, NONEXCL, mode, &vp, cr, 0,
1999 1999 NULL, NULL);
2000 2000
2001 2001 if (!error) {
2002 2002
2003 2003 if ((va.va_mask & AT_SIZE) && (va.va_size == 0))
2004 2004 trunc = TRUE;
2005 2005 else
2006 2006 trunc = FALSE;
2007 2007
2008 2008 if (rfs4_check_delegated(FWRITE, vp, trunc)) {
2009 2009 VN_RELE(vp);
2010 2010 curthread->t_flag |= T_WOULDBLOCK;
2011 2011 goto out;
2012 2012 }
2013 2013 va.va_mask = AT_ALL;
2014 2014
2015 2015 error = VOP_GETATTR(vp, &va, 0, cr, NULL);
2016 2016
2017 2017 /* check for overflows */
2018 2018 if (!error) {
2019 2019 acl_perm(vp, exi, &va, cr);
2020 2020 error = vattr_to_nattr(&va, &dr->dr_attr);
2021 2021 if (!error) {
2022 2022 error = makefh(&dr->dr_fhandle, vp,
2023 2023 exi);
2024 2024 }
2025 2025 }
2026 2026 /*
2027 2027 * Force modified metadata out to stable storage.
2028 2028 *
2029 2029 * if a underlying vp exists, pass it to VOP_FSYNC
2030 2030 */
2031 2031 if (VOP_REALVP(vp, &realvp, NULL) == 0)
2032 2032 (void) VOP_FSYNC(realvp, FNODSYNC, cr, NULL);
2033 2033 else
2034 2034 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
2035 2035 VN_RELE(vp);
2036 2036 }
2037 2037
2038 2038 if (in_crit) {
2039 2039 nbl_end_crit(tvp);
2040 2040 VN_RELE(tvp);
2041 2041 }
2042 2042 }
2043 2043
2044 2044 /*
2045 2045 * Force modified data and metadata out to stable storage.
2046 2046 */
2047 2047 (void) VOP_FSYNC(dvp, 0, cr, NULL);
2048 2048
2049 2049 out:
2050 2050
2051 2051 VN_RELE(dvp);
2052 2052
2053 2053 dr->dr_status = puterrno(error);
2054 2054
2055 2055 if (name != args->ca_da.da_name)
2056 2056 kmem_free(name, MAXPATHLEN);
2057 2057 }
2058 2058 void *
2059 2059 rfs_create_getfh(struct nfscreatargs *args)
2060 2060 {
2061 2061 return (args->ca_da.da_fhandle);
2062 2062 }
2063 2063
/*
 * Remove a file.
 * Remove named file from parent directory.
 *
 * Order matters here: the target is looked up first so that a v4
 * delegation can be recalled and nbmand conflicts detected before the
 * actual VOP_REMOVE.
 */
/* ARGSUSED */
void
rfs_remove(struct nfsdiropargs *da, enum nfsstat *status,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error = 0;
	vnode_t *vp;		/* directory containing the entry */
	vnode_t *targvp;	/* the entry being removed */
	int in_crit = 0;	/* inside targvp's nbmand critical region */

	/*
	 * Disallow NULL paths
	 */
	if (da->da_name == NULL || *da->da_name == '\0') {
		*status = NFSERR_ACCES;
		return;
	}

	vp = nfs_fhtovp(da->da_fhandle, exi);
	if (vp == NULL) {
		*status = NFSERR_STALE;
		return;
	}

	if (rdonly(ro, vp)) {
		VN_RELE(vp);
		*status = NFSERR_ROFS;
		return;
	}

	/*
	 * Check for a conflict with a non-blocking mandatory share reservation.
	 */
	error = VOP_LOOKUP(vp, da->da_name, &targvp, NULL, 0,
	    NULL, cr, NULL, NULL, NULL);
	if (error != 0) {
		VN_RELE(vp);
		*status = puterrno(error);
		return;
	}

	/*
	 * If the file is delegated to an v4 client, then initiate
	 * recall and drop this request (by setting T_WOULDBLOCK).
	 * The client will eventually re-transmit the request and
	 * (hopefully), by then, the v4 client will have returned
	 * the delegation.
	 */

	if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
		VN_RELE(vp);
		VN_RELE(targvp);
		curthread->t_flag |= T_WOULDBLOCK;
		return;
	}

	if (nbl_need_check(targvp)) {
		nbl_start_crit(targvp, RW_READER);
		in_crit = 1;
		if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
			error = EACCES;
			goto out;
		}
	}

	error = VOP_REMOVE(vp, da->da_name, cr, NULL, 0);

	/*
	 * Force modified data and metadata out to stable storage.
	 */
	(void) VOP_FSYNC(vp, 0, cr, NULL);

out:
	if (in_crit)
		nbl_end_crit(targvp);
	VN_RELE(targvp);
	VN_RELE(vp);

	*status = puterrno(error);

}
2149 2149
2150 2150 void *
2151 2151 rfs_remove_getfh(struct nfsdiropargs *da)
2152 2152 {
2153 2153 return (da->da_fhandle);
2154 2154 }
2155 2155
/*
 * rename a file
 * Give a file (from) a new name (to).
 */
/* ARGSUSED */
void
rfs_rename(struct nfsrnmargs *args, enum nfsstat *status,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error = 0;
	vnode_t *fromvp;	/* source directory */
	vnode_t *tovp;		/* target directory */
	struct exportinfo *to_exi;
	fhandle_t *fh;
	vnode_t *srcvp;		/* the file being renamed */
	vnode_t *targvp;	/* existing file being renamed over, if any */
	int in_crit = 0;	/* nonzero while srcvp is in an nbmand critical region */

	/* Translate the source directory handle into a held vnode. */
	fromvp = nfs_fhtovp(args->rna_from.da_fhandle, exi);
	if (fromvp == NULL) {
		*status = NFSERR_STALE;
		return;
	}

	/*
	 * The target handle must resolve to the same export this request
	 * arrived on; cross-export renames are rejected with NFSERR_XDEV.
	 */
	fh = args->rna_to.da_fhandle;
	to_exi = checkexport(&fh->fh_fsid, (fid_t *)&fh->fh_xlen);
	if (to_exi == NULL) {
		VN_RELE(fromvp);
		*status = NFSERR_ACCES;
		return;
	}
	/* Only the pointer identity is compared below, so drop the hold now. */
	exi_rele(to_exi);

	if (to_exi != exi) {
		VN_RELE(fromvp);
		*status = NFSERR_XDEV;
		return;
	}

	tovp = nfs_fhtovp(args->rna_to.da_fhandle, exi);
	if (tovp == NULL) {
		VN_RELE(fromvp);
		*status = NFSERR_STALE;
		return;
	}

	/* Both handles must name directories. */
	if (fromvp->v_type != VDIR || tovp->v_type != VDIR) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_NOTDIR;
		return;
	}

	/*
	 * Disallow NULL paths
	 */
	if (args->rna_from.da_name == NULL || *args->rna_from.da_name == '\0' ||
	    args->rna_to.da_name == NULL || *args->rna_to.da_name == '\0') {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_ACCES;
		return;
	}

	if (rdonly(ro, tovp)) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_ROFS;
		return;
	}

	/*
	 * Check for a conflict with a non-blocking mandatory share reservation.
	 */
	error = VOP_LOOKUP(fromvp, args->rna_from.da_name, &srcvp, NULL, 0,
	    NULL, cr, NULL, NULL, NULL);
	if (error != 0) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = puterrno(error);
		return;
	}

	/* Check for delegations on the source file */

	if (rfs4_check_delegated(FWRITE, srcvp, FALSE)) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		VN_RELE(srcvp);
		/* Drop the request; the client retries after the recall. */
		curthread->t_flag |= T_WOULDBLOCK;
		return;
	}

	/* Check for delegation on the file being renamed over, if it exists */

	if (nfs4_get_deleg_policy() != SRV_NEVER_DELEGATE &&
	    VOP_LOOKUP(tovp, args->rna_to.da_name, &targvp, NULL, 0, NULL, cr,
	    NULL, NULL, NULL) == 0) {

		if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
			VN_RELE(tovp);
			VN_RELE(fromvp);
			VN_RELE(srcvp);
			VN_RELE(targvp);
			curthread->t_flag |= T_WOULDBLOCK;
			return;
		}
		VN_RELE(targvp);
	}


	if (nbl_need_check(srcvp)) {
		nbl_start_crit(srcvp, RW_READER);
		in_crit = 1;
		if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL)) {
			error = EACCES;
			goto out;
		}
	}

	error = VOP_RENAME(fromvp, args->rna_from.da_name,
	    tovp, args->rna_to.da_name, cr, NULL, 0);

	/* Keep the renamed vnode's cached pathname current. */
	if (error == 0)
		vn_renamepath(tovp, srcvp, args->rna_to.da_name,
		    strlen(args->rna_to.da_name));

	/*
	 * Force modified data and metadata out to stable storage.
	 */
	(void) VOP_FSYNC(tovp, 0, cr, NULL);
	(void) VOP_FSYNC(fromvp, 0, cr, NULL);

out:
	if (in_crit)
		nbl_end_crit(srcvp);
	VN_RELE(srcvp);
	VN_RELE(tovp);
	VN_RELE(fromvp);

	*status = puterrno(error);

}
2299 2299 void *
2300 2300 rfs_rename_getfh(struct nfsrnmargs *args)
2301 2301 {
2302 2302 return (args->rna_from.da_fhandle);
2303 2303 }
2304 2304
/*
 * Link to a file.
 * Create a file (to) which is a hard link to the given file (from).
 */
/* ARGSUSED */
void
rfs_link(struct nfslinkargs *args, enum nfsstat *status,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error;
	vnode_t *fromvp;	/* existing file */
	vnode_t *tovp;		/* directory that will contain the new link */
	struct exportinfo *to_exi;
	fhandle_t *fh;

	fromvp = nfs_fhtovp(args->la_from, exi);
	if (fromvp == NULL) {
		*status = NFSERR_STALE;
		return;
	}

	/*
	 * The target directory must live in the same export the request
	 * arrived on; hard links may not cross exports.
	 */
	fh = args->la_to.da_fhandle;
	to_exi = checkexport(&fh->fh_fsid, (fid_t *)&fh->fh_xlen);
	if (to_exi == NULL) {
		VN_RELE(fromvp);
		*status = NFSERR_ACCES;
		return;
	}
	/* Only the pointer identity is compared below, so drop the hold now. */
	exi_rele(to_exi);

	if (to_exi != exi) {
		VN_RELE(fromvp);
		*status = NFSERR_XDEV;
		return;
	}

	tovp = nfs_fhtovp(args->la_to.da_fhandle, exi);
	if (tovp == NULL) {
		VN_RELE(fromvp);
		*status = NFSERR_STALE;
		return;
	}

	if (tovp->v_type != VDIR) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_NOTDIR;
		return;
	}
	/*
	 * Disallow NULL paths
	 */
	if (args->la_to.da_name == NULL || *args->la_to.da_name == '\0') {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_ACCES;
		return;
	}

	if (rdonly(ro, tovp)) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_ROFS;
		return;
	}

	error = VOP_LINK(tovp, fromvp, args->la_to.da_name, cr, NULL, 0);

	/*
	 * Force modified data and metadata out to stable storage.
	 */
	(void) VOP_FSYNC(tovp, 0, cr, NULL);
	/* FNODSYNC for the source: linking did not modify its data. */
	(void) VOP_FSYNC(fromvp, FNODSYNC, cr, NULL);

	VN_RELE(tovp);
	VN_RELE(fromvp);

	*status = puterrno(error);

}
2385 2385 void *
2386 2386 rfs_link_getfh(struct nfslinkargs *args)
2387 2387 {
2388 2388 return (args->la_from);
2389 2389 }
2390 2390
2391 2391 /*
2392 2392 * Symbolicly link to a file.
2393 2393 * Create a file (to) with the given attributes which is a symbolic link
2394 2394 * to the given path name (to).
2395 2395 */
2396 2396 void
2397 2397 rfs_symlink(struct nfsslargs *args, enum nfsstat *status,
2398 2398 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
2399 2399 {
2400 2400 int error;
2401 2401 struct vattr va;
2402 2402 vnode_t *vp;
2403 2403 vnode_t *svp;
2404 2404 int lerror;
2405 2405 struct sockaddr *ca;
2406 2406 char *name = NULL;
2407 2407
2408 2408 /*
2409 2409 * Disallow NULL paths
2410 2410 */
2411 2411 if (args->sla_from.da_name == NULL || *args->sla_from.da_name == '\0') {
2412 2412 *status = NFSERR_ACCES;
2413 2413 return;
2414 2414 }
2415 2415
2416 2416 vp = nfs_fhtovp(args->sla_from.da_fhandle, exi);
2417 2417 if (vp == NULL) {
2418 2418 *status = NFSERR_STALE;
2419 2419 return;
2420 2420 }
2421 2421
2422 2422 if (rdonly(ro, vp)) {
2423 2423 VN_RELE(vp);
2424 2424 *status = NFSERR_ROFS;
2425 2425 return;
2426 2426 }
2427 2427
2428 2428 error = sattr_to_vattr(args->sla_sa, &va);
2429 2429 if (error) {
2430 2430 VN_RELE(vp);
2431 2431 *status = puterrno(error);
2432 2432 return;
2433 2433 }
2434 2434
2435 2435 if (!(va.va_mask & AT_MODE)) {
2436 2436 VN_RELE(vp);
2437 2437 *status = NFSERR_INVAL;
2438 2438 return;
2439 2439 }
2440 2440
2441 2441 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2442 2442 name = nfscmd_convname(ca, exi, args->sla_tnm,
2443 2443 NFSCMD_CONV_INBOUND, MAXPATHLEN);
2444 2444
2445 2445 if (name == NULL) {
2446 2446 *status = NFSERR_ACCES;
2447 2447 return;
2448 2448 }
2449 2449
2450 2450 va.va_type = VLNK;
2451 2451 va.va_mask |= AT_TYPE;
2452 2452
2453 2453 error = VOP_SYMLINK(vp, args->sla_from.da_name, &va, name, cr, NULL, 0);
2454 2454
2455 2455 /*
2456 2456 * Force new data and metadata out to stable storage.
2457 2457 */
2458 2458 lerror = VOP_LOOKUP(vp, args->sla_from.da_name, &svp, NULL, 0,
2459 2459 NULL, cr, NULL, NULL, NULL);
2460 2460
2461 2461 if (!lerror) {
2462 2462 (void) VOP_FSYNC(svp, 0, cr, NULL);
2463 2463 VN_RELE(svp);
2464 2464 }
2465 2465
2466 2466 /*
2467 2467 * Force modified data and metadata out to stable storage.
2468 2468 */
2469 2469 (void) VOP_FSYNC(vp, 0, cr, NULL);
2470 2470
2471 2471 VN_RELE(vp);
2472 2472
2473 2473 *status = puterrno(error);
2474 2474 if (name != args->sla_tnm)
2475 2475 kmem_free(name, MAXPATHLEN);
2476 2476
2477 2477 }
2478 2478 void *
2479 2479 rfs_symlink_getfh(struct nfsslargs *args)
2480 2480 {
2481 2481 return (args->sla_from.da_fhandle);
2482 2482 }
2483 2483
/*
 * Make a directory.
 * Create a directory with the given name, parent directory, and attributes.
 * Returns a file handle and attributes for the new directory.
 */
/* ARGSUSED */
void
rfs_mkdir(struct nfscreatargs *args, struct nfsdiropres *dr,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error;
	struct vattr va;
	vnode_t *dvp = NULL;	/* the newly created directory */
	vnode_t *vp;		/* the parent directory */
	char *name = args->ca_da.da_name;

	/*
	 * Disallow NULL paths
	 */
	if (name == NULL || *name == '\0') {
		dr->dr_status = NFSERR_ACCES;
		return;
	}

	vp = nfs_fhtovp(args->ca_da.da_fhandle, exi);
	if (vp == NULL) {
		dr->dr_status = NFSERR_STALE;
		return;
	}

	if (rdonly(ro, vp)) {
		VN_RELE(vp);
		dr->dr_status = NFSERR_ROFS;
		return;
	}

	/* Convert over-the-wire attributes; the client must supply a mode. */
	error = sattr_to_vattr(args->ca_sa, &va);
	if (error) {
		VN_RELE(vp);
		dr->dr_status = puterrno(error);
		return;
	}

	if (!(va.va_mask & AT_MODE)) {
		VN_RELE(vp);
		dr->dr_status = NFSERR_INVAL;
		return;
	}

	va.va_type = VDIR;
	va.va_mask |= AT_TYPE;

	error = VOP_MKDIR(vp, name, &va, &dvp, cr, NULL, 0, NULL);

	if (!error) {
		/*
		 * Attributes of the newly created directory should
		 * be returned to the client.
		 */
		va.va_mask = AT_ALL; /* We want everything */
		error = VOP_GETATTR(dvp, &va, 0, cr, NULL);

		/* check for overflows */
		if (!error) {
			/* Adjust reported permissions for non-trivial ACLs. */
			acl_perm(vp, exi, &va, cr);
			error = vattr_to_nattr(&va, &dr->dr_attr);
			if (!error) {
				error = makefh(&dr->dr_fhandle, dvp, exi);
			}
		}
		/*
		 * Force new data and metadata out to stable storage.
		 */
		(void) VOP_FSYNC(dvp, 0, cr, NULL);
		VN_RELE(dvp);
	}

	/*
	 * Force modified data and metadata out to stable storage.
	 */
	(void) VOP_FSYNC(vp, 0, cr, NULL);

	VN_RELE(vp);

	dr->dr_status = puterrno(error);

}
2571 2571 void *
2572 2572 rfs_mkdir_getfh(struct nfscreatargs *args)
2573 2573 {
2574 2574 return (args->ca_da.da_fhandle);
2575 2575 }
2576 2576
/*
 * Remove a directory.
 * Remove the given directory name from the given parent directory.
 */
/* ARGSUSED */
void
rfs_rmdir(struct nfsdiropargs *da, enum nfsstat *status,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error;
	vnode_t *vp;	/* the parent directory */

	/*
	 * Disallow NULL paths
	 */
	if (da->da_name == NULL || *da->da_name == '\0') {
		*status = NFSERR_ACCES;
		return;
	}

	vp = nfs_fhtovp(da->da_fhandle, exi);
	if (vp == NULL) {
		*status = NFSERR_STALE;
		return;
	}

	if (rdonly(ro, vp)) {
		VN_RELE(vp);
		*status = NFSERR_ROFS;
		return;
	}

	/*
	 * VOP_RMDIR takes a third argument (the current
	 * directory of the process). That's because someone
	 * wants to return EINVAL if one tries to remove ".".
	 * Of course, NFS servers have no idea what their
	 * clients' current directories are. We fake it by
	 * supplying a vnode known to exist and illegal to
	 * remove.
	 */
	error = VOP_RMDIR(vp, da->da_name, ZONE_ROOTVP(), cr, NULL, 0);

	/*
	 * Force modified data and metadata out to stable storage.
	 */
	(void) VOP_FSYNC(vp, 0, cr, NULL);

	VN_RELE(vp);

	/*
	 * System V defines rmdir to return EEXIST, not ENOTEMPTY,
	 * if the directory is not empty. A System V NFS server
	 * needs to map NFSERR_EXIST to NFSERR_NOTEMPTY to transmit
	 * over the wire.
	 */
	if (error == EEXIST)
		*status = NFSERR_NOTEMPTY;
	else
		*status = puterrno(error);

}
2639 2639 void *
2640 2640 rfs_rmdir_getfh(struct nfsdiropargs *da)
2641 2641 {
2642 2642 return (da->da_fhandle);
2643 2643 }
2644 2644
/* ARGSUSED */
/*
 * Read entries from a directory: NFSv2 READDIR.  Fills in rd with a
 * buffer of dirent64 records (freed later by rfs_rddirfree()), the
 * number of valid bytes, and an EOF flag.
 */
void
rfs_readdir(struct nfsrddirargs *rda, struct nfsrddirres *rd,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error;
	int iseof;		/* set by VOP_READDIR at end of directory */
	struct iovec iov;
	struct uio uio;
	vnode_t *vp;
	char *ndata = NULL;	/* charset-converted entries, if conversion ran */
	struct sockaddr *ca;
	size_t nents;
	int ret;

	vp = nfs_fhtovp(&rda->rda_fh, exi);
	if (vp == NULL) {
		rd->rd_entries = NULL;
		rd->rd_status = NFSERR_STALE;
		return;
	}

	if (vp->v_type != VDIR) {
		VN_RELE(vp);
		rd->rd_entries = NULL;
		rd->rd_status = NFSERR_NOTDIR;
		return;
	}

	/* Hold the directory read-locked across access check and read. */
	(void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);

	error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);

	if (error) {
		rd->rd_entries = NULL;
		goto bad;
	}

	/* A zero-byte request gets an empty, non-EOF reply. */
	if (rda->rda_count == 0) {
		rd->rd_entries = NULL;
		rd->rd_size = 0;
		rd->rd_eof = FALSE;
		goto bad;
	}

	/* Clamp the transfer to the server's per-request maximum. */
	rda->rda_count = MIN(rda->rda_count, NFS_MAXDATA);

	/*
	 * Allocate data for entries. This will be freed by rfs_rddirfree.
	 */
	rd->rd_bufsize = (uint_t)rda->rda_count;
	rd->rd_entries = kmem_alloc(rd->rd_bufsize, KM_SLEEP);

	/*
	 * Set up io vector to read directory data
	 */
	iov.iov_base = (caddr_t)rd->rd_entries;
	iov.iov_len = rda->rda_count;
	uio.uio_iov = &iov;
	uio.uio_iovcnt = 1;
	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_extflg = UIO_COPY_CACHED;
	uio.uio_loffset = (offset_t)rda->rda_offset;
	uio.uio_resid = rda->rda_count;

	/*
	 * read directory
	 */
	error = VOP_READDIR(vp, &uio, cr, &iseof, NULL, 0);

	/*
	 * Clean up
	 */
	if (!error) {
		/*
		 * set size and eof
		 */
		if (uio.uio_resid == rda->rda_count) {
			/* Nothing was read: report EOF. */
			rd->rd_size = 0;
			rd->rd_eof = TRUE;
		} else {
			rd->rd_size = (uint32_t)(rda->rda_count -
			    uio.uio_resid);
			rd->rd_eof = iseof ? TRUE : FALSE;
		}
	}

	/*
	 * NOTE(review): if VOP_READDIR failed above, rd->rd_size has not
	 * been assigned on this path, yet it is read by the conversion
	 * code below — confirm the caller zeroes the result structure.
	 */
	/* Convert entry names into the client's character set if needed. */
	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
	nents = nfscmd_countents((char *)rd->rd_entries, rd->rd_size);
	ret = nfscmd_convdirplus(ca, exi, (char *)rd->rd_entries, nents,
	    rda->rda_count, &ndata);

	if (ret != 0) {
		size_t dropbytes;
		/*
		 * We had to drop one or more entries in order to fit
		 * during the character conversion. We need to patch
		 * up the size and eof info.
		 */
		if (rd->rd_eof)
			rd->rd_eof = FALSE;
		dropbytes = nfscmd_dropped_entrysize(
		    (struct dirent64 *)rd->rd_entries, nents, ret);
		rd->rd_size -= dropbytes;
	}
	if (ndata == NULL) {
		ndata = (char *)rd->rd_entries;	/* no conversion performed */
	} else if (ndata != (char *)rd->rd_entries) {
		/* Conversion produced a new buffer; swap it in. */
		kmem_free(rd->rd_entries, rd->rd_bufsize);
		rd->rd_entries = (void *)ndata;
		rd->rd_bufsize = rda->rda_count;
	}

bad:
	VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);

#if 0 /* notyet */
	/*
	 * Don't do this. It causes local disk writes when just
	 * reading the file and the overhead is deemed larger
	 * than the benefit.
	 */
	/*
	 * Force modified metadata out to stable storage.
	 */
	(void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
#endif

	VN_RELE(vp);

	rd->rd_status = puterrno(error);

}
2778 2778 void *
2779 2779 rfs_readdir_getfh(struct nfsrddirargs *rda)
2780 2780 {
2781 2781 return (&rda->rda_fh);
2782 2782 }
2783 2783 void
2784 2784 rfs_rddirfree(struct nfsrddirres *rd)
2785 2785 {
2786 2786 if (rd->rd_entries != NULL)
2787 2787 kmem_free(rd->rd_entries, rd->rd_bufsize);
2788 2788 }
2789 2789
2790 2790 /* ARGSUSED */
2791 2791 void
2792 2792 rfs_statfs(fhandle_t *fh, struct nfsstatfs *fs, struct exportinfo *exi,
2793 2793 struct svc_req *req, cred_t *cr, bool_t ro)
2794 2794 {
2795 2795 int error;
2796 2796 struct statvfs64 sb;
2797 2797 vnode_t *vp;
2798 2798
2799 2799 vp = nfs_fhtovp(fh, exi);
2800 2800 if (vp == NULL) {
2801 2801 fs->fs_status = NFSERR_STALE;
2802 2802 return;
2803 2803 }
2804 2804
2805 2805 error = VFS_STATVFS(vp->v_vfsp, &sb);
2806 2806
2807 2807 if (!error) {
2808 2808 fs->fs_tsize = nfstsize();
2809 2809 fs->fs_bsize = sb.f_frsize;
2810 2810 fs->fs_blocks = sb.f_blocks;
2811 2811 fs->fs_bfree = sb.f_bfree;
2812 2812 fs->fs_bavail = sb.f_bavail;
2813 2813 }
2814 2814
2815 2815 VN_RELE(vp);
2816 2816
2817 2817 fs->fs_status = puterrno(error);
2818 2818
2819 2819 }
2820 2820 void *
2821 2821 rfs_statfs_getfh(fhandle_t *fh)
2822 2822 {
2823 2823 return (fh);
2824 2824 }
2825 2825
/*
 * Convert NFSv2 over-the-wire settable attributes (nfssattr) into a
 * vattr, setting a va_mask bit only for each field the client actually
 * supplied; unsupplied fields are marked on the wire with all-ones
 * sentinel values.  Returns 0, or EOVERFLOW when a 32-bit time cannot
 * be represented in the native time_t (non-LP64 only).
 */
static int
sattr_to_vattr(struct nfssattr *sa, struct vattr *vap)
{
	vap->va_mask = 0;

	/*
	 * There was a sign extension bug in some VFS based systems
	 * which stored the mode as a short. When it would get
	 * assigned to a u_long, no sign extension would occur.
	 * It needed to, but this wasn't noticed because sa_mode
	 * would then get assigned back to the short, thus ignoring
	 * the upper 16 bits of sa_mode.
	 *
	 * To make this implementation work for both broken
	 * clients and good clients, we check for both versions
	 * of the mode.
	 */
	if (sa->sa_mode != (uint32_t)((ushort_t)-1) &&
	    sa->sa_mode != (uint32_t)-1) {
		vap->va_mask |= AT_MODE;
		vap->va_mode = sa->sa_mode;
	}
	if (sa->sa_uid != (uint32_t)-1) {
		vap->va_mask |= AT_UID;
		vap->va_uid = sa->sa_uid;
	}
	if (sa->sa_gid != (uint32_t)-1) {
		vap->va_mask |= AT_GID;
		vap->va_gid = sa->sa_gid;
	}
	if (sa->sa_size != (uint32_t)-1) {
		vap->va_mask |= AT_SIZE;
		vap->va_size = sa->sa_size;
	}
	if (sa->sa_atime.tv_sec != (int32_t)-1 &&
	    sa->sa_atime.tv_usec != (int32_t)-1) {
#ifndef _LP64
		/* return error if time overflow */
		if (!NFS2_TIME_OK(sa->sa_atime.tv_sec))
			return (EOVERFLOW);
#endif
		vap->va_mask |= AT_ATIME;
		/*
		 * nfs protocol defines times as unsigned so don't extend sign,
		 * unless sysadmin set nfs_allow_preepoch_time.
		 */
		NFS_TIME_T_CONVERT(vap->va_atime.tv_sec, sa->sa_atime.tv_sec);
		vap->va_atime.tv_nsec = (uint32_t)(sa->sa_atime.tv_usec * 1000);
	}
	if (sa->sa_mtime.tv_sec != (int32_t)-1 &&
	    sa->sa_mtime.tv_usec != (int32_t)-1) {
#ifndef _LP64
		/* return error if time overflow */
		if (!NFS2_TIME_OK(sa->sa_mtime.tv_sec))
			return (EOVERFLOW);
#endif
		vap->va_mask |= AT_MTIME;
		/*
		 * nfs protocol defines times as unsigned so don't extend sign,
		 * unless sysadmin set nfs_allow_preepoch_time.
		 */
		NFS_TIME_T_CONVERT(vap->va_mtime.tv_sec, sa->sa_mtime.tv_sec);
		vap->va_mtime.tv_nsec = (uint32_t)(sa->sa_mtime.tv_usec * 1000);
	}
	return (0);
}
2892 2892
/*
 * Map vtype_t values (indexed by vnode type, VNON..VBAD) to the NFSv2
 * over-the-wire file types.  Types with no NFSv2 representation map to
 * 0; VFIFO also maps to 0 here and is handled separately via
 * NA_SETFIFO() in vattr_to_nattr().
 */
static const enum nfsftype vt_to_nf[] = {
	0, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, 0, 0, 0, NFSOC, 0
};
2896 2896
/*
 * check the following fields for overflow: nodeid, size, and time.
 * There could be a problem when converting 64-bit LP64 fields
 * into 32-bit ones. Return an error if there is an overflow.
 */
int
vattr_to_nattr(struct vattr *vap, struct nfsfattr *na)
{
	ASSERT(vap->va_type >= VNON && vap->va_type <= VBAD);
	na->na_type = vt_to_nf[vap->va_type];

	/*
	 * (unsigned short)-1 / (uint32_t)-1 are the "unknown" sentinels;
	 * pass them through unchanged rather than OR-ing in the type bits.
	 */
	if (vap->va_mode == (unsigned short) -1)
		na->na_mode = (uint32_t)-1;
	else
		na->na_mode = VTTOIF(vap->va_type) | vap->va_mode;

	if (vap->va_uid == (unsigned short)(-1))
		na->na_uid = (uint32_t)(-1);
	else if (vap->va_uid == UID_NOBODY)
		na->na_uid = (uint32_t)NFS_UID_NOBODY;
	else
		na->na_uid = vap->va_uid;

	if (vap->va_gid == (unsigned short)(-1))
		na->na_gid = (uint32_t)-1;
	else if (vap->va_gid == GID_NOBODY)
		na->na_gid = (uint32_t)NFS_GID_NOBODY;
	else
		na->na_gid = vap->va_gid;

	/*
	 * Do we need to check fsid for overflow? It is 64-bit in the
	 * vattr, but are bigger than 32 bit values supported?
	 */
	na->na_fsid = vap->va_fsid;

	na->na_nodeid = vap->va_nodeid;

	/*
	 * Check to make sure that the nodeid is representable over the
	 * wire without losing bits.
	 */
	if (vap->va_nodeid != (u_longlong_t)na->na_nodeid)
		return (EFBIG);
	na->na_nlink = vap->va_nlink;

	/*
	 * Check for big files here, instead of at the caller. See
	 * comments in cstat for large special file explanation.
	 */
	if (vap->va_size > (u_longlong_t)MAXOFF32_T) {
		if ((vap->va_type == VREG) || (vap->va_type == VDIR))
			return (EFBIG);
		if ((vap->va_type == VBLK) || (vap->va_type == VCHR)) {
			/* UNKNOWN_SIZE | OVERFLOW */
			na->na_size = MAXOFF32_T;
		} else
			na->na_size = vap->va_size;
	} else
		na->na_size = vap->va_size;

	/*
	 * If the vnode times overflow the 32-bit times that NFS2
	 * uses on the wire then return an error.
	 */
	if (!NFS_VAP_TIME_OK(vap)) {
		return (EOVERFLOW);
	}
	na->na_atime.tv_sec = vap->va_atime.tv_sec;
	na->na_atime.tv_usec = vap->va_atime.tv_nsec / 1000;

	na->na_mtime.tv_sec = vap->va_mtime.tv_sec;
	na->na_mtime.tv_usec = vap->va_mtime.tv_nsec / 1000;

	na->na_ctime.tv_sec = vap->va_ctime.tv_sec;
	na->na_ctime.tv_usec = vap->va_ctime.tv_nsec / 1000;

	/*
	 * If the dev_t will fit into 16 bits then compress
	 * it, otherwise leave it alone. See comments in
	 * nfs_client.c.
	 */
	if (getminor(vap->va_rdev) <= SO4_MAXMIN &&
	    getmajor(vap->va_rdev) <= SO4_MAXMAJ)
		na->na_rdev = nfsv2_cmpdev(vap->va_rdev);
	else
		(void) cmpldev(&na->na_rdev, vap->va_rdev);

	na->na_blocks = vap->va_nblocks;
	na->na_blocksize = vap->va_blksize;

	/*
	 * This bit of ugliness is a *TEMPORARY* hack to preserve the
	 * over-the-wire protocols for named-pipe vnodes. It remaps the
	 * VFIFO type to the special over-the-wire type. (see note in nfs.h)
	 *
	 * BUYER BEWARE:
	 * If you are porting the NFS to a non-Sun server, you probably
	 * don't want to include the following block of code. The
	 * over-the-wire special file types will be changing with the
	 * NFS Protocol Revision.
	 */
	if (vap->va_type == VFIFO)
		NA_SETFIFO(na);
	return (0);
}
3003 3003
3004 3004 /*
3005 3005 * acl v2 support: returns approximate permission.
3006 3006 * default: returns minimal permission (more restrictive)
3007 3007 * aclok: returns maximal permission (less restrictive)
3008 3008 * This routine changes the permissions that are alaredy in *va.
3009 3009 * If a file has minimal ACL, i.e. aclcnt == MIN_ACL_ENTRIES,
3010 3010 * CLASS_OBJ is always the same as GROUP_OBJ entry.
3011 3011 */
3012 3012 static void
3013 3013 acl_perm(struct vnode *vp, struct exportinfo *exi, struct vattr *va, cred_t *cr)
3014 3014 {
3015 3015 vsecattr_t vsa;
3016 3016 int aclcnt;
3017 3017 aclent_t *aclentp;
3018 3018 mode_t mask_perm;
3019 3019 mode_t grp_perm;
3020 3020 mode_t other_perm;
3021 3021 mode_t other_orig;
3022 3022 int error;
3023 3023
3024 3024 /* dont care default acl */
3025 3025 vsa.vsa_mask = (VSA_ACL | VSA_ACLCNT);
3026 3026 error = VOP_GETSECATTR(vp, &vsa, 0, cr, NULL);
3027 3027
3028 3028 if (!error) {
3029 3029 aclcnt = vsa.vsa_aclcnt;
3030 3030 if (aclcnt > MIN_ACL_ENTRIES) {
3031 3031 /* non-trivial ACL */
3032 3032 aclentp = vsa.vsa_aclentp;
3033 3033 if (exi->exi_export.ex_flags & EX_ACLOK) {
3034 3034 /* maximal permissions */
3035 3035 grp_perm = 0;
3036 3036 other_perm = 0;
3037 3037 for (; aclcnt > 0; aclcnt--, aclentp++) {
3038 3038 switch (aclentp->a_type) {
3039 3039 case USER_OBJ:
3040 3040 break;
3041 3041 case USER:
3042 3042 grp_perm |=
3043 3043 aclentp->a_perm << 3;
3044 3044 other_perm |= aclentp->a_perm;
3045 3045 break;
3046 3046 case GROUP_OBJ:
3047 3047 grp_perm |=
3048 3048 aclentp->a_perm << 3;
3049 3049 break;
3050 3050 case GROUP:
3051 3051 other_perm |= aclentp->a_perm;
3052 3052 break;
3053 3053 case OTHER_OBJ:
3054 3054 other_orig = aclentp->a_perm;
3055 3055 break;
3056 3056 case CLASS_OBJ:
3057 3057 mask_perm = aclentp->a_perm;
3058 3058 break;
3059 3059 default:
3060 3060 break;
3061 3061 }
3062 3062 }
3063 3063 grp_perm &= mask_perm << 3;
3064 3064 other_perm &= mask_perm;
3065 3065 other_perm |= other_orig;
3066 3066
3067 3067 } else {
3068 3068 /* minimal permissions */
3069 3069 grp_perm = 070;
3070 3070 other_perm = 07;
3071 3071 for (; aclcnt > 0; aclcnt--, aclentp++) {
3072 3072 switch (aclentp->a_type) {
3073 3073 case USER_OBJ:
3074 3074 break;
3075 3075 case USER:
3076 3076 case CLASS_OBJ:
3077 3077 grp_perm &=
3078 3078 aclentp->a_perm << 3;
3079 3079 other_perm &=
3080 3080 aclentp->a_perm;
3081 3081 break;
3082 3082 case GROUP_OBJ:
3083 3083 grp_perm &=
3084 3084 aclentp->a_perm << 3;
3085 3085 break;
3086 3086 case GROUP:
3087 3087 other_perm &=
3088 3088 aclentp->a_perm;
3089 3089 break;
3090 3090 case OTHER_OBJ:
3091 3091 other_perm &=
3092 3092 aclentp->a_perm;
3093 3093 break;
3094 3094 default:
3095 3095 break;
3096 3096 }
3097 3097 }
3098 3098 }
3099 3099 /* copy to va */
3100 3100 va->va_mode &= ~077;
3101 3101 va->va_mode |= grp_perm | other_perm;
3102 3102 }
3103 3103 if (vsa.vsa_aclcnt)
3104 3104 kmem_free(vsa.vsa_aclentp,
3105 3105 vsa.vsa_aclcnt * sizeof (aclent_t));
3106 3106 }
3107 3107 }
3108 3108
/*
 * One-time NFSv2 server initialization: obtain the caller id used to
 * identify this module's VOP calls (presumably for FEM/monitor
 * consumers — confirm against fs_new_caller_id() users).
 */
void
rfs_srvrinit(void)
{
	nfs2_srv_caller_id = fs_new_caller_id();
}
3114 3114
/*
 * Teardown counterpart of rfs_srvrinit(); currently nothing to undo.
 */
void
rfs_srvrfini(void)
{
}
3119 3119
3120 3120 /* ARGSUSED */
3121 3121 void
3122 3122 rfs_srv_zone_init(nfs_globals_t *ng)
3123 3123 {
3124 3124 nfs_srv_t *ns;
3125 3125
3126 3126 ns = kmem_zalloc(sizeof (*ns), KM_SLEEP);
3127 3127
3128 3128 mutex_init(&ns->async_write_lock, NULL, MUTEX_DEFAULT, NULL);
3129 3129 ns->write_async = 1;
3130 3130
3131 3131 ng->nfs_srv = ns;
3132 3132 }
3133 3133
3134 3134 /* ARGSUSED */
3135 3135 void
3136 3136 rfs_srv_zone_fini(nfs_globals_t *ng)
3137 3137 {
3138 3138 nfs_srv_t *ns = ng->nfs_srv;
3139 3139
3140 3140 ng->nfs_srv = NULL;
3141 3141
3142 3142 mutex_destroy(&ns->async_write_lock);
3143 3143 kmem_free(ns, sizeof (*ns));
3144 3144 }
3145 3145
3146 3146 static int
3147 3147 rdma_setup_read_data2(struct nfsreadargs *ra, struct nfsrdresult *rr)
3148 3148 {
3149 3149 struct clist *wcl;
3150 3150 int wlist_len;
3151 3151 uint32_t count = rr->rr_count;
3152 3152
3153 3153 wcl = ra->ra_wlist;
3154 3154
3155 3155 if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE) {
3156 3156 return (FALSE);
3157 3157 }
3158 3158
3159 3159 wcl = ra->ra_wlist;
3160 3160 rr->rr_ok.rrok_wlist_len = wlist_len;
3161 3161 rr->rr_ok.rrok_wlist = wcl;
3162 3162
3163 3163 return (TRUE);
3164 3164 }
|
↓ open down ↓ |
2730 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX