Print this page
Pass the zone's root vnode to untraverse() so that climbing out of a mountpoint stops at the zone's root rather than the global root.
| Split |
Close |
| Expand all |
| Collapse all |
--- old/usr/src/uts/common/fs/nfs/nfs_srv.c
+++ new/usr/src/uts/common/fs/nfs/nfs_srv.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved.
24 24 * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
25 25 * Copyright (c) 2016 by Delphix. All rights reserved.
26 26 */
27 27
28 28 /*
29 29 * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T.
30 30 * All rights reserved.
31 31 */
32 32
33 33 /*
34 34 * Copyright 2018 Nexenta Systems, Inc.
35 35 * Copyright (c) 2016 by Delphix. All rights reserved.
36 36 */
37 37
38 38 #include <sys/param.h>
39 39 #include <sys/types.h>
40 40 #include <sys/systm.h>
41 41 #include <sys/cred.h>
42 42 #include <sys/buf.h>
43 43 #include <sys/vfs.h>
44 44 #include <sys/vnode.h>
45 45 #include <sys/uio.h>
46 46 #include <sys/stat.h>
47 47 #include <sys/errno.h>
48 48 #include <sys/sysmacros.h>
49 49 #include <sys/statvfs.h>
50 50 #include <sys/kmem.h>
51 51 #include <sys/kstat.h>
52 52 #include <sys/dirent.h>
53 53 #include <sys/cmn_err.h>
54 54 #include <sys/debug.h>
55 55 #include <sys/vtrace.h>
56 56 #include <sys/mode.h>
57 57 #include <sys/acl.h>
58 58 #include <sys/nbmlock.h>
59 59 #include <sys/policy.h>
60 60 #include <sys/sdt.h>
61 61
62 62 #include <rpc/types.h>
63 63 #include <rpc/auth.h>
64 64 #include <rpc/svc.h>
65 65
66 66 #include <nfs/nfs.h>
67 67 #include <nfs/export.h>
68 68 #include <nfs/nfs_cmd.h>
69 69
70 70 #include <vm/hat.h>
71 71 #include <vm/as.h>
72 72 #include <vm/seg.h>
73 73 #include <vm/seg_map.h>
74 74 #include <vm/seg_kmem.h>
75 75
76 76 #include <sys/strsubr.h>
77 77
78 78 struct rfs_async_write_list;
79 79
/*
 * Zone globals of NFSv2 server
 */
typedef struct nfs_srv {
	/* Protects async_write_head (see the clustered-write code). */
	kmutex_t async_write_lock;
	/* Head of the list of pending clustered async write requests. */
	struct rfs_async_write_list *async_write_head;

	/*
	 * enables write clustering if == 1
	 */
	int write_async;
} nfs_srv_t;
92 92
93 93 /*
94 94 * These are the interface routines for the server side of the
95 95 * Network File System. See the NFS version 2 protocol specification
96 96 * for a description of this interface.
97 97 */
98 98
99 99 static int sattr_to_vattr(struct nfssattr *, struct vattr *);
100 100 static void acl_perm(struct vnode *, struct exportinfo *, struct vattr *,
101 101 cred_t *);
102 102
103 103
104 104 /*
105 105 * Some "over the wire" UNIX file types. These are encoded
106 106 * into the mode. This needs to be fixed in the next rev.
107 107 */
108 108 #define IFMT 0170000 /* type of file */
109 109 #define IFCHR 0020000 /* character special */
110 110 #define IFBLK 0060000 /* block special */
111 111 #define IFSOCK 0140000 /* socket */
112 112
113 113 u_longlong_t nfs2_srv_caller_id;
114 114
115 115 static nfs_srv_t *
116 116 nfs_get_srv(void)
117 117 {
118 118 nfs_globals_t *ng = zone_getspecific(nfssrv_zone_key, curzone);
119 119 nfs_srv_t *srv = ng->nfs_srv;
120 120 ASSERT(srv != NULL);
121 121 return (srv);
122 122 }
123 123
/*
 * Get file attributes.
 * Returns the current attributes of the file with the given fhandle.
 */
/* ARGSUSED */
void
rfs_getattr(fhandle_t *fhp, struct nfsattrstat *ns, struct exportinfo *exi,
    struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error;
	vnode_t *vp;
	struct vattr va;

	/* Translate the file handle into a held vnode. */
	vp = nfs_fhtovp(fhp, exi);
	if (vp == NULL) {
		ns->ns_status = NFSERR_STALE;
		return;
	}

	/*
	 * Do the getattr.
	 */
	va.va_mask = AT_ALL;	/* we want all the attributes */

	error = rfs4_delegated_getattr(vp, &va, 0, cr);

	/* check for overflows */
	if (!error) {
		/* Lie about the object type for a referral */
		if (vn_is_nfs_reparse(vp, cr))
			va.va_type = VLNK;

		acl_perm(vp, exi, &va, cr);
		error = vattr_to_nattr(&va, &ns->ns_attr);
	}

	/* Drop the hold acquired by nfs_fhtovp(). */
	VN_RELE(vp);

	ns->ns_status = puterrno(error);
}
164 164 void *
165 165 rfs_getattr_getfh(fhandle_t *fhp)
166 166 {
167 167 return (fhp);
168 168 }
169 169
/*
 * Set file attributes.
 * Sets the attributes of the file with the given fhandle.  Returns
 * the new attributes.
 */
/* ARGSUSED */
void
rfs_setattr(struct nfssaargs *args, struct nfsattrstat *ns,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error;
	int flag;
	int in_crit = 0;	/* nonzero while inside the nbmand critical region */
	vnode_t *vp;
	struct vattr va;	/* attributes requested by the client */
	struct vattr bva;	/* attributes of the file before the change */
	struct flock64 bf;
	caller_context_t ct;


	vp = nfs_fhtovp(&args->saa_fh, exi);
	if (vp == NULL) {
		ns->ns_status = NFSERR_STALE;
		return;
	}

	/* No attribute changes on a read-only export. */
	if (rdonly(ro, vp)) {
		VN_RELE(vp);
		ns->ns_status = NFSERR_ROFS;
		return;
	}

	error = sattr_to_vattr(&args->saa_sa, &va);
	if (error) {
		VN_RELE(vp);
		ns->ns_status = puterrno(error);
		return;
	}

	/*
	 * If the client is requesting a change to the mtime,
	 * but the nanosecond field is set to 1 billion, then
	 * this is a flag to the server that it should set the
	 * atime and mtime fields to the server's current time.
	 * The 1 billion number actually came from the client
	 * as 1 million, but the units in the over the wire
	 * request are microseconds instead of nanoseconds.
	 *
	 * This is an overload of the protocol and should be
	 * documented in the NFS Version 2 protocol specification.
	 */
	if (va.va_mask & AT_MTIME) {
		if (va.va_mtime.tv_nsec == 1000000000) {
			gethrestime(&va.va_mtime);
			va.va_atime = va.va_mtime;
			va.va_mask |= AT_ATIME;
			flag = 0;
		} else
			flag = ATTR_UTIME;
	} else
		flag = 0;

	/*
	 * If the filesystem is exported with nosuid, then mask off
	 * the setuid and setgid bits.
	 */
	if ((va.va_mask & AT_MODE) && vp->v_type == VREG &&
	    (exi->exi_export.ex_flags & EX_NOSUID))
		va.va_mode &= ~(VSUID | VSGID);

	ct.cc_sysid = 0;
	ct.cc_pid = 0;
	ct.cc_caller_id = nfs2_srv_caller_id;
	ct.cc_flags = CC_DONTBLOCK;

	/*
	 * We need to specially handle size changes because it is
	 * possible for the client to create a file with modes
	 * which indicate read-only, but with the file opened for
	 * writing. If the client then tries to set the size of
	 * the file, then the normal access checking done in
	 * VOP_SETATTR would prevent the client from doing so,
	 * although it should be legal for it to do so. To get
	 * around this, we do the access checking for ourselves
	 * and then use VOP_SPACE which doesn't do the access
	 * checking which VOP_SETATTR does. VOP_SPACE can only
	 * operate on VREG files, let VOP_SETATTR handle the other
	 * extremely rare cases.
	 * Also the client should not be allowed to change the
	 * size of the file if there is a conflicting non-blocking
	 * mandatory lock in the region of change.
	 */
	if (vp->v_type == VREG && va.va_mask & AT_SIZE) {
		if (nbl_need_check(vp)) {
			nbl_start_crit(vp, RW_READER);
			in_crit = 1;
		}

		bva.va_mask = AT_UID | AT_SIZE;

		error = VOP_GETATTR(vp, &bva, 0, cr, &ct);

		if (error) {
			if (in_crit)
				nbl_end_crit(vp);
			VN_RELE(vp);
			ns->ns_status = puterrno(error);
			return;
		}

		if (in_crit) {
			u_offset_t offset;
			ssize_t length;

			/*
			 * The region affected by the size change is
			 * [min(old, new), |old - new|); check it for
			 * a conflicting non-blocking mandatory lock.
			 */
			if (va.va_size < bva.va_size) {
				offset = va.va_size;
				length = bva.va_size - va.va_size;
			} else {
				offset = bva.va_size;
				length = va.va_size - bva.va_size;
			}
			if (nbl_conflict(vp, NBL_WRITE, offset, length, 0,
			    NULL)) {
				error = EACCES;
			}
		}

		/*
		 * Only the owner takes the VOP_SPACE shortcut; other
		 * callers leave AT_SIZE set so VOP_SETATTR below does
		 * its normal access checking.
		 */
		if (crgetuid(cr) == bva.va_uid && !error &&
		    va.va_size != bva.va_size) {
			va.va_mask &= ~AT_SIZE;
			bf.l_type = F_WRLCK;
			bf.l_whence = 0;
			bf.l_start = (off64_t)va.va_size;
			bf.l_len = 0;
			bf.l_sysid = 0;
			bf.l_pid = 0;

			error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
			    (offset_t)va.va_size, cr, &ct);
		}
		if (in_crit)
			nbl_end_crit(vp);
	} else
		error = 0;

	/*
	 * Do the setattr.
	 */
	if (!error && va.va_mask) {
		error = VOP_SETATTR(vp, &va, flag, cr, &ct);
	}

	/*
	 * check if the monitor on either vop_space or vop_setattr detected
	 * a delegation conflict and if so, mark the thread flag as
	 * wouldblock so that the response is dropped and the client will
	 * try again.
	 */
	if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
		VN_RELE(vp);
		curthread->t_flag |= T_WOULDBLOCK;
		return;
	}

	if (!error) {
		va.va_mask = AT_ALL;	/* get everything */

		error = rfs4_delegated_getattr(vp, &va, 0, cr);

		/* check for overflows */
		if (!error) {
			acl_perm(vp, exi, &va, cr);
			error = vattr_to_nattr(&va, &ns->ns_attr);
		}
	}

	ct.cc_flags = 0;

	/*
	 * Force modified metadata out to stable storage.
	 */
	(void) VOP_FSYNC(vp, FNODSYNC, cr, &ct);

	VN_RELE(vp);

	ns->ns_status = puterrno(error);
}
357 357 void *
358 358 rfs_setattr_getfh(struct nfssaargs *args)
359 359 {
360 360 return (&args->saa_fh);
361 361 }
362 362
/* Change and release @exip and @vpp only in success */
int
rfs_cross_mnt(vnode_t **vpp, struct exportinfo **exip)
{
	struct exportinfo *exi;
	vnode_t *vp = *vpp;
	fid_t fid;
	int error;

	/* Hold a private reference for the traversal attempt. */
	VN_HOLD(vp);

	/* Cross into the filesystem mounted on this vnode. */
	if ((error = traverse(&vp)) != 0) {
		VN_RELE(vp);
		return (error);
	}

	/* Get the file ID of the submount's root to look up its export. */
	bzero(&fid, sizeof (fid));
	fid.fid_len = MAXFIDSZ;
	error = VOP_FID(vp, &fid, NULL);
	if (error) {
		VN_RELE(vp);
		return (error);
	}

	exi = checkexport(&vp->v_vfsp->vfs_fsid, &fid);
	if (exi == NULL ||
	    (exi->exi_export.ex_flags & EX_NOHIDE) == 0) {
		/*
		 * It is not error, just subdir is not exported
		 * or "nohide" is not set
		 */
		if (exi != NULL)
			exi_rele(exi);
		VN_RELE(vp);
	} else {
		/* go to submount */
		exi_rele(*exip);
		*exip = exi;

		VN_RELE(*vpp);
		*vpp = vp;
	}

	return (0);
}
408 408
|
↓ open down ↓ |
408 lines elided |
↑ open up ↑ |
/*
 * Given mounted "dvp" and "exi", go upper mountpoint
 * with dvp/exi correction
 * Return 0 in success
 */
int
rfs_climb_crossmnt(vnode_t **dvpp, struct exportinfo **exip, cred_t *cr)
{
	struct exportinfo *exi;
	vnode_t *dvp = *dvpp;
	vnode_t *zone_rootvp;

	/*
	 * The zone's root vnode (from the export's nfs_export state) is
	 * handed to untraverse() below, presumably so the upward walk
	 * stops at the zone root rather than the global root.
	 */
	zone_rootvp = (*exip)->exi_ne->exi_root->exi_vp;
	ASSERT((dvp->v_flag & VROOT) || VN_CMP(zone_rootvp, dvp));

	VN_HOLD(dvp);
	/* Step from the mounted filesystem's root up to the covering vnode. */
	dvp = untraverse(dvp, zone_rootvp);
	exi = nfs_vptoexi(NULL, dvp, cr, NULL, NULL, FALSE);
	if (exi == NULL) {
		/* The upper filesystem is not exported: fail the climb. */
		VN_RELE(dvp);
		return (-1);
	}

	/* The new export must belong to the same zone as the old one. */
	ASSERT3U(exi->exi_zoneid, ==, (*exip)->exi_zoneid);
	exi_rele(*exip);
	*exip = exi;
	VN_RELE(*dvpp);
	*dvpp = dvp;

	return (0);
}
/*
 * Directory lookup.
 * Returns an fhandle and file attributes for file name in a directory.
 */
/* ARGSUSED */
void
rfs_lookup(struct nfsdiropargs *da, struct nfsdiropres *dr,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error;
	vnode_t *dvp;
	vnode_t *vp;
	struct vattr va;
	fhandle_t *fhp = da->da_fhandle;
	struct sec_ol sec = {0, 0};
	bool_t publicfh_flag = FALSE, auth_weak = FALSE;
	char *name;
	struct sockaddr *ca;

	/*
	 * Trusted Extension doesn't support NFSv2. MOUNT
	 * will reject v2 clients. Need to prevent v2 client
	 * access via WebNFS here.
	 */
	if (is_system_labeled() && req->rq_vers == 2) {
		dr->dr_status = NFSERR_ACCES;
		return;
	}

	/*
	 * Disallow NULL paths
	 */
	if (da->da_name == NULL || *da->da_name == '\0') {
		dr->dr_status = NFSERR_ACCES;
		return;
	}

	/*
	 * Allow lookups from the root - the default
	 * location of the public filehandle.
	 */
	if (exi != NULL && (exi->exi_export.ex_flags & EX_PUBLIC)) {
		dvp = ZONE_ROOTVP();
		VN_HOLD(dvp);
	} else {
		dvp = nfs_fhtovp(fhp, exi);
		if (dvp == NULL) {
			dr->dr_status = NFSERR_STALE;
			return;
		}
	}

	/* Hold the export; it may be swapped by the crossmnt paths below. */
	exi_hold(exi);
	ASSERT3U(exi->exi_zoneid, ==, curzone->zone_id);

	/*
	 * Not allow lookup beyond root.
	 * If the filehandle matches a filehandle of the exi,
	 * then the ".." refers beyond the root of an exported filesystem.
	 */
	if (strcmp(da->da_name, "..") == 0 &&
	    EQFID(&exi->exi_fid, (fid_t *)&fhp->fh_len)) {
		if ((exi->exi_export.ex_flags & EX_NOHIDE) &&
		    ((dvp->v_flag & VROOT) || VN_IS_CURZONEROOT(dvp))) {
			/*
			 * special case for ".." and 'nohide'exported root
			 */
			if (rfs_climb_crossmnt(&dvp, &exi, cr) != 0) {
				error = NFSERR_ACCES;
				goto out;
			}
		} else {
			error = NFSERR_NOENT;
			goto out;
		}
	}

	/* Convert the name to the server's character set if needed. */
	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
	name = nfscmd_convname(ca, exi, da->da_name, NFSCMD_CONV_INBOUND,
	    MAXPATHLEN);

	if (name == NULL) {
		error = NFSERR_ACCES;
		goto out;
	}

	/*
	 * If the public filehandle is used then allow
	 * a multi-component lookup, i.e. evaluate
	 * a pathname and follow symbolic links if
	 * necessary.
	 *
	 * This may result in a vnode in another filesystem
	 * which is OK as long as the filesystem is exported.
	 */
	if (PUBLIC_FH2(fhp)) {
		publicfh_flag = TRUE;

		/* mclookup returns its own export reference in exi */
		exi_rele(exi);
		exi = NULL;

		error = rfs_publicfh_mclookup(name, dvp, cr, &vp, &exi,
		    &sec);
	} else {
		/*
		 * Do a normal single component lookup.
		 */
		error = VOP_LOOKUP(dvp, name, &vp, NULL, 0, NULL, cr,
		    NULL, NULL, NULL);
	}

	/* Free the converted name if a copy was allocated. */
	if (name != da->da_name)
		kmem_free(name, MAXPATHLEN);

	if (error == 0 && vn_ismntpt(vp)) {
		error = rfs_cross_mnt(&vp, &exi);
		if (error)
			VN_RELE(vp);
	}

	if (!error) {
		va.va_mask = AT_ALL;	/* we want everything */

		error = rfs4_delegated_getattr(vp, &va, 0, cr);

		/* check for overflows */
		if (!error) {
			acl_perm(vp, exi, &va, cr);
			error = vattr_to_nattr(&va, &dr->dr_attr);
			if (!error) {
				if (sec.sec_flags & SEC_QUERY)
					error = makefh_ol(&dr->dr_fhandle, exi,
					    sec.sec_index);
				else {
					error = makefh(&dr->dr_fhandle, vp,
					    exi);
					if (!error && publicfh_flag &&
					    !chk_clnt_sec(exi, req))
						auth_weak = TRUE;
				}
			}
		}
		VN_RELE(vp);
	}

out:
	VN_RELE(dvp);

	if (exi != NULL)
		exi_rele(exi);

	/*
	 * If it's public fh, no 0x81, and client's flavor is
	 * invalid, set WebNFS status to WNFSERR_CLNT_FLAVOR now.
	 * Then set RPC status to AUTH_TOOWEAK in common_dispatch.
	 */
	if (auth_weak)
		dr->dr_status = (enum nfsstat)WNFSERR_CLNT_FLAVOR;
	else
		dr->dr_status = puterrno(error);
}
600 601 void *
601 602 rfs_lookup_getfh(struct nfsdiropargs *da)
602 603 {
603 604 return (da->da_fhandle);
604 605 }
605 606
/*
 * Read symbolic link.
 * Returns the string in the symbolic link at the given fhandle.
 */
/* ARGSUSED */
void
rfs_readlink(fhandle_t *fhp, struct nfsrdlnres *rl, struct exportinfo *exi,
    struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error;
	struct iovec iov;
	struct uio uio;
	vnode_t *vp;
	struct vattr va;
	struct sockaddr *ca;
	char *name = NULL;
	int is_referral = 0;

	vp = nfs_fhtovp(fhp, exi);
	if (vp == NULL) {
		rl->rl_data = NULL;
		rl->rl_status = NFSERR_STALE;
		return;
	}

	va.va_mask = AT_MODE;

	error = VOP_GETATTR(vp, &va, 0, cr, NULL);

	if (error) {
		VN_RELE(vp);
		rl->rl_data = NULL;
		rl->rl_status = puterrno(error);
		return;
	}

	/* Refuse objects subject to mandatory locking. */
	if (MANDLOCK(vp, va.va_mode)) {
		VN_RELE(vp);
		rl->rl_data = NULL;
		rl->rl_status = NFSERR_ACCES;
		return;
	}

	/* We lied about the object type for a referral */
	if (vn_is_nfs_reparse(vp, cr))
		is_referral = 1;

	/*
	 * XNFS and RFC1094 require us to return ENXIO if argument
	 * is not a link. BUGID 1138002.
	 */
	if (vp->v_type != VLNK && !is_referral) {
		VN_RELE(vp);
		rl->rl_data = NULL;
		rl->rl_status = NFSERR_NXIO;
		return;
	}

	/*
	 * Allocate data for pathname.  This will be freed by rfs_rlfree.
	 */
	rl->rl_data = kmem_alloc(NFS_MAXPATHLEN, KM_SLEEP);

	if (is_referral) {
		char *s;
		size_t strsz;

		/* Get an artificial symlink based on a referral */
		s = build_symlink(vp, cr, &strsz);
		global_svstat_ptr[2][NFS_REFERLINKS].value.ui64++;
		DTRACE_PROBE2(nfs2serv__func__referral__reflink,
		    vnode_t *, vp, char *, s);
		if (s == NULL)
			error = EINVAL;
		else {
			error = 0;
			(void) strlcpy(rl->rl_data, s, NFS_MAXPATHLEN);
			rl->rl_count = (uint32_t)MIN(strsz, NFS_MAXPATHLEN);
			kmem_free(s, strsz);
		}

	} else {

		/*
		 * Set up io vector to read sym link data
		 */
		iov.iov_base = rl->rl_data;
		iov.iov_len = NFS_MAXPATHLEN;
		uio.uio_iov = &iov;
		uio.uio_iovcnt = 1;
		uio.uio_segflg = UIO_SYSSPACE;
		uio.uio_extflg = UIO_COPY_CACHED;
		uio.uio_loffset = (offset_t)0;
		uio.uio_resid = NFS_MAXPATHLEN;

		/*
		 * Do the readlink.
		 */
		error = VOP_READLINK(vp, &uio, cr, NULL);

		rl->rl_count = (uint32_t)(NFS_MAXPATHLEN - uio.uio_resid);

		if (!error)
			rl->rl_data[rl->rl_count] = '\0';

	}


	VN_RELE(vp);

	/* Convert the link text to the client's character set if needed. */
	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
	name = nfscmd_convname(ca, exi, rl->rl_data,
	    NFSCMD_CONV_OUTBOUND, MAXPATHLEN);

	/* If a converted copy was made, it replaces the original buffer. */
	if (name != NULL && name != rl->rl_data) {
		kmem_free(rl->rl_data, NFS_MAXPATHLEN);
		rl->rl_data = name;
	}

	/*
	 * XNFS and RFC1094 require us to return ENXIO if argument
	 * is not a link. UFS returns EINVAL if this is the case,
	 * so we do the mapping here. BUGID 1138002.
	 */
	if (error == EINVAL)
		rl->rl_status = NFSERR_NXIO;
	else
		rl->rl_status = puterrno(error);

}
736 737 void *
737 738 rfs_readlink_getfh(fhandle_t *fhp)
738 739 {
739 740 return (fhp);
740 741 }
741 742 /*
742 743 * Free data allocated by rfs_readlink
743 744 */
744 745 void
745 746 rfs_rlfree(struct nfsrdlnres *rl)
746 747 {
747 748 if (rl->rl_data != NULL)
748 749 kmem_free(rl->rl_data, NFS_MAXPATHLEN);
749 750 }
750 751
751 752 static int rdma_setup_read_data2(struct nfsreadargs *, struct nfsrdresult *);
752 753
/*
 * Read data.
 * Returns some data read from the file at the given fhandle.
 */
/* ARGSUSED */
void
rfs_read(struct nfsreadargs *ra, struct nfsrdresult *rr,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
	vnode_t *vp;
	int error;
	struct vattr va;
	struct iovec iov;
	struct uio uio;
	mblk_t *mp;	/* reply data buffer; freed later by rfs_rdfree */
	int alloc_err = 0;
	int in_crit = 0;	/* nonzero while inside the nbmand critical region */
	caller_context_t ct;

	vp = nfs_fhtovp(&ra->ra_fhandle, exi);
	if (vp == NULL) {
		rr->rr_data = NULL;
		rr->rr_status = NFSERR_STALE;
		return;
	}

	/* Reads are only supported on regular files. */
	if (vp->v_type != VREG) {
		VN_RELE(vp);
		rr->rr_data = NULL;
		rr->rr_status = NFSERR_ISDIR;
		return;
	}

	ct.cc_sysid = 0;
	ct.cc_pid = 0;
	ct.cc_caller_id = nfs2_srv_caller_id;
	ct.cc_flags = CC_DONTBLOCK;

	/*
	 * Enter the critical region before calling VOP_RWLOCK
	 * to avoid a deadlock with write requests.
	 */
	if (nbl_need_check(vp)) {
		nbl_start_crit(vp, RW_READER);
		if (nbl_conflict(vp, NBL_READ, ra->ra_offset, ra->ra_count,
		    0, NULL)) {
			nbl_end_crit(vp);
			VN_RELE(vp);
			rr->rr_data = NULL;
			rr->rr_status = NFSERR_ACCES;
			return;
		}
		in_crit = 1;
	}

	error = VOP_RWLOCK(vp, V_WRITELOCK_FALSE, &ct);

	/* check if a monitor detected a delegation conflict */
	if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
		if (in_crit)
			nbl_end_crit(vp);
		VN_RELE(vp);
		/* mark as wouldblock so response is dropped */
		curthread->t_flag |= T_WOULDBLOCK;

		rr->rr_data = NULL;
		return;
	}

	va.va_mask = AT_ALL;

	error = VOP_GETATTR(vp, &va, 0, cr, &ct);

	if (error) {
		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
		if (in_crit)
			nbl_end_crit(vp);

		VN_RELE(vp);
		rr->rr_data = NULL;
		rr->rr_status = puterrno(error);

		return;
	}

	/*
	 * This is a kludge to allow reading of files created
	 * with no read permission. The owner of the file
	 * is always allowed to read it.
	 */
	if (crgetuid(cr) != va.va_uid) {
		error = VOP_ACCESS(vp, VREAD, 0, cr, &ct);

		if (error) {
			/*
			 * Exec is the same as read over the net because
			 * of demand loading.
			 */
			error = VOP_ACCESS(vp, VEXEC, 0, cr, &ct);
		}
		if (error) {
			VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
			if (in_crit)
				nbl_end_crit(vp);
			VN_RELE(vp);
			rr->rr_data = NULL;
			rr->rr_status = puterrno(error);

			return;
		}
	}

	/* Refuse objects subject to mandatory locking. */
	if (MANDLOCK(vp, va.va_mode)) {
		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
		if (in_crit)
			nbl_end_crit(vp);

		VN_RELE(vp);
		rr->rr_data = NULL;
		rr->rr_status = NFSERR_ACCES;

		return;
	}

	rr->rr_ok.rrok_wlist_len = 0;
	rr->rr_ok.rrok_wlist = NULL;

	/* A read starting at or past EOF returns success with no data. */
	if ((u_offset_t)ra->ra_offset >= va.va_size) {
		rr->rr_count = 0;
		rr->rr_data = NULL;
		/*
		 * In this case, status is NFS_OK, but there is no data
		 * to encode. So set rr_mp to NULL.
		 */
		rr->rr_mp = NULL;
		rr->rr_ok.rrok_wlist = ra->ra_wlist;
		if (rr->rr_ok.rrok_wlist)
			clist_zero_len(rr->rr_ok.rrok_wlist);
		goto done;
	}

	if (ra->ra_wlist) {
		/* RDMA case: read directly into the client's write chunk. */
		mp = NULL;
		rr->rr_mp = NULL;
		(void) rdma_get_wchunk(req, &iov, ra->ra_wlist);
		if (ra->ra_count > iov.iov_len) {
			rr->rr_data = NULL;
			rr->rr_status = NFSERR_INVAL;
			goto done;
		}
	} else {
		/*
		 * mp will contain the data to be sent out in the read reply.
		 * This will be freed after the reply has been sent out (by the
		 * driver).
		 * Let's roundup the data to a BYTES_PER_XDR_UNIT multiple, so
		 * that the call to xdrmblk_putmblk() never fails.
		 */
		mp = allocb_wait(RNDUP(ra->ra_count), BPRI_MED, STR_NOSIG,
		    &alloc_err);
		ASSERT(mp != NULL);
		ASSERT(alloc_err == 0);

		rr->rr_mp = mp;

		/*
		 * Set up io vector
		 */
		iov.iov_base = (caddr_t)mp->b_datap->db_base;
		iov.iov_len = ra->ra_count;
	}

	uio.uio_iov = &iov;
	uio.uio_iovcnt = 1;
	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_extflg = UIO_COPY_CACHED;
	uio.uio_loffset = (offset_t)ra->ra_offset;
	uio.uio_resid = ra->ra_count;

	error = VOP_READ(vp, &uio, 0, cr, &ct);

	if (error) {
		if (mp)
			freeb(mp);

		/*
		 * check if a monitor detected a delegation conflict and
		 * mark as wouldblock so response is dropped
		 */
		if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
			curthread->t_flag |= T_WOULDBLOCK;
		else
			rr->rr_status = puterrno(error);

		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
		if (in_crit)
			nbl_end_crit(vp);

		VN_RELE(vp);
		rr->rr_data = NULL;

		return;
	}

	/*
	 * Get attributes again so we can send the latest access
	 * time to the client side for its cache.
	 */
	va.va_mask = AT_ALL;

	error = VOP_GETATTR(vp, &va, 0, cr, &ct);

	if (error) {
		if (mp)
			freeb(mp);

		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
		if (in_crit)
			nbl_end_crit(vp);

		VN_RELE(vp);
		rr->rr_data = NULL;
		rr->rr_status = puterrno(error);

		return;
	}

	/* Amount actually read is the request minus what was left over. */
	rr->rr_count = (uint32_t)(ra->ra_count - uio.uio_resid);

	if (mp) {
		rr->rr_data = (char *)mp->b_datap->db_base;
	} else {
		if (ra->ra_wlist) {
			rr->rr_data = (caddr_t)iov.iov_base;
			if (!rdma_setup_read_data2(ra, rr)) {
				rr->rr_data = NULL;
				rr->rr_status = puterrno(NFSERR_INVAL);
			}
		}
	}
done:
	VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
	if (in_crit)
		nbl_end_crit(vp);

	acl_perm(vp, exi, &va, cr);

	/* check for overflows */
	error = vattr_to_nattr(&va, &rr->rr_attr);

	VN_RELE(vp);

	rr->rr_status = puterrno(error);
}
1007 1008
1008 1009 /*
1009 1010 * Free data allocated by rfs_read
1010 1011 */
1011 1012 void
1012 1013 rfs_rdfree(struct nfsrdresult *rr)
1013 1014 {
1014 1015 mblk_t *mp;
1015 1016
1016 1017 if (rr->rr_status == NFS_OK) {
1017 1018 mp = rr->rr_mp;
1018 1019 if (mp != NULL)
1019 1020 freeb(mp);
1020 1021 }
1021 1022 }
1022 1023
1023 1024 void *
1024 1025 rfs_read_getfh(struct nfsreadargs *ra)
1025 1026 {
1026 1027 return (&ra->ra_fhandle);
1027 1028 }
1028 1029
1029 1030 #define MAX_IOVECS 12
1030 1031
1031 1032 #ifdef DEBUG
1032 1033 static int rfs_write_sync_hits = 0;
1033 1034 static int rfs_write_sync_misses = 0;
1034 1035 #endif
1035 1036
1036 1037 /*
1037 1038 * Write data to file.
1038 1039 * Returns attributes of a file after writing some data to it.
1039 1040 *
1040 1041 * Any changes made here, especially in error handling might have
1041 1042 * to also be done in rfs_write (which clusters write requests).
1042 1043 */
1043 1044 /* ARGSUSED */
1044 1045 void
1045 1046 rfs_write_sync(struct nfswriteargs *wa, struct nfsattrstat *ns,
1046 1047 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
1047 1048 {
1048 1049 int error;
1049 1050 vnode_t *vp;
1050 1051 rlim64_t rlimit;
1051 1052 struct vattr va;
1052 1053 struct uio uio;
1053 1054 struct iovec iov[MAX_IOVECS];
1054 1055 mblk_t *m;
1055 1056 struct iovec *iovp;
1056 1057 int iovcnt;
1057 1058 cred_t *savecred;
1058 1059 int in_crit = 0;
1059 1060 caller_context_t ct;
1060 1061
1061 1062 vp = nfs_fhtovp(&wa->wa_fhandle, exi);
1062 1063 if (vp == NULL) {
1063 1064 ns->ns_status = NFSERR_STALE;
1064 1065 return;
1065 1066 }
1066 1067
1067 1068 if (rdonly(ro, vp)) {
1068 1069 VN_RELE(vp);
1069 1070 ns->ns_status = NFSERR_ROFS;
1070 1071 return;
1071 1072 }
1072 1073
1073 1074 if (vp->v_type != VREG) {
1074 1075 VN_RELE(vp);
1075 1076 ns->ns_status = NFSERR_ISDIR;
1076 1077 return;
1077 1078 }
1078 1079
1079 1080 ct.cc_sysid = 0;
1080 1081 ct.cc_pid = 0;
1081 1082 ct.cc_caller_id = nfs2_srv_caller_id;
1082 1083 ct.cc_flags = CC_DONTBLOCK;
1083 1084
1084 1085 va.va_mask = AT_UID|AT_MODE;
1085 1086
1086 1087 error = VOP_GETATTR(vp, &va, 0, cr, &ct);
1087 1088
1088 1089 if (error) {
1089 1090 VN_RELE(vp);
1090 1091 ns->ns_status = puterrno(error);
1091 1092
1092 1093 return;
1093 1094 }
1094 1095
1095 1096 if (crgetuid(cr) != va.va_uid) {
1096 1097 /*
1097 1098 * This is a kludge to allow writes of files created
1098 1099 * with read only permission. The owner of the file
1099 1100 * is always allowed to write it.
1100 1101 */
1101 1102 error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct);
1102 1103
1103 1104 if (error) {
1104 1105 VN_RELE(vp);
1105 1106 ns->ns_status = puterrno(error);
1106 1107 return;
1107 1108 }
1108 1109 }
1109 1110
1110 1111 /*
1111 1112 * Can't access a mandatory lock file. This might cause
1112 1113 * the NFS service thread to block forever waiting for a
1113 1114 * lock to be released that will never be released.
1114 1115 */
1115 1116 if (MANDLOCK(vp, va.va_mode)) {
1116 1117 VN_RELE(vp);
1117 1118 ns->ns_status = NFSERR_ACCES;
1118 1119 return;
1119 1120 }
1120 1121
1121 1122 /*
1122 1123 * We have to enter the critical region before calling VOP_RWLOCK
1123 1124 * to avoid a deadlock with ufs.
1124 1125 */
1125 1126 if (nbl_need_check(vp)) {
1126 1127 nbl_start_crit(vp, RW_READER);
1127 1128 in_crit = 1;
1128 1129 if (nbl_conflict(vp, NBL_WRITE, wa->wa_offset,
1129 1130 wa->wa_count, 0, NULL)) {
1130 1131 error = EACCES;
1131 1132 goto out;
1132 1133 }
1133 1134 }
1134 1135
1135 1136 error = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct);
1136 1137
1137 1138 /* check if a monitor detected a delegation conflict */
1138 1139 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1139 1140 goto out;
1140 1141 }
1141 1142
1142 1143 if (wa->wa_data || wa->wa_rlist) {
1143 1144 /* Do the RDMA thing if necessary */
1144 1145 if (wa->wa_rlist) {
1145 1146 iov[0].iov_base = (char *)((wa->wa_rlist)->u.c_daddr3);
1146 1147 iov[0].iov_len = wa->wa_count;
1147 1148 } else {
1148 1149 iov[0].iov_base = wa->wa_data;
1149 1150 iov[0].iov_len = wa->wa_count;
1150 1151 }
1151 1152 uio.uio_iov = iov;
1152 1153 uio.uio_iovcnt = 1;
1153 1154 uio.uio_segflg = UIO_SYSSPACE;
1154 1155 uio.uio_extflg = UIO_COPY_DEFAULT;
1155 1156 uio.uio_loffset = (offset_t)wa->wa_offset;
1156 1157 uio.uio_resid = wa->wa_count;
1157 1158 /*
1158 1159 * The limit is checked on the client. We
1159 1160 * should allow any size writes here.
1160 1161 */
1161 1162 uio.uio_llimit = curproc->p_fsz_ctl;
1162 1163 rlimit = uio.uio_llimit - wa->wa_offset;
1163 1164 if (rlimit < (rlim64_t)uio.uio_resid)
1164 1165 uio.uio_resid = (uint_t)rlimit;
1165 1166
1166 1167 /*
1167 1168 * for now we assume no append mode
1168 1169 */
1169 1170 /*
1170 1171 * We're changing creds because VM may fault and we need
1171 1172 * the cred of the current thread to be used if quota
1172 1173 * checking is enabled.
1173 1174 */
1174 1175 savecred = curthread->t_cred;
1175 1176 curthread->t_cred = cr;
1176 1177 error = VOP_WRITE(vp, &uio, FSYNC, cr, &ct);
1177 1178 curthread->t_cred = savecred;
1178 1179 } else {
1179 1180
1180 1181 iovcnt = 0;
1181 1182 for (m = wa->wa_mblk; m != NULL; m = m->b_cont)
1182 1183 iovcnt++;
1183 1184 if (iovcnt <= MAX_IOVECS) {
1184 1185 #ifdef DEBUG
1185 1186 rfs_write_sync_hits++;
1186 1187 #endif
1187 1188 iovp = iov;
1188 1189 } else {
1189 1190 #ifdef DEBUG
1190 1191 rfs_write_sync_misses++;
1191 1192 #endif
1192 1193 iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
1193 1194 }
1194 1195 mblk_to_iov(wa->wa_mblk, iovcnt, iovp);
1195 1196 uio.uio_iov = iovp;
1196 1197 uio.uio_iovcnt = iovcnt;
1197 1198 uio.uio_segflg = UIO_SYSSPACE;
1198 1199 uio.uio_extflg = UIO_COPY_DEFAULT;
1199 1200 uio.uio_loffset = (offset_t)wa->wa_offset;
1200 1201 uio.uio_resid = wa->wa_count;
1201 1202 /*
1202 1203 * The limit is checked on the client. We
1203 1204 * should allow any size writes here.
1204 1205 */
1205 1206 uio.uio_llimit = curproc->p_fsz_ctl;
1206 1207 rlimit = uio.uio_llimit - wa->wa_offset;
1207 1208 if (rlimit < (rlim64_t)uio.uio_resid)
1208 1209 uio.uio_resid = (uint_t)rlimit;
1209 1210
1210 1211 /*
1211 1212 * For now we assume no append mode.
1212 1213 */
1213 1214 /*
1214 1215 * We're changing creds because VM may fault and we need
1215 1216 * the cred of the current thread to be used if quota
1216 1217 * checking is enabled.
1217 1218 */
1218 1219 savecred = curthread->t_cred;
1219 1220 curthread->t_cred = cr;
1220 1221 error = VOP_WRITE(vp, &uio, FSYNC, cr, &ct);
1221 1222 curthread->t_cred = savecred;
1222 1223
1223 1224 if (iovp != iov)
1224 1225 kmem_free(iovp, sizeof (*iovp) * iovcnt);
1225 1226 }
1226 1227
1227 1228 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct);
1228 1229
1229 1230 if (!error) {
1230 1231 /*
1231 1232 * Get attributes again so we send the latest mod
1232 1233 * time to the client side for its cache.
1233 1234 */
1234 1235 va.va_mask = AT_ALL; /* now we want everything */
1235 1236
1236 1237 error = VOP_GETATTR(vp, &va, 0, cr, &ct);
1237 1238
1238 1239 /* check for overflows */
1239 1240 if (!error) {
1240 1241 acl_perm(vp, exi, &va, cr);
1241 1242 error = vattr_to_nattr(&va, &ns->ns_attr);
1242 1243 }
1243 1244 }
1244 1245
1245 1246 out:
1246 1247 if (in_crit)
1247 1248 nbl_end_crit(vp);
1248 1249 VN_RELE(vp);
1249 1250
1250 1251 /* check if a monitor detected a delegation conflict */
1251 1252 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
1252 1253 /* mark as wouldblock so response is dropped */
1253 1254 curthread->t_flag |= T_WOULDBLOCK;
1254 1255 else
1255 1256 ns->ns_status = puterrno(error);
1256 1257
1257 1258 }
1258 1259
/*
 * One pending NFSv2 WRITE request queued on a write cluster.  Each
 * service thread parks one of these (stack-allocated, see nrpsp in
 * rfs_write()) on a cluster list and sleeps until the thread that
 * processes the cluster fills in ns->ns_status.
 */
struct rfs_async_write {
	struct nfswriteargs *wa;	/* decoded WRITE arguments */
	struct nfsattrstat *ns;		/* response; ns_status flags completion */
	struct svc_req *req;		/* RPC request handle */
	cred_t *cr;			/* credentials of this request */
	bool_t ro;			/* export is read-only for this caller */
	kthread_t *thread;		/* thread blocked on this request */
	struct rfs_async_write *list;	/* next request, ordered by offset */
};
1268 1269
/*
 * A cluster of WRITE requests against a single file handle.  The
 * first writer for a handle creates one (stack-allocated, see nlpsp
 * in rfs_write()) and later processes every request queued on it.
 */
struct rfs_async_write_list {
	fhandle_t *fhp;			/* file handle shared by the cluster */
	kcondvar_t cv;			/* broadcast when the cluster completes */
	struct rfs_async_write *list;	/* requests, sorted by wa_offset */
	struct rfs_async_write_list *next;	/* next active cluster */
};
1275 1276
/*
 * NOTE(review): rfs_write() below operates on the per-zone state in
 * nfs_srv_t (async_write_head / async_write_lock / write_async), not
 * on these file-scope globals; these appear to be retained from the
 * pre-zoned implementation — TODO confirm whether they are still read
 * anywhere.
 */
static struct rfs_async_write_list *rfs_async_write_head = NULL;
static kmutex_t rfs_async_write_lock;
static int rfs_write_async = 1;	/* enables write clustering if == 1 */

/* Maximum iovecs gathered into a single clustered VOP_WRITE */
#define	MAXCLIOVECS	42
/* Sentinel: request not yet serviced (0 would read as NFS_OK) */
#define	RFSWRITE_INITVAL	(enum nfsstat) -1

#ifdef DEBUG
static int	rfs_write_hits = 0;	/* clusters that fit the stack iov[] */
static int	rfs_write_misses = 0;	/* clusters needing kmem_alloc'd iovs */
#endif
1287 1288
/*
 * Write data to file.
 * Returns attributes of a file after writing some data to it.
 *
 * Clustered write path: concurrent WRITE requests against the same
 * file handle are gathered into a "cluster" so contiguous requests
 * can be issued with a single VOP_WRITE.  The first thread to arrive
 * for a handle becomes the cluster owner and services every queued
 * request; later arrivals queue themselves and sleep on the cluster's
 * condition variable.  Falls back to rfs_write_sync() when clustering
 * is disabled for this zone's NFS server instance.
 */
void
rfs_write(struct nfswriteargs *wa, struct nfsattrstat *ns,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error;
	vnode_t *vp;
	rlim64_t rlimit;
	struct vattr va;
	struct uio uio;
	struct rfs_async_write_list *lp;
	struct rfs_async_write_list *nlp;
	struct rfs_async_write *rp;
	struct rfs_async_write *nrp;
	struct rfs_async_write *trp;
	struct rfs_async_write *lrp;
	int data_written;
	int iovcnt;
	mblk_t *m;
	struct iovec *iovp;
	struct iovec *niovp;
	struct iovec iov[MAXCLIOVECS];
	int count;
	int rcount;
	uint_t off;
	uint_t len;
	struct rfs_async_write nrpsp;
	struct rfs_async_write_list nlpsp;
	ushort_t t_flag;
	cred_t *savecred;
	int in_crit = 0;
	caller_context_t ct;
	nfs_srv_t *nsrv;

	ASSERT(exi == NULL || exi->exi_zoneid == curzone->zone_id);
	nsrv = nfs_get_srv();
	if (!nsrv->write_async) {
		rfs_write_sync(wa, ns, exi, req, cr, ro);
		return;
	}

	/*
	 * Initialize status to RFSWRITE_INITVAL instead of 0, since value of 0
	 * is considered an OK.
	 */
	ns->ns_status = RFSWRITE_INITVAL;

	/*
	 * Build our request entry on this thread's stack; it is linked
	 * onto the shared cluster list, so this thread must stay resident
	 * until the entry is unlinked (hence the TS_DONT_SWAP ASSERT
	 * below).
	 */
	nrp = &nrpsp;
	nrp->wa = wa;
	nrp->ns = ns;
	nrp->req = req;
	nrp->cr = cr;
	nrp->ro = ro;
	nrp->thread = curthread;

	ASSERT(curthread->t_schedflag & TS_DONT_SWAP);

	/*
	 * Look to see if there is already a cluster started
	 * for this file.
	 */
	mutex_enter(&nsrv->async_write_lock);
	for (lp = nsrv->async_write_head; lp != NULL; lp = lp->next) {
		if (bcmp(&wa->wa_fhandle, lp->fhp,
		    sizeof (fhandle_t)) == 0)
			break;
	}

	/*
	 * If lp is non-NULL, then there is already a cluster
	 * started.  We need to place ourselves in the cluster
	 * list in the right place as determined by starting
	 * offset.  Conflicts with non-blocking mandatory locked
	 * regions will be checked when the cluster is processed.
	 */
	if (lp != NULL) {
		rp = lp->list;
		trp = NULL;
		while (rp != NULL && rp->wa->wa_offset < wa->wa_offset) {
			trp = rp;
			rp = rp->list;
		}
		nrp->list = rp;
		if (trp == NULL)
			lp->list = nrp;
		else
			trp->list = nrp;
		/*
		 * Sleep until the cluster owner services this request;
		 * it signals completion by overwriting ns_status.
		 */
		while (nrp->ns->ns_status == RFSWRITE_INITVAL)
			cv_wait(&lp->cv, &nsrv->async_write_lock);
		mutex_exit(&nsrv->async_write_lock);

		return;
	}

	/*
	 * No cluster started yet, start one and add ourselves
	 * to the list of clusters.
	 */
	nrp->list = NULL;

	nlp = &nlpsp;
	nlp->fhp = &wa->wa_fhandle;
	cv_init(&nlp->cv, NULL, CV_DEFAULT, NULL);
	nlp->list = nrp;
	nlp->next = NULL;

	if (nsrv->async_write_head == NULL) {
		nsrv->async_write_head = nlp;
	} else {
		lp = nsrv->async_write_head;
		while (lp->next != NULL)
			lp = lp->next;
		lp->next = nlp;
	}
	mutex_exit(&nsrv->async_write_lock);

	/*
	 * Convert the file handle common to all of the requests
	 * in this cluster to a vnode.
	 */
	vp = nfs_fhtovp(&wa->wa_fhandle, exi);
	if (vp == NULL) {
		/*
		 * Stale handle: unlink the cluster and fail every
		 * queued request with NFSERR_STALE.
		 */
		mutex_enter(&nsrv->async_write_lock);
		if (nsrv->async_write_head == nlp)
			nsrv->async_write_head = nlp->next;
		else {
			lp = nsrv->async_write_head;
			while (lp->next != nlp)
				lp = lp->next;
			lp->next = nlp->next;
		}
		t_flag = curthread->t_flag & T_WOULDBLOCK;
		for (rp = nlp->list; rp != NULL; rp = rp->list) {
			rp->ns->ns_status = NFSERR_STALE;
			rp->thread->t_flag |= t_flag;
		}
		cv_broadcast(&nlp->cv);
		mutex_exit(&nsrv->async_write_lock);

		return;
	}

	/*
	 * Can only write regular files.  Attempts to write any
	 * other file types fail with EISDIR.
	 */
	if (vp->v_type != VREG) {
		VN_RELE(vp);
		mutex_enter(&nsrv->async_write_lock);
		if (nsrv->async_write_head == nlp)
			nsrv->async_write_head = nlp->next;
		else {
			lp = nsrv->async_write_head;
			while (lp->next != nlp)
				lp = lp->next;
			lp->next = nlp->next;
		}
		t_flag = curthread->t_flag & T_WOULDBLOCK;
		for (rp = nlp->list; rp != NULL; rp = rp->list) {
			rp->ns->ns_status = NFSERR_ISDIR;
			rp->thread->t_flag |= t_flag;
		}
		cv_broadcast(&nlp->cv);
		mutex_exit(&nsrv->async_write_lock);

		return;
	}

	/*
	 * Enter the critical region before calling VOP_RWLOCK, to avoid a
	 * deadlock with ufs.
	 */
	if (nbl_need_check(vp)) {
		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
	}

	ct.cc_sysid = 0;
	ct.cc_pid = 0;
	ct.cc_caller_id = nfs2_srv_caller_id;
	ct.cc_flags = CC_DONTBLOCK;

	/*
	 * Lock the file for writing.  This operation provides
	 * the delay which allows clusters to grow.
	 */
	error = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct);

	/* check if a monitor detected a delegation conflict */
	if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
		if (in_crit)
			nbl_end_crit(vp);
		VN_RELE(vp);
		/* mark as wouldblock so response is dropped */
		curthread->t_flag |= T_WOULDBLOCK;
		/*
		 * Unlink the cluster and propagate the error (and the
		 * drop-response flag) to every still-pending request.
		 */
		mutex_enter(&nsrv->async_write_lock);
		if (nsrv->async_write_head == nlp)
			nsrv->async_write_head = nlp->next;
		else {
			lp = nsrv->async_write_head;
			while (lp->next != nlp)
				lp = lp->next;
			lp->next = nlp->next;
		}
		for (rp = nlp->list; rp != NULL; rp = rp->list) {
			if (rp->ns->ns_status == RFSWRITE_INITVAL) {
				rp->ns->ns_status = puterrno(error);
				rp->thread->t_flag |= T_WOULDBLOCK;
			}
		}
		cv_broadcast(&nlp->cv);
		mutex_exit(&nsrv->async_write_lock);

		return;
	}

	/*
	 * Disconnect this cluster from the list of clusters.
	 * The cluster that is being dealt with must be fixed
	 * in size after this point, so there is no reason
	 * to leave it on the list so that new requests can
	 * find it.
	 *
	 * The algorithm is that the first write request will
	 * create a cluster, convert the file handle to a
	 * vnode pointer, and then lock the file for writing.
	 * This request is not likely to be clustered with
	 * any others.  However, the next request will create
	 * a new cluster and be blocked in VOP_RWLOCK while
	 * the first request is being processed.  This delay
	 * will allow more requests to be clustered in this
	 * second cluster.
	 */
	mutex_enter(&nsrv->async_write_lock);
	if (nsrv->async_write_head == nlp)
		nsrv->async_write_head = nlp->next;
	else {
		lp = nsrv->async_write_head;
		while (lp->next != nlp)
			lp = lp->next;
		lp->next = nlp->next;
	}
	mutex_exit(&nsrv->async_write_lock);

	/*
	 * Step through the list of requests in this cluster.
	 * We need to check permissions to make sure that all
	 * of the requests have sufficient permission to write
	 * the file.  A cluster can be composed of requests
	 * from different clients and different users on each
	 * client.
	 *
	 * As a side effect, we also calculate the size of the
	 * byte range that this cluster encompasses.
	 */
	rp = nlp->list;
	off = rp->wa->wa_offset;
	len = (uint_t)0;
	do {
		if (rdonly(rp->ro, vp)) {
			rp->ns->ns_status = NFSERR_ROFS;
			t_flag = curthread->t_flag & T_WOULDBLOCK;
			rp->thread->t_flag |= t_flag;
			continue;
		}

		va.va_mask = AT_UID|AT_MODE;

		error = VOP_GETATTR(vp, &va, 0, rp->cr, &ct);

		if (!error) {
			if (crgetuid(rp->cr) != va.va_uid) {
				/*
				 * This is a kludge to allow writes of files
				 * created with read only permission.  The
				 * owner of the file is always allowed to
				 * write it.
				 */
				error = VOP_ACCESS(vp, VWRITE, 0, rp->cr, &ct);
			}
			if (!error && MANDLOCK(vp, va.va_mode))
				error = EACCES;
		}

		/*
		 * Check for a conflict with a nbmand-locked region.
		 */
		if (in_crit && nbl_conflict(vp, NBL_WRITE, rp->wa->wa_offset,
		    rp->wa->wa_count, 0, NULL)) {
			error = EACCES;
		}

		if (error) {
			rp->ns->ns_status = puterrno(error);
			t_flag = curthread->t_flag & T_WOULDBLOCK;
			rp->thread->t_flag |= t_flag;
			continue;
		}
		if (len < rp->wa->wa_offset + rp->wa->wa_count - off)
			len = rp->wa->wa_offset + rp->wa->wa_count - off;
	} while ((rp = rp->list) != NULL);

	/*
	 * Step through the cluster attempting to gather as many
	 * requests which are contiguous as possible.  These
	 * contiguous requests are handled via one call to VOP_WRITE
	 * instead of different calls to VOP_WRITE.  We also keep
	 * track of the fact that any data was written.
	 */
	rp = nlp->list;
	data_written = 0;
	do {
		/*
		 * Skip any requests which are already marked as having an
		 * error.
		 */
		if (rp->ns->ns_status != RFSWRITE_INITVAL) {
			rp = rp->list;
			continue;
		}

		/*
		 * Count the number of iovec's which are required
		 * to handle this set of requests.  One iovec is
		 * needed for each data buffer, whether addressed
		 * by wa_data or by the b_rptr pointers in the
		 * mblk chains.
		 */
		iovcnt = 0;
		lrp = rp;
		for (;;) {
			if (lrp->wa->wa_data || lrp->wa->wa_rlist)
				iovcnt++;
			else {
				m = lrp->wa->wa_mblk;
				while (m != NULL) {
					iovcnt++;
					m = m->b_cont;
				}
			}
			/*
			 * Stop extending the run at the first gap,
			 * already-failed request, or end of list; lrp
			 * ends up pointing one past the last request in
			 * this contiguous run.
			 */
			if (lrp->list == NULL ||
			    lrp->list->ns->ns_status != RFSWRITE_INITVAL ||
			    lrp->wa->wa_offset + lrp->wa->wa_count !=
			    lrp->list->wa->wa_offset) {
				lrp = lrp->list;
				break;
			}
			lrp = lrp->list;
		}

		if (iovcnt <= MAXCLIOVECS) {
#ifdef DEBUG
			rfs_write_hits++;
#endif
			niovp = iov;
		} else {
#ifdef DEBUG
			rfs_write_misses++;
#endif
			niovp = kmem_alloc(sizeof (*niovp) * iovcnt, KM_SLEEP);
		}
		/*
		 * Put together the scatter/gather iovecs.
		 */
		iovp = niovp;
		trp = rp;
		count = 0;
		do {
			if (trp->wa->wa_data || trp->wa->wa_rlist) {
				if (trp->wa->wa_rlist) {
					iovp->iov_base =
					    (char *)((trp->wa->wa_rlist)->
					    u.c_daddr3);
					iovp->iov_len = trp->wa->wa_count;
				} else {
					iovp->iov_base = trp->wa->wa_data;
					iovp->iov_len = trp->wa->wa_count;
				}
				iovp++;
			} else {
				m = trp->wa->wa_mblk;
				rcount = trp->wa->wa_count;
				/*
				 * Walk the mblk chain, clamping the final
				 * fragment so no more than wa_count bytes
				 * are mapped for this request.
				 */
				while (m != NULL) {
					iovp->iov_base = (caddr_t)m->b_rptr;
					iovp->iov_len = (m->b_wptr - m->b_rptr);
					rcount -= iovp->iov_len;
					if (rcount < 0)
						iovp->iov_len += rcount;
					iovp++;
					if (rcount <= 0)
						break;
					m = m->b_cont;
				}
			}
			count += trp->wa->wa_count;
			trp = trp->list;
		} while (trp != lrp);

		uio.uio_iov = niovp;
		uio.uio_iovcnt = iovcnt;
		uio.uio_segflg = UIO_SYSSPACE;
		uio.uio_extflg = UIO_COPY_DEFAULT;
		uio.uio_loffset = (offset_t)rp->wa->wa_offset;
		uio.uio_resid = count;
		/*
		 * The limit is checked on the client. We
		 * should allow any size writes here.
		 */
		uio.uio_llimit = curproc->p_fsz_ctl;
		rlimit = uio.uio_llimit - rp->wa->wa_offset;
		if (rlimit < (rlim64_t)uio.uio_resid)
			uio.uio_resid = (uint_t)rlimit;

		/*
		 * For now we assume no append mode.
		 */

		/*
		 * We're changing creds because VM may fault
		 * and we need the cred of the current
		 * thread to be used if quota * checking is
		 * enabled.
		 */
		savecred = curthread->t_cred;
		curthread->t_cred = cr;
		error = VOP_WRITE(vp, &uio, 0, rp->cr, &ct);
		curthread->t_cred = savecred;

		/* check if a monitor detected a delegation conflict */
		if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
			/* mark as wouldblock so response is dropped */
			curthread->t_flag |= T_WOULDBLOCK;

		if (niovp != iov)
			kmem_free(niovp, sizeof (*niovp) * iovcnt);

		if (!error) {
			data_written = 1;
			/*
			 * Get attributes again so we send the latest mod
			 * time to the client side for its cache.
			 */
			va.va_mask = AT_ALL;	/* now we want everything */

			error = VOP_GETATTR(vp, &va, 0, rp->cr, &ct);

			if (!error)
				acl_perm(vp, exi, &va, rp->cr);
		}

		/*
		 * Fill in the status responses for each request
		 * which was just handled.  Also, copy the latest
		 * attributes in to the attribute responses if
		 * appropriate.
		 */
		t_flag = curthread->t_flag & T_WOULDBLOCK;
		do {
			rp->thread->t_flag |= t_flag;
			/* check for overflows */
			if (!error) {
				error = vattr_to_nattr(&va, &rp->ns->ns_attr);
			}
			rp->ns->ns_status = puterrno(error);
			rp = rp->list;
		} while (rp != lrp);
	} while (rp != NULL);

	/*
	 * If any data was written at all, then we need to flush
	 * the data and metadata to stable storage.
	 */
	if (data_written) {
		error = VOP_PUTPAGE(vp, (u_offset_t)off, len, 0, cr, &ct);

		if (!error) {
			error = VOP_FSYNC(vp, FNODSYNC, cr, &ct);
		}
	}

	VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct);

	if (in_crit)
		nbl_end_crit(vp);
	VN_RELE(vp);

	/*
	 * Wake every request still marked pending (e.g. those failed by
	 * the final flush) with the last error, then release the cluster.
	 */
	t_flag = curthread->t_flag & T_WOULDBLOCK;
	mutex_enter(&nsrv->async_write_lock);
	for (rp = nlp->list; rp != NULL; rp = rp->list) {
		if (rp->ns->ns_status == RFSWRITE_INITVAL) {
			rp->ns->ns_status = puterrno(error);
			rp->thread->t_flag |= t_flag;
		}
	}
	cv_broadcast(&nlp->cv);
	mutex_exit(&nsrv->async_write_lock);

}
1789 1790
1790 1791 void *
1791 1792 rfs_write_getfh(struct nfswriteargs *wa)
1792 1793 {
1793 1794 return (&wa->wa_fhandle);
1794 1795 }
1795 1796
1796 1797 /*
1797 1798 * Create a file.
1798 1799 * Creates a file with given attributes and returns those attributes
1799 1800 * and an fhandle for the new file.
1800 1801 */
1801 1802 void
1802 1803 rfs_create(struct nfscreatargs *args, struct nfsdiropres *dr,
1803 1804 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
1804 1805 {
1805 1806 int error;
1806 1807 int lookuperr;
1807 1808 int in_crit = 0;
1808 1809 struct vattr va;
1809 1810 vnode_t *vp;
1810 1811 vnode_t *realvp;
1811 1812 vnode_t *dvp;
1812 1813 char *name = args->ca_da.da_name;
1813 1814 vnode_t *tvp = NULL;
1814 1815 int mode;
1815 1816 int lookup_ok;
1816 1817 bool_t trunc;
1817 1818 struct sockaddr *ca;
1818 1819
1819 1820 /*
1820 1821 * Disallow NULL paths
1821 1822 */
1822 1823 if (name == NULL || *name == '\0') {
1823 1824 dr->dr_status = NFSERR_ACCES;
1824 1825 return;
1825 1826 }
1826 1827
1827 1828 dvp = nfs_fhtovp(args->ca_da.da_fhandle, exi);
1828 1829 if (dvp == NULL) {
1829 1830 dr->dr_status = NFSERR_STALE;
1830 1831 return;
1831 1832 }
1832 1833
1833 1834 error = sattr_to_vattr(args->ca_sa, &va);
1834 1835 if (error) {
1835 1836 dr->dr_status = puterrno(error);
1836 1837 return;
1837 1838 }
1838 1839
1839 1840 /*
1840 1841 * Must specify the mode.
1841 1842 */
1842 1843 if (!(va.va_mask & AT_MODE)) {
1843 1844 VN_RELE(dvp);
1844 1845 dr->dr_status = NFSERR_INVAL;
1845 1846 return;
1846 1847 }
1847 1848
1848 1849 /*
1849 1850 * This is a completely gross hack to make mknod
1850 1851 * work over the wire until we can wack the protocol
1851 1852 */
1852 1853 if ((va.va_mode & IFMT) == IFCHR) {
1853 1854 if (args->ca_sa->sa_size == (uint_t)NFS_FIFO_DEV)
1854 1855 va.va_type = VFIFO; /* xtra kludge for named pipe */
1855 1856 else {
1856 1857 va.va_type = VCHR;
1857 1858 /*
1858 1859 * uncompress the received dev_t
1859 1860 * if the top half is zero indicating a request
1860 1861 * from an `older style' OS.
1861 1862 */
1862 1863 if ((va.va_size & 0xffff0000) == 0)
1863 1864 va.va_rdev = nfsv2_expdev(va.va_size);
1864 1865 else
1865 1866 va.va_rdev = (dev_t)va.va_size;
1866 1867 }
1867 1868 va.va_mask &= ~AT_SIZE;
1868 1869 } else if ((va.va_mode & IFMT) == IFBLK) {
1869 1870 va.va_type = VBLK;
1870 1871 /*
1871 1872 * uncompress the received dev_t
1872 1873 * if the top half is zero indicating a request
1873 1874 * from an `older style' OS.
1874 1875 */
1875 1876 if ((va.va_size & 0xffff0000) == 0)
1876 1877 va.va_rdev = nfsv2_expdev(va.va_size);
1877 1878 else
1878 1879 va.va_rdev = (dev_t)va.va_size;
1879 1880 va.va_mask &= ~AT_SIZE;
1880 1881 } else if ((va.va_mode & IFMT) == IFSOCK) {
1881 1882 va.va_type = VSOCK;
1882 1883 } else {
1883 1884 va.va_type = VREG;
1884 1885 }
1885 1886 va.va_mode &= ~IFMT;
1886 1887 va.va_mask |= AT_TYPE;
1887 1888
1888 1889 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1889 1890 name = nfscmd_convname(ca, exi, name, NFSCMD_CONV_INBOUND,
1890 1891 MAXPATHLEN);
1891 1892 if (name == NULL) {
1892 1893 dr->dr_status = puterrno(EINVAL);
1893 1894 return;
1894 1895 }
1895 1896
1896 1897 /*
1897 1898 * Why was the choice made to use VWRITE as the mode to the
1898 1899 * call to VOP_CREATE ? This results in a bug. When a client
1899 1900 * opens a file that already exists and is RDONLY, the second
1900 1901 * open fails with an EACESS because of the mode.
1901 1902 * bug ID 1054648.
1902 1903 */
1903 1904 lookup_ok = 0;
1904 1905 mode = VWRITE;
1905 1906 if (!(va.va_mask & AT_SIZE) || va.va_type != VREG) {
1906 1907 error = VOP_LOOKUP(dvp, name, &tvp, NULL, 0, NULL, cr,
1907 1908 NULL, NULL, NULL);
1908 1909 if (!error) {
1909 1910 struct vattr at;
1910 1911
1911 1912 lookup_ok = 1;
1912 1913 at.va_mask = AT_MODE;
1913 1914 error = VOP_GETATTR(tvp, &at, 0, cr, NULL);
1914 1915 if (!error)
1915 1916 mode = (at.va_mode & S_IWUSR) ? VWRITE : VREAD;
1916 1917 VN_RELE(tvp);
1917 1918 tvp = NULL;
1918 1919 }
1919 1920 }
1920 1921
1921 1922 if (!lookup_ok) {
1922 1923 if (rdonly(ro, dvp)) {
1923 1924 error = EROFS;
1924 1925 } else if (va.va_type != VREG && va.va_type != VFIFO &&
1925 1926 va.va_type != VSOCK && secpolicy_sys_devices(cr) != 0) {
1926 1927 error = EPERM;
1927 1928 } else {
1928 1929 error = 0;
1929 1930 }
1930 1931 }
1931 1932
1932 1933 /*
1933 1934 * If file size is being modified on an already existing file
1934 1935 * make sure that there are no conflicting non-blocking mandatory
1935 1936 * locks in the region being manipulated. Return EACCES if there
1936 1937 * are conflicting locks.
1937 1938 */
1938 1939 if (!error && (va.va_type == VREG) && (va.va_mask & AT_SIZE)) {
1939 1940 lookuperr = VOP_LOOKUP(dvp, name, &tvp, NULL, 0, NULL, cr,
1940 1941 NULL, NULL, NULL);
1941 1942
1942 1943 if (!lookuperr &&
1943 1944 rfs4_check_delegated(FWRITE, tvp, va.va_size == 0)) {
1944 1945 VN_RELE(tvp);
1945 1946 curthread->t_flag |= T_WOULDBLOCK;
1946 1947 goto out;
1947 1948 }
1948 1949
1949 1950 if (!lookuperr && nbl_need_check(tvp)) {
1950 1951 /*
1951 1952 * The file exists. Now check if it has any
1952 1953 * conflicting non-blocking mandatory locks
1953 1954 * in the region being changed.
1954 1955 */
1955 1956 struct vattr bva;
1956 1957 u_offset_t offset;
1957 1958 ssize_t length;
1958 1959
1959 1960 nbl_start_crit(tvp, RW_READER);
1960 1961 in_crit = 1;
1961 1962
1962 1963 bva.va_mask = AT_SIZE;
1963 1964 error = VOP_GETATTR(tvp, &bva, 0, cr, NULL);
1964 1965 if (!error) {
1965 1966 if (va.va_size < bva.va_size) {
1966 1967 offset = va.va_size;
1967 1968 length = bva.va_size - va.va_size;
1968 1969 } else {
1969 1970 offset = bva.va_size;
1970 1971 length = va.va_size - bva.va_size;
1971 1972 }
1972 1973 if (length) {
1973 1974 if (nbl_conflict(tvp, NBL_WRITE,
1974 1975 offset, length, 0, NULL)) {
1975 1976 error = EACCES;
1976 1977 }
1977 1978 }
1978 1979 }
1979 1980 if (error) {
1980 1981 nbl_end_crit(tvp);
1981 1982 VN_RELE(tvp);
1982 1983 in_crit = 0;
1983 1984 }
1984 1985 } else if (tvp != NULL) {
1985 1986 VN_RELE(tvp);
1986 1987 }
1987 1988 }
1988 1989
1989 1990 if (!error) {
1990 1991 /*
1991 1992 * If filesystem is shared with nosuid the remove any
1992 1993 * setuid/setgid bits on create.
1993 1994 */
1994 1995 if (va.va_type == VREG &&
1995 1996 exi->exi_export.ex_flags & EX_NOSUID)
1996 1997 va.va_mode &= ~(VSUID | VSGID);
1997 1998
1998 1999 error = VOP_CREATE(dvp, name, &va, NONEXCL, mode, &vp, cr, 0,
1999 2000 NULL, NULL);
2000 2001
2001 2002 if (!error) {
2002 2003
2003 2004 if ((va.va_mask & AT_SIZE) && (va.va_size == 0))
2004 2005 trunc = TRUE;
2005 2006 else
2006 2007 trunc = FALSE;
2007 2008
2008 2009 if (rfs4_check_delegated(FWRITE, vp, trunc)) {
2009 2010 VN_RELE(vp);
2010 2011 curthread->t_flag |= T_WOULDBLOCK;
2011 2012 goto out;
2012 2013 }
2013 2014 va.va_mask = AT_ALL;
2014 2015
2015 2016 error = VOP_GETATTR(vp, &va, 0, cr, NULL);
2016 2017
2017 2018 /* check for overflows */
2018 2019 if (!error) {
2019 2020 acl_perm(vp, exi, &va, cr);
2020 2021 error = vattr_to_nattr(&va, &dr->dr_attr);
2021 2022 if (!error) {
2022 2023 error = makefh(&dr->dr_fhandle, vp,
2023 2024 exi);
2024 2025 }
2025 2026 }
2026 2027 /*
2027 2028 * Force modified metadata out to stable storage.
2028 2029 *
2029 2030 * if a underlying vp exists, pass it to VOP_FSYNC
2030 2031 */
2031 2032 if (VOP_REALVP(vp, &realvp, NULL) == 0)
2032 2033 (void) VOP_FSYNC(realvp, FNODSYNC, cr, NULL);
2033 2034 else
2034 2035 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
2035 2036 VN_RELE(vp);
2036 2037 }
2037 2038
2038 2039 if (in_crit) {
2039 2040 nbl_end_crit(tvp);
2040 2041 VN_RELE(tvp);
2041 2042 }
2042 2043 }
2043 2044
2044 2045 /*
2045 2046 * Force modified data and metadata out to stable storage.
2046 2047 */
2047 2048 (void) VOP_FSYNC(dvp, 0, cr, NULL);
2048 2049
2049 2050 out:
2050 2051
2051 2052 VN_RELE(dvp);
2052 2053
2053 2054 dr->dr_status = puterrno(error);
2054 2055
2055 2056 if (name != args->ca_da.da_name)
2056 2057 kmem_free(name, MAXPATHLEN);
2057 2058 }
2058 2059 void *
2059 2060 rfs_create_getfh(struct nfscreatargs *args)
2060 2061 {
2061 2062 return (args->ca_da.da_fhandle);
2062 2063 }
2063 2064
2064 2065 /*
2065 2066 * Remove a file.
2066 2067 * Remove named file from parent directory.
2067 2068 */
2068 2069 /* ARGSUSED */
2069 2070 void
rfs_remove(struct nfsdiropargs *da, enum nfsstat *status,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error = 0;
	vnode_t *vp;		/* directory containing the entry to remove */
	vnode_t *targvp;	/* the file being removed */
	int in_crit = 0;	/* nonzero while inside the NBL critical region */

	/*
	 * Disallow NULL paths
	 */
	if (da->da_name == NULL || *da->da_name == '\0') {
		*status = NFSERR_ACCES;
		return;
	}

	/* Translate the directory file handle; takes a hold on vp. */
	vp = nfs_fhtovp(da->da_fhandle, exi);
	if (vp == NULL) {
		*status = NFSERR_STALE;
		return;
	}

	if (rdonly(ro, vp)) {
		VN_RELE(vp);
		*status = NFSERR_ROFS;
		return;
	}

	/*
	 * Check for a conflict with a non-blocking mandatory share reservation.
	 */
	error = VOP_LOOKUP(vp, da->da_name, &targvp, NULL, 0,
	    NULL, cr, NULL, NULL, NULL);
	if (error != 0) {
		VN_RELE(vp);
		*status = puterrno(error);
		return;
	}

	/*
	 * If the file is delegated to an v4 client, then initiate
	 * recall and drop this request (by setting T_WOULDBLOCK).
	 * The client will eventually re-transmit the request and
	 * (hopefully), by then, the v4 client will have returned
	 * the delegation.
	 */

	if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
		VN_RELE(vp);
		VN_RELE(targvp);
		curthread->t_flag |= T_WOULDBLOCK;
		return;
	}

	if (nbl_need_check(targvp)) {
		nbl_start_crit(targvp, RW_READER);
		in_crit = 1;
		if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
			error = EACCES;
			goto out;
		}
	}

	error = VOP_REMOVE(vp, da->da_name, cr, NULL, 0);

	/*
	 * Force modified data and metadata out to stable storage.
	 */
	(void) VOP_FSYNC(vp, 0, cr, NULL);

out:
	/* Common exit: leave the critical region and drop both holds. */
	if (in_crit)
		nbl_end_crit(targvp);
	VN_RELE(targvp);
	VN_RELE(vp);

	*status = puterrno(error);

}
2149 2150
2150 2151 void *
2151 2152 rfs_remove_getfh(struct nfsdiropargs *da)
2152 2153 {
2153 2154 return (da->da_fhandle);
2154 2155 }
2155 2156
2156 2157 /*
2157 2158 * rename a file
2158 2159 * Give a file (from) a new name (to).
2159 2160 */
2160 2161 /* ARGSUSED */
void
rfs_rename(struct nfsrnmargs *args, enum nfsstat *status,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error = 0;
	vnode_t *fromvp;	/* source directory */
	vnode_t *tovp;		/* target directory */
	struct exportinfo *to_exi;
	fhandle_t *fh;
	vnode_t *srcvp;		/* the file being renamed */
	vnode_t *targvp;	/* existing file being renamed over, if any */
	int in_crit = 0;	/* nonzero while inside the NBL critical region */

	fromvp = nfs_fhtovp(args->rna_from.da_fhandle, exi);
	if (fromvp == NULL) {
		*status = NFSERR_STALE;
		return;
	}

	/*
	 * The target directory's handle must belong to a known export and
	 * to the same export as the source: NFSv2 rename may not cross
	 * filesystem (export) boundaries.
	 */
	fh = args->rna_to.da_fhandle;
	to_exi = checkexport(&fh->fh_fsid, (fid_t *)&fh->fh_xlen);
	if (to_exi == NULL) {
		VN_RELE(fromvp);
		*status = NFSERR_ACCES;
		return;
	}
	/*
	 * Only the pointer identity of to_exi is used below, so its
	 * reference can be dropped immediately.
	 */
	exi_rele(to_exi);

	if (to_exi != exi) {
		VN_RELE(fromvp);
		*status = NFSERR_XDEV;
		return;
	}

	tovp = nfs_fhtovp(args->rna_to.da_fhandle, exi);
	if (tovp == NULL) {
		VN_RELE(fromvp);
		*status = NFSERR_STALE;
		return;
	}

	if (fromvp->v_type != VDIR || tovp->v_type != VDIR) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_NOTDIR;
		return;
	}

	/*
	 * Disallow NULL paths
	 */
	if (args->rna_from.da_name == NULL || *args->rna_from.da_name == '\0' ||
	    args->rna_to.da_name == NULL || *args->rna_to.da_name == '\0') {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_ACCES;
		return;
	}

	if (rdonly(ro, tovp)) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_ROFS;
		return;
	}

	/*
	 * Check for a conflict with a non-blocking mandatory share reservation.
	 */
	error = VOP_LOOKUP(fromvp, args->rna_from.da_name, &srcvp, NULL, 0,
	    NULL, cr, NULL, NULL, NULL);
	if (error != 0) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = puterrno(error);
		return;
	}

	/* Check for delegations on the source file */

	if (rfs4_check_delegated(FWRITE, srcvp, FALSE)) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		VN_RELE(srcvp);
		curthread->t_flag |= T_WOULDBLOCK;
		return;
	}

	/* Check for delegation on the file being renamed over, if it exists */

	if (nfs4_get_deleg_policy() != SRV_NEVER_DELEGATE &&
	    VOP_LOOKUP(tovp, args->rna_to.da_name, &targvp, NULL, 0, NULL, cr,
	    NULL, NULL, NULL) == 0) {

		if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
			VN_RELE(tovp);
			VN_RELE(fromvp);
			VN_RELE(srcvp);
			VN_RELE(targvp);
			curthread->t_flag |= T_WOULDBLOCK;
			return;
		}
		VN_RELE(targvp);
	}


	if (nbl_need_check(srcvp)) {
		nbl_start_crit(srcvp, RW_READER);
		in_crit = 1;
		if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL)) {
			error = EACCES;
			goto out;
		}
	}

	error = VOP_RENAME(fromvp, args->rna_from.da_name,
	    tovp, args->rna_to.da_name, cr, NULL, 0);

	/* Keep the cached vnode path up to date after a successful rename. */
	if (error == 0)
		vn_renamepath(tovp, srcvp, args->rna_to.da_name,
		    strlen(args->rna_to.da_name));

	/*
	 * Force modified data and metadata out to stable storage.
	 */
	(void) VOP_FSYNC(tovp, 0, cr, NULL);
	(void) VOP_FSYNC(fromvp, 0, cr, NULL);

out:
	/* Common exit: leave the critical region and drop all holds. */
	if (in_crit)
		nbl_end_crit(srcvp);
	VN_RELE(srcvp);
	VN_RELE(tovp);
	VN_RELE(fromvp);

	*status = puterrno(error);

}
2299 2300 void *
2300 2301 rfs_rename_getfh(struct nfsrnmargs *args)
2301 2302 {
2302 2303 return (args->rna_from.da_fhandle);
2303 2304 }
2304 2305
2305 2306 /*
2306 2307 * Link to a file.
2307 2308 * Create a file (to) which is a hard link to the given file (from).
2308 2309 */
2309 2310 /* ARGSUSED */
void
rfs_link(struct nfslinkargs *args, enum nfsstat *status,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error;
	vnode_t *fromvp;	/* existing file to be linked to */
	vnode_t *tovp;		/* directory that receives the new name */
	struct exportinfo *to_exi;
	fhandle_t *fh;

	fromvp = nfs_fhtovp(args->la_from, exi);
	if (fromvp == NULL) {
		*status = NFSERR_STALE;
		return;
	}

	/*
	 * The target directory's handle must belong to a known export and
	 * to the same export as the source: hard links may not cross
	 * filesystem (export) boundaries.
	 */
	fh = args->la_to.da_fhandle;
	to_exi = checkexport(&fh->fh_fsid, (fid_t *)&fh->fh_xlen);
	if (to_exi == NULL) {
		VN_RELE(fromvp);
		*status = NFSERR_ACCES;
		return;
	}
	/* Only the pointer identity of to_exi is used below. */
	exi_rele(to_exi);

	if (to_exi != exi) {
		VN_RELE(fromvp);
		*status = NFSERR_XDEV;
		return;
	}

	tovp = nfs_fhtovp(args->la_to.da_fhandle, exi);
	if (tovp == NULL) {
		VN_RELE(fromvp);
		*status = NFSERR_STALE;
		return;
	}

	if (tovp->v_type != VDIR) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_NOTDIR;
		return;
	}
	/*
	 * Disallow NULL paths
	 */
	if (args->la_to.da_name == NULL || *args->la_to.da_name == '\0') {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_ACCES;
		return;
	}

	if (rdonly(ro, tovp)) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_ROFS;
		return;
	}

	error = VOP_LINK(tovp, fromvp, args->la_to.da_name, cr, NULL, 0);

	/*
	 * Force modified data and metadata out to stable storage.
	 * NOTE(review): fromvp is synced with FNODSYNC (only its link
	 * count changed) while the directory gets a full sync.
	 */
	(void) VOP_FSYNC(tovp, 0, cr, NULL);
	(void) VOP_FSYNC(fromvp, FNODSYNC, cr, NULL);

	VN_RELE(tovp);
	VN_RELE(fromvp);

	*status = puterrno(error);

}
2385 2386 void *
2386 2387 rfs_link_getfh(struct nfslinkargs *args)
2387 2388 {
2388 2389 return (args->la_from);
2389 2390 }
2390 2391
2391 2392 /*
2392 2393 * Symbolicly link to a file.
2393 2394 * Create a file (to) with the given attributes which is a symbolic link
2394 2395 * to the given path name (to).
2395 2396 */
2396 2397 void
2397 2398 rfs_symlink(struct nfsslargs *args, enum nfsstat *status,
2398 2399 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
2399 2400 {
2400 2401 int error;
2401 2402 struct vattr va;
2402 2403 vnode_t *vp;
2403 2404 vnode_t *svp;
2404 2405 int lerror;
2405 2406 struct sockaddr *ca;
2406 2407 char *name = NULL;
2407 2408
2408 2409 /*
2409 2410 * Disallow NULL paths
2410 2411 */
2411 2412 if (args->sla_from.da_name == NULL || *args->sla_from.da_name == '\0') {
2412 2413 *status = NFSERR_ACCES;
2413 2414 return;
2414 2415 }
2415 2416
2416 2417 vp = nfs_fhtovp(args->sla_from.da_fhandle, exi);
2417 2418 if (vp == NULL) {
2418 2419 *status = NFSERR_STALE;
2419 2420 return;
2420 2421 }
2421 2422
2422 2423 if (rdonly(ro, vp)) {
2423 2424 VN_RELE(vp);
2424 2425 *status = NFSERR_ROFS;
2425 2426 return;
2426 2427 }
2427 2428
2428 2429 error = sattr_to_vattr(args->sla_sa, &va);
2429 2430 if (error) {
2430 2431 VN_RELE(vp);
2431 2432 *status = puterrno(error);
2432 2433 return;
2433 2434 }
2434 2435
2435 2436 if (!(va.va_mask & AT_MODE)) {
2436 2437 VN_RELE(vp);
2437 2438 *status = NFSERR_INVAL;
2438 2439 return;
2439 2440 }
2440 2441
2441 2442 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2442 2443 name = nfscmd_convname(ca, exi, args->sla_tnm,
2443 2444 NFSCMD_CONV_INBOUND, MAXPATHLEN);
2444 2445
2445 2446 if (name == NULL) {
2446 2447 *status = NFSERR_ACCES;
2447 2448 return;
2448 2449 }
2449 2450
2450 2451 va.va_type = VLNK;
2451 2452 va.va_mask |= AT_TYPE;
2452 2453
2453 2454 error = VOP_SYMLINK(vp, args->sla_from.da_name, &va, name, cr, NULL, 0);
2454 2455
2455 2456 /*
2456 2457 * Force new data and metadata out to stable storage.
2457 2458 */
2458 2459 lerror = VOP_LOOKUP(vp, args->sla_from.da_name, &svp, NULL, 0,
2459 2460 NULL, cr, NULL, NULL, NULL);
2460 2461
2461 2462 if (!lerror) {
2462 2463 (void) VOP_FSYNC(svp, 0, cr, NULL);
2463 2464 VN_RELE(svp);
2464 2465 }
2465 2466
2466 2467 /*
2467 2468 * Force modified data and metadata out to stable storage.
2468 2469 */
2469 2470 (void) VOP_FSYNC(vp, 0, cr, NULL);
2470 2471
2471 2472 VN_RELE(vp);
2472 2473
2473 2474 *status = puterrno(error);
2474 2475 if (name != args->sla_tnm)
2475 2476 kmem_free(name, MAXPATHLEN);
2476 2477
2477 2478 }
2478 2479 void *
2479 2480 rfs_symlink_getfh(struct nfsslargs *args)
2480 2481 {
2481 2482 return (args->sla_from.da_fhandle);
2482 2483 }
2483 2484
2484 2485 /*
2485 2486 * Make a directory.
2486 2487 * Create a directory with the given name, parent directory, and attributes.
2487 2488 * Returns a file handle and attributes for the new directory.
2488 2489 */
2489 2490 /* ARGSUSED */
void
rfs_mkdir(struct nfscreatargs *args, struct nfsdiropres *dr,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error;
	struct vattr va;
	vnode_t *dvp = NULL;	/* the newly created directory */
	vnode_t *vp;		/* parent directory */
	char *name = args->ca_da.da_name;

	/*
	 * Disallow NULL paths
	 */
	if (name == NULL || *name == '\0') {
		dr->dr_status = NFSERR_ACCES;
		return;
	}

	/* Translate the parent directory file handle; takes a hold on vp. */
	vp = nfs_fhtovp(args->ca_da.da_fhandle, exi);
	if (vp == NULL) {
		dr->dr_status = NFSERR_STALE;
		return;
	}

	if (rdonly(ro, vp)) {
		VN_RELE(vp);
		dr->dr_status = NFSERR_ROFS;
		return;
	}

	error = sattr_to_vattr(args->ca_sa, &va);
	if (error) {
		VN_RELE(vp);
		dr->dr_status = puterrno(error);
		return;
	}

	/* The client must supply a mode for the new directory. */
	if (!(va.va_mask & AT_MODE)) {
		VN_RELE(vp);
		dr->dr_status = NFSERR_INVAL;
		return;
	}

	va.va_type = VDIR;
	va.va_mask |= AT_TYPE;

	error = VOP_MKDIR(vp, name, &va, &dvp, cr, NULL, 0, NULL);

	if (!error) {
		/*
		 * Attributes of the newly created directory should
		 * be returned to the client.
		 */
		va.va_mask = AT_ALL; /* We want everything */
		error = VOP_GETATTR(dvp, &va, 0, cr, NULL);

		/* check for overflows */
		if (!error) {
			/*
			 * NOTE(review): acl_perm() is passed the parent
			 * vnode (vp), not the new directory (dvp), while
			 * va holds dvp's attributes -- confirm intended.
			 */
			acl_perm(vp, exi, &va, cr);
			error = vattr_to_nattr(&va, &dr->dr_attr);
			if (!error) {
				error = makefh(&dr->dr_fhandle, dvp, exi);
			}
		}
		/*
		 * Force new data and metadata out to stable storage.
		 */
		(void) VOP_FSYNC(dvp, 0, cr, NULL);
		VN_RELE(dvp);
	}

	/*
	 * Force modified data and metadata out to stable storage.
	 */
	(void) VOP_FSYNC(vp, 0, cr, NULL);

	VN_RELE(vp);

	dr->dr_status = puterrno(error);

}
2571 2572 void *
2572 2573 rfs_mkdir_getfh(struct nfscreatargs *args)
2573 2574 {
2574 2575 return (args->ca_da.da_fhandle);
2575 2576 }
2576 2577
2577 2578 /*
2578 2579 * Remove a directory.
2579 2580 * Remove the given directory name from the given parent directory.
2580 2581 */
2581 2582 /* ARGSUSED */
void
rfs_rmdir(struct nfsdiropargs *da, enum nfsstat *status,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error;
	vnode_t *vp;	/* parent directory of the one being removed */

	/*
	 * Disallow NULL paths
	 */
	if (da->da_name == NULL || *da->da_name == '\0') {
		*status = NFSERR_ACCES;
		return;
	}

	/* Translate the parent directory file handle; takes a hold on vp. */
	vp = nfs_fhtovp(da->da_fhandle, exi);
	if (vp == NULL) {
		*status = NFSERR_STALE;
		return;
	}

	if (rdonly(ro, vp)) {
		VN_RELE(vp);
		*status = NFSERR_ROFS;
		return;
	}

	/*
	 * VOP_RMDIR takes a third argument (the current
	 * directory of the process).  That's because someone
	 * wants to return EINVAL if one tries to remove ".".
	 * Of course, NFS servers have no idea what their
	 * clients' current directories are.  We fake it by
	 * supplying a vnode known to exist and illegal to
	 * remove.  The zone's root vnode fills that role here.
	 */
	error = VOP_RMDIR(vp, da->da_name, ZONE_ROOTVP(), cr, NULL, 0);

	/*
	 * Force modified data and metadata out to stable storage.
	 */
	(void) VOP_FSYNC(vp, 0, cr, NULL);

	VN_RELE(vp);

	/*
	 * System V defines rmdir to return EEXIST, not ENOTEMPTY,
	 * if the directory is not empty.  A System V NFS server
	 * needs to map NFSERR_EXIST to NFSERR_NOTEMPTY to transmit
	 * over the wire.
	 */
	if (error == EEXIST)
		*status = NFSERR_NOTEMPTY;
	else
		*status = puterrno(error);

}
2639 2640 void *
2640 2641 rfs_rmdir_getfh(struct nfsdiropargs *da)
2641 2642 {
2642 2643 return (da->da_fhandle);
2643 2644 }
2644 2645
2645 2646 /* ARGSUSED */
2646 2647 void
2647 2648 rfs_readdir(struct nfsrddirargs *rda, struct nfsrddirres *rd,
2648 2649 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
2649 2650 {
2650 2651 int error;
2651 2652 int iseof;
2652 2653 struct iovec iov;
2653 2654 struct uio uio;
2654 2655 vnode_t *vp;
2655 2656 char *ndata = NULL;
2656 2657 struct sockaddr *ca;
2657 2658 size_t nents;
2658 2659 int ret;
2659 2660
2660 2661 vp = nfs_fhtovp(&rda->rda_fh, exi);
2661 2662 if (vp == NULL) {
2662 2663 rd->rd_entries = NULL;
2663 2664 rd->rd_status = NFSERR_STALE;
2664 2665 return;
2665 2666 }
2666 2667
2667 2668 if (vp->v_type != VDIR) {
2668 2669 VN_RELE(vp);
2669 2670 rd->rd_entries = NULL;
2670 2671 rd->rd_status = NFSERR_NOTDIR;
2671 2672 return;
2672 2673 }
2673 2674
2674 2675 (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
2675 2676
2676 2677 error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
2677 2678
2678 2679 if (error) {
2679 2680 rd->rd_entries = NULL;
2680 2681 goto bad;
2681 2682 }
2682 2683
2683 2684 if (rda->rda_count == 0) {
2684 2685 rd->rd_entries = NULL;
2685 2686 rd->rd_size = 0;
2686 2687 rd->rd_eof = FALSE;
2687 2688 goto bad;
2688 2689 }
2689 2690
2690 2691 rda->rda_count = MIN(rda->rda_count, NFS_MAXDATA);
2691 2692
2692 2693 /*
2693 2694 * Allocate data for entries. This will be freed by rfs_rddirfree.
2694 2695 */
2695 2696 rd->rd_bufsize = (uint_t)rda->rda_count;
2696 2697 rd->rd_entries = kmem_alloc(rd->rd_bufsize, KM_SLEEP);
2697 2698
2698 2699 /*
2699 2700 * Set up io vector to read directory data
2700 2701 */
2701 2702 iov.iov_base = (caddr_t)rd->rd_entries;
2702 2703 iov.iov_len = rda->rda_count;
2703 2704 uio.uio_iov = &iov;
2704 2705 uio.uio_iovcnt = 1;
2705 2706 uio.uio_segflg = UIO_SYSSPACE;
2706 2707 uio.uio_extflg = UIO_COPY_CACHED;
2707 2708 uio.uio_loffset = (offset_t)rda->rda_offset;
2708 2709 uio.uio_resid = rda->rda_count;
2709 2710
2710 2711 /*
2711 2712 * read directory
2712 2713 */
2713 2714 error = VOP_READDIR(vp, &uio, cr, &iseof, NULL, 0);
2714 2715
2715 2716 /*
2716 2717 * Clean up
2717 2718 */
2718 2719 if (!error) {
2719 2720 /*
2720 2721 * set size and eof
2721 2722 */
2722 2723 if (uio.uio_resid == rda->rda_count) {
2723 2724 rd->rd_size = 0;
2724 2725 rd->rd_eof = TRUE;
2725 2726 } else {
2726 2727 rd->rd_size = (uint32_t)(rda->rda_count -
2727 2728 uio.uio_resid);
2728 2729 rd->rd_eof = iseof ? TRUE : FALSE;
2729 2730 }
2730 2731 }
2731 2732
2732 2733 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2733 2734 nents = nfscmd_countents((char *)rd->rd_entries, rd->rd_size);
2734 2735 ret = nfscmd_convdirplus(ca, exi, (char *)rd->rd_entries, nents,
2735 2736 rda->rda_count, &ndata);
2736 2737
2737 2738 if (ret != 0) {
2738 2739 size_t dropbytes;
2739 2740 /*
2740 2741 * We had to drop one or more entries in order to fit
2741 2742 * during the character conversion. We need to patch
2742 2743 * up the size and eof info.
2743 2744 */
2744 2745 if (rd->rd_eof)
2745 2746 rd->rd_eof = FALSE;
2746 2747 dropbytes = nfscmd_dropped_entrysize(
2747 2748 (struct dirent64 *)rd->rd_entries, nents, ret);
2748 2749 rd->rd_size -= dropbytes;
2749 2750 }
2750 2751 if (ndata == NULL) {
2751 2752 ndata = (char *)rd->rd_entries;
2752 2753 } else if (ndata != (char *)rd->rd_entries) {
2753 2754 kmem_free(rd->rd_entries, rd->rd_bufsize);
2754 2755 rd->rd_entries = (void *)ndata;
2755 2756 rd->rd_bufsize = rda->rda_count;
2756 2757 }
2757 2758
2758 2759 bad:
2759 2760 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
2760 2761
2761 2762 #if 0 /* notyet */
2762 2763 /*
2763 2764 * Don't do this. It causes local disk writes when just
2764 2765 * reading the file and the overhead is deemed larger
2765 2766 * than the benefit.
2766 2767 */
2767 2768 /*
2768 2769 * Force modified metadata out to stable storage.
2769 2770 */
2770 2771 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
2771 2772 #endif
2772 2773
2773 2774 VN_RELE(vp);
2774 2775
2775 2776 rd->rd_status = puterrno(error);
2776 2777
2777 2778 }
2778 2779 void *
2779 2780 rfs_readdir_getfh(struct nfsrddirargs *rda)
2780 2781 {
2781 2782 return (&rda->rda_fh);
2782 2783 }
2783 2784 void
2784 2785 rfs_rddirfree(struct nfsrddirres *rd)
2785 2786 {
2786 2787 if (rd->rd_entries != NULL)
2787 2788 kmem_free(rd->rd_entries, rd->rd_bufsize);
2788 2789 }
2789 2790
2790 2791 /* ARGSUSED */
2791 2792 void
2792 2793 rfs_statfs(fhandle_t *fh, struct nfsstatfs *fs, struct exportinfo *exi,
2793 2794 struct svc_req *req, cred_t *cr, bool_t ro)
2794 2795 {
2795 2796 int error;
2796 2797 struct statvfs64 sb;
2797 2798 vnode_t *vp;
2798 2799
2799 2800 vp = nfs_fhtovp(fh, exi);
2800 2801 if (vp == NULL) {
2801 2802 fs->fs_status = NFSERR_STALE;
2802 2803 return;
2803 2804 }
2804 2805
2805 2806 error = VFS_STATVFS(vp->v_vfsp, &sb);
2806 2807
2807 2808 if (!error) {
2808 2809 fs->fs_tsize = nfstsize();
2809 2810 fs->fs_bsize = sb.f_frsize;
2810 2811 fs->fs_blocks = sb.f_blocks;
2811 2812 fs->fs_bfree = sb.f_bfree;
2812 2813 fs->fs_bavail = sb.f_bavail;
2813 2814 }
2814 2815
2815 2816 VN_RELE(vp);
2816 2817
2817 2818 fs->fs_status = puterrno(error);
2818 2819
2819 2820 }
2820 2821 void *
2821 2822 rfs_statfs_getfh(fhandle_t *fh)
2822 2823 {
2823 2824 return (fh);
2824 2825 }
2825 2826
/*
 * Convert an over-the-wire NFSv2 sattr into a vattr, setting va_mask
 * bits only for fields the client actually supplied (the protocol uses
 * all-ones sentinel values to mean "don't change").  Returns 0, or
 * EOVERFLOW on a 32-bit kernel when a time value cannot be represented.
 */
static int
sattr_to_vattr(struct nfssattr *sa, struct vattr *vap)
{
	vap->va_mask = 0;

	/*
	 * There was a sign extension bug in some VFS based systems
	 * which stored the mode as a short.  When it would get
	 * assigned to a u_long, no sign extension would occur.
	 * It needed to, but this wasn't noticed because sa_mode
	 * would then get assigned back to the short, thus ignoring
	 * the upper 16 bits of sa_mode.
	 *
	 * To make this implementation work for both broken
	 * clients and good clients, we check for both versions
	 * of the mode.
	 */
	if (sa->sa_mode != (uint32_t)((ushort_t)-1) &&
	    sa->sa_mode != (uint32_t)-1) {
		vap->va_mask |= AT_MODE;
		vap->va_mode = sa->sa_mode;
	}
	if (sa->sa_uid != (uint32_t)-1) {
		vap->va_mask |= AT_UID;
		vap->va_uid = sa->sa_uid;
	}
	if (sa->sa_gid != (uint32_t)-1) {
		vap->va_mask |= AT_GID;
		vap->va_gid = sa->sa_gid;
	}
	if (sa->sa_size != (uint32_t)-1) {
		vap->va_mask |= AT_SIZE;
		vap->va_size = sa->sa_size;
	}
	if (sa->sa_atime.tv_sec != (int32_t)-1 &&
	    sa->sa_atime.tv_usec != (int32_t)-1) {
#ifndef _LP64
		/* return error if time overflow */
		if (!NFS2_TIME_OK(sa->sa_atime.tv_sec))
			return (EOVERFLOW);
#endif
		vap->va_mask |= AT_ATIME;
		/*
		 * nfs protocol defines times as unsigned so don't extend sign,
		 * unless sysadmin set nfs_allow_preepoch_time.
		 */
		NFS_TIME_T_CONVERT(vap->va_atime.tv_sec, sa->sa_atime.tv_sec);
		vap->va_atime.tv_nsec = (uint32_t)(sa->sa_atime.tv_usec * 1000);
	}
	if (sa->sa_mtime.tv_sec != (int32_t)-1 &&
	    sa->sa_mtime.tv_usec != (int32_t)-1) {
#ifndef _LP64
		/* return error if time overflow */
		if (!NFS2_TIME_OK(sa->sa_mtime.tv_sec))
			return (EOVERFLOW);
#endif
		vap->va_mask |= AT_MTIME;
		/*
		 * nfs protocol defines times as unsigned so don't extend sign,
		 * unless sysadmin set nfs_allow_preepoch_time.
		 */
		NFS_TIME_T_CONVERT(vap->va_mtime.tv_sec, sa->sa_mtime.tv_sec);
		vap->va_mtime.tv_nsec = (uint32_t)(sa->sa_mtime.tv_usec * 1000);
	}
	return (0);
}
2892 2893
/*
 * Map vtype_t values (VNON .. VBAD) to the NFSv2 over-the-wire file
 * types.  Types with no NFSv2 representation map to 0.
 */
static const enum nfsftype vt_to_nf[] = {
	0, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, 0, 0, 0, NFSOC, 0
};
2896 2897
2897 2898 /*
2898 2899 * check the following fields for overflow: nodeid, size, and time.
2899 2900 * There could be a problem when converting 64-bit LP64 fields
2900 2901 * into 32-bit ones. Return an error if there is an overflow.
2901 2902 */
int
vattr_to_nattr(struct vattr *vap, struct nfsfattr *na)
{
	ASSERT(vap->va_type >= VNON && vap->va_type <= VBAD);
	na->na_type = vt_to_nf[vap->va_type];

	/* -1 is the "unknown" sentinel; otherwise fold the type bits in. */
	if (vap->va_mode == (unsigned short) -1)
		na->na_mode = (uint32_t)-1;
	else
		na->na_mode = VTTOIF(vap->va_type) | vap->va_mode;

	/* Map unknown/nobody ids to the over-the-wire conventions. */
	if (vap->va_uid == (unsigned short)(-1))
		na->na_uid = (uint32_t)(-1);
	else if (vap->va_uid == UID_NOBODY)
		na->na_uid = (uint32_t)NFS_UID_NOBODY;
	else
		na->na_uid = vap->va_uid;

	if (vap->va_gid == (unsigned short)(-1))
		na->na_gid = (uint32_t)-1;
	else if (vap->va_gid == GID_NOBODY)
		na->na_gid = (uint32_t)NFS_GID_NOBODY;
	else
		na->na_gid = vap->va_gid;

	/*
	 * Do we need to check fsid for overflow?  It is 64-bit in the
	 * vattr, but are bigger than 32 bit values supported?
	 */
	na->na_fsid = vap->va_fsid;

	na->na_nodeid = vap->va_nodeid;

	/*
	 * Check to make sure that the nodeid is representable over the
	 * wire without losing bits.
	 */
	if (vap->va_nodeid != (u_longlong_t)na->na_nodeid)
		return (EFBIG);
	na->na_nlink = vap->va_nlink;

	/*
	 * Check for big files here, instead of at the caller.  See
	 * comments in cstat for large special file explanation.
	 */
	if (vap->va_size > (u_longlong_t)MAXOFF32_T) {
		if ((vap->va_type == VREG) || (vap->va_type == VDIR))
			return (EFBIG);
		if ((vap->va_type == VBLK) || (vap->va_type == VCHR)) {
			/* UNKNOWN_SIZE | OVERFLOW */
			na->na_size = MAXOFF32_T;
		} else
			na->na_size = vap->va_size;
	} else
		na->na_size = vap->va_size;

	/*
	 * If the vnode times overflow the 32-bit times that NFS2
	 * uses on the wire then return an error.
	 */
	if (!NFS_VAP_TIME_OK(vap)) {
		return (EOVERFLOW);
	}
	/* NFSv2 carries microseconds on the wire; truncate nanoseconds. */
	na->na_atime.tv_sec = vap->va_atime.tv_sec;
	na->na_atime.tv_usec = vap->va_atime.tv_nsec / 1000;

	na->na_mtime.tv_sec = vap->va_mtime.tv_sec;
	na->na_mtime.tv_usec = vap->va_mtime.tv_nsec / 1000;

	na->na_ctime.tv_sec = vap->va_ctime.tv_sec;
	na->na_ctime.tv_usec = vap->va_ctime.tv_nsec / 1000;

	/*
	 * If the dev_t will fit into 16 bits then compress
	 * it, otherwise leave it alone.  See comments in
	 * nfs_client.c.
	 */
	if (getminor(vap->va_rdev) <= SO4_MAXMIN &&
	    getmajor(vap->va_rdev) <= SO4_MAXMAJ)
		na->na_rdev = nfsv2_cmpdev(vap->va_rdev);
	else
		(void) cmpldev(&na->na_rdev, vap->va_rdev);

	na->na_blocks = vap->va_nblocks;
	na->na_blocksize = vap->va_blksize;

	/*
	 * This bit of ugliness is a *TEMPORARY* hack to preserve the
	 * over-the-wire protocols for named-pipe vnodes.  It remaps the
	 * VFIFO type to the special over-the-wire type. (see note in nfs.h)
	 *
	 * BUYER BEWARE:
	 *  If you are porting the NFS to a non-Sun server, you probably
	 *  don't want to include the following block of code.  The
	 *  over-the-wire special file types will be changing with the
	 *  NFS Protocol Revision.
	 */
	if (vap->va_type == VFIFO)
		NA_SETFIFO(na);
	return (0);
}
3003 3004
3004 3005 /*
3005 3006 * acl v2 support: returns approximate permission.
3006 3007 * default: returns minimal permission (more restrictive)
3007 3008 * aclok: returns maximal permission (less restrictive)
3008 3009 * This routine changes the permissions that are alaredy in *va.
3009 3010 * If a file has minimal ACL, i.e. aclcnt == MIN_ACL_ENTRIES,
3010 3011 * CLASS_OBJ is always the same as GROUP_OBJ entry.
3011 3012 */
static void
acl_perm(struct vnode *vp, struct exportinfo *exi, struct vattr *va, cred_t *cr)
{
	vsecattr_t vsa;
	int aclcnt;
	aclent_t *aclentp;
	mode_t mask_perm;	/* CLASS_OBJ (mask) permission bits */
	mode_t grp_perm;	/* synthesized group permission bits */
	mode_t other_perm;	/* synthesized other permission bits */
	mode_t other_orig;	/* OTHER_OBJ permission bits as found */
	int error;

	/* dont care default acl */
	vsa.vsa_mask = (VSA_ACL | VSA_ACLCNT);
	error = VOP_GETSECATTR(vp, &vsa, 0, cr, NULL);

	if (!error) {
		aclcnt = vsa.vsa_aclcnt;
		if (aclcnt > MIN_ACL_ENTRIES) {
			/*
			 * non-trivial ACL
			 *
			 * NOTE(review): mask_perm and other_orig are only
			 * assigned when CLASS_OBJ / OTHER_OBJ entries are
			 * present; this relies on the filesystem returning
			 * a well-formed aclent ACL that always has both.
			 */
			aclentp = vsa.vsa_aclentp;
			if (exi->exi_export.ex_flags & EX_ACLOK) {
				/* maximal permissions */
				grp_perm = 0;
				other_perm = 0;
				for (; aclcnt > 0; aclcnt--, aclentp++) {
					switch (aclentp->a_type) {
					case USER_OBJ:
						break;
					case USER:
						grp_perm |=
						    aclentp->a_perm << 3;
						other_perm |= aclentp->a_perm;
						break;
					case GROUP_OBJ:
						grp_perm |=
						    aclentp->a_perm << 3;
						break;
					case GROUP:
						other_perm |= aclentp->a_perm;
						break;
					case OTHER_OBJ:
						other_orig = aclentp->a_perm;
						break;
					case CLASS_OBJ:
						mask_perm = aclentp->a_perm;
						break;
					default:
						break;
					}
				}
				grp_perm &= mask_perm << 3;
				other_perm &= mask_perm;
				other_perm |= other_orig;

			} else {
				/* minimal permissions */
				grp_perm = 070;
				other_perm = 07;
				for (; aclcnt > 0; aclcnt--, aclentp++) {
					switch (aclentp->a_type) {
					case USER_OBJ:
						break;
					case USER:
					case CLASS_OBJ:
						grp_perm &=
						    aclentp->a_perm << 3;
						other_perm &=
						    aclentp->a_perm;
						break;
					case GROUP_OBJ:
						grp_perm &=
						    aclentp->a_perm << 3;
						break;
					case GROUP:
						other_perm &=
						    aclentp->a_perm;
						break;
					case OTHER_OBJ:
						other_perm &=
						    aclentp->a_perm;
						break;
					default:
						break;
					}
				}
			}
			/* copy to va */
			va->va_mode &= ~077;
			va->va_mode |= grp_perm | other_perm;
		}
		if (vsa.vsa_aclcnt)
			kmem_free(vsa.vsa_aclentp,
			    vsa.vsa_aclcnt * sizeof (aclent_t));
	}
}
3108 3109
void
rfs_srvrinit(void)
{
	/* One-time global NFSv2 server setup: allocate the FEM caller id. */
	nfs2_srv_caller_id = fs_new_caller_id();
}
3114 3115
void
rfs_srvrfini(void)
{
	/* Nothing to tear down; rfs_srvrinit() only obtained a caller id. */
}
3119 3120
3120 3121 /* ARGSUSED */
3121 3122 void
3122 3123 rfs_srv_zone_init(nfs_globals_t *ng)
3123 3124 {
3124 3125 nfs_srv_t *ns;
3125 3126
3126 3127 ns = kmem_zalloc(sizeof (*ns), KM_SLEEP);
3127 3128
3128 3129 mutex_init(&ns->async_write_lock, NULL, MUTEX_DEFAULT, NULL);
3129 3130 ns->write_async = 1;
3130 3131
3131 3132 ng->nfs_srv = ns;
3132 3133 }
3133 3134
3134 3135 /* ARGSUSED */
3135 3136 void
3136 3137 rfs_srv_zone_fini(nfs_globals_t *ng)
3137 3138 {
3138 3139 nfs_srv_t *ns = ng->nfs_srv;
3139 3140
3140 3141 ng->nfs_srv = NULL;
3141 3142
3142 3143 mutex_destroy(&ns->async_write_lock);
3143 3144 kmem_free(ns, sizeof (*ns));
3144 3145 }
3145 3146
3146 3147 static int
3147 3148 rdma_setup_read_data2(struct nfsreadargs *ra, struct nfsrdresult *rr)
3148 3149 {
3149 3150 struct clist *wcl;
3150 3151 int wlist_len;
3151 3152 uint32_t count = rr->rr_count;
3152 3153
3153 3154 wcl = ra->ra_wlist;
3154 3155
3155 3156 if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE) {
3156 3157 return (FALSE);
3157 3158 }
3158 3159
3159 3160 wcl = ra->ra_wlist;
3160 3161 rr->rr_ok.rrok_wlist_len = wlist_len;
3161 3162 rr->rr_ok.rrok_wlist = wcl;
3162 3163
3163 3164 return (TRUE);
3164 3165 }
|
↓ open down ↓ |
2723 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX