Print this page
Revert exi_zone to exi_zoneid, and install exi_ne backpointer
Caution with use after exi_rele()
Dan mods to NFS design problems re. multiple zone keys
curzone reality check and teardown changes to use the RIGHT zone
Try to remove assumption that zone's root vnode is marked VROOT
| Split |
Close |
| Expand all |
| Collapse all |
--- old/usr/src/uts/common/fs/nfs/nfs_srv.c
+++ new/usr/src/uts/common/fs/nfs/nfs_srv.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved.
24 24 * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
25 25 * Copyright (c) 2016 by Delphix. All rights reserved.
26 26 */
27 27
28 28 /*
29 29 * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T.
30 30 * All rights reserved.
31 31 */
32 32
33 33 /*
34 34 * Copyright 2018 Nexenta Systems, Inc.
35 35 * Copyright (c) 2016 by Delphix. All rights reserved.
36 36 */
37 37
38 38 #include <sys/param.h>
39 39 #include <sys/types.h>
40 40 #include <sys/systm.h>
41 41 #include <sys/cred.h>
42 42 #include <sys/buf.h>
43 43 #include <sys/vfs.h>
44 44 #include <sys/vnode.h>
45 45 #include <sys/uio.h>
46 46 #include <sys/stat.h>
47 47 #include <sys/errno.h>
48 48 #include <sys/sysmacros.h>
49 49 #include <sys/statvfs.h>
50 50 #include <sys/kmem.h>
51 51 #include <sys/kstat.h>
52 52 #include <sys/dirent.h>
53 53 #include <sys/cmn_err.h>
54 54 #include <sys/debug.h>
55 55 #include <sys/vtrace.h>
56 56 #include <sys/mode.h>
57 57 #include <sys/acl.h>
58 58 #include <sys/nbmlock.h>
59 59 #include <sys/policy.h>
60 60 #include <sys/sdt.h>
61 61
62 62 #include <rpc/types.h>
63 63 #include <rpc/auth.h>
64 64 #include <rpc/svc.h>
65 65
66 66 #include <nfs/nfs.h>
67 67 #include <nfs/export.h>
68 68 #include <nfs/nfs_cmd.h>
69 69
70 70 #include <vm/hat.h>
71 71 #include <vm/as.h>
72 72 #include <vm/seg.h>
73 73 #include <vm/seg_map.h>
74 74 #include <vm/seg_kmem.h>
75 75
76 76 #include <sys/strsubr.h>
77 77
78 78 struct rfs_async_write_list;
79 79
/*
 * Zone globals of NFSv2 server.
 * One instance exists per zone running an NFS server (looked up via
 * nfs_get_srv() below).
 */
typedef struct nfs_srv {
	/* Protects async_write_head and coordinates clustered writes. */
	kmutex_t		async_write_lock;
	/* List of pending clustered (async) write requests. */
	struct rfs_async_write_list *async_write_head;

	/*
	 * enables write clustering if == 1
	 */
	int			write_async;
} nfs_srv_t;
92 92
93 93 /*
94 94 * These are the interface routines for the server side of the
95 95 * Network File System. See the NFS version 2 protocol specification
96 96 * for a description of this interface.
97 97 */
98 98
99 99 static int sattr_to_vattr(struct nfssattr *, struct vattr *);
100 100 static void acl_perm(struct vnode *, struct exportinfo *, struct vattr *,
101 101 cred_t *);
102 102
103 103
104 104 /*
105 105 * Some "over the wire" UNIX file types. These are encoded
106 106 * into the mode. This needs to be fixed in the next rev.
107 107 */
108 108 #define IFMT 0170000 /* type of file */
109 109 #define IFCHR 0020000 /* character special */
110 110 #define IFBLK 0060000 /* block special */
111 111 #define IFSOCK 0140000 /* socket */
112 112
113 113 u_longlong_t nfs2_srv_caller_id;
114 114
115 115 static nfs_srv_t *
116 116 nfs_get_srv(void)
117 117 {
118 118 nfs_globals_t *ng = zone_getspecific(nfssrv_zone_key, curzone);
119 119 nfs_srv_t *srv = ng->nfs_srv;
120 120 ASSERT(srv != NULL);
121 121 return (srv);
122 122 }
123 123
/*
 * Get file attributes.
 * Returns the current attributes of the file with the given fhandle.
 *
 * fhp - file handle of the target object
 * ns  - result: attributes plus NFS status
 * exi - export the file handle resolved through
 * cr  - caller's credentials
 * The remaining arguments are unused here (ARGSUSED).
 */
/* ARGSUSED */
void
rfs_getattr(fhandle_t *fhp, struct nfsattrstat *ns, struct exportinfo *exi,
    struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error;
	vnode_t *vp;
	struct vattr va;

	/* Translate the file handle into a held vnode. */
	vp = nfs_fhtovp(fhp, exi);
	if (vp == NULL) {
		ns->ns_status = NFSERR_STALE;
		return;
	}

	/*
	 * Do the getattr.
	 */
	va.va_mask = AT_ALL;	/* we want all the attributes */

	error = rfs4_delegated_getattr(vp, &va, 0, cr);

	/* check for overflows */
	if (!error) {
		/* Lie about the object type for a referral */
		if (vn_is_nfs_reparse(vp, cr))
			va.va_type = VLNK;

		/* Adjust reported mode bits if an ACL widens access. */
		acl_perm(vp, exi, &va, cr);
		error = vattr_to_nattr(&va, &ns->ns_attr);
	}

	VN_RELE(vp);

	ns->ns_status = puterrno(error);
}
/* Return the file handle embedded in a GETATTR argument (it is the arg). */
void *
rfs_getattr_getfh(fhandle_t *fhp)
{
	return (fhp);
}
169 169
/*
 * Set file attributes.
 * Sets the attributes of the file with the given fhandle. Returns
 * the new attributes.
 *
 * args - file handle plus the sattr (attributes to set)
 * ns   - result: post-operation attributes plus NFS status
 * exi  - export the file handle resolved through
 * cr   - caller's credentials
 * ro   - TRUE if the export is read-only for this request
 */
/* ARGSUSED */
void
rfs_setattr(struct nfssaargs *args, struct nfsattrstat *ns,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error;
	int flag;
	int in_crit = 0;	/* nonzero while inside the nbl critical region */
	vnode_t *vp;
	struct vattr va;	/* attributes requested by the client */
	struct vattr bva;	/* attributes of the file before the change */
	struct flock64 bf;
	caller_context_t ct;


	vp = nfs_fhtovp(&args->saa_fh, exi);
	if (vp == NULL) {
		ns->ns_status = NFSERR_STALE;
		return;
	}

	if (rdonly(ro, vp)) {
		VN_RELE(vp);
		ns->ns_status = NFSERR_ROFS;
		return;
	}

	error = sattr_to_vattr(&args->saa_sa, &va);
	if (error) {
		VN_RELE(vp);
		ns->ns_status = puterrno(error);
		return;
	}

	/*
	 * If the client is requesting a change to the mtime,
	 * but the nanosecond field is set to 1 billion, then
	 * this is a flag to the server that it should set the
	 * atime and mtime fields to the server's current time.
	 * The 1 billion number actually came from the client
	 * as 1 million, but the units in the over the wire
	 * request are microseconds instead of nanoseconds.
	 *
	 * This is an overload of the protocol and should be
	 * documented in the NFS Version 2 protocol specification.
	 */
	if (va.va_mask & AT_MTIME) {
		if (va.va_mtime.tv_nsec == 1000000000) {
			gethrestime(&va.va_mtime);
			va.va_atime = va.va_mtime;
			va.va_mask |= AT_ATIME;
			flag = 0;
		} else
			flag = ATTR_UTIME;
	} else
		flag = 0;

	/*
	 * If the filesystem is exported with nosuid, then mask off
	 * the setuid and setgid bits.
	 */
	if ((va.va_mask & AT_MODE) && vp->v_type == VREG &&
	    (exi->exi_export.ex_flags & EX_NOSUID))
		va.va_mode &= ~(VSUID | VSGID);

	ct.cc_sysid = 0;
	ct.cc_pid = 0;
	ct.cc_caller_id = nfs2_srv_caller_id;
	ct.cc_flags = CC_DONTBLOCK;

	/*
	 * We need to specially handle size changes because it is
	 * possible for the client to create a file with modes
	 * which indicate read-only, but with the file opened for
	 * writing. If the client then tries to set the size of
	 * the file, then the normal access checking done in
	 * VOP_SETATTR would prevent the client from doing so,
	 * although it should be legal for it to do so. To get
	 * around this, we do the access checking for ourselves
	 * and then use VOP_SPACE which doesn't do the access
	 * checking which VOP_SETATTR does. VOP_SPACE can only
	 * operate on VREG files, let VOP_SETATTR handle the other
	 * extremely rare cases.
	 * Also the client should not be allowed to change the
	 * size of the file if there is a conflicting non-blocking
	 * mandatory lock in the region of change.
	 */
	if (vp->v_type == VREG && va.va_mask & AT_SIZE) {
		if (nbl_need_check(vp)) {
			nbl_start_crit(vp, RW_READER);
			in_crit = 1;
		}

		bva.va_mask = AT_UID | AT_SIZE;

		error = VOP_GETATTR(vp, &bva, 0, cr, &ct);

		if (error) {
			if (in_crit)
				nbl_end_crit(vp);
			VN_RELE(vp);
			ns->ns_status = puterrno(error);
			return;
		}

		if (in_crit) {
			u_offset_t offset;
			ssize_t length;

			/*
			 * The region being changed is [min(old,new),
			 * |old - new|); check it for NBMAND conflicts.
			 */
			if (va.va_size < bva.va_size) {
				offset = va.va_size;
				length = bva.va_size - va.va_size;
			} else {
				offset = bva.va_size;
				length = va.va_size - bva.va_size;
			}
			if (nbl_conflict(vp, NBL_WRITE, offset, length, 0,
			    NULL)) {
				error = EACCES;
			}
		}

		/*
		 * Owner bypasses mode checks: do the size change via
		 * VOP_SPACE (F_FREESP) and drop AT_SIZE from the mask so
		 * VOP_SETATTR below won't re-apply (and re-check) it.
		 */
		if (crgetuid(cr) == bva.va_uid && !error &&
		    va.va_size != bva.va_size) {
			va.va_mask &= ~AT_SIZE;
			bf.l_type = F_WRLCK;
			bf.l_whence = 0;
			bf.l_start = (off64_t)va.va_size;
			bf.l_len = 0;
			bf.l_sysid = 0;
			bf.l_pid = 0;

			error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
			    (offset_t)va.va_size, cr, &ct);
		}
		if (in_crit)
			nbl_end_crit(vp);
	} else
		error = 0;

	/*
	 * Do the setattr.
	 */
	if (!error && va.va_mask) {
		error = VOP_SETATTR(vp, &va, flag, cr, &ct);
	}

	/*
	 * check if the monitor on either vop_space or vop_setattr detected
	 * a delegation conflict and if so, mark the thread flag as
	 * wouldblock so that the response is dropped and the client will
	 * try again.
	 */
	if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
		VN_RELE(vp);
		curthread->t_flag |= T_WOULDBLOCK;
		return;
	}

	if (!error) {
		va.va_mask = AT_ALL;	/* get everything */

		error = rfs4_delegated_getattr(vp, &va, 0, cr);

		/* check for overflows */
		if (!error) {
			acl_perm(vp, exi, &va, cr);
			error = vattr_to_nattr(&va, &ns->ns_attr);
		}
	}

	ct.cc_flags = 0;

	/*
	 * Force modified metadata out to stable storage.
	 */
	(void) VOP_FSYNC(vp, FNODSYNC, cr, &ct);

	VN_RELE(vp);

	ns->ns_status = puterrno(error);
}
/* Return the file handle embedded in a SETATTR argument. */
void *
rfs_setattr_getfh(struct nfssaargs *args)
{
	return (&args->saa_fh);
}
362 362
/*
 * Change and release @exip and @vpp only in success.
 *
 * Cross a mount point encountered during a lookup: traverse to the
 * root vnode of the mounted filesystem and, if that filesystem is
 * exported with "nohide", swap the caller's vnode/export references
 * for the submount's.  On any failure (or if the submount is not
 * exported nohide) the caller's *vpp/*exip are left untouched.
 *
 * Returns 0 on success (including the benign "not exported nohide"
 * case); otherwise an errno from traverse() or VOP_FID().
 */
int
rfs_cross_mnt(vnode_t **vpp, struct exportinfo **exip)
{
	struct exportinfo *exi;
	vnode_t *vp = *vpp;
	fid_t fid;
	int error;

	/* Extra hold: traverse() consumes/replaces its vnode argument. */
	VN_HOLD(vp);

	if ((error = traverse(&vp)) != 0) {
		VN_RELE(vp);
		return (error);
	}

	/* Build a file id so we can look up the covering export. */
	bzero(&fid, sizeof (fid));
	fid.fid_len = MAXFIDSZ;
	error = VOP_FID(vp, &fid, NULL);
	if (error) {
		VN_RELE(vp);
		return (error);
	}

	exi = checkexport(&vp->v_vfsp->vfs_fsid, &fid);
	if (exi == NULL ||
	    (exi->exi_export.ex_flags & EX_NOHIDE) == 0) {
		/*
		 * It is not error, just subdir is not exported
		 * or "nohide" is not set
		 */
		if (exi != NULL)
			exi_rele(exi);
		VN_RELE(vp);
	} else {
		/* go to submount: hand our references back to the caller */
		exi_rele(*exip);
		*exip = exi;

		VN_RELE(*vpp);
		*vpp = vp;
	}

	return (0);
}
408 408
/*
 * Given mounted "dvp" and "exi", go upper mountpoint
 * with dvp/exi correction
 * Return 0 in success
 *
 * Used for ".." lookups out of a nohide-exported root: steps to the
 * covered (underlying) vnode and re-resolves the export for it.  On
 * success the caller's *dvpp/*exip references are replaced; on failure
 * (-1) they are left untouched.  Both the incoming and the resolved
 * export must belong to the current zone (asserted below).
 */
int
rfs_climb_crossmnt(vnode_t **dvpp, struct exportinfo **exip, cred_t *cr)
{
	struct exportinfo *exi;
	vnode_t *dvp = *dvpp;

	ASSERT3U((*exip)->exi_zoneid, ==, curzone->zone_id);
	/* Must be a filesystem root, or the current zone's root vnode. */
	ASSERT((dvp->v_flag & VROOT) || VN_IS_CURZONEROOT(dvp));

	VN_HOLD(dvp);
	/* Step to the vnode covered by this mount. */
	dvp = untraverse(dvp);
	exi = nfs_vptoexi(NULL, dvp, cr, NULL, NULL, FALSE);
	if (exi == NULL) {
		VN_RELE(dvp);
		return (-1);
	}

	ASSERT3U(exi->exi_zoneid, ==, curzone->zone_id);
	exi_rele(*exip);
	*exip = exi;
	VN_RELE(*dvpp);
	*dvpp = dvp;

	return (0);
}
/*
 * Directory lookup.
 * Returns an fhandle and file attributes for file name in a directory.
 *
 * da  - directory file handle plus component name
 * dr  - result: file handle, attributes, NFS status
 * exi - export of the directory handle (may be replaced internally
 *       when crossing mounts or resolving a public file handle)
 * req - the RPC request (used for caller address and security checks)
 * cr  - caller's credentials
 */
/* ARGSUSED */
void
rfs_lookup(struct nfsdiropargs *da, struct nfsdiropres *dr,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error;
	vnode_t *dvp;		/* held directory vnode */
	vnode_t *vp;		/* lookup result */
	struct vattr va;
	fhandle_t *fhp = da->da_fhandle;
	struct sec_ol sec = {0, 0};
	bool_t publicfh_flag = FALSE, auth_weak = FALSE;
	char *name;
	struct sockaddr *ca;

	/*
	 * Trusted Extension doesn't support NFSv2. MOUNT
	 * will reject v2 clients. Need to prevent v2 client
	 * access via WebNFS here.
	 */
	if (is_system_labeled() && req->rq_vers == 2) {
		dr->dr_status = NFSERR_ACCES;
		return;
	}

	/*
	 * Disallow NULL paths
	 */
	if (da->da_name == NULL || *da->da_name == '\0') {
		dr->dr_status = NFSERR_ACCES;
		return;
	}

	/*
	 * Allow lookups from the root - the default
	 * location of the public filehandle.
	 */
	if (exi != NULL && (exi->exi_export.ex_flags & EX_PUBLIC)) {
		dvp = ZONE_ROOTVP();
		VN_HOLD(dvp);
	} else {
		dvp = nfs_fhtovp(fhp, exi);
		if (dvp == NULL) {
			dr->dr_status = NFSERR_STALE;
			return;
		}
	}

	/*
	 * Take our own hold on the export: from here on "exi" may be
	 * released and replaced (crossmnt / public fh), and is dropped
	 * at "out:".
	 */
	exi_hold(exi);
	ASSERT3U(exi->exi_zoneid, ==, curzone->zone_id);

	/*
	 * Not allow lookup beyond root.
	 * If the filehandle matches a filehandle of the exi,
	 * then the ".." refers beyond the root of an exported filesystem.
	 */
	if (strcmp(da->da_name, "..") == 0 &&
	    EQFID(&exi->exi_fid, (fid_t *)&fhp->fh_len)) {
		if ((exi->exi_export.ex_flags & EX_NOHIDE) &&
		    ((dvp->v_flag & VROOT) || VN_IS_CURZONEROOT(dvp))) {
			/*
			 * special case for ".." and 'nohide'exported root
			 */
			if (rfs_climb_crossmnt(&dvp, &exi, cr) != 0) {
				error = NFSERR_ACCES;
				goto out;
			}
		} else {
			error = NFSERR_NOENT;
			goto out;
		}
	}

	/* Translate the component name per the client's charset mapping. */
	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
	name = nfscmd_convname(ca, exi, da->da_name, NFSCMD_CONV_INBOUND,
	    MAXPATHLEN);

	if (name == NULL) {
		error = NFSERR_ACCES;
		goto out;
	}

	/*
	 * If the public filehandle is used then allow
	 * a multi-component lookup, i.e. evaluate
	 * a pathname and follow symbolic links if
	 * necessary.
	 *
	 * This may result in a vnode in another filesystem
	 * which is OK as long as the filesystem is exported.
	 */
	if (PUBLIC_FH2(fhp)) {
		publicfh_flag = TRUE;

		/*
		 * Drop our hold; rfs_publicfh_mclookup() returns its
		 * own held export in exi (or leaves it NULL on error).
		 */
		exi_rele(exi);
		exi = NULL;

		error = rfs_publicfh_mclookup(name, dvp, cr, &vp, &exi,
		    &sec);
	} else {
		/*
		 * Do a normal single component lookup.
		 */
		error = VOP_LOOKUP(dvp, name, &vp, NULL, 0, NULL, cr,
		    NULL, NULL, NULL);
	}

	if (name != da->da_name)
		kmem_free(name, MAXPATHLEN);

	if (error == 0 && vn_ismntpt(vp)) {
		error = rfs_cross_mnt(&vp, &exi);
		if (error)
			VN_RELE(vp);
	}

	if (!error) {
		va.va_mask = AT_ALL;	/* we want everything */

		error = rfs4_delegated_getattr(vp, &va, 0, cr);

		/* check for overflows */
		if (!error) {
			acl_perm(vp, exi, &va, cr);
			error = vattr_to_nattr(&va, &dr->dr_attr);
			if (!error) {
				if (sec.sec_flags & SEC_QUERY)
					error = makefh_ol(&dr->dr_fhandle, exi,
					    sec.sec_index);
				else {
					error = makefh(&dr->dr_fhandle, vp,
					    exi);
					if (!error && publicfh_flag &&
					    !chk_clnt_sec(exi, req))
						auth_weak = TRUE;
				}
			}
		}
		VN_RELE(vp);
	}

out:
	VN_RELE(dvp);

	/* exi may be NULL if rfs_publicfh_mclookup() failed above. */
	if (exi != NULL)
		exi_rele(exi);

	/*
	 * If it's public fh, no 0x81, and client's flavor is
	 * invalid, set WebNFS status to WNFSERR_CLNT_FLAVOR now.
	 * Then set RPC status to AUTH_TOOWEAK in common_dispatch.
	 */
	if (auth_weak)
		dr->dr_status = (enum nfsstat)WNFSERR_CLNT_FLAVOR;
	else
		dr->dr_status = puterrno(error);
}
/* Return the directory file handle embedded in a LOOKUP argument. */
void *
rfs_lookup_getfh(struct nfsdiropargs *da)
{
	return (da->da_fhandle);
}
601 605
/*
 * Read symbolic link.
 * Returns the string in the symbolic link at the given fhandle.
 *
 * fhp - file handle of the (supposed) symlink
 * rl  - result: link text (rl_data, freed later by rfs_rlfree),
 *       length and NFS status
 * exi - export the handle resolved through
 * cr  - caller's credentials
 */
/* ARGSUSED */
void
rfs_readlink(fhandle_t *fhp, struct nfsrdlnres *rl, struct exportinfo *exi,
    struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error;
	struct iovec iov;
	struct uio uio;
	vnode_t *vp;
	struct vattr va;
	struct sockaddr *ca;
	char *name = NULL;
	int is_referral = 0;

	vp = nfs_fhtovp(fhp, exi);
	if (vp == NULL) {
		rl->rl_data = NULL;
		rl->rl_status = NFSERR_STALE;
		return;
	}

	va.va_mask = AT_MODE;

	error = VOP_GETATTR(vp, &va, 0, cr, NULL);

	if (error) {
		VN_RELE(vp);
		rl->rl_data = NULL;
		rl->rl_status = puterrno(error);
		return;
	}

	/* Refuse objects under mandatory locking (see rfs_write_sync). */
	if (MANDLOCK(vp, va.va_mode)) {
		VN_RELE(vp);
		rl->rl_data = NULL;
		rl->rl_status = NFSERR_ACCES;
		return;
	}

	/* We lied about the object type for a referral */
	if (vn_is_nfs_reparse(vp, cr))
		is_referral = 1;

	/*
	 * XNFS and RFC1094 require us to return ENXIO if argument
	 * is not a link. BUGID 1138002.
	 */
	if (vp->v_type != VLNK && !is_referral) {
		VN_RELE(vp);
		rl->rl_data = NULL;
		rl->rl_status = NFSERR_NXIO;
		return;
	}

	/*
	 * Allocate data for pathname. This will be freed by rfs_rlfree.
	 */
	rl->rl_data = kmem_alloc(NFS_MAXPATHLEN, KM_SLEEP);

	if (is_referral) {
		char *s;
		size_t strsz;

		/* Get an artificial symlink based on a referral */
		s = build_symlink(vp, cr, &strsz);
		global_svstat_ptr[2][NFS_REFERLINKS].value.ui64++;
		DTRACE_PROBE2(nfs2serv__func__referral__reflink,
		    vnode_t *, vp, char *, s);
		if (s == NULL)
			error = EINVAL;
		else {
			error = 0;
			(void) strlcpy(rl->rl_data, s, NFS_MAXPATHLEN);
			rl->rl_count = (uint32_t)MIN(strsz, NFS_MAXPATHLEN);
			kmem_free(s, strsz);
		}

	} else {

		/*
		 * Set up io vector to read sym link data
		 */
		iov.iov_base = rl->rl_data;
		iov.iov_len = NFS_MAXPATHLEN;
		uio.uio_iov = &iov;
		uio.uio_iovcnt = 1;
		uio.uio_segflg = UIO_SYSSPACE;
		uio.uio_extflg = UIO_COPY_CACHED;
		uio.uio_loffset = (offset_t)0;
		uio.uio_resid = NFS_MAXPATHLEN;

		/*
		 * Do the readlink.
		 */
		error = VOP_READLINK(vp, &uio, cr, NULL);

		rl->rl_count = (uint32_t)(NFS_MAXPATHLEN - uio.uio_resid);

		/*
		 * NOTE(review): if the link text fills the whole buffer
		 * (uio_resid == 0, rl_count == NFS_MAXPATHLEN) this NUL
		 * store lands one byte past the kmem_alloc'd buffer —
		 * confirm whether on-disk link lengths are bounded below
		 * NFS_MAXPATHLEN.
		 */
		if (!error)
			rl->rl_data[rl->rl_count] = '\0';

	}


	VN_RELE(vp);

	/* Translate link text per the client's charset mapping. */
	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
	name = nfscmd_convname(ca, exi, rl->rl_data,
	    NFSCMD_CONV_OUTBOUND, MAXPATHLEN);

	if (name != NULL && name != rl->rl_data) {
		kmem_free(rl->rl_data, NFS_MAXPATHLEN);
		rl->rl_data = name;
	}

	/*
	 * XNFS and RFC1094 require us to return ENXIO if argument
	 * is not a link. UFS returns EINVAL if this is the case,
	 * so we do the mapping here. BUGID 1138002.
	 */
	if (error == EINVAL)
		rl->rl_status = NFSERR_NXIO;
	else
		rl->rl_status = puterrno(error);

}
/* Return the file handle embedded in a READLINK argument (it is the arg). */
void *
rfs_readlink_getfh(fhandle_t *fhp)
{
	return (fhp);
}
737 741 /*
738 742 * Free data allocated by rfs_readlink
739 743 */
740 744 void
741 745 rfs_rlfree(struct nfsrdlnres *rl)
742 746 {
743 747 if (rl->rl_data != NULL)
744 748 kmem_free(rl->rl_data, NFS_MAXPATHLEN);
745 749 }
746 750
747 751 static int rdma_setup_read_data2(struct nfsreadargs *, struct nfsrdresult *);
748 752
749 753 /*
750 754 * Read data.
751 755 * Returns some data read from the file at the given fhandle.
752 756 */
753 757 /* ARGSUSED */
754 758 void
755 759 rfs_read(struct nfsreadargs *ra, struct nfsrdresult *rr,
756 760 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
757 761 {
758 762 vnode_t *vp;
759 763 int error;
760 764 struct vattr va;
761 765 struct iovec iov;
762 766 struct uio uio;
763 767 mblk_t *mp;
764 768 int alloc_err = 0;
765 769 int in_crit = 0;
766 770 caller_context_t ct;
767 771
768 772 vp = nfs_fhtovp(&ra->ra_fhandle, exi);
769 773 if (vp == NULL) {
770 774 rr->rr_data = NULL;
771 775 rr->rr_status = NFSERR_STALE;
772 776 return;
773 777 }
774 778
775 779 if (vp->v_type != VREG) {
776 780 VN_RELE(vp);
777 781 rr->rr_data = NULL;
778 782 rr->rr_status = NFSERR_ISDIR;
779 783 return;
780 784 }
781 785
782 786 ct.cc_sysid = 0;
783 787 ct.cc_pid = 0;
784 788 ct.cc_caller_id = nfs2_srv_caller_id;
785 789 ct.cc_flags = CC_DONTBLOCK;
786 790
787 791 /*
788 792 * Enter the critical region before calling VOP_RWLOCK
789 793 * to avoid a deadlock with write requests.
790 794 */
791 795 if (nbl_need_check(vp)) {
792 796 nbl_start_crit(vp, RW_READER);
793 797 if (nbl_conflict(vp, NBL_READ, ra->ra_offset, ra->ra_count,
794 798 0, NULL)) {
795 799 nbl_end_crit(vp);
796 800 VN_RELE(vp);
797 801 rr->rr_data = NULL;
798 802 rr->rr_status = NFSERR_ACCES;
799 803 return;
800 804 }
801 805 in_crit = 1;
802 806 }
803 807
804 808 error = VOP_RWLOCK(vp, V_WRITELOCK_FALSE, &ct);
805 809
806 810 /* check if a monitor detected a delegation conflict */
807 811 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
808 812 if (in_crit)
809 813 nbl_end_crit(vp);
810 814 VN_RELE(vp);
811 815 /* mark as wouldblock so response is dropped */
812 816 curthread->t_flag |= T_WOULDBLOCK;
813 817
814 818 rr->rr_data = NULL;
815 819 return;
816 820 }
817 821
818 822 va.va_mask = AT_ALL;
819 823
820 824 error = VOP_GETATTR(vp, &va, 0, cr, &ct);
821 825
822 826 if (error) {
823 827 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
824 828 if (in_crit)
825 829 nbl_end_crit(vp);
826 830
827 831 VN_RELE(vp);
828 832 rr->rr_data = NULL;
829 833 rr->rr_status = puterrno(error);
830 834
831 835 return;
832 836 }
833 837
834 838 /*
835 839 * This is a kludge to allow reading of files created
836 840 * with no read permission. The owner of the file
837 841 * is always allowed to read it.
838 842 */
839 843 if (crgetuid(cr) != va.va_uid) {
840 844 error = VOP_ACCESS(vp, VREAD, 0, cr, &ct);
841 845
842 846 if (error) {
843 847 /*
844 848 * Exec is the same as read over the net because
845 849 * of demand loading.
846 850 */
847 851 error = VOP_ACCESS(vp, VEXEC, 0, cr, &ct);
848 852 }
849 853 if (error) {
850 854 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
851 855 if (in_crit)
852 856 nbl_end_crit(vp);
853 857 VN_RELE(vp);
854 858 rr->rr_data = NULL;
855 859 rr->rr_status = puterrno(error);
856 860
857 861 return;
858 862 }
859 863 }
860 864
861 865 if (MANDLOCK(vp, va.va_mode)) {
862 866 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
863 867 if (in_crit)
864 868 nbl_end_crit(vp);
865 869
866 870 VN_RELE(vp);
867 871 rr->rr_data = NULL;
868 872 rr->rr_status = NFSERR_ACCES;
869 873
870 874 return;
871 875 }
872 876
873 877 rr->rr_ok.rrok_wlist_len = 0;
874 878 rr->rr_ok.rrok_wlist = NULL;
875 879
876 880 if ((u_offset_t)ra->ra_offset >= va.va_size) {
877 881 rr->rr_count = 0;
878 882 rr->rr_data = NULL;
879 883 /*
880 884 * In this case, status is NFS_OK, but there is no data
881 885 * to encode. So set rr_mp to NULL.
882 886 */
883 887 rr->rr_mp = NULL;
884 888 rr->rr_ok.rrok_wlist = ra->ra_wlist;
885 889 if (rr->rr_ok.rrok_wlist)
886 890 clist_zero_len(rr->rr_ok.rrok_wlist);
887 891 goto done;
888 892 }
889 893
890 894 if (ra->ra_wlist) {
891 895 mp = NULL;
892 896 rr->rr_mp = NULL;
893 897 (void) rdma_get_wchunk(req, &iov, ra->ra_wlist);
894 898 if (ra->ra_count > iov.iov_len) {
895 899 rr->rr_data = NULL;
896 900 rr->rr_status = NFSERR_INVAL;
897 901 goto done;
898 902 }
899 903 } else {
900 904 /*
901 905 * mp will contain the data to be sent out in the read reply.
902 906 * This will be freed after the reply has been sent out (by the
903 907 * driver).
904 908 * Let's roundup the data to a BYTES_PER_XDR_UNIT multiple, so
905 909 * that the call to xdrmblk_putmblk() never fails.
906 910 */
907 911 mp = allocb_wait(RNDUP(ra->ra_count), BPRI_MED, STR_NOSIG,
908 912 &alloc_err);
909 913 ASSERT(mp != NULL);
910 914 ASSERT(alloc_err == 0);
911 915
912 916 rr->rr_mp = mp;
913 917
914 918 /*
915 919 * Set up io vector
916 920 */
917 921 iov.iov_base = (caddr_t)mp->b_datap->db_base;
918 922 iov.iov_len = ra->ra_count;
919 923 }
920 924
921 925 uio.uio_iov = &iov;
922 926 uio.uio_iovcnt = 1;
923 927 uio.uio_segflg = UIO_SYSSPACE;
924 928 uio.uio_extflg = UIO_COPY_CACHED;
925 929 uio.uio_loffset = (offset_t)ra->ra_offset;
926 930 uio.uio_resid = ra->ra_count;
927 931
928 932 error = VOP_READ(vp, &uio, 0, cr, &ct);
929 933
930 934 if (error) {
931 935 if (mp)
932 936 freeb(mp);
933 937
934 938 /*
935 939 * check if a monitor detected a delegation conflict and
936 940 * mark as wouldblock so response is dropped
937 941 */
938 942 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
939 943 curthread->t_flag |= T_WOULDBLOCK;
940 944 else
941 945 rr->rr_status = puterrno(error);
942 946
943 947 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
944 948 if (in_crit)
945 949 nbl_end_crit(vp);
946 950
947 951 VN_RELE(vp);
948 952 rr->rr_data = NULL;
949 953
950 954 return;
951 955 }
952 956
953 957 /*
954 958 * Get attributes again so we can send the latest access
955 959 * time to the client side for its cache.
956 960 */
957 961 va.va_mask = AT_ALL;
958 962
959 963 error = VOP_GETATTR(vp, &va, 0, cr, &ct);
960 964
961 965 if (error) {
962 966 if (mp)
963 967 freeb(mp);
964 968
965 969 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
966 970 if (in_crit)
967 971 nbl_end_crit(vp);
968 972
969 973 VN_RELE(vp);
970 974 rr->rr_data = NULL;
971 975 rr->rr_status = puterrno(error);
972 976
973 977 return;
974 978 }
975 979
976 980 rr->rr_count = (uint32_t)(ra->ra_count - uio.uio_resid);
977 981
978 982 if (mp) {
979 983 rr->rr_data = (char *)mp->b_datap->db_base;
980 984 } else {
981 985 if (ra->ra_wlist) {
982 986 rr->rr_data = (caddr_t)iov.iov_base;
983 987 if (!rdma_setup_read_data2(ra, rr)) {
984 988 rr->rr_data = NULL;
985 989 rr->rr_status = puterrno(NFSERR_INVAL);
986 990 }
987 991 }
988 992 }
989 993 done:
990 994 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
991 995 if (in_crit)
992 996 nbl_end_crit(vp);
993 997
994 998 acl_perm(vp, exi, &va, cr);
995 999
996 1000 /* check for overflows */
997 1001 error = vattr_to_nattr(&va, &rr->rr_attr);
998 1002
999 1003 VN_RELE(vp);
1000 1004
1001 1005 rr->rr_status = puterrno(error);
1002 1006 }
1003 1007
1004 1008 /*
1005 1009 * Free data allocated by rfs_read
1006 1010 */
1007 1011 void
1008 1012 rfs_rdfree(struct nfsrdresult *rr)
1009 1013 {
1010 1014 mblk_t *mp;
1011 1015
1012 1016 if (rr->rr_status == NFS_OK) {
1013 1017 mp = rr->rr_mp;
1014 1018 if (mp != NULL)
1015 1019 freeb(mp);
1016 1020 }
1017 1021 }
1018 1022
/* Return the file handle embedded in a READ argument. */
void *
rfs_read_getfh(struct nfsreadargs *ra)
{
	return (&ra->ra_fhandle);
}
1024 1028
1025 1029 #define MAX_IOVECS 12
1026 1030
1027 1031 #ifdef DEBUG
1028 1032 static int rfs_write_sync_hits = 0;
1029 1033 static int rfs_write_sync_misses = 0;
1030 1034 #endif
1031 1035
1032 1036 /*
1033 1037 * Write data to file.
1034 1038 * Returns attributes of a file after writing some data to it.
1035 1039 *
1036 1040 * Any changes made here, especially in error handling might have
1037 1041 * to also be done in rfs_write (which clusters write requests).
1038 1042 */
1039 1043 /* ARGSUSED */
1040 1044 void
1041 1045 rfs_write_sync(struct nfswriteargs *wa, struct nfsattrstat *ns,
1042 1046 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
1043 1047 {
1044 1048 int error;
1045 1049 vnode_t *vp;
1046 1050 rlim64_t rlimit;
1047 1051 struct vattr va;
1048 1052 struct uio uio;
1049 1053 struct iovec iov[MAX_IOVECS];
1050 1054 mblk_t *m;
1051 1055 struct iovec *iovp;
1052 1056 int iovcnt;
1053 1057 cred_t *savecred;
1054 1058 int in_crit = 0;
1055 1059 caller_context_t ct;
1056 1060
1057 1061 vp = nfs_fhtovp(&wa->wa_fhandle, exi);
1058 1062 if (vp == NULL) {
1059 1063 ns->ns_status = NFSERR_STALE;
1060 1064 return;
1061 1065 }
1062 1066
1063 1067 if (rdonly(ro, vp)) {
1064 1068 VN_RELE(vp);
1065 1069 ns->ns_status = NFSERR_ROFS;
1066 1070 return;
1067 1071 }
1068 1072
1069 1073 if (vp->v_type != VREG) {
1070 1074 VN_RELE(vp);
1071 1075 ns->ns_status = NFSERR_ISDIR;
1072 1076 return;
1073 1077 }
1074 1078
1075 1079 ct.cc_sysid = 0;
1076 1080 ct.cc_pid = 0;
1077 1081 ct.cc_caller_id = nfs2_srv_caller_id;
1078 1082 ct.cc_flags = CC_DONTBLOCK;
1079 1083
1080 1084 va.va_mask = AT_UID|AT_MODE;
1081 1085
1082 1086 error = VOP_GETATTR(vp, &va, 0, cr, &ct);
1083 1087
1084 1088 if (error) {
1085 1089 VN_RELE(vp);
1086 1090 ns->ns_status = puterrno(error);
1087 1091
1088 1092 return;
1089 1093 }
1090 1094
1091 1095 if (crgetuid(cr) != va.va_uid) {
1092 1096 /*
1093 1097 * This is a kludge to allow writes of files created
1094 1098 * with read only permission. The owner of the file
1095 1099 * is always allowed to write it.
1096 1100 */
1097 1101 error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct);
1098 1102
1099 1103 if (error) {
1100 1104 VN_RELE(vp);
1101 1105 ns->ns_status = puterrno(error);
1102 1106 return;
1103 1107 }
1104 1108 }
1105 1109
1106 1110 /*
1107 1111 * Can't access a mandatory lock file. This might cause
1108 1112 * the NFS service thread to block forever waiting for a
1109 1113 * lock to be released that will never be released.
1110 1114 */
1111 1115 if (MANDLOCK(vp, va.va_mode)) {
1112 1116 VN_RELE(vp);
1113 1117 ns->ns_status = NFSERR_ACCES;
1114 1118 return;
1115 1119 }
1116 1120
1117 1121 /*
1118 1122 * We have to enter the critical region before calling VOP_RWLOCK
1119 1123 * to avoid a deadlock with ufs.
1120 1124 */
1121 1125 if (nbl_need_check(vp)) {
1122 1126 nbl_start_crit(vp, RW_READER);
1123 1127 in_crit = 1;
1124 1128 if (nbl_conflict(vp, NBL_WRITE, wa->wa_offset,
1125 1129 wa->wa_count, 0, NULL)) {
1126 1130 error = EACCES;
1127 1131 goto out;
1128 1132 }
1129 1133 }
1130 1134
1131 1135 error = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct);
1132 1136
1133 1137 /* check if a monitor detected a delegation conflict */
1134 1138 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1135 1139 goto out;
1136 1140 }
1137 1141
1138 1142 if (wa->wa_data || wa->wa_rlist) {
1139 1143 /* Do the RDMA thing if necessary */
1140 1144 if (wa->wa_rlist) {
1141 1145 iov[0].iov_base = (char *)((wa->wa_rlist)->u.c_daddr3);
1142 1146 iov[0].iov_len = wa->wa_count;
1143 1147 } else {
1144 1148 iov[0].iov_base = wa->wa_data;
1145 1149 iov[0].iov_len = wa->wa_count;
1146 1150 }
1147 1151 uio.uio_iov = iov;
1148 1152 uio.uio_iovcnt = 1;
1149 1153 uio.uio_segflg = UIO_SYSSPACE;
1150 1154 uio.uio_extflg = UIO_COPY_DEFAULT;
1151 1155 uio.uio_loffset = (offset_t)wa->wa_offset;
1152 1156 uio.uio_resid = wa->wa_count;
1153 1157 /*
1154 1158 * The limit is checked on the client. We
1155 1159 * should allow any size writes here.
1156 1160 */
1157 1161 uio.uio_llimit = curproc->p_fsz_ctl;
1158 1162 rlimit = uio.uio_llimit - wa->wa_offset;
1159 1163 if (rlimit < (rlim64_t)uio.uio_resid)
1160 1164 uio.uio_resid = (uint_t)rlimit;
1161 1165
1162 1166 /*
1163 1167 * for now we assume no append mode
1164 1168 */
1165 1169 /*
1166 1170 * We're changing creds because VM may fault and we need
1167 1171 * the cred of the current thread to be used if quota
1168 1172 * checking is enabled.
1169 1173 */
1170 1174 savecred = curthread->t_cred;
1171 1175 curthread->t_cred = cr;
1172 1176 error = VOP_WRITE(vp, &uio, FSYNC, cr, &ct);
1173 1177 curthread->t_cred = savecred;
1174 1178 } else {
1175 1179
1176 1180 iovcnt = 0;
1177 1181 for (m = wa->wa_mblk; m != NULL; m = m->b_cont)
1178 1182 iovcnt++;
1179 1183 if (iovcnt <= MAX_IOVECS) {
1180 1184 #ifdef DEBUG
1181 1185 rfs_write_sync_hits++;
1182 1186 #endif
1183 1187 iovp = iov;
1184 1188 } else {
1185 1189 #ifdef DEBUG
1186 1190 rfs_write_sync_misses++;
1187 1191 #endif
1188 1192 iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
1189 1193 }
1190 1194 mblk_to_iov(wa->wa_mblk, iovcnt, iovp);
1191 1195 uio.uio_iov = iovp;
1192 1196 uio.uio_iovcnt = iovcnt;
1193 1197 uio.uio_segflg = UIO_SYSSPACE;
1194 1198 uio.uio_extflg = UIO_COPY_DEFAULT;
1195 1199 uio.uio_loffset = (offset_t)wa->wa_offset;
1196 1200 uio.uio_resid = wa->wa_count;
1197 1201 /*
1198 1202 * The limit is checked on the client. We
1199 1203 * should allow any size writes here.
1200 1204 */
1201 1205 uio.uio_llimit = curproc->p_fsz_ctl;
1202 1206 rlimit = uio.uio_llimit - wa->wa_offset;
1203 1207 if (rlimit < (rlim64_t)uio.uio_resid)
1204 1208 uio.uio_resid = (uint_t)rlimit;
1205 1209
1206 1210 /*
1207 1211 * For now we assume no append mode.
1208 1212 */
1209 1213 /*
1210 1214 * We're changing creds because VM may fault and we need
1211 1215 * the cred of the current thread to be used if quota
1212 1216 * checking is enabled.
1213 1217 */
1214 1218 savecred = curthread->t_cred;
1215 1219 curthread->t_cred = cr;
1216 1220 error = VOP_WRITE(vp, &uio, FSYNC, cr, &ct);
1217 1221 curthread->t_cred = savecred;
1218 1222
1219 1223 if (iovp != iov)
1220 1224 kmem_free(iovp, sizeof (*iovp) * iovcnt);
1221 1225 }
1222 1226
1223 1227 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct);
1224 1228
1225 1229 if (!error) {
1226 1230 /*
1227 1231 * Get attributes again so we send the latest mod
1228 1232 * time to the client side for its cache.
1229 1233 */
1230 1234 va.va_mask = AT_ALL; /* now we want everything */
1231 1235
1232 1236 error = VOP_GETATTR(vp, &va, 0, cr, &ct);
1233 1237
1234 1238 /* check for overflows */
1235 1239 if (!error) {
1236 1240 acl_perm(vp, exi, &va, cr);
1237 1241 error = vattr_to_nattr(&va, &ns->ns_attr);
1238 1242 }
1239 1243 }
1240 1244
1241 1245 out:
1242 1246 if (in_crit)
1243 1247 nbl_end_crit(vp);
1244 1248 VN_RELE(vp);
1245 1249
1246 1250 /* check if a monitor detected a delegation conflict */
1247 1251 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
1248 1252 /* mark as wouldblock so response is dropped */
1249 1253 curthread->t_flag |= T_WOULDBLOCK;
1250 1254 else
1251 1255 ns->ns_status = puterrno(error);
1252 1256
1253 1257 }
1254 1258
/*
 * One pending NFSv2 WRITE request parked on a write cluster.  Each
 * service thread stack-allocates one of these, links it onto the
 * cluster for its file handle, and sleeps until the thread processing
 * the cluster fills in ns->ns_status (see rfs_write()).
 */
struct rfs_async_write {
	struct nfswriteargs *wa;	/* decoded WRITE arguments */
	struct nfsattrstat *ns;		/* response; ns_status doubles as "done" flag */
	struct svc_req *req;		/* RPC transport request */
	cred_t *cr;			/* caller's credentials */
	bool_t ro;			/* export is read-only for this caller */
	kthread_t *thread;		/* waiting service thread (gets T_WOULDBLOCK) */
	struct rfs_async_write *list;	/* next request in cluster, offset order */
};
1264 1268
/*
 * A cluster of WRITE requests against a single file handle.  Clusters
 * live on the per-server async_write_head list only while open to new
 * arrivals; rfs_write() unlinks a cluster before issuing the I/O.
 */
struct rfs_async_write_list {
	fhandle_t *fhp;			/* file handle shared by all requests */
	kcondvar_t cv;			/* broadcast when requests are completed */
	struct rfs_async_write *list;	/* queued requests, sorted by offset */
	struct rfs_async_write_list *next;	/* next cluster on server list */
};
1271 1275
/*
 * NOTE(review): these file-scope variables appear to be leftovers from
 * before the per-zone conversion; rfs_write() below uses the nfs_srv_t
 * copies (nsrv->async_write_head, nsrv->async_write_lock,
 * nsrv->write_async) obtained via nfs_get_srv().  Confirm there are no
 * remaining consumers before removing them.
 */
static struct rfs_async_write_list *rfs_async_write_head = NULL;
static kmutex_t rfs_async_write_lock;
static int rfs_write_async = 1;	/* enables write clustering if == 1 */

/* Max scatter/gather iovecs handled without a kmem_alloc() per cluster. */
#define	MAXCLIOVECS	42
/* Sentinel for "not yet processed": 0 would read as NFS_OK. */
#define	RFSWRITE_INITVAL	(enum nfsstat) -1

#ifdef DEBUG
static int	rfs_write_hits = 0;	/* clusters that fit in the stack iov[] */
static int	rfs_write_misses = 0;	/* clusters that needed kmem_alloc() */
#endif
1283 1287
/*
 * Write data to file.
 * Returns attributes of a file after writing some data to it.
 *
 * Clustered-write entry point: WRITE requests that arrive concurrently
 * for the same file handle are queued on a shared cluster so that
 * contiguous requests can be issued with a single VOP_WRITE.  If
 * clustering is disabled for this server instance, fall through to
 * rfs_write_sync().
 */
void
rfs_write(struct nfswriteargs *wa, struct nfsattrstat *ns,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error;
	vnode_t *vp;
	rlim64_t rlimit;
	struct vattr va;
	struct uio uio;
	struct rfs_async_write_list *lp;
	struct rfs_async_write_list *nlp;
	struct rfs_async_write *rp;
	struct rfs_async_write *nrp;
	struct rfs_async_write *trp;
	struct rfs_async_write *lrp;
	int data_written;
	int iovcnt;
	mblk_t *m;
	struct iovec *iovp;
	struct iovec *niovp;
	struct iovec iov[MAXCLIOVECS];
	int count;
	int rcount;
	uint_t off;
	uint_t len;
	struct rfs_async_write nrpsp;		/* our cluster entry (on stack) */
	struct rfs_async_write_list nlpsp;	/* our cluster head (on stack) */
	ushort_t t_flag;
	cred_t *savecred;
	int in_crit = 0;
	caller_context_t ct;
	nfs_srv_t *nsrv;

	/* The export handed to us must belong to the zone serving this RPC. */
	ASSERT(exi == NULL || exi->exi_zoneid == curzone->zone_id);
	nsrv = nfs_get_srv();
	if (!nsrv->write_async) {
		rfs_write_sync(wa, ns, exi, req, cr, ro);
		return;
	}

	/*
	 * Initialize status to RFSWRITE_INITVAL instead of 0, since value of 0
	 * is considered an OK.
	 */
	ns->ns_status = RFSWRITE_INITVAL;

	nrp = &nrpsp;
	nrp->wa = wa;
	nrp->ns = ns;
	nrp->req = req;
	nrp->cr = cr;
	nrp->ro = ro;
	nrp->thread = curthread;

	/* We will sleep on a cluster CV with stack state linked globally. */
	ASSERT(curthread->t_schedflag & TS_DONT_SWAP);

	/*
	 * Look to see if there is already a cluster started
	 * for this file.
	 */
	mutex_enter(&nsrv->async_write_lock);
	for (lp = nsrv->async_write_head; lp != NULL; lp = lp->next) {
		if (bcmp(&wa->wa_fhandle, lp->fhp,
		    sizeof (fhandle_t)) == 0)
			break;
	}

	/*
	 * If lp is non-NULL, then there is already a cluster
	 * started.  We need to place ourselves in the cluster
	 * list in the right place as determined by starting
	 * offset.  Conflicts with non-blocking mandatory locked
	 * regions will be checked when the cluster is processed.
	 */
	if (lp != NULL) {
		/* Insertion sort by starting offset. */
		rp = lp->list;
		trp = NULL;
		while (rp != NULL && rp->wa->wa_offset < wa->wa_offset) {
			trp = rp;
			rp = rp->list;
		}
		nrp->list = rp;
		if (trp == NULL)
			lp->list = nrp;
		else
			trp->list = nrp;
		/* Sleep until the clustering thread fills in our status. */
		while (nrp->ns->ns_status == RFSWRITE_INITVAL)
			cv_wait(&lp->cv, &nsrv->async_write_lock);
		mutex_exit(&nsrv->async_write_lock);

		return;
	}

	/*
	 * No cluster started yet, start one and add ourselves
	 * to the list of clusters.  This thread becomes the one
	 * that will perform the I/O for the whole cluster.
	 */
	nrp->list = NULL;

	nlp = &nlpsp;
	nlp->fhp = &wa->wa_fhandle;
	cv_init(&nlp->cv, NULL, CV_DEFAULT, NULL);
	nlp->list = nrp;
	nlp->next = NULL;

	if (nsrv->async_write_head == NULL) {
		nsrv->async_write_head = nlp;
	} else {
		lp = nsrv->async_write_head;
		while (lp->next != NULL)
			lp = lp->next;
		lp->next = nlp;
	}
	mutex_exit(&nsrv->async_write_lock);

	/*
	 * Convert the file handle common to all of the requests
	 * in this cluster to a vnode.
	 */
	vp = nfs_fhtovp(&wa->wa_fhandle, exi);
	if (vp == NULL) {
		/* Unlink the cluster and fail every queued request. */
		mutex_enter(&nsrv->async_write_lock);
		if (nsrv->async_write_head == nlp)
			nsrv->async_write_head = nlp->next;
		else {
			lp = nsrv->async_write_head;
			while (lp->next != nlp)
				lp = lp->next;
			lp->next = nlp->next;
		}
		t_flag = curthread->t_flag & T_WOULDBLOCK;
		for (rp = nlp->list; rp != NULL; rp = rp->list) {
			rp->ns->ns_status = NFSERR_STALE;
			rp->thread->t_flag |= t_flag;
		}
		cv_broadcast(&nlp->cv);
		mutex_exit(&nsrv->async_write_lock);

		return;
	}

	/*
	 * Can only write regular files.  Attempts to write any
	 * other file types fail with EISDIR.
	 */
	if (vp->v_type != VREG) {
		VN_RELE(vp);
		/* Unlink the cluster and fail every queued request. */
		mutex_enter(&nsrv->async_write_lock);
		if (nsrv->async_write_head == nlp)
			nsrv->async_write_head = nlp->next;
		else {
			lp = nsrv->async_write_head;
			while (lp->next != nlp)
				lp = lp->next;
			lp->next = nlp->next;
		}
		t_flag = curthread->t_flag & T_WOULDBLOCK;
		for (rp = nlp->list; rp != NULL; rp = rp->list) {
			rp->ns->ns_status = NFSERR_ISDIR;
			rp->thread->t_flag |= t_flag;
		}
		cv_broadcast(&nlp->cv);
		mutex_exit(&nsrv->async_write_lock);

		return;
	}

	/*
	 * Enter the critical region before calling VOP_RWLOCK, to avoid a
	 * deadlock with ufs.
	 */
	if (nbl_need_check(vp)) {
		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
	}

	ct.cc_sysid = 0;
	ct.cc_pid = 0;
	ct.cc_caller_id = nfs2_srv_caller_id;
	ct.cc_flags = CC_DONTBLOCK;

	/*
	 * Lock the file for writing.  This operation provides
	 * the delay which allows clusters to grow.
	 */
	error = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct);

	/* check if a monitor detected a delegation conflict */
	if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
		if (in_crit)
			nbl_end_crit(vp);
		VN_RELE(vp);
		/* mark as wouldblock so response is dropped */
		curthread->t_flag |= T_WOULDBLOCK;
		/* Unlink the cluster and fail any still-pending requests. */
		mutex_enter(&nsrv->async_write_lock);
		if (nsrv->async_write_head == nlp)
			nsrv->async_write_head = nlp->next;
		else {
			lp = nsrv->async_write_head;
			while (lp->next != nlp)
				lp = lp->next;
			lp->next = nlp->next;
		}
		for (rp = nlp->list; rp != NULL; rp = rp->list) {
			if (rp->ns->ns_status == RFSWRITE_INITVAL) {
				rp->ns->ns_status = puterrno(error);
				rp->thread->t_flag |= T_WOULDBLOCK;
			}
		}
		cv_broadcast(&nlp->cv);
		mutex_exit(&nsrv->async_write_lock);

		return;
	}

	/*
	 * Disconnect this cluster from the list of clusters.
	 * The cluster that is being dealt with must be fixed
	 * in size after this point, so there is no reason
	 * to leave it on the list so that new requests can
	 * find it.
	 *
	 * The algorithm is that the first write request will
	 * create a cluster, convert the file handle to a
	 * vnode pointer, and then lock the file for writing.
	 * This request is not likely to be clustered with
	 * any others.  However, the next request will create
	 * a new cluster and be blocked in VOP_RWLOCK while
	 * the first request is being processed.  This delay
	 * will allow more requests to be clustered in this
	 * second cluster.
	 */
	mutex_enter(&nsrv->async_write_lock);
	if (nsrv->async_write_head == nlp)
		nsrv->async_write_head = nlp->next;
	else {
		lp = nsrv->async_write_head;
		while (lp->next != nlp)
			lp = lp->next;
		lp->next = nlp->next;
	}
	mutex_exit(&nsrv->async_write_lock);

	/*
	 * Step through the list of requests in this cluster.
	 * We need to check permissions to make sure that all
	 * of the requests have sufficient permission to write
	 * the file.  A cluster can be composed of requests
	 * from different clients and different users on each
	 * client.
	 *
	 * As a side effect, we also calculate the size of the
	 * byte range that this cluster encompasses.
	 */
	rp = nlp->list;
	off = rp->wa->wa_offset;
	len = (uint_t)0;
	do {
		if (rdonly(rp->ro, vp)) {
			rp->ns->ns_status = NFSERR_ROFS;
			t_flag = curthread->t_flag & T_WOULDBLOCK;
			rp->thread->t_flag |= t_flag;
			continue;
		}

		va.va_mask = AT_UID|AT_MODE;

		/* Each request is checked with its own credentials. */
		error = VOP_GETATTR(vp, &va, 0, rp->cr, &ct);

		if (!error) {
			if (crgetuid(rp->cr) != va.va_uid) {
				/*
				 * This is a kludge to allow writes of files
				 * created with read only permission.  The
				 * owner of the file is always allowed to
				 * write it.
				 */
				error = VOP_ACCESS(vp, VWRITE, 0, rp->cr, &ct);
			}
			if (!error && MANDLOCK(vp, va.va_mode))
				error = EACCES;
		}

		/*
		 * Check for a conflict with a nbmand-locked region.
		 */
		if (in_crit && nbl_conflict(vp, NBL_WRITE, rp->wa->wa_offset,
		    rp->wa->wa_count, 0, NULL)) {
			error = EACCES;
		}

		if (error) {
			rp->ns->ns_status = puterrno(error);
			t_flag = curthread->t_flag & T_WOULDBLOCK;
			rp->thread->t_flag |= t_flag;
			continue;
		}
		if (len < rp->wa->wa_offset + rp->wa->wa_count - off)
			len = rp->wa->wa_offset + rp->wa->wa_count - off;
	} while ((rp = rp->list) != NULL);

	/*
	 * Step through the cluster attempting to gather as many
	 * requests which are contiguous as possible.  These
	 * contiguous requests are handled via one call to VOP_WRITE
	 * instead of different calls to VOP_WRITE.  We also keep
	 * track of the fact that any data was written.
	 */
	rp = nlp->list;
	data_written = 0;
	do {
		/*
		 * Skip any requests which are already marked as having an
		 * error.
		 */
		if (rp->ns->ns_status != RFSWRITE_INITVAL) {
			rp = rp->list;
			continue;
		}

		/*
		 * Count the number of iovec's which are required
		 * to handle this set of requests.  One iovec is
		 * needed for each data buffer, whether addressed
		 * by wa_data or by the b_rptr pointers in the
		 * mblk chains.
		 */
		iovcnt = 0;
		lrp = rp;
		for (;;) {
			if (lrp->wa->wa_data || lrp->wa->wa_rlist)
				iovcnt++;
			else {
				m = lrp->wa->wa_mblk;
				while (m != NULL) {
					iovcnt++;
					m = m->b_cont;
				}
			}
			/*
			 * Stop extending the run at the first request that
			 * already failed or is not byte-contiguous with the
			 * current one; lrp ends up one past the last member.
			 */
			if (lrp->list == NULL ||
			    lrp->list->ns->ns_status != RFSWRITE_INITVAL ||
			    lrp->wa->wa_offset + lrp->wa->wa_count !=
			    lrp->list->wa->wa_offset) {
				lrp = lrp->list;
				break;
			}
			lrp = lrp->list;
		}

		if (iovcnt <= MAXCLIOVECS) {
#ifdef DEBUG
			rfs_write_hits++;
#endif
			niovp = iov;
		} else {
#ifdef DEBUG
			rfs_write_misses++;
#endif
			niovp = kmem_alloc(sizeof (*niovp) * iovcnt, KM_SLEEP);
		}
		/*
		 * Put together the scatter/gather iovecs.
		 */
		iovp = niovp;
		trp = rp;
		count = 0;
		do {
			if (trp->wa->wa_data || trp->wa->wa_rlist) {
				if (trp->wa->wa_rlist) {
					iovp->iov_base =
					    (char *)((trp->wa->wa_rlist)->
					    u.c_daddr3);
					iovp->iov_len = trp->wa->wa_count;
				} else {
					iovp->iov_base = trp->wa->wa_data;
					iovp->iov_len = trp->wa->wa_count;
				}
				iovp++;
			} else {
				m = trp->wa->wa_mblk;
				rcount = trp->wa->wa_count;
				/*
				 * Walk the mblk chain, clamping the final
				 * fragment to the request's wa_count.
				 */
				while (m != NULL) {
					iovp->iov_base = (caddr_t)m->b_rptr;
					iovp->iov_len = (m->b_wptr - m->b_rptr);
					rcount -= iovp->iov_len;
					if (rcount < 0)
						iovp->iov_len += rcount;
					iovp++;
					if (rcount <= 0)
						break;
					m = m->b_cont;
				}
			}
			count += trp->wa->wa_count;
			trp = trp->list;
		} while (trp != lrp);

		uio.uio_iov = niovp;
		uio.uio_iovcnt = iovcnt;
		uio.uio_segflg = UIO_SYSSPACE;
		uio.uio_extflg = UIO_COPY_DEFAULT;
		uio.uio_loffset = (offset_t)rp->wa->wa_offset;
		uio.uio_resid = count;
		/*
		 * The limit is checked on the client.  We
		 * should allow any size writes here.
		 */
		uio.uio_llimit = curproc->p_fsz_ctl;
		rlimit = uio.uio_llimit - rp->wa->wa_offset;
		if (rlimit < (rlim64_t)uio.uio_resid)
			uio.uio_resid = (uint_t)rlimit;

		/*
		 * For now we assume no append mode.
		 */

		/*
		 * We're changing creds because VM may fault
		 * and we need the cred of the current
		 * thread to be used if quota checking is
		 * enabled.
		 */
		savecred = curthread->t_cred;
		curthread->t_cred = cr;
		error = VOP_WRITE(vp, &uio, 0, rp->cr, &ct);
		curthread->t_cred = savecred;

		/* check if a monitor detected a delegation conflict */
		if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
			/* mark as wouldblock so response is dropped */
			curthread->t_flag |= T_WOULDBLOCK;

		if (niovp != iov)
			kmem_free(niovp, sizeof (*niovp) * iovcnt);

		if (!error) {
			data_written = 1;
			/*
			 * Get attributes again so we send the latest mod
			 * time to the client side for its cache.
			 */
			va.va_mask = AT_ALL;	/* now we want everything */

			error = VOP_GETATTR(vp, &va, 0, rp->cr, &ct);

			if (!error)
				acl_perm(vp, exi, &va, rp->cr);
		}

		/*
		 * Fill in the status responses for each request
		 * which was just handled.  Also, copy the latest
		 * attributes in to the attribute responses if
		 * appropriate.
		 */
		t_flag = curthread->t_flag & T_WOULDBLOCK;
		do {
			rp->thread->t_flag |= t_flag;
			/* check for overflows */
			if (!error) {
				error = vattr_to_nattr(&va, &rp->ns->ns_attr);
			}
			rp->ns->ns_status = puterrno(error);
			rp = rp->list;
		} while (rp != lrp);
	} while (rp != NULL);

	/*
	 * If any data was written at all, then we need to flush
	 * the data and metadata to stable storage.
	 */
	if (data_written) {
		error = VOP_PUTPAGE(vp, (u_offset_t)off, len, 0, cr, &ct);

		if (!error) {
			error = VOP_FSYNC(vp, FNODSYNC, cr, &ct);
		}
	}

	VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct);

	if (in_crit)
		nbl_end_crit(vp);
	VN_RELE(vp);

	/* Wake every request still pending with the final error status. */
	t_flag = curthread->t_flag & T_WOULDBLOCK;
	mutex_enter(&nsrv->async_write_lock);
	for (rp = nlp->list; rp != NULL; rp = rp->list) {
		if (rp->ns->ns_status == RFSWRITE_INITVAL) {
			rp->ns->ns_status = puterrno(error);
			rp->thread->t_flag |= t_flag;
		}
	}
	cv_broadcast(&nlp->cv);
	mutex_exit(&nsrv->async_write_lock);

}
1784 1789
1785 1790 void *
1786 1791 rfs_write_getfh(struct nfswriteargs *wa)
1787 1792 {
1788 1793 return (&wa->wa_fhandle);
1789 1794 }
1790 1795
1791 1796 /*
1792 1797 * Create a file.
1793 1798 * Creates a file with given attributes and returns those attributes
1794 1799 * and an fhandle for the new file.
1795 1800 */
1796 1801 void
1797 1802 rfs_create(struct nfscreatargs *args, struct nfsdiropres *dr,
1798 1803 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
1799 1804 {
1800 1805 int error;
1801 1806 int lookuperr;
1802 1807 int in_crit = 0;
1803 1808 struct vattr va;
1804 1809 vnode_t *vp;
1805 1810 vnode_t *realvp;
1806 1811 vnode_t *dvp;
1807 1812 char *name = args->ca_da.da_name;
1808 1813 vnode_t *tvp = NULL;
1809 1814 int mode;
1810 1815 int lookup_ok;
1811 1816 bool_t trunc;
1812 1817 struct sockaddr *ca;
1813 1818
1814 1819 /*
1815 1820 * Disallow NULL paths
1816 1821 */
1817 1822 if (name == NULL || *name == '\0') {
1818 1823 dr->dr_status = NFSERR_ACCES;
1819 1824 return;
1820 1825 }
1821 1826
1822 1827 dvp = nfs_fhtovp(args->ca_da.da_fhandle, exi);
1823 1828 if (dvp == NULL) {
1824 1829 dr->dr_status = NFSERR_STALE;
1825 1830 return;
1826 1831 }
1827 1832
1828 1833 error = sattr_to_vattr(args->ca_sa, &va);
1829 1834 if (error) {
1830 1835 dr->dr_status = puterrno(error);
1831 1836 return;
1832 1837 }
1833 1838
1834 1839 /*
1835 1840 * Must specify the mode.
1836 1841 */
1837 1842 if (!(va.va_mask & AT_MODE)) {
1838 1843 VN_RELE(dvp);
1839 1844 dr->dr_status = NFSERR_INVAL;
1840 1845 return;
1841 1846 }
1842 1847
1843 1848 /*
1844 1849 * This is a completely gross hack to make mknod
1845 1850 * work over the wire until we can wack the protocol
1846 1851 */
1847 1852 if ((va.va_mode & IFMT) == IFCHR) {
1848 1853 if (args->ca_sa->sa_size == (uint_t)NFS_FIFO_DEV)
1849 1854 va.va_type = VFIFO; /* xtra kludge for named pipe */
1850 1855 else {
1851 1856 va.va_type = VCHR;
1852 1857 /*
1853 1858 * uncompress the received dev_t
1854 1859 * if the top half is zero indicating a request
1855 1860 * from an `older style' OS.
1856 1861 */
1857 1862 if ((va.va_size & 0xffff0000) == 0)
1858 1863 va.va_rdev = nfsv2_expdev(va.va_size);
1859 1864 else
1860 1865 va.va_rdev = (dev_t)va.va_size;
1861 1866 }
1862 1867 va.va_mask &= ~AT_SIZE;
1863 1868 } else if ((va.va_mode & IFMT) == IFBLK) {
1864 1869 va.va_type = VBLK;
1865 1870 /*
1866 1871 * uncompress the received dev_t
1867 1872 * if the top half is zero indicating a request
1868 1873 * from an `older style' OS.
1869 1874 */
1870 1875 if ((va.va_size & 0xffff0000) == 0)
1871 1876 va.va_rdev = nfsv2_expdev(va.va_size);
1872 1877 else
1873 1878 va.va_rdev = (dev_t)va.va_size;
1874 1879 va.va_mask &= ~AT_SIZE;
1875 1880 } else if ((va.va_mode & IFMT) == IFSOCK) {
1876 1881 va.va_type = VSOCK;
1877 1882 } else {
1878 1883 va.va_type = VREG;
1879 1884 }
1880 1885 va.va_mode &= ~IFMT;
1881 1886 va.va_mask |= AT_TYPE;
1882 1887
1883 1888 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1884 1889 name = nfscmd_convname(ca, exi, name, NFSCMD_CONV_INBOUND,
1885 1890 MAXPATHLEN);
1886 1891 if (name == NULL) {
1887 1892 dr->dr_status = puterrno(EINVAL);
1888 1893 return;
1889 1894 }
1890 1895
1891 1896 /*
1892 1897 * Why was the choice made to use VWRITE as the mode to the
1893 1898 * call to VOP_CREATE ? This results in a bug. When a client
1894 1899 * opens a file that already exists and is RDONLY, the second
1895 1900 * open fails with an EACESS because of the mode.
1896 1901 * bug ID 1054648.
1897 1902 */
1898 1903 lookup_ok = 0;
1899 1904 mode = VWRITE;
1900 1905 if (!(va.va_mask & AT_SIZE) || va.va_type != VREG) {
1901 1906 error = VOP_LOOKUP(dvp, name, &tvp, NULL, 0, NULL, cr,
1902 1907 NULL, NULL, NULL);
1903 1908 if (!error) {
1904 1909 struct vattr at;
1905 1910
1906 1911 lookup_ok = 1;
1907 1912 at.va_mask = AT_MODE;
1908 1913 error = VOP_GETATTR(tvp, &at, 0, cr, NULL);
1909 1914 if (!error)
1910 1915 mode = (at.va_mode & S_IWUSR) ? VWRITE : VREAD;
1911 1916 VN_RELE(tvp);
1912 1917 tvp = NULL;
1913 1918 }
1914 1919 }
1915 1920
1916 1921 if (!lookup_ok) {
1917 1922 if (rdonly(ro, dvp)) {
1918 1923 error = EROFS;
1919 1924 } else if (va.va_type != VREG && va.va_type != VFIFO &&
1920 1925 va.va_type != VSOCK && secpolicy_sys_devices(cr) != 0) {
1921 1926 error = EPERM;
1922 1927 } else {
1923 1928 error = 0;
1924 1929 }
1925 1930 }
1926 1931
1927 1932 /*
1928 1933 * If file size is being modified on an already existing file
1929 1934 * make sure that there are no conflicting non-blocking mandatory
1930 1935 * locks in the region being manipulated. Return EACCES if there
1931 1936 * are conflicting locks.
1932 1937 */
1933 1938 if (!error && (va.va_type == VREG) && (va.va_mask & AT_SIZE)) {
1934 1939 lookuperr = VOP_LOOKUP(dvp, name, &tvp, NULL, 0, NULL, cr,
1935 1940 NULL, NULL, NULL);
1936 1941
1937 1942 if (!lookuperr &&
1938 1943 rfs4_check_delegated(FWRITE, tvp, va.va_size == 0)) {
1939 1944 VN_RELE(tvp);
1940 1945 curthread->t_flag |= T_WOULDBLOCK;
1941 1946 goto out;
1942 1947 }
1943 1948
1944 1949 if (!lookuperr && nbl_need_check(tvp)) {
1945 1950 /*
1946 1951 * The file exists. Now check if it has any
1947 1952 * conflicting non-blocking mandatory locks
1948 1953 * in the region being changed.
1949 1954 */
1950 1955 struct vattr bva;
1951 1956 u_offset_t offset;
1952 1957 ssize_t length;
1953 1958
1954 1959 nbl_start_crit(tvp, RW_READER);
1955 1960 in_crit = 1;
1956 1961
1957 1962 bva.va_mask = AT_SIZE;
1958 1963 error = VOP_GETATTR(tvp, &bva, 0, cr, NULL);
1959 1964 if (!error) {
1960 1965 if (va.va_size < bva.va_size) {
1961 1966 offset = va.va_size;
1962 1967 length = bva.va_size - va.va_size;
1963 1968 } else {
1964 1969 offset = bva.va_size;
1965 1970 length = va.va_size - bva.va_size;
1966 1971 }
1967 1972 if (length) {
1968 1973 if (nbl_conflict(tvp, NBL_WRITE,
1969 1974 offset, length, 0, NULL)) {
1970 1975 error = EACCES;
1971 1976 }
1972 1977 }
1973 1978 }
1974 1979 if (error) {
1975 1980 nbl_end_crit(tvp);
1976 1981 VN_RELE(tvp);
1977 1982 in_crit = 0;
1978 1983 }
1979 1984 } else if (tvp != NULL) {
1980 1985 VN_RELE(tvp);
1981 1986 }
1982 1987 }
1983 1988
1984 1989 if (!error) {
1985 1990 /*
1986 1991 * If filesystem is shared with nosuid the remove any
1987 1992 * setuid/setgid bits on create.
1988 1993 */
1989 1994 if (va.va_type == VREG &&
1990 1995 exi->exi_export.ex_flags & EX_NOSUID)
1991 1996 va.va_mode &= ~(VSUID | VSGID);
1992 1997
1993 1998 error = VOP_CREATE(dvp, name, &va, NONEXCL, mode, &vp, cr, 0,
1994 1999 NULL, NULL);
1995 2000
1996 2001 if (!error) {
1997 2002
1998 2003 if ((va.va_mask & AT_SIZE) && (va.va_size == 0))
1999 2004 trunc = TRUE;
2000 2005 else
2001 2006 trunc = FALSE;
2002 2007
2003 2008 if (rfs4_check_delegated(FWRITE, vp, trunc)) {
2004 2009 VN_RELE(vp);
2005 2010 curthread->t_flag |= T_WOULDBLOCK;
2006 2011 goto out;
2007 2012 }
2008 2013 va.va_mask = AT_ALL;
2009 2014
2010 2015 error = VOP_GETATTR(vp, &va, 0, cr, NULL);
2011 2016
2012 2017 /* check for overflows */
2013 2018 if (!error) {
2014 2019 acl_perm(vp, exi, &va, cr);
2015 2020 error = vattr_to_nattr(&va, &dr->dr_attr);
2016 2021 if (!error) {
2017 2022 error = makefh(&dr->dr_fhandle, vp,
2018 2023 exi);
2019 2024 }
2020 2025 }
2021 2026 /*
2022 2027 * Force modified metadata out to stable storage.
2023 2028 *
2024 2029 * if a underlying vp exists, pass it to VOP_FSYNC
2025 2030 */
2026 2031 if (VOP_REALVP(vp, &realvp, NULL) == 0)
2027 2032 (void) VOP_FSYNC(realvp, FNODSYNC, cr, NULL);
2028 2033 else
2029 2034 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
2030 2035 VN_RELE(vp);
2031 2036 }
2032 2037
2033 2038 if (in_crit) {
2034 2039 nbl_end_crit(tvp);
2035 2040 VN_RELE(tvp);
2036 2041 }
2037 2042 }
2038 2043
2039 2044 /*
2040 2045 * Force modified data and metadata out to stable storage.
2041 2046 */
2042 2047 (void) VOP_FSYNC(dvp, 0, cr, NULL);
2043 2048
2044 2049 out:
2045 2050
2046 2051 VN_RELE(dvp);
2047 2052
2048 2053 dr->dr_status = puterrno(error);
2049 2054
2050 2055 if (name != args->ca_da.da_name)
2051 2056 kmem_free(name, MAXPATHLEN);
2052 2057 }
2053 2058 void *
2054 2059 rfs_create_getfh(struct nfscreatargs *args)
2055 2060 {
2056 2061 return (args->ca_da.da_fhandle);
2057 2062 }
2058 2063
/*
 * Remove a file.
 * Remove named file from parent directory.
 *
 * Holds: vp (the parent directory) via nfs_fhtovp(), targvp (the entry
 * being removed) via VOP_LOOKUP(); both are released on every path.
 */
/* ARGSUSED */
void
rfs_remove(struct nfsdiropargs *da, enum nfsstat *status,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error = 0;
	vnode_t *vp;
	vnode_t *targvp;
	int in_crit = 0;

	/*
	 * Disallow NULL paths
	 */
	if (da->da_name == NULL || *da->da_name == '\0') {
		*status = NFSERR_ACCES;
		return;
	}

	/* Resolve the parent directory from the file handle. */
	vp = nfs_fhtovp(da->da_fhandle, exi);
	if (vp == NULL) {
		*status = NFSERR_STALE;
		return;
	}

	if (rdonly(ro, vp)) {
		VN_RELE(vp);
		*status = NFSERR_ROFS;
		return;
	}

	/*
	 * Look up the target so we can check it for delegations and for
	 * a conflict with a non-blocking mandatory share reservation.
	 */
	error = VOP_LOOKUP(vp, da->da_name, &targvp, NULL, 0,
	    NULL, cr, NULL, NULL, NULL);
	if (error != 0) {
		VN_RELE(vp);
		*status = puterrno(error);
		return;
	}

	/*
	 * If the file is delegated to an v4 client, then initiate
	 * recall and drop this request (by setting T_WOULDBLOCK).
	 * The client will eventually re-transmit the request and
	 * (hopefully), by then, the v4 client will have returned
	 * the delegation.
	 */

	if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
		VN_RELE(vp);
		VN_RELE(targvp);
		curthread->t_flag |= T_WOULDBLOCK;
		return;
	}

	/* Non-blocking mandatory lock check on the target itself. */
	if (nbl_need_check(targvp)) {
		nbl_start_crit(targvp, RW_READER);
		in_crit = 1;
		if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
			error = EACCES;
			goto out;
		}
	}

	error = VOP_REMOVE(vp, da->da_name, cr, NULL, 0);

	/*
	 * Force modified data and metadata out to stable storage.
	 */
	(void) VOP_FSYNC(vp, 0, cr, NULL);

out:
	if (in_crit)
		nbl_end_crit(targvp);
	VN_RELE(targvp);
	VN_RELE(vp);

	*status = puterrno(error);

}
2144 2149
2145 2150 void *
2146 2151 rfs_remove_getfh(struct nfsdiropargs *da)
2147 2152 {
2148 2153 return (da->da_fhandle);
2149 2154 }
2150 2155
2151 2156 /*
2152 2157 * rename a file
2153 2158 * Give a file (from) a new name (to).
2154 2159 */
/* ARGSUSED */
void
rfs_rename(struct nfsrnmargs *args, enum nfsstat *status,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error = 0;
	vnode_t *fromvp;	/* source directory */
	vnode_t *tovp;		/* target directory */
	struct exportinfo *to_exi;
	fhandle_t *fh;
	vnode_t *srcvp;		/* entry being renamed */
	vnode_t *targvp;	/* existing entry being renamed over, if any */
	int in_crit = 0;	/* non-zero iff we hold srcvp's nbmand crit region */

	fromvp = nfs_fhtovp(args->rna_from.da_fhandle, exi);
	if (fromvp == NULL) {
		*status = NFSERR_STALE;
		return;
	}

	/* Both handles must resolve to the same export: no cross-export rename. */
	fh = args->rna_to.da_fhandle;
	to_exi = checkexport(&fh->fh_fsid, (fid_t *)&fh->fh_xlen);
	if (to_exi == NULL) {
		VN_RELE(fromvp);
		*status = NFSERR_ACCES;
		return;
	}
	exi_rele(to_exi);

	/*
	 * NOTE(review): to_exi is compared after exi_rele() released our
	 * hold.  Only the pointer value is used (never dereferenced), but
	 * confirm this is safe against concurrent export teardown.
	 */
	if (to_exi != exi) {
		VN_RELE(fromvp);
		*status = NFSERR_XDEV;
		return;
	}

	tovp = nfs_fhtovp(args->rna_to.da_fhandle, exi);
	if (tovp == NULL) {
		VN_RELE(fromvp);
		*status = NFSERR_STALE;
		return;
	}

	if (fromvp->v_type != VDIR || tovp->v_type != VDIR) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_NOTDIR;
		return;
	}

	/*
	 * Disallow NULL paths
	 */
	if (args->rna_from.da_name == NULL || *args->rna_from.da_name == '\0' ||
	    args->rna_to.da_name == NULL || *args->rna_to.da_name == '\0') {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_ACCES;
		return;
	}

	if (rdonly(ro, tovp)) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_ROFS;
		return;
	}

	/*
	 * Check for a conflict with a non-blocking mandatory share reservation.
	 */
	error = VOP_LOOKUP(fromvp, args->rna_from.da_name, &srcvp, NULL, 0,
	    NULL, cr, NULL, NULL, NULL);
	if (error != 0) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = puterrno(error);
		return;
	}

	/* Check for delegations on the source file */

	if (rfs4_check_delegated(FWRITE, srcvp, FALSE)) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		VN_RELE(srcvp);
		/* Drop the request; client retries after delegation recall. */
		curthread->t_flag |= T_WOULDBLOCK;
		return;
	}

	/* Check for delegation on the file being renamed over, if it exists */

	if (nfs4_get_deleg_policy() != SRV_NEVER_DELEGATE &&
	    VOP_LOOKUP(tovp, args->rna_to.da_name, &targvp, NULL, 0, NULL, cr,
	    NULL, NULL, NULL) == 0) {

		if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
			VN_RELE(tovp);
			VN_RELE(fromvp);
			VN_RELE(srcvp);
			VN_RELE(targvp);
			curthread->t_flag |= T_WOULDBLOCK;
			return;
		}
		VN_RELE(targvp);
	}


	/* Serialize against non-blocking mandatory locks on the source. */
	if (nbl_need_check(srcvp)) {
		nbl_start_crit(srcvp, RW_READER);
		in_crit = 1;
		if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL)) {
			error = EACCES;
			goto out;
		}
	}

	error = VOP_RENAME(fromvp, args->rna_from.da_name,
	    tovp, args->rna_to.da_name, cr, NULL, 0);

	/* Keep the cached vnode path current for the renamed entry. */
	if (error == 0)
		vn_renamepath(tovp, srcvp, args->rna_to.da_name,
		    strlen(args->rna_to.da_name));

	/*
	 * Force modified data and metadata out to stable storage.
	 */
	(void) VOP_FSYNC(tovp, 0, cr, NULL);
	(void) VOP_FSYNC(fromvp, 0, cr, NULL);

out:
	if (in_crit)
		nbl_end_crit(srcvp);
	VN_RELE(srcvp);
	VN_RELE(tovp);
	VN_RELE(fromvp);

	*status = puterrno(error);

}
2294 2299 void *
2295 2300 rfs_rename_getfh(struct nfsrnmargs *args)
2296 2301 {
2297 2302 return (args->rna_from.da_fhandle);
2298 2303 }
2299 2304
2300 2305 /*
2301 2306 * Link to a file.
2302 2307 * Create a file (to) which is a hard link to the given file (from).
2303 2308 */
/* ARGSUSED */
void
rfs_link(struct nfslinkargs *args, enum nfsstat *status,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error;
	vnode_t *fromvp;	/* existing file being linked to */
	vnode_t *tovp;		/* directory receiving the new name */
	struct exportinfo *to_exi;
	fhandle_t *fh;

	fromvp = nfs_fhtovp(args->la_from, exi);
	if (fromvp == NULL) {
		*status = NFSERR_STALE;
		return;
	}

	/* Both handles must resolve to the same export: no cross-export link. */
	fh = args->la_to.da_fhandle;
	to_exi = checkexport(&fh->fh_fsid, (fid_t *)&fh->fh_xlen);
	if (to_exi == NULL) {
		VN_RELE(fromvp);
		*status = NFSERR_ACCES;
		return;
	}
	exi_rele(to_exi);

	/*
	 * NOTE(review): to_exi is compared after exi_rele() released our
	 * hold.  Only the pointer value is used (never dereferenced), but
	 * confirm this is safe against concurrent export teardown.
	 */
	if (to_exi != exi) {
		VN_RELE(fromvp);
		*status = NFSERR_XDEV;
		return;
	}

	tovp = nfs_fhtovp(args->la_to.da_fhandle, exi);
	if (tovp == NULL) {
		VN_RELE(fromvp);
		*status = NFSERR_STALE;
		return;
	}

	if (tovp->v_type != VDIR) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_NOTDIR;
		return;
	}
	/*
	 * Disallow NULL paths
	 */
	if (args->la_to.da_name == NULL || *args->la_to.da_name == '\0') {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_ACCES;
		return;
	}

	if (rdonly(ro, tovp)) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_ROFS;
		return;
	}

	error = VOP_LINK(tovp, fromvp, args->la_to.da_name, cr, NULL, 0);

	/*
	 * Force modified data and metadata out to stable storage.
	 */
	(void) VOP_FSYNC(tovp, 0, cr, NULL);
	(void) VOP_FSYNC(fromvp, FNODSYNC, cr, NULL);

	VN_RELE(tovp);
	VN_RELE(fromvp);

	*status = puterrno(error);

}
2380 2385 void *
2381 2386 rfs_link_getfh(struct nfslinkargs *args)
2382 2387 {
2383 2388 return (args->la_from);
2384 2389 }
2385 2390
2386 2391 /*
2387 2392 * Symbolicly link to a file.
2388 2393 * Create a file (to) with the given attributes which is a symbolic link
2389 2394 * to the given path name (to).
2390 2395 */
2391 2396 void
2392 2397 rfs_symlink(struct nfsslargs *args, enum nfsstat *status,
2393 2398 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
2394 2399 {
2395 2400 int error;
2396 2401 struct vattr va;
2397 2402 vnode_t *vp;
2398 2403 vnode_t *svp;
2399 2404 int lerror;
2400 2405 struct sockaddr *ca;
2401 2406 char *name = NULL;
2402 2407
2403 2408 /*
2404 2409 * Disallow NULL paths
2405 2410 */
2406 2411 if (args->sla_from.da_name == NULL || *args->sla_from.da_name == '\0') {
2407 2412 *status = NFSERR_ACCES;
2408 2413 return;
2409 2414 }
2410 2415
2411 2416 vp = nfs_fhtovp(args->sla_from.da_fhandle, exi);
2412 2417 if (vp == NULL) {
2413 2418 *status = NFSERR_STALE;
2414 2419 return;
2415 2420 }
2416 2421
2417 2422 if (rdonly(ro, vp)) {
2418 2423 VN_RELE(vp);
2419 2424 *status = NFSERR_ROFS;
2420 2425 return;
2421 2426 }
2422 2427
2423 2428 error = sattr_to_vattr(args->sla_sa, &va);
2424 2429 if (error) {
2425 2430 VN_RELE(vp);
2426 2431 *status = puterrno(error);
2427 2432 return;
2428 2433 }
2429 2434
2430 2435 if (!(va.va_mask & AT_MODE)) {
2431 2436 VN_RELE(vp);
2432 2437 *status = NFSERR_INVAL;
2433 2438 return;
2434 2439 }
2435 2440
2436 2441 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2437 2442 name = nfscmd_convname(ca, exi, args->sla_tnm,
2438 2443 NFSCMD_CONV_INBOUND, MAXPATHLEN);
2439 2444
2440 2445 if (name == NULL) {
2441 2446 *status = NFSERR_ACCES;
2442 2447 return;
2443 2448 }
2444 2449
2445 2450 va.va_type = VLNK;
2446 2451 va.va_mask |= AT_TYPE;
2447 2452
2448 2453 error = VOP_SYMLINK(vp, args->sla_from.da_name, &va, name, cr, NULL, 0);
2449 2454
2450 2455 /*
2451 2456 * Force new data and metadata out to stable storage.
2452 2457 */
2453 2458 lerror = VOP_LOOKUP(vp, args->sla_from.da_name, &svp, NULL, 0,
2454 2459 NULL, cr, NULL, NULL, NULL);
2455 2460
2456 2461 if (!lerror) {
2457 2462 (void) VOP_FSYNC(svp, 0, cr, NULL);
2458 2463 VN_RELE(svp);
2459 2464 }
2460 2465
2461 2466 /*
2462 2467 * Force modified data and metadata out to stable storage.
2463 2468 */
2464 2469 (void) VOP_FSYNC(vp, 0, cr, NULL);
2465 2470
2466 2471 VN_RELE(vp);
2467 2472
2468 2473 *status = puterrno(error);
2469 2474 if (name != args->sla_tnm)
2470 2475 kmem_free(name, MAXPATHLEN);
2471 2476
2472 2477 }
2473 2478 void *
2474 2479 rfs_symlink_getfh(struct nfsslargs *args)
2475 2480 {
2476 2481 return (args->sla_from.da_fhandle);
2477 2482 }
2478 2483
2479 2484 /*
2480 2485 * Make a directory.
2481 2486 * Create a directory with the given name, parent directory, and attributes.
2482 2487 * Returns a file handle and attributes for the new directory.
2483 2488 */
/* ARGSUSED */
void
rfs_mkdir(struct nfscreatargs *args, struct nfsdiropres *dr,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error;
	struct vattr va;
	vnode_t *dvp = NULL;	/* the newly created directory */
	vnode_t *vp;		/* parent directory (held via nfs_fhtovp) */
	char *name = args->ca_da.da_name;

	/*
	 * Disallow NULL paths
	 */
	if (name == NULL || *name == '\0') {
		dr->dr_status = NFSERR_ACCES;
		return;
	}

	vp = nfs_fhtovp(args->ca_da.da_fhandle, exi);
	if (vp == NULL) {
		dr->dr_status = NFSERR_STALE;
		return;
	}

	if (rdonly(ro, vp)) {
		VN_RELE(vp);
		dr->dr_status = NFSERR_ROFS;
		return;
	}

	error = sattr_to_vattr(args->ca_sa, &va);
	if (error) {
		VN_RELE(vp);
		dr->dr_status = puterrno(error);
		return;
	}

	/* The new directory's mode must be supplied explicitly. */
	if (!(va.va_mask & AT_MODE)) {
		VN_RELE(vp);
		dr->dr_status = NFSERR_INVAL;
		return;
	}

	va.va_type = VDIR;
	va.va_mask |= AT_TYPE;

	error = VOP_MKDIR(vp, name, &va, &dvp, cr, NULL, 0, NULL);

	if (!error) {
		/*
		 * Attribtutes of the newly created directory should
		 * be returned to the client.
		 */
		va.va_mask = AT_ALL; /* We want everything */
		error = VOP_GETATTR(dvp, &va, 0, cr, NULL);

		/* check for overflows */
		if (!error) {
			/* Approximate mode bits from any non-trivial ACL. */
			acl_perm(vp, exi, &va, cr);
			error = vattr_to_nattr(&va, &dr->dr_attr);
			if (!error) {
				error = makefh(&dr->dr_fhandle, dvp, exi);
			}
		}
		/*
		 * Force new data and metadata out to stable storage.
		 */
		(void) VOP_FSYNC(dvp, 0, cr, NULL);
		VN_RELE(dvp);
	}

	/*
	 * Force modified data and metadata out to stable storage.
	 */
	(void) VOP_FSYNC(vp, 0, cr, NULL);

	VN_RELE(vp);

	dr->dr_status = puterrno(error);

}
2566 2571 void *
2567 2572 rfs_mkdir_getfh(struct nfscreatargs *args)
2568 2573 {
2569 2574 return (args->ca_da.da_fhandle);
2570 2575 }
2571 2576
2572 2577 /*
2573 2578 * Remove a directory.
2574 2579 * Remove the given directory name from the given parent directory.
2575 2580 */
/* ARGSUSED */
void
rfs_rmdir(struct nfsdiropargs *da, enum nfsstat *status,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error;
	vnode_t *vp;	/* parent directory (held via nfs_fhtovp) */

	/*
	 * Disallow NULL paths
	 */
	if (da->da_name == NULL || *da->da_name == '\0') {
		*status = NFSERR_ACCES;
		return;
	}

	vp = nfs_fhtovp(da->da_fhandle, exi);
	if (vp == NULL) {
		*status = NFSERR_STALE;
		return;
	}

	if (rdonly(ro, vp)) {
		VN_RELE(vp);
		*status = NFSERR_ROFS;
		return;
	}

	/*
	 * VOP_RMDIR takes a third argument (the current
	 * directory of the process). That's because someone
	 * wants to return EINVAL if one tries to remove ".".
	 * Of course, NFS servers have no idea what their
	 * clients' current directories are. We fake it by
	 * supplying a vnode known to exist and illegal to
	 * remove.  ZONE_ROOTVP() is the serving zone's root,
	 * not necessarily the global zone's.
	 */
	error = VOP_RMDIR(vp, da->da_name, ZONE_ROOTVP(), cr, NULL, 0);

	/*
	 * Force modified data and metadata out to stable storage.
	 */
	(void) VOP_FSYNC(vp, 0, cr, NULL);

	VN_RELE(vp);

	/*
	 * System V defines rmdir to return EEXIST, not ENOTEMPTY,
	 * if the directory is not empty. A System V NFS server
	 * needs to map NFSERR_EXIST to NFSERR_NOTEMPTY to transmit
	 * over the wire.
	 */
	if (error == EEXIST)
		*status = NFSERR_NOTEMPTY;
	else
		*status = puterrno(error);

}
2634 2639 void *
2635 2640 rfs_rmdir_getfh(struct nfsdiropargs *da)
2636 2641 {
2637 2642 return (da->da_fhandle);
2638 2643 }
2639 2644
/* ARGSUSED */
void
rfs_readdir(struct nfsrddirargs *rda, struct nfsrddirres *rd,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error;
	int iseof;		/* set by VOP_READDIR at end-of-directory */
	struct iovec iov;
	struct uio uio;
	vnode_t *vp;
	char *ndata = NULL;	/* charset-converted entries, if any */
	struct sockaddr *ca;
	size_t nents;
	int ret;

	vp = nfs_fhtovp(&rda->rda_fh, exi);
	if (vp == NULL) {
		rd->rd_entries = NULL;
		rd->rd_status = NFSERR_STALE;
		return;
	}

	if (vp->v_type != VDIR) {
		VN_RELE(vp);
		rd->rd_entries = NULL;
		rd->rd_status = NFSERR_NOTDIR;
		return;
	}

	/* Shared-lock the directory for the duration of the read. */
	(void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);

	error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);

	if (error) {
		rd->rd_entries = NULL;
		goto bad;
	}

	/* A zero-byte request returns an empty, non-EOF reply. */
	if (rda->rda_count == 0) {
		rd->rd_entries = NULL;
		rd->rd_size = 0;
		rd->rd_eof = FALSE;
		goto bad;
	}

	/* Clamp the transfer to the NFSv2 maximum. */
	rda->rda_count = MIN(rda->rda_count, NFS_MAXDATA);

	/*
	 * Allocate data for entries. This will be freed by rfs_rddirfree.
	 */
	rd->rd_bufsize = (uint_t)rda->rda_count;
	rd->rd_entries = kmem_alloc(rd->rd_bufsize, KM_SLEEP);

	/*
	 * Set up io vector to read directory data
	 */
	iov.iov_base = (caddr_t)rd->rd_entries;
	iov.iov_len = rda->rda_count;
	uio.uio_iov = &iov;
	uio.uio_iovcnt = 1;
	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_extflg = UIO_COPY_CACHED;
	uio.uio_loffset = (offset_t)rda->rda_offset;
	uio.uio_resid = rda->rda_count;

	/*
	 * read directory
	 */
	error = VOP_READDIR(vp, &uio, cr, &iseof, NULL, 0);

	/*
	 * Clean up
	 */
	if (!error) {
		/*
		 * set size and eof
		 */
		if (uio.uio_resid == rda->rda_count) {
			rd->rd_size = 0;
			rd->rd_eof = TRUE;
		} else {
			rd->rd_size = (uint32_t)(rda->rda_count -
			    uio.uio_resid);
			rd->rd_eof = iseof ? TRUE : FALSE;
		}
	}

	/*
	 * Convert entry names to the client's character set, dropping
	 * entries that no longer fit.
	 *
	 * NOTE(review): if VOP_READDIR failed above, rd->rd_size is
	 * read here without having been set — confirm whether this
	 * path should be skipped on error.
	 */
	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
	nents = nfscmd_countents((char *)rd->rd_entries, rd->rd_size);
	ret = nfscmd_convdirplus(ca, exi, (char *)rd->rd_entries, nents,
	    rda->rda_count, &ndata);

	if (ret != 0) {
		size_t dropbytes;
		/*
		 * We had to drop one or more entries in order to fit
		 * during the character conversion. We need to patch
		 * up the size and eof info.
		 */
		if (rd->rd_eof)
			rd->rd_eof = FALSE;
		dropbytes = nfscmd_dropped_entrysize(
		    (struct dirent64 *)rd->rd_entries, nents, ret);
		rd->rd_size -= dropbytes;
	}
	if (ndata == NULL) {
		ndata = (char *)rd->rd_entries;
	} else if (ndata != (char *)rd->rd_entries) {
		/* Conversion allocated a new buffer; swap it in. */
		kmem_free(rd->rd_entries, rd->rd_bufsize);
		rd->rd_entries = (void *)ndata;
		rd->rd_bufsize = rda->rda_count;
	}

bad:
	/* Common exit for both success and failure paths. */
	VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);

#if 0 /* notyet */
	/*
	 * Don't do this. It causes local disk writes when just
	 * reading the file and the overhead is deemed larger
	 * than the benefit.
	 */
	/*
	 * Force modified metadata out to stable storage.
	 */
	(void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
#endif

	VN_RELE(vp);

	rd->rd_status = puterrno(error);

}
2773 2778 void *
2774 2779 rfs_readdir_getfh(struct nfsrddirargs *rda)
2775 2780 {
2776 2781 return (&rda->rda_fh);
2777 2782 }
2778 2783 void
2779 2784 rfs_rddirfree(struct nfsrddirres *rd)
2780 2785 {
2781 2786 if (rd->rd_entries != NULL)
2782 2787 kmem_free(rd->rd_entries, rd->rd_bufsize);
2783 2788 }
2784 2789
2785 2790 /* ARGSUSED */
2786 2791 void
2787 2792 rfs_statfs(fhandle_t *fh, struct nfsstatfs *fs, struct exportinfo *exi,
2788 2793 struct svc_req *req, cred_t *cr, bool_t ro)
2789 2794 {
2790 2795 int error;
2791 2796 struct statvfs64 sb;
2792 2797 vnode_t *vp;
2793 2798
2794 2799 vp = nfs_fhtovp(fh, exi);
2795 2800 if (vp == NULL) {
2796 2801 fs->fs_status = NFSERR_STALE;
2797 2802 return;
2798 2803 }
2799 2804
2800 2805 error = VFS_STATVFS(vp->v_vfsp, &sb);
2801 2806
2802 2807 if (!error) {
2803 2808 fs->fs_tsize = nfstsize();
2804 2809 fs->fs_bsize = sb.f_frsize;
2805 2810 fs->fs_blocks = sb.f_blocks;
2806 2811 fs->fs_bfree = sb.f_bfree;
2807 2812 fs->fs_bavail = sb.f_bavail;
2808 2813 }
2809 2814
2810 2815 VN_RELE(vp);
2811 2816
2812 2817 fs->fs_status = puterrno(error);
2813 2818
2814 2819 }
2815 2820 void *
2816 2821 rfs_statfs_getfh(fhandle_t *fh)
2817 2822 {
2818 2823 return (fh);
2819 2824 }
2820 2825
/*
 * Convert NFSv2 on-the-wire settable attributes (nfssattr) into a
 * vattr, setting va_mask bits only for fields the client supplied.
 * The wire encodes "not set" as all-ones; times use signed -1.
 * Returns 0, or EOVERFLOW on 32-bit kernels if a time won't fit.
 */
static int
sattr_to_vattr(struct nfssattr *sa, struct vattr *vap)
{
	vap->va_mask = 0;

	/*
	 * There was a sign extension bug in some VFS based systems
	 * which stored the mode as a short. When it would get
	 * assigned to a u_long, no sign extension would occur.
	 * It needed to, but this wasn't noticed because sa_mode
	 * would then get assigned back to the short, thus ignoring
	 * the upper 16 bits of sa_mode.
	 *
	 * To make this implementation work for both broken
	 * clients and good clients, we check for both versions
	 * of the mode.
	 */
	if (sa->sa_mode != (uint32_t)((ushort_t)-1) &&
	    sa->sa_mode != (uint32_t)-1) {
		vap->va_mask |= AT_MODE;
		vap->va_mode = sa->sa_mode;
	}
	if (sa->sa_uid != (uint32_t)-1) {
		vap->va_mask |= AT_UID;
		vap->va_uid = sa->sa_uid;
	}
	if (sa->sa_gid != (uint32_t)-1) {
		vap->va_mask |= AT_GID;
		vap->va_gid = sa->sa_gid;
	}
	if (sa->sa_size != (uint32_t)-1) {
		vap->va_mask |= AT_SIZE;
		vap->va_size = sa->sa_size;
	}
	/* Both tv_sec and tv_usec must be set for the time to count. */
	if (sa->sa_atime.tv_sec != (int32_t)-1 &&
	    sa->sa_atime.tv_usec != (int32_t)-1) {
#ifndef _LP64
		/* return error if time overflow */
		if (!NFS2_TIME_OK(sa->sa_atime.tv_sec))
			return (EOVERFLOW);
#endif
		vap->va_mask |= AT_ATIME;
		/*
		 * nfs protocol defines times as unsigned so don't extend sign,
		 * unless sysadmin set nfs_allow_preepoch_time.
		 */
		NFS_TIME_T_CONVERT(vap->va_atime.tv_sec, sa->sa_atime.tv_sec);
		vap->va_atime.tv_nsec = (uint32_t)(sa->sa_atime.tv_usec * 1000);
	}
	if (sa->sa_mtime.tv_sec != (int32_t)-1 &&
	    sa->sa_mtime.tv_usec != (int32_t)-1) {
#ifndef _LP64
		/* return error if time overflow */
		if (!NFS2_TIME_OK(sa->sa_mtime.tv_sec))
			return (EOVERFLOW);
#endif
		vap->va_mask |= AT_MTIME;
		/*
		 * nfs protocol defines times as unsigned so don't extend sign,
		 * unless sysadmin set nfs_allow_preepoch_time.
		 */
		NFS_TIME_T_CONVERT(vap->va_mtime.tv_sec, sa->sa_mtime.tv_sec);
		vap->va_mtime.tv_nsec = (uint32_t)(sa->sa_mtime.tv_usec * 1000);
	}
	return (0);
}
2887 2892
/*
 * Map vnode types (vtype_t, indexed VNON..VBAD) to NFSv2 on-the-wire
 * file types.  Types with no NFSv2 equivalent map to 0; VFIFO is
 * remapped specially by NA_SETFIFO in vattr_to_nattr().
 */
static const enum nfsftype vt_to_nf[] = {
	0, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, 0, 0, 0, NFSOC, 0
};
2891 2896
/*
 * Convert a vattr into NFSv2 on-the-wire attributes (nfsfattr).
 *
 * check the following fields for overflow: nodeid, size, and time.
 * There could be a problem when converting 64-bit LP64 fields
 * into 32-bit ones. Return an error if there is an overflow.
 * Returns 0, EFBIG (nodeid/size too big), or EOVERFLOW (times).
 */
int
vattr_to_nattr(struct vattr *vap, struct nfsfattr *na)
{
	ASSERT(vap->va_type >= VNON && vap->va_type <= VBAD);
	na->na_type = vt_to_nf[vap->va_type];

	/* -1 sentinels mean "unknown"; pass them through widened. */
	if (vap->va_mode == (unsigned short) -1)
		na->na_mode = (uint32_t)-1;
	else
		na->na_mode = VTTOIF(vap->va_type) | vap->va_mode;

	if (vap->va_uid == (unsigned short)(-1))
		na->na_uid = (uint32_t)(-1);
	else if (vap->va_uid == UID_NOBODY)
		na->na_uid = (uint32_t)NFS_UID_NOBODY;
	else
		na->na_uid = vap->va_uid;

	if (vap->va_gid == (unsigned short)(-1))
		na->na_gid = (uint32_t)-1;
	else if (vap->va_gid == GID_NOBODY)
		na->na_gid = (uint32_t)NFS_GID_NOBODY;
	else
		na->na_gid = vap->va_gid;

	/*
	 * Do we need to check fsid for overflow? It is 64-bit in the
	 * vattr, but are bigger than 32 bit values supported?
	 */
	na->na_fsid = vap->va_fsid;

	na->na_nodeid = vap->va_nodeid;

	/*
	 * Check to make sure that the nodeid is representable over the
	 * wire without losing bits.
	 */
	if (vap->va_nodeid != (u_longlong_t)na->na_nodeid)
		return (EFBIG);
	na->na_nlink = vap->va_nlink;

	/*
	 * Check for big files here, instead of at the caller. See
	 * comments in cstat for large special file explanation.
	 */
	if (vap->va_size > (u_longlong_t)MAXOFF32_T) {
		if ((vap->va_type == VREG) || (vap->va_type == VDIR))
			return (EFBIG);
		if ((vap->va_type == VBLK) || (vap->va_type == VCHR)) {
			/* UNKNOWN_SIZE | OVERFLOW */
			na->na_size = MAXOFF32_T;
		} else
			na->na_size = vap->va_size;
	} else
		na->na_size = vap->va_size;

	/*
	 * If the vnode times overflow the 32-bit times that NFS2
	 * uses on the wire then return an error.
	 */
	if (!NFS_VAP_TIME_OK(vap)) {
		return (EOVERFLOW);
	}
	na->na_atime.tv_sec = vap->va_atime.tv_sec;
	na->na_atime.tv_usec = vap->va_atime.tv_nsec / 1000;

	na->na_mtime.tv_sec = vap->va_mtime.tv_sec;
	na->na_mtime.tv_usec = vap->va_mtime.tv_nsec / 1000;

	na->na_ctime.tv_sec = vap->va_ctime.tv_sec;
	na->na_ctime.tv_usec = vap->va_ctime.tv_nsec / 1000;

	/*
	 * If the dev_t will fit into 16 bits then compress
	 * it, otherwise leave it alone. See comments in
	 * nfs_client.c.
	 */
	if (getminor(vap->va_rdev) <= SO4_MAXMIN &&
	    getmajor(vap->va_rdev) <= SO4_MAXMAJ)
		na->na_rdev = nfsv2_cmpdev(vap->va_rdev);
	else
		(void) cmpldev(&na->na_rdev, vap->va_rdev);

	na->na_blocks = vap->va_nblocks;
	na->na_blocksize = vap->va_blksize;

	/*
	 * This bit of ugliness is a *TEMPORARY* hack to preserve the
	 * over-the-wire protocols for named-pipe vnodes. It remaps the
	 * VFIFO type to the special over-the-wire type. (see note in nfs.h)
	 *
	 * BUYER BEWARE:
	 * If you are porting the NFS to a non-Sun server, you probably
	 * don't want to include the following block of code. The
	 * over-the-wire special file types will be changing with the
	 * NFS Protocol Revision.
	 */
	if (vap->va_type == VFIFO)
		NA_SETFIFO(na);
	return (0);
}
2998 3003
2999 3004 /*
3000 3005 * acl v2 support: returns approximate permission.
3001 3006 * default: returns minimal permission (more restrictive)
3002 3007 * aclok: returns maximal permission (less restrictive)
3003 3008 * This routine changes the permissions that are alaredy in *va.
3004 3009 * If a file has minimal ACL, i.e. aclcnt == MIN_ACL_ENTRIES,
3005 3010 * CLASS_OBJ is always the same as GROUP_OBJ entry.
3006 3011 */
3007 3012 static void
3008 3013 acl_perm(struct vnode *vp, struct exportinfo *exi, struct vattr *va, cred_t *cr)
3009 3014 {
3010 3015 vsecattr_t vsa;
3011 3016 int aclcnt;
3012 3017 aclent_t *aclentp;
3013 3018 mode_t mask_perm;
3014 3019 mode_t grp_perm;
3015 3020 mode_t other_perm;
3016 3021 mode_t other_orig;
3017 3022 int error;
3018 3023
3019 3024 /* dont care default acl */
3020 3025 vsa.vsa_mask = (VSA_ACL | VSA_ACLCNT);
3021 3026 error = VOP_GETSECATTR(vp, &vsa, 0, cr, NULL);
3022 3027
3023 3028 if (!error) {
3024 3029 aclcnt = vsa.vsa_aclcnt;
3025 3030 if (aclcnt > MIN_ACL_ENTRIES) {
3026 3031 /* non-trivial ACL */
3027 3032 aclentp = vsa.vsa_aclentp;
3028 3033 if (exi->exi_export.ex_flags & EX_ACLOK) {
3029 3034 /* maximal permissions */
3030 3035 grp_perm = 0;
3031 3036 other_perm = 0;
3032 3037 for (; aclcnt > 0; aclcnt--, aclentp++) {
3033 3038 switch (aclentp->a_type) {
3034 3039 case USER_OBJ:
3035 3040 break;
3036 3041 case USER:
3037 3042 grp_perm |=
3038 3043 aclentp->a_perm << 3;
3039 3044 other_perm |= aclentp->a_perm;
3040 3045 break;
3041 3046 case GROUP_OBJ:
3042 3047 grp_perm |=
3043 3048 aclentp->a_perm << 3;
3044 3049 break;
3045 3050 case GROUP:
3046 3051 other_perm |= aclentp->a_perm;
3047 3052 break;
3048 3053 case OTHER_OBJ:
3049 3054 other_orig = aclentp->a_perm;
3050 3055 break;
3051 3056 case CLASS_OBJ:
3052 3057 mask_perm = aclentp->a_perm;
3053 3058 break;
3054 3059 default:
3055 3060 break;
3056 3061 }
3057 3062 }
3058 3063 grp_perm &= mask_perm << 3;
3059 3064 other_perm &= mask_perm;
3060 3065 other_perm |= other_orig;
3061 3066
3062 3067 } else {
3063 3068 /* minimal permissions */
3064 3069 grp_perm = 070;
3065 3070 other_perm = 07;
3066 3071 for (; aclcnt > 0; aclcnt--, aclentp++) {
3067 3072 switch (aclentp->a_type) {
3068 3073 case USER_OBJ:
3069 3074 break;
3070 3075 case USER:
3071 3076 case CLASS_OBJ:
3072 3077 grp_perm &=
3073 3078 aclentp->a_perm << 3;
3074 3079 other_perm &=
3075 3080 aclentp->a_perm;
3076 3081 break;
3077 3082 case GROUP_OBJ:
3078 3083 grp_perm &=
3079 3084 aclentp->a_perm << 3;
3080 3085 break;
3081 3086 case GROUP:
3082 3087 other_perm &=
3083 3088 aclentp->a_perm;
3084 3089 break;
3085 3090 case OTHER_OBJ:
3086 3091 other_perm &=
3087 3092 aclentp->a_perm;
3088 3093 break;
3089 3094 default:
3090 3095 break;
3091 3096 }
3092 3097 }
3093 3098 }
3094 3099 /* copy to va */
3095 3100 va->va_mode &= ~077;
3096 3101 va->va_mode |= grp_perm | other_perm;
3097 3102 }
3098 3103 if (vsa.vsa_aclcnt)
3099 3104 kmem_free(vsa.vsa_aclentp,
3100 3105 vsa.vsa_aclcnt * sizeof (aclent_t));
3101 3106 }
3102 3107 }
3103 3108
/*
 * One-time (not per-zone) NFSv2 server initialization: obtain the
 * caller id used to identify this server to the filesystem layer.
 */
void
rfs_srvrinit(void)
{
	nfs2_srv_caller_id = fs_new_caller_id();
}
3109 3114
/*
 * Counterpart to rfs_srvrinit(); nothing to tear down currently.
 */
void
rfs_srvrfini(void)
{
}
3114 3119
3115 3120 /* ARGSUSED */
3116 3121 void
3117 3122 rfs_srv_zone_init(nfs_globals_t *ng)
3118 3123 {
3119 3124 nfs_srv_t *ns;
3120 3125
3121 3126 ns = kmem_zalloc(sizeof (*ns), KM_SLEEP);
3122 3127
3123 3128 mutex_init(&ns->async_write_lock, NULL, MUTEX_DEFAULT, NULL);
3124 3129 ns->write_async = 1;
3125 3130
3126 3131 ng->nfs_srv = ns;
3127 3132 }
3128 3133
3129 3134 /* ARGSUSED */
3130 3135 void
3131 3136 rfs_srv_zone_fini(nfs_globals_t *ng)
3132 3137 {
3133 3138 nfs_srv_t *ns = ng->nfs_srv;
3134 3139
3135 3140 ng->nfs_srv = NULL;
3136 3141
3137 3142 mutex_destroy(&ns->async_write_lock);
3138 3143 kmem_free(ns, sizeof (*ns));
3139 3144 }
3140 3145
3141 3146 static int
3142 3147 rdma_setup_read_data2(struct nfsreadargs *ra, struct nfsrdresult *rr)
3143 3148 {
3144 3149 struct clist *wcl;
3145 3150 int wlist_len;
3146 3151 uint32_t count = rr->rr_count;
3147 3152
3148 3153 wcl = ra->ra_wlist;
3149 3154
3150 3155 if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE) {
3151 3156 return (FALSE);
3152 3157 }
3153 3158
3154 3159 wcl = ra->ra_wlist;
3155 3160 rr->rr_ok.rrok_wlist_len = wlist_len;
3156 3161 rr->rr_ok.rrok_wlist = wcl;
3157 3162
3158 3163 return (TRUE);
3159 3164 }
|
↓ open down ↓ |
1829 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX