Code review: "curzone reality check and teardown changes to use the RIGHT zone"
(webrev diff of usr/src/uts/common/fs/nfs/nfs_srv.c — viewer controls: Print this page | Split | Close | Expand all | Collapse all)
--- old/usr/src/uts/common/fs/nfs/nfs_srv.c
+++ new/usr/src/uts/common/fs/nfs/nfs_srv.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved.
24 24 * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
25 25 * Copyright (c) 2016 by Delphix. All rights reserved.
26 26 */
27 27
28 28 /*
29 29 * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T.
30 30 * All rights reserved.
31 31 */
32 32
33 33 /*
34 34 * Copyright 2018 Nexenta Systems, Inc.
35 35 * Copyright (c) 2016 by Delphix. All rights reserved.
36 36 */
37 37
38 38 #include <sys/param.h>
39 39 #include <sys/types.h>
40 40 #include <sys/systm.h>
41 41 #include <sys/cred.h>
42 42 #include <sys/buf.h>
43 43 #include <sys/vfs.h>
44 44 #include <sys/vnode.h>
45 45 #include <sys/uio.h>
46 46 #include <sys/stat.h>
47 47 #include <sys/errno.h>
48 48 #include <sys/sysmacros.h>
49 49 #include <sys/statvfs.h>
50 50 #include <sys/kmem.h>
51 51 #include <sys/kstat.h>
52 52 #include <sys/dirent.h>
53 53 #include <sys/cmn_err.h>
54 54 #include <sys/debug.h>
55 55 #include <sys/vtrace.h>
56 56 #include <sys/mode.h>
57 57 #include <sys/acl.h>
58 58 #include <sys/nbmlock.h>
59 59 #include <sys/policy.h>
60 60 #include <sys/sdt.h>
61 61
62 62 #include <rpc/types.h>
63 63 #include <rpc/auth.h>
64 64 #include <rpc/svc.h>
65 65
66 66 #include <nfs/nfs.h>
67 67 #include <nfs/export.h>
68 68 #include <nfs/nfs_cmd.h>
69 69
70 70 #include <vm/hat.h>
71 71 #include <vm/as.h>
72 72 #include <vm/seg.h>
73 73 #include <vm/seg_map.h>
74 74 #include <vm/seg_kmem.h>
75 75
76 76 #include <sys/strsubr.h>
77 77
struct rfs_async_write_list;

/*
 * Zone globals of NFSv2 server.
 *
 * One instance exists per zone (see rfs_zone_key and the
 * rfs_zone_init/rfs_zone_fini prototypes below), so per-zone NFSv2
 * server state is kept separate across zones.
 */
typedef struct nfs_srv {
	/* presumably protects async_write_head below — the two travel together */
	kmutex_t async_write_lock;
	/* list of pending clustered (async) write requests for this zone */
	struct rfs_async_write_list *async_write_head;

	/*
	 * enables write clustering if == 1
	 */
	int write_async;
} nfs_srv_t;
92 92
93 93 /*
94 94 * These are the interface routines for the server side of the
95 95 * Network File System. See the NFS version 2 protocol specification
96 96 * for a description of this interface.
97 97 */
98 98
/* Convert an over-the-wire NFSv2 sattr into a vattr; returns an errno. */
static int sattr_to_vattr(struct nfssattr *, struct vattr *);
/* Adjust returned attributes for ACL-mediated permissions (defined below). */
static void acl_perm(struct vnode *, struct exportinfo *, struct vattr *,
    cred_t *);
/* Zone-key callbacks — presumably create/destroy the per-zone nfs_srv_t. */
static void *rfs_zone_init(zoneid_t zoneid);
static void rfs_zone_fini(zoneid_t zoneid, void *data);


/*
 * Some "over the wire" UNIX file types. These are encoded
 * into the mode. This needs to be fixed in the next rev.
 */
#define	IFMT	0170000		/* type of file */
#define	IFCHR	0020000		/* character special */
#define	IFBLK	0060000		/* block special */
#define	IFSOCK	0140000		/* socket */

/* Caller id used to tag caller_context_t in all NFSv2 VOP calls below. */
u_longlong_t nfs2_srv_caller_id;
/* Zone key used to look up this zone's nfs_srv_t. */
static zone_key_t rfs_zone_key;
117 117
/*
 * Get file attributes.
 * Returns the current attributes of the file with the given fhandle.
 *
 * ns->ns_status is NFSERR_STALE when the handle cannot be mapped to a
 * vnode; otherwise it is the puterrno() mapping of the attribute fetch
 * result, with ns->ns_attr filled in on success.
 */
/* ARGSUSED */
void
rfs_getattr(fhandle_t *fhp, struct nfsattrstat *ns, struct exportinfo *exi,
    struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error;
	vnode_t *vp;
	struct vattr va;

	vp = nfs_fhtovp(fhp, exi);
	if (vp == NULL) {
		ns->ns_status = NFSERR_STALE;
		return;
	}

	/*
	 * Do the getattr.
	 */
	va.va_mask = AT_ALL;	/* we want all the attributes */

	/* delegation-aware variant — presumably respects NFSv4 delegations */
	error = rfs4_delegated_getattr(vp, &va, 0, cr);

	/* check for overflows */
	if (!error) {
		/* Lie about the object type for a referral */
		if (vn_is_nfs_reparse(vp, cr))
			va.va_type = VLNK;

		acl_perm(vp, exi, &va, cr);
		error = vattr_to_nattr(&va, &ns->ns_attr);
	}

	VN_RELE(vp);

	ns->ns_status = puterrno(error);
}
/* GETATTR's argument is itself the file handle; hand it back verbatim. */
void *
rfs_getattr_getfh(fhandle_t *fhp)
{
	return (fhp);
}
163 163
/*
 * Set file attributes.
 * Sets the attributes of the file with the given fhandle.  Returns
 * the new attributes.
 *
 * Size changes on VREG files are done via VOP_SPACE (after a manual
 * access check) rather than VOP_SETATTR — see the long comment below.
 * On a delegation conflict (EAGAIN + CC_WOULDBLOCK) the reply is
 * dropped by setting T_WOULDBLOCK so the client retransmits.
 * Modified metadata is flushed with VOP_FSYNC before replying.
 */
/* ARGSUSED */
void
rfs_setattr(struct nfssaargs *args, struct nfsattrstat *ns,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error;
	int flag;		/* ATTR_UTIME iff client supplied explicit times */
	int in_crit = 0;	/* nonzero once inside the nbmand critical region */
	vnode_t *vp;
	struct vattr va;	/* attributes requested by the client */
	struct vattr bva;	/* attributes before any size change */
	struct flock64 bf;
	caller_context_t ct;


	vp = nfs_fhtovp(&args->saa_fh, exi);
	if (vp == NULL) {
		ns->ns_status = NFSERR_STALE;
		return;
	}

	if (rdonly(ro, vp)) {
		VN_RELE(vp);
		ns->ns_status = NFSERR_ROFS;
		return;
	}

	error = sattr_to_vattr(&args->saa_sa, &va);
	if (error) {
		VN_RELE(vp);
		ns->ns_status = puterrno(error);
		return;
	}

	/*
	 * If the client is requesting a change to the mtime,
	 * but the nanosecond field is set to 1 billion, then
	 * this is a flag to the server that it should set the
	 * atime and mtime fields to the server's current time.
	 * The 1 billion number actually came from the client
	 * as 1 million, but the units in the over the wire
	 * request are microseconds instead of nanoseconds.
	 *
	 * This is an overload of the protocol and should be
	 * documented in the NFS Version 2 protocol specification.
	 */
	if (va.va_mask & AT_MTIME) {
		if (va.va_mtime.tv_nsec == 1000000000) {
			gethrestime(&va.va_mtime);
			va.va_atime = va.va_mtime;
			va.va_mask |= AT_ATIME;
			flag = 0;
		} else
			flag = ATTR_UTIME;
	} else
		flag = 0;

	/*
	 * If the filesystem is exported with nosuid, then mask off
	 * the setuid and setgid bits.
	 */
	if ((va.va_mask & AT_MODE) && vp->v_type == VREG &&
	    (exi->exi_export.ex_flags & EX_NOSUID))
		va.va_mode &= ~(VSUID | VSGID);

	ct.cc_sysid = 0;
	ct.cc_pid = 0;
	ct.cc_caller_id = nfs2_srv_caller_id;
	ct.cc_flags = CC_DONTBLOCK;

	/*
	 * We need to specially handle size changes because it is
	 * possible for the client to create a file with modes
	 * which indicate read-only, but with the file opened for
	 * writing. If the client then tries to set the size of
	 * the file, then the normal access checking done in
	 * VOP_SETATTR would prevent the client from doing so,
	 * although it should be legal for it to do so. To get
	 * around this, we do the access checking for ourselves
	 * and then use VOP_SPACE which doesn't do the access
	 * checking which VOP_SETATTR does. VOP_SPACE can only
	 * operate on VREG files, let VOP_SETATTR handle the other
	 * extremely rare cases.
	 * Also the client should not be allowed to change the
	 * size of the file if there is a conflicting non-blocking
	 * mandatory lock in the region of change.
	 */
	if (vp->v_type == VREG && va.va_mask & AT_SIZE) {
		if (nbl_need_check(vp)) {
			nbl_start_crit(vp, RW_READER);
			in_crit = 1;
		}

		bva.va_mask = AT_UID | AT_SIZE;

		error = VOP_GETATTR(vp, &bva, 0, cr, &ct);

		if (error) {
			if (in_crit)
				nbl_end_crit(vp);
			VN_RELE(vp);
			ns->ns_status = puterrno(error);
			return;
		}

		if (in_crit) {
			u_offset_t offset;
			ssize_t length;

			/* affected region is between the old and new sizes */
			if (va.va_size < bva.va_size) {
				offset = va.va_size;
				length = bva.va_size - va.va_size;
			} else {
				offset = bva.va_size;
				length = va.va_size - bva.va_size;
			}
			if (nbl_conflict(vp, NBL_WRITE, offset, length, 0,
			    NULL)) {
				error = EACCES;
			}
		}

		/* Owner bypasses mode checks: resize via VOP_SPACE instead. */
		if (crgetuid(cr) == bva.va_uid && !error &&
		    va.va_size != bva.va_size) {
			va.va_mask &= ~AT_SIZE;
			bf.l_type = F_WRLCK;
			bf.l_whence = 0;
			bf.l_start = (off64_t)va.va_size;
			bf.l_len = 0;
			bf.l_sysid = 0;
			bf.l_pid = 0;

			error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
			    (offset_t)va.va_size, cr, &ct);
		}
		if (in_crit)
			nbl_end_crit(vp);
	} else
		error = 0;

	/*
	 * Do the setattr.
	 */
	if (!error && va.va_mask) {
		error = VOP_SETATTR(vp, &va, flag, cr, &ct);
	}

	/*
	 * check if the monitor on either vop_space or vop_setattr detected
	 * a delegation conflict and if so, mark the thread flag as
	 * wouldblock so that the response is dropped and the client will
	 * try again.
	 */
	if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
		VN_RELE(vp);
		curthread->t_flag |= T_WOULDBLOCK;
		return;
	}

	if (!error) {
		va.va_mask = AT_ALL;	/* get everything */

		error = rfs4_delegated_getattr(vp, &va, 0, cr);

		/* check for overflows */
		if (!error) {
			acl_perm(vp, exi, &va, cr);
			error = vattr_to_nattr(&va, &ns->ns_attr);
		}
	}

	ct.cc_flags = 0;

	/*
	 * Force modified metadata out to stable storage.
	 */
	(void) VOP_FSYNC(vp, FNODSYNC, cr, &ct);

	VN_RELE(vp);

	ns->ns_status = puterrno(error);
}
/* Extract the file handle from the SETATTR arguments. */
void *
rfs_setattr_getfh(struct nfssaargs *args)
{
	return (&args->saa_fh);
}
356 356
/* Change and release @exip and @vpp only in success */
/*
 * Cross into a filesystem mounted on top of *vpp.
 *
 * If the covering filesystem is exported with "nohide", *vpp/*exip are
 * swapped (with correct hold/release pairing) for the covering root
 * vnode and its exportinfo, and 0 is returned.  If it is not exported,
 * or lacks "nohide", 0 is still returned with *vpp/*exip untouched.
 * A nonzero return is the errno from traverse() or VOP_FID().
 */
int
rfs_cross_mnt(vnode_t **vpp, struct exportinfo **exip)
{
	struct exportinfo *exi;
	vnode_t *vp = *vpp;
	fid_t fid;
	int error;

	/* work on our own hold so *vpp stays valid until we commit */
	VN_HOLD(vp);

	if ((error = traverse(&vp)) != 0) {
		VN_RELE(vp);
		return (error);
	}

	bzero(&fid, sizeof (fid));
	fid.fid_len = MAXFIDSZ;
	error = VOP_FID(vp, &fid, NULL);
	if (error) {
		VN_RELE(vp);
		return (error);
	}

	exi = checkexport(&vp->v_vfsp->vfs_fsid, &fid);
	if (exi == NULL ||
	    (exi->exi_export.ex_flags & EX_NOHIDE) == 0) {
		/*
		 * It is not error, just subdir is not exported
		 * or "nohide" is not set
		 */
		if (exi != NULL)
			exi_rele(exi);
		VN_RELE(vp);
	} else {
		/* go to submount */
		exi_rele(*exip);
		*exip = exi;

		VN_RELE(*vpp);
		*vpp = vp;
	}

	return (0);
}
402 402
/*
 * Given mounted "dvp" and "exi", go upper mountpoint
 * with dvp/exi correction
 * Return 0 in success
 *
 * On success *dvpp/*exip are replaced (held) by the underlying
 * (covered) directory vnode and its exportinfo, and the old references
 * are released.  Returns -1 if no export is found for the upper
 * directory, leaving *dvpp/*exip untouched.
 */
int
rfs_climb_crossmnt(vnode_t **dvpp, struct exportinfo **exip, cred_t *cr)
{
	struct exportinfo *exi;
	vnode_t *dvp = *dvpp;

	/* the caller's export must belong to the zone we are running in */
	ASSERT3P((*exip)->exi_zone, ==, curzone);
	ASSERT((dvp->v_flag & VROOT) || VN_IS_CURZONEROOT(dvp));

	VN_HOLD(dvp);
	dvp = untraverse(dvp);
	exi = nfs_vptoexi(NULL, dvp, cr, NULL, NULL, FALSE);
	if (exi == NULL) {
		VN_RELE(dvp);
		return (-1);
	}

	/* nfs_vptoexi() must likewise hand back an export in our zone */
	ASSERT3P(exi->exi_zone, ==, curzone);
	exi_rele(*exip);
	*exip = exi;
	VN_RELE(*dvpp);
	*dvpp = dvp;

	return (0);
}
431 433 /*
432 434 * Directory lookup.
433 435 * Returns an fhandle and file attributes for file name in a directory.
434 436 */
435 437 /* ARGSUSED */
436 438 void
437 439 rfs_lookup(struct nfsdiropargs *da, struct nfsdiropres *dr,
438 440 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
439 441 {
440 442 int error;
441 443 vnode_t *dvp;
442 444 vnode_t *vp;
443 445 struct vattr va;
444 446 fhandle_t *fhp = da->da_fhandle;
445 447 struct sec_ol sec = {0, 0};
446 448 bool_t publicfh_flag = FALSE, auth_weak = FALSE;
447 449 char *name;
448 450 struct sockaddr *ca;
449 451
450 452 /*
451 453 * Trusted Extension doesn't support NFSv2. MOUNT
452 454 * will reject v2 clients. Need to prevent v2 client
453 455 * access via WebNFS here.
454 456 */
455 457 if (is_system_labeled() && req->rq_vers == 2) {
456 458 dr->dr_status = NFSERR_ACCES;
457 459 return;
458 460 }
459 461
460 462 /*
461 463 * Disallow NULL paths
462 464 */
463 465 if (da->da_name == NULL || *da->da_name == '\0') {
464 466 dr->dr_status = NFSERR_ACCES;
465 467 return;
466 468 }
467 469
468 470 /*
469 471 * Allow lookups from the root - the default
470 472 * location of the public filehandle.
471 473 */
472 474 if (exi != NULL && (exi->exi_export.ex_flags & EX_PUBLIC)) {
473 475 dvp = ZONE_ROOTVP();
|
↓ open down ↓ |
40 lines elided |
↑ open up ↑ |
474 476 VN_HOLD(dvp);
475 477 } else {
476 478 dvp = nfs_fhtovp(fhp, exi);
477 479 if (dvp == NULL) {
478 480 dr->dr_status = NFSERR_STALE;
479 481 return;
480 482 }
481 483 }
482 484
483 485 exi_hold(exi);
486 + ASSERT3P(exi->exi_zone, ==, curzone);
484 487
485 488 /*
486 489 * Not allow lookup beyond root.
487 490 * If the filehandle matches a filehandle of the exi,
488 491 * then the ".." refers beyond the root of an exported filesystem.
489 492 */
490 493 if (strcmp(da->da_name, "..") == 0 &&
491 494 EQFID(&exi->exi_fid, (fid_t *)&fhp->fh_len)) {
492 495 if ((exi->exi_export.ex_flags & EX_NOHIDE) &&
493 496 ((dvp->v_flag & VROOT) || VN_IS_CURZONEROOT(dvp))) {
494 497 /*
495 498 * special case for ".." and 'nohide'exported root
496 499 */
497 500 if (rfs_climb_crossmnt(&dvp, &exi, cr) != 0) {
498 501 error = NFSERR_ACCES;
499 502 goto out;
500 503 }
501 504 } else {
502 505 error = NFSERR_NOENT;
503 506 goto out;
504 507 }
505 508 }
506 509
507 510 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
508 511 name = nfscmd_convname(ca, exi, da->da_name, NFSCMD_CONV_INBOUND,
509 512 MAXPATHLEN);
510 513
511 514 if (name == NULL) {
512 515 error = NFSERR_ACCES;
513 516 goto out;
514 517 }
515 518
516 519 /*
517 520 * If the public filehandle is used then allow
518 521 * a multi-component lookup, i.e. evaluate
519 522 * a pathname and follow symbolic links if
520 523 * necessary.
521 524 *
522 525 * This may result in a vnode in another filesystem
523 526 * which is OK as long as the filesystem is exported.
524 527 */
525 528 if (PUBLIC_FH2(fhp)) {
526 529 publicfh_flag = TRUE;
527 530
528 531 exi_rele(exi);
529 532
530 533 error = rfs_publicfh_mclookup(name, dvp, cr, &vp, &exi,
531 534 &sec);
532 535 } else {
533 536 /*
534 537 * Do a normal single component lookup.
535 538 */
536 539 error = VOP_LOOKUP(dvp, name, &vp, NULL, 0, NULL, cr,
537 540 NULL, NULL, NULL);
538 541 }
539 542
540 543 if (name != da->da_name)
541 544 kmem_free(name, MAXPATHLEN);
542 545
543 546 if (error == 0 && vn_ismntpt(vp)) {
544 547 error = rfs_cross_mnt(&vp, &exi);
545 548 if (error)
546 549 VN_RELE(vp);
547 550 }
548 551
549 552 if (!error) {
550 553 va.va_mask = AT_ALL; /* we want everything */
551 554
552 555 error = rfs4_delegated_getattr(vp, &va, 0, cr);
553 556
554 557 /* check for overflows */
555 558 if (!error) {
556 559 acl_perm(vp, exi, &va, cr);
557 560 error = vattr_to_nattr(&va, &dr->dr_attr);
558 561 if (!error) {
559 562 if (sec.sec_flags & SEC_QUERY)
560 563 error = makefh_ol(&dr->dr_fhandle, exi,
561 564 sec.sec_index);
562 565 else {
563 566 error = makefh(&dr->dr_fhandle, vp,
564 567 exi);
565 568 if (!error && publicfh_flag &&
566 569 !chk_clnt_sec(exi, req))
567 570 auth_weak = TRUE;
568 571 }
569 572 }
570 573 }
571 574 VN_RELE(vp);
572 575 }
573 576
574 577 out:
575 578 VN_RELE(dvp);
576 579
577 580 if (exi != NULL)
578 581 exi_rele(exi);
579 582
580 583 /*
581 584 * If it's public fh, no 0x81, and client's flavor is
582 585 * invalid, set WebNFS status to WNFSERR_CLNT_FLAVOR now.
583 586 * Then set RPC status to AUTH_TOOWEAK in common_dispatch.
584 587 */
585 588 if (auth_weak)
586 589 dr->dr_status = (enum nfsstat)WNFSERR_CLNT_FLAVOR;
587 590 else
588 591 dr->dr_status = puterrno(error);
589 592 }
/* Extract the directory file handle from the LOOKUP arguments. */
void *
rfs_lookup_getfh(struct nfsdiropargs *da)
{
	return (da->da_fhandle);
}
595 598
/*
 * Read symbolic link.
 * Returns the string in the symbolic link at the given fhandle.
 *
 * On success rl->rl_data holds an NFS_MAXPATHLEN buffer allocated here
 * and freed later by rfs_rlfree(); on every failure path rl_data is
 * left NULL.  For NFS reparse points (referrals) an artificial symlink
 * target is synthesized instead of calling VOP_READLINK().
 */
/* ARGSUSED */
void
rfs_readlink(fhandle_t *fhp, struct nfsrdlnres *rl, struct exportinfo *exi,
    struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error;
	struct iovec iov;
	struct uio uio;
	vnode_t *vp;
	struct vattr va;
	struct sockaddr *ca;
	char *name = NULL;
	int is_referral = 0;

	vp = nfs_fhtovp(fhp, exi);
	if (vp == NULL) {
		rl->rl_data = NULL;
		rl->rl_status = NFSERR_STALE;
		return;
	}

	va.va_mask = AT_MODE;

	error = VOP_GETATTR(vp, &va, 0, cr, NULL);

	if (error) {
		VN_RELE(vp);
		rl->rl_data = NULL;
		rl->rl_status = puterrno(error);
		return;
	}

	/* refuse objects under mandatory locking */
	if (MANDLOCK(vp, va.va_mode)) {
		VN_RELE(vp);
		rl->rl_data = NULL;
		rl->rl_status = NFSERR_ACCES;
		return;
	}

	/* We lied about the object type for a referral */
	if (vn_is_nfs_reparse(vp, cr))
		is_referral = 1;

	/*
	 * XNFS and RFC1094 require us to return ENXIO if argument
	 * is not a link. BUGID 1138002.
	 */
	if (vp->v_type != VLNK && !is_referral) {
		VN_RELE(vp);
		rl->rl_data = NULL;
		rl->rl_status = NFSERR_NXIO;
		return;
	}

	/*
	 * Allocate data for pathname. This will be freed by rfs_rlfree.
	 */
	rl->rl_data = kmem_alloc(NFS_MAXPATHLEN, KM_SLEEP);

	if (is_referral) {
		char *s;
		size_t strsz;

		/* Get an artificial symlink based on a referral */
		s = build_symlink(vp, cr, &strsz);
		global_svstat_ptr[2][NFS_REFERLINKS].value.ui64++;
		DTRACE_PROBE2(nfs2serv__func__referral__reflink,
		    vnode_t *, vp, char *, s);
		if (s == NULL)
			error = EINVAL;
		else {
			error = 0;
			(void) strlcpy(rl->rl_data, s, NFS_MAXPATHLEN);
			rl->rl_count = (uint32_t)MIN(strsz, NFS_MAXPATHLEN);
			kmem_free(s, strsz);
		}

	} else {

		/*
		 * Set up io vector to read sym link data
		 */
		iov.iov_base = rl->rl_data;
		iov.iov_len = NFS_MAXPATHLEN;
		uio.uio_iov = &iov;
		uio.uio_iovcnt = 1;
		uio.uio_segflg = UIO_SYSSPACE;
		uio.uio_extflg = UIO_COPY_CACHED;
		uio.uio_loffset = (offset_t)0;
		uio.uio_resid = NFS_MAXPATHLEN;

		/*
		 * Do the readlink.
		 */
		error = VOP_READLINK(vp, &uio, cr, NULL);

		rl->rl_count = (uint32_t)(NFS_MAXPATHLEN - uio.uio_resid);

		if (!error)
			rl->rl_data[rl->rl_count] = '\0';

	}


	VN_RELE(vp);

	/* convert link text for the client (presumably charset mapping) */
	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
	name = nfscmd_convname(ca, exi, rl->rl_data,
	    NFSCMD_CONV_OUTBOUND, MAXPATHLEN);

	if (name != NULL && name != rl->rl_data) {
		kmem_free(rl->rl_data, NFS_MAXPATHLEN);
		rl->rl_data = name;
	}

	/*
	 * XNFS and RFC1094 require us to return ENXIO if argument
	 * is not a link. UFS returns EINVAL if this is the case,
	 * so we do the mapping here. BUGID 1138002.
	 */
	if (error == EINVAL)
		rl->rl_status = NFSERR_NXIO;
	else
		rl->rl_status = puterrno(error);

}
/* READLINK's argument is itself the file handle; hand it back verbatim. */
void *
rfs_readlink_getfh(fhandle_t *fhp)
{
	return (fhp);
}
/*
 * Free data allocated by rfs_readlink
 */
void
rfs_rlfree(struct nfsrdlnres *rl)
{
	/* rl_data is an NFS_MAXPATHLEN buffer from rfs_readlink (or NULL) */
	if (rl->rl_data != NULL)
		kmem_free(rl->rl_data, NFS_MAXPATHLEN);
}
740 743
/* Attach the RDMA write-chunk list to the read result (defined below). */
static int rdma_setup_read_data2(struct nfsreadargs *, struct nfsrdresult *);

/*
 * Read data.
 * Returns some data read from the file at the given fhandle.
 *
 * Two reply paths: if the client supplied an RDMA write list
 * (ra->ra_wlist) the data is read straight into the client's chunk;
 * otherwise it is read into an mblk (rr->rr_mp) that is freed after
 * the reply is sent (see rfs_rdfree).  Delegation conflicts drop the
 * reply via T_WOULDBLOCK so the client retransmits.
 */
/* ARGSUSED */
void
rfs_read(struct nfsreadargs *ra, struct nfsrdresult *rr,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
	vnode_t *vp;
	int error;
	struct vattr va;
	struct iovec iov;
	struct uio uio;
	mblk_t *mp;
	int alloc_err = 0;
	int in_crit = 0;	/* nonzero once inside the nbmand critical region */
	caller_context_t ct;

	vp = nfs_fhtovp(&ra->ra_fhandle, exi);
	if (vp == NULL) {
		rr->rr_data = NULL;
		rr->rr_status = NFSERR_STALE;
		return;
	}

	if (vp->v_type != VREG) {
		VN_RELE(vp);
		rr->rr_data = NULL;
		rr->rr_status = NFSERR_ISDIR;
		return;
	}

	ct.cc_sysid = 0;
	ct.cc_pid = 0;
	ct.cc_caller_id = nfs2_srv_caller_id;
	ct.cc_flags = CC_DONTBLOCK;

	/*
	 * Enter the critical region before calling VOP_RWLOCK
	 * to avoid a deadlock with write requests.
	 */
	if (nbl_need_check(vp)) {
		nbl_start_crit(vp, RW_READER);
		if (nbl_conflict(vp, NBL_READ, ra->ra_offset, ra->ra_count,
		    0, NULL)) {
			nbl_end_crit(vp);
			VN_RELE(vp);
			rr->rr_data = NULL;
			rr->rr_status = NFSERR_ACCES;
			return;
		}
		in_crit = 1;
	}

	error = VOP_RWLOCK(vp, V_WRITELOCK_FALSE, &ct);

	/* check if a monitor detected a delegation conflict */
	if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
		if (in_crit)
			nbl_end_crit(vp);
		VN_RELE(vp);
		/* mark as wouldblock so response is dropped */
		curthread->t_flag |= T_WOULDBLOCK;

		rr->rr_data = NULL;
		return;
	}

	va.va_mask = AT_ALL;

	error = VOP_GETATTR(vp, &va, 0, cr, &ct);

	if (error) {
		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
		if (in_crit)
			nbl_end_crit(vp);

		VN_RELE(vp);
		rr->rr_data = NULL;
		rr->rr_status = puterrno(error);

		return;
	}

	/*
	 * This is a kludge to allow reading of files created
	 * with no read permission. The owner of the file
	 * is always allowed to read it.
	 */
	if (crgetuid(cr) != va.va_uid) {
		error = VOP_ACCESS(vp, VREAD, 0, cr, &ct);

		if (error) {
			/*
			 * Exec is the same as read over the net because
			 * of demand loading.
			 */
			error = VOP_ACCESS(vp, VEXEC, 0, cr, &ct);
		}
		if (error) {
			VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
			if (in_crit)
				nbl_end_crit(vp);
			VN_RELE(vp);
			rr->rr_data = NULL;
			rr->rr_status = puterrno(error);

			return;
		}
	}

	if (MANDLOCK(vp, va.va_mode)) {
		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
		if (in_crit)
			nbl_end_crit(vp);

		VN_RELE(vp);
		rr->rr_data = NULL;
		rr->rr_status = NFSERR_ACCES;

		return;
	}

	rr->rr_ok.rrok_wlist_len = 0;
	rr->rr_ok.rrok_wlist = NULL;

	/* reads at or beyond EOF succeed with zero bytes */
	if ((u_offset_t)ra->ra_offset >= va.va_size) {
		rr->rr_count = 0;
		rr->rr_data = NULL;
		/*
		 * In this case, status is NFS_OK, but there is no data
		 * to encode. So set rr_mp to NULL.
		 */
		rr->rr_mp = NULL;
		rr->rr_ok.rrok_wlist = ra->ra_wlist;
		if (rr->rr_ok.rrok_wlist)
			clist_zero_len(rr->rr_ok.rrok_wlist);
		goto done;
	}

	if (ra->ra_wlist) {
		/* RDMA path: read directly into the client's write chunk */
		mp = NULL;
		rr->rr_mp = NULL;
		(void) rdma_get_wchunk(req, &iov, ra->ra_wlist);
		if (ra->ra_count > iov.iov_len) {
			rr->rr_data = NULL;
			rr->rr_status = NFSERR_INVAL;
			goto done;
		}
	} else {
		/*
		 * mp will contain the data to be sent out in the read reply.
		 * This will be freed after the reply has been sent out (by the
		 * driver).
		 * Let's roundup the data to a BYTES_PER_XDR_UNIT multiple, so
		 * that the call to xdrmblk_putmblk() never fails.
		 */
		mp = allocb_wait(RNDUP(ra->ra_count), BPRI_MED, STR_NOSIG,
		    &alloc_err);
		ASSERT(mp != NULL);
		ASSERT(alloc_err == 0);

		rr->rr_mp = mp;

		/*
		 * Set up io vector
		 */
		iov.iov_base = (caddr_t)mp->b_datap->db_base;
		iov.iov_len = ra->ra_count;
	}

	uio.uio_iov = &iov;
	uio.uio_iovcnt = 1;
	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_extflg = UIO_COPY_CACHED;
	uio.uio_loffset = (offset_t)ra->ra_offset;
	uio.uio_resid = ra->ra_count;

	error = VOP_READ(vp, &uio, 0, cr, &ct);

	if (error) {
		if (mp)
			freeb(mp);

		/*
		 * check if a monitor detected a delegation conflict and
		 * mark as wouldblock so response is dropped
		 */
		if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
			curthread->t_flag |= T_WOULDBLOCK;
		else
			rr->rr_status = puterrno(error);

		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
		if (in_crit)
			nbl_end_crit(vp);

		VN_RELE(vp);
		rr->rr_data = NULL;

		return;
	}

	/*
	 * Get attributes again so we can send the latest access
	 * time to the client side for its cache.
	 */
	va.va_mask = AT_ALL;

	error = VOP_GETATTR(vp, &va, 0, cr, &ct);

	if (error) {
		if (mp)
			freeb(mp);

		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
		if (in_crit)
			nbl_end_crit(vp);

		VN_RELE(vp);
		rr->rr_data = NULL;
		rr->rr_status = puterrno(error);

		return;
	}

	rr->rr_count = (uint32_t)(ra->ra_count - uio.uio_resid);

	if (mp) {
		rr->rr_data = (char *)mp->b_datap->db_base;
	} else {
		if (ra->ra_wlist) {
			rr->rr_data = (caddr_t)iov.iov_base;
			if (!rdma_setup_read_data2(ra, rr)) {
				rr->rr_data = NULL;
				rr->rr_status = puterrno(NFSERR_INVAL);
			}
		}
	}
done:
	VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
	if (in_crit)
		nbl_end_crit(vp);

	acl_perm(vp, exi, &va, cr);

	/* check for overflows */
	error = vattr_to_nattr(&va, &rr->rr_attr);

	VN_RELE(vp);

	rr->rr_status = puterrno(error);
}
997 1000
/*
 * Free data allocated by rfs_read
 */
void
rfs_rdfree(struct nfsrdresult *rr)
{
	mblk_t *mp;

	/* only a successful non-RDMA read leaves an mblk attached */
	if (rr->rr_status == NFS_OK) {
		mp = rr->rr_mp;
		if (mp != NULL)
			freeb(mp);
	}
}
1012 1015
/* Extract the file handle from the READ arguments. */
void *
rfs_read_getfh(struct nfsreadargs *ra)
{
	return (&ra->ra_fhandle);
}
1018 1021
/*
 * Number of iovec entries kept on the stack by rfs_write_sync();
 * larger mblk chains fall back to a kmem_alloc'd iovec array.
 */
#define	MAX_IOVECS	12

#ifdef DEBUG
/* counters: how often the on-stack iovec array was / was not sufficient */
static int rfs_write_sync_hits = 0;
static int rfs_write_sync_misses = 0;
#endif
1025 1028
1026 1029 /*
1027 1030 * Write data to file.
1028 1031 * Returns attributes of a file after writing some data to it.
1029 1032 *
1030 1033 * Any changes made here, especially in error handling might have
1031 1034 * to also be done in rfs_write (which clusters write requests).
1032 1035 */
1033 1036 /* ARGSUSED */
1034 1037 void
1035 1038 rfs_write_sync(struct nfswriteargs *wa, struct nfsattrstat *ns,
1036 1039 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
1037 1040 {
1038 1041 int error;
1039 1042 vnode_t *vp;
1040 1043 rlim64_t rlimit;
1041 1044 struct vattr va;
1042 1045 struct uio uio;
1043 1046 struct iovec iov[MAX_IOVECS];
1044 1047 mblk_t *m;
1045 1048 struct iovec *iovp;
1046 1049 int iovcnt;
1047 1050 cred_t *savecred;
1048 1051 int in_crit = 0;
1049 1052 caller_context_t ct;
1050 1053
1051 1054 vp = nfs_fhtovp(&wa->wa_fhandle, exi);
1052 1055 if (vp == NULL) {
1053 1056 ns->ns_status = NFSERR_STALE;
1054 1057 return;
1055 1058 }
1056 1059
1057 1060 if (rdonly(ro, vp)) {
1058 1061 VN_RELE(vp);
1059 1062 ns->ns_status = NFSERR_ROFS;
1060 1063 return;
1061 1064 }
1062 1065
1063 1066 if (vp->v_type != VREG) {
1064 1067 VN_RELE(vp);
1065 1068 ns->ns_status = NFSERR_ISDIR;
1066 1069 return;
1067 1070 }
1068 1071
1069 1072 ct.cc_sysid = 0;
1070 1073 ct.cc_pid = 0;
1071 1074 ct.cc_caller_id = nfs2_srv_caller_id;
1072 1075 ct.cc_flags = CC_DONTBLOCK;
1073 1076
1074 1077 va.va_mask = AT_UID|AT_MODE;
1075 1078
1076 1079 error = VOP_GETATTR(vp, &va, 0, cr, &ct);
1077 1080
1078 1081 if (error) {
1079 1082 VN_RELE(vp);
1080 1083 ns->ns_status = puterrno(error);
1081 1084
1082 1085 return;
1083 1086 }
1084 1087
1085 1088 if (crgetuid(cr) != va.va_uid) {
1086 1089 /*
1087 1090 * This is a kludge to allow writes of files created
1088 1091 * with read only permission. The owner of the file
1089 1092 * is always allowed to write it.
1090 1093 */
1091 1094 error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct);
1092 1095
1093 1096 if (error) {
1094 1097 VN_RELE(vp);
1095 1098 ns->ns_status = puterrno(error);
1096 1099 return;
1097 1100 }
1098 1101 }
1099 1102
1100 1103 /*
1101 1104 * Can't access a mandatory lock file. This might cause
1102 1105 * the NFS service thread to block forever waiting for a
1103 1106 * lock to be released that will never be released.
1104 1107 */
1105 1108 if (MANDLOCK(vp, va.va_mode)) {
1106 1109 VN_RELE(vp);
1107 1110 ns->ns_status = NFSERR_ACCES;
1108 1111 return;
1109 1112 }
1110 1113
1111 1114 /*
1112 1115 * We have to enter the critical region before calling VOP_RWLOCK
1113 1116 * to avoid a deadlock with ufs.
1114 1117 */
1115 1118 if (nbl_need_check(vp)) {
1116 1119 nbl_start_crit(vp, RW_READER);
1117 1120 in_crit = 1;
1118 1121 if (nbl_conflict(vp, NBL_WRITE, wa->wa_offset,
1119 1122 wa->wa_count, 0, NULL)) {
1120 1123 error = EACCES;
1121 1124 goto out;
1122 1125 }
1123 1126 }
1124 1127
1125 1128 error = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct);
1126 1129
1127 1130 /* check if a monitor detected a delegation conflict */
1128 1131 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1129 1132 goto out;
1130 1133 }
1131 1134
1132 1135 if (wa->wa_data || wa->wa_rlist) {
1133 1136 /* Do the RDMA thing if necessary */
1134 1137 if (wa->wa_rlist) {
1135 1138 iov[0].iov_base = (char *)((wa->wa_rlist)->u.c_daddr3);
1136 1139 iov[0].iov_len = wa->wa_count;
1137 1140 } else {
1138 1141 iov[0].iov_base = wa->wa_data;
1139 1142 iov[0].iov_len = wa->wa_count;
1140 1143 }
1141 1144 uio.uio_iov = iov;
1142 1145 uio.uio_iovcnt = 1;
1143 1146 uio.uio_segflg = UIO_SYSSPACE;
1144 1147 uio.uio_extflg = UIO_COPY_DEFAULT;
1145 1148 uio.uio_loffset = (offset_t)wa->wa_offset;
1146 1149 uio.uio_resid = wa->wa_count;
1147 1150 /*
1148 1151 * The limit is checked on the client. We
1149 1152 * should allow any size writes here.
1150 1153 */
1151 1154 uio.uio_llimit = curproc->p_fsz_ctl;
1152 1155 rlimit = uio.uio_llimit - wa->wa_offset;
1153 1156 if (rlimit < (rlim64_t)uio.uio_resid)
1154 1157 uio.uio_resid = (uint_t)rlimit;
1155 1158
1156 1159 /*
1157 1160 * for now we assume no append mode
1158 1161 */
1159 1162 /*
1160 1163 * We're changing creds because VM may fault and we need
1161 1164 * the cred of the current thread to be used if quota
1162 1165 * checking is enabled.
1163 1166 */
1164 1167 savecred = curthread->t_cred;
1165 1168 curthread->t_cred = cr;
1166 1169 error = VOP_WRITE(vp, &uio, FSYNC, cr, &ct);
1167 1170 curthread->t_cred = savecred;
1168 1171 } else {
1169 1172
1170 1173 iovcnt = 0;
1171 1174 for (m = wa->wa_mblk; m != NULL; m = m->b_cont)
1172 1175 iovcnt++;
1173 1176 if (iovcnt <= MAX_IOVECS) {
1174 1177 #ifdef DEBUG
1175 1178 rfs_write_sync_hits++;
1176 1179 #endif
1177 1180 iovp = iov;
1178 1181 } else {
1179 1182 #ifdef DEBUG
1180 1183 rfs_write_sync_misses++;
1181 1184 #endif
1182 1185 iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
1183 1186 }
1184 1187 mblk_to_iov(wa->wa_mblk, iovcnt, iovp);
1185 1188 uio.uio_iov = iovp;
1186 1189 uio.uio_iovcnt = iovcnt;
1187 1190 uio.uio_segflg = UIO_SYSSPACE;
1188 1191 uio.uio_extflg = UIO_COPY_DEFAULT;
1189 1192 uio.uio_loffset = (offset_t)wa->wa_offset;
1190 1193 uio.uio_resid = wa->wa_count;
1191 1194 /*
1192 1195 * The limit is checked on the client. We
1193 1196 * should allow any size writes here.
1194 1197 */
1195 1198 uio.uio_llimit = curproc->p_fsz_ctl;
1196 1199 rlimit = uio.uio_llimit - wa->wa_offset;
1197 1200 if (rlimit < (rlim64_t)uio.uio_resid)
1198 1201 uio.uio_resid = (uint_t)rlimit;
1199 1202
1200 1203 /*
1201 1204 * For now we assume no append mode.
1202 1205 */
1203 1206 /*
1204 1207 * We're changing creds because VM may fault and we need
1205 1208 * the cred of the current thread to be used if quota
1206 1209 * checking is enabled.
1207 1210 */
1208 1211 savecred = curthread->t_cred;
1209 1212 curthread->t_cred = cr;
1210 1213 error = VOP_WRITE(vp, &uio, FSYNC, cr, &ct);
1211 1214 curthread->t_cred = savecred;
1212 1215
1213 1216 if (iovp != iov)
1214 1217 kmem_free(iovp, sizeof (*iovp) * iovcnt);
1215 1218 }
1216 1219
1217 1220 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct);
1218 1221
1219 1222 if (!error) {
1220 1223 /*
1221 1224 * Get attributes again so we send the latest mod
1222 1225 * time to the client side for its cache.
1223 1226 */
1224 1227 va.va_mask = AT_ALL; /* now we want everything */
1225 1228
1226 1229 error = VOP_GETATTR(vp, &va, 0, cr, &ct);
1227 1230
1228 1231 /* check for overflows */
1229 1232 if (!error) {
1230 1233 acl_perm(vp, exi, &va, cr);
1231 1234 error = vattr_to_nattr(&va, &ns->ns_attr);
1232 1235 }
1233 1236 }
1234 1237
1235 1238 out:
1236 1239 if (in_crit)
1237 1240 nbl_end_crit(vp);
1238 1241 VN_RELE(vp);
1239 1242
1240 1243 /* check if a monitor detected a delegation conflict */
1241 1244 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
1242 1245 /* mark as wouldblock so response is dropped */
1243 1246 curthread->t_flag |= T_WOULDBLOCK;
1244 1247 else
1245 1248 ns->ns_status = puterrno(error);
1246 1249
1247 1250 }
1248 1251
/*
 * One queued NFSv2 WRITE request.  Requests for the same file handle are
 * linked into a "cluster" (see rfs_async_write_list) so that contiguous
 * writes can be coalesced into a single VOP_WRITE by the thread that
 * owns the cluster.
 */
struct rfs_async_write {
	struct nfswriteargs *wa;	/* decoded WRITE arguments */
	struct nfsattrstat *ns;		/* response to fill in for this req */
	struct svc_req *req;		/* RPC request handle */
	cred_t *cr;			/* credentials of this request */
	bool_t ro;			/* TRUE if export is read-only */
	kthread_t *thread;		/* service thread parked on this req */
	struct rfs_async_write *list;	/* next request in the cluster */
};
1258 1261
/*
 * A cluster of pending WRITE requests against a single file handle.
 * Clusters are chained off the per-zone async_write_head and protected
 * by the per-zone async_write_lock; waiters sleep on cv until the
 * cluster owner fills in their ns_status and broadcasts.
 */
struct rfs_async_write_list {
	fhandle_t *fhp;			/* file handle shared by the cluster */
	kcondvar_t cv;			/* waiters blocked until results posted */
	struct rfs_async_write *list;	/* requests, sorted by wa_offset */
	struct rfs_async_write_list *next;	/* next cluster */
};
1265 1268
/*
 * NOTE(review): rfs_write() below uses the per-zone cluster state
 * (nsrv->async_write_head / nsrv->async_write_lock), not these globals.
 * They appear to be vestiges of the pre-zone implementation — confirm
 * there are no remaining users before removing.
 */
static struct rfs_async_write_list *rfs_async_write_head = NULL;
static kmutex_t rfs_async_write_lock;
static int rfs_write_async = 1;	/* enables write clustering if == 1 */

#define	MAXCLIOVECS	42	/* max on-stack iovecs for a cluster */
#define	RFSWRITE_INITVAL	(enum nfsstat) -1	/* "no status yet" */

#ifdef DEBUG
/* counters: clusters that fit in the on-stack iovec array vs. kmem_alloc */
static int rfs_write_hits = 0;
static int rfs_write_misses = 0;
#endif
1277 1280
/*
 * Write data to file.
 * Returns attributes of a file after writing some data to it.
 *
 * When per-zone write clustering is enabled, concurrent WRITEs to the
 * same file handle are gathered into a cluster: the first thread takes
 * the file's write lock (a deliberate delay that lets the cluster
 * grow), then performs the I/O for every queued request, coalescing
 * contiguous ranges into single VOP_WRITE calls, and finally posts
 * each request's status and wakes the parked service threads.
 */
void
rfs_write(struct nfswriteargs *wa, struct nfsattrstat *ns,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error;
	vnode_t *vp;
	rlim64_t rlimit;
	struct vattr va;
	struct uio uio;
	struct rfs_async_write_list *lp;
	struct rfs_async_write_list *nlp;
	struct rfs_async_write *rp;
	struct rfs_async_write *nrp;
	struct rfs_async_write *trp;
	struct rfs_async_write *lrp;
	int data_written;
	int iovcnt;
	mblk_t *m;
	struct iovec *iovp;
	struct iovec *niovp;
	struct iovec iov[MAXCLIOVECS];
	int count;
	int rcount;
	uint_t off;
	uint_t len;
	struct rfs_async_write nrpsp;	/* our on-stack request node */
	struct rfs_async_write_list nlpsp;	/* our on-stack cluster head */
	ushort_t t_flag;
	cred_t *savecred;
	int in_crit = 0;
	caller_context_t ct;
	nfs_srv_t *nsrv;

	/* Sanity: the export (if any) must belong to the current zone. */
	ASSERT3P(curzone, ==, ((exi == NULL) ? curzone : exi->exi_zone));
	nsrv = zone_getspecific(rfs_zone_key, curzone);
	if (!nsrv->write_async) {
		rfs_write_sync(wa, ns, exi, req, cr, ro);
		return;
	}

	/*
	 * Initialize status to RFSWRITE_INITVAL instead of 0, since value of 0
	 * is considered an OK.
	 */
	ns->ns_status = RFSWRITE_INITVAL;

	nrp = &nrpsp;
	nrp->wa = wa;
	nrp->ns = ns;
	nrp->req = req;
	nrp->cr = cr;
	nrp->ro = ro;
	nrp->thread = curthread;

	/* The cluster owner will reference our on-stack node; don't swap. */
	ASSERT(curthread->t_schedflag & TS_DONT_SWAP);

	/*
	 * Look to see if there is already a cluster started
	 * for this file.
	 */
	mutex_enter(&nsrv->async_write_lock);
	for (lp = nsrv->async_write_head; lp != NULL; lp = lp->next) {
		if (bcmp(&wa->wa_fhandle, lp->fhp,
		    sizeof (fhandle_t)) == 0)
			break;
	}

	/*
	 * If lp is non-NULL, then there is already a cluster
	 * started.  We need to place ourselves in the cluster
	 * list in the right place as determined by starting
	 * offset.  Conflicts with non-blocking mandatory locked
	 * regions will be checked when the cluster is processed.
	 */
	if (lp != NULL) {
		rp = lp->list;
		trp = NULL;
		while (rp != NULL && rp->wa->wa_offset < wa->wa_offset) {
			trp = rp;
			rp = rp->list;
		}
		nrp->list = rp;
		if (trp == NULL)
			lp->list = nrp;
		else
			trp->list = nrp;
		/* Park until the cluster owner posts our status. */
		while (nrp->ns->ns_status == RFSWRITE_INITVAL)
			cv_wait(&lp->cv, &nsrv->async_write_lock);
		mutex_exit(&nsrv->async_write_lock);

		return;
	}

	/*
	 * No cluster started yet, start one and add ourselves
	 * to the list of clusters.
	 */
	nrp->list = NULL;

	nlp = &nlpsp;
	nlp->fhp = &wa->wa_fhandle;
	cv_init(&nlp->cv, NULL, CV_DEFAULT, NULL);
	nlp->list = nrp;
	nlp->next = NULL;

	if (nsrv->async_write_head == NULL) {
		nsrv->async_write_head = nlp;
	} else {
		lp = nsrv->async_write_head;
		while (lp->next != NULL)
			lp = lp->next;
		lp->next = nlp;
	}
	mutex_exit(&nsrv->async_write_lock);

	/*
	 * Convert the file handle common to all of the requests
	 * in this cluster to a vnode.
	 */
	vp = nfs_fhtovp(&wa->wa_fhandle, exi);
	if (vp == NULL) {
		/* Unlink our cluster and fail every queued request. */
		mutex_enter(&nsrv->async_write_lock);
		if (nsrv->async_write_head == nlp)
			nsrv->async_write_head = nlp->next;
		else {
			lp = nsrv->async_write_head;
			while (lp->next != nlp)
				lp = lp->next;
			lp->next = nlp->next;
		}
		/* Propagate our drop-response flag to every waiter. */
		t_flag = curthread->t_flag & T_WOULDBLOCK;
		for (rp = nlp->list; rp != NULL; rp = rp->list) {
			rp->ns->ns_status = NFSERR_STALE;
			rp->thread->t_flag |= t_flag;
		}
		cv_broadcast(&nlp->cv);
		mutex_exit(&nsrv->async_write_lock);

		return;
	}

	/*
	 * Can only write regular files.  Attempts to write any
	 * other file types fail with EISDIR.
	 */
	if (vp->v_type != VREG) {
		VN_RELE(vp);
		mutex_enter(&nsrv->async_write_lock);
		if (nsrv->async_write_head == nlp)
			nsrv->async_write_head = nlp->next;
		else {
			lp = nsrv->async_write_head;
			while (lp->next != nlp)
				lp = lp->next;
			lp->next = nlp->next;
		}
		t_flag = curthread->t_flag & T_WOULDBLOCK;
		for (rp = nlp->list; rp != NULL; rp = rp->list) {
			rp->ns->ns_status = NFSERR_ISDIR;
			rp->thread->t_flag |= t_flag;
		}
		cv_broadcast(&nlp->cv);
		mutex_exit(&nsrv->async_write_lock);

		return;
	}

	/*
	 * Enter the critical region before calling VOP_RWLOCK, to avoid a
	 * deadlock with ufs.
	 */
	if (nbl_need_check(vp)) {
		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
	}

	ct.cc_sysid = 0;
	ct.cc_pid = 0;
	ct.cc_caller_id = nfs2_srv_caller_id;
	ct.cc_flags = CC_DONTBLOCK;

	/*
	 * Lock the file for writing.  This operation provides
	 * the delay which allows clusters to grow.
	 */
	error = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct);

	/* check if a monitor detected a delegation conflict */
	if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
		if (in_crit)
			nbl_end_crit(vp);
		VN_RELE(vp);
		/* mark as wouldblock so response is dropped */
		curthread->t_flag |= T_WOULDBLOCK;
		mutex_enter(&nsrv->async_write_lock);
		if (nsrv->async_write_head == nlp)
			nsrv->async_write_head = nlp->next;
		else {
			lp = nsrv->async_write_head;
			while (lp->next != nlp)
				lp = lp->next;
			lp->next = nlp->next;
		}
		for (rp = nlp->list; rp != NULL; rp = rp->list) {
			if (rp->ns->ns_status == RFSWRITE_INITVAL) {
				rp->ns->ns_status = puterrno(error);
				rp->thread->t_flag |= T_WOULDBLOCK;
			}
		}
		cv_broadcast(&nlp->cv);
		mutex_exit(&nsrv->async_write_lock);

		return;
	}

	/*
	 * Disconnect this cluster from the list of clusters.
	 * The cluster that is being dealt with must be fixed
	 * in size after this point, so there is no reason
	 * to leave it on the list so that new requests can
	 * find it.
	 *
	 * The algorithm is that the first write request will
	 * create a cluster, convert the file handle to a
	 * vnode pointer, and then lock the file for writing.
	 * This request is not likely to be clustered with
	 * any others.  However, the next request will create
	 * a new cluster and be blocked in VOP_RWLOCK while
	 * the first request is being processed.  This delay
	 * will allow more requests to be clustered in this
	 * second cluster.
	 */
	mutex_enter(&nsrv->async_write_lock);
	if (nsrv->async_write_head == nlp)
		nsrv->async_write_head = nlp->next;
	else {
		lp = nsrv->async_write_head;
		while (lp->next != nlp)
			lp = lp->next;
		lp->next = nlp->next;
	}
	mutex_exit(&nsrv->async_write_lock);

	/*
	 * Step through the list of requests in this cluster.
	 * We need to check permissions to make sure that all
	 * of the requests have sufficient permission to write
	 * the file.  A cluster can be composed of requests
	 * from different clients and different users on each
	 * client.
	 *
	 * As a side effect, we also calculate the size of the
	 * byte range that this cluster encompasses.
	 */
	rp = nlp->list;
	off = rp->wa->wa_offset;
	len = (uint_t)0;
	do {
		if (rdonly(rp->ro, vp)) {
			rp->ns->ns_status = NFSERR_ROFS;
			t_flag = curthread->t_flag & T_WOULDBLOCK;
			rp->thread->t_flag |= t_flag;
			continue;
		}

		va.va_mask = AT_UID|AT_MODE;

		error = VOP_GETATTR(vp, &va, 0, rp->cr, &ct);

		if (!error) {
			if (crgetuid(rp->cr) != va.va_uid) {
				/*
				 * This is a kludge to allow writes of files
				 * created with read only permission.  The
				 * owner of the file is always allowed to
				 * write it.
				 */
				error = VOP_ACCESS(vp, VWRITE, 0, rp->cr, &ct);
			}
			/*
			 * Mandatory-locked files are refused: a blocking
			 * lock could stall the service thread forever.
			 */
			if (!error && MANDLOCK(vp, va.va_mode))
				error = EACCES;
		}

		/*
		 * Check for a conflict with a nbmand-locked region.
		 */
		if (in_crit && nbl_conflict(vp, NBL_WRITE, rp->wa->wa_offset,
		    rp->wa->wa_count, 0, NULL)) {
			error = EACCES;
		}

		if (error) {
			rp->ns->ns_status = puterrno(error);
			t_flag = curthread->t_flag & T_WOULDBLOCK;
			rp->thread->t_flag |= t_flag;
			continue;
		}
		if (len < rp->wa->wa_offset + rp->wa->wa_count - off)
			len = rp->wa->wa_offset + rp->wa->wa_count - off;
	} while ((rp = rp->list) != NULL);

	/*
	 * Step through the cluster attempting to gather as many
	 * requests which are contiguous as possible.  These
	 * contiguous requests are handled via one call to VOP_WRITE
	 * instead of different calls to VOP_WRITE.  We also keep
	 * track of the fact that any data was written.
	 */
	rp = nlp->list;
	data_written = 0;
	do {
		/*
		 * Skip any requests which are already marked as having an
		 * error.
		 */
		if (rp->ns->ns_status != RFSWRITE_INITVAL) {
			rp = rp->list;
			continue;
		}

		/*
		 * Count the number of iovec's which are required
		 * to handle this set of requests.  One iovec is
		 * needed for each data buffer, whether addressed
		 * by wa_data or by the b_rptr pointers in the
		 * mblk chains.
		 */
		iovcnt = 0;
		lrp = rp;
		for (;;) {
			if (lrp->wa->wa_data || lrp->wa->wa_rlist)
				iovcnt++;
			else {
				m = lrp->wa->wa_mblk;
				while (m != NULL) {
					iovcnt++;
					m = m->b_cont;
				}
			}
			/*
			 * Stop extending the run at the first request
			 * that is missing, already errored, or not
			 * byte-contiguous with its predecessor.
			 */
			if (lrp->list == NULL ||
			    lrp->list->ns->ns_status != RFSWRITE_INITVAL ||
			    lrp->wa->wa_offset + lrp->wa->wa_count !=
			    lrp->list->wa->wa_offset) {
				lrp = lrp->list;
				break;
			}
			lrp = lrp->list;
		}

		if (iovcnt <= MAXCLIOVECS) {
#ifdef DEBUG
			rfs_write_hits++;
#endif
			niovp = iov;
		} else {
#ifdef DEBUG
			rfs_write_misses++;
#endif
			niovp = kmem_alloc(sizeof (*niovp) * iovcnt, KM_SLEEP);
		}
		/*
		 * Put together the scatter/gather iovecs.
		 */
		iovp = niovp;
		trp = rp;
		count = 0;
		do {
			if (trp->wa->wa_data || trp->wa->wa_rlist) {
				if (trp->wa->wa_rlist) {
					iovp->iov_base =
					    (char *)((trp->wa->wa_rlist)->
					    u.c_daddr3);
					iovp->iov_len = trp->wa->wa_count;
				} else {
					iovp->iov_base = trp->wa->wa_data;
					iovp->iov_len = trp->wa->wa_count;
				}
				iovp++;
			} else {
				m = trp->wa->wa_mblk;
				rcount = trp->wa->wa_count;
				while (m != NULL) {
					iovp->iov_base = (caddr_t)m->b_rptr;
					iovp->iov_len = (m->b_wptr - m->b_rptr);
					/* clamp last mblk to wa_count */
					rcount -= iovp->iov_len;
					if (rcount < 0)
						iovp->iov_len += rcount;
					iovp++;
					if (rcount <= 0)
						break;
					m = m->b_cont;
				}
			}
			count += trp->wa->wa_count;
			trp = trp->list;
		} while (trp != lrp);

		uio.uio_iov = niovp;
		uio.uio_iovcnt = iovcnt;
		uio.uio_segflg = UIO_SYSSPACE;
		uio.uio_extflg = UIO_COPY_DEFAULT;
		uio.uio_loffset = (offset_t)rp->wa->wa_offset;
		uio.uio_resid = count;
		/*
		 * The limit is checked on the client.  We
		 * should allow any size writes here.
		 */
		uio.uio_llimit = curproc->p_fsz_ctl;
		rlimit = uio.uio_llimit - rp->wa->wa_offset;
		if (rlimit < (rlim64_t)uio.uio_resid)
			uio.uio_resid = (uint_t)rlimit;

		/*
		 * For now we assume no append mode.
		 */

		/*
		 * We're changing creds because VM may fault
		 * and we need the cred of the current
		 * thread to be used if quota * checking is
		 * enabled.
		 *
		 * NOTE(review): t_cred is swapped to the cluster owner's
		 * cr while the write itself uses rp->cr (the individual
		 * request's cred) — confirm this asymmetry is intended.
		 */
		savecred = curthread->t_cred;
		curthread->t_cred = cr;
		error = VOP_WRITE(vp, &uio, 0, rp->cr, &ct);
		curthread->t_cred = savecred;

		/* check if a monitor detected a delegation conflict */
		if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
			/* mark as wouldblock so response is dropped */
			curthread->t_flag |= T_WOULDBLOCK;

		if (niovp != iov)
			kmem_free(niovp, sizeof (*niovp) * iovcnt);

		if (!error) {
			data_written = 1;
			/*
			 * Get attributes again so we send the latest mod
			 * time to the client side for its cache.
			 */
			va.va_mask = AT_ALL;	/* now we want everything */

			error = VOP_GETATTR(vp, &va, 0, rp->cr, &ct);

			if (!error)
				acl_perm(vp, exi, &va, rp->cr);
		}

		/*
		 * Fill in the status responses for each request
		 * which was just handled.  Also, copy the latest
		 * attributes in to the attribute responses if
		 * appropriate.
		 */
		t_flag = curthread->t_flag & T_WOULDBLOCK;
		do {
			rp->thread->t_flag |= t_flag;
			/* check for overflows */
			if (!error) {
				error = vattr_to_nattr(&va, &rp->ns->ns_attr);
			}
			rp->ns->ns_status = puterrno(error);
			rp = rp->list;
		} while (rp != lrp);
	} while (rp != NULL);

	/*
	 * If any data was written at all, then we need to flush
	 * the data and metadata to stable storage.
	 */
	if (data_written) {
		error = VOP_PUTPAGE(vp, (u_offset_t)off, len, 0, cr, &ct);

		if (!error) {
			error = VOP_FSYNC(vp, FNODSYNC, cr, &ct);
		}
	}

	VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct);

	if (in_crit)
		nbl_end_crit(vp);
	VN_RELE(vp);

	/*
	 * Post a status for any request not yet answered (e.g. the
	 * putpage/fsync result) and wake all parked service threads.
	 */
	t_flag = curthread->t_flag & T_WOULDBLOCK;
	mutex_enter(&nsrv->async_write_lock);
	for (rp = nlp->list; rp != NULL; rp = rp->list) {
		if (rp->ns->ns_status == RFSWRITE_INITVAL) {
			rp->ns->ns_status = puterrno(error);
			rp->thread->t_flag |= t_flag;
		}
	}
	cv_broadcast(&nlp->cv);
	mutex_exit(&nsrv->async_write_lock);

}
1778 1782
1779 1783 void *
1780 1784 rfs_write_getfh(struct nfswriteargs *wa)
1781 1785 {
1782 1786 return (&wa->wa_fhandle);
1783 1787 }
1784 1788
1785 1789 /*
1786 1790 * Create a file.
1787 1791 * Creates a file with given attributes and returns those attributes
1788 1792 * and an fhandle for the new file.
1789 1793 */
1790 1794 void
1791 1795 rfs_create(struct nfscreatargs *args, struct nfsdiropres *dr,
1792 1796 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
1793 1797 {
1794 1798 int error;
1795 1799 int lookuperr;
1796 1800 int in_crit = 0;
1797 1801 struct vattr va;
1798 1802 vnode_t *vp;
1799 1803 vnode_t *realvp;
1800 1804 vnode_t *dvp;
1801 1805 char *name = args->ca_da.da_name;
1802 1806 vnode_t *tvp = NULL;
1803 1807 int mode;
1804 1808 int lookup_ok;
1805 1809 bool_t trunc;
1806 1810 struct sockaddr *ca;
1807 1811
1808 1812 /*
1809 1813 * Disallow NULL paths
1810 1814 */
1811 1815 if (name == NULL || *name == '\0') {
1812 1816 dr->dr_status = NFSERR_ACCES;
1813 1817 return;
1814 1818 }
1815 1819
1816 1820 dvp = nfs_fhtovp(args->ca_da.da_fhandle, exi);
1817 1821 if (dvp == NULL) {
1818 1822 dr->dr_status = NFSERR_STALE;
1819 1823 return;
1820 1824 }
1821 1825
1822 1826 error = sattr_to_vattr(args->ca_sa, &va);
1823 1827 if (error) {
1824 1828 dr->dr_status = puterrno(error);
1825 1829 return;
1826 1830 }
1827 1831
1828 1832 /*
1829 1833 * Must specify the mode.
1830 1834 */
1831 1835 if (!(va.va_mask & AT_MODE)) {
1832 1836 VN_RELE(dvp);
1833 1837 dr->dr_status = NFSERR_INVAL;
1834 1838 return;
1835 1839 }
1836 1840
1837 1841 /*
1838 1842 * This is a completely gross hack to make mknod
1839 1843 * work over the wire until we can wack the protocol
1840 1844 */
1841 1845 if ((va.va_mode & IFMT) == IFCHR) {
1842 1846 if (args->ca_sa->sa_size == (uint_t)NFS_FIFO_DEV)
1843 1847 va.va_type = VFIFO; /* xtra kludge for named pipe */
1844 1848 else {
1845 1849 va.va_type = VCHR;
1846 1850 /*
1847 1851 * uncompress the received dev_t
1848 1852 * if the top half is zero indicating a request
1849 1853 * from an `older style' OS.
1850 1854 */
1851 1855 if ((va.va_size & 0xffff0000) == 0)
1852 1856 va.va_rdev = nfsv2_expdev(va.va_size);
1853 1857 else
1854 1858 va.va_rdev = (dev_t)va.va_size;
1855 1859 }
1856 1860 va.va_mask &= ~AT_SIZE;
1857 1861 } else if ((va.va_mode & IFMT) == IFBLK) {
1858 1862 va.va_type = VBLK;
1859 1863 /*
1860 1864 * uncompress the received dev_t
1861 1865 * if the top half is zero indicating a request
1862 1866 * from an `older style' OS.
1863 1867 */
1864 1868 if ((va.va_size & 0xffff0000) == 0)
1865 1869 va.va_rdev = nfsv2_expdev(va.va_size);
1866 1870 else
1867 1871 va.va_rdev = (dev_t)va.va_size;
1868 1872 va.va_mask &= ~AT_SIZE;
1869 1873 } else if ((va.va_mode & IFMT) == IFSOCK) {
1870 1874 va.va_type = VSOCK;
1871 1875 } else {
1872 1876 va.va_type = VREG;
1873 1877 }
1874 1878 va.va_mode &= ~IFMT;
1875 1879 va.va_mask |= AT_TYPE;
1876 1880
1877 1881 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1878 1882 name = nfscmd_convname(ca, exi, name, NFSCMD_CONV_INBOUND,
1879 1883 MAXPATHLEN);
1880 1884 if (name == NULL) {
1881 1885 dr->dr_status = puterrno(EINVAL);
1882 1886 return;
1883 1887 }
1884 1888
1885 1889 /*
1886 1890 * Why was the choice made to use VWRITE as the mode to the
1887 1891 * call to VOP_CREATE ? This results in a bug. When a client
1888 1892 * opens a file that already exists and is RDONLY, the second
1889 1893 * open fails with an EACESS because of the mode.
1890 1894 * bug ID 1054648.
1891 1895 */
1892 1896 lookup_ok = 0;
1893 1897 mode = VWRITE;
1894 1898 if (!(va.va_mask & AT_SIZE) || va.va_type != VREG) {
1895 1899 error = VOP_LOOKUP(dvp, name, &tvp, NULL, 0, NULL, cr,
1896 1900 NULL, NULL, NULL);
1897 1901 if (!error) {
1898 1902 struct vattr at;
1899 1903
1900 1904 lookup_ok = 1;
1901 1905 at.va_mask = AT_MODE;
1902 1906 error = VOP_GETATTR(tvp, &at, 0, cr, NULL);
1903 1907 if (!error)
1904 1908 mode = (at.va_mode & S_IWUSR) ? VWRITE : VREAD;
1905 1909 VN_RELE(tvp);
1906 1910 tvp = NULL;
1907 1911 }
1908 1912 }
1909 1913
1910 1914 if (!lookup_ok) {
1911 1915 if (rdonly(ro, dvp)) {
1912 1916 error = EROFS;
1913 1917 } else if (va.va_type != VREG && va.va_type != VFIFO &&
1914 1918 va.va_type != VSOCK && secpolicy_sys_devices(cr) != 0) {
1915 1919 error = EPERM;
1916 1920 } else {
1917 1921 error = 0;
1918 1922 }
1919 1923 }
1920 1924
1921 1925 /*
1922 1926 * If file size is being modified on an already existing file
1923 1927 * make sure that there are no conflicting non-blocking mandatory
1924 1928 * locks in the region being manipulated. Return EACCES if there
1925 1929 * are conflicting locks.
1926 1930 */
1927 1931 if (!error && (va.va_type == VREG) && (va.va_mask & AT_SIZE)) {
1928 1932 lookuperr = VOP_LOOKUP(dvp, name, &tvp, NULL, 0, NULL, cr,
1929 1933 NULL, NULL, NULL);
1930 1934
1931 1935 if (!lookuperr &&
1932 1936 rfs4_check_delegated(FWRITE, tvp, va.va_size == 0)) {
1933 1937 VN_RELE(tvp);
1934 1938 curthread->t_flag |= T_WOULDBLOCK;
1935 1939 goto out;
1936 1940 }
1937 1941
1938 1942 if (!lookuperr && nbl_need_check(tvp)) {
1939 1943 /*
1940 1944 * The file exists. Now check if it has any
1941 1945 * conflicting non-blocking mandatory locks
1942 1946 * in the region being changed.
1943 1947 */
1944 1948 struct vattr bva;
1945 1949 u_offset_t offset;
1946 1950 ssize_t length;
1947 1951
1948 1952 nbl_start_crit(tvp, RW_READER);
1949 1953 in_crit = 1;
1950 1954
1951 1955 bva.va_mask = AT_SIZE;
1952 1956 error = VOP_GETATTR(tvp, &bva, 0, cr, NULL);
1953 1957 if (!error) {
1954 1958 if (va.va_size < bva.va_size) {
1955 1959 offset = va.va_size;
1956 1960 length = bva.va_size - va.va_size;
1957 1961 } else {
1958 1962 offset = bva.va_size;
1959 1963 length = va.va_size - bva.va_size;
1960 1964 }
1961 1965 if (length) {
1962 1966 if (nbl_conflict(tvp, NBL_WRITE,
1963 1967 offset, length, 0, NULL)) {
1964 1968 error = EACCES;
1965 1969 }
1966 1970 }
1967 1971 }
1968 1972 if (error) {
1969 1973 nbl_end_crit(tvp);
1970 1974 VN_RELE(tvp);
1971 1975 in_crit = 0;
1972 1976 }
1973 1977 } else if (tvp != NULL) {
1974 1978 VN_RELE(tvp);
1975 1979 }
1976 1980 }
1977 1981
1978 1982 if (!error) {
1979 1983 /*
1980 1984 * If filesystem is shared with nosuid the remove any
1981 1985 * setuid/setgid bits on create.
1982 1986 */
1983 1987 if (va.va_type == VREG &&
1984 1988 exi->exi_export.ex_flags & EX_NOSUID)
1985 1989 va.va_mode &= ~(VSUID | VSGID);
1986 1990
1987 1991 error = VOP_CREATE(dvp, name, &va, NONEXCL, mode, &vp, cr, 0,
1988 1992 NULL, NULL);
1989 1993
1990 1994 if (!error) {
1991 1995
1992 1996 if ((va.va_mask & AT_SIZE) && (va.va_size == 0))
1993 1997 trunc = TRUE;
1994 1998 else
1995 1999 trunc = FALSE;
1996 2000
1997 2001 if (rfs4_check_delegated(FWRITE, vp, trunc)) {
1998 2002 VN_RELE(vp);
1999 2003 curthread->t_flag |= T_WOULDBLOCK;
2000 2004 goto out;
2001 2005 }
2002 2006 va.va_mask = AT_ALL;
2003 2007
2004 2008 error = VOP_GETATTR(vp, &va, 0, cr, NULL);
2005 2009
2006 2010 /* check for overflows */
2007 2011 if (!error) {
2008 2012 acl_perm(vp, exi, &va, cr);
2009 2013 error = vattr_to_nattr(&va, &dr->dr_attr);
2010 2014 if (!error) {
2011 2015 error = makefh(&dr->dr_fhandle, vp,
2012 2016 exi);
2013 2017 }
2014 2018 }
2015 2019 /*
2016 2020 * Force modified metadata out to stable storage.
2017 2021 *
2018 2022 * if a underlying vp exists, pass it to VOP_FSYNC
2019 2023 */
2020 2024 if (VOP_REALVP(vp, &realvp, NULL) == 0)
2021 2025 (void) VOP_FSYNC(realvp, FNODSYNC, cr, NULL);
2022 2026 else
2023 2027 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
2024 2028 VN_RELE(vp);
2025 2029 }
2026 2030
2027 2031 if (in_crit) {
2028 2032 nbl_end_crit(tvp);
2029 2033 VN_RELE(tvp);
2030 2034 }
2031 2035 }
2032 2036
2033 2037 /*
2034 2038 * Force modified data and metadata out to stable storage.
2035 2039 */
2036 2040 (void) VOP_FSYNC(dvp, 0, cr, NULL);
2037 2041
2038 2042 out:
2039 2043
2040 2044 VN_RELE(dvp);
2041 2045
2042 2046 dr->dr_status = puterrno(error);
2043 2047
2044 2048 if (name != args->ca_da.da_name)
2045 2049 kmem_free(name, MAXPATHLEN);
2046 2050 }
2047 2051 void *
2048 2052 rfs_create_getfh(struct nfscreatargs *args)
2049 2053 {
2050 2054 return (args->ca_da.da_fhandle);
2051 2055 }
2052 2056
/*
 * Remove a file.
 * Remove named file from parent directory.
 *
 * The target is looked up first so that NFSv4 delegations and
 * non-blocking mandatory (share) reservations can be checked before
 * the actual VOP_REMOVE.
 */
/* ARGSUSED */
void
rfs_remove(struct nfsdiropargs *da, enum nfsstat *status,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error = 0;
	vnode_t *vp;		/* directory containing the target */
	vnode_t *targvp;	/* the file being removed */
	int in_crit = 0;	/* nonzero while in NBL critical region */

	/*
	 * Disallow NULL paths
	 */
	if (da->da_name == NULL || *da->da_name == '\0') {
		*status = NFSERR_ACCES;
		return;
	}

	vp = nfs_fhtovp(da->da_fhandle, exi);
	if (vp == NULL) {
		*status = NFSERR_STALE;
		return;
	}

	if (rdonly(ro, vp)) {
		VN_RELE(vp);
		*status = NFSERR_ROFS;
		return;
	}

	/*
	 * Check for a conflict with a non-blocking mandatory share
	 * reservation.
	 */
	error = VOP_LOOKUP(vp, da->da_name, &targvp, NULL, 0,
	    NULL, cr, NULL, NULL, NULL);
	if (error != 0) {
		VN_RELE(vp);
		*status = puterrno(error);
		return;
	}

	/*
	 * If the file is delegated to an v4 client, then initiate
	 * recall and drop this request (by setting T_WOULDBLOCK).
	 * The client will eventually re-transmit the request and
	 * (hopefully), by then, the v4 client will have returned
	 * the delegation.
	 */

	if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
		VN_RELE(vp);
		VN_RELE(targvp);
		curthread->t_flag |= T_WOULDBLOCK;
		return;
	}

	if (nbl_need_check(targvp)) {
		nbl_start_crit(targvp, RW_READER);
		in_crit = 1;
		if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
			error = EACCES;
			goto out;
		}
	}

	error = VOP_REMOVE(vp, da->da_name, cr, NULL, 0);

	/*
	 * Force modified data and metadata out to stable storage.
	 */
	(void) VOP_FSYNC(vp, 0, cr, NULL);

out:
	if (in_crit)
		nbl_end_crit(targvp);
	VN_RELE(targvp);
	VN_RELE(vp);

	*status = puterrno(error);

}
2138 2142
2139 2143 void *
2140 2144 rfs_remove_getfh(struct nfsdiropargs *da)
2141 2145 {
2142 2146 return (da->da_fhandle);
2143 2147 }
2144 2148
/*
 * rename a file
 * Give a file (from) a new name (to).
 */
/* ARGSUSED */
void
rfs_rename(struct nfsrnmargs *args, enum nfsstat *status,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error = 0;
	vnode_t *fromvp;		/* source parent directory */
	vnode_t *tovp;			/* target parent directory */
	struct exportinfo *to_exi;
	fhandle_t *fh;
	vnode_t *srcvp;			/* the file being renamed */
	vnode_t *targvp;		/* existing file at the target name */
	int in_crit = 0;		/* holding srcvp's NBMAND crit region? */

	/* Translate the source directory file handle. */
	fromvp = nfs_fhtovp(args->rna_from.da_fhandle, exi);
	if (fromvp == NULL) {
		*status = NFSERR_STALE;
		return;
	}

	/*
	 * The target must be in the same export as the source; an NFSv2
	 * rename may not cross filesystem (export) boundaries.
	 */
	fh = args->rna_to.da_fhandle;
	to_exi = checkexport(&fh->fh_fsid, (fid_t *)&fh->fh_xlen);
	if (to_exi == NULL) {
		VN_RELE(fromvp);
		*status = NFSERR_ACCES;
		return;
	}
	exi_rele(to_exi);

	if (to_exi != exi) {
		VN_RELE(fromvp);
		*status = NFSERR_XDEV;
		return;
	}

	tovp = nfs_fhtovp(args->rna_to.da_fhandle, exi);
	if (tovp == NULL) {
		VN_RELE(fromvp);
		*status = NFSERR_STALE;
		return;
	}

	/* Both file handles must name directories (the two parents). */
	if (fromvp->v_type != VDIR || tovp->v_type != VDIR) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_NOTDIR;
		return;
	}

	/*
	 * Disallow NULL paths
	 */
	if (args->rna_from.da_name == NULL || *args->rna_from.da_name == '\0' ||
	    args->rna_to.da_name == NULL || *args->rna_to.da_name == '\0') {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_ACCES;
		return;
	}

	if (rdonly(ro, tovp)) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_ROFS;
		return;
	}

	/*
	 * Check for a conflict with a non-blocking mandatory share reservation.
	 */
	error = VOP_LOOKUP(fromvp, args->rna_from.da_name, &srcvp, NULL, 0,
	    NULL, cr, NULL, NULL, NULL);
	if (error != 0) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = puterrno(error);
		return;
	}

	/* Check for delegations on the source file */

	if (rfs4_check_delegated(FWRITE, srcvp, FALSE)) {
		/*
		 * Delegated to an NFSv4 client: drop the request with
		 * T_WOULDBLOCK so the client retransmits after the
		 * delegation has (hopefully) been recalled.
		 */
		VN_RELE(tovp);
		VN_RELE(fromvp);
		VN_RELE(srcvp);
		curthread->t_flag |= T_WOULDBLOCK;
		return;
	}

	/* Check for delegation on the file being renamed over, if it exists */

	if (nfs4_get_deleg_policy() != SRV_NEVER_DELEGATE &&
	    VOP_LOOKUP(tovp, args->rna_to.da_name, &targvp, NULL, 0, NULL, cr,
	    NULL, NULL, NULL) == 0) {

		if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
			VN_RELE(tovp);
			VN_RELE(fromvp);
			VN_RELE(srcvp);
			VN_RELE(targvp);
			curthread->t_flag |= T_WOULDBLOCK;
			return;
		}
		VN_RELE(targvp);
	}


	if (nbl_need_check(srcvp)) {
		nbl_start_crit(srcvp, RW_READER);
		in_crit = 1;
		if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL)) {
			error = EACCES;
			goto out;
		}
	}

	error = VOP_RENAME(fromvp, args->rna_from.da_name,
	    tovp, args->rna_to.da_name, cr, NULL, 0);

	/* Update the vnode's cached path to reflect the new name. */
	if (error == 0)
		vn_renamepath(tovp, srcvp, args->rna_to.da_name,
		    strlen(args->rna_to.da_name));

	/*
	 * Force modified data and metadata out to stable storage.
	 */
	(void) VOP_FSYNC(tovp, 0, cr, NULL);
	(void) VOP_FSYNC(fromvp, 0, cr, NULL);

out:
	if (in_crit)
		nbl_end_crit(srcvp);
	VN_RELE(srcvp);
	VN_RELE(tovp);
	VN_RELE(fromvp);

	*status = puterrno(error);

}
2288 2292 void *
2289 2293 rfs_rename_getfh(struct nfsrnmargs *args)
2290 2294 {
2291 2295 return (args->rna_from.da_fhandle);
2292 2296 }
2293 2297
2294 2298 /*
2295 2299 * Link to a file.
2296 2300 * Create a file (to) which is a hard link to the given file (from).
2297 2301 */
2298 2302 /* ARGSUSED */
2299 2303 void
2300 2304 rfs_link(struct nfslinkargs *args, enum nfsstat *status,
2301 2305 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
2302 2306 {
2303 2307 int error;
2304 2308 vnode_t *fromvp;
2305 2309 vnode_t *tovp;
2306 2310 struct exportinfo *to_exi;
2307 2311 fhandle_t *fh;
2308 2312
2309 2313 fromvp = nfs_fhtovp(args->la_from, exi);
2310 2314 if (fromvp == NULL) {
2311 2315 *status = NFSERR_STALE;
2312 2316 return;
2313 2317 }
2314 2318
2315 2319 fh = args->la_to.da_fhandle;
2316 2320 to_exi = checkexport(&fh->fh_fsid, (fid_t *)&fh->fh_xlen);
2317 2321 if (to_exi == NULL) {
2318 2322 VN_RELE(fromvp);
2319 2323 *status = NFSERR_ACCES;
2320 2324 return;
2321 2325 }
2322 2326 exi_rele(to_exi);
2323 2327
2324 2328 if (to_exi != exi) {
2325 2329 VN_RELE(fromvp);
2326 2330 *status = NFSERR_XDEV;
2327 2331 return;
2328 2332 }
2329 2333
2330 2334 tovp = nfs_fhtovp(args->la_to.da_fhandle, exi);
2331 2335 if (tovp == NULL) {
2332 2336 VN_RELE(fromvp);
2333 2337 *status = NFSERR_STALE;
2334 2338 return;
2335 2339 }
2336 2340
2337 2341 if (tovp->v_type != VDIR) {
2338 2342 VN_RELE(tovp);
2339 2343 VN_RELE(fromvp);
2340 2344 *status = NFSERR_NOTDIR;
2341 2345 return;
2342 2346 }
2343 2347 /*
2344 2348 * Disallow NULL paths
2345 2349 */
2346 2350 if (args->la_to.da_name == NULL || *args->la_to.da_name == '\0') {
2347 2351 VN_RELE(tovp);
2348 2352 VN_RELE(fromvp);
2349 2353 *status = NFSERR_ACCES;
2350 2354 return;
2351 2355 }
2352 2356
2353 2357 if (rdonly(ro, tovp)) {
2354 2358 VN_RELE(tovp);
2355 2359 VN_RELE(fromvp);
2356 2360 *status = NFSERR_ROFS;
2357 2361 return;
2358 2362 }
2359 2363
2360 2364 error = VOP_LINK(tovp, fromvp, args->la_to.da_name, cr, NULL, 0);
2361 2365
2362 2366 /*
2363 2367 * Force modified data and metadata out to stable storage.
2364 2368 */
2365 2369 (void) VOP_FSYNC(tovp, 0, cr, NULL);
2366 2370 (void) VOP_FSYNC(fromvp, FNODSYNC, cr, NULL);
2367 2371
2368 2372 VN_RELE(tovp);
2369 2373 VN_RELE(fromvp);
2370 2374
2371 2375 *status = puterrno(error);
2372 2376
2373 2377 }
2374 2378 void *
2375 2379 rfs_link_getfh(struct nfslinkargs *args)
2376 2380 {
2377 2381 return (args->la_from);
2378 2382 }
2379 2383
2380 2384 /*
2381 2385 * Symbolicly link to a file.
2382 2386 * Create a file (to) with the given attributes which is a symbolic link
2383 2387 * to the given path name (to).
2384 2388 */
2385 2389 void
2386 2390 rfs_symlink(struct nfsslargs *args, enum nfsstat *status,
2387 2391 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
2388 2392 {
2389 2393 int error;
2390 2394 struct vattr va;
2391 2395 vnode_t *vp;
2392 2396 vnode_t *svp;
2393 2397 int lerror;
2394 2398 struct sockaddr *ca;
2395 2399 char *name = NULL;
2396 2400
2397 2401 /*
2398 2402 * Disallow NULL paths
2399 2403 */
2400 2404 if (args->sla_from.da_name == NULL || *args->sla_from.da_name == '\0') {
2401 2405 *status = NFSERR_ACCES;
2402 2406 return;
2403 2407 }
2404 2408
2405 2409 vp = nfs_fhtovp(args->sla_from.da_fhandle, exi);
2406 2410 if (vp == NULL) {
2407 2411 *status = NFSERR_STALE;
2408 2412 return;
2409 2413 }
2410 2414
2411 2415 if (rdonly(ro, vp)) {
2412 2416 VN_RELE(vp);
2413 2417 *status = NFSERR_ROFS;
2414 2418 return;
2415 2419 }
2416 2420
2417 2421 error = sattr_to_vattr(args->sla_sa, &va);
2418 2422 if (error) {
2419 2423 VN_RELE(vp);
2420 2424 *status = puterrno(error);
2421 2425 return;
2422 2426 }
2423 2427
2424 2428 if (!(va.va_mask & AT_MODE)) {
2425 2429 VN_RELE(vp);
2426 2430 *status = NFSERR_INVAL;
2427 2431 return;
2428 2432 }
2429 2433
2430 2434 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2431 2435 name = nfscmd_convname(ca, exi, args->sla_tnm,
2432 2436 NFSCMD_CONV_INBOUND, MAXPATHLEN);
2433 2437
2434 2438 if (name == NULL) {
2435 2439 *status = NFSERR_ACCES;
2436 2440 return;
2437 2441 }
2438 2442
2439 2443 va.va_type = VLNK;
2440 2444 va.va_mask |= AT_TYPE;
2441 2445
2442 2446 error = VOP_SYMLINK(vp, args->sla_from.da_name, &va, name, cr, NULL, 0);
2443 2447
2444 2448 /*
2445 2449 * Force new data and metadata out to stable storage.
2446 2450 */
2447 2451 lerror = VOP_LOOKUP(vp, args->sla_from.da_name, &svp, NULL, 0,
2448 2452 NULL, cr, NULL, NULL, NULL);
2449 2453
2450 2454 if (!lerror) {
2451 2455 (void) VOP_FSYNC(svp, 0, cr, NULL);
2452 2456 VN_RELE(svp);
2453 2457 }
2454 2458
2455 2459 /*
2456 2460 * Force modified data and metadata out to stable storage.
2457 2461 */
2458 2462 (void) VOP_FSYNC(vp, 0, cr, NULL);
2459 2463
2460 2464 VN_RELE(vp);
2461 2465
2462 2466 *status = puterrno(error);
2463 2467 if (name != args->sla_tnm)
2464 2468 kmem_free(name, MAXPATHLEN);
2465 2469
2466 2470 }
2467 2471 void *
2468 2472 rfs_symlink_getfh(struct nfsslargs *args)
2469 2473 {
2470 2474 return (args->sla_from.da_fhandle);
2471 2475 }
2472 2476
2473 2477 /*
2474 2478 * Make a directory.
2475 2479 * Create a directory with the given name, parent directory, and attributes.
2476 2480 * Returns a file handle and attributes for the new directory.
2477 2481 */
2478 2482 /* ARGSUSED */
2479 2483 void
2480 2484 rfs_mkdir(struct nfscreatargs *args, struct nfsdiropres *dr,
2481 2485 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
2482 2486 {
2483 2487 int error;
2484 2488 struct vattr va;
2485 2489 vnode_t *dvp = NULL;
2486 2490 vnode_t *vp;
2487 2491 char *name = args->ca_da.da_name;
2488 2492
2489 2493 /*
2490 2494 * Disallow NULL paths
2491 2495 */
2492 2496 if (name == NULL || *name == '\0') {
2493 2497 dr->dr_status = NFSERR_ACCES;
2494 2498 return;
2495 2499 }
2496 2500
2497 2501 vp = nfs_fhtovp(args->ca_da.da_fhandle, exi);
2498 2502 if (vp == NULL) {
2499 2503 dr->dr_status = NFSERR_STALE;
2500 2504 return;
2501 2505 }
2502 2506
2503 2507 if (rdonly(ro, vp)) {
2504 2508 VN_RELE(vp);
2505 2509 dr->dr_status = NFSERR_ROFS;
2506 2510 return;
2507 2511 }
2508 2512
2509 2513 error = sattr_to_vattr(args->ca_sa, &va);
2510 2514 if (error) {
2511 2515 VN_RELE(vp);
2512 2516 dr->dr_status = puterrno(error);
2513 2517 return;
2514 2518 }
2515 2519
2516 2520 if (!(va.va_mask & AT_MODE)) {
2517 2521 VN_RELE(vp);
2518 2522 dr->dr_status = NFSERR_INVAL;
2519 2523 return;
2520 2524 }
2521 2525
2522 2526 va.va_type = VDIR;
2523 2527 va.va_mask |= AT_TYPE;
2524 2528
2525 2529 error = VOP_MKDIR(vp, name, &va, &dvp, cr, NULL, 0, NULL);
2526 2530
2527 2531 if (!error) {
2528 2532 /*
2529 2533 * Attribtutes of the newly created directory should
2530 2534 * be returned to the client.
2531 2535 */
2532 2536 va.va_mask = AT_ALL; /* We want everything */
2533 2537 error = VOP_GETATTR(dvp, &va, 0, cr, NULL);
2534 2538
2535 2539 /* check for overflows */
2536 2540 if (!error) {
2537 2541 acl_perm(vp, exi, &va, cr);
2538 2542 error = vattr_to_nattr(&va, &dr->dr_attr);
2539 2543 if (!error) {
2540 2544 error = makefh(&dr->dr_fhandle, dvp, exi);
2541 2545 }
2542 2546 }
2543 2547 /*
2544 2548 * Force new data and metadata out to stable storage.
2545 2549 */
2546 2550 (void) VOP_FSYNC(dvp, 0, cr, NULL);
2547 2551 VN_RELE(dvp);
2548 2552 }
2549 2553
2550 2554 /*
2551 2555 * Force modified data and metadata out to stable storage.
2552 2556 */
2553 2557 (void) VOP_FSYNC(vp, 0, cr, NULL);
2554 2558
2555 2559 VN_RELE(vp);
2556 2560
2557 2561 dr->dr_status = puterrno(error);
2558 2562
2559 2563 }
2560 2564 void *
2561 2565 rfs_mkdir_getfh(struct nfscreatargs *args)
2562 2566 {
2563 2567 return (args->ca_da.da_fhandle);
2564 2568 }
2565 2569
2566 2570 /*
2567 2571 * Remove a directory.
2568 2572 * Remove the given directory name from the given parent directory.
2569 2573 */
2570 2574 /* ARGSUSED */
2571 2575 void
2572 2576 rfs_rmdir(struct nfsdiropargs *da, enum nfsstat *status,
2573 2577 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
2574 2578 {
2575 2579 int error;
2576 2580 vnode_t *vp;
2577 2581
2578 2582 /*
2579 2583 * Disallow NULL paths
2580 2584 */
2581 2585 if (da->da_name == NULL || *da->da_name == '\0') {
2582 2586 *status = NFSERR_ACCES;
2583 2587 return;
2584 2588 }
2585 2589
2586 2590 vp = nfs_fhtovp(da->da_fhandle, exi);
2587 2591 if (vp == NULL) {
2588 2592 *status = NFSERR_STALE;
2589 2593 return;
2590 2594 }
2591 2595
2592 2596 if (rdonly(ro, vp)) {
2593 2597 VN_RELE(vp);
2594 2598 *status = NFSERR_ROFS;
2595 2599 return;
2596 2600 }
2597 2601
2598 2602 /*
2599 2603 * VOP_RMDIR takes a third argument (the current
2600 2604 * directory of the process). That's because someone
2601 2605 * wants to return EINVAL if one tries to remove ".".
2602 2606 * Of course, NFS servers have no idea what their
2603 2607 * clients' current directories are. We fake it by
2604 2608 * supplying a vnode known to exist and illegal to
2605 2609 * remove.
2606 2610 */
2607 2611 error = VOP_RMDIR(vp, da->da_name, ZONE_ROOTVP(), cr, NULL, 0);
2608 2612
2609 2613 /*
2610 2614 * Force modified data and metadata out to stable storage.
2611 2615 */
2612 2616 (void) VOP_FSYNC(vp, 0, cr, NULL);
2613 2617
2614 2618 VN_RELE(vp);
2615 2619
2616 2620 /*
2617 2621 * System V defines rmdir to return EEXIST, not ENOTEMPTY,
2618 2622 * if the directory is not empty. A System V NFS server
2619 2623 * needs to map NFSERR_EXIST to NFSERR_NOTEMPTY to transmit
2620 2624 * over the wire.
2621 2625 */
2622 2626 if (error == EEXIST)
2623 2627 *status = NFSERR_NOTEMPTY;
2624 2628 else
2625 2629 *status = puterrno(error);
2626 2630
2627 2631 }
2628 2632 void *
2629 2633 rfs_rmdir_getfh(struct nfsdiropargs *da)
2630 2634 {
2631 2635 return (da->da_fhandle);
2632 2636 }
2633 2637
2634 2638 /* ARGSUSED */
2635 2639 void
2636 2640 rfs_readdir(struct nfsrddirargs *rda, struct nfsrddirres *rd,
2637 2641 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
2638 2642 {
2639 2643 int error;
2640 2644 int iseof;
2641 2645 struct iovec iov;
2642 2646 struct uio uio;
2643 2647 vnode_t *vp;
2644 2648 char *ndata = NULL;
2645 2649 struct sockaddr *ca;
2646 2650 size_t nents;
2647 2651 int ret;
2648 2652
2649 2653 vp = nfs_fhtovp(&rda->rda_fh, exi);
2650 2654 if (vp == NULL) {
2651 2655 rd->rd_entries = NULL;
2652 2656 rd->rd_status = NFSERR_STALE;
2653 2657 return;
2654 2658 }
2655 2659
2656 2660 if (vp->v_type != VDIR) {
2657 2661 VN_RELE(vp);
2658 2662 rd->rd_entries = NULL;
2659 2663 rd->rd_status = NFSERR_NOTDIR;
2660 2664 return;
2661 2665 }
2662 2666
2663 2667 (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
2664 2668
2665 2669 error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
2666 2670
2667 2671 if (error) {
2668 2672 rd->rd_entries = NULL;
2669 2673 goto bad;
2670 2674 }
2671 2675
2672 2676 if (rda->rda_count == 0) {
2673 2677 rd->rd_entries = NULL;
2674 2678 rd->rd_size = 0;
2675 2679 rd->rd_eof = FALSE;
2676 2680 goto bad;
2677 2681 }
2678 2682
2679 2683 rda->rda_count = MIN(rda->rda_count, NFS_MAXDATA);
2680 2684
2681 2685 /*
2682 2686 * Allocate data for entries. This will be freed by rfs_rddirfree.
2683 2687 */
2684 2688 rd->rd_bufsize = (uint_t)rda->rda_count;
2685 2689 rd->rd_entries = kmem_alloc(rd->rd_bufsize, KM_SLEEP);
2686 2690
2687 2691 /*
2688 2692 * Set up io vector to read directory data
2689 2693 */
2690 2694 iov.iov_base = (caddr_t)rd->rd_entries;
2691 2695 iov.iov_len = rda->rda_count;
2692 2696 uio.uio_iov = &iov;
2693 2697 uio.uio_iovcnt = 1;
2694 2698 uio.uio_segflg = UIO_SYSSPACE;
2695 2699 uio.uio_extflg = UIO_COPY_CACHED;
2696 2700 uio.uio_loffset = (offset_t)rda->rda_offset;
2697 2701 uio.uio_resid = rda->rda_count;
2698 2702
2699 2703 /*
2700 2704 * read directory
2701 2705 */
2702 2706 error = VOP_READDIR(vp, &uio, cr, &iseof, NULL, 0);
2703 2707
2704 2708 /*
2705 2709 * Clean up
2706 2710 */
2707 2711 if (!error) {
2708 2712 /*
2709 2713 * set size and eof
2710 2714 */
2711 2715 if (uio.uio_resid == rda->rda_count) {
2712 2716 rd->rd_size = 0;
2713 2717 rd->rd_eof = TRUE;
2714 2718 } else {
2715 2719 rd->rd_size = (uint32_t)(rda->rda_count -
2716 2720 uio.uio_resid);
2717 2721 rd->rd_eof = iseof ? TRUE : FALSE;
2718 2722 }
2719 2723 }
2720 2724
2721 2725 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2722 2726 nents = nfscmd_countents((char *)rd->rd_entries, rd->rd_size);
2723 2727 ret = nfscmd_convdirplus(ca, exi, (char *)rd->rd_entries, nents,
2724 2728 rda->rda_count, &ndata);
2725 2729
2726 2730 if (ret != 0) {
2727 2731 size_t dropbytes;
2728 2732 /*
2729 2733 * We had to drop one or more entries in order to fit
2730 2734 * during the character conversion. We need to patch
2731 2735 * up the size and eof info.
2732 2736 */
2733 2737 if (rd->rd_eof)
2734 2738 rd->rd_eof = FALSE;
2735 2739 dropbytes = nfscmd_dropped_entrysize(
2736 2740 (struct dirent64 *)rd->rd_entries, nents, ret);
2737 2741 rd->rd_size -= dropbytes;
2738 2742 }
2739 2743 if (ndata == NULL) {
2740 2744 ndata = (char *)rd->rd_entries;
2741 2745 } else if (ndata != (char *)rd->rd_entries) {
2742 2746 kmem_free(rd->rd_entries, rd->rd_bufsize);
2743 2747 rd->rd_entries = (void *)ndata;
2744 2748 rd->rd_bufsize = rda->rda_count;
2745 2749 }
2746 2750
2747 2751 bad:
2748 2752 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
2749 2753
2750 2754 #if 0 /* notyet */
2751 2755 /*
2752 2756 * Don't do this. It causes local disk writes when just
2753 2757 * reading the file and the overhead is deemed larger
2754 2758 * than the benefit.
2755 2759 */
2756 2760 /*
2757 2761 * Force modified metadata out to stable storage.
2758 2762 */
2759 2763 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
2760 2764 #endif
2761 2765
2762 2766 VN_RELE(vp);
2763 2767
2764 2768 rd->rd_status = puterrno(error);
2765 2769
2766 2770 }
2767 2771 void *
2768 2772 rfs_readdir_getfh(struct nfsrddirargs *rda)
2769 2773 {
2770 2774 return (&rda->rda_fh);
2771 2775 }
2772 2776 void
2773 2777 rfs_rddirfree(struct nfsrddirres *rd)
2774 2778 {
2775 2779 if (rd->rd_entries != NULL)
2776 2780 kmem_free(rd->rd_entries, rd->rd_bufsize);
2777 2781 }
2778 2782
2779 2783 /* ARGSUSED */
2780 2784 void
2781 2785 rfs_statfs(fhandle_t *fh, struct nfsstatfs *fs, struct exportinfo *exi,
2782 2786 struct svc_req *req, cred_t *cr, bool_t ro)
2783 2787 {
2784 2788 int error;
2785 2789 struct statvfs64 sb;
2786 2790 vnode_t *vp;
2787 2791
2788 2792 vp = nfs_fhtovp(fh, exi);
2789 2793 if (vp == NULL) {
2790 2794 fs->fs_status = NFSERR_STALE;
2791 2795 return;
2792 2796 }
2793 2797
2794 2798 error = VFS_STATVFS(vp->v_vfsp, &sb);
2795 2799
2796 2800 if (!error) {
2797 2801 fs->fs_tsize = nfstsize();
2798 2802 fs->fs_bsize = sb.f_frsize;
2799 2803 fs->fs_blocks = sb.f_blocks;
2800 2804 fs->fs_bfree = sb.f_bfree;
2801 2805 fs->fs_bavail = sb.f_bavail;
2802 2806 }
2803 2807
2804 2808 VN_RELE(vp);
2805 2809
2806 2810 fs->fs_status = puterrno(error);
2807 2811
2808 2812 }
2809 2813 void *
2810 2814 rfs_statfs_getfh(fhandle_t *fh)
2811 2815 {
2812 2816 return (fh);
2813 2817 }
2814 2818
/*
 * Convert the over-the-wire NFSv2 settable attributes (nfssattr) into a
 * vattr, setting a va_mask bit only for each field the client actually
 * supplied.  On the wire, an all-ones value means "field not set".
 * Returns 0, or EOVERFLOW on 32-bit kernels when a time value cannot be
 * represented in a time_t.
 */
static int
sattr_to_vattr(struct nfssattr *sa, struct vattr *vap)
{
	vap->va_mask = 0;

	/*
	 * There was a sign extension bug in some VFS based systems
	 * which stored the mode as a short.  When it would get
	 * assigned to a u_long, no sign extension would occur.
	 * It needed to, but this wasn't noticed because sa_mode
	 * would then get assigned back to the short, thus ignoring
	 * the upper 16 bits of sa_mode.
	 *
	 * To make this implementation work for both broken
	 * clients and good clients, we check for both versions
	 * of the mode.
	 */
	if (sa->sa_mode != (uint32_t)((ushort_t)-1) &&
	    sa->sa_mode != (uint32_t)-1) {
		vap->va_mask |= AT_MODE;
		vap->va_mode = sa->sa_mode;
	}
	if (sa->sa_uid != (uint32_t)-1) {
		vap->va_mask |= AT_UID;
		vap->va_uid = sa->sa_uid;
	}
	if (sa->sa_gid != (uint32_t)-1) {
		vap->va_mask |= AT_GID;
		vap->va_gid = sa->sa_gid;
	}
	if (sa->sa_size != (uint32_t)-1) {
		vap->va_mask |= AT_SIZE;
		vap->va_size = sa->sa_size;
	}
	/* Both tv_sec and tv_usec must be set for the time to count. */
	if (sa->sa_atime.tv_sec != (int32_t)-1 &&
	    sa->sa_atime.tv_usec != (int32_t)-1) {
#ifndef _LP64
		/* return error if time overflow */
		if (!NFS2_TIME_OK(sa->sa_atime.tv_sec))
			return (EOVERFLOW);
#endif
		vap->va_mask |= AT_ATIME;
		/*
		 * nfs protocol defines times as unsigned so don't extend sign,
		 * unless sysadmin set nfs_allow_preepoch_time.
		 */
		NFS_TIME_T_CONVERT(vap->va_atime.tv_sec, sa->sa_atime.tv_sec);
		vap->va_atime.tv_nsec = (uint32_t)(sa->sa_atime.tv_usec * 1000);
	}
	if (sa->sa_mtime.tv_sec != (int32_t)-1 &&
	    sa->sa_mtime.tv_usec != (int32_t)-1) {
#ifndef _LP64
		/* return error if time overflow */
		if (!NFS2_TIME_OK(sa->sa_mtime.tv_sec))
			return (EOVERFLOW);
#endif
		vap->va_mask |= AT_MTIME;
		/*
		 * nfs protocol defines times as unsigned so don't extend sign,
		 * unless sysadmin set nfs_allow_preepoch_time.
		 */
		NFS_TIME_T_CONVERT(vap->va_mtime.tv_sec, sa->sa_mtime.tv_sec);
		vap->va_mtime.tv_nsec = (uint32_t)(sa->sa_mtime.tv_usec * 1000);
	}
	return (0);
}
2881 2885
/*
 * Map vtype_t values (used as the index) to NFSv2 over-the-wire file
 * types; vnode types with no NFSv2 representation map to 0.  Used by
 * vattr_to_nattr() below.
 */
static const enum nfsftype vt_to_nf[] = {
	0, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, 0, 0, 0, NFSOC, 0
};
2885 2889
/*
 * check the following fields for overflow: nodeid, size, and time.
 * There could be a problem when converting 64-bit LP64 fields
 * into 32-bit ones.  Return an error if there is an overflow.
 *
 * Returns 0 on success, EFBIG if the nodeid or a regular-file/directory
 * size does not fit in 32 bits, or EOVERFLOW if a timestamp does not
 * fit in the 32-bit wire format.
 */
int
vattr_to_nattr(struct vattr *vap, struct nfsfattr *na)
{
	ASSERT(vap->va_type >= VNON && vap->va_type <= VBAD);
	na->na_type = vt_to_nf[vap->va_type];

	/* Pass the "unknown" sentinel through, widened to 32 bits. */
	if (vap->va_mode == (unsigned short) -1)
		na->na_mode = (uint32_t)-1;
	else
		na->na_mode = VTTOIF(vap->va_type) | vap->va_mode;

	if (vap->va_uid == (unsigned short)(-1))
		na->na_uid = (uint32_t)(-1);
	else if (vap->va_uid == UID_NOBODY)
		na->na_uid = (uint32_t)NFS_UID_NOBODY;
	else
		na->na_uid = vap->va_uid;

	if (vap->va_gid == (unsigned short)(-1))
		na->na_gid = (uint32_t)-1;
	else if (vap->va_gid == GID_NOBODY)
		na->na_gid = (uint32_t)NFS_GID_NOBODY;
	else
		na->na_gid = vap->va_gid;

	/*
	 * Do we need to check fsid for overflow?  It is 64-bit in the
	 * vattr, but are bigger than 32 bit values supported?
	 */
	na->na_fsid = vap->va_fsid;

	na->na_nodeid = vap->va_nodeid;

	/*
	 * Check to make sure that the nodeid is representable over the
	 * wire without losing bits.
	 */
	if (vap->va_nodeid != (u_longlong_t)na->na_nodeid)
		return (EFBIG);
	na->na_nlink = vap->va_nlink;

	/*
	 * Check for big files here, instead of at the caller.  See
	 * comments in cstat for large special file explanation.
	 */
	if (vap->va_size > (u_longlong_t)MAXOFF32_T) {
		if ((vap->va_type == VREG) || (vap->va_type == VDIR))
			return (EFBIG);
		if ((vap->va_type == VBLK) || (vap->va_type == VCHR)) {
			/* UNKNOWN_SIZE | OVERFLOW */
			na->na_size = MAXOFF32_T;
		} else
			na->na_size = vap->va_size;
	} else
		na->na_size = vap->va_size;

	/*
	 * If the vnode times overflow the 32-bit times that NFS2
	 * uses on the wire then return an error.
	 */
	if (!NFS_VAP_TIME_OK(vap)) {
		return (EOVERFLOW);
	}
	na->na_atime.tv_sec = vap->va_atime.tv_sec;
	na->na_atime.tv_usec = vap->va_atime.tv_nsec / 1000;

	na->na_mtime.tv_sec = vap->va_mtime.tv_sec;
	na->na_mtime.tv_usec = vap->va_mtime.tv_nsec / 1000;

	na->na_ctime.tv_sec = vap->va_ctime.tv_sec;
	na->na_ctime.tv_usec = vap->va_ctime.tv_nsec / 1000;

	/*
	 * If the dev_t will fit into 16 bits then compress
	 * it, otherwise leave it alone.  See comments in
	 * nfs_client.c.
	 */
	if (getminor(vap->va_rdev) <= SO4_MAXMIN &&
	    getmajor(vap->va_rdev) <= SO4_MAXMAJ)
		na->na_rdev = nfsv2_cmpdev(vap->va_rdev);
	else
		(void) cmpldev(&na->na_rdev, vap->va_rdev);

	na->na_blocks = vap->va_nblocks;
	na->na_blocksize = vap->va_blksize;

	/*
	 * This bit of ugliness is a *TEMPORARY* hack to preserve the
	 * over-the-wire protocols for named-pipe vnodes.  It remaps the
	 * VFIFO type to the special over-the-wire type.  (see note in nfs.h)
	 *
	 * BUYER BEWARE:
	 *  If you are porting the NFS to a non-Sun server, you probably
	 *  don't want to include the following block of code.  The
	 *  over-the-wire special file types will be changing with the
	 *  NFS Protocol Revision.
	 */
	if (vap->va_type == VFIFO)
		NA_SETFIFO(na);
	return (0);
}
2992 2996
/*
 * acl v2 support: returns approximate permission.
 *	default: returns minimal permission (more restrictive)
 *	aclok: returns maximal permission (less restrictive)
 * This routine changes the permissions that are already in *va.
 * If a file has minimal ACL, i.e. aclcnt == MIN_ACL_ENTRIES,
 * CLASS_OBJ is always the same as GROUP_OBJ entry.
 *
 * NOTE(review): mask_perm and other_orig are only assigned when the ACL
 * contains CLASS_OBJ/OTHER_OBJ entries; this code assumes a non-trivial
 * POSIX-draft ACL always includes both -- confirm against VOP_GETSECATTR
 * providers.
 */
static void
acl_perm(struct vnode *vp, struct exportinfo *exi, struct vattr *va, cred_t *cr)
{
	vsecattr_t vsa;
	int aclcnt;
	aclent_t *aclentp;
	mode_t mask_perm;	/* CLASS_OBJ (mask) permission bits */
	mode_t grp_perm;	/* synthesized group bits */
	mode_t other_perm;	/* synthesized other bits */
	mode_t other_orig;	/* original OTHER_OBJ bits */
	int error;

	/* dont care default acl */
	vsa.vsa_mask = (VSA_ACL | VSA_ACLCNT);
	error = VOP_GETSECATTR(vp, &vsa, 0, cr, NULL);

	if (!error) {
		aclcnt = vsa.vsa_aclcnt;
		if (aclcnt > MIN_ACL_ENTRIES) {
			/* non-trivial ACL */
			aclentp = vsa.vsa_aclentp;
			if (exi->exi_export.ex_flags & EX_ACLOK) {
				/* maximal permissions */
				grp_perm = 0;
				other_perm = 0;
				for (; aclcnt > 0; aclcnt--, aclentp++) {
					switch (aclentp->a_type) {
					case USER_OBJ:
						break;
					case USER:
						grp_perm |=
						    aclentp->a_perm << 3;
						other_perm |= aclentp->a_perm;
						break;
					case GROUP_OBJ:
						grp_perm |=
						    aclentp->a_perm << 3;
						break;
					case GROUP:
						other_perm |= aclentp->a_perm;
						break;
					case OTHER_OBJ:
						other_orig = aclentp->a_perm;
						break;
					case CLASS_OBJ:
						mask_perm = aclentp->a_perm;
						break;
					default:
						break;
					}
				}
				/* Apply the mask, then restore OTHER_OBJ. */
				grp_perm &= mask_perm << 3;
				other_perm &= mask_perm;
				other_perm |= other_orig;

			} else {
				/* minimal permissions */
				grp_perm = 070;
				other_perm = 07;
				for (; aclcnt > 0; aclcnt--, aclentp++) {
					switch (aclentp->a_type) {
					case USER_OBJ:
						break;
					case USER:
					case CLASS_OBJ:
						grp_perm &=
						    aclentp->a_perm << 3;
						other_perm &=
						    aclentp->a_perm;
						break;
					case GROUP_OBJ:
						grp_perm &=
						    aclentp->a_perm << 3;
						break;
					case GROUP:
						other_perm &=
						    aclentp->a_perm;
						break;
					case OTHER_OBJ:
						other_perm &=
						    aclentp->a_perm;
						break;
					default:
						break;
					}
				}
			}
			/* copy to va */
			va->va_mode &= ~077;
			va->va_mode |= grp_perm | other_perm;
		}
		if (vsa.vsa_aclcnt)
			kmem_free(vsa.vsa_aclentp,
			    vsa.vsa_aclcnt * sizeof (aclent_t));
	}
}
3097 3101
3098 3102 void
3099 3103 rfs_srvrinit(void)
3100 3104 {
3101 3105 nfs2_srv_caller_id = fs_new_caller_id();
3102 3106 zone_key_create(&rfs_zone_key, rfs_zone_init, NULL, rfs_zone_fini);
3103 3107 }
3104 3108
/*
 * Module teardown counterpart of rfs_srvrinit(); per-zone state is
 * torn down through the zone key callbacks, so nothing to do here.
 */
void
rfs_srvrfini(void)
{
}
3109 3113
3110 3114 /* ARGSUSED */
3111 3115 static void *
3112 3116 rfs_zone_init(zoneid_t zoneid)
3113 3117 {
3114 3118 nfs_srv_t *ns;
3115 3119
3116 3120 ns = kmem_zalloc(sizeof (*ns), KM_SLEEP);
3117 3121
3118 3122 mutex_init(&ns->async_write_lock, NULL, MUTEX_DEFAULT, NULL);
3119 3123 ns->write_async = 1;
3120 3124
3121 3125 return (ns);
3122 3126 }
3123 3127
3124 3128 /* ARGSUSED */
3125 3129 static void
3126 3130 rfs_zone_fini(zoneid_t zoneid, void *data)
3127 3131 {
3128 3132 nfs_srv_t *ns;
3129 3133
3130 3134 ns = (nfs_srv_t *)data;
3131 3135 mutex_destroy(&ns->async_write_lock);
3132 3136 kmem_free(ns, sizeof (*ns));
3133 3137 }
3134 3138
3135 3139 static int
3136 3140 rdma_setup_read_data2(struct nfsreadargs *ra, struct nfsrdresult *rr)
3137 3141 {
3138 3142 struct clist *wcl;
3139 3143 int wlist_len;
3140 3144 uint32_t count = rr->rr_count;
3141 3145
3142 3146 wcl = ra->ra_wlist;
3143 3147
3144 3148 if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE) {
3145 3149 return (FALSE);
3146 3150 }
3147 3151
3148 3152 wcl = ra->ra_wlist;
3149 3153 rr->rr_ok.rrok_wlist_len = wlist_len;
3150 3154 rr->rr_ok.rrok_wlist = wcl;
3151 3155
3152 3156 return (TRUE);
3153 3157 }
|
↓ open down ↓ |
1829 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX