Print this page
Fix NFS design problems re. multiple zone keys
Make NFS server zone-specific data all have the same lifetime
Fix rfs4_clean_state_exi
Fix exi_cache_reclaim
Fix mistakes in zone keys work
More fixes re. exi_zoneid and exi_tree
(danmcd -> Keep some ASSERT()s around for readability.)
[webrev page controls: Split | Close | Expand all | Collapse all]
--- old/usr/src/uts/common/fs/nfs/nfs_srv.c
+++ new/usr/src/uts/common/fs/nfs/nfs_srv.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved.
24 24 * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
25 25 * Copyright (c) 2016 by Delphix. All rights reserved.
26 26 */
27 27
28 28 /*
29 29 * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T.
30 30 * All rights reserved.
31 31 */
32 32
33 33 /*
34 34 * Copyright 2018 Nexenta Systems, Inc.
35 35 * Copyright (c) 2016 by Delphix. All rights reserved.
36 36 */
37 37
38 38 #include <sys/param.h>
39 39 #include <sys/types.h>
40 40 #include <sys/systm.h>
41 41 #include <sys/cred.h>
42 42 #include <sys/buf.h>
43 43 #include <sys/vfs.h>
44 44 #include <sys/vnode.h>
45 45 #include <sys/uio.h>
46 46 #include <sys/stat.h>
47 47 #include <sys/errno.h>
48 48 #include <sys/sysmacros.h>
49 49 #include <sys/statvfs.h>
50 50 #include <sys/kmem.h>
51 51 #include <sys/kstat.h>
52 52 #include <sys/dirent.h>
53 53 #include <sys/cmn_err.h>
54 54 #include <sys/debug.h>
55 55 #include <sys/vtrace.h>
56 56 #include <sys/mode.h>
57 57 #include <sys/acl.h>
58 58 #include <sys/nbmlock.h>
59 59 #include <sys/policy.h>
60 60 #include <sys/sdt.h>
61 61
62 62 #include <rpc/types.h>
63 63 #include <rpc/auth.h>
64 64 #include <rpc/svc.h>
65 65
66 66 #include <nfs/nfs.h>
67 67 #include <nfs/export.h>
68 68 #include <nfs/nfs_cmd.h>
69 69
70 70 #include <vm/hat.h>
71 71 #include <vm/as.h>
72 72 #include <vm/seg.h>
73 73 #include <vm/seg_map.h>
74 74 #include <vm/seg_kmem.h>
75 75
76 76 #include <sys/strsubr.h>
77 77
78 78 struct rfs_async_write_list;
79 79
80 80 /*
81 81 * Zone globals of NFSv2 server
82 82 */
83 83 typedef struct nfs_srv {
84 84 kmutex_t async_write_lock;
85 85 struct rfs_async_write_list *async_write_head;
86 86
87 87 /*
88 88 * enables write clustering if == 1
89 89 */
90 90 int write_async;
91 91 } nfs_srv_t;
[... 91 lines elided (collapsed in the webrev view) — expand in the original review to see them ...]
92 92
93 93 /*
94 94 * These are the interface routines for the server side of the
95 95 * Network File System. See the NFS version 2 protocol specification
96 96 * for a description of this interface.
97 97 */
98 98
99 99 static int sattr_to_vattr(struct nfssattr *, struct vattr *);
100 100 static void acl_perm(struct vnode *, struct exportinfo *, struct vattr *,
101 101 cred_t *);
102 -static void *rfs_zone_init(zoneid_t zoneid);
103 -static void rfs_zone_fini(zoneid_t zoneid, void *data);
104 102
105 103
106 104 /*
107 105 * Some "over the wire" UNIX file types. These are encoded
108 106 * into the mode. This needs to be fixed in the next rev.
109 107 */
110 108 #define IFMT 0170000 /* type of file */
111 109 #define IFCHR 0020000 /* character special */
112 110 #define IFBLK 0060000 /* block special */
113 111 #define IFSOCK 0140000 /* socket */
114 112
115 113 u_longlong_t nfs2_srv_caller_id;
116 -static zone_key_t rfs_zone_key;
117 114
115 +static nfs_srv_t *
116 +nfs_get_srv(void)
117 +{
118 + nfs_globals_t *ng = zone_getspecific(nfssrv_zone_key, curzone);
119 + nfs_srv_t *srv = ng->nfs_srv;
120 + ASSERT(srv != NULL);
121 + return (srv);
122 +}
123 +
118 124 /*
119 125 * Get file attributes.
120 126 * Returns the current attributes of the file with the given fhandle.
121 127 */
122 128 /* ARGSUSED */
123 129 void
124 130 rfs_getattr(fhandle_t *fhp, struct nfsattrstat *ns, struct exportinfo *exi,
125 131 struct svc_req *req, cred_t *cr, bool_t ro)
126 132 {
127 133 int error;
128 134 vnode_t *vp;
129 135 struct vattr va;
130 136
131 137 vp = nfs_fhtovp(fhp, exi);
132 138 if (vp == NULL) {
133 139 ns->ns_status = NFSERR_STALE;
134 140 return;
135 141 }
136 142
137 143 /*
138 144 * Do the getattr.
139 145 */
140 146 va.va_mask = AT_ALL; /* we want all the attributes */
141 147
142 148 error = rfs4_delegated_getattr(vp, &va, 0, cr);
143 149
144 150 /* check for overflows */
145 151 if (!error) {
146 152 /* Lie about the object type for a referral */
147 153 if (vn_is_nfs_reparse(vp, cr))
148 154 va.va_type = VLNK;
149 155
150 156 acl_perm(vp, exi, &va, cr);
151 157 error = vattr_to_nattr(&va, &ns->ns_attr);
152 158 }
153 159
154 160 VN_RELE(vp);
155 161
156 162 ns->ns_status = puterrno(error);
157 163 }
158 164 void *
159 165 rfs_getattr_getfh(fhandle_t *fhp)
160 166 {
161 167 return (fhp);
162 168 }
163 169
164 170 /*
165 171 * Set file attributes.
166 172 * Sets the attributes of the file with the given fhandle. Returns
167 173 * the new attributes.
168 174 */
169 175 /* ARGSUSED */
170 176 void
171 177 rfs_setattr(struct nfssaargs *args, struct nfsattrstat *ns,
172 178 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
173 179 {
174 180 int error;
175 181 int flag;
176 182 int in_crit = 0;
177 183 vnode_t *vp;
178 184 struct vattr va;
179 185 struct vattr bva;
180 186 struct flock64 bf;
181 187 caller_context_t ct;
182 188
183 189
184 190 vp = nfs_fhtovp(&args->saa_fh, exi);
185 191 if (vp == NULL) {
186 192 ns->ns_status = NFSERR_STALE;
187 193 return;
188 194 }
189 195
190 196 if (rdonly(ro, vp)) {
191 197 VN_RELE(vp);
192 198 ns->ns_status = NFSERR_ROFS;
193 199 return;
194 200 }
195 201
196 202 error = sattr_to_vattr(&args->saa_sa, &va);
197 203 if (error) {
198 204 VN_RELE(vp);
199 205 ns->ns_status = puterrno(error);
200 206 return;
201 207 }
202 208
203 209 /*
204 210 * If the client is requesting a change to the mtime,
205 211 * but the nanosecond field is set to 1 billion, then
206 212 * this is a flag to the server that it should set the
207 213 * atime and mtime fields to the server's current time.
208 214 * The 1 billion number actually came from the client
209 215 * as 1 million, but the units in the over the wire
210 216 * request are microseconds instead of nanoseconds.
211 217 *
212 218 * This is an overload of the protocol and should be
213 219 * documented in the NFS Version 2 protocol specification.
214 220 */
215 221 if (va.va_mask & AT_MTIME) {
216 222 if (va.va_mtime.tv_nsec == 1000000000) {
217 223 gethrestime(&va.va_mtime);
218 224 va.va_atime = va.va_mtime;
219 225 va.va_mask |= AT_ATIME;
220 226 flag = 0;
221 227 } else
222 228 flag = ATTR_UTIME;
223 229 } else
224 230 flag = 0;
225 231
226 232 /*
227 233 * If the filesystem is exported with nosuid, then mask off
228 234 * the setuid and setgid bits.
229 235 */
230 236 if ((va.va_mask & AT_MODE) && vp->v_type == VREG &&
231 237 (exi->exi_export.ex_flags & EX_NOSUID))
232 238 va.va_mode &= ~(VSUID | VSGID);
233 239
234 240 ct.cc_sysid = 0;
235 241 ct.cc_pid = 0;
236 242 ct.cc_caller_id = nfs2_srv_caller_id;
237 243 ct.cc_flags = CC_DONTBLOCK;
238 244
239 245 /*
240 246 * We need to specially handle size changes because it is
241 247 * possible for the client to create a file with modes
242 248 * which indicate read-only, but with the file opened for
243 249 * writing. If the client then tries to set the size of
244 250 * the file, then the normal access checking done in
245 251 * VOP_SETATTR would prevent the client from doing so,
246 252 * although it should be legal for it to do so. To get
247 253 * around this, we do the access checking for ourselves
248 254 * and then use VOP_SPACE which doesn't do the access
249 255 * checking which VOP_SETATTR does. VOP_SPACE can only
250 256 * operate on VREG files, let VOP_SETATTR handle the other
251 257 * extremely rare cases.
252 258 * Also the client should not be allowed to change the
253 259 * size of the file if there is a conflicting non-blocking
254 260 * mandatory lock in the region of change.
255 261 */
256 262 if (vp->v_type == VREG && va.va_mask & AT_SIZE) {
257 263 if (nbl_need_check(vp)) {
258 264 nbl_start_crit(vp, RW_READER);
259 265 in_crit = 1;
260 266 }
261 267
262 268 bva.va_mask = AT_UID | AT_SIZE;
263 269
264 270 error = VOP_GETATTR(vp, &bva, 0, cr, &ct);
265 271
266 272 if (error) {
267 273 if (in_crit)
268 274 nbl_end_crit(vp);
269 275 VN_RELE(vp);
270 276 ns->ns_status = puterrno(error);
271 277 return;
272 278 }
273 279
274 280 if (in_crit) {
275 281 u_offset_t offset;
276 282 ssize_t length;
277 283
278 284 if (va.va_size < bva.va_size) {
279 285 offset = va.va_size;
280 286 length = bva.va_size - va.va_size;
281 287 } else {
282 288 offset = bva.va_size;
283 289 length = va.va_size - bva.va_size;
284 290 }
285 291 if (nbl_conflict(vp, NBL_WRITE, offset, length, 0,
286 292 NULL)) {
287 293 error = EACCES;
288 294 }
289 295 }
290 296
291 297 if (crgetuid(cr) == bva.va_uid && !error &&
292 298 va.va_size != bva.va_size) {
293 299 va.va_mask &= ~AT_SIZE;
294 300 bf.l_type = F_WRLCK;
295 301 bf.l_whence = 0;
296 302 bf.l_start = (off64_t)va.va_size;
297 303 bf.l_len = 0;
298 304 bf.l_sysid = 0;
299 305 bf.l_pid = 0;
300 306
301 307 error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
302 308 (offset_t)va.va_size, cr, &ct);
303 309 }
304 310 if (in_crit)
305 311 nbl_end_crit(vp);
306 312 } else
307 313 error = 0;
308 314
309 315 /*
310 316 * Do the setattr.
311 317 */
312 318 if (!error && va.va_mask) {
313 319 error = VOP_SETATTR(vp, &va, flag, cr, &ct);
314 320 }
315 321
316 322 /*
317 323 * check if the monitor on either vop_space or vop_setattr detected
318 324 * a delegation conflict and if so, mark the thread flag as
319 325 * wouldblock so that the response is dropped and the client will
320 326 * try again.
321 327 */
322 328 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
323 329 VN_RELE(vp);
324 330 curthread->t_flag |= T_WOULDBLOCK;
325 331 return;
326 332 }
327 333
328 334 if (!error) {
329 335 va.va_mask = AT_ALL; /* get everything */
330 336
331 337 error = rfs4_delegated_getattr(vp, &va, 0, cr);
332 338
333 339 /* check for overflows */
334 340 if (!error) {
335 341 acl_perm(vp, exi, &va, cr);
336 342 error = vattr_to_nattr(&va, &ns->ns_attr);
337 343 }
338 344 }
339 345
340 346 ct.cc_flags = 0;
341 347
342 348 /*
343 349 * Force modified metadata out to stable storage.
344 350 */
345 351 (void) VOP_FSYNC(vp, FNODSYNC, cr, &ct);
346 352
347 353 VN_RELE(vp);
348 354
349 355 ns->ns_status = puterrno(error);
350 356 }
351 357 void *
352 358 rfs_setattr_getfh(struct nfssaargs *args)
353 359 {
354 360 return (&args->saa_fh);
355 361 }
356 362
357 363 /* Change and release @exip and @vpp only in success */
358 364 int
359 365 rfs_cross_mnt(vnode_t **vpp, struct exportinfo **exip)
360 366 {
361 367 struct exportinfo *exi;
362 368 vnode_t *vp = *vpp;
363 369 fid_t fid;
364 370 int error;
365 371
366 372 VN_HOLD(vp);
367 373
368 374 if ((error = traverse(&vp)) != 0) {
369 375 VN_RELE(vp);
370 376 return (error);
371 377 }
372 378
373 379 bzero(&fid, sizeof (fid));
374 380 fid.fid_len = MAXFIDSZ;
375 381 error = VOP_FID(vp, &fid, NULL);
376 382 if (error) {
377 383 VN_RELE(vp);
378 384 return (error);
379 385 }
380 386
381 387 exi = checkexport(&vp->v_vfsp->vfs_fsid, &fid);
382 388 if (exi == NULL ||
383 389 (exi->exi_export.ex_flags & EX_NOHIDE) == 0) {
384 390 /*
385 391 * It is not error, just subdir is not exported
386 392 * or "nohide" is not set
387 393 */
388 394 if (exi != NULL)
389 395 exi_rele(exi);
390 396 VN_RELE(vp);
391 397 } else {
392 398 /* go to submount */
393 399 exi_rele(*exip);
394 400 *exip = exi;
395 401
396 402 VN_RELE(*vpp);
397 403 *vpp = vp;
398 404 }
399 405
400 406 return (0);
401 407 }
402 408
403 409 /*
404 410 * Given mounted "dvp" and "exi", go upper mountpoint
405 411 * with dvp/exi correction
406 412 * Return 0 in success
407 413 */
408 414 int
409 415 rfs_climb_crossmnt(vnode_t **dvpp, struct exportinfo **exip, cred_t *cr)
410 416 {
411 417 struct exportinfo *exi;
412 418 vnode_t *dvp = *dvpp;
413 419
414 420 ASSERT3P((*exip)->exi_zone, ==, curzone);
415 421 ASSERT((dvp->v_flag & VROOT) || VN_IS_CURZONEROOT(dvp));
416 422
417 423 VN_HOLD(dvp);
418 424 dvp = untraverse(dvp);
419 425 exi = nfs_vptoexi(NULL, dvp, cr, NULL, NULL, FALSE);
420 426 if (exi == NULL) {
421 427 VN_RELE(dvp);
422 428 return (-1);
423 429 }
424 430
425 431 ASSERT3P(exi->exi_zone, ==, curzone);
426 432 exi_rele(*exip);
427 433 *exip = exi;
428 434 VN_RELE(*dvpp);
429 435 *dvpp = dvp;
430 436
431 437 return (0);
432 438 }
433 439 /*
434 440 * Directory lookup.
435 441 * Returns an fhandle and file attributes for file name in a directory.
436 442 */
437 443 /* ARGSUSED */
438 444 void
439 445 rfs_lookup(struct nfsdiropargs *da, struct nfsdiropres *dr,
440 446 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
441 447 {
442 448 int error;
443 449 vnode_t *dvp;
444 450 vnode_t *vp;
445 451 struct vattr va;
446 452 fhandle_t *fhp = da->da_fhandle;
447 453 struct sec_ol sec = {0, 0};
448 454 bool_t publicfh_flag = FALSE, auth_weak = FALSE;
449 455 char *name;
450 456 struct sockaddr *ca;
451 457
452 458 /*
453 459 * Trusted Extension doesn't support NFSv2. MOUNT
454 460 * will reject v2 clients. Need to prevent v2 client
455 461 * access via WebNFS here.
456 462 */
457 463 if (is_system_labeled() && req->rq_vers == 2) {
458 464 dr->dr_status = NFSERR_ACCES;
459 465 return;
460 466 }
461 467
462 468 /*
463 469 * Disallow NULL paths
464 470 */
465 471 if (da->da_name == NULL || *da->da_name == '\0') {
466 472 dr->dr_status = NFSERR_ACCES;
467 473 return;
468 474 }
469 475
470 476 /*
471 477 * Allow lookups from the root - the default
472 478 * location of the public filehandle.
473 479 */
474 480 if (exi != NULL && (exi->exi_export.ex_flags & EX_PUBLIC)) {
475 481 dvp = ZONE_ROOTVP();
476 482 VN_HOLD(dvp);
477 483 } else {
478 484 dvp = nfs_fhtovp(fhp, exi);
479 485 if (dvp == NULL) {
480 486 dr->dr_status = NFSERR_STALE;
481 487 return;
482 488 }
483 489 }
484 490
485 491 exi_hold(exi);
486 492 ASSERT3P(exi->exi_zone, ==, curzone);
487 493
488 494 /*
489 495 * Not allow lookup beyond root.
490 496 * If the filehandle matches a filehandle of the exi,
491 497 * then the ".." refers beyond the root of an exported filesystem.
492 498 */
493 499 if (strcmp(da->da_name, "..") == 0 &&
494 500 EQFID(&exi->exi_fid, (fid_t *)&fhp->fh_len)) {
495 501 if ((exi->exi_export.ex_flags & EX_NOHIDE) &&
496 502 ((dvp->v_flag & VROOT) || VN_IS_CURZONEROOT(dvp))) {
497 503 /*
498 504 * special case for ".." and 'nohide'exported root
499 505 */
500 506 if (rfs_climb_crossmnt(&dvp, &exi, cr) != 0) {
501 507 error = NFSERR_ACCES;
502 508 goto out;
503 509 }
504 510 } else {
505 511 error = NFSERR_NOENT;
506 512 goto out;
507 513 }
508 514 }
509 515
510 516 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
511 517 name = nfscmd_convname(ca, exi, da->da_name, NFSCMD_CONV_INBOUND,
512 518 MAXPATHLEN);
513 519
514 520 if (name == NULL) {
515 521 error = NFSERR_ACCES;
516 522 goto out;
517 523 }
518 524
519 525 /*
520 526 * If the public filehandle is used then allow
521 527 * a multi-component lookup, i.e. evaluate
522 528 * a pathname and follow symbolic links if
523 529 * necessary.
524 530 *
525 531 * This may result in a vnode in another filesystem
526 532 * which is OK as long as the filesystem is exported.
527 533 */
528 534 if (PUBLIC_FH2(fhp)) {
529 535 publicfh_flag = TRUE;
530 536
531 537 exi_rele(exi);
532 538
533 539 error = rfs_publicfh_mclookup(name, dvp, cr, &vp, &exi,
534 540 &sec);
535 541 } else {
536 542 /*
537 543 * Do a normal single component lookup.
538 544 */
539 545 error = VOP_LOOKUP(dvp, name, &vp, NULL, 0, NULL, cr,
540 546 NULL, NULL, NULL);
541 547 }
542 548
543 549 if (name != da->da_name)
544 550 kmem_free(name, MAXPATHLEN);
545 551
546 552 if (error == 0 && vn_ismntpt(vp)) {
547 553 error = rfs_cross_mnt(&vp, &exi);
548 554 if (error)
549 555 VN_RELE(vp);
550 556 }
551 557
552 558 if (!error) {
553 559 va.va_mask = AT_ALL; /* we want everything */
554 560
555 561 error = rfs4_delegated_getattr(vp, &va, 0, cr);
556 562
557 563 /* check for overflows */
558 564 if (!error) {
559 565 acl_perm(vp, exi, &va, cr);
560 566 error = vattr_to_nattr(&va, &dr->dr_attr);
561 567 if (!error) {
562 568 if (sec.sec_flags & SEC_QUERY)
563 569 error = makefh_ol(&dr->dr_fhandle, exi,
564 570 sec.sec_index);
565 571 else {
566 572 error = makefh(&dr->dr_fhandle, vp,
567 573 exi);
568 574 if (!error && publicfh_flag &&
569 575 !chk_clnt_sec(exi, req))
570 576 auth_weak = TRUE;
571 577 }
572 578 }
573 579 }
574 580 VN_RELE(vp);
575 581 }
576 582
577 583 out:
578 584 VN_RELE(dvp);
579 585
580 586 if (exi != NULL)
581 587 exi_rele(exi);
582 588
583 589 /*
584 590 * If it's public fh, no 0x81, and client's flavor is
585 591 * invalid, set WebNFS status to WNFSERR_CLNT_FLAVOR now.
586 592 * Then set RPC status to AUTH_TOOWEAK in common_dispatch.
587 593 */
588 594 if (auth_weak)
589 595 dr->dr_status = (enum nfsstat)WNFSERR_CLNT_FLAVOR;
590 596 else
591 597 dr->dr_status = puterrno(error);
592 598 }
593 599 void *
594 600 rfs_lookup_getfh(struct nfsdiropargs *da)
595 601 {
596 602 return (da->da_fhandle);
597 603 }
598 604
599 605 /*
600 606 * Read symbolic link.
601 607 * Returns the string in the symbolic link at the given fhandle.
602 608 */
603 609 /* ARGSUSED */
604 610 void
605 611 rfs_readlink(fhandle_t *fhp, struct nfsrdlnres *rl, struct exportinfo *exi,
606 612 struct svc_req *req, cred_t *cr, bool_t ro)
607 613 {
608 614 int error;
609 615 struct iovec iov;
610 616 struct uio uio;
611 617 vnode_t *vp;
612 618 struct vattr va;
613 619 struct sockaddr *ca;
614 620 char *name = NULL;
615 621 int is_referral = 0;
616 622
617 623 vp = nfs_fhtovp(fhp, exi);
618 624 if (vp == NULL) {
619 625 rl->rl_data = NULL;
620 626 rl->rl_status = NFSERR_STALE;
621 627 return;
622 628 }
623 629
624 630 va.va_mask = AT_MODE;
625 631
626 632 error = VOP_GETATTR(vp, &va, 0, cr, NULL);
627 633
628 634 if (error) {
629 635 VN_RELE(vp);
630 636 rl->rl_data = NULL;
631 637 rl->rl_status = puterrno(error);
632 638 return;
633 639 }
634 640
635 641 if (MANDLOCK(vp, va.va_mode)) {
636 642 VN_RELE(vp);
637 643 rl->rl_data = NULL;
638 644 rl->rl_status = NFSERR_ACCES;
639 645 return;
640 646 }
641 647
642 648 /* We lied about the object type for a referral */
643 649 if (vn_is_nfs_reparse(vp, cr))
644 650 is_referral = 1;
645 651
646 652 /*
647 653 * XNFS and RFC1094 require us to return ENXIO if argument
648 654 * is not a link. BUGID 1138002.
649 655 */
650 656 if (vp->v_type != VLNK && !is_referral) {
651 657 VN_RELE(vp);
652 658 rl->rl_data = NULL;
653 659 rl->rl_status = NFSERR_NXIO;
654 660 return;
655 661 }
656 662
657 663 /*
658 664 * Allocate data for pathname. This will be freed by rfs_rlfree.
659 665 */
660 666 rl->rl_data = kmem_alloc(NFS_MAXPATHLEN, KM_SLEEP);
661 667
662 668 if (is_referral) {
663 669 char *s;
664 670 size_t strsz;
665 671
666 672 /* Get an artificial symlink based on a referral */
667 673 s = build_symlink(vp, cr, &strsz);
668 674 global_svstat_ptr[2][NFS_REFERLINKS].value.ui64++;
669 675 DTRACE_PROBE2(nfs2serv__func__referral__reflink,
670 676 vnode_t *, vp, char *, s);
671 677 if (s == NULL)
672 678 error = EINVAL;
673 679 else {
674 680 error = 0;
675 681 (void) strlcpy(rl->rl_data, s, NFS_MAXPATHLEN);
676 682 rl->rl_count = (uint32_t)MIN(strsz, NFS_MAXPATHLEN);
677 683 kmem_free(s, strsz);
678 684 }
679 685
680 686 } else {
681 687
682 688 /*
683 689 * Set up io vector to read sym link data
684 690 */
685 691 iov.iov_base = rl->rl_data;
686 692 iov.iov_len = NFS_MAXPATHLEN;
687 693 uio.uio_iov = &iov;
688 694 uio.uio_iovcnt = 1;
689 695 uio.uio_segflg = UIO_SYSSPACE;
690 696 uio.uio_extflg = UIO_COPY_CACHED;
691 697 uio.uio_loffset = (offset_t)0;
692 698 uio.uio_resid = NFS_MAXPATHLEN;
693 699
694 700 /*
695 701 * Do the readlink.
696 702 */
697 703 error = VOP_READLINK(vp, &uio, cr, NULL);
698 704
699 705 rl->rl_count = (uint32_t)(NFS_MAXPATHLEN - uio.uio_resid);
700 706
701 707 if (!error)
702 708 rl->rl_data[rl->rl_count] = '\0';
703 709
704 710 }
705 711
706 712
707 713 VN_RELE(vp);
708 714
709 715 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
710 716 name = nfscmd_convname(ca, exi, rl->rl_data,
711 717 NFSCMD_CONV_OUTBOUND, MAXPATHLEN);
712 718
713 719 if (name != NULL && name != rl->rl_data) {
714 720 kmem_free(rl->rl_data, NFS_MAXPATHLEN);
715 721 rl->rl_data = name;
716 722 }
717 723
718 724 /*
719 725 * XNFS and RFC1094 require us to return ENXIO if argument
720 726 * is not a link. UFS returns EINVAL if this is the case,
721 727 * so we do the mapping here. BUGID 1138002.
722 728 */
723 729 if (error == EINVAL)
724 730 rl->rl_status = NFSERR_NXIO;
725 731 else
726 732 rl->rl_status = puterrno(error);
727 733
728 734 }
729 735 void *
730 736 rfs_readlink_getfh(fhandle_t *fhp)
731 737 {
732 738 return (fhp);
733 739 }
734 740 /*
735 741 * Free data allocated by rfs_readlink
736 742 */
737 743 void
738 744 rfs_rlfree(struct nfsrdlnres *rl)
739 745 {
740 746 if (rl->rl_data != NULL)
741 747 kmem_free(rl->rl_data, NFS_MAXPATHLEN);
742 748 }
743 749
744 750 static int rdma_setup_read_data2(struct nfsreadargs *, struct nfsrdresult *);
745 751
746 752 /*
747 753 * Read data.
748 754 * Returns some data read from the file at the given fhandle.
749 755 */
750 756 /* ARGSUSED */
751 757 void
752 758 rfs_read(struct nfsreadargs *ra, struct nfsrdresult *rr,
753 759 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
754 760 {
755 761 vnode_t *vp;
756 762 int error;
757 763 struct vattr va;
758 764 struct iovec iov;
759 765 struct uio uio;
760 766 mblk_t *mp;
761 767 int alloc_err = 0;
762 768 int in_crit = 0;
763 769 caller_context_t ct;
764 770
765 771 vp = nfs_fhtovp(&ra->ra_fhandle, exi);
766 772 if (vp == NULL) {
767 773 rr->rr_data = NULL;
768 774 rr->rr_status = NFSERR_STALE;
769 775 return;
770 776 }
771 777
772 778 if (vp->v_type != VREG) {
773 779 VN_RELE(vp);
774 780 rr->rr_data = NULL;
775 781 rr->rr_status = NFSERR_ISDIR;
776 782 return;
777 783 }
778 784
779 785 ct.cc_sysid = 0;
780 786 ct.cc_pid = 0;
781 787 ct.cc_caller_id = nfs2_srv_caller_id;
782 788 ct.cc_flags = CC_DONTBLOCK;
783 789
784 790 /*
785 791 * Enter the critical region before calling VOP_RWLOCK
786 792 * to avoid a deadlock with write requests.
787 793 */
788 794 if (nbl_need_check(vp)) {
789 795 nbl_start_crit(vp, RW_READER);
790 796 if (nbl_conflict(vp, NBL_READ, ra->ra_offset, ra->ra_count,
791 797 0, NULL)) {
792 798 nbl_end_crit(vp);
793 799 VN_RELE(vp);
794 800 rr->rr_data = NULL;
795 801 rr->rr_status = NFSERR_ACCES;
796 802 return;
797 803 }
798 804 in_crit = 1;
799 805 }
800 806
801 807 error = VOP_RWLOCK(vp, V_WRITELOCK_FALSE, &ct);
802 808
803 809 /* check if a monitor detected a delegation conflict */
804 810 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
805 811 if (in_crit)
806 812 nbl_end_crit(vp);
807 813 VN_RELE(vp);
808 814 /* mark as wouldblock so response is dropped */
809 815 curthread->t_flag |= T_WOULDBLOCK;
810 816
811 817 rr->rr_data = NULL;
812 818 return;
813 819 }
814 820
815 821 va.va_mask = AT_ALL;
816 822
817 823 error = VOP_GETATTR(vp, &va, 0, cr, &ct);
818 824
819 825 if (error) {
820 826 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
821 827 if (in_crit)
822 828 nbl_end_crit(vp);
823 829
824 830 VN_RELE(vp);
825 831 rr->rr_data = NULL;
826 832 rr->rr_status = puterrno(error);
827 833
828 834 return;
829 835 }
830 836
831 837 /*
832 838 * This is a kludge to allow reading of files created
833 839 * with no read permission. The owner of the file
834 840 * is always allowed to read it.
835 841 */
836 842 if (crgetuid(cr) != va.va_uid) {
837 843 error = VOP_ACCESS(vp, VREAD, 0, cr, &ct);
838 844
839 845 if (error) {
840 846 /*
841 847 * Exec is the same as read over the net because
842 848 * of demand loading.
843 849 */
844 850 error = VOP_ACCESS(vp, VEXEC, 0, cr, &ct);
845 851 }
846 852 if (error) {
847 853 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
848 854 if (in_crit)
849 855 nbl_end_crit(vp);
850 856 VN_RELE(vp);
851 857 rr->rr_data = NULL;
852 858 rr->rr_status = puterrno(error);
853 859
854 860 return;
855 861 }
856 862 }
857 863
858 864 if (MANDLOCK(vp, va.va_mode)) {
859 865 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
860 866 if (in_crit)
861 867 nbl_end_crit(vp);
862 868
863 869 VN_RELE(vp);
864 870 rr->rr_data = NULL;
865 871 rr->rr_status = NFSERR_ACCES;
866 872
867 873 return;
868 874 }
869 875
870 876 rr->rr_ok.rrok_wlist_len = 0;
871 877 rr->rr_ok.rrok_wlist = NULL;
872 878
873 879 if ((u_offset_t)ra->ra_offset >= va.va_size) {
874 880 rr->rr_count = 0;
875 881 rr->rr_data = NULL;
876 882 /*
877 883 * In this case, status is NFS_OK, but there is no data
878 884 * to encode. So set rr_mp to NULL.
879 885 */
880 886 rr->rr_mp = NULL;
881 887 rr->rr_ok.rrok_wlist = ra->ra_wlist;
882 888 if (rr->rr_ok.rrok_wlist)
883 889 clist_zero_len(rr->rr_ok.rrok_wlist);
884 890 goto done;
885 891 }
886 892
887 893 if (ra->ra_wlist) {
888 894 mp = NULL;
889 895 rr->rr_mp = NULL;
890 896 (void) rdma_get_wchunk(req, &iov, ra->ra_wlist);
891 897 if (ra->ra_count > iov.iov_len) {
892 898 rr->rr_data = NULL;
893 899 rr->rr_status = NFSERR_INVAL;
894 900 goto done;
895 901 }
896 902 } else {
897 903 /*
898 904 * mp will contain the data to be sent out in the read reply.
899 905 * This will be freed after the reply has been sent out (by the
900 906 * driver).
901 907 * Let's roundup the data to a BYTES_PER_XDR_UNIT multiple, so
902 908 * that the call to xdrmblk_putmblk() never fails.
903 909 */
904 910 mp = allocb_wait(RNDUP(ra->ra_count), BPRI_MED, STR_NOSIG,
905 911 &alloc_err);
906 912 ASSERT(mp != NULL);
907 913 ASSERT(alloc_err == 0);
908 914
909 915 rr->rr_mp = mp;
910 916
911 917 /*
912 918 * Set up io vector
913 919 */
914 920 iov.iov_base = (caddr_t)mp->b_datap->db_base;
915 921 iov.iov_len = ra->ra_count;
916 922 }
917 923
918 924 uio.uio_iov = &iov;
919 925 uio.uio_iovcnt = 1;
920 926 uio.uio_segflg = UIO_SYSSPACE;
921 927 uio.uio_extflg = UIO_COPY_CACHED;
922 928 uio.uio_loffset = (offset_t)ra->ra_offset;
923 929 uio.uio_resid = ra->ra_count;
924 930
925 931 error = VOP_READ(vp, &uio, 0, cr, &ct);
926 932
927 933 if (error) {
928 934 if (mp)
929 935 freeb(mp);
930 936
931 937 /*
932 938 * check if a monitor detected a delegation conflict and
933 939 * mark as wouldblock so response is dropped
934 940 */
935 941 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
936 942 curthread->t_flag |= T_WOULDBLOCK;
937 943 else
938 944 rr->rr_status = puterrno(error);
939 945
940 946 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
941 947 if (in_crit)
942 948 nbl_end_crit(vp);
943 949
944 950 VN_RELE(vp);
945 951 rr->rr_data = NULL;
946 952
947 953 return;
948 954 }
949 955
950 956 /*
951 957 * Get attributes again so we can send the latest access
952 958 * time to the client side for its cache.
953 959 */
954 960 va.va_mask = AT_ALL;
955 961
956 962 error = VOP_GETATTR(vp, &va, 0, cr, &ct);
957 963
958 964 if (error) {
959 965 if (mp)
960 966 freeb(mp);
961 967
962 968 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
963 969 if (in_crit)
964 970 nbl_end_crit(vp);
965 971
966 972 VN_RELE(vp);
967 973 rr->rr_data = NULL;
968 974 rr->rr_status = puterrno(error);
969 975
970 976 return;
971 977 }
972 978
973 979 rr->rr_count = (uint32_t)(ra->ra_count - uio.uio_resid);
974 980
975 981 if (mp) {
976 982 rr->rr_data = (char *)mp->b_datap->db_base;
977 983 } else {
978 984 if (ra->ra_wlist) {
979 985 rr->rr_data = (caddr_t)iov.iov_base;
980 986 if (!rdma_setup_read_data2(ra, rr)) {
981 987 rr->rr_data = NULL;
982 988 rr->rr_status = puterrno(NFSERR_INVAL);
983 989 }
984 990 }
985 991 }
986 992 done:
987 993 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
988 994 if (in_crit)
989 995 nbl_end_crit(vp);
990 996
991 997 acl_perm(vp, exi, &va, cr);
992 998
993 999 /* check for overflows */
994 1000 error = vattr_to_nattr(&va, &rr->rr_attr);
995 1001
996 1002 VN_RELE(vp);
997 1003
998 1004 rr->rr_status = puterrno(error);
999 1005 }
1000 1006
1001 1007 /*
1002 1008 * Free data allocated by rfs_read
1003 1009 */
1004 1010 void
1005 1011 rfs_rdfree(struct nfsrdresult *rr)
1006 1012 {
1007 1013 mblk_t *mp;
1008 1014
1009 1015 if (rr->rr_status == NFS_OK) {
1010 1016 mp = rr->rr_mp;
1011 1017 if (mp != NULL)
1012 1018 freeb(mp);
1013 1019 }
1014 1020 }
1015 1021
1016 1022 void *
1017 1023 rfs_read_getfh(struct nfsreadargs *ra)
1018 1024 {
1019 1025 return (&ra->ra_fhandle);
1020 1026 }
1021 1027
1022 1028 #define MAX_IOVECS 12
1023 1029
1024 1030 #ifdef DEBUG
1025 1031 static int rfs_write_sync_hits = 0;
1026 1032 static int rfs_write_sync_misses = 0;
1027 1033 #endif
1028 1034
1029 1035 /*
1030 1036 * Write data to file.
1031 1037 * Returns attributes of a file after writing some data to it.
1032 1038 *
1033 1039 * Any changes made here, especially in error handling might have
1034 1040 * to also be done in rfs_write (which clusters write requests).
1035 1041 */
1036 1042 /* ARGSUSED */
1037 1043 void
1038 1044 rfs_write_sync(struct nfswriteargs *wa, struct nfsattrstat *ns,
1039 1045 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
1040 1046 {
1041 1047 int error;
1042 1048 vnode_t *vp;
1043 1049 rlim64_t rlimit;
1044 1050 struct vattr va;
1045 1051 struct uio uio;
1046 1052 struct iovec iov[MAX_IOVECS];
1047 1053 mblk_t *m;
1048 1054 struct iovec *iovp;
1049 1055 int iovcnt;
1050 1056 cred_t *savecred;
1051 1057 int in_crit = 0;
1052 1058 caller_context_t ct;
1053 1059
1054 1060 vp = nfs_fhtovp(&wa->wa_fhandle, exi);
1055 1061 if (vp == NULL) {
1056 1062 ns->ns_status = NFSERR_STALE;
1057 1063 return;
1058 1064 }
1059 1065
1060 1066 if (rdonly(ro, vp)) {
1061 1067 VN_RELE(vp);
1062 1068 ns->ns_status = NFSERR_ROFS;
1063 1069 return;
1064 1070 }
1065 1071
1066 1072 if (vp->v_type != VREG) {
1067 1073 VN_RELE(vp);
1068 1074 ns->ns_status = NFSERR_ISDIR;
1069 1075 return;
1070 1076 }
1071 1077
1072 1078 ct.cc_sysid = 0;
1073 1079 ct.cc_pid = 0;
1074 1080 ct.cc_caller_id = nfs2_srv_caller_id;
1075 1081 ct.cc_flags = CC_DONTBLOCK;
1076 1082
1077 1083 va.va_mask = AT_UID|AT_MODE;
1078 1084
1079 1085 error = VOP_GETATTR(vp, &va, 0, cr, &ct);
1080 1086
1081 1087 if (error) {
1082 1088 VN_RELE(vp);
1083 1089 ns->ns_status = puterrno(error);
1084 1090
1085 1091 return;
1086 1092 }
1087 1093
1088 1094 if (crgetuid(cr) != va.va_uid) {
1089 1095 /*
1090 1096 * This is a kludge to allow writes of files created
1091 1097 * with read only permission. The owner of the file
1092 1098 * is always allowed to write it.
1093 1099 */
1094 1100 error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct);
1095 1101
1096 1102 if (error) {
1097 1103 VN_RELE(vp);
1098 1104 ns->ns_status = puterrno(error);
1099 1105 return;
1100 1106 }
1101 1107 }
1102 1108
1103 1109 /*
1104 1110 * Can't access a mandatory lock file. This might cause
1105 1111 * the NFS service thread to block forever waiting for a
1106 1112 * lock to be released that will never be released.
1107 1113 */
1108 1114 if (MANDLOCK(vp, va.va_mode)) {
1109 1115 VN_RELE(vp);
1110 1116 ns->ns_status = NFSERR_ACCES;
1111 1117 return;
1112 1118 }
1113 1119
1114 1120 /*
1115 1121 * We have to enter the critical region before calling VOP_RWLOCK
1116 1122 * to avoid a deadlock with ufs.
1117 1123 */
1118 1124 if (nbl_need_check(vp)) {
1119 1125 nbl_start_crit(vp, RW_READER);
1120 1126 in_crit = 1;
1121 1127 if (nbl_conflict(vp, NBL_WRITE, wa->wa_offset,
1122 1128 wa->wa_count, 0, NULL)) {
1123 1129 error = EACCES;
1124 1130 goto out;
1125 1131 }
1126 1132 }
1127 1133
1128 1134 error = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct);
1129 1135
1130 1136 /* check if a monitor detected a delegation conflict */
1131 1137 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1132 1138 goto out;
1133 1139 }
1134 1140
1135 1141 if (wa->wa_data || wa->wa_rlist) {
1136 1142 /* Do the RDMA thing if necessary */
1137 1143 if (wa->wa_rlist) {
1138 1144 iov[0].iov_base = (char *)((wa->wa_rlist)->u.c_daddr3);
1139 1145 iov[0].iov_len = wa->wa_count;
1140 1146 } else {
1141 1147 iov[0].iov_base = wa->wa_data;
1142 1148 iov[0].iov_len = wa->wa_count;
1143 1149 }
1144 1150 uio.uio_iov = iov;
1145 1151 uio.uio_iovcnt = 1;
1146 1152 uio.uio_segflg = UIO_SYSSPACE;
1147 1153 uio.uio_extflg = UIO_COPY_DEFAULT;
1148 1154 uio.uio_loffset = (offset_t)wa->wa_offset;
1149 1155 uio.uio_resid = wa->wa_count;
1150 1156 /*
1151 1157 * The limit is checked on the client. We
1152 1158 * should allow any size writes here.
1153 1159 */
1154 1160 uio.uio_llimit = curproc->p_fsz_ctl;
1155 1161 rlimit = uio.uio_llimit - wa->wa_offset;
1156 1162 if (rlimit < (rlim64_t)uio.uio_resid)
1157 1163 uio.uio_resid = (uint_t)rlimit;
1158 1164
1159 1165 /*
1160 1166 * for now we assume no append mode
1161 1167 */
1162 1168 /*
1163 1169 * We're changing creds because VM may fault and we need
1164 1170 * the cred of the current thread to be used if quota
1165 1171 * checking is enabled.
1166 1172 */
1167 1173 savecred = curthread->t_cred;
1168 1174 curthread->t_cred = cr;
1169 1175 error = VOP_WRITE(vp, &uio, FSYNC, cr, &ct);
1170 1176 curthread->t_cred = savecred;
1171 1177 } else {
1172 1178
1173 1179 iovcnt = 0;
1174 1180 for (m = wa->wa_mblk; m != NULL; m = m->b_cont)
1175 1181 iovcnt++;
1176 1182 if (iovcnt <= MAX_IOVECS) {
1177 1183 #ifdef DEBUG
1178 1184 rfs_write_sync_hits++;
1179 1185 #endif
1180 1186 iovp = iov;
1181 1187 } else {
1182 1188 #ifdef DEBUG
1183 1189 rfs_write_sync_misses++;
1184 1190 #endif
1185 1191 iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
1186 1192 }
1187 1193 mblk_to_iov(wa->wa_mblk, iovcnt, iovp);
1188 1194 uio.uio_iov = iovp;
1189 1195 uio.uio_iovcnt = iovcnt;
1190 1196 uio.uio_segflg = UIO_SYSSPACE;
1191 1197 uio.uio_extflg = UIO_COPY_DEFAULT;
1192 1198 uio.uio_loffset = (offset_t)wa->wa_offset;
1193 1199 uio.uio_resid = wa->wa_count;
1194 1200 /*
1195 1201 * The limit is checked on the client. We
1196 1202 * should allow any size writes here.
1197 1203 */
1198 1204 uio.uio_llimit = curproc->p_fsz_ctl;
1199 1205 rlimit = uio.uio_llimit - wa->wa_offset;
1200 1206 if (rlimit < (rlim64_t)uio.uio_resid)
1201 1207 uio.uio_resid = (uint_t)rlimit;
1202 1208
1203 1209 /*
1204 1210 * For now we assume no append mode.
1205 1211 */
1206 1212 /*
1207 1213 * We're changing creds because VM may fault and we need
1208 1214 * the cred of the current thread to be used if quota
1209 1215 * checking is enabled.
1210 1216 */
1211 1217 savecred = curthread->t_cred;
1212 1218 curthread->t_cred = cr;
1213 1219 error = VOP_WRITE(vp, &uio, FSYNC, cr, &ct);
1214 1220 curthread->t_cred = savecred;
1215 1221
1216 1222 if (iovp != iov)
1217 1223 kmem_free(iovp, sizeof (*iovp) * iovcnt);
1218 1224 }
1219 1225
1220 1226 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct);
1221 1227
1222 1228 if (!error) {
1223 1229 /*
1224 1230 * Get attributes again so we send the latest mod
1225 1231 * time to the client side for its cache.
1226 1232 */
1227 1233 va.va_mask = AT_ALL; /* now we want everything */
1228 1234
1229 1235 error = VOP_GETATTR(vp, &va, 0, cr, &ct);
1230 1236
1231 1237 /* check for overflows */
1232 1238 if (!error) {
1233 1239 acl_perm(vp, exi, &va, cr);
1234 1240 error = vattr_to_nattr(&va, &ns->ns_attr);
1235 1241 }
1236 1242 }
1237 1243
1238 1244 out:
1239 1245 if (in_crit)
1240 1246 nbl_end_crit(vp);
1241 1247 VN_RELE(vp);
1242 1248
1243 1249 /* check if a monitor detected a delegation conflict */
1244 1250 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
1245 1251 /* mark as wouldblock so response is dropped */
1246 1252 curthread->t_flag |= T_WOULDBLOCK;
1247 1253 else
1248 1254 ns->ns_status = puterrno(error);
1249 1255
1250 1256 }
1251 1257
/*
 * One pending NFSv2 WRITE request, queued on a per-file cluster
 * (struct rfs_async_write_list) so that contiguous writes can be
 * coalesced into a single VOP_WRITE by the thread owning the cluster.
 */
struct rfs_async_write {
	struct nfswriteargs *wa;	/* decoded WRITE arguments */
	struct nfsattrstat *ns;		/* response; ns_status also acts as */
					/* "not yet done" state (RFSWRITE_INITVAL) */
	struct svc_req *req;		/* RPC request handle */
	cred_t *cr;			/* credentials of the requester */
	bool_t ro;			/* export is read-only for this client */
	kthread_t *thread;		/* service thread parked on this entry */
	struct rfs_async_write *list;	/* next request, sorted by wa_offset */
};
1261 1267
/*
 * A cluster of pending async WRITE requests to a single file, identified
 * by file handle.  Clusters are linked off the per-zone nfs_srv_t
 * async_write_head list and protected by its async_write_lock (see
 * rfs_write() below).
 */
struct rfs_async_write_list {
	fhandle_t *fhp;			/* file handle common to all requests */
	kcondvar_t cv;			/* broadcast when the cluster completes */
	struct rfs_async_write *list;	/* requests, ordered by start offset */
	struct rfs_async_write_list *next;	/* next cluster on the list */
};
1268 1274
/*
 * NOTE(review): rfs_write() below operates on the per-zone state in
 * nfs_srv_t (write_async, async_write_head, async_write_lock) rather than
 * on these file-scope variables; these three statics appear to be
 * leftovers from before the multi-zone rework — confirm there are no
 * remaining references elsewhere in this file before removing them.
 */
static struct rfs_async_write_list *rfs_async_write_head = NULL;
static kmutex_t rfs_async_write_lock;
static int rfs_write_async = 1;	/* enables write clustering if == 1 */

/* Max iovecs gathered into one clustered VOP_WRITE before heap fallback. */
#define	MAXCLIOVECS	42
/* Sentinel meaning "response not filled in yet"; 0 would read as NFS_OK. */
#define	RFSWRITE_INITVAL	(enum nfsstat) -1

#ifdef DEBUG
/* counters: clusters that fit in the stack iovec array vs. needed kmem */
static int rfs_write_hits = 0;
static int rfs_write_misses = 0;
#endif
1280 1286
1281 1287 /*
1282 1288 * Write data to file.
1283 1289 * Returns attributes of a file after writing some data to it.
1284 1290 */
/*
 * Write data to file.
 * Returns attributes of a file after writing some data to it.
 *
 * When write clustering is enabled (nsrv->write_async), concurrent WRITEs
 * to the same file handle are gathered into a cluster: the first thread
 * to arrive creates the cluster and does the actual VOP_WRITE work for
 * everyone; later arrivals insert themselves in offset order and sleep on
 * the cluster's cv until their ns_status is filled in.  A request's
 * ns_status == RFSWRITE_INITVAL means "not yet completed".
 */
void
rfs_write(struct nfswriteargs *wa, struct nfsattrstat *ns,
	struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error;
	vnode_t *vp;
	rlim64_t rlimit;
	struct vattr va;
	struct uio uio;
	struct rfs_async_write_list *lp;
	struct rfs_async_write_list *nlp;
	struct rfs_async_write *rp;
	struct rfs_async_write *nrp;
	struct rfs_async_write *trp;
	struct rfs_async_write *lrp;
	int data_written;
	int iovcnt;
	mblk_t *m;
	struct iovec *iovp;
	struct iovec *niovp;
	struct iovec iov[MAXCLIOVECS];
	int count;
	int rcount;
	uint_t off;
	uint_t len;
	struct rfs_async_write nrpsp;	/* this request's cluster entry (on stack) */
	struct rfs_async_write_list nlpsp;	/* cluster head if we start one (on stack) */
	ushort_t t_flag;
	cred_t *savecred;
	int in_crit = 0;
	caller_context_t ct;
	nfs_srv_t *nsrv;

	/* The export, if any, must belong to the zone servicing the request. */
	ASSERT(exi == NULL || exi->exi_zoneid == curzone->zone_id);
	nsrv = nfs_get_srv();
	if (!nsrv->write_async) {
		/* clustering disabled: fall back to the synchronous path */
		rfs_write_sync(wa, ns, exi, req, cr, ro);
		return;
	}

	/*
	 * Initialize status to RFSWRITE_INITVAL instead of 0, since value of 0
	 * is considered an OK.
	 */
	ns->ns_status = RFSWRITE_INITVAL;

	nrp = &nrpsp;
	nrp->wa = wa;
	nrp->ns = ns;
	nrp->req = req;
	nrp->cr = cr;
	nrp->ro = ro;
	nrp->thread = curthread;

	/*
	 * Cluster entries live on service-thread stacks; those stacks must
	 * not be swapped out while other threads hold pointers into them.
	 */
	ASSERT(curthread->t_schedflag & TS_DONT_SWAP);

	/*
	 * Look to see if there is already a cluster started
	 * for this file.
	 */
	mutex_enter(&nsrv->async_write_lock);
	for (lp = nsrv->async_write_head; lp != NULL; lp = lp->next) {
		if (bcmp(&wa->wa_fhandle, lp->fhp,
		    sizeof (fhandle_t)) == 0)
			break;
	}

	/*
	 * If lp is non-NULL, then there is already a cluster
	 * started.  We need to place ourselves in the cluster
	 * list in the right place as determined by starting
	 * offset.  Conflicts with non-blocking mandatory locked
	 * regions will be checked when the cluster is processed.
	 */
	if (lp != NULL) {
		rp = lp->list;
		trp = NULL;
		while (rp != NULL && rp->wa->wa_offset < wa->wa_offset) {
			trp = rp;
			rp = rp->list;
		}
		nrp->list = rp;
		if (trp == NULL)
			lp->list = nrp;
		else
			trp->list = nrp;
		/* Sleep until the cluster owner fills in our status. */
		while (nrp->ns->ns_status == RFSWRITE_INITVAL)
			cv_wait(&lp->cv, &nsrv->async_write_lock);
		mutex_exit(&nsrv->async_write_lock);

		return;
	}

	/*
	 * No cluster started yet, start one and add ourselves
	 * to the list of clusters.
	 */
	nrp->list = NULL;

	nlp = &nlpsp;
	nlp->fhp = &wa->wa_fhandle;
	cv_init(&nlp->cv, NULL, CV_DEFAULT, NULL);
	nlp->list = nrp;
	nlp->next = NULL;

	if (nsrv->async_write_head == NULL) {
		nsrv->async_write_head = nlp;
	} else {
		lp = nsrv->async_write_head;
		while (lp->next != NULL)
			lp = lp->next;
		lp->next = nlp;
	}
	mutex_exit(&nsrv->async_write_lock);

	/*
	 * Convert the file handle common to all of the requests
	 * in this cluster to a vnode.
	 */
	vp = nfs_fhtovp(&wa->wa_fhandle, exi);
	if (vp == NULL) {
		/* Unlink the cluster and fail every queued request as STALE. */
		mutex_enter(&nsrv->async_write_lock);
		if (nsrv->async_write_head == nlp)
			nsrv->async_write_head = nlp->next;
		else {
			lp = nsrv->async_write_head;
			while (lp->next != nlp)
				lp = lp->next;
			lp->next = nlp->next;
		}
		t_flag = curthread->t_flag & T_WOULDBLOCK;
		for (rp = nlp->list; rp != NULL; rp = rp->list) {
			rp->ns->ns_status = NFSERR_STALE;
			rp->thread->t_flag |= t_flag;
		}
		cv_broadcast(&nlp->cv);
		mutex_exit(&nsrv->async_write_lock);

		return;
	}

	/*
	 * Can only write regular files.  Attempts to write any
	 * other file types fail with EISDIR.
	 */
	if (vp->v_type != VREG) {
		VN_RELE(vp);
		mutex_enter(&nsrv->async_write_lock);
		if (nsrv->async_write_head == nlp)
			nsrv->async_write_head = nlp->next;
		else {
			lp = nsrv->async_write_head;
			while (lp->next != nlp)
				lp = lp->next;
			lp->next = nlp->next;
		}
		t_flag = curthread->t_flag & T_WOULDBLOCK;
		for (rp = nlp->list; rp != NULL; rp = rp->list) {
			rp->ns->ns_status = NFSERR_ISDIR;
			rp->thread->t_flag |= t_flag;
		}
		cv_broadcast(&nlp->cv);
		mutex_exit(&nsrv->async_write_lock);

		return;
	}

	/*
	 * Enter the critical region before calling VOP_RWLOCK, to avoid a
	 * deadlock with ufs.
	 */
	if (nbl_need_check(vp)) {
		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
	}

	ct.cc_sysid = 0;
	ct.cc_pid = 0;
	ct.cc_caller_id = nfs2_srv_caller_id;
	ct.cc_flags = CC_DONTBLOCK;

	/*
	 * Lock the file for writing.  This operation provides
	 * the delay which allows clusters to grow.
	 */
	error = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct);

	/* check if a monitor detected a delegation conflict */
	if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
		if (in_crit)
			nbl_end_crit(vp);
		VN_RELE(vp);
		/* mark as wouldblock so response is dropped */
		curthread->t_flag |= T_WOULDBLOCK;
		mutex_enter(&nsrv->async_write_lock);
		if (nsrv->async_write_head == nlp)
			nsrv->async_write_head = nlp->next;
		else {
			lp = nsrv->async_write_head;
			while (lp->next != nlp)
				lp = lp->next;
			lp->next = nlp->next;
		}
		for (rp = nlp->list; rp != NULL; rp = rp->list) {
			if (rp->ns->ns_status == RFSWRITE_INITVAL) {
				rp->ns->ns_status = puterrno(error);
				rp->thread->t_flag |= T_WOULDBLOCK;
			}
		}
		cv_broadcast(&nlp->cv);
		mutex_exit(&nsrv->async_write_lock);

		return;
	}

	/*
	 * Disconnect this cluster from the list of clusters.
	 * The cluster that is being dealt with must be fixed
	 * in size after this point, so there is no reason
	 * to leave it on the list so that new requests can
	 * find it.
	 *
	 * The algorithm is that the first write request will
	 * create a cluster, convert the file handle to a
	 * vnode pointer, and then lock the file for writing.
	 * This request is not likely to be clustered with
	 * any others.  However, the next request will create
	 * a new cluster and be blocked in VOP_RWLOCK while
	 * the first request is being processed.  This delay
	 * will allow more requests to be clustered in this
	 * second cluster.
	 */
	mutex_enter(&nsrv->async_write_lock);
	if (nsrv->async_write_head == nlp)
		nsrv->async_write_head = nlp->next;
	else {
		lp = nsrv->async_write_head;
		while (lp->next != nlp)
			lp = lp->next;
		lp->next = nlp->next;
	}
	mutex_exit(&nsrv->async_write_lock);

	/*
	 * Step through the list of requests in this cluster.
	 * We need to check permissions to make sure that all
	 * of the requests have sufficient permission to write
	 * the file.  A cluster can be composed of requests
	 * from different clients and different users on each
	 * client.
	 *
	 * As a side effect, we also calculate the size of the
	 * byte range that this cluster encompasses.
	 */
	rp = nlp->list;
	off = rp->wa->wa_offset;
	len = (uint_t)0;
	do {
		if (rdonly(rp->ro, vp)) {
			rp->ns->ns_status = NFSERR_ROFS;
			t_flag = curthread->t_flag & T_WOULDBLOCK;
			rp->thread->t_flag |= t_flag;
			continue;
		}

		va.va_mask = AT_UID|AT_MODE;

		error = VOP_GETATTR(vp, &va, 0, rp->cr, &ct);

		if (!error) {
			if (crgetuid(rp->cr) != va.va_uid) {
				/*
				 * This is a kludge to allow writes of files
				 * created with read only permission.  The
				 * owner of the file is always allowed to
				 * write it.
				 */
				error = VOP_ACCESS(vp, VWRITE, 0, rp->cr, &ct);
			}
			if (!error && MANDLOCK(vp, va.va_mode))
				error = EACCES;
		}

		/*
		 * Check for a conflict with a nbmand-locked region.
		 */
		if (in_crit && nbl_conflict(vp, NBL_WRITE, rp->wa->wa_offset,
		    rp->wa->wa_count, 0, NULL)) {
			error = EACCES;
		}

		if (error) {
			rp->ns->ns_status = puterrno(error);
			t_flag = curthread->t_flag & T_WOULDBLOCK;
			rp->thread->t_flag |= t_flag;
			continue;
		}
		if (len < rp->wa->wa_offset + rp->wa->wa_count - off)
			len = rp->wa->wa_offset + rp->wa->wa_count - off;
	} while ((rp = rp->list) != NULL);

	/*
	 * Step through the cluster attempting to gather as many
	 * requests which are contiguous as possible.  These
	 * contiguous requests are handled via one call to VOP_WRITE
	 * instead of different calls to VOP_WRITE.  We also keep
	 * track of the fact that any data was written.
	 */
	rp = nlp->list;
	data_written = 0;
	do {
		/*
		 * Skip any requests which are already marked as having an
		 * error.
		 */
		if (rp->ns->ns_status != RFSWRITE_INITVAL) {
			rp = rp->list;
			continue;
		}

		/*
		 * Count the number of iovec's which are required
		 * to handle this set of requests.  One iovec is
		 * needed for each data buffer, whether addressed
		 * by wa_data or by the b_rptr pointers in the
		 * mblk chains.
		 */
		iovcnt = 0;
		lrp = rp;
		for (;;) {
			if (lrp->wa->wa_data || lrp->wa->wa_rlist)
				iovcnt++;
			else {
				m = lrp->wa->wa_mblk;
				while (m != NULL) {
					iovcnt++;
					m = m->b_cont;
				}
			}
			/*
			 * The run ends at the last request, at the first
			 * already-failed request, or where the next
			 * request is not byte-contiguous with this one.
			 */
			if (lrp->list == NULL ||
			    lrp->list->ns->ns_status != RFSWRITE_INITVAL ||
			    lrp->wa->wa_offset + lrp->wa->wa_count !=
			    lrp->list->wa->wa_offset) {
				lrp = lrp->list;
				break;
			}
			lrp = lrp->list;
		}

		if (iovcnt <= MAXCLIOVECS) {
#ifdef DEBUG
			rfs_write_hits++;
#endif
			niovp = iov;
		} else {
#ifdef DEBUG
			rfs_write_misses++;
#endif
			niovp = kmem_alloc(sizeof (*niovp) * iovcnt, KM_SLEEP);
		}
		/*
		 * Put together the scatter/gather iovecs.
		 */
		iovp = niovp;
		trp = rp;
		count = 0;
		do {
			if (trp->wa->wa_data || trp->wa->wa_rlist) {
				if (trp->wa->wa_rlist) {
					iovp->iov_base =
					    (char *)((trp->wa->wa_rlist)->
					    u.c_daddr3);
					iovp->iov_len = trp->wa->wa_count;
				} else {
					iovp->iov_base = trp->wa->wa_data;
					iovp->iov_len = trp->wa->wa_count;
				}
				iovp++;
			} else {
				m = trp->wa->wa_mblk;
				rcount = trp->wa->wa_count;
				while (m != NULL) {
					iovp->iov_base = (caddr_t)m->b_rptr;
					iovp->iov_len = (m->b_wptr - m->b_rptr);
					/* clamp the final mblk to wa_count */
					rcount -= iovp->iov_len;
					if (rcount < 0)
						iovp->iov_len += rcount;
					iovp++;
					if (rcount <= 0)
						break;
					m = m->b_cont;
				}
			}
			count += trp->wa->wa_count;
			trp = trp->list;
		} while (trp != lrp);

		uio.uio_iov = niovp;
		uio.uio_iovcnt = iovcnt;
		uio.uio_segflg = UIO_SYSSPACE;
		uio.uio_extflg = UIO_COPY_DEFAULT;
		uio.uio_loffset = (offset_t)rp->wa->wa_offset;
		uio.uio_resid = count;
		/*
		 * The limit is checked on the client. We
		 * should allow any size writes here.
		 */
		uio.uio_llimit = curproc->p_fsz_ctl;
		rlimit = uio.uio_llimit - rp->wa->wa_offset;
		if (rlimit < (rlim64_t)uio.uio_resid)
			uio.uio_resid = (uint_t)rlimit;

		/*
		 * For now we assume no append mode.
		 */

		/*
		 * We're changing creds because VM may fault
		 * and we need the cred of the current
		 * thread to be used if quota * checking is
		 * enabled.
		 */
		savecred = curthread->t_cred;
		curthread->t_cred = cr;
		error = VOP_WRITE(vp, &uio, 0, rp->cr, &ct);
		curthread->t_cred = savecred;

		/* check if a monitor detected a delegation conflict */
		if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
			/* mark as wouldblock so response is dropped */
			curthread->t_flag |= T_WOULDBLOCK;

		if (niovp != iov)
			kmem_free(niovp, sizeof (*niovp) * iovcnt);

		if (!error) {
			data_written = 1;
			/*
			 * Get attributes again so we send the latest mod
			 * time to the client side for its cache.
			 */
			va.va_mask = AT_ALL;	/* now we want everything */

			error = VOP_GETATTR(vp, &va, 0, rp->cr, &ct);

			if (!error)
				acl_perm(vp, exi, &va, rp->cr);
		}

		/*
		 * Fill in the status responses for each request
		 * which was just handled.  Also, copy the latest
		 * attributes in to the attribute responses if
		 * appropriate.
		 */
		t_flag = curthread->t_flag & T_WOULDBLOCK;
		do {
			rp->thread->t_flag |= t_flag;
			/* check for overflows */
			if (!error) {
				error = vattr_to_nattr(&va, &rp->ns->ns_attr);
			}
			rp->ns->ns_status = puterrno(error);
			rp = rp->list;
		} while (rp != lrp);
	} while (rp != NULL);

	/*
	 * If any data was written at all, then we need to flush
	 * the data and metadata to stable storage.
	 */
	if (data_written) {
		error = VOP_PUTPAGE(vp, (u_offset_t)off, len, 0, cr, &ct);

		if (!error) {
			error = VOP_FSYNC(vp, FNODSYNC, cr, &ct);
		}
	}

	VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct);

	if (in_crit)
		nbl_end_crit(vp);
	VN_RELE(vp);

	/* Complete any stragglers that never got a status, then wake all. */
	t_flag = curthread->t_flag & T_WOULDBLOCK;
	mutex_enter(&nsrv->async_write_lock);
	for (rp = nlp->list; rp != NULL; rp = rp->list) {
		if (rp->ns->ns_status == RFSWRITE_INITVAL) {
			rp->ns->ns_status = puterrno(error);
			rp->thread->t_flag |= t_flag;
		}
	}
	cv_broadcast(&nlp->cv);
	mutex_exit(&nsrv->async_write_lock);

}
1781 1787 }
1782 1788
1783 1789 void *
1784 1790 rfs_write_getfh(struct nfswriteargs *wa)
1785 1791 {
1786 1792 return (&wa->wa_fhandle);
1787 1793 }
1788 1794
1789 1795 /*
1790 1796 * Create a file.
1791 1797 * Creates a file with given attributes and returns those attributes
1792 1798 * and an fhandle for the new file.
1793 1799 */
1794 1800 void
1795 1801 rfs_create(struct nfscreatargs *args, struct nfsdiropres *dr,
1796 1802 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
1797 1803 {
1798 1804 int error;
1799 1805 int lookuperr;
1800 1806 int in_crit = 0;
1801 1807 struct vattr va;
1802 1808 vnode_t *vp;
1803 1809 vnode_t *realvp;
1804 1810 vnode_t *dvp;
1805 1811 char *name = args->ca_da.da_name;
1806 1812 vnode_t *tvp = NULL;
1807 1813 int mode;
1808 1814 int lookup_ok;
1809 1815 bool_t trunc;
1810 1816 struct sockaddr *ca;
1811 1817
1812 1818 /*
1813 1819 * Disallow NULL paths
1814 1820 */
1815 1821 if (name == NULL || *name == '\0') {
1816 1822 dr->dr_status = NFSERR_ACCES;
1817 1823 return;
1818 1824 }
1819 1825
1820 1826 dvp = nfs_fhtovp(args->ca_da.da_fhandle, exi);
1821 1827 if (dvp == NULL) {
1822 1828 dr->dr_status = NFSERR_STALE;
1823 1829 return;
1824 1830 }
1825 1831
1826 1832 error = sattr_to_vattr(args->ca_sa, &va);
1827 1833 if (error) {
1828 1834 dr->dr_status = puterrno(error);
1829 1835 return;
1830 1836 }
1831 1837
1832 1838 /*
1833 1839 * Must specify the mode.
1834 1840 */
1835 1841 if (!(va.va_mask & AT_MODE)) {
1836 1842 VN_RELE(dvp);
1837 1843 dr->dr_status = NFSERR_INVAL;
1838 1844 return;
1839 1845 }
1840 1846
1841 1847 /*
1842 1848 * This is a completely gross hack to make mknod
1843 1849 * work over the wire until we can wack the protocol
1844 1850 */
1845 1851 if ((va.va_mode & IFMT) == IFCHR) {
1846 1852 if (args->ca_sa->sa_size == (uint_t)NFS_FIFO_DEV)
1847 1853 va.va_type = VFIFO; /* xtra kludge for named pipe */
1848 1854 else {
1849 1855 va.va_type = VCHR;
1850 1856 /*
1851 1857 * uncompress the received dev_t
1852 1858 * if the top half is zero indicating a request
1853 1859 * from an `older style' OS.
1854 1860 */
1855 1861 if ((va.va_size & 0xffff0000) == 0)
1856 1862 va.va_rdev = nfsv2_expdev(va.va_size);
1857 1863 else
1858 1864 va.va_rdev = (dev_t)va.va_size;
1859 1865 }
1860 1866 va.va_mask &= ~AT_SIZE;
1861 1867 } else if ((va.va_mode & IFMT) == IFBLK) {
1862 1868 va.va_type = VBLK;
1863 1869 /*
1864 1870 * uncompress the received dev_t
1865 1871 * if the top half is zero indicating a request
1866 1872 * from an `older style' OS.
1867 1873 */
1868 1874 if ((va.va_size & 0xffff0000) == 0)
1869 1875 va.va_rdev = nfsv2_expdev(va.va_size);
1870 1876 else
1871 1877 va.va_rdev = (dev_t)va.va_size;
1872 1878 va.va_mask &= ~AT_SIZE;
1873 1879 } else if ((va.va_mode & IFMT) == IFSOCK) {
1874 1880 va.va_type = VSOCK;
1875 1881 } else {
1876 1882 va.va_type = VREG;
1877 1883 }
1878 1884 va.va_mode &= ~IFMT;
1879 1885 va.va_mask |= AT_TYPE;
1880 1886
1881 1887 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1882 1888 name = nfscmd_convname(ca, exi, name, NFSCMD_CONV_INBOUND,
1883 1889 MAXPATHLEN);
1884 1890 if (name == NULL) {
1885 1891 dr->dr_status = puterrno(EINVAL);
1886 1892 return;
1887 1893 }
1888 1894
1889 1895 /*
1890 1896 * Why was the choice made to use VWRITE as the mode to the
1891 1897 * call to VOP_CREATE ? This results in a bug. When a client
1892 1898 * opens a file that already exists and is RDONLY, the second
1893 1899 * open fails with an EACESS because of the mode.
1894 1900 * bug ID 1054648.
1895 1901 */
1896 1902 lookup_ok = 0;
1897 1903 mode = VWRITE;
1898 1904 if (!(va.va_mask & AT_SIZE) || va.va_type != VREG) {
1899 1905 error = VOP_LOOKUP(dvp, name, &tvp, NULL, 0, NULL, cr,
1900 1906 NULL, NULL, NULL);
1901 1907 if (!error) {
1902 1908 struct vattr at;
1903 1909
1904 1910 lookup_ok = 1;
1905 1911 at.va_mask = AT_MODE;
1906 1912 error = VOP_GETATTR(tvp, &at, 0, cr, NULL);
1907 1913 if (!error)
1908 1914 mode = (at.va_mode & S_IWUSR) ? VWRITE : VREAD;
1909 1915 VN_RELE(tvp);
1910 1916 tvp = NULL;
1911 1917 }
1912 1918 }
1913 1919
1914 1920 if (!lookup_ok) {
1915 1921 if (rdonly(ro, dvp)) {
1916 1922 error = EROFS;
1917 1923 } else if (va.va_type != VREG && va.va_type != VFIFO &&
1918 1924 va.va_type != VSOCK && secpolicy_sys_devices(cr) != 0) {
1919 1925 error = EPERM;
1920 1926 } else {
1921 1927 error = 0;
1922 1928 }
1923 1929 }
1924 1930
1925 1931 /*
1926 1932 * If file size is being modified on an already existing file
1927 1933 * make sure that there are no conflicting non-blocking mandatory
1928 1934 * locks in the region being manipulated. Return EACCES if there
1929 1935 * are conflicting locks.
1930 1936 */
1931 1937 if (!error && (va.va_type == VREG) && (va.va_mask & AT_SIZE)) {
1932 1938 lookuperr = VOP_LOOKUP(dvp, name, &tvp, NULL, 0, NULL, cr,
1933 1939 NULL, NULL, NULL);
1934 1940
1935 1941 if (!lookuperr &&
1936 1942 rfs4_check_delegated(FWRITE, tvp, va.va_size == 0)) {
1937 1943 VN_RELE(tvp);
1938 1944 curthread->t_flag |= T_WOULDBLOCK;
1939 1945 goto out;
1940 1946 }
1941 1947
1942 1948 if (!lookuperr && nbl_need_check(tvp)) {
1943 1949 /*
1944 1950 * The file exists. Now check if it has any
1945 1951 * conflicting non-blocking mandatory locks
1946 1952 * in the region being changed.
1947 1953 */
1948 1954 struct vattr bva;
1949 1955 u_offset_t offset;
1950 1956 ssize_t length;
1951 1957
1952 1958 nbl_start_crit(tvp, RW_READER);
1953 1959 in_crit = 1;
1954 1960
1955 1961 bva.va_mask = AT_SIZE;
1956 1962 error = VOP_GETATTR(tvp, &bva, 0, cr, NULL);
1957 1963 if (!error) {
1958 1964 if (va.va_size < bva.va_size) {
1959 1965 offset = va.va_size;
1960 1966 length = bva.va_size - va.va_size;
1961 1967 } else {
1962 1968 offset = bva.va_size;
1963 1969 length = va.va_size - bva.va_size;
1964 1970 }
1965 1971 if (length) {
1966 1972 if (nbl_conflict(tvp, NBL_WRITE,
1967 1973 offset, length, 0, NULL)) {
1968 1974 error = EACCES;
1969 1975 }
1970 1976 }
1971 1977 }
1972 1978 if (error) {
1973 1979 nbl_end_crit(tvp);
1974 1980 VN_RELE(tvp);
1975 1981 in_crit = 0;
1976 1982 }
1977 1983 } else if (tvp != NULL) {
1978 1984 VN_RELE(tvp);
1979 1985 }
1980 1986 }
1981 1987
1982 1988 if (!error) {
1983 1989 /*
1984 1990 * If filesystem is shared with nosuid the remove any
1985 1991 * setuid/setgid bits on create.
1986 1992 */
1987 1993 if (va.va_type == VREG &&
1988 1994 exi->exi_export.ex_flags & EX_NOSUID)
1989 1995 va.va_mode &= ~(VSUID | VSGID);
1990 1996
1991 1997 error = VOP_CREATE(dvp, name, &va, NONEXCL, mode, &vp, cr, 0,
1992 1998 NULL, NULL);
1993 1999
1994 2000 if (!error) {
1995 2001
1996 2002 if ((va.va_mask & AT_SIZE) && (va.va_size == 0))
1997 2003 trunc = TRUE;
1998 2004 else
1999 2005 trunc = FALSE;
2000 2006
2001 2007 if (rfs4_check_delegated(FWRITE, vp, trunc)) {
2002 2008 VN_RELE(vp);
2003 2009 curthread->t_flag |= T_WOULDBLOCK;
2004 2010 goto out;
2005 2011 }
2006 2012 va.va_mask = AT_ALL;
2007 2013
2008 2014 error = VOP_GETATTR(vp, &va, 0, cr, NULL);
2009 2015
2010 2016 /* check for overflows */
2011 2017 if (!error) {
2012 2018 acl_perm(vp, exi, &va, cr);
2013 2019 error = vattr_to_nattr(&va, &dr->dr_attr);
2014 2020 if (!error) {
2015 2021 error = makefh(&dr->dr_fhandle, vp,
2016 2022 exi);
2017 2023 }
2018 2024 }
2019 2025 /*
2020 2026 * Force modified metadata out to stable storage.
2021 2027 *
2022 2028 * if a underlying vp exists, pass it to VOP_FSYNC
2023 2029 */
2024 2030 if (VOP_REALVP(vp, &realvp, NULL) == 0)
2025 2031 (void) VOP_FSYNC(realvp, FNODSYNC, cr, NULL);
2026 2032 else
2027 2033 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
2028 2034 VN_RELE(vp);
2029 2035 }
2030 2036
2031 2037 if (in_crit) {
2032 2038 nbl_end_crit(tvp);
2033 2039 VN_RELE(tvp);
2034 2040 }
2035 2041 }
2036 2042
2037 2043 /*
2038 2044 * Force modified data and metadata out to stable storage.
2039 2045 */
2040 2046 (void) VOP_FSYNC(dvp, 0, cr, NULL);
2041 2047
2042 2048 out:
2043 2049
2044 2050 VN_RELE(dvp);
2045 2051
2046 2052 dr->dr_status = puterrno(error);
2047 2053
2048 2054 if (name != args->ca_da.da_name)
2049 2055 kmem_free(name, MAXPATHLEN);
2050 2056 }
2051 2057 void *
2052 2058 rfs_create_getfh(struct nfscreatargs *args)
2053 2059 {
2054 2060 return (args->ca_da.da_fhandle);
2055 2061 }
2056 2062
2057 2063 /*
2058 2064 * Remove a file.
2059 2065 * Remove named file from parent directory.
2060 2066 */
/* ARGSUSED */
void
rfs_remove(struct nfsdiropargs *da, enum nfsstat *status,
	struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error = 0;
	vnode_t *vp;		/* parent directory */
	vnode_t *targvp;	/* file being removed */
	int in_crit = 0;	/* inside an nbmand critical region on targvp */

	/*
	 * Disallow NULL paths
	 */
	if (da->da_name == NULL || *da->da_name == '\0') {
		*status = NFSERR_ACCES;
		return;
	}

	vp = nfs_fhtovp(da->da_fhandle, exi);
	if (vp == NULL) {
		*status = NFSERR_STALE;
		return;
	}

	if (rdonly(ro, vp)) {
		VN_RELE(vp);
		*status = NFSERR_ROFS;
		return;
	}

	/*
	 * Check for a conflict with a non-blocking mandatory share
	 * reservation: first look up the target so we can examine it.
	 */
	error = VOP_LOOKUP(vp, da->da_name, &targvp, NULL, 0,
	    NULL, cr, NULL, NULL, NULL);
	if (error != 0) {
		VN_RELE(vp);
		*status = puterrno(error);
		return;
	}

	/*
	 * If the file is delegated to an v4 client, then initiate
	 * recall and drop this request (by setting T_WOULDBLOCK).
	 * The client will eventually re-transmit the request and
	 * (hopefully), by then, the v4 client will have returned
	 * the delegation.
	 */

	if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
		VN_RELE(vp);
		VN_RELE(targvp);
		curthread->t_flag |= T_WOULDBLOCK;
		return;
	}

	/* Reject the remove if it conflicts with an nbmand lock/share. */
	if (nbl_need_check(targvp)) {
		nbl_start_crit(targvp, RW_READER);
		in_crit = 1;
		if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
			error = EACCES;
			goto out;
		}
	}

	error = VOP_REMOVE(vp, da->da_name, cr, NULL, 0);

	/*
	 * Force modified data and metadata out to stable storage.
	 */
	(void) VOP_FSYNC(vp, 0, cr, NULL);

out:
	if (in_crit)
		nbl_end_crit(targvp);
	VN_RELE(targvp);
	VN_RELE(vp);

	*status = puterrno(error);

}
2142 2148
/*
 * Return the file handle embedded in the REMOVE arguments.
 */
void *
rfs_remove_getfh(struct nfsdiropargs *da)
{
	return (da->da_fhandle);
}
2148 2154
2149 2155 /*
2150 2156 * rename a file
2151 2157 * Give a file (from) a new name (to).
2152 2158 */
/* ARGSUSED */
void
rfs_rename(struct nfsrnmargs *args, enum nfsstat *status,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error = 0;
	vnode_t *fromvp;	/* source directory */
	vnode_t *tovp;		/* target directory */
	struct exportinfo *to_exi;
	fhandle_t *fh;
	vnode_t *srcvp;		/* source file being renamed */
	vnode_t *targvp;	/* existing target file, if any */
	int in_crit = 0;	/* non-zero while inside an NBMAND critical region */

	fromvp = nfs_fhtovp(args->rna_from.da_fhandle, exi);
	if (fromvp == NULL) {
		*status = NFSERR_STALE;
		return;
	}

	fh = args->rna_to.da_fhandle;
	to_exi = checkexport(&fh->fh_fsid, (fid_t *)&fh->fh_xlen);
	if (to_exi == NULL) {
		VN_RELE(fromvp);
		*status = NFSERR_ACCES;
		return;
	}
	exi_rele(to_exi);

	/*
	 * to_exi is only compared for identity below, never dereferenced,
	 * so dropping the hold before the comparison is safe.  Renames
	 * may not cross exports (NFSERR_XDEV).
	 */
	if (to_exi != exi) {
		VN_RELE(fromvp);
		*status = NFSERR_XDEV;
		return;
	}

	tovp = nfs_fhtovp(args->rna_to.da_fhandle, exi);
	if (tovp == NULL) {
		VN_RELE(fromvp);
		*status = NFSERR_STALE;
		return;
	}

	if (fromvp->v_type != VDIR || tovp->v_type != VDIR) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_NOTDIR;
		return;
	}

	/*
	 * Disallow NULL paths
	 */
	if (args->rna_from.da_name == NULL || *args->rna_from.da_name == '\0' ||
	    args->rna_to.da_name == NULL || *args->rna_to.da_name == '\0') {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_ACCES;
		return;
	}

	if (rdonly(ro, tovp)) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_ROFS;
		return;
	}

	/*
	 * Look up the source so we can check it for delegations and
	 * non-blocking mandatory share reservations before the rename.
	 */
	error = VOP_LOOKUP(fromvp, args->rna_from.da_name, &srcvp, NULL, 0,
	    NULL, cr, NULL, NULL, NULL);
	if (error != 0) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = puterrno(error);
		return;
	}

	/* Check for delegations on the source file */

	if (rfs4_check_delegated(FWRITE, srcvp, FALSE)) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		VN_RELE(srcvp);
		curthread->t_flag |= T_WOULDBLOCK;
		return;
	}

	/* Check for delegation on the file being renamed over, if it exists */

	if (nfs4_get_deleg_policy() != SRV_NEVER_DELEGATE &&
	    VOP_LOOKUP(tovp, args->rna_to.da_name, &targvp, NULL, 0, NULL, cr,
	    NULL, NULL, NULL) == 0) {

		if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
			VN_RELE(tovp);
			VN_RELE(fromvp);
			VN_RELE(srcvp);
			VN_RELE(targvp);
			curthread->t_flag |= T_WOULDBLOCK;
			return;
		}
		VN_RELE(targvp);
	}


	if (nbl_need_check(srcvp)) {
		nbl_start_crit(srcvp, RW_READER);
		in_crit = 1;
		if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL)) {
			error = EACCES;
			goto out;
		}
	}

	error = VOP_RENAME(fromvp, args->rna_from.da_name,
	    tovp, args->rna_to.da_name, cr, NULL, 0);

	/* Keep the vnode-to-path cache current for the renamed file. */
	if (error == 0)
		vn_renamepath(tovp, srcvp, args->rna_to.da_name,
		    strlen(args->rna_to.da_name));

	/*
	 * Force modified data and metadata out to stable storage.
	 */
	(void) VOP_FSYNC(tovp, 0, cr, NULL);
	(void) VOP_FSYNC(fromvp, 0, cr, NULL);

out:
	if (in_crit)
		nbl_end_crit(srcvp);
	VN_RELE(srcvp);
	VN_RELE(tovp);
	VN_RELE(fromvp);

	*status = puterrno(error);

}
/*
 * Return the source file handle embedded in the RENAME arguments.
 */
void *
rfs_rename_getfh(struct nfsrnmargs *args)
{
	return (args->rna_from.da_fhandle);
}
2297 2303
2298 2304 /*
2299 2305 * Link to a file.
2300 2306 * Create a file (to) which is a hard link to the given file (from).
2301 2307 */
/* ARGSUSED */
void
rfs_link(struct nfslinkargs *args, enum nfsstat *status,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error;
	vnode_t *fromvp;	/* existing file being linked to */
	vnode_t *tovp;		/* directory that receives the new link */
	struct exportinfo *to_exi;
	fhandle_t *fh;

	fromvp = nfs_fhtovp(args->la_from, exi);
	if (fromvp == NULL) {
		*status = NFSERR_STALE;
		return;
	}

	fh = args->la_to.da_fhandle;
	to_exi = checkexport(&fh->fh_fsid, (fid_t *)&fh->fh_xlen);
	if (to_exi == NULL) {
		VN_RELE(fromvp);
		*status = NFSERR_ACCES;
		return;
	}
	exi_rele(to_exi);

	/*
	 * to_exi is only compared for identity, never dereferenced, so
	 * the hold may be dropped first.  Links may not cross exports.
	 */
	if (to_exi != exi) {
		VN_RELE(fromvp);
		*status = NFSERR_XDEV;
		return;
	}

	tovp = nfs_fhtovp(args->la_to.da_fhandle, exi);
	if (tovp == NULL) {
		VN_RELE(fromvp);
		*status = NFSERR_STALE;
		return;
	}

	if (tovp->v_type != VDIR) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_NOTDIR;
		return;
	}
	/*
	 * Disallow NULL paths
	 */
	if (args->la_to.da_name == NULL || *args->la_to.da_name == '\0') {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_ACCES;
		return;
	}

	if (rdonly(ro, tovp)) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_ROFS;
		return;
	}

	error = VOP_LINK(tovp, fromvp, args->la_to.da_name, cr, NULL, 0);

	/*
	 * Force modified data and metadata out to stable storage.
	 * FNODSYNC on the source: only its attributes changed, not data.
	 */
	(void) VOP_FSYNC(tovp, 0, cr, NULL);
	(void) VOP_FSYNC(fromvp, FNODSYNC, cr, NULL);

	VN_RELE(tovp);
	VN_RELE(fromvp);

	*status = puterrno(error);

}
/*
 * Return the source file handle embedded in the LINK arguments.
 */
void *
rfs_link_getfh(struct nfslinkargs *args)
{
	return (args->la_from);
}
2383 2389
2384 2390 /*
2385 2391 * Symbolicly link to a file.
2386 2392 * Create a file (to) with the given attributes which is a symbolic link
2387 2393 * to the given path name (to).
2388 2394 */
2389 2395 void
2390 2396 rfs_symlink(struct nfsslargs *args, enum nfsstat *status,
2391 2397 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
2392 2398 {
2393 2399 int error;
2394 2400 struct vattr va;
2395 2401 vnode_t *vp;
2396 2402 vnode_t *svp;
2397 2403 int lerror;
2398 2404 struct sockaddr *ca;
2399 2405 char *name = NULL;
2400 2406
2401 2407 /*
2402 2408 * Disallow NULL paths
2403 2409 */
2404 2410 if (args->sla_from.da_name == NULL || *args->sla_from.da_name == '\0') {
2405 2411 *status = NFSERR_ACCES;
2406 2412 return;
2407 2413 }
2408 2414
2409 2415 vp = nfs_fhtovp(args->sla_from.da_fhandle, exi);
2410 2416 if (vp == NULL) {
2411 2417 *status = NFSERR_STALE;
2412 2418 return;
2413 2419 }
2414 2420
2415 2421 if (rdonly(ro, vp)) {
2416 2422 VN_RELE(vp);
2417 2423 *status = NFSERR_ROFS;
2418 2424 return;
2419 2425 }
2420 2426
2421 2427 error = sattr_to_vattr(args->sla_sa, &va);
2422 2428 if (error) {
2423 2429 VN_RELE(vp);
2424 2430 *status = puterrno(error);
2425 2431 return;
2426 2432 }
2427 2433
2428 2434 if (!(va.va_mask & AT_MODE)) {
2429 2435 VN_RELE(vp);
2430 2436 *status = NFSERR_INVAL;
2431 2437 return;
2432 2438 }
2433 2439
2434 2440 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2435 2441 name = nfscmd_convname(ca, exi, args->sla_tnm,
2436 2442 NFSCMD_CONV_INBOUND, MAXPATHLEN);
2437 2443
2438 2444 if (name == NULL) {
2439 2445 *status = NFSERR_ACCES;
2440 2446 return;
2441 2447 }
2442 2448
2443 2449 va.va_type = VLNK;
2444 2450 va.va_mask |= AT_TYPE;
2445 2451
2446 2452 error = VOP_SYMLINK(vp, args->sla_from.da_name, &va, name, cr, NULL, 0);
2447 2453
2448 2454 /*
2449 2455 * Force new data and metadata out to stable storage.
2450 2456 */
2451 2457 lerror = VOP_LOOKUP(vp, args->sla_from.da_name, &svp, NULL, 0,
2452 2458 NULL, cr, NULL, NULL, NULL);
2453 2459
2454 2460 if (!lerror) {
2455 2461 (void) VOP_FSYNC(svp, 0, cr, NULL);
2456 2462 VN_RELE(svp);
2457 2463 }
2458 2464
2459 2465 /*
2460 2466 * Force modified data and metadata out to stable storage.
2461 2467 */
2462 2468 (void) VOP_FSYNC(vp, 0, cr, NULL);
2463 2469
2464 2470 VN_RELE(vp);
2465 2471
2466 2472 *status = puterrno(error);
2467 2473 if (name != args->sla_tnm)
2468 2474 kmem_free(name, MAXPATHLEN);
2469 2475
2470 2476 }
/*
 * Return the directory file handle embedded in the SYMLINK arguments.
 */
void *
rfs_symlink_getfh(struct nfsslargs *args)
{
	return (args->sla_from.da_fhandle);
}
2476 2482
2477 2483 /*
2478 2484 * Make a directory.
2479 2485 * Create a directory with the given name, parent directory, and attributes.
2480 2486 * Returns a file handle and attributes for the new directory.
2481 2487 */
/* ARGSUSED */
void
rfs_mkdir(struct nfscreatargs *args, struct nfsdiropres *dr,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error;
	struct vattr va;
	vnode_t *dvp = NULL;	/* the newly created directory */
	vnode_t *vp;		/* parent directory */
	char *name = args->ca_da.da_name;

	/*
	 * Disallow NULL paths
	 */
	if (name == NULL || *name == '\0') {
		dr->dr_status = NFSERR_ACCES;
		return;
	}

	vp = nfs_fhtovp(args->ca_da.da_fhandle, exi);
	if (vp == NULL) {
		dr->dr_status = NFSERR_STALE;
		return;
	}

	if (rdonly(ro, vp)) {
		VN_RELE(vp);
		dr->dr_status = NFSERR_ROFS;
		return;
	}

	error = sattr_to_vattr(args->ca_sa, &va);
	if (error) {
		VN_RELE(vp);
		dr->dr_status = puterrno(error);
		return;
	}

	/* Mode is mandatory for MKDIR. */
	if (!(va.va_mask & AT_MODE)) {
		VN_RELE(vp);
		dr->dr_status = NFSERR_INVAL;
		return;
	}

	va.va_type = VDIR;
	va.va_mask |= AT_TYPE;

	error = VOP_MKDIR(vp, name, &va, &dvp, cr, NULL, 0, NULL);

	if (!error) {
		/*
		 * Attributes of the newly created directory should
		 * be returned to the client.
		 */
		va.va_mask = AT_ALL; /* We want everything */
		error = VOP_GETATTR(dvp, &va, 0, cr, NULL);

		/* check for overflows */
		if (!error) {
			acl_perm(vp, exi, &va, cr);
			error = vattr_to_nattr(&va, &dr->dr_attr);
			if (!error) {
				error = makefh(&dr->dr_fhandle, dvp, exi);
			}
		}
		/*
		 * Force new data and metadata out to stable storage.
		 */
		(void) VOP_FSYNC(dvp, 0, cr, NULL);
		VN_RELE(dvp);
	}

	/*
	 * Force modified data and metadata out to stable storage.
	 */
	(void) VOP_FSYNC(vp, 0, cr, NULL);

	VN_RELE(vp);

	dr->dr_status = puterrno(error);

}
/*
 * Return the parent directory file handle embedded in the MKDIR arguments.
 */
void *
rfs_mkdir_getfh(struct nfscreatargs *args)
{
	return (args->ca_da.da_fhandle);
}
2569 2575
2570 2576 /*
2571 2577 * Remove a directory.
2572 2578 * Remove the given directory name from the given parent directory.
2573 2579 */
/* ARGSUSED */
void
rfs_rmdir(struct nfsdiropargs *da, enum nfsstat *status,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error;
	vnode_t *vp;	/* parent directory */

	/*
	 * Disallow NULL paths
	 */
	if (da->da_name == NULL || *da->da_name == '\0') {
		*status = NFSERR_ACCES;
		return;
	}

	vp = nfs_fhtovp(da->da_fhandle, exi);
	if (vp == NULL) {
		*status = NFSERR_STALE;
		return;
	}

	if (rdonly(ro, vp)) {
		VN_RELE(vp);
		*status = NFSERR_ROFS;
		return;
	}

	/*
	 * VOP_RMDIR takes a third argument (the current
	 * directory of the process).  That's because someone
	 * wants to return EINVAL if one tries to remove ".".
	 * Of course, NFS servers have no idea what their
	 * clients' current directories are.  We fake it by
	 * supplying a vnode known to exist and illegal to
	 * remove (the zone's root vnode).
	 */
	error = VOP_RMDIR(vp, da->da_name, ZONE_ROOTVP(), cr, NULL, 0);

	/*
	 * Force modified data and metadata out to stable storage.
	 */
	(void) VOP_FSYNC(vp, 0, cr, NULL);

	VN_RELE(vp);

	/*
	 * System V defines rmdir to return EEXIST, not ENOTEMPTY,
	 * if the directory is not empty.  A System V NFS server
	 * needs to map NFSERR_EXIST to NFSERR_NOTEMPTY to transmit
	 * over the wire.
	 */
	if (error == EEXIST)
		*status = NFSERR_NOTEMPTY;
	else
		*status = puterrno(error);

}
/*
 * Return the parent directory file handle embedded in the RMDIR arguments.
 */
void *
rfs_rmdir_getfh(struct nfsdiropargs *da)
{
	return (da->da_fhandle);
}
2637 2643
/* ARGSUSED */
void
rfs_readdir(struct nfsrddirargs *rda, struct nfsrddirres *rd,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error;
	int iseof;		/* set by VOP_READDIR when end of dir reached */
	struct iovec iov;
	struct uio uio;
	vnode_t *vp;
	char *ndata = NULL;	/* charset-converted entry buffer, if any */
	struct sockaddr *ca;
	size_t nents;
	int ret;

	vp = nfs_fhtovp(&rda->rda_fh, exi);
	if (vp == NULL) {
		rd->rd_entries = NULL;
		rd->rd_status = NFSERR_STALE;
		return;
	}

	if (vp->v_type != VDIR) {
		VN_RELE(vp);
		rd->rd_entries = NULL;
		rd->rd_status = NFSERR_NOTDIR;
		return;
	}

	(void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);

	error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);

	if (error) {
		rd->rd_entries = NULL;
		goto bad;
	}

	/* A zero-byte request is answered with an empty, non-EOF reply. */
	if (rda->rda_count == 0) {
		rd->rd_entries = NULL;
		rd->rd_size = 0;
		rd->rd_eof = FALSE;
		goto bad;
	}

	rda->rda_count = MIN(rda->rda_count, NFS_MAXDATA);

	/*
	 * Allocate data for entries.  This will be freed by rfs_rddirfree.
	 */
	rd->rd_bufsize = (uint_t)rda->rda_count;
	rd->rd_entries = kmem_alloc(rd->rd_bufsize, KM_SLEEP);

	/*
	 * Set up io vector to read directory data
	 */
	iov.iov_base = (caddr_t)rd->rd_entries;
	iov.iov_len = rda->rda_count;
	uio.uio_iov = &iov;
	uio.uio_iovcnt = 1;
	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_extflg = UIO_COPY_CACHED;
	uio.uio_loffset = (offset_t)rda->rda_offset;
	uio.uio_resid = rda->rda_count;

	/*
	 * read directory
	 */
	error = VOP_READDIR(vp, &uio, cr, &iseof, NULL, 0);

	/*
	 * Clean up
	 */
	if (!error) {
		/*
		 * set size and eof
		 */
		if (uio.uio_resid == rda->rda_count) {
			rd->rd_size = 0;
			rd->rd_eof = TRUE;
		} else {
			rd->rd_size = (uint32_t)(rda->rda_count -
			    uio.uio_resid);
			rd->rd_eof = iseof ? TRUE : FALSE;
		}
	}

	/*
	 * NOTE(review): this conversion block runs even when VOP_READDIR
	 * failed, in which case rd->rd_size was not set by this function.
	 * Presumably the response struct arrives zeroed from the dispatcher
	 * so rd_size is 0 here -- confirm against the caller.
	 */
	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
	nents = nfscmd_countents((char *)rd->rd_entries, rd->rd_size);
	ret = nfscmd_convdirplus(ca, exi, (char *)rd->rd_entries, nents,
	    rda->rda_count, &ndata);

	if (ret != 0) {
		size_t dropbytes;
		/*
		 * We had to drop one or more entries in order to fit
		 * during the character conversion.  We need to patch
		 * up the size and eof info.
		 */
		if (rd->rd_eof)
			rd->rd_eof = FALSE;
		dropbytes = nfscmd_dropped_entrysize(
		    (struct dirent64 *)rd->rd_entries, nents, ret);
		rd->rd_size -= dropbytes;
	}
	if (ndata == NULL) {
		/* no conversion took place; reply with the raw entries */
		ndata = (char *)rd->rd_entries;
	} else if (ndata != (char *)rd->rd_entries) {
		/* conversion allocated a new buffer; swap it in */
		kmem_free(rd->rd_entries, rd->rd_bufsize);
		rd->rd_entries = (void *)ndata;
		rd->rd_bufsize = rda->rda_count;
	}

bad:
	VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);

#if 0 /* notyet */
	/*
	 * Don't do this.  It causes local disk writes when just
	 * reading the file and the overhead is deemed larger
	 * than the benefit.
	 */
	/*
	 * Force modified metadata out to stable storage.
	 */
	(void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
#endif

	VN_RELE(vp);

	rd->rd_status = puterrno(error);

}
/*
 * Return the directory file handle embedded in the READDIR arguments.
 */
void *
rfs_readdir_getfh(struct nfsrddirargs *rda)
{
	return (&rda->rda_fh);
}
2776 2782 void
2777 2783 rfs_rddirfree(struct nfsrddirres *rd)
2778 2784 {
2779 2785 if (rd->rd_entries != NULL)
2780 2786 kmem_free(rd->rd_entries, rd->rd_bufsize);
2781 2787 }
2782 2788
2783 2789 /* ARGSUSED */
2784 2790 void
2785 2791 rfs_statfs(fhandle_t *fh, struct nfsstatfs *fs, struct exportinfo *exi,
2786 2792 struct svc_req *req, cred_t *cr, bool_t ro)
2787 2793 {
2788 2794 int error;
2789 2795 struct statvfs64 sb;
2790 2796 vnode_t *vp;
2791 2797
2792 2798 vp = nfs_fhtovp(fh, exi);
2793 2799 if (vp == NULL) {
2794 2800 fs->fs_status = NFSERR_STALE;
2795 2801 return;
2796 2802 }
2797 2803
2798 2804 error = VFS_STATVFS(vp->v_vfsp, &sb);
2799 2805
2800 2806 if (!error) {
2801 2807 fs->fs_tsize = nfstsize();
2802 2808 fs->fs_bsize = sb.f_frsize;
2803 2809 fs->fs_blocks = sb.f_blocks;
2804 2810 fs->fs_bfree = sb.f_bfree;
2805 2811 fs->fs_bavail = sb.f_bavail;
2806 2812 }
2807 2813
2808 2814 VN_RELE(vp);
2809 2815
2810 2816 fs->fs_status = puterrno(error);
2811 2817
2812 2818 }
/*
 * The STATFS argument is itself the file handle.
 */
void *
rfs_statfs_getfh(fhandle_t *fh)
{
	return (fh);
}
2818 2824
/*
 * Convert NFSv2 over-the-wire settable attributes (nfssattr) into a
 * vattr, setting va_mask bits only for fields the client actually
 * supplied.  The wire protocol uses all-ones as the "not set" sentinel.
 * Returns 0, or EOVERFLOW on 32-bit kernels if a time won't fit.
 */
static int
sattr_to_vattr(struct nfssattr *sa, struct vattr *vap)
{
	vap->va_mask = 0;

	/*
	 * There was a sign extension bug in some VFS based systems
	 * which stored the mode as a short.  When it would get
	 * assigned to a u_long, no sign extension would occur.
	 * It needed to, but this wasn't noticed because sa_mode
	 * would then get assigned back to the short, thus ignoring
	 * the upper 16 bits of sa_mode.
	 *
	 * To make this implementation work for both broken
	 * clients and good clients, we check for both versions
	 * of the mode.
	 */
	if (sa->sa_mode != (uint32_t)((ushort_t)-1) &&
	    sa->sa_mode != (uint32_t)-1) {
		vap->va_mask |= AT_MODE;
		vap->va_mode = sa->sa_mode;
	}
	if (sa->sa_uid != (uint32_t)-1) {
		vap->va_mask |= AT_UID;
		vap->va_uid = sa->sa_uid;
	}
	if (sa->sa_gid != (uint32_t)-1) {
		vap->va_mask |= AT_GID;
		vap->va_gid = sa->sa_gid;
	}
	if (sa->sa_size != (uint32_t)-1) {
		vap->va_mask |= AT_SIZE;
		vap->va_size = sa->sa_size;
	}
	if (sa->sa_atime.tv_sec != (int32_t)-1 &&
	    sa->sa_atime.tv_usec != (int32_t)-1) {
#ifndef _LP64
		/* return error if time overflow */
		if (!NFS2_TIME_OK(sa->sa_atime.tv_sec))
			return (EOVERFLOW);
#endif
		vap->va_mask |= AT_ATIME;
		/*
		 * nfs protocol defines times as unsigned so don't extend sign,
		 * unless sysadmin set nfs_allow_preepoch_time.
		 */
		NFS_TIME_T_CONVERT(vap->va_atime.tv_sec, sa->sa_atime.tv_sec);
		vap->va_atime.tv_nsec = (uint32_t)(sa->sa_atime.tv_usec * 1000);
	}
	if (sa->sa_mtime.tv_sec != (int32_t)-1 &&
	    sa->sa_mtime.tv_usec != (int32_t)-1) {
#ifndef _LP64
		/* return error if time overflow */
		if (!NFS2_TIME_OK(sa->sa_mtime.tv_sec))
			return (EOVERFLOW);
#endif
		vap->va_mask |= AT_MTIME;
		/*
		 * nfs protocol defines times as unsigned so don't extend sign,
		 * unless sysadmin set nfs_allow_preepoch_time.
		 */
		NFS_TIME_T_CONVERT(vap->va_mtime.tv_sec, sa->sa_mtime.tv_sec);
		vap->va_mtime.tv_nsec = (uint32_t)(sa->sa_mtime.tv_usec * 1000);
	}
	return (0);
}
2885 2891
/*
 * Map vnode type (vtype_t, used as the index) to the NFSv2
 * over-the-wire file type.  Types with no NFSv2 representation map
 * to 0; VFIFO also maps to 0 here and is instead remapped by
 * vattr_to_nattr via NA_SETFIFO (see the note there).
 */
static const enum nfsftype vt_to_nf[] = {
	0, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, 0, 0, 0, NFSOC, 0
};
2889 2895
2890 2896 /*
2891 2897 * check the following fields for overflow: nodeid, size, and time.
2892 2898 * There could be a problem when converting 64-bit LP64 fields
2893 2899 * into 32-bit ones. Return an error if there is an overflow.
2894 2900 */
/*
 * Convert a vattr into NFSv2 over-the-wire attributes (nfsfattr).
 * check the following fields for overflow: nodeid, size, and time.
 * There could be a problem when converting 64-bit LP64 fields
 * into 32-bit ones.  Return an error (EFBIG/EOVERFLOW) if there is
 * an overflow; 0 on success.
 */
int
vattr_to_nattr(struct vattr *vap, struct nfsfattr *na)
{
	ASSERT(vap->va_type >= VNON && vap->va_type <= VBAD);
	na->na_type = vt_to_nf[vap->va_type];

	if (vap->va_mode == (unsigned short) -1)
		na->na_mode = (uint32_t)-1;
	else
		na->na_mode = VTTOIF(vap->va_type) | vap->va_mode;

	/* Map the local "nobody" ids to their over-the-wire values. */
	if (vap->va_uid == (unsigned short)(-1))
		na->na_uid = (uint32_t)(-1);
	else if (vap->va_uid == UID_NOBODY)
		na->na_uid = (uint32_t)NFS_UID_NOBODY;
	else
		na->na_uid = vap->va_uid;

	if (vap->va_gid == (unsigned short)(-1))
		na->na_gid = (uint32_t)-1;
	else if (vap->va_gid == GID_NOBODY)
		na->na_gid = (uint32_t)NFS_GID_NOBODY;
	else
		na->na_gid = vap->va_gid;

	/*
	 * Do we need to check fsid for overflow?  It is 64-bit in the
	 * vattr, but are bigger than 32 bit values supported?
	 */
	na->na_fsid = vap->va_fsid;

	na->na_nodeid = vap->va_nodeid;

	/*
	 * Check to make sure that the nodeid is representable over the
	 * wire without losing bits.
	 */
	if (vap->va_nodeid != (u_longlong_t)na->na_nodeid)
		return (EFBIG);
	na->na_nlink = vap->va_nlink;

	/*
	 * Check for big files here, instead of at the caller.  See
	 * comments in cstat for large special file explanation.
	 */
	if (vap->va_size > (u_longlong_t)MAXOFF32_T) {
		if ((vap->va_type == VREG) || (vap->va_type == VDIR))
			return (EFBIG);
		if ((vap->va_type == VBLK) || (vap->va_type == VCHR)) {
			/* UNKNOWN_SIZE | OVERFLOW */
			na->na_size = MAXOFF32_T;
		} else
			na->na_size = vap->va_size;
	} else
		na->na_size = vap->va_size;

	/*
	 * If the vnode times overflow the 32-bit times that NFS2
	 * uses on the wire then return an error.
	 */
	if (!NFS_VAP_TIME_OK(vap)) {
		return (EOVERFLOW);
	}
	na->na_atime.tv_sec = vap->va_atime.tv_sec;
	na->na_atime.tv_usec = vap->va_atime.tv_nsec / 1000;

	na->na_mtime.tv_sec = vap->va_mtime.tv_sec;
	na->na_mtime.tv_usec = vap->va_mtime.tv_nsec / 1000;

	na->na_ctime.tv_sec = vap->va_ctime.tv_sec;
	na->na_ctime.tv_usec = vap->va_ctime.tv_nsec / 1000;

	/*
	 * If the dev_t will fit into 16 bits then compress
	 * it, otherwise leave it alone.  See comments in
	 * nfs_client.c.
	 */
	if (getminor(vap->va_rdev) <= SO4_MAXMIN &&
	    getmajor(vap->va_rdev) <= SO4_MAXMAJ)
		na->na_rdev = nfsv2_cmpdev(vap->va_rdev);
	else
		(void) cmpldev(&na->na_rdev, vap->va_rdev);

	na->na_blocks = vap->va_nblocks;
	na->na_blocksize = vap->va_blksize;

	/*
	 * This bit of ugliness is a *TEMPORARY* hack to preserve the
	 * over-the-wire protocols for named-pipe vnodes.  It remaps the
	 * VFIFO type to the special over-the-wire type. (see note in nfs.h)
	 *
	 * BUYER BEWARE:
	 *  If you are porting the NFS to a non-Sun server, you probably
	 *  don't want to include the following block of code.  The
	 *  over-the-wire special file types will be changing with the
	 *  NFS Protocol Revision.
	 */
	if (vap->va_type == VFIFO)
		NA_SETFIFO(na);
	return (0);
}
2996 3002
2997 3003 /*
2998 3004 * acl v2 support: returns approximate permission.
2999 3005 * default: returns minimal permission (more restrictive)
3000 3006 * aclok: returns maximal permission (less restrictive)
3001 3007 * This routine changes the permissions that are alaredy in *va.
3002 3008 * If a file has minimal ACL, i.e. aclcnt == MIN_ACL_ENTRIES,
3003 3009 * CLASS_OBJ is always the same as GROUP_OBJ entry.
3004 3010 */
3005 3011 static void
3006 3012 acl_perm(struct vnode *vp, struct exportinfo *exi, struct vattr *va, cred_t *cr)
3007 3013 {
3008 3014 vsecattr_t vsa;
3009 3015 int aclcnt;
3010 3016 aclent_t *aclentp;
3011 3017 mode_t mask_perm;
3012 3018 mode_t grp_perm;
3013 3019 mode_t other_perm;
3014 3020 mode_t other_orig;
3015 3021 int error;
3016 3022
3017 3023 /* dont care default acl */
3018 3024 vsa.vsa_mask = (VSA_ACL | VSA_ACLCNT);
3019 3025 error = VOP_GETSECATTR(vp, &vsa, 0, cr, NULL);
3020 3026
3021 3027 if (!error) {
3022 3028 aclcnt = vsa.vsa_aclcnt;
3023 3029 if (aclcnt > MIN_ACL_ENTRIES) {
3024 3030 /* non-trivial ACL */
3025 3031 aclentp = vsa.vsa_aclentp;
3026 3032 if (exi->exi_export.ex_flags & EX_ACLOK) {
3027 3033 /* maximal permissions */
3028 3034 grp_perm = 0;
3029 3035 other_perm = 0;
3030 3036 for (; aclcnt > 0; aclcnt--, aclentp++) {
3031 3037 switch (aclentp->a_type) {
3032 3038 case USER_OBJ:
3033 3039 break;
3034 3040 case USER:
3035 3041 grp_perm |=
3036 3042 aclentp->a_perm << 3;
3037 3043 other_perm |= aclentp->a_perm;
3038 3044 break;
3039 3045 case GROUP_OBJ:
3040 3046 grp_perm |=
3041 3047 aclentp->a_perm << 3;
3042 3048 break;
3043 3049 case GROUP:
3044 3050 other_perm |= aclentp->a_perm;
3045 3051 break;
3046 3052 case OTHER_OBJ:
3047 3053 other_orig = aclentp->a_perm;
3048 3054 break;
3049 3055 case CLASS_OBJ:
3050 3056 mask_perm = aclentp->a_perm;
3051 3057 break;
3052 3058 default:
3053 3059 break;
3054 3060 }
3055 3061 }
3056 3062 grp_perm &= mask_perm << 3;
3057 3063 other_perm &= mask_perm;
3058 3064 other_perm |= other_orig;
3059 3065
3060 3066 } else {
3061 3067 /* minimal permissions */
3062 3068 grp_perm = 070;
3063 3069 other_perm = 07;
3064 3070 for (; aclcnt > 0; aclcnt--, aclentp++) {
3065 3071 switch (aclentp->a_type) {
3066 3072 case USER_OBJ:
3067 3073 break;
3068 3074 case USER:
3069 3075 case CLASS_OBJ:
3070 3076 grp_perm &=
3071 3077 aclentp->a_perm << 3;
3072 3078 other_perm &=
3073 3079 aclentp->a_perm;
3074 3080 break;
3075 3081 case GROUP_OBJ:
3076 3082 grp_perm &=
3077 3083 aclentp->a_perm << 3;
3078 3084 break;
3079 3085 case GROUP:
3080 3086 other_perm &=
3081 3087 aclentp->a_perm;
3082 3088 break;
3083 3089 case OTHER_OBJ:
3084 3090 other_perm &=
3085 3091 aclentp->a_perm;
3086 3092 break;
3087 3093 default:
3088 3094 break;
3089 3095 }
3090 3096 }
3091 3097 }
3092 3098 /* copy to va */
3093 3099 va->va_mode &= ~077;
3094 3100 va->va_mode |= grp_perm | other_perm;
3095 3101 }
|
↓ open down ↓ |
1766 lines elided |
↑ open up ↑ |
3096 3102 if (vsa.vsa_aclcnt)
3097 3103 kmem_free(vsa.vsa_aclentp,
3098 3104 vsa.vsa_aclcnt * sizeof (aclent_t));
3099 3105 }
3100 3106 }
3101 3107
/*
 * One-time (module) NFSv2 server initialization.  Per-zone state is
 * now set up separately via rfs_srv_zone_init (the zone_key_create
 * call was removed from here per this change).
 */
void
rfs_srvrinit(void)
{
	nfs2_srv_caller_id = fs_new_caller_id();
}
3108 3113
/*
 * Module teardown counterpart of rfs_srvrinit; nothing to release.
 */
void
rfs_srvrfini(void)
{
}
3113 3118
/*
 * Allocate and initialize the per-zone NFSv2 server state, hanging it
 * off the zone's nfs_globals so it shares the globals' lifetime.
 */
/* ARGSUSED */
void
rfs_srv_zone_init(nfs_globals_t *ng)
{
	nfs_srv_t *ns;

	ns = kmem_zalloc(sizeof (*ns), KM_SLEEP);

	/* Async (unstable) writes are enabled by default. */
	mutex_init(&ns->async_write_lock, NULL, MUTEX_DEFAULT, NULL);
	ns->write_async = 1;

	ng->nfs_srv = ns;
}
3127 3132
/*
 * Tear down the per-zone NFSv2 server state created by
 * rfs_srv_zone_init.  The pointer is cleared before teardown so the
 * globals never reference freed memory.
 */
/* ARGSUSED */
void
rfs_srv_zone_fini(nfs_globals_t *ng)
{
	nfs_srv_t *ns = ng->nfs_srv;

	ng->nfs_srv = NULL;

	mutex_destroy(&ns->async_write_lock);
	kmem_free(ns, sizeof (*ns));
}
3138 3144
3139 3145 static int
3140 3146 rdma_setup_read_data2(struct nfsreadargs *ra, struct nfsrdresult *rr)
3141 3147 {
3142 3148 struct clist *wcl;
3143 3149 int wlist_len;
3144 3150 uint32_t count = rr->rr_count;
3145 3151
3146 3152 wcl = ra->ra_wlist;
3147 3153
3148 3154 if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE) {
3149 3155 return (FALSE);
3150 3156 }
3151 3157
3152 3158 wcl = ra->ra_wlist;
3153 3159 rr->rr_ok.rrok_wlist_len = wlist_len;
3154 3160 rr->rr_ok.rrok_wlist = wcl;
3155 3161
3156 3162 return (TRUE);
3157 3163 }
|
↓ open down ↓ |
13 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX