1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright 2014 Nexenta Systems, Inc. All rights reserved.
24 * Copyright (c) 2016 by Delphix. All rights reserved.
25 */
26
27 /*
28 * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T.
29 * All rights reserved.
30 */
31
32 #include <sys/param.h>
33 #include <sys/types.h>
34 #include <sys/systm.h>
35 #include <sys/cred.h>
36 #include <sys/buf.h>
37 #include <sys/vfs.h>
38 #include <sys/vnode.h>
39 #include <sys/uio.h>
40 #include <sys/stat.h>
41 #include <sys/errno.h>
42 #include <sys/sysmacros.h>
43 #include <sys/statvfs.h>
44 #include <sys/kmem.h>
45 #include <sys/kstat.h>
46 #include <sys/dirent.h>
47 #include <sys/cmn_err.h>
48 #include <sys/debug.h>
49 #include <sys/vtrace.h>
50 #include <sys/mode.h>
51 #include <sys/acl.h>
52 #include <sys/nbmlock.h>
53 #include <sys/policy.h>
54 #include <sys/sdt.h>
55
56 #include <rpc/types.h>
57 #include <rpc/auth.h>
58 #include <rpc/svc.h>
59
60 #include <nfs/nfs.h>
61 #include <nfs/export.h>
62 #include <nfs/nfs_cmd.h>
63
64 #include <vm/hat.h>
65 #include <vm/as.h>
66 #include <vm/seg.h>
67 #include <vm/seg_map.h>
68 #include <vm/seg_kmem.h>
69
70 #include <sys/strsubr.h>
71
72 /*
73 * These are the interface routines for the server side of the
74 * Network File System. See the NFS version 2 protocol specification
75 * for a description of this interface.
76 */
77
/* Helpers defined later in this file. */
static int sattr_to_vattr(struct nfssattr *, struct vattr *);
static void acl_perm(struct vnode *, struct exportinfo *, struct vattr *,
    cred_t *);

/*
 * Some "over the wire" UNIX file types. These are encoded
 * into the mode. This needs to be fixed in the next rev.
 */
#define IFMT 0170000 /* type of file */
#define IFCHR 0020000 /* character special */
#define IFBLK 0060000 /* block special */
#define IFSOCK 0140000 /* socket */

/*
 * Caller id stamped into caller_context_t (ct.cc_caller_id) by the
 * v2 server operations below so lock/delegation monitors can identify
 * requests originating from this module.
 */
u_longlong_t nfs2_srv_caller_id;
92
93 /*
94 * Get file attributes.
95 * Returns the current attributes of the file with the given fhandle.
96 */
97 /* ARGSUSED */
98 void
99 rfs_getattr(fhandle_t *fhp, struct nfsattrstat *ns, struct exportinfo *exi,
100 struct svc_req *req, cred_t *cr, bool_t ro)
101 {
102 int error;
103 vnode_t *vp;
104 struct vattr va;
105
106 vp = nfs_fhtovp(fhp, exi);
107 if (vp == NULL) {
108 ns->ns_status = NFSERR_STALE;
109 return;
110 }
111
112 /*
113 * Do the getattr.
114 */
115 va.va_mask = AT_ALL; /* we want all the attributes */
116
117 error = rfs4_delegated_getattr(vp, &va, 0, cr);
118
119 /* check for overflows */
120 if (!error) {
121 /* Lie about the object type for a referral */
122 if (vn_is_nfs_reparse(vp, cr))
123 va.va_type = VLNK;
124
125 acl_perm(vp, exi, &va, cr);
126 error = vattr_to_nattr(&va, &ns->ns_attr);
127 }
128
129 VN_RELE(vp);
130
131 ns->ns_status = puterrno(error);
132 }
/*
 * Return the filehandle of a GETATTR request so the dispatcher can
 * locate the corresponding export.
 */
void *
rfs_getattr_getfh(fhandle_t *fhp)
{
	return (fhp);
}
138
139 /*
140 * Set file attributes.
141 * Sets the attributes of the file with the given fhandle. Returns
142 * the new attributes.
143 */
/* ARGSUSED */
void
rfs_setattr(struct nfssaargs *args, struct nfsattrstat *ns,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error;
	int flag;		/* flags passed to VOP_SETATTR (ATTR_UTIME or 0) */
	int in_crit = 0;	/* nonzero while inside the nbmand critical region */
	vnode_t *vp;
	struct vattr va;	/* attributes requested by the client */
	struct vattr bva;	/* attributes of the file before the change */
	struct flock64 bf;
	caller_context_t ct;


	vp = nfs_fhtovp(&args->saa_fh, exi);
	if (vp == NULL) {
		ns->ns_status = NFSERR_STALE;
		return;
	}

	/* Attribute changes are not permitted on a read-only export. */
	if (rdonly(ro, vp)) {
		VN_RELE(vp);
		ns->ns_status = NFSERR_ROFS;
		return;
	}

	error = sattr_to_vattr(&args->saa_sa, &va);
	if (error) {
		VN_RELE(vp);
		ns->ns_status = puterrno(error);
		return;
	}

	/*
	 * If the client is requesting a change to the mtime,
	 * but the nanosecond field is set to 1 billion, then
	 * this is a flag to the server that it should set the
	 * atime and mtime fields to the server's current time.
	 * The 1 billion number actually came from the client
	 * as 1 million, but the units in the over the wire
	 * request are microseconds instead of nanoseconds.
	 *
	 * This is an overload of the protocol and should be
	 * documented in the NFS Version 2 protocol specification.
	 */
	if (va.va_mask & AT_MTIME) {
		if (va.va_mtime.tv_nsec == 1000000000) {
			gethrestime(&va.va_mtime);
			va.va_atime = va.va_mtime;
			va.va_mask |= AT_ATIME;
			flag = 0;
		} else
			flag = ATTR_UTIME;
	} else
		flag = 0;

	/*
	 * If the filesystem is exported with nosuid, then mask off
	 * the setuid and setgid bits.
	 */
	if ((va.va_mask & AT_MODE) && vp->v_type == VREG &&
	    (exi->exi_export.ex_flags & EX_NOSUID))
		va.va_mode &= ~(VSUID | VSGID);

	/* Identify this caller to lock/delegation monitors. */
	ct.cc_sysid = 0;
	ct.cc_pid = 0;
	ct.cc_caller_id = nfs2_srv_caller_id;
	ct.cc_flags = CC_DONTBLOCK;

	/*
	 * We need to specially handle size changes because it is
	 * possible for the client to create a file with modes
	 * which indicate read-only, but with the file opened for
	 * writing. If the client then tries to set the size of
	 * the file, then the normal access checking done in
	 * VOP_SETATTR would prevent the client from doing so,
	 * although it should be legal for it to do so. To get
	 * around this, we do the access checking for ourselves
	 * and then use VOP_SPACE which doesn't do the access
	 * checking which VOP_SETATTR does. VOP_SPACE can only
	 * operate on VREG files, let VOP_SETATTR handle the other
	 * extremely rare cases.
	 * Also the client should not be allowed to change the
	 * size of the file if there is a conflicting non-blocking
	 * mandatory lock in the region of change.
	 */
	if (vp->v_type == VREG && va.va_mask & AT_SIZE) {
		if (nbl_need_check(vp)) {
			nbl_start_crit(vp, RW_READER);
			in_crit = 1;
		}

		/* Need the owner and current size for the checks below. */
		bva.va_mask = AT_UID | AT_SIZE;

		error = VOP_GETATTR(vp, &bva, 0, cr, &ct);

		if (error) {
			if (in_crit)
				nbl_end_crit(vp);
			VN_RELE(vp);
			ns->ns_status = puterrno(error);
			return;
		}

		if (in_crit) {
			u_offset_t offset;
			ssize_t length;

			/*
			 * The region being changed is the span between
			 * the old and the new size, whichever way the
			 * file is being resized.
			 */
			if (va.va_size < bva.va_size) {
				offset = va.va_size;
				length = bva.va_size - va.va_size;
			} else {
				offset = bva.va_size;
				length = va.va_size - bva.va_size;
			}
			if (nbl_conflict(vp, NBL_WRITE, offset, length, 0,
			    NULL)) {
				error = EACCES;
			}
		}

		/*
		 * Owner-initiated resize: do it via VOP_SPACE (free
		 * space from the new size onward) and take AT_SIZE out
		 * of the mask so VOP_SETATTR below won't repeat it.
		 */
		if (crgetuid(cr) == bva.va_uid && !error &&
		    va.va_size != bva.va_size) {
			va.va_mask &= ~AT_SIZE;
			bf.l_type = F_WRLCK;
			bf.l_whence = 0;
			bf.l_start = (off64_t)va.va_size;
			bf.l_len = 0;
			bf.l_sysid = 0;
			bf.l_pid = 0;

			error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
			    (offset_t)va.va_size, cr, &ct);
		}
		if (in_crit)
			nbl_end_crit(vp);
	} else
		error = 0;

	/*
	 * Do the setattr.
	 */
	if (!error && va.va_mask) {
		error = VOP_SETATTR(vp, &va, flag, cr, &ct);
	}

	/*
	 * check if the monitor on either vop_space or vop_setattr detected
	 * a delegation conflict and if so, mark the thread flag as
	 * wouldblock so that the response is dropped and the client will
	 * try again.
	 */
	if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
		VN_RELE(vp);
		curthread->t_flag |= T_WOULDBLOCK;
		return;
	}

	/* On success, return the post-change attributes to the client. */
	if (!error) {
		va.va_mask = AT_ALL;	/* get everything */

		error = rfs4_delegated_getattr(vp, &va, 0, cr);

		/* check for overflows */
		if (!error) {
			acl_perm(vp, exi, &va, cr);
			error = vattr_to_nattr(&va, &ns->ns_attr);
		}
	}

	ct.cc_flags = 0;

	/*
	 * Force modified metadata out to stable storage.
	 */
	(void) VOP_FSYNC(vp, FNODSYNC, cr, &ct);

	VN_RELE(vp);

	ns->ns_status = puterrno(error);
}
/*
 * Return the filehandle embedded in a SETATTR request's arguments so
 * the dispatcher can locate the corresponding export.
 */
void *
rfs_setattr_getfh(struct nfssaargs *args)
{
	return (&args->saa_fh);
}
331
332 /*
333 * Directory lookup.
334 * Returns an fhandle and file attributes for file name in a directory.
335 */
336 /* ARGSUSED */
337 void
338 rfs_lookup(struct nfsdiropargs *da, struct nfsdiropres *dr,
339 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
340 {
341 int error;
342 vnode_t *dvp;
343 vnode_t *vp;
344 struct vattr va;
345 fhandle_t *fhp = da->da_fhandle;
346 struct sec_ol sec = {0, 0};
347 bool_t publicfh_flag = FALSE, auth_weak = FALSE;
348 char *name;
349 struct sockaddr *ca;
350
351 /*
352 * Trusted Extension doesn't support NFSv2. MOUNT
353 * will reject v2 clients. Need to prevent v2 client
354 * access via WebNFS here.
355 */
356 if (is_system_labeled() && req->rq_vers == 2) {
357 dr->dr_status = NFSERR_ACCES;
358 return;
359 }
360
361 /*
362 * Disallow NULL paths
363 */
364 if (da->da_name == NULL || *da->da_name == '\0') {
365 dr->dr_status = NFSERR_ACCES;
366 return;
367 }
368
369 /*
370 * Allow lookups from the root - the default
371 * location of the public filehandle.
372 */
373 if (exi != NULL && (exi->exi_export.ex_flags & EX_PUBLIC)) {
374 dvp = rootdir;
375 VN_HOLD(dvp);
376 } else {
377 dvp = nfs_fhtovp(fhp, exi);
378 if (dvp == NULL) {
379 dr->dr_status = NFSERR_STALE;
380 return;
381 }
382 }
383
384 /*
385 * Not allow lookup beyond root.
386 * If the filehandle matches a filehandle of the exi,
387 * then the ".." refers beyond the root of an exported filesystem.
388 */
389 if (strcmp(da->da_name, "..") == 0 &&
390 EQFID(&exi->exi_fid, (fid_t *)&fhp->fh_len)) {
391 VN_RELE(dvp);
392 dr->dr_status = NFSERR_NOENT;
393 return;
394 }
395
396 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
397 name = nfscmd_convname(ca, exi, da->da_name, NFSCMD_CONV_INBOUND,
398 MAXPATHLEN);
399
400 if (name == NULL) {
401 dr->dr_status = NFSERR_ACCES;
402 return;
403 }
404
405 /*
406 * If the public filehandle is used then allow
407 * a multi-component lookup, i.e. evaluate
408 * a pathname and follow symbolic links if
409 * necessary.
410 *
411 * This may result in a vnode in another filesystem
412 * which is OK as long as the filesystem is exported.
413 */
414 if (PUBLIC_FH2(fhp)) {
415 publicfh_flag = TRUE;
416 error = rfs_publicfh_mclookup(name, dvp, cr, &vp, &exi,
417 &sec);
418 } else {
419 /*
420 * Do a normal single component lookup.
421 */
422 error = VOP_LOOKUP(dvp, name, &vp, NULL, 0, NULL, cr,
423 NULL, NULL, NULL);
424 }
425
426 if (name != da->da_name)
427 kmem_free(name, MAXPATHLEN);
428
429
430 if (!error) {
431 va.va_mask = AT_ALL; /* we want everything */
432
433 error = rfs4_delegated_getattr(vp, &va, 0, cr);
434
435 /* check for overflows */
436 if (!error) {
437 acl_perm(vp, exi, &va, cr);
438 error = vattr_to_nattr(&va, &dr->dr_attr);
439 if (!error) {
440 if (sec.sec_flags & SEC_QUERY)
441 error = makefh_ol(&dr->dr_fhandle, exi,
442 sec.sec_index);
443 else {
444 error = makefh(&dr->dr_fhandle, vp,
445 exi);
446 if (!error && publicfh_flag &&
447 !chk_clnt_sec(exi, req))
448 auth_weak = TRUE;
449 }
450 }
451 }
452 VN_RELE(vp);
453 }
454
455 VN_RELE(dvp);
456
457 /*
458 * If publicfh_flag is true then we have called rfs_publicfh_mclookup
459 * and have obtained a new exportinfo in exi which needs to be
460 * released. Note the the original exportinfo pointed to by exi
461 * will be released by the caller, comon_dispatch.
462 */
463 if (publicfh_flag && exi != NULL)
464 exi_rele(exi);
465
466 /*
467 * If it's public fh, no 0x81, and client's flavor is
468 * invalid, set WebNFS status to WNFSERR_CLNT_FLAVOR now.
469 * Then set RPC status to AUTH_TOOWEAK in common_dispatch.
470 */
471 if (auth_weak)
472 dr->dr_status = (enum nfsstat)WNFSERR_CLNT_FLAVOR;
473 else
474 dr->dr_status = puterrno(error);
475 }
/*
 * Return the directory filehandle of a LOOKUP request so the
 * dispatcher can locate the corresponding export.
 */
void *
rfs_lookup_getfh(struct nfsdiropargs *da)
{
	return (da->da_fhandle);
}
481
482 /*
483 * Read symbolic link.
484 * Returns the string in the symbolic link at the given fhandle.
485 */
/* ARGSUSED */
void
rfs_readlink(fhandle_t *fhp, struct nfsrdlnres *rl, struct exportinfo *exi,
    struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error;
	struct iovec iov;
	struct uio uio;
	vnode_t *vp;
	struct vattr va;
	struct sockaddr *ca;
	char *name = NULL;
	int is_referral = 0;	/* nonzero for a reparse point faked as a link */

	vp = nfs_fhtovp(fhp, exi);
	if (vp == NULL) {
		rl->rl_data = NULL;
		rl->rl_status = NFSERR_STALE;
		return;
	}

	/* Only the mode is needed for the MANDLOCK check below. */
	va.va_mask = AT_MODE;

	error = VOP_GETATTR(vp, &va, 0, cr, NULL);

	if (error) {
		VN_RELE(vp);
		rl->rl_data = NULL;
		rl->rl_status = puterrno(error);
		return;
	}

	/* Refuse objects subject to mandatory locking. */
	if (MANDLOCK(vp, va.va_mode)) {
		VN_RELE(vp);
		rl->rl_data = NULL;
		rl->rl_status = NFSERR_ACCES;
		return;
	}

	/* We lied about the object type for a referral */
	if (vn_is_nfs_reparse(vp, cr))
		is_referral = 1;

	/*
	 * XNFS and RFC1094 require us to return ENXIO if argument
	 * is not a link. BUGID 1138002.
	 */
	if (vp->v_type != VLNK && !is_referral) {
		VN_RELE(vp);
		rl->rl_data = NULL;
		rl->rl_status = NFSERR_NXIO;
		return;
	}

	/*
	 * Allocate data for pathname. This will be freed by rfs_rlfree.
	 */
	rl->rl_data = kmem_alloc(NFS_MAXPATHLEN, KM_SLEEP);

	if (is_referral) {
		char *s;
		size_t strsz;

		/* Get an artificial symlink based on a referral */
		s = build_symlink(vp, cr, &strsz);
		global_svstat_ptr[2][NFS_REFERLINKS].value.ui64++;
		DTRACE_PROBE2(nfs2serv__func__referral__reflink,
		    vnode_t *, vp, char *, s);
		if (s == NULL)
			error = EINVAL;
		else {
			error = 0;
			(void) strlcpy(rl->rl_data, s, NFS_MAXPATHLEN);
			rl->rl_count = (uint32_t)MIN(strsz, NFS_MAXPATHLEN);
			kmem_free(s, strsz);
		}

	} else {

		/*
		 * Set up io vector to read sym link data
		 */
		iov.iov_base = rl->rl_data;
		iov.iov_len = NFS_MAXPATHLEN;
		uio.uio_iov = &iov;
		uio.uio_iovcnt = 1;
		uio.uio_segflg = UIO_SYSSPACE;
		uio.uio_extflg = UIO_COPY_CACHED;
		uio.uio_loffset = (offset_t)0;
		uio.uio_resid = NFS_MAXPATHLEN;

		/*
		 * Do the readlink.
		 */
		error = VOP_READLINK(vp, &uio, cr, NULL);

		rl->rl_count = (uint32_t)(NFS_MAXPATHLEN - uio.uio_resid);

		/*
		 * NOTE(review): if the link data fills the entire buffer
		 * (rl_count == NFS_MAXPATHLEN) this store is one byte past
		 * the kmem_alloc'd region — confirm VOP_READLINK cannot
		 * return a full NFS_MAXPATHLEN bytes here.
		 */
		if (!error)
			rl->rl_data[rl->rl_count] = '\0';

	}


	VN_RELE(vp);

	/* Convert the link text to the client's character set if needed. */
	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
	name = nfscmd_convname(ca, exi, rl->rl_data,
	    NFSCMD_CONV_OUTBOUND, MAXPATHLEN);

	/* On conversion, swap in the converted buffer and free the original. */
	if (name != NULL && name != rl->rl_data) {
		kmem_free(rl->rl_data, NFS_MAXPATHLEN);
		rl->rl_data = name;
	}

	/*
	 * XNFS and RFC1094 require us to return ENXIO if argument
	 * is not a link. UFS returns EINVAL if this is the case,
	 * so we do the mapping here. BUGID 1138002.
	 */
	if (error == EINVAL)
		rl->rl_status = NFSERR_NXIO;
	else
		rl->rl_status = puterrno(error);

}
/*
 * Return the filehandle of a READLINK request so the dispatcher can
 * locate the corresponding export.
 */
void *
rfs_readlink_getfh(fhandle_t *fhp)
{
	return (fhp);
}
617 /*
618 * Free data allocated by rfs_readlink
619 */
620 void
621 rfs_rlfree(struct nfsrdlnres *rl)
622 {
623 if (rl->rl_data != NULL)
624 kmem_free(rl->rl_data, NFS_MAXPATHLEN);
625 }
626
627 static int rdma_setup_read_data2(struct nfsreadargs *, struct nfsrdresult *);
628
629 /*
630 * Read data.
631 * Returns some data read from the file at the given fhandle.
632 */
633 /* ARGSUSED */
634 void
635 rfs_read(struct nfsreadargs *ra, struct nfsrdresult *rr,
636 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
637 {
638 vnode_t *vp;
639 int error;
640 struct vattr va;
641 struct iovec iov;
642 struct uio uio;
643 mblk_t *mp;
644 int alloc_err = 0;
645 int in_crit = 0;
646 caller_context_t ct;
647
648 vp = nfs_fhtovp(&ra->ra_fhandle, exi);
649 if (vp == NULL) {
650 rr->rr_data = NULL;
651 rr->rr_status = NFSERR_STALE;
652 return;
653 }
654
655 if (vp->v_type != VREG) {
656 VN_RELE(vp);
657 rr->rr_data = NULL;
658 rr->rr_status = NFSERR_ISDIR;
659 return;
660 }
661
662 ct.cc_sysid = 0;
663 ct.cc_pid = 0;
664 ct.cc_caller_id = nfs2_srv_caller_id;
665 ct.cc_flags = CC_DONTBLOCK;
666
667 /*
668 * Enter the critical region before calling VOP_RWLOCK
669 * to avoid a deadlock with write requests.
670 */
671 if (nbl_need_check(vp)) {
672 nbl_start_crit(vp, RW_READER);
673 if (nbl_conflict(vp, NBL_READ, ra->ra_offset, ra->ra_count,
674 0, NULL)) {
675 nbl_end_crit(vp);
676 VN_RELE(vp);
677 rr->rr_data = NULL;
678 rr->rr_status = NFSERR_ACCES;
679 return;
680 }
681 in_crit = 1;
682 }
683
684 error = VOP_RWLOCK(vp, V_WRITELOCK_FALSE, &ct);
685
686 /* check if a monitor detected a delegation conflict */
687 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
688 VN_RELE(vp);
689 /* mark as wouldblock so response is dropped */
690 curthread->t_flag |= T_WOULDBLOCK;
691
692 rr->rr_data = NULL;
693 return;
694 }
695
696 va.va_mask = AT_ALL;
697
698 error = VOP_GETATTR(vp, &va, 0, cr, &ct);
699
700 if (error) {
701 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
702 if (in_crit)
703 nbl_end_crit(vp);
704
705 VN_RELE(vp);
706 rr->rr_data = NULL;
707 rr->rr_status = puterrno(error);
708
709 return;
710 }
711
712 /*
713 * This is a kludge to allow reading of files created
714 * with no read permission. The owner of the file
715 * is always allowed to read it.
716 */
717 if (crgetuid(cr) != va.va_uid) {
718 error = VOP_ACCESS(vp, VREAD, 0, cr, &ct);
719
720 if (error) {
721 /*
722 * Exec is the same as read over the net because
723 * of demand loading.
724 */
725 error = VOP_ACCESS(vp, VEXEC, 0, cr, &ct);
726 }
727 if (error) {
728 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
729 if (in_crit)
730 nbl_end_crit(vp);
731 VN_RELE(vp);
732 rr->rr_data = NULL;
733 rr->rr_status = puterrno(error);
734
735 return;
736 }
737 }
738
739 if (MANDLOCK(vp, va.va_mode)) {
740 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
741 if (in_crit)
742 nbl_end_crit(vp);
743
744 VN_RELE(vp);
745 rr->rr_data = NULL;
746 rr->rr_status = NFSERR_ACCES;
747
748 return;
749 }
750
751 rr->rr_ok.rrok_wlist_len = 0;
752 rr->rr_ok.rrok_wlist = NULL;
753
754 if ((u_offset_t)ra->ra_offset >= va.va_size) {
755 rr->rr_count = 0;
756 rr->rr_data = NULL;
757 /*
758 * In this case, status is NFS_OK, but there is no data
759 * to encode. So set rr_mp to NULL.
760 */
761 rr->rr_mp = NULL;
762 rr->rr_ok.rrok_wlist = ra->ra_wlist;
763 if (rr->rr_ok.rrok_wlist)
764 clist_zero_len(rr->rr_ok.rrok_wlist);
765 goto done;
766 }
767
768 if (ra->ra_wlist) {
769 mp = NULL;
770 rr->rr_mp = NULL;
771 (void) rdma_get_wchunk(req, &iov, ra->ra_wlist);
772 if (ra->ra_count > iov.iov_len) {
773 rr->rr_data = NULL;
774 rr->rr_status = NFSERR_INVAL;
775 goto done;
776 }
777 } else {
778 /*
779 * mp will contain the data to be sent out in the read reply.
780 * This will be freed after the reply has been sent out (by the
781 * driver).
782 * Let's roundup the data to a BYTES_PER_XDR_UNIT multiple, so
783 * that the call to xdrmblk_putmblk() never fails.
784 */
785 mp = allocb_wait(RNDUP(ra->ra_count), BPRI_MED, STR_NOSIG,
786 &alloc_err);
787 ASSERT(mp != NULL);
788 ASSERT(alloc_err == 0);
789
790 rr->rr_mp = mp;
791
792 /*
793 * Set up io vector
794 */
795 iov.iov_base = (caddr_t)mp->b_datap->db_base;
796 iov.iov_len = ra->ra_count;
797 }
798
799 uio.uio_iov = &iov;
800 uio.uio_iovcnt = 1;
801 uio.uio_segflg = UIO_SYSSPACE;
802 uio.uio_extflg = UIO_COPY_CACHED;
803 uio.uio_loffset = (offset_t)ra->ra_offset;
804 uio.uio_resid = ra->ra_count;
805
806 error = VOP_READ(vp, &uio, 0, cr, &ct);
807
808 if (error) {
809 if (mp)
810 freeb(mp);
811
812 /*
813 * check if a monitor detected a delegation conflict and
814 * mark as wouldblock so response is dropped
815 */
816 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
817 curthread->t_flag |= T_WOULDBLOCK;
818 else
819 rr->rr_status = puterrno(error);
820
821 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
822 if (in_crit)
823 nbl_end_crit(vp);
824
825 VN_RELE(vp);
826 rr->rr_data = NULL;
827
828 return;
829 }
830
831 /*
832 * Get attributes again so we can send the latest access
833 * time to the client side for its cache.
834 */
835 va.va_mask = AT_ALL;
836
837 error = VOP_GETATTR(vp, &va, 0, cr, &ct);
838
839 if (error) {
840 if (mp)
841 freeb(mp);
842
843 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
844 if (in_crit)
845 nbl_end_crit(vp);
846
847 VN_RELE(vp);
848 rr->rr_data = NULL;
849 rr->rr_status = puterrno(error);
850
851 return;
852 }
853
854 rr->rr_count = (uint32_t)(ra->ra_count - uio.uio_resid);
855
856 if (mp) {
857 rr->rr_data = (char *)mp->b_datap->db_base;
858 } else {
859 if (ra->ra_wlist) {
860 rr->rr_data = (caddr_t)iov.iov_base;
861 if (!rdma_setup_read_data2(ra, rr)) {
862 rr->rr_data = NULL;
863 rr->rr_status = puterrno(NFSERR_INVAL);
864 }
865 }
866 }
867 done:
868 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
869 if (in_crit)
870 nbl_end_crit(vp);
871
872 acl_perm(vp, exi, &va, cr);
873
874 /* check for overflows */
875 error = vattr_to_nattr(&va, &rr->rr_attr);
876
877 VN_RELE(vp);
878
879 rr->rr_status = puterrno(error);
880 }
881
882 /*
883 * Free data allocated by rfs_read
884 */
885 void
886 rfs_rdfree(struct nfsrdresult *rr)
887 {
888 mblk_t *mp;
889
890 if (rr->rr_status == NFS_OK) {
891 mp = rr->rr_mp;
892 if (mp != NULL)
893 freeb(mp);
894 }
895 }
896
/*
 * Return the filehandle embedded in a READ request's arguments so the
 * dispatcher can locate the corresponding export.
 */
void *
rfs_read_getfh(struct nfsreadargs *ra)
{
	return (&ra->ra_fhandle);
}
902
/* Number of on-stack iovecs used before falling back to kmem_alloc. */
#define MAX_IOVECS 12

#ifdef DEBUG
/* Counters: writes served by the stack iovec array vs. heap fallback. */
static int rfs_write_sync_hits = 0;
static int rfs_write_sync_misses = 0;
#endif
909
910 /*
911 * Write data to file.
912 * Returns attributes of a file after writing some data to it.
913 *
914 * Any changes made here, especially in error handling might have
915 * to also be done in rfs_write (which clusters write requests).
916 */
917 /* ARGSUSED */
918 void
919 rfs_write_sync(struct nfswriteargs *wa, struct nfsattrstat *ns,
920 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
921 {
922 int error;
923 vnode_t *vp;
924 rlim64_t rlimit;
925 struct vattr va;
926 struct uio uio;
927 struct iovec iov[MAX_IOVECS];
928 mblk_t *m;
929 struct iovec *iovp;
930 int iovcnt;
931 cred_t *savecred;
932 int in_crit = 0;
933 caller_context_t ct;
934
935 vp = nfs_fhtovp(&wa->wa_fhandle, exi);
936 if (vp == NULL) {
937 ns->ns_status = NFSERR_STALE;
938 return;
939 }
940
941 if (rdonly(ro, vp)) {
942 VN_RELE(vp);
943 ns->ns_status = NFSERR_ROFS;
944 return;
945 }
946
947 if (vp->v_type != VREG) {
948 VN_RELE(vp);
949 ns->ns_status = NFSERR_ISDIR;
950 return;
951 }
952
953 ct.cc_sysid = 0;
954 ct.cc_pid = 0;
955 ct.cc_caller_id = nfs2_srv_caller_id;
956 ct.cc_flags = CC_DONTBLOCK;
957
958 va.va_mask = AT_UID|AT_MODE;
959
960 error = VOP_GETATTR(vp, &va, 0, cr, &ct);
961
962 if (error) {
963 VN_RELE(vp);
964 ns->ns_status = puterrno(error);
965
966 return;
967 }
968
969 if (crgetuid(cr) != va.va_uid) {
970 /*
971 * This is a kludge to allow writes of files created
972 * with read only permission. The owner of the file
973 * is always allowed to write it.
974 */
975 error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct);
976
977 if (error) {
978 VN_RELE(vp);
979 ns->ns_status = puterrno(error);
980 return;
981 }
982 }
983
984 /*
985 * Can't access a mandatory lock file. This might cause
986 * the NFS service thread to block forever waiting for a
987 * lock to be released that will never be released.
988 */
989 if (MANDLOCK(vp, va.va_mode)) {
990 VN_RELE(vp);
991 ns->ns_status = NFSERR_ACCES;
992 return;
993 }
994
995 /*
996 * We have to enter the critical region before calling VOP_RWLOCK
997 * to avoid a deadlock with ufs.
998 */
999 if (nbl_need_check(vp)) {
1000 nbl_start_crit(vp, RW_READER);
1001 in_crit = 1;
1002 if (nbl_conflict(vp, NBL_WRITE, wa->wa_offset,
1003 wa->wa_count, 0, NULL)) {
1004 error = EACCES;
1005 goto out;
1006 }
1007 }
1008
1009 error = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct);
1010
1011 /* check if a monitor detected a delegation conflict */
1012 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1013 VN_RELE(vp);
1014 /* mark as wouldblock so response is dropped */
1015 curthread->t_flag |= T_WOULDBLOCK;
1016 return;
1017 }
1018
1019 if (wa->wa_data || wa->wa_rlist) {
1020 /* Do the RDMA thing if necessary */
1021 if (wa->wa_rlist) {
1022 iov[0].iov_base = (char *)((wa->wa_rlist)->u.c_daddr3);
1023 iov[0].iov_len = wa->wa_count;
1024 } else {
1025 iov[0].iov_base = wa->wa_data;
1026 iov[0].iov_len = wa->wa_count;
1027 }
1028 uio.uio_iov = iov;
1029 uio.uio_iovcnt = 1;
1030 uio.uio_segflg = UIO_SYSSPACE;
1031 uio.uio_extflg = UIO_COPY_DEFAULT;
1032 uio.uio_loffset = (offset_t)wa->wa_offset;
1033 uio.uio_resid = wa->wa_count;
1034 /*
1035 * The limit is checked on the client. We
1036 * should allow any size writes here.
1037 */
1038 uio.uio_llimit = curproc->p_fsz_ctl;
1039 rlimit = uio.uio_llimit - wa->wa_offset;
1040 if (rlimit < (rlim64_t)uio.uio_resid)
1041 uio.uio_resid = (uint_t)rlimit;
1042
1043 /*
1044 * for now we assume no append mode
1045 */
1046 /*
1047 * We're changing creds because VM may fault and we need
1048 * the cred of the current thread to be used if quota
1049 * checking is enabled.
1050 */
1051 savecred = curthread->t_cred;
1052 curthread->t_cred = cr;
1053 error = VOP_WRITE(vp, &uio, FSYNC, cr, &ct);
1054 curthread->t_cred = savecred;
1055 } else {
1056 iovcnt = 0;
1057 for (m = wa->wa_mblk; m != NULL; m = m->b_cont)
1058 iovcnt++;
1059 if (iovcnt <= MAX_IOVECS) {
1060 #ifdef DEBUG
1061 rfs_write_sync_hits++;
1062 #endif
1063 iovp = iov;
1064 } else {
1065 #ifdef DEBUG
1066 rfs_write_sync_misses++;
1067 #endif
1068 iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
1069 }
1070 mblk_to_iov(wa->wa_mblk, iovcnt, iovp);
1071 uio.uio_iov = iovp;
1072 uio.uio_iovcnt = iovcnt;
1073 uio.uio_segflg = UIO_SYSSPACE;
1074 uio.uio_extflg = UIO_COPY_DEFAULT;
1075 uio.uio_loffset = (offset_t)wa->wa_offset;
1076 uio.uio_resid = wa->wa_count;
1077 /*
1078 * The limit is checked on the client. We
1079 * should allow any size writes here.
1080 */
1081 uio.uio_llimit = curproc->p_fsz_ctl;
1082 rlimit = uio.uio_llimit - wa->wa_offset;
1083 if (rlimit < (rlim64_t)uio.uio_resid)
1084 uio.uio_resid = (uint_t)rlimit;
1085
1086 /*
1087 * For now we assume no append mode.
1088 */
1089 /*
1090 * We're changing creds because VM may fault and we need
1091 * the cred of the current thread to be used if quota
1092 * checking is enabled.
1093 */
1094 savecred = curthread->t_cred;
1095 curthread->t_cred = cr;
1096 error = VOP_WRITE(vp, &uio, FSYNC, cr, &ct);
1097 curthread->t_cred = savecred;
1098
1099 if (iovp != iov)
1100 kmem_free(iovp, sizeof (*iovp) * iovcnt);
1101 }
1102
1103 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct);
1104
1105 if (!error) {
1106 /*
1107 * Get attributes again so we send the latest mod
1108 * time to the client side for its cache.
1109 */
1110 va.va_mask = AT_ALL; /* now we want everything */
1111
1112 error = VOP_GETATTR(vp, &va, 0, cr, &ct);
1113
1114 /* check for overflows */
1115 if (!error) {
1116 acl_perm(vp, exi, &va, cr);
1117 error = vattr_to_nattr(&va, &ns->ns_attr);
1118 }
1119 }
1120
1121 out:
1122 if (in_crit)
1123 nbl_end_crit(vp);
1124 VN_RELE(vp);
1125
1126 /* check if a monitor detected a delegation conflict */
1127 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
1128 /* mark as wouldblock so response is dropped */
1129 curthread->t_flag |= T_WOULDBLOCK;
1130 else
1131 ns->ns_status = puterrno(error);
1132
1133 }
1134
/*
 * One pending clustered WRITE request; each service thread parks on
 * its entry until the thread processing the cluster fills in the
 * reply status and broadcasts the cluster's condition variable.
 */
struct rfs_async_write {
	struct nfswriteargs *wa;	/* the client's write arguments */
	struct nfsattrstat *ns;		/* where this request's reply goes */
	struct svc_req *req;
	cred_t *cr;
	bool_t ro;			/* export was mounted read-only */
	kthread_t *thread;		/* service thread owning this entry */
	struct rfs_async_write *list;	/* next request, ordered by offset */
};

/*
 * A write cluster: all pending writes to the file identified by fhp.
 */
struct rfs_async_write_list {
	fhandle_t *fhp;			/* file handle common to the cluster */
	kcondvar_t cv;			/* signalled when the cluster is done */
	struct rfs_async_write *list;	/* requests in this cluster */
	struct rfs_async_write_list *next;	/* next active cluster */
};

/* Active clusters; protected by rfs_async_write_lock. */
static struct rfs_async_write_list *rfs_async_write_head = NULL;
static kmutex_t rfs_async_write_lock;
static int rfs_write_async = 1;	/* enables write clustering if == 1 */

/* Number of on-stack iovecs available for a clustered write. */
#define MAXCLIOVECS 42
/* Sentinel meaning "reply status not yet filled in" (0 would mean NFS_OK). */
#define RFSWRITE_INITVAL (enum nfsstat) -1

#ifdef DEBUG
/* Counters: clustered writes served by the stack iovec array vs. fallback. */
static int rfs_write_hits = 0;
static int rfs_write_misses = 0;
#endif
1163
1164 /*
1165 * Write data to file.
1166 * Returns attributes of a file after writing some data to it.
1167 */
1168 void
1169 rfs_write(struct nfswriteargs *wa, struct nfsattrstat *ns,
1170 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
1171 {
1172 int error;
1173 vnode_t *vp;
1174 rlim64_t rlimit;
1175 struct vattr va;
1176 struct uio uio;
1177 struct rfs_async_write_list *lp;
1178 struct rfs_async_write_list *nlp;
1179 struct rfs_async_write *rp;
1180 struct rfs_async_write *nrp;
1181 struct rfs_async_write *trp;
1182 struct rfs_async_write *lrp;
1183 int data_written;
1184 int iovcnt;
1185 mblk_t *m;
1186 struct iovec *iovp;
1187 struct iovec *niovp;
1188 struct iovec iov[MAXCLIOVECS];
1189 int count;
1190 int rcount;
1191 uint_t off;
1192 uint_t len;
1193 struct rfs_async_write nrpsp;
1194 struct rfs_async_write_list nlpsp;
1195 ushort_t t_flag;
1196 cred_t *savecred;
1197 int in_crit = 0;
1198 caller_context_t ct;
1199
1200 if (!rfs_write_async) {
1201 rfs_write_sync(wa, ns, exi, req, cr, ro);
1202 return;
1203 }
1204
1205 /*
1206 * Initialize status to RFSWRITE_INITVAL instead of 0, since value of 0
1207 * is considered an OK.
1208 */
1209 ns->ns_status = RFSWRITE_INITVAL;
1210
1211 nrp = &nrpsp;
1212 nrp->wa = wa;
1213 nrp->ns = ns;
1214 nrp->req = req;
1215 nrp->cr = cr;
1216 nrp->ro = ro;
1217 nrp->thread = curthread;
1218
1219 ASSERT(curthread->t_schedflag & TS_DONT_SWAP);
1220
1221 /*
1222 * Look to see if there is already a cluster started
1223 * for this file.
1224 */
1225 mutex_enter(&rfs_async_write_lock);
1226 for (lp = rfs_async_write_head; lp != NULL; lp = lp->next) {
1227 if (bcmp(&wa->wa_fhandle, lp->fhp,
1228 sizeof (fhandle_t)) == 0)
1229 break;
1230 }
1231
1232 /*
1233 * If lp is non-NULL, then there is already a cluster
1234 * started. We need to place ourselves in the cluster
1235 * list in the right place as determined by starting
1236 * offset. Conflicts with non-blocking mandatory locked
1237 * regions will be checked when the cluster is processed.
1238 */
1239 if (lp != NULL) {
1240 rp = lp->list;
1241 trp = NULL;
1242 while (rp != NULL && rp->wa->wa_offset < wa->wa_offset) {
1243 trp = rp;
1244 rp = rp->list;
1245 }
1246 nrp->list = rp;
1247 if (trp == NULL)
1248 lp->list = nrp;
1249 else
1250 trp->list = nrp;
1251 while (nrp->ns->ns_status == RFSWRITE_INITVAL)
1252 cv_wait(&lp->cv, &rfs_async_write_lock);
1253 mutex_exit(&rfs_async_write_lock);
1254
1255 return;
1256 }
1257
1258 /*
1259 * No cluster started yet, start one and add ourselves
1260 * to the list of clusters.
1261 */
1262 nrp->list = NULL;
1263
1264 nlp = &nlpsp;
1265 nlp->fhp = &wa->wa_fhandle;
1266 cv_init(&nlp->cv, NULL, CV_DEFAULT, NULL);
1267 nlp->list = nrp;
1268 nlp->next = NULL;
1269
1270 if (rfs_async_write_head == NULL) {
1271 rfs_async_write_head = nlp;
1272 } else {
1273 lp = rfs_async_write_head;
1274 while (lp->next != NULL)
1275 lp = lp->next;
1276 lp->next = nlp;
1277 }
1278 mutex_exit(&rfs_async_write_lock);
1279
1280 /*
1281 * Convert the file handle common to all of the requests
1282 * in this cluster to a vnode.
1283 */
1284 vp = nfs_fhtovp(&wa->wa_fhandle, exi);
1285 if (vp == NULL) {
1286 mutex_enter(&rfs_async_write_lock);
1287 if (rfs_async_write_head == nlp)
1288 rfs_async_write_head = nlp->next;
1289 else {
1290 lp = rfs_async_write_head;
1291 while (lp->next != nlp)
1292 lp = lp->next;
1293 lp->next = nlp->next;
1294 }
1295 t_flag = curthread->t_flag & T_WOULDBLOCK;
1296 for (rp = nlp->list; rp != NULL; rp = rp->list) {
1297 rp->ns->ns_status = NFSERR_STALE;
1298 rp->thread->t_flag |= t_flag;
1299 }
1300 cv_broadcast(&nlp->cv);
1301 mutex_exit(&rfs_async_write_lock);
1302
1303 return;
1304 }
1305
1306 /*
1307 * Can only write regular files. Attempts to write any
1308 * other file types fail with EISDIR.
1309 */
1310 if (vp->v_type != VREG) {
1311 VN_RELE(vp);
1312 mutex_enter(&rfs_async_write_lock);
1313 if (rfs_async_write_head == nlp)
1314 rfs_async_write_head = nlp->next;
1315 else {
1316 lp = rfs_async_write_head;
1317 while (lp->next != nlp)
1318 lp = lp->next;
1319 lp->next = nlp->next;
1320 }
1321 t_flag = curthread->t_flag & T_WOULDBLOCK;
1322 for (rp = nlp->list; rp != NULL; rp = rp->list) {
1323 rp->ns->ns_status = NFSERR_ISDIR;
1324 rp->thread->t_flag |= t_flag;
1325 }
1326 cv_broadcast(&nlp->cv);
1327 mutex_exit(&rfs_async_write_lock);
1328
1329 return;
1330 }
1331
1332 /*
1333 * Enter the critical region before calling VOP_RWLOCK, to avoid a
1334 * deadlock with ufs.
1335 */
1336 if (nbl_need_check(vp)) {
1337 nbl_start_crit(vp, RW_READER);
1338 in_crit = 1;
1339 }
1340
1341 ct.cc_sysid = 0;
1342 ct.cc_pid = 0;
1343 ct.cc_caller_id = nfs2_srv_caller_id;
1344 ct.cc_flags = CC_DONTBLOCK;
1345
1346 /*
1347 * Lock the file for writing. This operation provides
1348 * the delay which allows clusters to grow.
1349 */
1350 error = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct);
1351
1352 /* check if a monitor detected a delegation conflict */
1353 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1354 if (in_crit)
1355 nbl_end_crit(vp);
1356 VN_RELE(vp);
1357 /* mark as wouldblock so response is dropped */
1358 curthread->t_flag |= T_WOULDBLOCK;
1359 mutex_enter(&rfs_async_write_lock);
1360 if (rfs_async_write_head == nlp)
1361 rfs_async_write_head = nlp->next;
1362 else {
1363 lp = rfs_async_write_head;
1364 while (lp->next != nlp)
1365 lp = lp->next;
1366 lp->next = nlp->next;
1367 }
1368 for (rp = nlp->list; rp != NULL; rp = rp->list) {
1369 if (rp->ns->ns_status == RFSWRITE_INITVAL) {
1370 rp->ns->ns_status = puterrno(error);
1371 rp->thread->t_flag |= T_WOULDBLOCK;
1372 }
1373 }
1374 cv_broadcast(&nlp->cv);
1375 mutex_exit(&rfs_async_write_lock);
1376
1377 return;
1378 }
1379
1380 /*
1381 * Disconnect this cluster from the list of clusters.
1382 * The cluster that is being dealt with must be fixed
1383 * in size after this point, so there is no reason
1384 * to leave it on the list so that new requests can
1385 * find it.
1386 *
1387 * The algorithm is that the first write request will
1388 * create a cluster, convert the file handle to a
1389 * vnode pointer, and then lock the file for writing.
1390 * This request is not likely to be clustered with
1391 * any others. However, the next request will create
1392 * a new cluster and be blocked in VOP_RWLOCK while
1393 * the first request is being processed. This delay
1394 * will allow more requests to be clustered in this
1395 * second cluster.
1396 */
1397 mutex_enter(&rfs_async_write_lock);
1398 if (rfs_async_write_head == nlp)
1399 rfs_async_write_head = nlp->next;
1400 else {
1401 lp = rfs_async_write_head;
1402 while (lp->next != nlp)
1403 lp = lp->next;
1404 lp->next = nlp->next;
1405 }
1406 mutex_exit(&rfs_async_write_lock);
1407
1408 /*
1409 * Step through the list of requests in this cluster.
1410 * We need to check permissions to make sure that all
1411 * of the requests have sufficient permission to write
1412 * the file. A cluster can be composed of requests
1413 * from different clients and different users on each
1414 * client.
1415 *
1416 * As a side effect, we also calculate the size of the
1417 * byte range that this cluster encompasses.
1418 */
1419 rp = nlp->list;
1420 off = rp->wa->wa_offset;
1421 len = (uint_t)0;
1422 do {
1423 if (rdonly(rp->ro, vp)) {
1424 rp->ns->ns_status = NFSERR_ROFS;
1425 t_flag = curthread->t_flag & T_WOULDBLOCK;
1426 rp->thread->t_flag |= t_flag;
1427 continue;
1428 }
1429
1430 va.va_mask = AT_UID|AT_MODE;
1431
1432 error = VOP_GETATTR(vp, &va, 0, rp->cr, &ct);
1433
1434 if (!error) {
1435 if (crgetuid(rp->cr) != va.va_uid) {
1436 /*
1437 * This is a kludge to allow writes of files
1438 * created with read only permission. The
1439 * owner of the file is always allowed to
1440 * write it.
1441 */
1442 error = VOP_ACCESS(vp, VWRITE, 0, rp->cr, &ct);
1443 }
1444 if (!error && MANDLOCK(vp, va.va_mode))
1445 error = EACCES;
1446 }
1447
1448 /*
1449 * Check for a conflict with a nbmand-locked region.
1450 */
1451 if (in_crit && nbl_conflict(vp, NBL_WRITE, rp->wa->wa_offset,
1452 rp->wa->wa_count, 0, NULL)) {
1453 error = EACCES;
1454 }
1455
1456 if (error) {
1457 rp->ns->ns_status = puterrno(error);
1458 t_flag = curthread->t_flag & T_WOULDBLOCK;
1459 rp->thread->t_flag |= t_flag;
1460 continue;
1461 }
1462 if (len < rp->wa->wa_offset + rp->wa->wa_count - off)
1463 len = rp->wa->wa_offset + rp->wa->wa_count - off;
1464 } while ((rp = rp->list) != NULL);
1465
1466 /*
1467 * Step through the cluster attempting to gather as many
1468 * requests which are contiguous as possible. These
1469 * contiguous requests are handled via one call to VOP_WRITE
1470 * instead of different calls to VOP_WRITE. We also keep
1471 * track of the fact that any data was written.
1472 */
1473 rp = nlp->list;
1474 data_written = 0;
1475 do {
1476 /*
1477 * Skip any requests which are already marked as having an
1478 * error.
1479 */
1480 if (rp->ns->ns_status != RFSWRITE_INITVAL) {
1481 rp = rp->list;
1482 continue;
1483 }
1484
1485 /*
1486 * Count the number of iovec's which are required
1487 * to handle this set of requests. One iovec is
1488 * needed for each data buffer, whether addressed
1489 * by wa_data or by the b_rptr pointers in the
1490 * mblk chains.
1491 */
1492 iovcnt = 0;
1493 lrp = rp;
1494 for (;;) {
1495 if (lrp->wa->wa_data || lrp->wa->wa_rlist)
1496 iovcnt++;
1497 else {
1498 m = lrp->wa->wa_mblk;
1499 while (m != NULL) {
1500 iovcnt++;
1501 m = m->b_cont;
1502 }
1503 }
1504 if (lrp->list == NULL ||
1505 lrp->list->ns->ns_status != RFSWRITE_INITVAL ||
1506 lrp->wa->wa_offset + lrp->wa->wa_count !=
1507 lrp->list->wa->wa_offset) {
1508 lrp = lrp->list;
1509 break;
1510 }
1511 lrp = lrp->list;
1512 }
1513
1514 if (iovcnt <= MAXCLIOVECS) {
1515 #ifdef DEBUG
1516 rfs_write_hits++;
1517 #endif
1518 niovp = iov;
1519 } else {
1520 #ifdef DEBUG
1521 rfs_write_misses++;
1522 #endif
1523 niovp = kmem_alloc(sizeof (*niovp) * iovcnt, KM_SLEEP);
1524 }
1525 /*
1526 * Put together the scatter/gather iovecs.
1527 */
1528 iovp = niovp;
1529 trp = rp;
1530 count = 0;
1531 do {
1532 if (trp->wa->wa_data || trp->wa->wa_rlist) {
1533 if (trp->wa->wa_rlist) {
1534 iovp->iov_base =
1535 (char *)((trp->wa->wa_rlist)->
1536 u.c_daddr3);
1537 iovp->iov_len = trp->wa->wa_count;
1538 } else {
1539 iovp->iov_base = trp->wa->wa_data;
1540 iovp->iov_len = trp->wa->wa_count;
1541 }
1542 iovp++;
1543 } else {
1544 m = trp->wa->wa_mblk;
1545 rcount = trp->wa->wa_count;
1546 while (m != NULL) {
1547 iovp->iov_base = (caddr_t)m->b_rptr;
1548 iovp->iov_len = (m->b_wptr - m->b_rptr);
1549 rcount -= iovp->iov_len;
1550 if (rcount < 0)
1551 iovp->iov_len += rcount;
1552 iovp++;
1553 if (rcount <= 0)
1554 break;
1555 m = m->b_cont;
1556 }
1557 }
1558 count += trp->wa->wa_count;
1559 trp = trp->list;
1560 } while (trp != lrp);
1561
1562 uio.uio_iov = niovp;
1563 uio.uio_iovcnt = iovcnt;
1564 uio.uio_segflg = UIO_SYSSPACE;
1565 uio.uio_extflg = UIO_COPY_DEFAULT;
1566 uio.uio_loffset = (offset_t)rp->wa->wa_offset;
1567 uio.uio_resid = count;
1568 /*
1569 * The limit is checked on the client. We
1570 * should allow any size writes here.
1571 */
1572 uio.uio_llimit = curproc->p_fsz_ctl;
1573 rlimit = uio.uio_llimit - rp->wa->wa_offset;
1574 if (rlimit < (rlim64_t)uio.uio_resid)
1575 uio.uio_resid = (uint_t)rlimit;
1576
1577 /*
1578 * For now we assume no append mode.
1579 */
1580
1581 /*
1582 * We're changing creds because VM may fault
1583 * and we need the cred of the current
1584 * thread to be used if quota * checking is
1585 * enabled.
1586 */
1587 savecred = curthread->t_cred;
1588 curthread->t_cred = cr;
1589 error = VOP_WRITE(vp, &uio, 0, rp->cr, &ct);
1590 curthread->t_cred = savecred;
1591
1592 /* check if a monitor detected a delegation conflict */
1593 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
1594 /* mark as wouldblock so response is dropped */
1595 curthread->t_flag |= T_WOULDBLOCK;
1596
1597 if (niovp != iov)
1598 kmem_free(niovp, sizeof (*niovp) * iovcnt);
1599
1600 if (!error) {
1601 data_written = 1;
1602 /*
1603 * Get attributes again so we send the latest mod
1604 * time to the client side for its cache.
1605 */
1606 va.va_mask = AT_ALL; /* now we want everything */
1607
1608 error = VOP_GETATTR(vp, &va, 0, rp->cr, &ct);
1609
1610 if (!error)
1611 acl_perm(vp, exi, &va, rp->cr);
1612 }
1613
1614 /*
1615 * Fill in the status responses for each request
1616 * which was just handled. Also, copy the latest
1617 * attributes in to the attribute responses if
1618 * appropriate.
1619 */
1620 t_flag = curthread->t_flag & T_WOULDBLOCK;
1621 do {
1622 rp->thread->t_flag |= t_flag;
1623 /* check for overflows */
1624 if (!error) {
1625 error = vattr_to_nattr(&va, &rp->ns->ns_attr);
1626 }
1627 rp->ns->ns_status = puterrno(error);
1628 rp = rp->list;
1629 } while (rp != lrp);
1630 } while (rp != NULL);
1631
1632 /*
1633 * If any data was written at all, then we need to flush
1634 * the data and metadata to stable storage.
1635 */
1636 if (data_written) {
1637 error = VOP_PUTPAGE(vp, (u_offset_t)off, len, 0, cr, &ct);
1638
1639 if (!error) {
1640 error = VOP_FSYNC(vp, FNODSYNC, cr, &ct);
1641 }
1642 }
1643
1644 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct);
1645
1646 if (in_crit)
1647 nbl_end_crit(vp);
1648 VN_RELE(vp);
1649
1650 t_flag = curthread->t_flag & T_WOULDBLOCK;
1651 mutex_enter(&rfs_async_write_lock);
1652 for (rp = nlp->list; rp != NULL; rp = rp->list) {
1653 if (rp->ns->ns_status == RFSWRITE_INITVAL) {
1654 rp->ns->ns_status = puterrno(error);
1655 rp->thread->t_flag |= t_flag;
1656 }
1657 }
1658 cv_broadcast(&nlp->cv);
1659 mutex_exit(&rfs_async_write_lock);
1660
1661 }
1662
/*
 * Return a pointer to the file handle embedded in the NFS write
 * arguments.  NOTE(review): presumably called by the common dispatch
 * code to resolve the export before invoking rfs_write -- confirm
 * against the dispatch table.
 */
void *
rfs_write_getfh(struct nfswriteargs *wa)
{
	return (&wa->wa_fhandle);
}
1668
1669 /*
1670 * Create a file.
1671 * Creates a file with given attributes and returns those attributes
1672 * and an fhandle for the new file.
1673 */
1674 void
1675 rfs_create(struct nfscreatargs *args, struct nfsdiropres *dr,
1676 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
1677 {
1678 int error;
1679 int lookuperr;
1680 int in_crit = 0;
1681 struct vattr va;
1682 vnode_t *vp;
1683 vnode_t *realvp;
1684 vnode_t *dvp;
1685 char *name = args->ca_da.da_name;
1686 vnode_t *tvp = NULL;
1687 int mode;
1688 int lookup_ok;
1689 bool_t trunc;
1690 struct sockaddr *ca;
1691
1692 /*
1693 * Disallow NULL paths
1694 */
1695 if (name == NULL || *name == '\0') {
1696 dr->dr_status = NFSERR_ACCES;
1697 return;
1698 }
1699
1700 dvp = nfs_fhtovp(args->ca_da.da_fhandle, exi);
1701 if (dvp == NULL) {
1702 dr->dr_status = NFSERR_STALE;
1703 return;
1704 }
1705
1706 error = sattr_to_vattr(args->ca_sa, &va);
1707 if (error) {
1708 dr->dr_status = puterrno(error);
1709 return;
1710 }
1711
1712 /*
1713 * Must specify the mode.
1714 */
1715 if (!(va.va_mask & AT_MODE)) {
1716 VN_RELE(dvp);
1717 dr->dr_status = NFSERR_INVAL;
1718 return;
1719 }
1720
1721 /*
1722 * This is a completely gross hack to make mknod
1723 * work over the wire until we can wack the protocol
1724 */
1725 if ((va.va_mode & IFMT) == IFCHR) {
1726 if (args->ca_sa->sa_size == (uint_t)NFS_FIFO_DEV)
1727 va.va_type = VFIFO; /* xtra kludge for named pipe */
1728 else {
1729 va.va_type = VCHR;
1730 /*
1731 * uncompress the received dev_t
1732 * if the top half is zero indicating a request
1733 * from an `older style' OS.
1734 */
1735 if ((va.va_size & 0xffff0000) == 0)
1736 va.va_rdev = nfsv2_expdev(va.va_size);
1737 else
1738 va.va_rdev = (dev_t)va.va_size;
1739 }
1740 va.va_mask &= ~AT_SIZE;
1741 } else if ((va.va_mode & IFMT) == IFBLK) {
1742 va.va_type = VBLK;
1743 /*
1744 * uncompress the received dev_t
1745 * if the top half is zero indicating a request
1746 * from an `older style' OS.
1747 */
1748 if ((va.va_size & 0xffff0000) == 0)
1749 va.va_rdev = nfsv2_expdev(va.va_size);
1750 else
1751 va.va_rdev = (dev_t)va.va_size;
1752 va.va_mask &= ~AT_SIZE;
1753 } else if ((va.va_mode & IFMT) == IFSOCK) {
1754 va.va_type = VSOCK;
1755 } else {
1756 va.va_type = VREG;
1757 }
1758 va.va_mode &= ~IFMT;
1759 va.va_mask |= AT_TYPE;
1760
1761 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1762 name = nfscmd_convname(ca, exi, name, NFSCMD_CONV_INBOUND,
1763 MAXPATHLEN);
1764 if (name == NULL) {
1765 dr->dr_status = puterrno(EINVAL);
1766 return;
1767 }
1768
1769 /*
1770 * Why was the choice made to use VWRITE as the mode to the
1771 * call to VOP_CREATE ? This results in a bug. When a client
1772 * opens a file that already exists and is RDONLY, the second
1773 * open fails with an EACESS because of the mode.
1774 * bug ID 1054648.
1775 */
1776 lookup_ok = 0;
1777 mode = VWRITE;
1778 if (!(va.va_mask & AT_SIZE) || va.va_type != VREG) {
1779 error = VOP_LOOKUP(dvp, name, &tvp, NULL, 0, NULL, cr,
1780 NULL, NULL, NULL);
1781 if (!error) {
1782 struct vattr at;
1783
1784 lookup_ok = 1;
1785 at.va_mask = AT_MODE;
1786 error = VOP_GETATTR(tvp, &at, 0, cr, NULL);
1787 if (!error)
1788 mode = (at.va_mode & S_IWUSR) ? VWRITE : VREAD;
1789 VN_RELE(tvp);
1790 tvp = NULL;
1791 }
1792 }
1793
1794 if (!lookup_ok) {
1795 if (rdonly(ro, dvp)) {
1796 error = EROFS;
1797 } else if (va.va_type != VREG && va.va_type != VFIFO &&
1798 va.va_type != VSOCK && secpolicy_sys_devices(cr) != 0) {
1799 error = EPERM;
1800 } else {
1801 error = 0;
1802 }
1803 }
1804
1805 /*
1806 * If file size is being modified on an already existing file
1807 * make sure that there are no conflicting non-blocking mandatory
1808 * locks in the region being manipulated. Return EACCES if there
1809 * are conflicting locks.
1810 */
1811 if (!error && (va.va_type == VREG) && (va.va_mask & AT_SIZE)) {
1812 lookuperr = VOP_LOOKUP(dvp, name, &tvp, NULL, 0, NULL, cr,
1813 NULL, NULL, NULL);
1814
1815 if (!lookuperr &&
1816 rfs4_check_delegated(FWRITE, tvp, va.va_size == 0)) {
1817 VN_RELE(tvp);
1818 curthread->t_flag |= T_WOULDBLOCK;
1819 goto out;
1820 }
1821
1822 if (!lookuperr && nbl_need_check(tvp)) {
1823 /*
1824 * The file exists. Now check if it has any
1825 * conflicting non-blocking mandatory locks
1826 * in the region being changed.
1827 */
1828 struct vattr bva;
1829 u_offset_t offset;
1830 ssize_t length;
1831
1832 nbl_start_crit(tvp, RW_READER);
1833 in_crit = 1;
1834
1835 bva.va_mask = AT_SIZE;
1836 error = VOP_GETATTR(tvp, &bva, 0, cr, NULL);
1837 if (!error) {
1838 if (va.va_size < bva.va_size) {
1839 offset = va.va_size;
1840 length = bva.va_size - va.va_size;
1841 } else {
1842 offset = bva.va_size;
1843 length = va.va_size - bva.va_size;
1844 }
1845 if (length) {
1846 if (nbl_conflict(tvp, NBL_WRITE,
1847 offset, length, 0, NULL)) {
1848 error = EACCES;
1849 }
1850 }
1851 }
1852 if (error) {
1853 nbl_end_crit(tvp);
1854 VN_RELE(tvp);
1855 in_crit = 0;
1856 }
1857 } else if (tvp != NULL) {
1858 VN_RELE(tvp);
1859 }
1860 }
1861
1862 if (!error) {
1863 /*
1864 * If filesystem is shared with nosuid the remove any
1865 * setuid/setgid bits on create.
1866 */
1867 if (va.va_type == VREG &&
1868 exi->exi_export.ex_flags & EX_NOSUID)
1869 va.va_mode &= ~(VSUID | VSGID);
1870
1871 error = VOP_CREATE(dvp, name, &va, NONEXCL, mode, &vp, cr, 0,
1872 NULL, NULL);
1873
1874 if (!error) {
1875
1876 if ((va.va_mask & AT_SIZE) && (va.va_size == 0))
1877 trunc = TRUE;
1878 else
1879 trunc = FALSE;
1880
1881 if (rfs4_check_delegated(FWRITE, vp, trunc)) {
1882 VN_RELE(vp);
1883 curthread->t_flag |= T_WOULDBLOCK;
1884 goto out;
1885 }
1886 va.va_mask = AT_ALL;
1887
1888 error = VOP_GETATTR(vp, &va, 0, cr, NULL);
1889
1890 /* check for overflows */
1891 if (!error) {
1892 acl_perm(vp, exi, &va, cr);
1893 error = vattr_to_nattr(&va, &dr->dr_attr);
1894 if (!error) {
1895 error = makefh(&dr->dr_fhandle, vp,
1896 exi);
1897 }
1898 }
1899 /*
1900 * Force modified metadata out to stable storage.
1901 *
1902 * if a underlying vp exists, pass it to VOP_FSYNC
1903 */
1904 if (VOP_REALVP(vp, &realvp, NULL) == 0)
1905 (void) VOP_FSYNC(realvp, FNODSYNC, cr, NULL);
1906 else
1907 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
1908 VN_RELE(vp);
1909 }
1910
1911 if (in_crit) {
1912 nbl_end_crit(tvp);
1913 VN_RELE(tvp);
1914 }
1915 }
1916
1917 /*
1918 * Force modified data and metadata out to stable storage.
1919 */
1920 (void) VOP_FSYNC(dvp, 0, cr, NULL);
1921
1922 out:
1923
1924 VN_RELE(dvp);
1925
1926 dr->dr_status = puterrno(error);
1927
1928 if (name != args->ca_da.da_name)
1929 kmem_free(name, MAXPATHLEN);
1930 }
/*
 * Return a pointer to the parent-directory file handle carried in the
 * NFS create arguments.
 */
void *
rfs_create_getfh(struct nfscreatargs *args)
{
	return (args->ca_da.da_fhandle);
}
1936
/*
 * Remove a file.
 * Remove named file from parent directory.
 *
 * Sets *status to an NFS v2 status code.  The request is dropped
 * (T_WOULDBLOCK) if the target file is delegated to an NFSv4 client.
 */
/* ARGSUSED */
void
rfs_remove(struct nfsdiropargs *da, enum nfsstat *status,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error = 0;
	vnode_t *vp;		/* parent directory */
	vnode_t *targvp;	/* the file being removed */
	int in_crit = 0;	/* nonzero while inside targvp's nbmand crit region */

	/*
	 * Disallow NULL paths
	 */
	if (da->da_name == NULL || *da->da_name == '\0') {
		*status = NFSERR_ACCES;
		return;
	}

	vp = nfs_fhtovp(da->da_fhandle, exi);
	if (vp == NULL) {
		*status = NFSERR_STALE;
		return;
	}

	if (rdonly(ro, vp)) {
		VN_RELE(vp);
		*status = NFSERR_ROFS;
		return;
	}

	/*
	 * Check for a conflict with a non-blocking mandatory share reservation.
	 * The lookup is needed to get a hold on the target itself, not just
	 * its directory entry.
	 */
	error = VOP_LOOKUP(vp, da->da_name, &targvp, NULL, 0,
	    NULL, cr, NULL, NULL, NULL);
	if (error != 0) {
		VN_RELE(vp);
		*status = puterrno(error);
		return;
	}

	/*
	 * If the file is delegated to an v4 client, then initiate
	 * recall and drop this request (by setting T_WOULDBLOCK).
	 * The client will eventually re-transmit the request and
	 * (hopefully), by then, the v4 client will have returned
	 * the delegation.
	 */

	if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
		VN_RELE(vp);
		VN_RELE(targvp);
		curthread->t_flag |= T_WOULDBLOCK;
		return;
	}

	if (nbl_need_check(targvp)) {
		nbl_start_crit(targvp, RW_READER);
		in_crit = 1;
		if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
			error = EACCES;
			goto out;
		}
	}

	error = VOP_REMOVE(vp, da->da_name, cr, NULL, 0);

	/*
	 * Force modified data and metadata out to stable storage.
	 */
	(void) VOP_FSYNC(vp, 0, cr, NULL);

out:
	if (in_crit)
		nbl_end_crit(targvp);
	VN_RELE(targvp);
	VN_RELE(vp);

	*status = puterrno(error);

}
2022
/*
 * Return a pointer to the parent-directory file handle carried in the
 * NFS remove arguments.
 */
void *
rfs_remove_getfh(struct nfsdiropargs *da)
{
	return (da->da_fhandle);
}
2028
/*
 * rename a file
 * Give a file (from) a new name (to).
 *
 * Both directory handles must resolve to the same export; cross-device
 * renames are rejected with NFSERR_XDEV.  The request is dropped
 * (T_WOULDBLOCK) if either the source file or an existing file at the
 * target name is delegated to an NFSv4 client.
 */
/* ARGSUSED */
void
rfs_rename(struct nfsrnmargs *args, enum nfsstat *status,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error = 0;
	vnode_t *fromvp;	/* source directory */
	vnode_t *tovp;		/* target directory */
	struct exportinfo *to_exi;
	fhandle_t *fh;
	vnode_t *srcvp;		/* the file being renamed */
	vnode_t *targvp;	/* existing file at the target name, if any */
	int in_crit = 0;	/* nonzero while inside srcvp's nbmand crit region */

	fromvp = nfs_fhtovp(args->rna_from.da_fhandle, exi);
	if (fromvp == NULL) {
		*status = NFSERR_STALE;
		return;
	}

	/*
	 * The target directory handle must map to an export we serve,
	 * and to the same export as the source directory.  to_exi is
	 * only compared against exi after the release, never dereferenced.
	 */
	fh = args->rna_to.da_fhandle;
	to_exi = checkexport(&fh->fh_fsid, (fid_t *)&fh->fh_xlen);
	if (to_exi == NULL) {
		VN_RELE(fromvp);
		*status = NFSERR_ACCES;
		return;
	}
	exi_rele(to_exi);

	if (to_exi != exi) {
		VN_RELE(fromvp);
		*status = NFSERR_XDEV;
		return;
	}

	tovp = nfs_fhtovp(args->rna_to.da_fhandle, exi);
	if (tovp == NULL) {
		VN_RELE(fromvp);
		*status = NFSERR_STALE;
		return;
	}

	if (fromvp->v_type != VDIR || tovp->v_type != VDIR) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_NOTDIR;
		return;
	}

	/*
	 * Disallow NULL paths
	 */
	if (args->rna_from.da_name == NULL || *args->rna_from.da_name == '\0' ||
	    args->rna_to.da_name == NULL || *args->rna_to.da_name == '\0') {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_ACCES;
		return;
	}

	if (rdonly(ro, tovp)) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_ROFS;
		return;
	}

	/*
	 * Check for a conflict with a non-blocking mandatory share reservation.
	 */
	error = VOP_LOOKUP(fromvp, args->rna_from.da_name, &srcvp, NULL, 0,
	    NULL, cr, NULL, NULL, NULL);
	if (error != 0) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = puterrno(error);
		return;
	}

	/* Check for delegations on the source file */

	if (rfs4_check_delegated(FWRITE, srcvp, FALSE)) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		VN_RELE(srcvp);
		curthread->t_flag |= T_WOULDBLOCK;
		return;
	}

	/* Check for delegation on the file being renamed over, if it exists */

	if (rfs4_deleg_policy != SRV_NEVER_DELEGATE &&
	    VOP_LOOKUP(tovp, args->rna_to.da_name, &targvp, NULL, 0, NULL, cr,
	    NULL, NULL, NULL) == 0) {

		if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
			VN_RELE(tovp);
			VN_RELE(fromvp);
			VN_RELE(srcvp);
			VN_RELE(targvp);
			curthread->t_flag |= T_WOULDBLOCK;
			return;
		}
		VN_RELE(targvp);
	}


	if (nbl_need_check(srcvp)) {
		nbl_start_crit(srcvp, RW_READER);
		in_crit = 1;
		if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL)) {
			error = EACCES;
			goto out;
		}
	}

	error = VOP_RENAME(fromvp, args->rna_from.da_name,
	    tovp, args->rna_to.da_name, cr, NULL, 0);

	/* Record the new name for /proc-style path reconstruction. */
	if (error == 0)
		vn_renamepath(tovp, srcvp, args->rna_to.da_name,
		    strlen(args->rna_to.da_name));

	/*
	 * Force modified data and metadata out to stable storage.
	 */
	(void) VOP_FSYNC(tovp, 0, cr, NULL);
	(void) VOP_FSYNC(fromvp, 0, cr, NULL);

out:
	if (in_crit)
		nbl_end_crit(srcvp);
	VN_RELE(srcvp);
	VN_RELE(tovp);
	VN_RELE(fromvp);

	*status = puterrno(error);

}
/*
 * Return a pointer to the source-directory file handle carried in the
 * NFS rename arguments.
 */
void *
rfs_rename_getfh(struct nfsrnmargs *args)
{
	return (args->rna_from.da_fhandle);
}
2177
2178 /*
2179 * Link to a file.
2180 * Create a file (to) which is a hard link to the given file (from).
2181 */
2182 /* ARGSUSED */
2183 void
2184 rfs_link(struct nfslinkargs *args, enum nfsstat *status,
2185 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
2186 {
2187 int error;
2188 vnode_t *fromvp;
2189 vnode_t *tovp;
2190 struct exportinfo *to_exi;
2191 fhandle_t *fh;
2192
2193 fromvp = nfs_fhtovp(args->la_from, exi);
2194 if (fromvp == NULL) {
2195 *status = NFSERR_STALE;
2196 return;
2197 }
2198
2199 fh = args->la_to.da_fhandle;
2200 to_exi = checkexport(&fh->fh_fsid, (fid_t *)&fh->fh_xlen);
2201 if (to_exi == NULL) {
2202 VN_RELE(fromvp);
2203 *status = NFSERR_ACCES;
2204 return;
2205 }
2206 exi_rele(to_exi);
2207
2208 if (to_exi != exi) {
2209 VN_RELE(fromvp);
2210 *status = NFSERR_XDEV;
2211 return;
2212 }
2213
2214 tovp = nfs_fhtovp(args->la_to.da_fhandle, exi);
2215 if (tovp == NULL) {
2216 VN_RELE(fromvp);
2217 *status = NFSERR_STALE;
2218 return;
2219 }
2220
2221 if (tovp->v_type != VDIR) {
2222 VN_RELE(tovp);
2223 VN_RELE(fromvp);
2224 *status = NFSERR_NOTDIR;
2225 return;
2226 }
2227 /*
2228 * Disallow NULL paths
2229 */
2230 if (args->la_to.da_name == NULL || *args->la_to.da_name == '\0') {
2231 VN_RELE(tovp);
2232 VN_RELE(fromvp);
2233 *status = NFSERR_ACCES;
2234 return;
2235 }
2236
2237 if (rdonly(ro, tovp)) {
2238 VN_RELE(tovp);
2239 VN_RELE(fromvp);
2240 *status = NFSERR_ROFS;
2241 return;
2242 }
2243
2244 error = VOP_LINK(tovp, fromvp, args->la_to.da_name, cr, NULL, 0);
2245
2246 /*
2247 * Force modified data and metadata out to stable storage.
2248 */
2249 (void) VOP_FSYNC(tovp, 0, cr, NULL);
2250 (void) VOP_FSYNC(fromvp, FNODSYNC, cr, NULL);
2251
2252 VN_RELE(tovp);
2253 VN_RELE(fromvp);
2254
2255 *status = puterrno(error);
2256
2257 }
/*
 * Return a pointer to the source-file handle carried in the NFS link
 * arguments.
 */
void *
rfs_link_getfh(struct nfslinkargs *args)
{
	return (args->la_from);
}
2263
2264 /*
2265 * Symbolicly link to a file.
2266 * Create a file (to) with the given attributes which is a symbolic link
2267 * to the given path name (to).
2268 */
2269 void
2270 rfs_symlink(struct nfsslargs *args, enum nfsstat *status,
2271 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
2272 {
2273 int error;
2274 struct vattr va;
2275 vnode_t *vp;
2276 vnode_t *svp;
2277 int lerror;
2278 struct sockaddr *ca;
2279 char *name = NULL;
2280
2281 /*
2282 * Disallow NULL paths
2283 */
2284 if (args->sla_from.da_name == NULL || *args->sla_from.da_name == '\0') {
2285 *status = NFSERR_ACCES;
2286 return;
2287 }
2288
2289 vp = nfs_fhtovp(args->sla_from.da_fhandle, exi);
2290 if (vp == NULL) {
2291 *status = NFSERR_STALE;
2292 return;
2293 }
2294
2295 if (rdonly(ro, vp)) {
2296 VN_RELE(vp);
2297 *status = NFSERR_ROFS;
2298 return;
2299 }
2300
2301 error = sattr_to_vattr(args->sla_sa, &va);
2302 if (error) {
2303 VN_RELE(vp);
2304 *status = puterrno(error);
2305 return;
2306 }
2307
2308 if (!(va.va_mask & AT_MODE)) {
2309 VN_RELE(vp);
2310 *status = NFSERR_INVAL;
2311 return;
2312 }
2313
2314 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2315 name = nfscmd_convname(ca, exi, args->sla_tnm,
2316 NFSCMD_CONV_INBOUND, MAXPATHLEN);
2317
2318 if (name == NULL) {
2319 *status = NFSERR_ACCES;
2320 return;
2321 }
2322
2323 va.va_type = VLNK;
2324 va.va_mask |= AT_TYPE;
2325
2326 error = VOP_SYMLINK(vp, args->sla_from.da_name, &va, name, cr, NULL, 0);
2327
2328 /*
2329 * Force new data and metadata out to stable storage.
2330 */
2331 lerror = VOP_LOOKUP(vp, args->sla_from.da_name, &svp, NULL, 0,
2332 NULL, cr, NULL, NULL, NULL);
2333
2334 if (!lerror) {
2335 (void) VOP_FSYNC(svp, 0, cr, NULL);
2336 VN_RELE(svp);
2337 }
2338
2339 /*
2340 * Force modified data and metadata out to stable storage.
2341 */
2342 (void) VOP_FSYNC(vp, 0, cr, NULL);
2343
2344 VN_RELE(vp);
2345
2346 *status = puterrno(error);
2347 if (name != args->sla_tnm)
2348 kmem_free(name, MAXPATHLEN);
2349
2350 }
/*
 * Return a pointer to the directory file handle carried in the NFS
 * symlink arguments.
 */
void *
rfs_symlink_getfh(struct nfsslargs *args)
{
	return (args->sla_from.da_fhandle);
}
2356
2357 /*
2358 * Make a directory.
2359 * Create a directory with the given name, parent directory, and attributes.
2360 * Returns a file handle and attributes for the new directory.
2361 */
2362 /* ARGSUSED */
2363 void
2364 rfs_mkdir(struct nfscreatargs *args, struct nfsdiropres *dr,
2365 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
2366 {
2367 int error;
2368 struct vattr va;
2369 vnode_t *dvp = NULL;
2370 vnode_t *vp;
2371 char *name = args->ca_da.da_name;
2372
2373 /*
2374 * Disallow NULL paths
2375 */
2376 if (name == NULL || *name == '\0') {
2377 dr->dr_status = NFSERR_ACCES;
2378 return;
2379 }
2380
2381 vp = nfs_fhtovp(args->ca_da.da_fhandle, exi);
2382 if (vp == NULL) {
2383 dr->dr_status = NFSERR_STALE;
2384 return;
2385 }
2386
2387 if (rdonly(ro, vp)) {
2388 VN_RELE(vp);
2389 dr->dr_status = NFSERR_ROFS;
2390 return;
2391 }
2392
2393 error = sattr_to_vattr(args->ca_sa, &va);
2394 if (error) {
2395 VN_RELE(vp);
2396 dr->dr_status = puterrno(error);
2397 return;
2398 }
2399
2400 if (!(va.va_mask & AT_MODE)) {
2401 VN_RELE(vp);
2402 dr->dr_status = NFSERR_INVAL;
2403 return;
2404 }
2405
2406 va.va_type = VDIR;
2407 va.va_mask |= AT_TYPE;
2408
2409 error = VOP_MKDIR(vp, name, &va, &dvp, cr, NULL, 0, NULL);
2410
2411 if (!error) {
2412 /*
2413 * Attribtutes of the newly created directory should
2414 * be returned to the client.
2415 */
2416 va.va_mask = AT_ALL; /* We want everything */
2417 error = VOP_GETATTR(dvp, &va, 0, cr, NULL);
2418
2419 /* check for overflows */
2420 if (!error) {
2421 acl_perm(vp, exi, &va, cr);
2422 error = vattr_to_nattr(&va, &dr->dr_attr);
2423 if (!error) {
2424 error = makefh(&dr->dr_fhandle, dvp, exi);
2425 }
2426 }
2427 /*
2428 * Force new data and metadata out to stable storage.
2429 */
2430 (void) VOP_FSYNC(dvp, 0, cr, NULL);
2431 VN_RELE(dvp);
2432 }
2433
2434 /*
2435 * Force modified data and metadata out to stable storage.
2436 */
2437 (void) VOP_FSYNC(vp, 0, cr, NULL);
2438
2439 VN_RELE(vp);
2440
2441 dr->dr_status = puterrno(error);
2442
2443 }
/*
 * Return a pointer to the parent-directory file handle carried in the
 * NFS mkdir arguments.
 */
void *
rfs_mkdir_getfh(struct nfscreatargs *args)
{
	return (args->ca_da.da_fhandle);
}
2449
2450 /*
2451 * Remove a directory.
2452 * Remove the given directory name from the given parent directory.
2453 */
2454 /* ARGSUSED */
2455 void
2456 rfs_rmdir(struct nfsdiropargs *da, enum nfsstat *status,
2457 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
2458 {
2459 int error;
2460 vnode_t *vp;
2461
2462 /*
2463 * Disallow NULL paths
2464 */
2465 if (da->da_name == NULL || *da->da_name == '\0') {
2466 *status = NFSERR_ACCES;
2467 return;
2468 }
2469
2470 vp = nfs_fhtovp(da->da_fhandle, exi);
2471 if (vp == NULL) {
2472 *status = NFSERR_STALE;
2473 return;
2474 }
2475
2476 if (rdonly(ro, vp)) {
2477 VN_RELE(vp);
2478 *status = NFSERR_ROFS;
2479 return;
2480 }
2481
2482 /*
2483 * VOP_RMDIR takes a third argument (the current
2484 * directory of the process). That's because someone
2485 * wants to return EINVAL if one tries to remove ".".
2486 * Of course, NFS servers have no idea what their
2487 * clients' current directories are. We fake it by
2488 * supplying a vnode known to exist and illegal to
2489 * remove.
2490 */
2491 error = VOP_RMDIR(vp, da->da_name, rootdir, cr, NULL, 0);
2492
2493 /*
2494 * Force modified data and metadata out to stable storage.
2495 */
2496 (void) VOP_FSYNC(vp, 0, cr, NULL);
2497
2498 VN_RELE(vp);
2499
2500 /*
2501 * System V defines rmdir to return EEXIST, not ENOTEMPTY,
2502 * if the directory is not empty. A System V NFS server
2503 * needs to map NFSERR_EXIST to NFSERR_NOTEMPTY to transmit
2504 * over the wire.
2505 */
2506 if (error == EEXIST)
2507 *status = NFSERR_NOTEMPTY;
2508 else
2509 *status = puterrno(error);
2510
2511 }
/*
 * Return a pointer to the parent-directory file handle carried in the
 * NFS rmdir arguments.
 */
void *
rfs_rmdir_getfh(struct nfsdiropargs *da)
{
	return (da->da_fhandle);
}
2517
2518 /* ARGSUSED */
2519 void
2520 rfs_readdir(struct nfsrddirargs *rda, struct nfsrddirres *rd,
2521 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
2522 {
2523 int error;
2524 int iseof;
2525 struct iovec iov;
2526 struct uio uio;
2527 vnode_t *vp;
2528 char *ndata = NULL;
2529 struct sockaddr *ca;
2530 size_t nents;
2531 int ret;
2532
2533 vp = nfs_fhtovp(&rda->rda_fh, exi);
2534 if (vp == NULL) {
2535 rd->rd_entries = NULL;
2536 rd->rd_status = NFSERR_STALE;
2537 return;
2538 }
2539
2540 if (vp->v_type != VDIR) {
2541 VN_RELE(vp);
2542 rd->rd_entries = NULL;
2543 rd->rd_status = NFSERR_NOTDIR;
2544 return;
2545 }
2546
2547 (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
2548
2549 error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
2550
2551 if (error) {
2552 rd->rd_entries = NULL;
2553 goto bad;
2554 }
2555
2556 if (rda->rda_count == 0) {
2557 rd->rd_entries = NULL;
2558 rd->rd_size = 0;
2559 rd->rd_eof = FALSE;
2560 goto bad;
2561 }
2562
2563 rda->rda_count = MIN(rda->rda_count, NFS_MAXDATA);
2564
2565 /*
2566 * Allocate data for entries. This will be freed by rfs_rddirfree.
2567 */
2568 rd->rd_bufsize = (uint_t)rda->rda_count;
2569 rd->rd_entries = kmem_alloc(rd->rd_bufsize, KM_SLEEP);
2570
2571 /*
2572 * Set up io vector to read directory data
2573 */
2574 iov.iov_base = (caddr_t)rd->rd_entries;
2575 iov.iov_len = rda->rda_count;
2576 uio.uio_iov = &iov;
2577 uio.uio_iovcnt = 1;
2578 uio.uio_segflg = UIO_SYSSPACE;
2579 uio.uio_extflg = UIO_COPY_CACHED;
2580 uio.uio_loffset = (offset_t)rda->rda_offset;
2581 uio.uio_resid = rda->rda_count;
2582
2583 /*
2584 * read directory
2585 */
2586 error = VOP_READDIR(vp, &uio, cr, &iseof, NULL, 0);
2587
2588 /*
2589 * Clean up
2590 */
2591 if (!error) {
2592 /*
2593 * set size and eof
2594 */
2595 if (uio.uio_resid == rda->rda_count) {
2596 rd->rd_size = 0;
2597 rd->rd_eof = TRUE;
2598 } else {
2599 rd->rd_size = (uint32_t)(rda->rda_count -
2600 uio.uio_resid);
2601 rd->rd_eof = iseof ? TRUE : FALSE;
2602 }
2603 }
2604
2605 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2606 nents = nfscmd_countents((char *)rd->rd_entries, rd->rd_size);
2607 ret = nfscmd_convdirplus(ca, exi, (char *)rd->rd_entries, nents,
2608 rda->rda_count, &ndata);
2609
2610 if (ret != 0) {
2611 size_t dropbytes;
2612 /*
2613 * We had to drop one or more entries in order to fit
2614 * during the character conversion. We need to patch
2615 * up the size and eof info.
2616 */
2617 if (rd->rd_eof)
2618 rd->rd_eof = FALSE;
2619 dropbytes = nfscmd_dropped_entrysize(
2620 (struct dirent64 *)rd->rd_entries, nents, ret);
2621 rd->rd_size -= dropbytes;
2622 }
2623 if (ndata == NULL) {
2624 ndata = (char *)rd->rd_entries;
2625 } else if (ndata != (char *)rd->rd_entries) {
2626 kmem_free(rd->rd_entries, rd->rd_bufsize);
2627 rd->rd_entries = (void *)ndata;
2628 rd->rd_bufsize = rda->rda_count;
2629 }
2630
2631 bad:
2632 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
2633
2634 #if 0 /* notyet */
2635 /*
2636 * Don't do this. It causes local disk writes when just
2637 * reading the file and the overhead is deemed larger
2638 * than the benefit.
2639 */
2640 /*
2641 * Force modified metadata out to stable storage.
2642 */
2643 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
2644 #endif
2645
2646 VN_RELE(vp);
2647
2648 rd->rd_status = puterrno(error);
2649
2650 }
2651 void *
2652 rfs_readdir_getfh(struct nfsrddirargs *rda)
2653 {
2654 return (&rda->rda_fh);
2655 }
2656 void
2657 rfs_rddirfree(struct nfsrddirres *rd)
2658 {
2659 if (rd->rd_entries != NULL)
2660 kmem_free(rd->rd_entries, rd->rd_bufsize);
2661 }
2662
2663 /* ARGSUSED */
2664 void
2665 rfs_statfs(fhandle_t *fh, struct nfsstatfs *fs, struct exportinfo *exi,
2666 struct svc_req *req, cred_t *cr, bool_t ro)
2667 {
2668 int error;
2669 struct statvfs64 sb;
2670 vnode_t *vp;
2671
2672 vp = nfs_fhtovp(fh, exi);
2673 if (vp == NULL) {
2674 fs->fs_status = NFSERR_STALE;
2675 return;
2676 }
2677
2678 error = VFS_STATVFS(vp->v_vfsp, &sb);
2679
2680 if (!error) {
2681 fs->fs_tsize = nfstsize();
2682 fs->fs_bsize = sb.f_frsize;
2683 fs->fs_blocks = sb.f_blocks;
2684 fs->fs_bfree = sb.f_bfree;
2685 fs->fs_bavail = sb.f_bavail;
2686 }
2687
2688 VN_RELE(vp);
2689
2690 fs->fs_status = puterrno(error);
2691
2692 }
2693 void *
2694 rfs_statfs_getfh(fhandle_t *fh)
2695 {
2696 return (fh);
2697 }
2698
/*
 * Convert the over-the-wire NFSv2 settable attributes (*sa) into a
 * vattr (*vap).  Fields carrying the protocol's -1 "do not set"
 * sentinel are skipped; all others are copied and their bits set
 * in va_mask.
 *
 * Returns 0 on success, or EOVERFLOW (32-bit kernels only) if a
 * client-supplied time does not fit in the native time_t.
 */
static int
sattr_to_vattr(struct nfssattr *sa, struct vattr *vap)
{
	vap->va_mask = 0;

	/*
	 * There was a sign extension bug in some VFS based systems
	 * which stored the mode as a short.  When it would get
	 * assigned to a u_long, no sign extension would occur.
	 * It needed to, but this wasn't noticed because sa_mode
	 * would then get assigned back to the short, thus ignoring
	 * the upper 16 bits of sa_mode.
	 *
	 * To make this implementation work for both broken
	 * clients and good clients, we check for both versions
	 * of the mode.
	 */
	if (sa->sa_mode != (uint32_t)((ushort_t)-1) &&
	    sa->sa_mode != (uint32_t)-1) {
		vap->va_mask |= AT_MODE;
		vap->va_mode = sa->sa_mode;
	}
	if (sa->sa_uid != (uint32_t)-1) {
		vap->va_mask |= AT_UID;
		vap->va_uid = sa->sa_uid;
	}
	if (sa->sa_gid != (uint32_t)-1) {
		vap->va_mask |= AT_GID;
		vap->va_gid = sa->sa_gid;
	}
	if (sa->sa_size != (uint32_t)-1) {
		vap->va_mask |= AT_SIZE;
		vap->va_size = sa->sa_size;
	}
	/* Both seconds and microseconds must differ from -1 to count. */
	if (sa->sa_atime.tv_sec != (int32_t)-1 &&
	    sa->sa_atime.tv_usec != (int32_t)-1) {
#ifndef _LP64
		/* return error if time overflow */
		if (!NFS2_TIME_OK(sa->sa_atime.tv_sec))
			return (EOVERFLOW);
#endif
		vap->va_mask |= AT_ATIME;
		/*
		 * nfs protocol defines times as unsigned so don't extend sign,
		 * unless sysadmin set nfs_allow_preepoch_time.
		 */
		NFS_TIME_T_CONVERT(vap->va_atime.tv_sec, sa->sa_atime.tv_sec);
		vap->va_atime.tv_nsec = (uint32_t)(sa->sa_atime.tv_usec * 1000);
	}
	if (sa->sa_mtime.tv_sec != (int32_t)-1 &&
	    sa->sa_mtime.tv_usec != (int32_t)-1) {
#ifndef _LP64
		/* return error if time overflow */
		if (!NFS2_TIME_OK(sa->sa_mtime.tv_sec))
			return (EOVERFLOW);
#endif
		vap->va_mask |= AT_MTIME;
		/*
		 * nfs protocol defines times as unsigned so don't extend sign,
		 * unless sysadmin set nfs_allow_preepoch_time.
		 */
		NFS_TIME_T_CONVERT(vap->va_mtime.tv_sec, sa->sa_mtime.tv_sec);
		vap->va_mtime.tv_nsec = (uint32_t)(sa->sa_mtime.tv_usec * 1000);
	}
	return (0);
}
2765
/*
 * Map vnode types (va_type, used as the index) to the NFSv2
 * over-the-wire file types.  Entries with no wire representation
 * map to 0; note VFIFO also maps to 0 here and is instead remapped
 * by NA_SETFIFO() in vattr_to_nattr().
 */
static enum nfsftype vt_to_nf[] = {
	0, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, 0, 0, 0, NFSOC, 0
};
2769
2770 /*
2771 * check the following fields for overflow: nodeid, size, and time.
2772 * There could be a problem when converting 64-bit LP64 fields
2773 * into 32-bit ones. Return an error if there is an overflow.
2774 */
2775 int
2776 vattr_to_nattr(struct vattr *vap, struct nfsfattr *na)
2777 {
2778 ASSERT(vap->va_type >= VNON && vap->va_type <= VBAD);
2779 na->na_type = vt_to_nf[vap->va_type];
2780
2781 if (vap->va_mode == (unsigned short) -1)
2782 na->na_mode = (uint32_t)-1;
2783 else
2784 na->na_mode = VTTOIF(vap->va_type) | vap->va_mode;
2785
2786 if (vap->va_uid == (unsigned short)(-1))
2787 na->na_uid = (uint32_t)(-1);
2788 else if (vap->va_uid == UID_NOBODY)
2789 na->na_uid = (uint32_t)NFS_UID_NOBODY;
2790 else
2791 na->na_uid = vap->va_uid;
2792
2793 if (vap->va_gid == (unsigned short)(-1))
2794 na->na_gid = (uint32_t)-1;
2795 else if (vap->va_gid == GID_NOBODY)
2796 na->na_gid = (uint32_t)NFS_GID_NOBODY;
2797 else
2798 na->na_gid = vap->va_gid;
2799
2800 /*
2801 * Do we need to check fsid for overflow? It is 64-bit in the
2802 * vattr, but are bigger than 32 bit values supported?
2803 */
2804 na->na_fsid = vap->va_fsid;
2805
2806 na->na_nodeid = vap->va_nodeid;
2807
2808 /*
2809 * Check to make sure that the nodeid is representable over the
2810 * wire without losing bits.
2811 */
2812 if (vap->va_nodeid != (u_longlong_t)na->na_nodeid)
2813 return (EFBIG);
2814 na->na_nlink = vap->va_nlink;
2815
2816 /*
2817 * Check for big files here, instead of at the caller. See
2818 * comments in cstat for large special file explanation.
2819 */
2820 if (vap->va_size > (u_longlong_t)MAXOFF32_T) {
2821 if ((vap->va_type == VREG) || (vap->va_type == VDIR))
2822 return (EFBIG);
2823 if ((vap->va_type == VBLK) || (vap->va_type == VCHR)) {
2824 /* UNKNOWN_SIZE | OVERFLOW */
2825 na->na_size = MAXOFF32_T;
2826 } else
2827 na->na_size = vap->va_size;
2828 } else
2829 na->na_size = vap->va_size;
2830
2831 /*
2832 * If the vnode times overflow the 32-bit times that NFS2
2833 * uses on the wire then return an error.
2834 */
2835 if (!NFS_VAP_TIME_OK(vap)) {
2836 return (EOVERFLOW);
2837 }
2838 na->na_atime.tv_sec = vap->va_atime.tv_sec;
2839 na->na_atime.tv_usec = vap->va_atime.tv_nsec / 1000;
2840
2841 na->na_mtime.tv_sec = vap->va_mtime.tv_sec;
2842 na->na_mtime.tv_usec = vap->va_mtime.tv_nsec / 1000;
2843
2844 na->na_ctime.tv_sec = vap->va_ctime.tv_sec;
2845 na->na_ctime.tv_usec = vap->va_ctime.tv_nsec / 1000;
2846
2847 /*
2848 * If the dev_t will fit into 16 bits then compress
2849 * it, otherwise leave it alone. See comments in
2850 * nfs_client.c.
2851 */
2852 if (getminor(vap->va_rdev) <= SO4_MAXMIN &&
2853 getmajor(vap->va_rdev) <= SO4_MAXMAJ)
2854 na->na_rdev = nfsv2_cmpdev(vap->va_rdev);
2855 else
2856 (void) cmpldev(&na->na_rdev, vap->va_rdev);
2857
2858 na->na_blocks = vap->va_nblocks;
2859 na->na_blocksize = vap->va_blksize;
2860
2861 /*
2862 * This bit of ugliness is a *TEMPORARY* hack to preserve the
2863 * over-the-wire protocols for named-pipe vnodes. It remaps the
2864 * VFIFO type to the special over-the-wire type. (see note in nfs.h)
2865 *
2866 * BUYER BEWARE:
2867 * If you are porting the NFS to a non-Sun server, you probably
2868 * don't want to include the following block of code. The
2869 * over-the-wire special file types will be changing with the
2870 * NFS Protocol Revision.
2871 */
2872 if (vap->va_type == VFIFO)
2873 NA_SETFIFO(na);
2874 return (0);
2875 }
2876
2877 /*
2878 * acl v2 support: returns approximate permission.
2879 * default: returns minimal permission (more restrictive)
2880 * aclok: returns maximal permission (less restrictive)
 * This routine changes the permissions that are already in *va.
2882 * If a file has minimal ACL, i.e. aclcnt == MIN_ACL_ENTRIES,
2883 * CLASS_OBJ is always the same as GROUP_OBJ entry.
2884 */
2885 static void
2886 acl_perm(struct vnode *vp, struct exportinfo *exi, struct vattr *va, cred_t *cr)
2887 {
2888 vsecattr_t vsa;
2889 int aclcnt;
2890 aclent_t *aclentp;
2891 mode_t mask_perm;
2892 mode_t grp_perm;
2893 mode_t other_perm;
2894 mode_t other_orig;
2895 int error;
2896
2897 /* dont care default acl */
2898 vsa.vsa_mask = (VSA_ACL | VSA_ACLCNT);
2899 error = VOP_GETSECATTR(vp, &vsa, 0, cr, NULL);
2900
2901 if (!error) {
2902 aclcnt = vsa.vsa_aclcnt;
2903 if (aclcnt > MIN_ACL_ENTRIES) {
2904 /* non-trivial ACL */
2905 aclentp = vsa.vsa_aclentp;
2906 if (exi->exi_export.ex_flags & EX_ACLOK) {
2907 /* maximal permissions */
2908 grp_perm = 0;
2909 other_perm = 0;
2910 for (; aclcnt > 0; aclcnt--, aclentp++) {
2911 switch (aclentp->a_type) {
2912 case USER_OBJ:
2913 break;
2914 case USER:
2915 grp_perm |=
2916 aclentp->a_perm << 3;
2917 other_perm |= aclentp->a_perm;
2918 break;
2919 case GROUP_OBJ:
2920 grp_perm |=
2921 aclentp->a_perm << 3;
2922 break;
2923 case GROUP:
2924 other_perm |= aclentp->a_perm;
2925 break;
2926 case OTHER_OBJ:
2927 other_orig = aclentp->a_perm;
2928 break;
2929 case CLASS_OBJ:
2930 mask_perm = aclentp->a_perm;
2931 break;
2932 default:
2933 break;
2934 }
2935 }
2936 grp_perm &= mask_perm << 3;
2937 other_perm &= mask_perm;
2938 other_perm |= other_orig;
2939
2940 } else {
2941 /* minimal permissions */
2942 grp_perm = 070;
2943 other_perm = 07;
2944 for (; aclcnt > 0; aclcnt--, aclentp++) {
2945 switch (aclentp->a_type) {
2946 case USER_OBJ:
2947 break;
2948 case USER:
2949 case CLASS_OBJ:
2950 grp_perm &=
2951 aclentp->a_perm << 3;
2952 other_perm &=
2953 aclentp->a_perm;
2954 break;
2955 case GROUP_OBJ:
2956 grp_perm &=
2957 aclentp->a_perm << 3;
2958 break;
2959 case GROUP:
2960 other_perm &=
2961 aclentp->a_perm;
2962 break;
2963 case OTHER_OBJ:
2964 other_perm &=
2965 aclentp->a_perm;
2966 break;
2967 default:
2968 break;
2969 }
2970 }
2971 }
2972 /* copy to va */
2973 va->va_mode &= ~077;
2974 va->va_mode |= grp_perm | other_perm;
2975 }
2976 if (vsa.vsa_aclcnt)
2977 kmem_free(vsa.vsa_aclentp,
2978 vsa.vsa_aclcnt * sizeof (aclent_t));
2979 }
2980 }
2981
2982 void
2983 rfs_srvrinit(void)
2984 {
2985 mutex_init(&rfs_async_write_lock, NULL, MUTEX_DEFAULT, NULL);
2986 nfs2_srv_caller_id = fs_new_caller_id();
2987 }
2988
/*
 * Tear down the NFS version 2 server state set up by rfs_srvrinit().
 */
void
rfs_srvrfini(void)
{
	mutex_destroy(&rfs_async_write_lock);
}
2994
2995 static int
2996 rdma_setup_read_data2(struct nfsreadargs *ra, struct nfsrdresult *rr)
2997 {
2998 struct clist *wcl;
2999 int wlist_len;
3000 uint32_t count = rr->rr_count;
3001
3002 wcl = ra->ra_wlist;
3003
3004 if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE) {
3005 return (FALSE);
3006 }
3007
3008 wcl = ra->ra_wlist;
3009 rr->rr_ok.rrok_wlist_len = wlist_len;
3010 rr->rr_ok.rrok_wlist = wcl;
3011
3012 return (TRUE);
3013 }