1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
25 * Copyright (c) 2016 by Delphix. All rights reserved.
26 */
27
28 /*
29 * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T.
30 * All rights reserved.
31 */
32
33 #include <sys/param.h>
34 #include <sys/types.h>
35 #include <sys/systm.h>
36 #include <sys/cred.h>
37 #include <sys/buf.h>
38 #include <sys/vfs.h>
39 #include <sys/vnode.h>
40 #include <sys/uio.h>
41 #include <sys/stat.h>
42 #include <sys/errno.h>
43 #include <sys/sysmacros.h>
44 #include <sys/statvfs.h>
45 #include <sys/kmem.h>
46 #include <sys/kstat.h>
47 #include <sys/dirent.h>
48 #include <sys/cmn_err.h>
49 #include <sys/debug.h>
50 #include <sys/vtrace.h>
51 #include <sys/mode.h>
52 #include <sys/acl.h>
53 #include <sys/nbmlock.h>
54 #include <sys/policy.h>
55 #include <sys/sdt.h>
56
57 #include <rpc/types.h>
58 #include <rpc/auth.h>
59 #include <rpc/svc.h>
60
61 #include <nfs/nfs.h>
62 #include <nfs/export.h>
63 #include <nfs/nfs_cmd.h>
64
65 #include <vm/hat.h>
66 #include <vm/as.h>
67 #include <vm/seg.h>
68 #include <vm/seg_map.h>
69 #include <vm/seg_kmem.h>
70
71 #include <sys/strsubr.h>
72
73 /*
74 * These are the interface routines for the server side of the
75 * Network File System. See the NFS version 2 protocol specification
76 * for a description of this interface.
77 */
78
79 static int sattr_to_vattr(struct nfssattr *, struct vattr *);
80 static void acl_perm(struct vnode *, struct exportinfo *, struct vattr *,
81 cred_t *);
82
83 /*
84 * Some "over the wire" UNIX file types. These are encoded
85 * into the mode. This needs to be fixed in the next rev.
86 */
87 #define IFMT 0170000 /* type of file */
88 #define IFCHR 0020000 /* character special */
89 #define IFBLK 0060000 /* block special */
90 #define IFSOCK 0140000 /* socket */
91
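/*
 * Unique caller id placed in the caller_context_t passed to VOP calls,
 * identifying requests issued by the NFSv2 server.
 */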
92 u_longlong_t nfs2_srv_caller_id;
93
94 /*
95 * Get file attributes.
96 * Returns the current attributes of the file with the given fhandle.
97 */
98 /* ARGSUSED */
99 void
100 rfs_getattr(fhandle_t *fhp, struct nfsattrstat *ns, struct exportinfo *exi,
101 struct svc_req *req, cred_t *cr, bool_t ro)
102 {
103 int error;
104 vnode_t *vp;
105 struct vattr va;
106
107 vp = nfs_fhtovp(fhp, exi);
108 if (vp == NULL) {
109 ns->ns_status = NFSERR_STALE;
110 return;
111 }
112
113 /*
114 * Do the getattr.
115 */
116 va.va_mask = AT_ALL; /* we want all the attributes */
117
118 error = rfs4_delegated_getattr(vp, &va, 0, cr);
119
120 /* check for overflows */
121 if (!error) {
122 /* Lie about the object type for a referral */
123 if (vn_is_nfs_reparse(vp, cr))
124 va.va_type = VLNK;
125
126 acl_perm(vp, exi, &va, cr);
127 error = vattr_to_nattr(&va, &ns->ns_attr);
128 }
129
130 VN_RELE(vp);
131
132 ns->ns_status = puterrno(error);
}

134 void *
135 rfs_getattr_getfh(fhandle_t *fhp)
136 {
137 return (fhp);
138 }
139
140 /*
141 * Set file attributes.
142 * Sets the attributes of the file with the given fhandle. Returns
143 * the new attributes.
144 */
145 /* ARGSUSED */
146 void
147 rfs_setattr(struct nfssaargs *args, struct nfsattrstat *ns,
148 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
149 {
150 int error;
151 int flag;
152 int in_crit = 0;
153 vnode_t *vp;
154 struct vattr va;
155 struct vattr bva;
156 struct flock64 bf;
	caller_context_t ct;

160 vp = nfs_fhtovp(&args->saa_fh, exi);
161 if (vp == NULL) {
162 ns->ns_status = NFSERR_STALE;
163 return;
164 }
165
166 if (rdonly(ro, vp)) {
167 VN_RELE(vp);
168 ns->ns_status = NFSERR_ROFS;
169 return;
170 }
171
172 error = sattr_to_vattr(&args->saa_sa, &va);
173 if (error) {
174 VN_RELE(vp);
175 ns->ns_status = puterrno(error);
176 return;
177 }
178
179 /*
180 * If the client is requesting a change to the mtime,
181 * but the nanosecond field is set to 1 billion, then
182 * this is a flag to the server that it should set the
183 * atime and mtime fields to the server's current time.
	 * The 1 billion value actually came from the client
	 * as 1 million; the units in the over-the-wire request
	 * are microseconds, which the server converts to
	 * nanoseconds.
187 *
188 * This is an overload of the protocol and should be
189 * documented in the NFS Version 2 protocol specification.
190 */
191 if (va.va_mask & AT_MTIME) {
192 if (va.va_mtime.tv_nsec == 1000000000) {
193 gethrestime(&va.va_mtime);
194 va.va_atime = va.va_mtime;
195 va.va_mask |= AT_ATIME;
196 flag = 0;
197 } else
198 flag = ATTR_UTIME;
199 } else
200 flag = 0;
201
202 /*
203 * If the filesystem is exported with nosuid, then mask off
204 * the setuid and setgid bits.
205 */
206 if ((va.va_mask & AT_MODE) && vp->v_type == VREG &&
207 (exi->exi_export.ex_flags & EX_NOSUID))
208 va.va_mode &= ~(VSUID | VSGID);
209
210 ct.cc_sysid = 0;
211 ct.cc_pid = 0;
212 ct.cc_caller_id = nfs2_srv_caller_id;
213 ct.cc_flags = CC_DONTBLOCK;
214
215 /*
216 * We need to specially handle size changes because it is
217 * possible for the client to create a file with modes
218 * which indicate read-only, but with the file opened for
219 * writing. If the client then tries to set the size of
220 * the file, then the normal access checking done in
221 * VOP_SETATTR would prevent the client from doing so,
222 * although it should be legal for it to do so. To get
223 * around this, we do the access checking for ourselves
224 * and then use VOP_SPACE which doesn't do the access
225 * checking which VOP_SETATTR does. VOP_SPACE can only
	 * operate on VREG files; let VOP_SETATTR handle the other
227 * extremely rare cases.
228 * Also the client should not be allowed to change the
229 * size of the file if there is a conflicting non-blocking
230 * mandatory lock in the region of change.
231 */
232 if (vp->v_type == VREG && va.va_mask & AT_SIZE) {
233 if (nbl_need_check(vp)) {
234 nbl_start_crit(vp, RW_READER);
235 in_crit = 1;
236 }
237
238 bva.va_mask = AT_UID | AT_SIZE;
239
240 error = VOP_GETATTR(vp, &bva, 0, cr, &ct);
241
242 if (error) {
243 if (in_crit)
244 nbl_end_crit(vp);
245 VN_RELE(vp);
246 ns->ns_status = puterrno(error);
247 return;
248 }
249
250 if (in_crit) {
251 u_offset_t offset;
252 ssize_t length;
253
254 if (va.va_size < bva.va_size) {
255 offset = va.va_size;
256 length = bva.va_size - va.va_size;
257 } else {
258 offset = bva.va_size;
259 length = va.va_size - bva.va_size;
260 }
261 if (nbl_conflict(vp, NBL_WRITE, offset, length, 0,
262 NULL)) {
263 error = EACCES;
264 }
265 }
266
267 if (crgetuid(cr) == bva.va_uid && !error &&
268 va.va_size != bva.va_size) {
269 va.va_mask &= ~AT_SIZE;
270 bf.l_type = F_WRLCK;
271 bf.l_whence = 0;
272 bf.l_start = (off64_t)va.va_size;
273 bf.l_len = 0;
274 bf.l_sysid = 0;
275 bf.l_pid = 0;
276
277 error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
278 (offset_t)va.va_size, cr, &ct);
279 }
280 if (in_crit)
281 nbl_end_crit(vp);
282 } else
283 error = 0;
284
285 /*
286 * Do the setattr.
287 */
288 if (!error && va.va_mask) {
289 error = VOP_SETATTR(vp, &va, flag, cr, &ct);
290 }
291
292 /*
	 * Check if the monitor on either VOP_SPACE or VOP_SETATTR detected
	 * a delegation conflict and, if so, mark the thread flag as
	 * wouldblock so that the response is dropped and the client will
	 * try again.
297 */
298 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
299 VN_RELE(vp);
300 curthread->t_flag |= T_WOULDBLOCK;
301 return;
302 }
303
304 if (!error) {
305 va.va_mask = AT_ALL; /* get everything */
306
307 error = rfs4_delegated_getattr(vp, &va, 0, cr);
308
309 /* check for overflows */
310 if (!error) {
311 acl_perm(vp, exi, &va, cr);
312 error = vattr_to_nattr(&va, &ns->ns_attr);
313 }
314 }
315
316 ct.cc_flags = 0;
317
318 /*
319 * Force modified metadata out to stable storage.
320 */
321 (void) VOP_FSYNC(vp, FNODSYNC, cr, &ct);
322
323 VN_RELE(vp);
324
325 ns->ns_status = puterrno(error);
}

327 void *
328 rfs_setattr_getfh(struct nfssaargs *args)
329 {
330 return (&args->saa_fh);
331 }
332
/*
 * Cross a mount point. @exip and @vpp are changed and their old
 * references released only on success.
 */
334 int
335 rfs_cross_mnt(vnode_t **vpp, struct exportinfo **exip)
336 {
337 struct exportinfo *exi;
338 vnode_t *vp = *vpp;
339 fid_t fid;
340 int error;
341
342 VN_HOLD(vp);
343
344 if ((error = traverse(&vp)) != 0) {
345 VN_RELE(vp);
346 return (error);
347 }
348
349 bzero(&fid, sizeof (fid));
350 fid.fid_len = MAXFIDSZ;
351 error = VOP_FID(vp, &fid, NULL);
352 if (error) {
353 VN_RELE(vp);
354 return (error);
355 }
356
357 exi = checkexport(&vp->v_vfsp->vfs_fsid, &fid);
358 if (exi == NULL ||
359 (exi->exi_export.ex_flags & EX_NOHIDE) == 0) {
360 /*
		 * Not an error; the subdirectory is simply not exported,
		 * or "nohide" is not set.
363 */
364 if (exi != NULL)
365 exi_rele(exi);
366 VN_RELE(vp);
367 } else {
368 /* go to submount */
369 exi_rele(*exip);
370 *exip = exi;
371
372 VN_RELE(*vpp);
373 *vpp = vp;
374 }
375
376 return (0);
377 }
378
379 /*
 * Given a mounted "dvp" and its "exi", climb to the covering
 * mountpoint, correcting dvp/exi accordingly.
 * Returns 0 on success.
383 */
384 int
385 rfs_climb_crossmnt(vnode_t **dvpp, struct exportinfo **exip, cred_t *cr)
386 {
387 struct exportinfo *exi;
388 vnode_t *dvp = *dvpp;
389
390 ASSERT(dvp->v_flag & VROOT);
391
392 VN_HOLD(dvp);
393 dvp = untraverse(dvp);
394 exi = nfs_vptoexi(NULL, dvp, cr, NULL, NULL, FALSE);
395 if (exi == NULL) {
396 VN_RELE(dvp);
397 return (-1);
398 }
399
400 exi_rele(*exip);
401 *exip = exi;
402 VN_RELE(*dvpp);
403 *dvpp = dvp;
404
405 return (0);
}

407 /*
408 * Directory lookup.
409 * Returns an fhandle and file attributes for file name in a directory.
410 */
411 /* ARGSUSED */
412 void
413 rfs_lookup(struct nfsdiropargs *da, struct nfsdiropres *dr,
414 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
415 {
416 int error;
417 vnode_t *dvp;
418 vnode_t *vp;
419 struct vattr va;
420 fhandle_t *fhp = da->da_fhandle;
421 struct sec_ol sec = {0, 0};
422 bool_t publicfh_flag = FALSE, auth_weak = FALSE;
423 char *name;
424 struct sockaddr *ca;
425
426 /*
	 * Trusted Extensions doesn't support NFSv2. MOUNT
	 * will reject v2 clients. We also need to prevent v2
	 * client access via WebNFS here.
430 */
431 if (is_system_labeled() && req->rq_vers == 2) {
432 dr->dr_status = NFSERR_ACCES;
433 return;
434 }
435
436 /*
437 * Disallow NULL paths
438 */
439 if (da->da_name == NULL || *da->da_name == '\0') {
440 dr->dr_status = NFSERR_ACCES;
441 return;
442 }
443
444 /*
445 * Allow lookups from the root - the default
446 * location of the public filehandle.
447 */
448 if (exi != NULL && (exi->exi_export.ex_flags & EX_PUBLIC)) {
449 dvp = rootdir;
450 VN_HOLD(dvp);
451 } else {
452 dvp = nfs_fhtovp(fhp, exi);
453 if (dvp == NULL) {
454 dr->dr_status = NFSERR_STALE;
455 return;
456 }
457 }
458
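	/*
	 * Take an extra hold on the exportinfo: the cross-mountpoint
	 * helpers below may swap it for that of another filesystem,
	 * and the tail of this function releases whichever is current.
	 */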
459 exi_hold(exi);
460
461 /*
	 * Do not allow lookup beyond the export root.
	 * If the filehandle matches a filehandle of the exi,
	 * then ".." refers beyond the root of the exported filesystem.
465 */
466 if (strcmp(da->da_name, "..") == 0 &&
467 EQFID(&exi->exi_fid, (fid_t *)&fhp->fh_len)) {
468 if ((exi->exi_export.ex_flags & EX_NOHIDE) &&
469 (dvp->v_flag & VROOT)) {
470 /*
			 * Special case for ".." on a 'nohide' exported root.
472 */
473 if (rfs_climb_crossmnt(&dvp, &exi, cr) != 0) {
474 error = NFSERR_ACCES;
475 goto out;
476 }
477 } else {
478 error = NFSERR_NOENT;
479 goto out;
480 }
481 }
482
483 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
484 name = nfscmd_convname(ca, exi, da->da_name, NFSCMD_CONV_INBOUND,
485 MAXPATHLEN);
486
487 if (name == NULL) {
488 error = NFSERR_ACCES;
489 goto out;
490 }
491
492 /*
493 * If the public filehandle is used then allow
494 * a multi-component lookup, i.e. evaluate
495 * a pathname and follow symbolic links if
496 * necessary.
497 *
498 * This may result in a vnode in another filesystem
499 * which is OK as long as the filesystem is exported.
500 */
501 if (PUBLIC_FH2(fhp)) {
502 publicfh_flag = TRUE;
503
504 exi_rele(exi);
505
506 error = rfs_publicfh_mclookup(name, dvp, cr, &vp, &exi,
507 &sec);
508 } else {
509 /*
510 * Do a normal single component lookup.
511 */
512 error = VOP_LOOKUP(dvp, name, &vp, NULL, 0, NULL, cr,
513 NULL, NULL, NULL);
514 }
515
516 if (name != da->da_name)
517 kmem_free(name, MAXPATHLEN);
518
519 if (error == 0 && vn_ismntpt(vp)) {
520 error = rfs_cross_mnt(&vp, &exi);
521 if (error)
522 VN_RELE(vp);
523 }
524
525 if (!error) {
526 va.va_mask = AT_ALL; /* we want everything */
527
528 error = rfs4_delegated_getattr(vp, &va, 0, cr);
529
530 /* check for overflows */
531 if (!error) {
532 acl_perm(vp, exi, &va, cr);
533 error = vattr_to_nattr(&va, &dr->dr_attr);
534 if (!error) {
535 if (sec.sec_flags & SEC_QUERY)
536 error = makefh_ol(&dr->dr_fhandle, exi,
537 sec.sec_index);
538 else {
539 error = makefh(&dr->dr_fhandle, vp,
540 exi);
541 if (!error && publicfh_flag &&
542 !chk_clnt_sec(exi, req))
543 auth_weak = TRUE;
544 }
545 }
546 }
547 VN_RELE(vp);
548 }
549
550 out:
551 VN_RELE(dvp);
552
553 if (exi != NULL)
554 exi_rele(exi);
555
556 /*
557 * If it's public fh, no 0x81, and client's flavor is
558 * invalid, set WebNFS status to WNFSERR_CLNT_FLAVOR now.
559 * Then set RPC status to AUTH_TOOWEAK in common_dispatch.
560 */
561 if (auth_weak)
562 dr->dr_status = (enum nfsstat)WNFSERR_CLNT_FLAVOR;
563 else
564 dr->dr_status = puterrno(error);
}

566 void *
567 rfs_lookup_getfh(struct nfsdiropargs *da)
568 {
569 return (da->da_fhandle);
570 }
571
572 /*
573 * Read symbolic link.
574 * Returns the string in the symbolic link at the given fhandle.
575 */
576 /* ARGSUSED */
577 void
578 rfs_readlink(fhandle_t *fhp, struct nfsrdlnres *rl, struct exportinfo *exi,
579 struct svc_req *req, cred_t *cr, bool_t ro)
580 {
581 int error;
582 struct iovec iov;
583 struct uio uio;
584 vnode_t *vp;
585 struct vattr va;
586 struct sockaddr *ca;
587 char *name = NULL;
588 int is_referral = 0;
589
590 vp = nfs_fhtovp(fhp, exi);
591 if (vp == NULL) {
592 rl->rl_data = NULL;
593 rl->rl_status = NFSERR_STALE;
594 return;
595 }
596
597 va.va_mask = AT_MODE;
598
599 error = VOP_GETATTR(vp, &va, 0, cr, NULL);
600
601 if (error) {
602 VN_RELE(vp);
603 rl->rl_data = NULL;
604 rl->rl_status = puterrno(error);
605 return;
606 }
607
608 if (MANDLOCK(vp, va.va_mode)) {
609 VN_RELE(vp);
610 rl->rl_data = NULL;
611 rl->rl_status = NFSERR_ACCES;
612 return;
613 }
614
615 /* We lied about the object type for a referral */
616 if (vn_is_nfs_reparse(vp, cr))
617 is_referral = 1;
618
619 /*
620 * XNFS and RFC1094 require us to return ENXIO if argument
621 * is not a link. BUGID 1138002.
622 */
623 if (vp->v_type != VLNK && !is_referral) {
624 VN_RELE(vp);
625 rl->rl_data = NULL;
626 rl->rl_status = NFSERR_NXIO;
627 return;
628 }
629
630 /*
631 * Allocate data for pathname. This will be freed by rfs_rlfree.
632 */
633 rl->rl_data = kmem_alloc(NFS_MAXPATHLEN, KM_SLEEP);
634
635 if (is_referral) {
636 char *s;
637 size_t strsz;
638
639 /* Get an artificial symlink based on a referral */
640 s = build_symlink(vp, cr, &strsz);
641 global_svstat_ptr[2][NFS_REFERLINKS].value.ui64++;
642 DTRACE_PROBE2(nfs2serv__func__referral__reflink,
643 vnode_t *, vp, char *, s);
644 if (s == NULL)
645 error = EINVAL;
646 else {
647 error = 0;
648 (void) strlcpy(rl->rl_data, s, NFS_MAXPATHLEN);
649 rl->rl_count = (uint32_t)MIN(strsz, NFS_MAXPATHLEN);
650 kmem_free(s, strsz);
651 }
652
653 } else {
654
655 /*
656 * Set up io vector to read sym link data
657 */
658 iov.iov_base = rl->rl_data;
659 iov.iov_len = NFS_MAXPATHLEN;
660 uio.uio_iov = &iov;
661 uio.uio_iovcnt = 1;
662 uio.uio_segflg = UIO_SYSSPACE;
663 uio.uio_extflg = UIO_COPY_CACHED;
664 uio.uio_loffset = (offset_t)0;
665 uio.uio_resid = NFS_MAXPATHLEN;
666
667 /*
668 * Do the readlink.
669 */
670 error = VOP_READLINK(vp, &uio, cr, NULL);
671
672 rl->rl_count = (uint32_t)(NFS_MAXPATHLEN - uio.uio_resid);
673
674 if (!error)
675 rl->rl_data[rl->rl_count] = '\0';
676
	}

680 VN_RELE(vp);
681
682 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
683 name = nfscmd_convname(ca, exi, rl->rl_data,
684 NFSCMD_CONV_OUTBOUND, MAXPATHLEN);
685
686 if (name != NULL && name != rl->rl_data) {
687 kmem_free(rl->rl_data, NFS_MAXPATHLEN);
688 rl->rl_data = name;
689 }
690
691 /*
692 * XNFS and RFC1094 require us to return ENXIO if argument
693 * is not a link. UFS returns EINVAL if this is the case,
694 * so we do the mapping here. BUGID 1138002.
695 */
696 if (error == EINVAL)
697 rl->rl_status = NFSERR_NXIO;
698 else
699 rl->rl_status = puterrno(error);
}

702 void *
703 rfs_readlink_getfh(fhandle_t *fhp)
704 {
705 return (fhp);
}

707 /*
708 * Free data allocated by rfs_readlink
709 */
710 void
711 rfs_rlfree(struct nfsrdlnres *rl)
712 {
713 if (rl->rl_data != NULL)
714 kmem_free(rl->rl_data, NFS_MAXPATHLEN);
715 }
716
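/*
 * Set up the RDMA reply data for a read result; used by rfs_read() when
 * the client supplied an RDMA write chunk list.
 */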
717 static int rdma_setup_read_data2(struct nfsreadargs *, struct nfsrdresult *);
718
719 /*
720 * Read data.
721 * Returns some data read from the file at the given fhandle.
722 */
723 /* ARGSUSED */
724 void
725 rfs_read(struct nfsreadargs *ra, struct nfsrdresult *rr,
726 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
727 {
728 vnode_t *vp;
729 int error;
730 struct vattr va;
731 struct iovec iov;
732 struct uio uio;
733 mblk_t *mp;
734 int alloc_err = 0;
735 int in_crit = 0;
736 caller_context_t ct;
737
738 vp = nfs_fhtovp(&ra->ra_fhandle, exi);
739 if (vp == NULL) {
740 rr->rr_data = NULL;
741 rr->rr_status = NFSERR_STALE;
742 return;
743 }
744
745 if (vp->v_type != VREG) {
746 VN_RELE(vp);
747 rr->rr_data = NULL;
748 rr->rr_status = NFSERR_ISDIR;
749 return;
750 }
751
752 ct.cc_sysid = 0;
753 ct.cc_pid = 0;
754 ct.cc_caller_id = nfs2_srv_caller_id;
755 ct.cc_flags = CC_DONTBLOCK;
756
757 /*
758 * Enter the critical region before calling VOP_RWLOCK
759 * to avoid a deadlock with write requests.
760 */
761 if (nbl_need_check(vp)) {
762 nbl_start_crit(vp, RW_READER);
763 if (nbl_conflict(vp, NBL_READ, ra->ra_offset, ra->ra_count,
764 0, NULL)) {
765 nbl_end_crit(vp);
766 VN_RELE(vp);
767 rr->rr_data = NULL;
768 rr->rr_status = NFSERR_ACCES;
769 return;
770 }
771 in_crit = 1;
772 }
773
774 error = VOP_RWLOCK(vp, V_WRITELOCK_FALSE, &ct);
775
776 /* check if a monitor detected a delegation conflict */
777 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
778 VN_RELE(vp);
779 /* mark as wouldblock so response is dropped */
780 curthread->t_flag |= T_WOULDBLOCK;
781
782 rr->rr_data = NULL;
783 return;
784 }
785
786 va.va_mask = AT_ALL;
787
788 error = VOP_GETATTR(vp, &va, 0, cr, &ct);
789
790 if (error) {
791 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
792 if (in_crit)
793 nbl_end_crit(vp);
794
795 VN_RELE(vp);
796 rr->rr_data = NULL;
797 rr->rr_status = puterrno(error);
798
799 return;
800 }
801
802 /*
803 * This is a kludge to allow reading of files created
804 * with no read permission. The owner of the file
805 * is always allowed to read it.
806 */
807 if (crgetuid(cr) != va.va_uid) {
808 error = VOP_ACCESS(vp, VREAD, 0, cr, &ct);
809
810 if (error) {
811 /*
812 * Exec is the same as read over the net because
813 * of demand loading.
814 */
815 error = VOP_ACCESS(vp, VEXEC, 0, cr, &ct);
816 }
817 if (error) {
818 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
819 if (in_crit)
820 nbl_end_crit(vp);
821 VN_RELE(vp);
822 rr->rr_data = NULL;
823 rr->rr_status = puterrno(error);
824
825 return;
826 }
827 }
828
829 if (MANDLOCK(vp, va.va_mode)) {
830 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
831 if (in_crit)
832 nbl_end_crit(vp);
833
834 VN_RELE(vp);
835 rr->rr_data = NULL;
836 rr->rr_status = NFSERR_ACCES;
837
838 return;
839 }
840
841 rr->rr_ok.rrok_wlist_len = 0;
842 rr->rr_ok.rrok_wlist = NULL;
843
844 if ((u_offset_t)ra->ra_offset >= va.va_size) {
845 rr->rr_count = 0;
846 rr->rr_data = NULL;
847 /*
848 * In this case, status is NFS_OK, but there is no data
849 * to encode. So set rr_mp to NULL.
850 */
851 rr->rr_mp = NULL;
852 rr->rr_ok.rrok_wlist = ra->ra_wlist;
853 if (rr->rr_ok.rrok_wlist)
854 clist_zero_len(rr->rr_ok.rrok_wlist);
855 goto done;
856 }
857
858 if (ra->ra_wlist) {
859 mp = NULL;
860 rr->rr_mp = NULL;
861 (void) rdma_get_wchunk(req, &iov, ra->ra_wlist);
862 if (ra->ra_count > iov.iov_len) {
863 rr->rr_data = NULL;
864 rr->rr_status = NFSERR_INVAL;
865 goto done;
866 }
867 } else {
868 /*
869 * mp will contain the data to be sent out in the read reply.
870 * This will be freed after the reply has been sent out (by the
871 * driver).
		 * Let's round up the data to a BYTES_PER_XDR_UNIT multiple, so
873 * that the call to xdrmblk_putmblk() never fails.
874 */
875 mp = allocb_wait(RNDUP(ra->ra_count), BPRI_MED, STR_NOSIG,
876 &alloc_err);
877 ASSERT(mp != NULL);
878 ASSERT(alloc_err == 0);
879
880 rr->rr_mp = mp;
881
882 /*
883 * Set up io vector
884 */
885 iov.iov_base = (caddr_t)mp->b_datap->db_base;
886 iov.iov_len = ra->ra_count;
887 }
888
889 uio.uio_iov = &iov;
890 uio.uio_iovcnt = 1;
891 uio.uio_segflg = UIO_SYSSPACE;
892 uio.uio_extflg = UIO_COPY_CACHED;
893 uio.uio_loffset = (offset_t)ra->ra_offset;
894 uio.uio_resid = ra->ra_count;
895
896 error = VOP_READ(vp, &uio, 0, cr, &ct);
897
898 if (error) {
899 if (mp)
900 freeb(mp);
901
902 /*
903 * check if a monitor detected a delegation conflict and
904 * mark as wouldblock so response is dropped
905 */
906 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
907 curthread->t_flag |= T_WOULDBLOCK;
908 else
909 rr->rr_status = puterrno(error);
910
911 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
912 if (in_crit)
913 nbl_end_crit(vp);
914
915 VN_RELE(vp);
916 rr->rr_data = NULL;
917
918 return;
919 }
920
921 /*
922 * Get attributes again so we can send the latest access
923 * time to the client side for its cache.
924 */
925 va.va_mask = AT_ALL;
926
927 error = VOP_GETATTR(vp, &va, 0, cr, &ct);
928
929 if (error) {
930 if (mp)
931 freeb(mp);
932
933 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
934 if (in_crit)
935 nbl_end_crit(vp);
936
937 VN_RELE(vp);
938 rr->rr_data = NULL;
939 rr->rr_status = puterrno(error);
940
941 return;
942 }
943
944 rr->rr_count = (uint32_t)(ra->ra_count - uio.uio_resid);
945
946 if (mp) {
947 rr->rr_data = (char *)mp->b_datap->db_base;
948 } else {
949 if (ra->ra_wlist) {
950 rr->rr_data = (caddr_t)iov.iov_base;
951 if (!rdma_setup_read_data2(ra, rr)) {
952 rr->rr_data = NULL;
953 rr->rr_status = puterrno(NFSERR_INVAL);
954 }
955 }
956 }
957 done:
958 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
959 if (in_crit)
960 nbl_end_crit(vp);
961
962 acl_perm(vp, exi, &va, cr);
963
964 /* check for overflows */
965 error = vattr_to_nattr(&va, &rr->rr_attr);
966
967 VN_RELE(vp);
968
969 rr->rr_status = puterrno(error);
970 }
971
972 /*
973 * Free data allocated by rfs_read
974 */
975 void
976 rfs_rdfree(struct nfsrdresult *rr)
977 {
978 mblk_t *mp;
979
980 if (rr->rr_status == NFS_OK) {
981 mp = rr->rr_mp;
982 if (mp != NULL)
983 freeb(mp);
984 }
985 }
986
987 void *
988 rfs_read_getfh(struct nfsreadargs *ra)
989 {
990 return (&ra->ra_fhandle);
991 }
992
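/*
 * Number of iovec slots preallocated on the stack in rfs_write_sync();
 * longer mblk chains fall back to a kmem_alloc'd array.
 */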
993 #define MAX_IOVECS 12
994
995 #ifdef DEBUG
996 static int rfs_write_sync_hits = 0;
997 static int rfs_write_sync_misses = 0;
998 #endif
999
1000 /*
1001 * Write data to file.
1002 * Returns attributes of a file after writing some data to it.
1003 *
1004 * Any changes made here, especially in error handling might have
1005 * to also be done in rfs_write (which clusters write requests).
1006 */
1007 /* ARGSUSED */
1008 void
1009 rfs_write_sync(struct nfswriteargs *wa, struct nfsattrstat *ns,
1010 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
1011 {
1012 int error;
1013 vnode_t *vp;
1014 rlim64_t rlimit;
1015 struct vattr va;
1016 struct uio uio;
1017 struct iovec iov[MAX_IOVECS];
1018 mblk_t *m;
1019 struct iovec *iovp;
1020 int iovcnt;
1021 cred_t *savecred;
1022 int in_crit = 0;
1023 caller_context_t ct;
1024
1025 vp = nfs_fhtovp(&wa->wa_fhandle, exi);
1026 if (vp == NULL) {
1027 ns->ns_status = NFSERR_STALE;
1028 return;
1029 }
1030
1031 if (rdonly(ro, vp)) {
1032 VN_RELE(vp);
1033 ns->ns_status = NFSERR_ROFS;
1034 return;
1035 }
1036
1037 if (vp->v_type != VREG) {
1038 VN_RELE(vp);
1039 ns->ns_status = NFSERR_ISDIR;
1040 return;
1041 }
1042
1043 ct.cc_sysid = 0;
1044 ct.cc_pid = 0;
1045 ct.cc_caller_id = nfs2_srv_caller_id;
1046 ct.cc_flags = CC_DONTBLOCK;
1047
1048 va.va_mask = AT_UID|AT_MODE;
1049
1050 error = VOP_GETATTR(vp, &va, 0, cr, &ct);
1051
1052 if (error) {
1053 VN_RELE(vp);
1054 ns->ns_status = puterrno(error);
1055
1056 return;
1057 }
1058
1059 if (crgetuid(cr) != va.va_uid) {
1060 /*
1061 * This is a kludge to allow writes of files created
1062 * with read only permission. The owner of the file
1063 * is always allowed to write it.
1064 */
1065 error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct);
1066
1067 if (error) {
1068 VN_RELE(vp);
1069 ns->ns_status = puterrno(error);
1070 return;
1071 }
1072 }
1073
1074 /*
1075 * Can't access a mandatory lock file. This might cause
1076 * the NFS service thread to block forever waiting for a
1077 * lock to be released that will never be released.
1078 */
1079 if (MANDLOCK(vp, va.va_mode)) {
1080 VN_RELE(vp);
1081 ns->ns_status = NFSERR_ACCES;
1082 return;
1083 }
1084
1085 /*
1086 * We have to enter the critical region before calling VOP_RWLOCK
1087 * to avoid a deadlock with ufs.
1088 */
1089 if (nbl_need_check(vp)) {
1090 nbl_start_crit(vp, RW_READER);
1091 in_crit = 1;
1092 if (nbl_conflict(vp, NBL_WRITE, wa->wa_offset,
1093 wa->wa_count, 0, NULL)) {
1094 error = EACCES;
1095 goto out;
1096 }
1097 }
1098
1099 error = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct);
1100
1101 /* check if a monitor detected a delegation conflict */
1102 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1103 VN_RELE(vp);
1104 /* mark as wouldblock so response is dropped */
1105 curthread->t_flag |= T_WOULDBLOCK;
1106 return;
1107 }
1108
1109 if (wa->wa_data || wa->wa_rlist) {
1110 /* Do the RDMA thing if necessary */
1111 if (wa->wa_rlist) {
1112 iov[0].iov_base = (char *)((wa->wa_rlist)->u.c_daddr3);
1113 iov[0].iov_len = wa->wa_count;
1114 } else {
1115 iov[0].iov_base = wa->wa_data;
1116 iov[0].iov_len = wa->wa_count;
1117 }
1118 uio.uio_iov = iov;
1119 uio.uio_iovcnt = 1;
1120 uio.uio_segflg = UIO_SYSSPACE;
1121 uio.uio_extflg = UIO_COPY_DEFAULT;
1122 uio.uio_loffset = (offset_t)wa->wa_offset;
1123 uio.uio_resid = wa->wa_count;
1124 /*
1125 * The limit is checked on the client. We
1126 * should allow any size writes here.
1127 */
1128 uio.uio_llimit = curproc->p_fsz_ctl;
1129 rlimit = uio.uio_llimit - wa->wa_offset;
1130 if (rlimit < (rlim64_t)uio.uio_resid)
1131 uio.uio_resid = (uint_t)rlimit;
1132
		/*
		 * For now we assume no append mode.
		 */
1136 /*
1137 * We're changing creds because VM may fault and we need
1138 * the cred of the current thread to be used if quota
1139 * checking is enabled.
1140 */
1141 savecred = curthread->t_cred;
1142 curthread->t_cred = cr;
1143 error = VOP_WRITE(vp, &uio, FSYNC, cr, &ct);
1144 curthread->t_cred = savecred;
1145 } else {
1146 iovcnt = 0;
1147 for (m = wa->wa_mblk; m != NULL; m = m->b_cont)
1148 iovcnt++;
1149 if (iovcnt <= MAX_IOVECS) {
1150 #ifdef DEBUG
1151 rfs_write_sync_hits++;
1152 #endif
1153 iovp = iov;
1154 } else {
1155 #ifdef DEBUG
1156 rfs_write_sync_misses++;
1157 #endif
1158 iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
1159 }
1160 mblk_to_iov(wa->wa_mblk, iovcnt, iovp);
1161 uio.uio_iov = iovp;
1162 uio.uio_iovcnt = iovcnt;
1163 uio.uio_segflg = UIO_SYSSPACE;
1164 uio.uio_extflg = UIO_COPY_DEFAULT;
1165 uio.uio_loffset = (offset_t)wa->wa_offset;
1166 uio.uio_resid = wa->wa_count;
1167 /*
1168 * The limit is checked on the client. We
1169 * should allow any size writes here.
1170 */
1171 uio.uio_llimit = curproc->p_fsz_ctl;
1172 rlimit = uio.uio_llimit - wa->wa_offset;
1173 if (rlimit < (rlim64_t)uio.uio_resid)
1174 uio.uio_resid = (uint_t)rlimit;
1175
1176 /*
1177 * For now we assume no append mode.
1178 */
1179 /*
1180 * We're changing creds because VM may fault and we need
1181 * the cred of the current thread to be used if quota
1182 * checking is enabled.
1183 */
1184 savecred = curthread->t_cred;
1185 curthread->t_cred = cr;
1186 error = VOP_WRITE(vp, &uio, FSYNC, cr, &ct);
1187 curthread->t_cred = savecred;
1188
1189 if (iovp != iov)
1190 kmem_free(iovp, sizeof (*iovp) * iovcnt);
1191 }
1192
1193 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct);
1194
1195 if (!error) {
1196 /*
1197 * Get attributes again so we send the latest mod
1198 * time to the client side for its cache.
1199 */
1200 va.va_mask = AT_ALL; /* now we want everything */
1201
1202 error = VOP_GETATTR(vp, &va, 0, cr, &ct);
1203
1204 /* check for overflows */
1205 if (!error) {
1206 acl_perm(vp, exi, &va, cr);
1207 error = vattr_to_nattr(&va, &ns->ns_attr);
1208 }
1209 }
1210
1211 out:
1212 if (in_crit)
1213 nbl_end_crit(vp);
1214 VN_RELE(vp);
1215
1216 /* check if a monitor detected a delegation conflict */
1217 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
1218 /* mark as wouldblock so response is dropped */
1219 curthread->t_flag |= T_WOULDBLOCK;
1220 else
1221 ns->ns_status = puterrno(error);
}
1224
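/*
 * Write clustering support. Concurrent write requests for the same file
 * handle are queued on a per-file rfs_async_write_list, ordered by
 * offset. The first thread to acquire the file's write lock processes
 * the entire cluster with as few calls to VOP_WRITE as possible, fills
 * in each request's status, and wakes the waiting threads via the
 * list's condition variable.
 */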
1225 struct rfs_async_write {
1226 struct nfswriteargs *wa;
1227 struct nfsattrstat *ns;
1228 struct svc_req *req;
1229 cred_t *cr;
1230 bool_t ro;
1231 kthread_t *thread;
1232 struct rfs_async_write *list;
1233 };
1234
1235 struct rfs_async_write_list {
1236 fhandle_t *fhp;
1237 kcondvar_t cv;
1238 struct rfs_async_write *list;
1239 struct rfs_async_write_list *next;
1240 };
1241
1242 static struct rfs_async_write_list *rfs_async_write_head = NULL;
1243 static kmutex_t rfs_async_write_lock;
1244 static int rfs_write_async = 1; /* enables write clustering if == 1 */
1245
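/*
 * Number of iovec slots preallocated on the stack for a clustered
 * write; larger clusters fall back to a kmem_alloc'd array.
 */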
1246 #define MAXCLIOVECS 42
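/* Initial status value, distinct from any valid nfsstat */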
1247 #define RFSWRITE_INITVAL (enum nfsstat) -1
1248
1249 #ifdef DEBUG
1250 static int rfs_write_hits = 0;
1251 static int rfs_write_misses = 0;
1252 #endif
1253
1254 /*
1255 * Write data to file.
1256 * Returns attributes of a file after writing some data to it.
1257 */
1258 void
1259 rfs_write(struct nfswriteargs *wa, struct nfsattrstat *ns,
1260 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
1261 {
1262 int error;
1263 vnode_t *vp;
1264 rlim64_t rlimit;
1265 struct vattr va;
1266 struct uio uio;
1267 struct rfs_async_write_list *lp;
1268 struct rfs_async_write_list *nlp;
1269 struct rfs_async_write *rp;
1270 struct rfs_async_write *nrp;
1271 struct rfs_async_write *trp;
1272 struct rfs_async_write *lrp;
1273 int data_written;
1274 int iovcnt;
1275 mblk_t *m;
1276 struct iovec *iovp;
1277 struct iovec *niovp;
1278 struct iovec iov[MAXCLIOVECS];
1279 int count;
1280 int rcount;
1281 uint_t off;
1282 uint_t len;
1283 struct rfs_async_write nrpsp;
1284 struct rfs_async_write_list nlpsp;
1285 ushort_t t_flag;
1286 cred_t *savecred;
1287 int in_crit = 0;
1288 caller_context_t ct;
1289
1290 if (!rfs_write_async) {
1291 rfs_write_sync(wa, ns, exi, req, cr, ro);
1292 return;
1293 }
1294
1295 /*
	 * Initialize status to RFSWRITE_INITVAL instead of 0, since a value
	 * of 0 (NFS_OK) indicates success.
1298 */
1299 ns->ns_status = RFSWRITE_INITVAL;
1300
1301 nrp = &nrpsp;
1302 nrp->wa = wa;
1303 nrp->ns = ns;
1304 nrp->req = req;
1305 nrp->cr = cr;
1306 nrp->ro = ro;
1307 nrp->thread = curthread;
1308
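	/*
	 * The request structures above live on this thread's stack but are
	 * linked onto a global list, so the thread must not be swapped out
	 * while other threads may still reference them.
	 */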
1309 ASSERT(curthread->t_schedflag & TS_DONT_SWAP);
1310
1311 /*
1312 * Look to see if there is already a cluster started
1313 * for this file.
1314 */
1315 mutex_enter(&rfs_async_write_lock);
1316 for (lp = rfs_async_write_head; lp != NULL; lp = lp->next) {
1317 if (bcmp(&wa->wa_fhandle, lp->fhp,
1318 sizeof (fhandle_t)) == 0)
1319 break;
1320 }
1321
1322 /*
1323 * If lp is non-NULL, then there is already a cluster
1324 * started. We need to place ourselves in the cluster
1325 * list in the right place as determined by starting
1326 * offset. Conflicts with non-blocking mandatory locked
1327 * regions will be checked when the cluster is processed.
1328 */
1329 if (lp != NULL) {
1330 rp = lp->list;
1331 trp = NULL;
1332 while (rp != NULL && rp->wa->wa_offset < wa->wa_offset) {
1333 trp = rp;
1334 rp = rp->list;
1335 }
1336 nrp->list = rp;
1337 if (trp == NULL)
1338 lp->list = nrp;
1339 else
1340 trp->list = nrp;
1341 while (nrp->ns->ns_status == RFSWRITE_INITVAL)
1342 cv_wait(&lp->cv, &rfs_async_write_lock);
1343 mutex_exit(&rfs_async_write_lock);
1344
1345 return;
1346 }
1347
1348 /*
1349 * No cluster started yet, start one and add ourselves
1350 * to the list of clusters.
1351 */
1352 nrp->list = NULL;
1353
1354 nlp = &nlpsp;
1355 nlp->fhp = &wa->wa_fhandle;
1356 cv_init(&nlp->cv, NULL, CV_DEFAULT, NULL);
1357 nlp->list = nrp;
1358 nlp->next = NULL;
1359
1360 if (rfs_async_write_head == NULL) {
1361 rfs_async_write_head = nlp;
1362 } else {
1363 lp = rfs_async_write_head;
1364 while (lp->next != NULL)
1365 lp = lp->next;
1366 lp->next = nlp;
1367 }
1368 mutex_exit(&rfs_async_write_lock);
1369
1370 /*
1371 * Convert the file handle common to all of the requests
1372 * in this cluster to a vnode.
1373 */
1374 vp = nfs_fhtovp(&wa->wa_fhandle, exi);
1375 if (vp == NULL) {
1376 mutex_enter(&rfs_async_write_lock);
1377 if (rfs_async_write_head == nlp)
1378 rfs_async_write_head = nlp->next;
1379 else {
1380 lp = rfs_async_write_head;
1381 while (lp->next != nlp)
1382 lp = lp->next;
1383 lp->next = nlp->next;
1384 }
1385 t_flag = curthread->t_flag & T_WOULDBLOCK;
1386 for (rp = nlp->list; rp != NULL; rp = rp->list) {
1387 rp->ns->ns_status = NFSERR_STALE;
1388 rp->thread->t_flag |= t_flag;
1389 }
1390 cv_broadcast(&nlp->cv);
1391 mutex_exit(&rfs_async_write_lock);
1392
1393 return;
1394 }
1395
1396 /*
1397 * Can only write regular files. Attempts to write any
1398 * other file types fail with EISDIR.
1399 */
1400 if (vp->v_type != VREG) {
1401 VN_RELE(vp);
1402 mutex_enter(&rfs_async_write_lock);
1403 if (rfs_async_write_head == nlp)
1404 rfs_async_write_head = nlp->next;
1405 else {
1406 lp = rfs_async_write_head;
1407 while (lp->next != nlp)
1408 lp = lp->next;
1409 lp->next = nlp->next;
1410 }
1411 t_flag = curthread->t_flag & T_WOULDBLOCK;
1412 for (rp = nlp->list; rp != NULL; rp = rp->list) {
1413 rp->ns->ns_status = NFSERR_ISDIR;
1414 rp->thread->t_flag |= t_flag;
1415 }
1416 cv_broadcast(&nlp->cv);
1417 mutex_exit(&rfs_async_write_lock);
1418
1419 return;
1420 }
1421
1422 /*
1423 * Enter the critical region before calling VOP_RWLOCK, to avoid a
1424 * deadlock with ufs.
1425 */
1426 if (nbl_need_check(vp)) {
1427 nbl_start_crit(vp, RW_READER);
1428 in_crit = 1;
1429 }
1430
1431 ct.cc_sysid = 0;
1432 ct.cc_pid = 0;
1433 ct.cc_caller_id = nfs2_srv_caller_id;
1434 ct.cc_flags = CC_DONTBLOCK;
1435
1436 /*
1437 * Lock the file for writing. This operation provides
1438 * the delay which allows clusters to grow.
1439 */
1440 error = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct);
1441
1442 /* check if a monitor detected a delegation conflict */
1443 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1444 if (in_crit)
1445 nbl_end_crit(vp);
1446 VN_RELE(vp);
1447 /* mark as wouldblock so response is dropped */
1448 curthread->t_flag |= T_WOULDBLOCK;
1449 mutex_enter(&rfs_async_write_lock);
1450 if (rfs_async_write_head == nlp)
1451 rfs_async_write_head = nlp->next;
1452 else {
1453 lp = rfs_async_write_head;
1454 while (lp->next != nlp)
1455 lp = lp->next;
1456 lp->next = nlp->next;
1457 }
1458 for (rp = nlp->list; rp != NULL; rp = rp->list) {
1459 if (rp->ns->ns_status == RFSWRITE_INITVAL) {
1460 rp->ns->ns_status = puterrno(error);
1461 rp->thread->t_flag |= T_WOULDBLOCK;
1462 }
1463 }
1464 cv_broadcast(&nlp->cv);
1465 mutex_exit(&rfs_async_write_lock);
1466
1467 return;
1468 }
1469
1470 /*
1471 * Disconnect this cluster from the list of clusters.
1472 * The cluster that is being dealt with must be fixed
1473 * in size after this point, so there is no reason
1474 * to leave it on the list so that new requests can
1475 * find it.
1476 *
1477 * The algorithm is that the first write request will
1478 * create a cluster, convert the file handle to a
1479 * vnode pointer, and then lock the file for writing.
1480 * This request is not likely to be clustered with
1481 * any others. However, the next request will create
1482 * a new cluster and be blocked in VOP_RWLOCK while
1483 * the first request is being processed. This delay
1484 * will allow more requests to be clustered in this
1485 * second cluster.
1486 */
1487 mutex_enter(&rfs_async_write_lock);
1488 if (rfs_async_write_head == nlp)
1489 rfs_async_write_head = nlp->next;
1490 else {
1491 lp = rfs_async_write_head;
1492 while (lp->next != nlp)
1493 lp = lp->next;
1494 lp->next = nlp->next;
1495 }
1496 mutex_exit(&rfs_async_write_lock);
1497
1498 /*
1499 * Step through the list of requests in this cluster.
1500 * We need to check permissions to make sure that all
1501 * of the requests have sufficient permission to write
1502 * the file. A cluster can be composed of requests
1503 * from different clients and different users on each
1504 * client.
1505 *
1506 * As a side effect, we also calculate the size of the
1507 * byte range that this cluster encompasses.
1508 */
1509 rp = nlp->list;
1510 off = rp->wa->wa_offset;
1511 len = (uint_t)0;
1512 do {
1513 if (rdonly(rp->ro, vp)) {
1514 rp->ns->ns_status = NFSERR_ROFS;
1515 t_flag = curthread->t_flag & T_WOULDBLOCK;
1516 rp->thread->t_flag |= t_flag;
1517 continue;
1518 }
1519
1520 va.va_mask = AT_UID|AT_MODE;
1521
1522 error = VOP_GETATTR(vp, &va, 0, rp->cr, &ct);
1523
1524 if (!error) {
1525 if (crgetuid(rp->cr) != va.va_uid) {
1526 /*
1527 * This is a kludge to allow writes of files
1528 * created with read only permission. The
1529 * owner of the file is always allowed to
1530 * write it.
1531 */
1532 error = VOP_ACCESS(vp, VWRITE, 0, rp->cr, &ct);
1533 }
1534 if (!error && MANDLOCK(vp, va.va_mode))
1535 error = EACCES;
1536 }
1537
1538 /*
1539 * Check for a conflict with a nbmand-locked region.
1540 */
1541 if (in_crit && nbl_conflict(vp, NBL_WRITE, rp->wa->wa_offset,
1542 rp->wa->wa_count, 0, NULL)) {
1543 error = EACCES;
1544 }
1545
1546 if (error) {
1547 rp->ns->ns_status = puterrno(error);
1548 t_flag = curthread->t_flag & T_WOULDBLOCK;
1549 rp->thread->t_flag |= t_flag;
1550 continue;
1551 }
1552 if (len < rp->wa->wa_offset + rp->wa->wa_count - off)
1553 len = rp->wa->wa_offset + rp->wa->wa_count - off;
1554 } while ((rp = rp->list) != NULL);
1555
1556 /*
1557 * Step through the cluster attempting to gather as many
1558 * requests which are contiguous as possible. These
1559 * contiguous requests are handled via one call to VOP_WRITE
1560 * instead of different calls to VOP_WRITE. We also keep
1561 * track of the fact that any data was written.
1562 */
1563 rp = nlp->list;
1564 data_written = 0;
1565 do {
1566 /*
1567 * Skip any requests which are already marked as having an
1568 * error.
1569 */
1570 if (rp->ns->ns_status != RFSWRITE_INITVAL) {
1571 rp = rp->list;
1572 continue;
1573 }
1574
1575 /*
1576 * Count the number of iovec's which are required
1577 * to handle this set of requests. One iovec is
1578 * needed for each data buffer, whether addressed
1579 * by wa_data or by the b_rptr pointers in the
1580 * mblk chains.
1581 */
1582 iovcnt = 0;
1583 lrp = rp;
1584 for (;;) {
1585 if (lrp->wa->wa_data || lrp->wa->wa_rlist)
1586 iovcnt++;
1587 else {
1588 m = lrp->wa->wa_mblk;
1589 while (m != NULL) {
1590 iovcnt++;
1591 m = m->b_cont;
1592 }
1593 }
1594 if (lrp->list == NULL ||
1595 lrp->list->ns->ns_status != RFSWRITE_INITVAL ||
1596 lrp->wa->wa_offset + lrp->wa->wa_count !=
1597 lrp->list->wa->wa_offset) {
1598 lrp = lrp->list;
1599 break;
1600 }
1601 lrp = lrp->list;
1602 }
1603
1604 if (iovcnt <= MAXCLIOVECS) {
1605 #ifdef DEBUG
1606 rfs_write_hits++;
1607 #endif
1608 niovp = iov;
1609 } else {
1610 #ifdef DEBUG
1611 rfs_write_misses++;
1612 #endif
1613 niovp = kmem_alloc(sizeof (*niovp) * iovcnt, KM_SLEEP);
1614 }
1615 /*
1616 * Put together the scatter/gather iovecs.
1617 */
1618 iovp = niovp;
1619 trp = rp;
1620 count = 0;
1621 do {
1622 if (trp->wa->wa_data || trp->wa->wa_rlist) {
1623 if (trp->wa->wa_rlist) {
1624 iovp->iov_base =
1625 (char *)((trp->wa->wa_rlist)->
1626 u.c_daddr3);
1627 iovp->iov_len = trp->wa->wa_count;
1628 } else {
1629 iovp->iov_base = trp->wa->wa_data;
1630 iovp->iov_len = trp->wa->wa_count;
1631 }
1632 iovp++;
1633 } else {
1634 m = trp->wa->wa_mblk;
1635 rcount = trp->wa->wa_count;
1636 while (m != NULL) {
1637 iovp->iov_base = (caddr_t)m->b_rptr;
1638 iovp->iov_len = (m->b_wptr - m->b_rptr);
1639 rcount -= iovp->iov_len;
1640 if (rcount < 0)
1641 iovp->iov_len += rcount;
1642 iovp++;
1643 if (rcount <= 0)
1644 break;
1645 m = m->b_cont;
1646 }
1647 }
1648 count += trp->wa->wa_count;
1649 trp = trp->list;
1650 } while (trp != lrp);
1651
1652 uio.uio_iov = niovp;
1653 uio.uio_iovcnt = iovcnt;
1654 uio.uio_segflg = UIO_SYSSPACE;
1655 uio.uio_extflg = UIO_COPY_DEFAULT;
1656 uio.uio_loffset = (offset_t)rp->wa->wa_offset;
1657 uio.uio_resid = count;
1658 /*
1659 * The limit is checked on the client. We
1660 * should allow any size writes here.
1661 */
1662 uio.uio_llimit = curproc->p_fsz_ctl;
1663 rlimit = uio.uio_llimit - rp->wa->wa_offset;
1664 if (rlimit < (rlim64_t)uio.uio_resid)
1665 uio.uio_resid = (uint_t)rlimit;
1666
1667 /*
1668 * For now we assume no append mode.
1669 */
1670
1671 /*
1672 * We're changing creds because VM may fault
1673 * and we need the cred of the current
		 * thread to be used if quota checking is
1675 * enabled.
1676 */
1677 savecred = curthread->t_cred;
1678 curthread->t_cred = cr;
1679 error = VOP_WRITE(vp, &uio, 0, rp->cr, &ct);
1680 curthread->t_cred = savecred;
1681
1682 /* check if a monitor detected a delegation conflict */
1683 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
1684 /* mark as wouldblock so response is dropped */
1685 curthread->t_flag |= T_WOULDBLOCK;
1686
1687 if (niovp != iov)
1688 kmem_free(niovp, sizeof (*niovp) * iovcnt);
1689
1690 if (!error) {
1691 data_written = 1;
1692 /*
1693 * Get attributes again so we send the latest mod
1694 * time to the client side for its cache.
1695 */
1696 va.va_mask = AT_ALL; /* now we want everything */
1697
1698 error = VOP_GETATTR(vp, &va, 0, rp->cr, &ct);
1699
1700 if (!error)
1701 acl_perm(vp, exi, &va, rp->cr);
1702 }
1703
1704 /*
1705 * Fill in the status responses for each request
1706 * which was just handled. Also, copy the latest
1707 * attributes in to the attribute responses if
1708 * appropriate.
1709 */
1710 t_flag = curthread->t_flag & T_WOULDBLOCK;
1711 do {
1712 rp->thread->t_flag |= t_flag;
1713 /* check for overflows */
1714 if (!error) {
1715 error = vattr_to_nattr(&va, &rp->ns->ns_attr);
1716 }
1717 rp->ns->ns_status = puterrno(error);
1718 rp = rp->list;
1719 } while (rp != lrp);
1720 } while (rp != NULL);
1721
1722 /*
1723 * If any data was written at all, then we need to flush
1724 * the data and metadata to stable storage.
1725 */
1726 if (data_written) {
1727 error = VOP_PUTPAGE(vp, (u_offset_t)off, len, 0, cr, &ct);
1728
1729 if (!error) {
1730 error = VOP_FSYNC(vp, FNODSYNC, cr, &ct);
1731 }
1732 }
1733
1734 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct);
1735
1736 if (in_crit)
1737 nbl_end_crit(vp);
1738 VN_RELE(vp);
1739
1740 t_flag = curthread->t_flag & T_WOULDBLOCK;
1741 mutex_enter(&rfs_async_write_lock);
1742 for (rp = nlp->list; rp != NULL; rp = rp->list) {
1743 if (rp->ns->ns_status == RFSWRITE_INITVAL) {
1744 rp->ns->ns_status = puterrno(error);
1745 rp->thread->t_flag |= t_flag;
1746 }
1747 }
1748 cv_broadcast(&nlp->cv);
1749 mutex_exit(&rfs_async_write_lock);
}
1752
1753 void *
1754 rfs_write_getfh(struct nfswriteargs *wa)
1755 {
1756 return (&wa->wa_fhandle);
1757 }
1758
1759 /*
1760 * Create a file.
1761 * Creates a file with given attributes and returns those attributes
1762 * and an fhandle for the new file.
1763 */
1764 void
1765 rfs_create(struct nfscreatargs *args, struct nfsdiropres *dr,
1766 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
1767 {
1768 int error;
1769 int lookuperr;
1770 int in_crit = 0;
1771 struct vattr va;
1772 vnode_t *vp;
1773 vnode_t *realvp;
1774 vnode_t *dvp;
1775 char *name = args->ca_da.da_name;
1776 vnode_t *tvp = NULL;
1777 int mode;
1778 int lookup_ok;
1779 bool_t trunc;
1780 struct sockaddr *ca;
1781
1782 /*
1783 * Disallow NULL paths
1784 */
1785 if (name == NULL || *name == '\0') {
1786 dr->dr_status = NFSERR_ACCES;
1787 return;
1788 }
1789
1790 dvp = nfs_fhtovp(args->ca_da.da_fhandle, exi);
1791 if (dvp == NULL) {
1792 dr->dr_status = NFSERR_STALE;
1793 return;
1794 }
1795
1796 error = sattr_to_vattr(args->ca_sa, &va);
	if (error) {
		VN_RELE(dvp);
		dr->dr_status = puterrno(error);
		return;
	}
1801
1802 /*
1803 * Must specify the mode.
1804 */
1805 if (!(va.va_mask & AT_MODE)) {
1806 VN_RELE(dvp);
1807 dr->dr_status = NFSERR_INVAL;
1808 return;
1809 }
1810
1811 /*
1812 * This is a completely gross hack to make mknod
	 * work over the wire until we can whack the protocol.
1814 */
1815 if ((va.va_mode & IFMT) == IFCHR) {
1816 if (args->ca_sa->sa_size == (uint_t)NFS_FIFO_DEV)
1817 va.va_type = VFIFO; /* xtra kludge for named pipe */
1818 else {
1819 va.va_type = VCHR;
1820 /*
			 * Uncompress the received dev_t if the top half
			 * is zero, indicating a request from an
			 * `older style' OS.
1824 */
1825 if ((va.va_size & 0xffff0000) == 0)
1826 va.va_rdev = nfsv2_expdev(va.va_size);
1827 else
1828 va.va_rdev = (dev_t)va.va_size;
1829 }
1830 va.va_mask &= ~AT_SIZE;
1831 } else if ((va.va_mode & IFMT) == IFBLK) {
1832 va.va_type = VBLK;
1833 /*
	 * Uncompress the received dev_t if the top half
	 * is zero, indicating a request from an
	 * `older style' OS.
1837 */
1838 if ((va.va_size & 0xffff0000) == 0)
1839 va.va_rdev = nfsv2_expdev(va.va_size);
1840 else
1841 va.va_rdev = (dev_t)va.va_size;
1842 va.va_mask &= ~AT_SIZE;
1843 } else if ((va.va_mode & IFMT) == IFSOCK) {
1844 va.va_type = VSOCK;
1845 } else {
1846 va.va_type = VREG;
1847 }
1848 va.va_mode &= ~IFMT;
1849 va.va_mask |= AT_TYPE;
1850
1851 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1852 name = nfscmd_convname(ca, exi, name, NFSCMD_CONV_INBOUND,
1853 MAXPATHLEN);
	if (name == NULL) {
		VN_RELE(dvp);
		dr->dr_status = puterrno(EINVAL);
		return;
	}
1858
1859 /*
	 * Why was the choice made to use VWRITE as the mode to the
	 * call to VOP_CREATE? This results in a bug: when a client
	 * opens a file that already exists and is RDONLY, the second
	 * open fails with EACCES because of the mode.
	 * Bug ID 1054648.
1865 */
1866 lookup_ok = 0;
1867 mode = VWRITE;
1868 if (!(va.va_mask & AT_SIZE) || va.va_type != VREG) {
1869 error = VOP_LOOKUP(dvp, name, &tvp, NULL, 0, NULL, cr,
1870 NULL, NULL, NULL);
1871 if (!error) {
1872 struct vattr at;
1873
1874 lookup_ok = 1;
1875 at.va_mask = AT_MODE;
1876 error = VOP_GETATTR(tvp, &at, 0, cr, NULL);
1877 if (!error)
1878 mode = (at.va_mode & S_IWUSR) ? VWRITE : VREAD;
1879 VN_RELE(tvp);
1880 tvp = NULL;
1881 }
1882 }
1883
1884 if (!lookup_ok) {
1885 if (rdonly(ro, dvp)) {
1886 error = EROFS;
1887 } else if (va.va_type != VREG && va.va_type != VFIFO &&
1888 va.va_type != VSOCK && secpolicy_sys_devices(cr) != 0) {
1889 error = EPERM;
1890 } else {
1891 error = 0;
1892 }
1893 }
1894
1895 /*
1896 * If file size is being modified on an already existing file
1897 * make sure that there are no conflicting non-blocking mandatory
1898 * locks in the region being manipulated. Return EACCES if there
1899 * are conflicting locks.
1900 */
1901 if (!error && (va.va_type == VREG) && (va.va_mask & AT_SIZE)) {
1902 lookuperr = VOP_LOOKUP(dvp, name, &tvp, NULL, 0, NULL, cr,
1903 NULL, NULL, NULL);
1904
1905 if (!lookuperr &&
1906 rfs4_check_delegated(FWRITE, tvp, va.va_size == 0)) {
1907 VN_RELE(tvp);
1908 curthread->t_flag |= T_WOULDBLOCK;
1909 goto out;
1910 }
1911
1912 if (!lookuperr && nbl_need_check(tvp)) {
1913 /*
1914 * The file exists. Now check if it has any
1915 * conflicting non-blocking mandatory locks
1916 * in the region being changed.
1917 */
1918 struct vattr bva;
1919 u_offset_t offset;
1920 ssize_t length;
1921
1922 nbl_start_crit(tvp, RW_READER);
1923 in_crit = 1;
1924
1925 bva.va_mask = AT_SIZE;
1926 error = VOP_GETATTR(tvp, &bva, 0, cr, NULL);
1927 if (!error) {
1928 if (va.va_size < bva.va_size) {
1929 offset = va.va_size;
1930 length = bva.va_size - va.va_size;
1931 } else {
1932 offset = bva.va_size;
1933 length = va.va_size - bva.va_size;
1934 }
1935 if (length) {
1936 if (nbl_conflict(tvp, NBL_WRITE,
1937 offset, length, 0, NULL)) {
1938 error = EACCES;
1939 }
1940 }
1941 }
1942 if (error) {
1943 nbl_end_crit(tvp);
1944 VN_RELE(tvp);
1945 in_crit = 0;
1946 }
1947 } else if (tvp != NULL) {
1948 VN_RELE(tvp);
1949 }
1950 }
1951
1952 if (!error) {
1953 /*
		 * If the filesystem is shared with nosuid, then remove any
1955 * setuid/setgid bits on create.
1956 */
1957 if (va.va_type == VREG &&
1958 exi->exi_export.ex_flags & EX_NOSUID)
1959 va.va_mode &= ~(VSUID | VSGID);
1960
1961 error = VOP_CREATE(dvp, name, &va, NONEXCL, mode, &vp, cr, 0,
1962 NULL, NULL);
1963
1964 if (!error) {
1965
1966 if ((va.va_mask & AT_SIZE) && (va.va_size == 0))
1967 trunc = TRUE;
1968 else
1969 trunc = FALSE;
1970
1971 if (rfs4_check_delegated(FWRITE, vp, trunc)) {
1972 VN_RELE(vp);
1973 curthread->t_flag |= T_WOULDBLOCK;
1974 goto out;
1975 }
1976 va.va_mask = AT_ALL;
1977
1978 error = VOP_GETATTR(vp, &va, 0, cr, NULL);
1979
1980 /* check for overflows */
1981 if (!error) {
1982 acl_perm(vp, exi, &va, cr);
1983 error = vattr_to_nattr(&va, &dr->dr_attr);
1984 if (!error) {
1985 error = makefh(&dr->dr_fhandle, vp,
1986 exi);
1987 }
1988 }
1989 /*
1990 * Force modified metadata out to stable storage.
1991 *
			 * If an underlying vp exists, pass it to VOP_FSYNC.
1993 */
1994 if (VOP_REALVP(vp, &realvp, NULL) == 0)
1995 (void) VOP_FSYNC(realvp, FNODSYNC, cr, NULL);
1996 else
1997 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
1998 VN_RELE(vp);
1999 }
2000
2001 if (in_crit) {
2002 nbl_end_crit(tvp);
2003 VN_RELE(tvp);
2004 }
2005 }
2006
2007 /*
2008 * Force modified data and metadata out to stable storage.
2009 */
2010 (void) VOP_FSYNC(dvp, 0, cr, NULL);
2011
2012 out:
2013
2014 VN_RELE(dvp);
2015
2016 dr->dr_status = puterrno(error);
2017
2018 if (name != args->ca_da.da_name)
2019 kmem_free(name, MAXPATHLEN);
}

2021 void *
2022 rfs_create_getfh(struct nfscreatargs *args)
2023 {
2024 return (args->ca_da.da_fhandle);
2025 }
2026
2027 /*
2028 * Remove a file.
2029 * Remove named file from parent directory.
2030 */
2031 /* ARGSUSED */
2032 void
2033 rfs_remove(struct nfsdiropargs *da, enum nfsstat *status,
2034 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
2035 {
2036 int error = 0;
2037 vnode_t *vp;
2038 vnode_t *targvp;
2039 int in_crit = 0;
2040
2041 /*
2042 * Disallow NULL paths
2043 */
2044 if (da->da_name == NULL || *da->da_name == '\0') {
2045 *status = NFSERR_ACCES;
2046 return;
2047 }
2048
2049 vp = nfs_fhtovp(da->da_fhandle, exi);
2050 if (vp == NULL) {
2051 *status = NFSERR_STALE;
2052 return;
2053 }
2054
2055 if (rdonly(ro, vp)) {
2056 VN_RELE(vp);
2057 *status = NFSERR_ROFS;
2058 return;
2059 }
2060
2061 /*
2062 * Check for a conflict with a non-blocking mandatory share reservation.
2063 */
2064 error = VOP_LOOKUP(vp, da->da_name, &targvp, NULL, 0,
2065 NULL, cr, NULL, NULL, NULL);
2066 if (error != 0) {
2067 VN_RELE(vp);
2068 *status = puterrno(error);
2069 return;
2070 }
2071
2072 /*
	 * If the file is delegated to a v4 client, then initiate
2074 * recall and drop this request (by setting T_WOULDBLOCK).
2075 * The client will eventually re-transmit the request and
2076 * (hopefully), by then, the v4 client will have returned
2077 * the delegation.
2078 */
2079
2080 if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
2081 VN_RELE(vp);
2082 VN_RELE(targvp);
2083 curthread->t_flag |= T_WOULDBLOCK;
2084 return;
2085 }
2086
2087 if (nbl_need_check(targvp)) {
2088 nbl_start_crit(targvp, RW_READER);
2089 in_crit = 1;
2090 if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
2091 error = EACCES;
2092 goto out;
2093 }
2094 }
2095
2096 error = VOP_REMOVE(vp, da->da_name, cr, NULL, 0);
2097
2098 /*
2099 * Force modified data and metadata out to stable storage.
2100 */
2101 (void) VOP_FSYNC(vp, 0, cr, NULL);
2102
2103 out:
2104 if (in_crit)
2105 nbl_end_crit(targvp);
2106 VN_RELE(targvp);
2107 VN_RELE(vp);
2108
2109 *status = puterrno(error);
}
2112
2113 void *
2114 rfs_remove_getfh(struct nfsdiropargs *da)
2115 {
2116 return (da->da_fhandle);
2117 }
2118
2119 /*
2120 * rename a file
2121 * Give a file (from) a new name (to).
2122 */
2123 /* ARGSUSED */
2124 void
2125 rfs_rename(struct nfsrnmargs *args, enum nfsstat *status,
2126 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
2127 {
2128 int error = 0;
2129 vnode_t *fromvp;
2130 vnode_t *tovp;
2131 struct exportinfo *to_exi;
2132 fhandle_t *fh;
2133 vnode_t *srcvp;
2134 vnode_t *targvp;
2135 int in_crit = 0;
2136
2137 fromvp = nfs_fhtovp(args->rna_from.da_fhandle, exi);
2138 if (fromvp == NULL) {
2139 *status = NFSERR_STALE;
2140 return;
2141 }
2142
2143 fh = args->rna_to.da_fhandle;
2144 to_exi = checkexport(&fh->fh_fsid, (fid_t *)&fh->fh_xlen);
2145 if (to_exi == NULL) {
2146 VN_RELE(fromvp);
2147 *status = NFSERR_ACCES;
2148 return;
2149 }
2150 exi_rele(to_exi);
2151
2152 if (to_exi != exi) {
2153 VN_RELE(fromvp);
2154 *status = NFSERR_XDEV;
2155 return;
2156 }
2157
2158 tovp = nfs_fhtovp(args->rna_to.da_fhandle, exi);
2159 if (tovp == NULL) {
2160 VN_RELE(fromvp);
2161 *status = NFSERR_STALE;
2162 return;
2163 }
2164
2165 if (fromvp->v_type != VDIR || tovp->v_type != VDIR) {
2166 VN_RELE(tovp);
2167 VN_RELE(fromvp);
2168 *status = NFSERR_NOTDIR;
2169 return;
2170 }
2171
2172 /*
2173 * Disallow NULL paths
2174 */
2175 if (args->rna_from.da_name == NULL || *args->rna_from.da_name == '\0' ||
2176 args->rna_to.da_name == NULL || *args->rna_to.da_name == '\0') {
2177 VN_RELE(tovp);
2178 VN_RELE(fromvp);
2179 *status = NFSERR_ACCES;
2180 return;
2181 }
2182
2183 if (rdonly(ro, tovp)) {
2184 VN_RELE(tovp);
2185 VN_RELE(fromvp);
2186 *status = NFSERR_ROFS;
2187 return;
2188 }
2189
2190 /*
2191 * Check for a conflict with a non-blocking mandatory share reservation.
2192 */
2193 error = VOP_LOOKUP(fromvp, args->rna_from.da_name, &srcvp, NULL, 0,
2194 NULL, cr, NULL, NULL, NULL);
2195 if (error != 0) {
2196 VN_RELE(tovp);
2197 VN_RELE(fromvp);
2198 *status = puterrno(error);
2199 return;
2200 }
2201
2202 /* Check for delegations on the source file */
2203
2204 if (rfs4_check_delegated(FWRITE, srcvp, FALSE)) {
2205 VN_RELE(tovp);
2206 VN_RELE(fromvp);
2207 VN_RELE(srcvp);
2208 curthread->t_flag |= T_WOULDBLOCK;
2209 return;
2210 }
2211
2212 /* Check for delegation on the file being renamed over, if it exists */
2213
2214 if (rfs4_deleg_policy != SRV_NEVER_DELEGATE &&
2215 VOP_LOOKUP(tovp, args->rna_to.da_name, &targvp, NULL, 0, NULL, cr,
2216 NULL, NULL, NULL) == 0) {
2217
2218 if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
2219 VN_RELE(tovp);
2220 VN_RELE(fromvp);
2221 VN_RELE(srcvp);
2222 VN_RELE(targvp);
2223 curthread->t_flag |= T_WOULDBLOCK;
2224 return;
2225 }
2226 VN_RELE(targvp);
	}

2230 if (nbl_need_check(srcvp)) {
2231 nbl_start_crit(srcvp, RW_READER);
2232 in_crit = 1;
2233 if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL)) {
2234 error = EACCES;
2235 goto out;
2236 }
2237 }
2238
2239 error = VOP_RENAME(fromvp, args->rna_from.da_name,
2240 tovp, args->rna_to.da_name, cr, NULL, 0);
2241
2242 if (error == 0)
2243 vn_renamepath(tovp, srcvp, args->rna_to.da_name,
2244 strlen(args->rna_to.da_name));
2245
2246 /*
2247 * Force modified data and metadata out to stable storage.
2248 */
2249 (void) VOP_FSYNC(tovp, 0, cr, NULL);
2250 (void) VOP_FSYNC(fromvp, 0, cr, NULL);
2251
2252 out:
2253 if (in_crit)
2254 nbl_end_crit(srcvp);
2255 VN_RELE(srcvp);
2256 VN_RELE(tovp);
2257 VN_RELE(fromvp);
2258
2259 *status = puterrno(error);
}

2262 void *
2263 rfs_rename_getfh(struct nfsrnmargs *args)
2264 {
2265 return (args->rna_from.da_fhandle);
2266 }
2267
2268 /*
2269 * Link to a file.
2270 * Create a file (to) which is a hard link to the given file (from).
2271 */
2272 /* ARGSUSED */
2273 void
2274 rfs_link(struct nfslinkargs *args, enum nfsstat *status,
2275 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
2276 {
2277 int error;
2278 vnode_t *fromvp;
2279 vnode_t *tovp;
2280 struct exportinfo *to_exi;
2281 fhandle_t *fh;
2282
2283 fromvp = nfs_fhtovp(args->la_from, exi);
2284 if (fromvp == NULL) {
2285 *status = NFSERR_STALE;
2286 return;
2287 }
2288
2289 fh = args->la_to.da_fhandle;
2290 to_exi = checkexport(&fh->fh_fsid, (fid_t *)&fh->fh_xlen);
2291 if (to_exi == NULL) {
2292 VN_RELE(fromvp);
2293 *status = NFSERR_ACCES;
2294 return;
2295 }
2296 exi_rele(to_exi);
2297
2298 if (to_exi != exi) {
2299 VN_RELE(fromvp);
2300 *status = NFSERR_XDEV;
2301 return;
2302 }
2303
2304 tovp = nfs_fhtovp(args->la_to.da_fhandle, exi);
2305 if (tovp == NULL) {
2306 VN_RELE(fromvp);
2307 *status = NFSERR_STALE;
2308 return;
2309 }
2310
2311 if (tovp->v_type != VDIR) {
2312 VN_RELE(tovp);
2313 VN_RELE(fromvp);
2314 *status = NFSERR_NOTDIR;
2315 return;
2316 }
2317 /*
2318 * Disallow NULL paths
2319 */
2320 if (args->la_to.da_name == NULL || *args->la_to.da_name == '\0') {
2321 VN_RELE(tovp);
2322 VN_RELE(fromvp);
2323 *status = NFSERR_ACCES;
2324 return;
2325 }
2326
2327 if (rdonly(ro, tovp)) {
2328 VN_RELE(tovp);
2329 VN_RELE(fromvp);
2330 *status = NFSERR_ROFS;
2331 return;
2332 }
2333
2334 error = VOP_LINK(tovp, fromvp, args->la_to.da_name, cr, NULL, 0);
2335
2336 /*
2337 * Force modified data and metadata out to stable storage.
2338 */
2339 (void) VOP_FSYNC(tovp, 0, cr, NULL);
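	/*
	 * Only the source's metadata (its link count) changed, so
	 * FNODSYNC (skip the file data) is sufficient here.
	 */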
2340 (void) VOP_FSYNC(fromvp, FNODSYNC, cr, NULL);
2341
2342 VN_RELE(tovp);
2343 VN_RELE(fromvp);
2344
2345 *status = puterrno(error);
}

void *
2349 rfs_link_getfh(struct nfslinkargs *args)
2350 {
2351 return (args->la_from);
2352 }
2353
2354 /*
 * Symbolically link to a file.
 * Create a file (from) with the given attributes which is a symbolic
 * link to the given path name (to).
2358 */
2359 void
2360 rfs_symlink(struct nfsslargs *args, enum nfsstat *status,
2361 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
2362 {
2363 int error;
2364 struct vattr va;
2365 vnode_t *vp;
2366 vnode_t *svp;
2367 int lerror;
2368 struct sockaddr *ca;
2369 char *name = NULL;
2370
2371 /*
2372 * Disallow NULL paths
2373 */
2374 if (args->sla_from.da_name == NULL || *args->sla_from.da_name == '\0') {
2375 *status = NFSERR_ACCES;
2376 return;
2377 }
2378
2379 vp = nfs_fhtovp(args->sla_from.da_fhandle, exi);
2380 if (vp == NULL) {
2381 *status = NFSERR_STALE;
2382 return;
2383 }
2384
2385 if (rdonly(ro, vp)) {
2386 VN_RELE(vp);
2387 *status = NFSERR_ROFS;
2388 return;
2389 }
2390
2391 error = sattr_to_vattr(args->sla_sa, &va);
2392 if (error) {
2393 VN_RELE(vp);
2394 *status = puterrno(error);
2395 return;
2396 }
2397
2398 if (!(va.va_mask & AT_MODE)) {
2399 VN_RELE(vp);
2400 *status = NFSERR_INVAL;
2401 return;
2402 }
2403
2404 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2405 name = nfscmd_convname(ca, exi, args->sla_tnm,
2406 NFSCMD_CONV_INBOUND, MAXPATHLEN);
2407
	if (name == NULL) {
		/* release the hold taken by nfs_fhtovp() */
		VN_RELE(vp);
		*status = NFSERR_ACCES;
		return;
	}
2412
2413 va.va_type = VLNK;
2414 va.va_mask |= AT_TYPE;
2415
2416 error = VOP_SYMLINK(vp, args->sla_from.da_name, &va, name, cr, NULL, 0);
2417
	/*
	 * Force new data and metadata out to stable storage.
	 * VOP_SYMLINK() does not return the new vnode, so look the
	 * symlink up first in order to fsync it.
	 */
2421 lerror = VOP_LOOKUP(vp, args->sla_from.da_name, &svp, NULL, 0,
2422 NULL, cr, NULL, NULL, NULL);
2423
2424 if (!lerror) {
2425 (void) VOP_FSYNC(svp, 0, cr, NULL);
2426 VN_RELE(svp);
2427 }
2428
2429 /*
2430 * Force modified data and metadata out to stable storage.
2431 */
2432 (void) VOP_FSYNC(vp, 0, cr, NULL);
2433
2434 VN_RELE(vp);
2435
2436 *status = puterrno(error);
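	/*
	 * nfscmd_convname() returns either the caller's string or a
	 * newly allocated MAXPATHLEN buffer; free it only in the
	 * latter case.
	 */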
2437 if (name != args->sla_tnm)
2438 kmem_free(name, MAXPATHLEN);
}

void *
2442 rfs_symlink_getfh(struct nfsslargs *args)
2443 {
2444 return (args->sla_from.da_fhandle);
2445 }
2446
2447 /*
2448 * Make a directory.
2449 * Create a directory with the given name, parent directory, and attributes.
2450 * Returns a file handle and attributes for the new directory.
2451 */
2452 /* ARGSUSED */
2453 void
2454 rfs_mkdir(struct nfscreatargs *args, struct nfsdiropres *dr,
2455 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
2456 {
2457 int error;
2458 struct vattr va;
2459 vnode_t *dvp = NULL;
2460 vnode_t *vp;
2461 char *name = args->ca_da.da_name;
2462
2463 /*
2464 * Disallow NULL paths
2465 */
2466 if (name == NULL || *name == '\0') {
2467 dr->dr_status = NFSERR_ACCES;
2468 return;
2469 }
2470
2471 vp = nfs_fhtovp(args->ca_da.da_fhandle, exi);
2472 if (vp == NULL) {
2473 dr->dr_status = NFSERR_STALE;
2474 return;
2475 }
2476
2477 if (rdonly(ro, vp)) {
2478 VN_RELE(vp);
2479 dr->dr_status = NFSERR_ROFS;
2480 return;
2481 }
2482
2483 error = sattr_to_vattr(args->ca_sa, &va);
2484 if (error) {
2485 VN_RELE(vp);
2486 dr->dr_status = puterrno(error);
2487 return;
2488 }
2489
2490 if (!(va.va_mask & AT_MODE)) {
2491 VN_RELE(vp);
2492 dr->dr_status = NFSERR_INVAL;
2493 return;
2494 }
2495
2496 va.va_type = VDIR;
2497 va.va_mask |= AT_TYPE;
2498
2499 error = VOP_MKDIR(vp, name, &va, &dvp, cr, NULL, 0, NULL);
2500
2501 if (!error) {
		/*
		 * Attributes of the newly created directory should
		 * be returned to the client.
		 */
2506 va.va_mask = AT_ALL; /* We want everything */
2507 error = VOP_GETATTR(dvp, &va, 0, cr, NULL);
2508
2509 /* check for overflows */
2510 if (!error) {
			acl_perm(dvp, exi, &va, cr);
2512 error = vattr_to_nattr(&va, &dr->dr_attr);
2513 if (!error) {
2514 error = makefh(&dr->dr_fhandle, dvp, exi);
2515 }
2516 }
2517 /*
2518 * Force new data and metadata out to stable storage.
2519 */
2520 (void) VOP_FSYNC(dvp, 0, cr, NULL);
2521 VN_RELE(dvp);
2522 }
2523
2524 /*
2525 * Force modified data and metadata out to stable storage.
2526 */
2527 (void) VOP_FSYNC(vp, 0, cr, NULL);
2528
2529 VN_RELE(vp);
2530
2531 dr->dr_status = puterrno(error);
}

void *
2535 rfs_mkdir_getfh(struct nfscreatargs *args)
2536 {
2537 return (args->ca_da.da_fhandle);
2538 }
2539
2540 /*
2541 * Remove a directory.
2542 * Remove the given directory name from the given parent directory.
2543 */
2544 /* ARGSUSED */
2545 void
2546 rfs_rmdir(struct nfsdiropargs *da, enum nfsstat *status,
2547 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
2548 {
2549 int error;
2550 vnode_t *vp;
2551
2552 /*
2553 * Disallow NULL paths
2554 */
2555 if (da->da_name == NULL || *da->da_name == '\0') {
2556 *status = NFSERR_ACCES;
2557 return;
2558 }
2559
2560 vp = nfs_fhtovp(da->da_fhandle, exi);
2561 if (vp == NULL) {
2562 *status = NFSERR_STALE;
2563 return;
2564 }
2565
2566 if (rdonly(ro, vp)) {
2567 VN_RELE(vp);
2568 *status = NFSERR_ROFS;
2569 return;
2570 }
2571
2572 /*
2573 * VOP_RMDIR takes a third argument (the current
2574 * directory of the process). That's because someone
2575 * wants to return EINVAL if one tries to remove ".".
2576 * Of course, NFS servers have no idea what their
2577 * clients' current directories are. We fake it by
2578 * supplying a vnode known to exist and illegal to
2579 * remove.
2580 */
2581 error = VOP_RMDIR(vp, da->da_name, rootdir, cr, NULL, 0);
2582
2583 /*
2584 * Force modified data and metadata out to stable storage.
2585 */
2586 (void) VOP_FSYNC(vp, 0, cr, NULL);
2587
2588 VN_RELE(vp);
2589
2590 /*
2591 * System V defines rmdir to return EEXIST, not ENOTEMPTY,
2592 * if the directory is not empty. A System V NFS server
2593 * needs to map NFSERR_EXIST to NFSERR_NOTEMPTY to transmit
2594 * over the wire.
2595 */
2596 if (error == EEXIST)
2597 *status = NFSERR_NOTEMPTY;
2598 else
2599 *status = puterrno(error);
}

void *
2603 rfs_rmdir_getfh(struct nfsdiropargs *da)
2604 {
2605 return (da->da_fhandle);
2606 }
2607
2608 /* ARGSUSED */
2609 void
2610 rfs_readdir(struct nfsrddirargs *rda, struct nfsrddirres *rd,
2611 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
2612 {
2613 int error;
2614 int iseof;
2615 struct iovec iov;
2616 struct uio uio;
2617 vnode_t *vp;
2618 char *ndata = NULL;
2619 struct sockaddr *ca;
2620 size_t nents;
2621 int ret;
2622
2623 vp = nfs_fhtovp(&rda->rda_fh, exi);
2624 if (vp == NULL) {
2625 rd->rd_entries = NULL;
2626 rd->rd_status = NFSERR_STALE;
2627 return;
2628 }
2629
2630 if (vp->v_type != VDIR) {
2631 VN_RELE(vp);
2632 rd->rd_entries = NULL;
2633 rd->rd_status = NFSERR_NOTDIR;
2634 return;
2635 }
2636
2637 (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
2638
2639 error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
2640
2641 if (error) {
2642 rd->rd_entries = NULL;
2643 goto bad;
2644 }
2645
2646 if (rda->rda_count == 0) {
2647 rd->rd_entries = NULL;
2648 rd->rd_size = 0;
2649 rd->rd_eof = FALSE;
2650 goto bad;
2651 }
2652
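	/* Clamp the request to the maximum v2 transfer size. */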
2653 rda->rda_count = MIN(rda->rda_count, NFS_MAXDATA);
2654
2655 /*
2656 * Allocate data for entries. This will be freed by rfs_rddirfree.
2657 */
2658 rd->rd_bufsize = (uint_t)rda->rda_count;
2659 rd->rd_entries = kmem_alloc(rd->rd_bufsize, KM_SLEEP);
2660
2661 /*
2662 * Set up io vector to read directory data
2663 */
2664 iov.iov_base = (caddr_t)rd->rd_entries;
2665 iov.iov_len = rda->rda_count;
2666 uio.uio_iov = &iov;
2667 uio.uio_iovcnt = 1;
2668 uio.uio_segflg = UIO_SYSSPACE;
2669 uio.uio_extflg = UIO_COPY_CACHED;
2670 uio.uio_loffset = (offset_t)rda->rda_offset;
2671 uio.uio_resid = rda->rda_count;
2672
2673 /*
2674 * read directory
2675 */
2676 error = VOP_READDIR(vp, &uio, cr, &iseof, NULL, 0);
2677
2678 /*
2679 * Clean up
2680 */
2681 if (!error) {
2682 /*
2683 * set size and eof
2684 */
2685 if (uio.uio_resid == rda->rda_count) {
2686 rd->rd_size = 0;
2687 rd->rd_eof = TRUE;
2688 } else {
2689 rd->rd_size = (uint32_t)(rda->rda_count -
2690 uio.uio_resid);
2691 rd->rd_eof = iseof ? TRUE : FALSE;
2692 }
	} else {
		/*
		 * The read failed; rd_size is not valid, so free the
		 * buffer and skip the character-set conversion below.
		 */
		kmem_free(rd->rd_entries, rd->rd_bufsize);
		rd->rd_entries = NULL;
		goto bad;
	}
2694
2695 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2696 nents = nfscmd_countents((char *)rd->rd_entries, rd->rd_size);
2697 ret = nfscmd_convdirplus(ca, exi, (char *)rd->rd_entries, nents,
2698 rda->rda_count, &ndata);
2699
2700 if (ret != 0) {
2701 size_t dropbytes;
2702 /*
2703 * We had to drop one or more entries in order to fit
2704 * during the character conversion. We need to patch
2705 * up the size and eof info.
2706 */
2707 if (rd->rd_eof)
2708 rd->rd_eof = FALSE;
2709 dropbytes = nfscmd_dropped_entrysize(
2710 (struct dirent64 *)rd->rd_entries, nents, ret);
2711 rd->rd_size -= dropbytes;
2712 }
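	/*
	 * nfscmd_convdirplus() either left the entries in place
	 * (ndata == NULL) or returned a converted copy that replaces
	 * the original buffer.
	 */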
2713 if (ndata == NULL) {
2714 ndata = (char *)rd->rd_entries;
2715 } else if (ndata != (char *)rd->rd_entries) {
2716 kmem_free(rd->rd_entries, rd->rd_bufsize);
2717 rd->rd_entries = (void *)ndata;
2718 rd->rd_bufsize = rda->rda_count;
2719 }
2720
2721 bad:
2722 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
2723
2724 #if 0 /* notyet */
2725 /*
2726 * Don't do this. It causes local disk writes when just
2727 * reading the file and the overhead is deemed larger
2728 * than the benefit.
2729 */
2730 /*
2731 * Force modified metadata out to stable storage.
2732 */
2733 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
2734 #endif
2735
2736 VN_RELE(vp);
2737
2738 rd->rd_status = puterrno(error);
}

void *
2742 rfs_readdir_getfh(struct nfsrddirargs *rda)
2743 {
2744 return (&rda->rda_fh);
}

void
2747 rfs_rddirfree(struct nfsrddirres *rd)
2748 {
2749 if (rd->rd_entries != NULL)
2750 kmem_free(rd->rd_entries, rd->rd_bufsize);
2751 }
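
/*
 * Illustrative sketch (not compiled): the single-iovec, kernel-space
 * uio setup that rfs_readdir() uses above, shown as a standalone
 * helper.  setup_dir_uio() and its parameters are hypothetical.
 */
#if 0
static void
setup_dir_uio(struct uio *uiop, struct iovec *iovp, caddr_t buf,
    size_t len, offset_t off)
{
	/* One iovec describing the whole kernel buffer. */
	iovp->iov_base = buf;
	iovp->iov_len = len;

	uiop->uio_iov = iovp;
	uiop->uio_iovcnt = 1;
	uiop->uio_segflg = UIO_SYSSPACE;	/* kernel addresses */
	uiop->uio_extflg = UIO_COPY_CACHED;
	uiop->uio_loffset = off;	/* directory cookie to resume at */
	uiop->uio_resid = len;		/* bytes requested */
}
#endif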
2752
2753 /* ARGSUSED */
2754 void
2755 rfs_statfs(fhandle_t *fh, struct nfsstatfs *fs, struct exportinfo *exi,
2756 struct svc_req *req, cred_t *cr, bool_t ro)
2757 {
2758 int error;
2759 struct statvfs64 sb;
2760 vnode_t *vp;
2761
2762 vp = nfs_fhtovp(fh, exi);
2763 if (vp == NULL) {
2764 fs->fs_status = NFSERR_STALE;
2765 return;
2766 }
2767
2768 error = VFS_STATVFS(vp->v_vfsp, &sb);
2769
2770 if (!error) {
2771 fs->fs_tsize = nfstsize();
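		/*
		 * statvfs64 counts f_blocks, f_bfree, and f_bavail in
		 * units of f_frsize (the fundamental block size), so
		 * that is the block size reported to the client.
		 */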
2772 fs->fs_bsize = sb.f_frsize;
2773 fs->fs_blocks = sb.f_blocks;
2774 fs->fs_bfree = sb.f_bfree;
2775 fs->fs_bavail = sb.f_bavail;
2776 }
2777
2778 VN_RELE(vp);
2779
2780 fs->fs_status = puterrno(error);
}

void *
2784 rfs_statfs_getfh(fhandle_t *fh)
2785 {
2786 return (fh);
2787 }
2788
2789 static int
2790 sattr_to_vattr(struct nfssattr *sa, struct vattr *vap)
2791 {
2792 vap->va_mask = 0;
2793
2794 /*
2795 * There was a sign extension bug in some VFS based systems
2796 * which stored the mode as a short. When it would get
2797 * assigned to a u_long, no sign extension would occur.
2798 * It needed to, but this wasn't noticed because sa_mode
2799 * would then get assigned back to the short, thus ignoring
2800 * the upper 16 bits of sa_mode.
2801 *
2802 * To make this implementation work for both broken
2803 * clients and good clients, we check for both versions
	 * of the mode: an unset mode arrives as 0xffffffff
	 * ((uint32_t)-1) from a correct client, but as 0x0000ffff
	 * (a zero-extended (ushort_t)-1) from a broken one.
	 */
2806 if (sa->sa_mode != (uint32_t)((ushort_t)-1) &&
2807 sa->sa_mode != (uint32_t)-1) {
2808 vap->va_mask |= AT_MODE;
2809 vap->va_mode = sa->sa_mode;
2810 }
2811 if (sa->sa_uid != (uint32_t)-1) {
2812 vap->va_mask |= AT_UID;
2813 vap->va_uid = sa->sa_uid;
2814 }
2815 if (sa->sa_gid != (uint32_t)-1) {
2816 vap->va_mask |= AT_GID;
2817 vap->va_gid = sa->sa_gid;
2818 }
2819 if (sa->sa_size != (uint32_t)-1) {
2820 vap->va_mask |= AT_SIZE;
2821 vap->va_size = sa->sa_size;
2822 }
2823 if (sa->sa_atime.tv_sec != (int32_t)-1 &&
2824 sa->sa_atime.tv_usec != (int32_t)-1) {
2825 #ifndef _LP64
2826 /* return error if time overflow */
2827 if (!NFS2_TIME_OK(sa->sa_atime.tv_sec))
2828 return (EOVERFLOW);
2829 #endif
2830 vap->va_mask |= AT_ATIME;
2831 /*
2832 * nfs protocol defines times as unsigned so don't extend sign,
2833 * unless sysadmin set nfs_allow_preepoch_time.
2834 */
2835 NFS_TIME_T_CONVERT(vap->va_atime.tv_sec, sa->sa_atime.tv_sec);
2836 vap->va_atime.tv_nsec = (uint32_t)(sa->sa_atime.tv_usec * 1000);
2837 }
2838 if (sa->sa_mtime.tv_sec != (int32_t)-1 &&
2839 sa->sa_mtime.tv_usec != (int32_t)-1) {
2840 #ifndef _LP64
2841 /* return error if time overflow */
2842 if (!NFS2_TIME_OK(sa->sa_mtime.tv_sec))
2843 return (EOVERFLOW);
2844 #endif
2845 vap->va_mask |= AT_MTIME;
2846 /*
2847 * nfs protocol defines times as unsigned so don't extend sign,
2848 * unless sysadmin set nfs_allow_preepoch_time.
2849 */
2850 NFS_TIME_T_CONVERT(vap->va_mtime.tv_sec, sa->sa_mtime.tv_sec);
2851 vap->va_mtime.tv_nsec = (uint32_t)(sa->sa_mtime.tv_usec * 1000);
2852 }
2853 return (0);
2854 }
2855
2856 static enum nfsftype vt_to_nf[] = {
2857 0, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, 0, 0, 0, NFSOC, 0
2858 };
2859
2860 /*
 * Check the following fields for overflow: nodeid, size, and time.
2862 * There could be a problem when converting 64-bit LP64 fields
2863 * into 32-bit ones. Return an error if there is an overflow.
2864 */
2865 int
2866 vattr_to_nattr(struct vattr *vap, struct nfsfattr *na)
2867 {
2868 ASSERT(vap->va_type >= VNON && vap->va_type <= VBAD);
2869 na->na_type = vt_to_nf[vap->va_type];
2870
	if (vap->va_mode == (unsigned short)-1)
		na->na_mode = (uint32_t)-1;
	else
		na->na_mode = VTTOIF(vap->va_type) | vap->va_mode;

	if (vap->va_uid == (unsigned short)-1)
		na->na_uid = (uint32_t)-1;
	else if (vap->va_uid == UID_NOBODY)
		na->na_uid = (uint32_t)NFS_UID_NOBODY;
	else
		na->na_uid = vap->va_uid;

	if (vap->va_gid == (unsigned short)-1)
		na->na_gid = (uint32_t)-1;
	else if (vap->va_gid == GID_NOBODY)
		na->na_gid = (uint32_t)NFS_GID_NOBODY;
	else
		na->na_gid = vap->va_gid;
2889
	/*
	 * Do we need to check fsid for overflow?  It is 64-bit in the
	 * vattr, but are values wider than 32 bits ever generated?
	 */
2894 na->na_fsid = vap->va_fsid;
2895
2896 na->na_nodeid = vap->va_nodeid;
2897
2898 /*
2899 * Check to make sure that the nodeid is representable over the
2900 * wire without losing bits.
2901 */
2902 if (vap->va_nodeid != (u_longlong_t)na->na_nodeid)
		return (EFBIG);

	na->na_nlink = vap->va_nlink;
2905
2906 /*
2907 * Check for big files here, instead of at the caller. See
2908 * comments in cstat for large special file explanation.
2909 */
2910 if (vap->va_size > (u_longlong_t)MAXOFF32_T) {
2911 if ((vap->va_type == VREG) || (vap->va_type == VDIR))
2912 return (EFBIG);
2913 if ((vap->va_type == VBLK) || (vap->va_type == VCHR)) {
2914 /* UNKNOWN_SIZE | OVERFLOW */
2915 na->na_size = MAXOFF32_T;
2916 } else
2917 na->na_size = vap->va_size;
2918 } else
2919 na->na_size = vap->va_size;
2920
2921 /*
2922 * If the vnode times overflow the 32-bit times that NFS2
2923 * uses on the wire then return an error.
2924 */
2925 if (!NFS_VAP_TIME_OK(vap)) {
2926 return (EOVERFLOW);
2927 }
2928 na->na_atime.tv_sec = vap->va_atime.tv_sec;
2929 na->na_atime.tv_usec = vap->va_atime.tv_nsec / 1000;
2930
2931 na->na_mtime.tv_sec = vap->va_mtime.tv_sec;
2932 na->na_mtime.tv_usec = vap->va_mtime.tv_nsec / 1000;
2933
2934 na->na_ctime.tv_sec = vap->va_ctime.tv_sec;
2935 na->na_ctime.tv_usec = vap->va_ctime.tv_nsec / 1000;
2936
2937 /*
2938 * If the dev_t will fit into 16 bits then compress
2939 * it, otherwise leave it alone. See comments in
2940 * nfs_client.c.
2941 */
2942 if (getminor(vap->va_rdev) <= SO4_MAXMIN &&
2943 getmajor(vap->va_rdev) <= SO4_MAXMAJ)
2944 na->na_rdev = nfsv2_cmpdev(vap->va_rdev);
2945 else
2946 (void) cmpldev(&na->na_rdev, vap->va_rdev);
2947
2948 na->na_blocks = vap->va_nblocks;
2949 na->na_blocksize = vap->va_blksize;
2950
2951 /*
2952 * This bit of ugliness is a *TEMPORARY* hack to preserve the
2953 * over-the-wire protocols for named-pipe vnodes. It remaps the
2954 * VFIFO type to the special over-the-wire type. (see note in nfs.h)
2955 *
2956 * BUYER BEWARE:
2957 * If you are porting the NFS to a non-Sun server, you probably
2958 * don't want to include the following block of code. The
2959 * over-the-wire special file types will be changing with the
2960 * NFS Protocol Revision.
2961 */
2962 if (vap->va_type == VFIFO)
2963 NA_SETFIFO(na);
2964 return (0);
2965 }
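
/*
 * Summary of the overflow contract implemented above: EFBIG is
 * returned when the nodeid, or the size of a regular file or
 * directory, will not fit in the 32-bit over-the-wire fields;
 * EOVERFLOW is returned when a timestamp cannot be represented in
 * the 32-bit times that the v2 protocol carries.
 */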
2966
2967 /*
 * ACL v2 support: computes an approximate permission.
 * default: returns the minimal permission (more restrictive)
 * aclok:   returns the maximal permission (less restrictive)
 * This routine changes the permissions that are already in *va.
 * If a file has a minimal ACL, i.e. aclcnt == MIN_ACL_ENTRIES,
 * the CLASS_OBJ entry is always the same as the GROUP_OBJ entry.
2974 */
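/*
 * Worked example (illustrative): for the non-trivial ACL
 *	user::rwx  user:ann:rwx  group::r-x  mask:r-x  other:---
 * the maximal (EX_ACLOK) path ORs USER and GROUP_OBJ entries into
 * the group bits and masks them with CLASS_OBJ: (7 | 5) & 5 = 5
 * (r-x); the other bits are USER entries masked by CLASS_OBJ and
 * OR'ed with OTHER_OBJ: (7 & 5) | 0 = 5 (r-x).  The minimal path
 * ANDs instead: group = 7 & 7 & 5 & 5 = 5 (r-x) and
 * other = 7 & 7 & 5 & 0 = 0 (---).
 */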
2975 static void
2976 acl_perm(struct vnode *vp, struct exportinfo *exi, struct vattr *va, cred_t *cr)
2977 {
2978 vsecattr_t vsa;
2979 int aclcnt;
2980 aclent_t *aclentp;
2981 mode_t mask_perm;
2982 mode_t grp_perm;
2983 mode_t other_perm;
2984 mode_t other_orig;
2985 int error;
2986
	/* we don't care about the default ACL */
2988 vsa.vsa_mask = (VSA_ACL | VSA_ACLCNT);
2989 error = VOP_GETSECATTR(vp, &vsa, 0, cr, NULL);
2990
2991 if (!error) {
2992 aclcnt = vsa.vsa_aclcnt;
2993 if (aclcnt > MIN_ACL_ENTRIES) {
2994 /* non-trivial ACL */
2995 aclentp = vsa.vsa_aclentp;
2996 if (exi->exi_export.ex_flags & EX_ACLOK) {
2997 /* maximal permissions */
2998 grp_perm = 0;
2999 other_perm = 0;
3000 for (; aclcnt > 0; aclcnt--, aclentp++) {
3001 switch (aclentp->a_type) {
3002 case USER_OBJ:
3003 break;
3004 case USER:
3005 grp_perm |=
3006 aclentp->a_perm << 3;
3007 other_perm |= aclentp->a_perm;
3008 break;
3009 case GROUP_OBJ:
3010 grp_perm |=
3011 aclentp->a_perm << 3;
3012 break;
3013 case GROUP:
3014 other_perm |= aclentp->a_perm;
3015 break;
3016 case OTHER_OBJ:
3017 other_orig = aclentp->a_perm;
3018 break;
3019 case CLASS_OBJ:
3020 mask_perm = aclentp->a_perm;
3021 break;
3022 default:
3023 break;
3024 }
3025 }
3026 grp_perm &= mask_perm << 3;
3027 other_perm &= mask_perm;
3028 other_perm |= other_orig;
3029
3030 } else {
3031 /* minimal permissions */
3032 grp_perm = 070;
3033 other_perm = 07;
3034 for (; aclcnt > 0; aclcnt--, aclentp++) {
3035 switch (aclentp->a_type) {
3036 case USER_OBJ:
3037 break;
3038 case USER:
3039 case CLASS_OBJ:
3040 grp_perm &=
3041 aclentp->a_perm << 3;
3042 other_perm &=
3043 aclentp->a_perm;
3044 break;
3045 case GROUP_OBJ:
3046 grp_perm &=
3047 aclentp->a_perm << 3;
3048 break;
3049 case GROUP:
3050 other_perm &=
3051 aclentp->a_perm;
3052 break;
3053 case OTHER_OBJ:
3054 other_perm &=
3055 aclentp->a_perm;
3056 break;
3057 default:
3058 break;
3059 }
3060 }
3061 }
3062 /* copy to va */
3063 va->va_mode &= ~077;
3064 va->va_mode |= grp_perm | other_perm;
3065 }
3066 if (vsa.vsa_aclcnt)
3067 kmem_free(vsa.vsa_aclentp,
3068 vsa.vsa_aclcnt * sizeof (aclent_t));
3069 }
3070 }
3071
3072 void
3073 rfs_srvrinit(void)
3074 {
3075 mutex_init(&rfs_async_write_lock, NULL, MUTEX_DEFAULT, NULL);
3076 nfs2_srv_caller_id = fs_new_caller_id();
3077 }
3078
3079 void
3080 rfs_srvrfini(void)
3081 {
3082 mutex_destroy(&rfs_async_write_lock);
3083 }
3084
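/*
 * Prepare the RDMA write list for a READ reply: size the
 * client-provided write chunk list (ra_wlist) to the rr_count bytes
 * actually read, and record the resulting list and its length in
 * the reply.  Returns TRUE on success, FALSE if the chunks cannot
 * be set up.
 */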
3085 static int
3086 rdma_setup_read_data2(struct nfsreadargs *ra, struct nfsrdresult *rr)
3087 {
3088 struct clist *wcl;
3089 int wlist_len;
3090 uint32_t count = rr->rr_count;
3091
3092 wcl = ra->ra_wlist;
3093
3094 if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE) {
3095 return (FALSE);
3096 }
3097
3098 wcl = ra->ra_wlist;
3099 rr->rr_ok.rrok_wlist_len = wlist_len;
3100 rr->rr_ok.rrok_wlist = wcl;
3101
3102 return (TRUE);
3103 }